[med-svn] [Git][med-team/plink2][master] 3 commits: New upstream version 2.00~a3-200409+dfsg
Dylan Aïssi
gitlab at salsa.debian.org
Tue Apr 21 08:12:41 BST 2020
Dylan Aïssi pushed to branch master at Debian Med / plink2
Commits:
394d8a28 by Dylan Aïssi at 2020-04-21T09:10:33+02:00
New upstream version 2.00~a3-200409+dfsg
- - - - -
2e9eabde by Dylan Aïssi at 2020-04-21T09:10:38+02:00
Update upstream source from tag 'upstream/2.00_a3-200409+dfsg'
Update to upstream version '2.00~a3-200409+dfsg'
with Debian dir 3aa9984f824459d1a5e7ef93fb0896f29ab2a716
- - - - -
43c5befa by Dylan Aïssi at 2020-04-21T09:12:06+02:00
Update changelogs
- - - - -
8 changed files:
- debian/changelog
- debian/upstream.docs/upstream.changelog
- plink2.cc
- plink2_data.cc
- plink2_data.h
- plink2_glm.cc
- plink2_glm.h
- plink2_help.cc
Changes:
=====================================
debian/changelog
=====================================
@@ -1,4 +1,4 @@
-plink2 (2.00~a3-200321+dfsg-1) UNRELEASED; urgency=medium
+plink2 (2.00~a3-200409+dfsg-1) UNRELEASED; urgency=medium
* New upstream release.
=====================================
debian/upstream.docs/upstream.changelog
=====================================
@@ -1,6 +1,10 @@
# Copy/Paste from https://www.cog-genomics.org/plink/2.0/
-21 Mar 2020: Fixed --glm multiallelic-variant handling bugs that could occur when 'genotypic', 'hethom', 'dominant', 'recessive', 'interaction', or --tests was specified, and corrected 'dominant'/'recessive' documentation. It is no longer necessary to trim zero- (or other-constant-) dosage alleles from multiallelic variants to get --glm results for the other alleles.
+9 Apr 2020: Firth regression implementation now uses the same maxit=25 value as R logistf(). 'UNFINISHED' error code added to flag logistic/Firth regression results which would change with even more iterations.
+
+28 Mar: Fixed --glm bug in 21 Mar build that caused segfaults when zero-MAF biallelic variants were present. --glm now errors out when no covariate file is specified, unless the 'allow-no-covars' modifier is specified.
+
+21 Mar: Fixed --glm multiallelic-variant handling bugs that could occur when 'genotypic', 'hethom', 'dominant', 'recessive', 'interaction', or --tests was specified, and corrected 'dominant'/'recessive' documentation. It is no longer necessary to trim zero- (or other-constant-) dosage alleles from multiallelic variants to get --glm results for the other alleles.
14 Mar: --make-pgen/--make-just-pvar 'vcfheader' column set added (this makes it possible to directly generate a valid sites-only VCF). Bgzipping of the .pvar file is not directly supported, but you can use a named pipe to accomplish that with low overhead.
=====================================
plink2.cc
=====================================
@@ -66,10 +66,10 @@ static const char ver_str[] = "PLINK v2.00a3"
#ifdef USE_MKL
" Intel"
#endif
- " (21 Mar 2020)";
+ " (9 Apr 2020)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
- ""
+ " "
#ifndef LAPACK_ILP64
" "
#endif
@@ -5171,6 +5171,7 @@ int main(int argc, char** argv) {
goto main_ret_INVALID_CMDLINE_2A;
}
uint32_t explicit_firth_fallback = 0;
+ uint32_t glm_allow_no_covars = 0;
for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
const char* cur_modif = argvk[arg_idx + param_idx];
const uint32_t cur_modif_slen = strlen(cur_modif);
@@ -5313,7 +5314,7 @@ int main(int argc, char** argv) {
goto main_ret_INVALID_CMDLINE_A;
}
pc.glm_info.flags |= kfGlmLocalCats1based;
- } else if (likely(StrStartsWith(cur_modif, "local-cats0=", cur_modif_slen))) {
+ } else if (StrStartsWith(cur_modif, "local-cats0=", cur_modif_slen)) {
if (unlikely(pc.glm_info.local_cat_ct)) {
logerrputs("Error: Multiple --glm local-cats[0]= modifiers.\n");
goto main_ret_INVALID_CMDLINE;
@@ -5322,6 +5323,8 @@ int main(int argc, char** argv) {
logerrputs("Error: Invalid --glm local-cats0= category count (must be in [2, 4095]).\n");
goto main_ret_INVALID_CMDLINE_A;
}
+ } else if (likely(strequal_k(cur_modif, "allow-no-covars", cur_modif_slen))) {
+ glm_allow_no_covars = 1;
} else {
snprintf(g_logbuf, kLogbufSize, "Error: Invalid --glm argument '%s'.\n", cur_modif);
goto main_ret_INVALID_CMDLINE_WWA;
@@ -5371,6 +5374,10 @@ int main(int argc, char** argv) {
logerrputs("Error: --glm 'local-cats[0]=' must be used with 'local-covar='.\n");
goto main_ret_INVALID_CMDLINE_A;
}
+ if (unlikely((!pc.covar_fname) && (!pc.covar_range_list.name_ct) && (!glm_allow_no_covars))) {
+ logerrputs("Error: --glm invoked without --covar/--covar-name/--covar-col-nums; this is\nusually an analytical mistake. Add the 'allow-no-covars' modifier if you are\nsure you want this.\n");
+ goto main_ret_INVALID_CMDLINE_A;
+ }
} else {
if (unlikely(pc.glm_local_pvar_fname && pc.glm_info.local_first_covar_col)) {
logerrputs("Error: --glm local-pvar= and local-pos-cols= cannot be used together.\n");
@@ -6305,7 +6312,7 @@ int main(int argc, char** argv) {
if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 0, 7))) {
goto main_ret_INVALID_CMDLINE_2A;
}
- uint32_t vid_semicolon = 0;
+ uint32_t varid_semicolon = 0;
for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
const char* cur_modif = argvk[arg_idx + param_idx];
const uint32_t cur_modif_slen = strlen(cur_modif);
@@ -6355,14 +6362,14 @@ int main(int argc, char** argv) {
make_plink2_flags |= kfMakePlink2TrimAlts;
} else if (strequal_k(cur_modif, "erase-alt2+", cur_modif_slen)) {
make_plink2_flags |= kfMakePlink2EraseAlt2Plus;
- } else if (strequal_k(cur_modif, "vid-split", cur_modif_slen)) {
- vid_semicolon |= 1;
- } else if (strequal_k(cur_modif, "vid-split-dup", cur_modif_slen)) {
- vid_semicolon |= 2;
- } else if (strequal_k(cur_modif, "vid-join", cur_modif_slen)) {
- vid_semicolon |= 4;
- } else if (strequal_k(cur_modif, "vid-dup", cur_modif_slen)) {
- make_plink2_flags |= kfMakePlink2VidDup;
+ } else if (strequal_k(cur_modif, "varid-split", cur_modif_slen)) {
+ varid_semicolon |= 1;
+ } else if (strequal_k(cur_modif, "varid-split-dup", cur_modif_slen)) {
+ varid_semicolon |= 2;
+ } else if (strequal_k(cur_modif, "varid-join", cur_modif_slen)) {
+ varid_semicolon |= 4;
+ } else if (strequal_k(cur_modif, "varid-dup", cur_modif_slen)) {
+ make_plink2_flags |= kfMakePlink2VaridDup;
} else if (strequal_k(cur_modif, "erase-phase", cur_modif_slen)) {
make_plink2_flags |= kfMakePgenErasePhase;
} else if (likely(strequal_k(cur_modif, "erase-dosage", cur_modif_slen))) {
@@ -6376,29 +6383,29 @@ int main(int argc, char** argv) {
logerrputs("Error: --make-bpgen 'trim-alts' and 'erase-alt2+' modifiers cannot be used\ntogether.\n");
goto main_ret_INVALID_CMDLINE_A;
}
- if (vid_semicolon) {
- if (unlikely((make_plink2_flags & kfMakePlink2VidDup) || (vid_semicolon & (vid_semicolon - 1)))) {
- logerrputs("Error: --make-bpgen 'vid-split', 'vid-split-dup', 'vid-dup', and 'vid-join'\nmodifiers are mutually exclusive.\n");
+ if (varid_semicolon) {
+ if (unlikely((make_plink2_flags & kfMakePlink2VaridDup) || (varid_semicolon & (varid_semicolon - 1)))) {
+ logerrputs("Error: --make-bpgen 'varid-split', 'varid-split-dup', 'varid-dup', and\n'varid-join' modifiers are mutually exclusive.\n");
goto main_ret_INVALID_CMDLINE_A;
}
- if (vid_semicolon & 3) {
+ if (varid_semicolon & 3) {
if (unlikely((make_plink2_flags & kfMakePlink2MJoin) || (!(make_plink2_flags & kfMakePlink2MMask)))) {
- logerrputs("Error: --make-bpgen 'vid-split' must be used with a multiallelics= split mode.\n");
+ logerrputs("Error: --make-bpgen 'varid-split' must be used with a multiallelics= split\nmode.\n");
goto main_ret_INVALID_CMDLINE_A;
}
} else {
if (unlikely(!(make_plink2_flags & kfMakePlink2MJoin))) {
- logerrputs("Error: --make-bpgen 'vid-join' must be used with a multiallelics= join mode.\n");
+ logerrputs("Error: --make-bpgen 'varid-join' must be used with a multiallelics= join mode.\n");
goto main_ret_INVALID_CMDLINE_A;
}
}
- make_plink2_flags |= kfMakePlink2VidSemicolon;
- if (vid_semicolon == 2) {
- make_plink2_flags |= kfMakePlink2VidDup;
+ make_plink2_flags |= kfMakePlink2VaridSemicolon;
+ if (varid_semicolon == 2) {
+ make_plink2_flags |= kfMakePlink2VaridDup;
}
- } else if (make_plink2_flags & kfMakePlink2VidDup) {
+ } else if (make_plink2_flags & kfMakePlink2VaridDup) {
if (unlikely((make_plink2_flags & kfMakePlink2MJoin) || (!(make_plink2_flags & kfMakePlink2MMask)))) {
- logerrputs("Error: --make-bpgen 'vid-dup' must be used with a multiallelics= split mode.\n");
+ logerrputs("Error: --make-bpgen 'varid-dup' must be used with a multiallelics= split mode.\n");
goto main_ret_INVALID_CMDLINE_A;
}
}
@@ -6423,7 +6430,7 @@ int main(int argc, char** argv) {
}
uint32_t explicit_pvar_cols = 0;
uint32_t explicit_psam_cols = 0;
- uint32_t vid_semicolon = 0;
+ uint32_t varid_semicolon = 0;
for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
const char* cur_modif = argvk[arg_idx + param_idx];
const uint32_t cur_modif_slen = strlen(cur_modif);
@@ -6486,14 +6493,14 @@ int main(int argc, char** argv) {
make_plink2_flags |= kfMakePlink2TrimAlts;
} else if (strequal_k(cur_modif, "erase-alt2+", cur_modif_slen)) {
make_plink2_flags |= kfMakePlink2EraseAlt2Plus;
- } else if (strequal_k(cur_modif, "vid-split", cur_modif_slen)) {
- vid_semicolon |= 1;
- } else if (strequal_k(cur_modif, "vid-split-dup", cur_modif_slen)) {
- vid_semicolon |= 2;
- } else if (strequal_k(cur_modif, "vid-join", cur_modif_slen)) {
- vid_semicolon |= 4;
- } else if (strequal_k(cur_modif, "vid-dup", cur_modif_slen)) {
- make_plink2_flags |= kfMakePlink2VidDup;
+ } else if (strequal_k(cur_modif, "varid-split", cur_modif_slen)) {
+ varid_semicolon |= 1;
+ } else if (strequal_k(cur_modif, "varid-split-dup", cur_modif_slen)) {
+ varid_semicolon |= 2;
+ } else if (strequal_k(cur_modif, "varid-join", cur_modif_slen)) {
+ varid_semicolon |= 4;
+ } else if (strequal_k(cur_modif, "varid-dup", cur_modif_slen)) {
+ make_plink2_flags |= kfMakePlink2VaridDup;
} else if (strequal_k(cur_modif, "erase-phase", cur_modif_slen)) {
make_plink2_flags |= kfMakePgenErasePhase;
} else if (strequal_k(cur_modif, "erase-dosage", cur_modif_slen)) {
@@ -6517,25 +6524,25 @@ int main(int argc, char** argv) {
logerrputs("Error: --make-pgen 'trim-alts' and 'erase-alt2+' modifiers cannot be used\ntogether.\n");
goto main_ret_INVALID_CMDLINE_A;
}
- if (vid_semicolon) {
- if (unlikely((make_plink2_flags & kfMakePlink2VidDup) || (vid_semicolon & (vid_semicolon - 1)))) {
- logerrputs("Error: --make-pgen 'vid-split', 'vid-split-dup', 'vid-dup', and 'vid-join'\nmodifiers are mutually exclusive.\n");
+ if (varid_semicolon) {
+ if (unlikely((make_plink2_flags & kfMakePlink2VaridDup) || (varid_semicolon & (varid_semicolon - 1)))) {
+ logerrputs("Error: --make-pgen 'varid-split', 'varid-split-dup', 'varid-dup', and\n'varid-join' modifiers are mutually exclusive.\n");
goto main_ret_INVALID_CMDLINE_A;
}
- if (vid_semicolon & 3) {
+ if (varid_semicolon & 3) {
if (unlikely((make_plink2_flags & kfMakePlink2MJoin) || (!(make_plink2_flags & kfMakePlink2MMask)))) {
- logerrputs("Error: --make-pgen 'vid-split' must be used with a multiallelics= split mode.\n");
+ logerrputs("Error: --make-pgen 'varid-split' must be used with a multiallelics= split mode.\n");
goto main_ret_INVALID_CMDLINE_A;
}
} else {
if (unlikely(!(make_plink2_flags & kfMakePlink2MJoin))) {
- logerrputs("Error: --make-pgen 'vid-join' must be used with a multiallelics= join mode.\n");
+ logerrputs("Error: --make-pgen 'varid-join' must be used with a multiallelics= join mode.\n");
goto main_ret_INVALID_CMDLINE_A;
}
}
- make_plink2_flags |= kfMakePlink2VidSemicolon;
- if (vid_semicolon == 2) {
- make_plink2_flags |= kfMakePlink2VidDup;
+ make_plink2_flags |= kfMakePlink2VaridSemicolon;
+ if (varid_semicolon == 2) {
+ make_plink2_flags |= kfMakePlink2VaridDup;
}
}
if (!explicit_pvar_cols) {
=====================================
plink2_data.cc
=====================================
@@ -3218,7 +3218,7 @@ PglErr PlanMultiallelicSplit(const uintptr_t* variant_include, const uintptr_t*
// Returns 1 iff there are exactly (allele_ct - 2) semicolons in
// orig_variant_id, and no two are adjacent (or leading/trailing).
-uint32_t VidSplitOk(const char* orig_variant_id, uint32_t allele_ct) {
+uint32_t VaridSplitOk(const char* orig_variant_id, uint32_t allele_ct) {
const char* id_iter = orig_variant_id;
for (uint32_t aidx = 2; aidx != allele_ct; ++aidx) {
const char* tok_end = strchr(id_iter, ';');
@@ -3233,7 +3233,7 @@ uint32_t VidSplitOk(const char* orig_variant_id, uint32_t allele_ct) {
// Similar to WriteMapOrBim(), but there are enough small differences to
// justify making this a separate function instead of clogging the original
// with more conditionals.
-PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t vid_split, uint32_t vid_dup, MiscFlags misc_flags, uint32_t output_zst, uint32_t thread_ct) {
+PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t varid_split, uint32_t varid_dup, MiscFlags misc_flags, uint32_t output_zst, uint32_t thread_ct) {
unsigned char* bigstack_mark = g_bigstack_base;
char* cswritep = nullptr;
CompressStreamState css;
@@ -3247,7 +3247,7 @@ PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, cons
goto WriteBimSplit_ret_NOMEM;
}
const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
- const uint32_t vid_dup_nosplit = vid_dup && (!vid_split);
+ const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
VaridTemplate* varid_templatep = nullptr;
uint32_t missing_varid_slen = 0;
uint32_t missing_varid_match_blen = 0; // nonzero iff --set-missing-var-ids
@@ -3264,7 +3264,7 @@ PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, cons
}
const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
- if (vid_dup) {
+ if (varid_dup) {
for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
const uint32_t insert_type = varid_templatep->insert_types[uii];
if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
@@ -3313,16 +3313,16 @@ PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, cons
const char* ref_allele = cur_alleles[0];
const uint32_t ref_allele_slen = strlen(ref_allele);
uint32_t keep_orig_id = 1;
- if ((orig_allele_ct > 2) && (!vid_dup_nosplit)) {
+ if ((orig_allele_ct > 2) && (!varid_dup_nosplit)) {
keep_orig_id = 0;
if (varid_templatep && (!missing_varid_match_blen)) {
cur_varid_templatep = varid_templatep;
} else {
cur_varid_templatep = nullptr;
- if (vid_split) {
- if (VidSplitOk(orig_variant_id, orig_allele_ct)) {
+ if (varid_split) {
+ if (VaridSplitOk(orig_variant_id, orig_allele_ct)) {
varid_token_start = orig_variant_id;
- } else if (vid_dup) {
+ } else if (varid_dup) {
keep_orig_id = 1;
} else {
varid_token_start = nullptr;
@@ -3584,7 +3584,7 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
goto WritePvarSplit_ret_NOMEM;
}
const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
- const uint32_t vid_dup = (make_plink2_flags / kfMakePlink2VidDup) & 1;
+ const uint32_t varid_dup = (make_plink2_flags / kfMakePlink2VaridDup) & 1;
VaridTemplate* varid_templatep = nullptr;
if (!missing_varid_match) {
missing_varid_match = &(g_one_char_strs[92]); // '.'
@@ -3600,7 +3600,7 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
}
const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
- if (vid_dup) {
+ if (varid_dup) {
for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
const uint32_t insert_type = varid_templatep->insert_types[uii];
if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
@@ -3719,8 +3719,8 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
const VaridTemplate* cur_varid_templatep = nullptr;
const char* varid_token_start = nullptr; // for vid-split
- const uint32_t vid_split = (make_plink2_flags / kfMakePlink2VidSemicolon) & 1;
- const uint32_t vid_dup_nosplit = vid_dup && (!vid_split);
+ const uint32_t varid_split = (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1;
+ const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
const uint32_t split_just_snps = ((make_plink2_flags & (kfMakePlink2MSplitBase * 3)) == kfMakePlink2MSplitSnps);
uint32_t trs_variant_uidx = 0;
uintptr_t variant_uidx_base = 0;
@@ -3766,16 +3766,16 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
uint32_t split_ct_p1 = orig_allele_ct;
uint32_t keep_orig_id = 1;
if (orig_allele_ct > 2) {
- if (!vid_dup_nosplit) {
+ if (!varid_dup_nosplit) {
keep_orig_id = 0;
if (varid_templatep && (!missing_varid_match_blen)) {
cur_varid_templatep = varid_templatep;
} else {
cur_varid_templatep = nullptr;
- if (vid_split) {
- if (VidSplitOk(orig_variant_id, orig_allele_ct)) {
+ if (varid_split) {
+ if (VaridSplitOk(orig_variant_id, orig_allele_ct)) {
varid_token_start = orig_variant_id;
- } else if (vid_dup) {
+ } else if (varid_dup) {
keep_orig_id = 1;
} else {
varid_token_start = nullptr;
@@ -4230,7 +4230,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
goto WritePvarJoin_ret_NOMEM;
}
const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
- const uint32_t vid_dup = (make_plink2_flags / kfMakePlink2VidDup) & 1;
+ const uint32_t varid_dup = (make_plink2_flags / kfMakePlink2VaridDup) & 1;
VaridTemplate* varid_templatep = nullptr;
if (!missing_varid_match) {
missing_varid_match = &(g_one_char_strs[92]); // '.'
@@ -4246,7 +4246,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
}
const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
- if (vid_dup) {
+ if (varid_dup) {
for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
const uint32_t insert_type = varid_templatep->insert_types[uii];
if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
@@ -4368,7 +4368,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
}
info_end_key_idx = IdHtableFind("END", info_keys, info_keys_htable, strlen("END"), info_keys_htable_size);
if (info_end_key_idx != UINT32_MAX) {
- const int32_t knum = const_container_of[info_keys[info_end_key_idx], InfoVtype, key)->num;
+ const int32_t knum = const_container_of(info_keys[info_end_key_idx], InfoVtype, key)->num;
if ((knum != 1) && (knum != kInfoVtypeUnknown)) {
// TODO: verify type instead.
// but if number is not . or 1, this is not the INFO:END we're
@@ -4407,8 +4407,8 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
const VaridTemplate* cur_varid_templatep = nullptr;
const char* varid_token_start = nullptr; // for vid-split
- const uint32_t vid_split = (make_plink2_flags / kfMakePlink2VidSemicolon) & 1;
- const uint32_t vid_dup_nosplit = vid_dup && (!vid_split);
+ const uint32_t varid_split = (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1;
+ const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
uint32_t next_variant_idx = 0;
uint32_t trs_variant_uidx = 0;
uint32_t next_variant_uidx = 0;
@@ -4468,8 +4468,8 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
JoinVtype jvt = JoinCount(cur_alleles, allele_ct, &next_jc);
// previously validated
- if ((join_mode == kfMakePlink2MJoinSnps) && ) {
- }
+ // if ((join_mode == kfMakePlink2MJoinSnps) && ()) {
+ // }
// TODO
jc.snp_ct += next_jc.snp_ct;
@@ -4482,75 +4482,56 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
// No join needed. This is usually the common case, so we duplicate a
// bunch of code for the sake of avoiding slowdown here.
cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
- cswritep = u32toa_x(variant_bps[variant_uidx], '\t', cswritep);
- cswritep = strcpyax(cswritep, variant_ids[variant_uidx], '\t');
+ cswritep = u32toa_x(variant_bps[bp_start_variant_uidx], '\t', cswritep);
+ cswritep = strcpyax(cswritep, variant_ids[bp_start_variant_uidx], '\t');
uintptr_t allele_idx_offset_base;
if (!allele_idx_offsets) {
- allele_idx_offset_base = variant_uidx * 2;
+ allele_idx_offset_base = bp_start_variant_uidx * 2;
} else {
- allele_idx_offset_base = allele_idx_offsets[variant_uidx];
- cur_allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
+ allele_idx_offset_base = allele_idx_offsets[bp_start_variant_uidx];
+ allele_ct = allele_idx_offsets[bp_start_variant_uidx + 1] - allele_idx_offset_base;
}
const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
- if (refalt1_select) {
- ref_allele_idx = refalt1_select[variant_uidx][0];
- alt1_allele_idx = refalt1_select[variant_uidx][1];
- }
- cswritep = strcpyax(cswritep, cur_alleles[ref_allele_idx], '\t');
- uint32_t alt_allele_written = 0;
- if ((!allele_presents) || IsSet(allele_presents, allele_idx_offset_base + alt1_allele_idx)) {
- cswritep = strcpya(cswritep, cur_alleles[alt1_allele_idx]);
- alt_allele_written = 1;
- }
+ cswritep = strcpyax(cswritep, cur_alleles[0], '\t');
+ cswritep = strcpya(cswritep, cur_alleles[1]);
if (unlikely(Cswrite(&css, &cswritep))) {
- goto WritePvar_ret_WRITE_FAIL;
+ goto WritePvarJoin_ret_WRITE_FAIL;
}
- if (cur_allele_ct > 2) {
- for (uint32_t allele_idx = 0; allele_idx != cur_allele_ct; ++allele_idx) {
- if ((allele_idx == ref_allele_idx) || (allele_idx == alt1_allele_idx) || (allele_presents && (!IsSet(allele_presents, allele_idx_offset_base + allele_idx)))) {
- continue;
- }
- if (alt_allele_written) {
- *cswritep++ = ',';
- }
- alt_allele_written = 1;
- cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
- if (unlikely(Cswrite(&css, &cswritep))) {
- goto WritePvar_ret_WRITE_FAIL;
- }
+ for (uint32_t allele_idx = 2; allele_idx != allele_ct; ++allele_idx) {
+ *cswritep++ = ',';
+ cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
+ if (unlikely(Cswrite(&css, &cswritep))) {
+ goto WritePvarJoin_ret_WRITE_FAIL;
}
}
- if (!alt_allele_written) {
- *cswritep++ = output_missing_geno_char;
- }
if (write_qual) {
*cswritep++ = '\t';
- if ((!qual_present) || (!IsSet(qual_present, variant_uidx))) {
+ if ((!qual_present) || (!IsSet(qual_present, bp_start_variant_uidx))) {
*cswritep++ = '.';
} else {
- cswritep = ftoa_g(quals[variant_uidx], cswritep);
+ cswritep = ftoa_g(quals[bp_start_variant_uidx], cswritep);
}
}
if (write_filter) {
*cswritep++ = '\t';
- if ((!filter_present) || (!IsSet(filter_present, variant_uidx))) {
+ if ((!filter_present) || (!IsSet(filter_present, bp_start_variant_uidx))) {
*cswritep++ = '.';
- } else if (!IsSet(filter_npass, variant_uidx)) {
+ } else if (!IsSet(filter_npass, bp_start_variant_uidx)) {
cswritep = strcpya_k(cswritep, "PASS");
} else {
- cswritep = strcpya(cswritep, filter_storage[variant_uidx]);
+ cswritep = strcpya(cswritep, filter_storage[bp_start_variant_uidx]);
}
}
if (write_info) {
*cswritep++ = '\t';
- const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, variant_uidx));
+ const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, bp_start_variant_uidx));
if (pvar_info_line_iter) {
- reterr = PvarInfoReloadAndWrite(info_pr_flag_present, info_col_idx, variant_uidx, is_pr, &pvar_reload_txs, &pvar_info_line_iter, &cswritep, &trs_variant_uidx);
+ reterr = PvarInfoReloadAndWrite(info_pr_flag_present, info_col_idx, bp_start_variant_uidx, is_pr, &pvar_reload_txs, &pvar_info_line_iter, &cswritep, &trs_variant_uidx);
if (unlikely(reterr)) {
- goto WritePvar_ret_TSTREAM_FAIL;
+ goto WritePvarJoin_ret_TSTREAM_FAIL;
}
} else {
if (is_pr) {
@@ -4566,50 +4547,26 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
if (!variant_cms) {
*cswritep++ = '0';
} else {
- cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
+ cswritep = dtoa_g_p8(variant_cms[bp_start_variant_uidx], cswritep);
}
}
AppendBinaryEoln(&cswritep);
- } else {
+ // next_jc guaranteed to be zero-initialized
+ } else if (next_variant_idx) {
// TODO
- next_jc.snp_ct = 0;
- next_jc.nonsnp_ct = 0;
- next_jc.symbolic_ct = 0;
- next_jc.missalt_snp_ct = 0;
- next_jc.missalt_nonsnp_ct = 0;
- }
- if (next_variant_idx == variant_ct) {
- break;
- }
- this_pos_write_variant_ct = 0;
- jc = next_jc;
- prev_bp = cur_bp;
- bp_start_variant_idx = next_variant_idx;
- bp_start_variant_uidx = next_variant_uidx;
- if (next_variant_idx >= next_print_variant_idx) {
- if (pct > 10) {
- putc_unlocked('\b', stdout);
- }
- pct = (next_variant_idx * 100LLU) / variant_ct;
- printf("\b\b%u%%", pct++);
- fflush(stdout);
- next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
- }
- ;;;
-
-
+ ;;;;
const char* orig_variant_id = variant_ids[variant_uidx];
const char* ref_allele = cur_alleles[0];
const uint32_t ref_allele_slen = strlen(ref_allele);
uint32_t split_ct_p1 = allele_ct;
if (allele_ct > 2) {
- if (!vid_dup) {
+ if (!varid_dup) {
if (varid_templatep && (!missing_varid_match_blen)) {
cur_varid_templatep = varid_templatep;
} else {
cur_varid_templatep = nullptr;
- if (vid_split) {
- if (VidSplitOk(orig_variant_id, allele_ct)) {
+ if (varid_split) {
+ if (VaridSplitOk(orig_variant_id, allele_ct)) {
varid_token_start = orig_variant_id;
} else {
varid_token_start = nullptr;
@@ -4624,6 +4581,30 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
}
}
}
+ ;;;;
+ next_jc.snp_ct = 0;
+ next_jc.nonsnp_ct = 0;
+ next_jc.symbolic_ct = 0;
+ next_jc.missalt_snp_ct = 0;
+ next_jc.missalt_nonsnp_ct = 0;
+ }
+ if (next_variant_idx == variant_ct) {
+ break;
+ }
+ // this_pos_write_variant_ct = 0;
+ jc = next_jc;
+ prev_bp = cur_bp;
+ bp_start_variant_idx = next_variant_idx;
+ bp_start_variant_uidx = next_variant_uidx;
+ if (next_variant_idx >= next_print_variant_idx) {
+ if (pct > 10) {
+ putc_unlocked('\b', stdout);
+ }
+ pct = (next_variant_idx * 100LLU) / variant_ct;
+ printf("\b\b%u%%", pct++);
+ fflush(stdout);
+ next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
+ }
}
if (unlikely(CswriteCloseNull(&css, cswritep))) {
goto WritePvarJoin_ret_WRITE_FAIL;
@@ -4638,7 +4619,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
reterr = kPglRetNomem;
break;
WritePvarJoin_ret_TSTREAM_FAIL:
- TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs, &reterr);
+ TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs);
break;
WritePvarJoin_ret_WRITE_FAIL:
reterr = kPglRetWriteFail;
@@ -4646,6 +4627,9 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
WritePvarJoin_ret_INVALID_CMDLINE:
reterr = kPglRetInvalidCmdline;
break;
+ WritePvarJoin_ret_INCONSISTENT_INPUT:
+ reterr = kPglRetInconsistentInput;
+ break;
}
WritePvarJoin_ret_1:
CswriteCloseCond(&css, cswritep);
@@ -6689,6 +6673,7 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
uint32_t max_write_allele_ct = max_allele_ct;
uint32_t max_missalt_ct = 0;
if (make_plink2_flags & kfMakePlink2MMask) {
+ // TODO: enforce on command line
assert((!refalt1_select) && (!allele_presents));
if (make_plink2_flags & kfMakePlink2MJoin) {
reterr = PlanMultiallelicJoin(variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, make_plink2_flags, &write_variant_ct, &write_allele_idx_offsets, &max_write_allele_ct, &max_missalt_ct);
@@ -6736,7 +6721,7 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
reterr = WriteMapOrBim(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, variant_cms, variant_ct, max_allele_slen, '\t', bim_zst, max_thread_ct);
} else {
assert(write_variant_ct > variant_ct);
- reterr = WriteBimSplit(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, variant_cms, varid_template_str, missing_varid_match, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, (make_plink2_flags / kfMakePlink2VidSemicolon) & 1, (make_plink2_flags / kfMakePlink2VidDup) & 1, misc_flags, bim_zst, max_thread_ct);
+ reterr = WriteBimSplit(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, variant_cms, varid_template_str, missing_varid_match, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1, (make_plink2_flags / kfMakePlink2VaridDup) & 1, misc_flags, bim_zst, max_thread_ct);
}
if (unlikely(reterr)) {
goto MakePlink2NoVsort_ret_1;
=====================================
plink2_data.h
=====================================
@@ -45,8 +45,8 @@ FLAGSET_DEF_START()
kfMakePlink2TrimAlts = (1 << 10),
kfMakePlink2MMask = kfMakePlink2TrimAlts - kfMakePlink2MSplitBase,
kfMakePlink2EraseAlt2Plus = (1 << 11),
- kfMakePlink2VidSemicolon = (1 << 12),
- kfMakePlink2VidDup = (1 << 13),
+ kfMakePlink2VaridSemicolon = (1 << 12),
+ kfMakePlink2VaridDup = (1 << 13),
kfMakePlink2SetHhMissing = (1 << 14),
kfMakePlink2SetHhMissingKeepDosage = (1 << 15),
=====================================
plink2_glm.cc
=====================================
@@ -1160,6 +1160,8 @@ ENUM_U31_DEF_START()
kGlmErrcodeLogisticConvergeFail,
kGlmErrcodeFirthConvergeFail,
kGlmErrcodeInvalidResult,
+ // no codes for logistic-unfinished and firth-unfinished for now since we
+ // still report results there
// only during initial scan
kGlmErrcodeUnstableScale
@@ -2721,7 +2723,7 @@ void SolveLinearSystem(const float* ll, const float* yy, uint32_t predictor_ct,
}
}
-BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef) {
+BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef) {
// Similar to first part of logistic.cpp fitLM(), but incorporates changes
// from Pascal Pons et al.'s TopCoder code.
//
@@ -2813,6 +2815,7 @@ BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct,
return 1;
}
}
+ *is_unfinished_ptr = 1;
return 0;
}
}
@@ -2874,7 +2877,7 @@ void FirthComputeWeights(const float* yy, const float* xx, const float* pp, cons
}
#endif
-BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* hh, double* half_inverted_buf, MatrixInvertBuf1* inv_1d_buf, double* dbl_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf) {
+BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* hh, double* half_inverted_buf, MatrixInvertBuf1* inv_1d_buf, double* dbl_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf) {
// This is a port of Georg Heinze's logistf R function, adapted to use many
// of plink 1.9's optimizations; see
// http://cemsiis.meduniwien.ac.at/en/kb/science-research/software/statistical-software/fllogistf/
@@ -2902,6 +2905,8 @@ BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, ui
// too)
//
// Returns 0 on success, 1 on convergence failure.
+ // is_unfinished assumed to be initialized to 0, and is set to 1 if we hit
+ // the iteration limit without satisfying other convergence criteria.
const uintptr_t predictor_ctav = RoundUpPow2(predictor_ct, kFloatPerFVec);
const uintptr_t sample_ctav = RoundUpPow2(sample_ct, kFloatPerFVec);
uint32_t is_last_iter = 0;
@@ -2934,15 +2939,18 @@ BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, ui
// bugfix (4 Nov 2017): grad[] trailing elements must be zeroed out
ZeroFArr(predictor_ctav - predictor_ct, &(grad[predictor_ct]));
- // start with 80% of logistf convergence defaults (some reduction is
- // appropriate to be consistent with single-precision arithmetic); may tune
- // later.
+ // start with 80% of most logistf convergence defaults (some reduction is
+ // appropriate to be consistent with single-precision arithmetic). (Update,
+ // 9 Apr 2020: max_iter now matches logistf default since we've been shown a
+ // concrete example where it matters in a way that isn't masked by the
+ // single- vs. double-precision difference.)
+ //
// see also the hs_bail condition: if we ever try all five halfsteps, when
// dcoef_max and grad_max aren't that far from the normal convergence
// conditions, it's probably pointless to continue with single-precision
// arithmetic. (possible todo: use a fully-double-precision routine to
// finish the job when that happens.)
- const uint32_t max_iter_m1 = 19;
+ const uint32_t max_iter_m1 = 24;
const float gconv = S_CAST(float, 0.0001);
const float xconv = S_CAST(float, 0.0001);
const double lconv = 0.0001;
@@ -3049,7 +3057,12 @@ BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, ui
}
// printf("%.9g %.9g %g %g\n", loglik, loglik_old, dcoef_max, grad_max);
const double loglik_change = loglik - loglik_old;
- is_last_iter = (iter_idx == max_iter_m1) || ((fabs(loglik_change) <= lconv) && (delta_and_grad_converged || hs_bail));
+ if ((fabs(loglik_change) <= lconv) && (delta_and_grad_converged || hs_bail)) {
+ is_last_iter = 1;
+ } else if (iter_idx == max_iter_m1) {
+ is_last_iter = 1;
+ *is_unfinished_ptr = 1;
+ }
}
}
@@ -3159,7 +3172,8 @@ typedef struct {
uint32_t allele_obs_ct;
double a1_dosage;
- uint32_t firth_fallback;
+ uint16_t firth_fallback;
+ uint16_t is_unfinished;
uint32_t case_allele_obs_ct;
double a1_case_dosage;
@@ -3671,12 +3685,13 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
// --xchr-model 1 corner case.)
if (!pgv.dosage_ct) {
if ((genocounts[0] == nm_sample_ct) || (genocounts[1] == nm_sample_ct) || (genocounts[2] == nm_sample_ct)) {
- // equivalent to SetBit(1 - omitted_allele_idx, const_alleles);
- const_alleles[0] = 2 >> omitted_allele_idx;
+ // bugfix (28 Mar 2020): didn't set the bit that actually
+ // mattered last week...
+ const_alleles[0] = 3;
}
} else if (pgv.dosage_ct == nm_sample_ct) {
if (DosageIsConstant(dosage_sum, dosage_ssq, nm_sample_ct)) {
- const_alleles[0] = 2 >> omitted_allele_idx;
+ const_alleles[0] = 3;
}
}
machr2_dosage_sums[1 - omitted_allele_idx] = dosage_sum;
@@ -4045,6 +4060,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
block_aux_iter[nonomitted_allele_idx].a1_case_dosage = a1_case_dosage;
block_aux_iter[nonomitted_allele_idx].firth_fallback = 0;
+ block_aux_iter[nonomitted_allele_idx].is_unfinished = 0;
block_aux_iter[nonomitted_allele_idx].mach_r2 = mach_r2;
++nonomitted_allele_idx;
}
@@ -4172,6 +4188,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
for (uint32_t extra_regression_idx = 0; extra_regression_idx <= extra_regression_ct; ++extra_regression_idx) {
float* main_vals = &(nm_predictors_pmaj_buf[nm_sample_ctav]);
float* domdev_vals = nullptr;
+ uint32_t is_unfinished = 0;
if (extra_regression_ct) {
if (IsSet(const_alleles, extra_regression_idx + (extra_regression_idx >= omitted_allele_idx))) {
glm_err = SetGlmErr0(kGlmErrcodeConstAllele);
@@ -4308,7 +4325,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
goto GlmLogisticThread_skip_regression;
}
}
- if (LogisticRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, cholesky_decomp_return, pp_buf, sample_variance_buf, hh_return, gradient_buf, dcoef_buf)) {
+ if (LogisticRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, &is_unfinished, cholesky_decomp_return, pp_buf, sample_variance_buf, hh_return, gradient_buf, dcoef_buf)) {
if (is_sometimes_firth) {
ZeroFArr(cur_predictor_ctav, coef_return);
goto GlmLogisticThread_firth_fallback;
@@ -4357,7 +4374,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
}
}
}
- if (FirthRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, hh_return, inverse_corr_buf, inv_1d_buf, dbl_2d_buf, pp_buf, sample_variance_buf, gradient_buf, dcoef_buf, score_buf, tmpnxk_buf)) {
+ if (FirthRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, &is_unfinished, hh_return, inverse_corr_buf, inv_1d_buf, dbl_2d_buf, pp_buf, sample_variance_buf, gradient_buf, dcoef_buf, score_buf, tmpnxk_buf)) {
glm_err = SetGlmErr0(kGlmErrcodeFirthConvergeFail);
goto GlmLogisticThread_skip_regression;
}
@@ -4384,6 +4401,14 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
}
}
}
+ if (is_unfinished) {
+ block_aux_iter[extra_regression_idx].is_unfinished = 1;
+ if (allele_ct_m2 && beta_se_multiallelic_fused) {
+ for (uint32_t uii = 1; uii != allele_ct - 1; ++uii) {
+ block_aux_iter[uii].is_unfinished = 1;
+ }
+ }
+ }
{
double* beta_se_iter2 = beta_se_iter;
for (uint32_t pred_uidx = reported_pred_uidx_start; pred_uidx != reported_pred_uidx_biallelic_end; ++pred_uidx) {
@@ -6183,7 +6208,11 @@ PglErr GlmLogistic(const char* cur_pheno_name, const char* const* test_names, co
if (err_col) {
*cswritep++ = '\t';
if (test_is_valid) {
- *cswritep++ = '.';
+ if (!auxp->is_unfinished) {
+ *cswritep++ = '.';
+ } else {
+ cswritep = strcpya_k(cswritep, "UNFINISHED");
+ }
} else {
uint64_t glm_errcode;
memcpy(&glm_errcode, &(beta_se_iter[2 * test_idx]), 8);
@@ -6786,11 +6815,11 @@ THREAD_FUNC_DECL GlmLinearThread(void* raw_arg) {
// --xchr-model 1 corner case.)
if (!pgv.dosage_ct) {
if ((genocounts[0] == nm_sample_ct) || (genocounts[1] == nm_sample_ct) || (genocounts[2] == nm_sample_ct)) {
- const_alleles[0] = 2 >> omitted_allele_idx;
+ const_alleles[0] = 3;
}
} else if (pgv.dosage_ct == nm_sample_ct) {
if (DosageIsConstant(dosage_sum, dosage_ssq, nm_sample_ct)) {
- const_alleles[0] = 2 >> omitted_allele_idx;
+ const_alleles[0] = 3;
}
}
machr2_dosage_sums[1 - omitted_allele_idx] = dosage_sum;
@@ -8849,11 +8878,11 @@ THREAD_FUNC_DECL GlmLinearSubbatchThread(void* raw_arg) {
// --xchr-model 1 corner case.)
if (!pgv.dosage_ct) {
if ((genocounts[0] == nm_sample_ct) || (genocounts[1] == nm_sample_ct) || (genocounts[2] == nm_sample_ct)) {
- const_alleles[0] = 2 >> omitted_allele_idx;
+ const_alleles[0] = 3;
}
} else if (pgv.dosage_ct == nm_sample_ct) {
if (DosageIsConstant(dosage_sum, dosage_ssq, nm_sample_ct)) {
- const_alleles[0] = 2 >> omitted_allele_idx;
+ const_alleles[0] = 3;
}
}
machr2_dosage_sums[1 - omitted_allele_idx] = dosage_sum;
@@ -10645,6 +10674,7 @@ PglErr GlmMain(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, c
}
assert(orig_variant_ct);
// common linear/logistic initialization
+ const GlmFlags glm_flags = glm_info_ptr->flags;
const uintptr_t* early_variant_include = orig_variant_include;
uint32_t* local_sample_uidx_order = nullptr;
uintptr_t* local_variant_include = nullptr;
@@ -10659,7 +10689,6 @@ PglErr GlmMain(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, c
}
}
- const GlmFlags glm_flags = glm_info_ptr->flags;
common.glm_flags = glm_flags;
common.dosage_presents = nullptr;
common.dosage_mains = nullptr;
=====================================
plink2_glm.h
=====================================
@@ -115,9 +115,9 @@ void CleanupGlm(GlmInfo* glm_info_ptr);
// for testing purposes
// plink2_matrix.h must be included in this file
-// BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef);
+// BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef) {
-// BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* hh, matrix_finvert_buf1_t* inv_1d_buf, float* flt_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf);
+// BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* hh, double* half_inverted_buf, MatrixInvertBuf1* inv_1d_buf, double* dbl_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf) {
PglErr GlmMain(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const PhenoCol* covar_cols, const char* covar_names, const uintptr_t* orig_variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const AlleleCode* maj_alleles, const char* const* allele_storage, const GlmInfo* glm_info_ptr, const AdjustInfo* adjust_info_ptr, const APerm* aperm_ptr, const char* local_covar_fname, const char* local_pvar_fname, const char* local_psam_fname, uint32_t raw_sample_ct, uint32_t orig_sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t orig_covar_ct, uintptr_t max_covar_name_blen, uint32_t raw_variant_ct, uint32_t orig_variant_ct, uint32_t max_variant_id_slen, uint32_t max_allele_slen, uint32_t xchr_model, double ci_size, double vif_thresh, double ln_pfilter, double output_min_ln, uint32_t max_thread_ct, uintptr_t pgr_alloc_cacheline_ct, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end);
=====================================
plink2_help.cc
=====================================
@@ -290,12 +290,12 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
/*
" --make-pgen ['vzs'] ['format='<code>] [{trim-alts | erase-alt2+}]\n"
" ['erase-phase'] ['erase-dosage'] ['multiallelics='<mode>]\n"
-" [{vid-split | vid-split-dup | vid-dup | vid-join}]\n"
+" [{varid-split | varid-split-dup | varid-dup | varid-join}]\n"
" ['pvar-cols='<col set descriptor>]\n"
" ['psam-cols='<col set descriptor>]\n"
" --make-bpgen ['vzs'] ['format='<code>] [{trim-alts | erase-alt2+}]\n"
" ['erase-phase'] ['erase-dosage'] ['multiallelics='<mode>]\n"
-" [{vid-split | vid-split-dup | vid-dup | vid-join}]\n"
+" [{varid-split | varid-split-dup | varid-dup | varid-join}]\n"
" --make-bed ['vzs'] [{trim-alts | erase-alt2+}]\n"
" ['multiallelics='<split mode>]\n"
*/
@@ -338,19 +338,21 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
" alleles are joined into a single multiallelic variant.\n"
" Symbolic ALT alleles are joined separately. Additional conditions apply\n"
" to them, and PLINK 2 usually errors out when they aren't met.\n"
-" When splitting with 'vid-dup', the new variants keep the original variant\n"
+" When splitting with 'varid-dup', the new variants keep the original\n"
+" variant\n"
" ID. Otherwise, new variant IDs are assigned as follows:\n"
" 1. If --set-all-var-ids was specified, that template is applied.\n"
" 2. Otherwise, if joining, and all source variant IDs are identical and\n"
" nonmissing, that ID is used.\n"
-" 3. Otherwise, if splitting with the 'vid-split' or 'vid-split-dup'\n"
+" 3. Otherwise, if splitting with the 'varid-split' or 'varid-split-dup'\n"
" modifier, the original variant ID is split on the ';' character when\n"
" the number of ';'s is correct. (When the post-split ID is the missing\n"
" code, --set-missing-var-ids is applied to it.) Similarly, if joining\n"
-" with the 'vid-join' modifier, the new variant ID is set to the\n"
+" with the 'varid-join' modifier, the new variant ID is set to the\n"
" original IDs joined with the ';' character when all original IDs are\n"
" nonmissing and contain exactly one ';' per extra ALT allele.\n"
-" 4. Otherwise, if splitting with 'vid-split-dup', the original ID is kept.\n"
+" 4. Otherwise, if splitting with 'varid-split-dup', the original ID is\n"
+" kept.\n"
" 5. Otherwise, if --set-missing-var-ids was specified, that template is\n"
" applied.\n"
" 6. Otherwise, the variant ID is set to the missing code.\n"
@@ -992,10 +994,10 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
HelpPrint("glm\0linear\0logistic\0assoc\0", &help_ctrl, 1,
" --glm ['zs'] ['omit-ref'] [{sex | no-x-sex}] ['log10'] ['pheno-ids']\n"
" [{genotypic | hethom | dominant | recessive}] ['interaction'] ['skip']\n"
-" ['hide-covar'] ['intercept'] [{no-firth | firth-fallback | firth}]\n"
-" ['cols='<col set desc.>] ['local-covar='<file>] ['local-psam='<file>]\n"
+" ['hide-covar'] [{no-firth | firth-fallback | firth}] ['intercept']\n"
+" ['cols='<col set desc>] ['local-covar='<file>] ['local-psam='<file>]\n"
" ['local-pos-cols='<key col #s> | 'local-pvar='<file>] ['local-haps']\n"
-" ['local-omit-last' | 'local-cats[0]='<category ct>]\n"
+" ['local-omit-last' | 'local-cats[0]='<category ct>] ['allow-no-covars']\n"
// " ['perm' | 'mperm='<value>] ['perm-count']\n"
" Basic association analysis on quantitative and/or case/control phenotypes.\n"
" For each variant, a linear (for quantitative traits) or logistic (for\n"
@@ -1037,6 +1039,10 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
" statistics are reported for all nonconstant predictors; 'hide-covar'\n"
" suppresses covariate-only results, while 'intercept' causes intercepts\n"
" to be reported.\n"
+" Since running --glm without at least e.g. principal component covariates\n"
+" is usually an analytical mistake, the 'allow-no-covars' modifier is now\n"
+" required when you're intentionally running --glm without a covariate\n"
+" file.\n"
" * By default, if the current phenotype and covariates are such that every\n"
" regression on a chromosome will fail, PLINK 2 errors out. To just skip\n"
" the phenotype or chromosome instead, add the 'skip' modifier.\n"
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/ac09b66d6e58edafb5a7d6c3bd202059933f3bdf...43c5befa322e49361c98fe3c6ee1423079948ba5
--
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/ac09b66d6e58edafb5a7d6c3bd202059933f3bdf...43c5befa322e49361c98fe3c6ee1423079948ba5
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200421/082118af/attachment-0001.html>
More information about the debian-med-commit
mailing list