[med-svn] [Git][med-team/plink2][master] 3 commits: New upstream version 2.00~a3-200409+dfsg

Dylan Aïssi gitlab at salsa.debian.org
Tue Apr 21 08:12:41 BST 2020



Dylan Aïssi pushed to branch master at Debian Med / plink2


Commits:
394d8a28 by Dylan Aïssi at 2020-04-21T09:10:33+02:00
New upstream version 2.00~a3-200409+dfsg
- - - - -
2e9eabde by Dylan Aïssi at 2020-04-21T09:10:38+02:00
Update upstream source from tag 'upstream/2.00_a3-200409+dfsg'

Update to upstream version '2.00~a3-200409+dfsg'
with Debian dir 3aa9984f824459d1a5e7ef93fb0896f29ab2a716
- - - - -
43c5befa by Dylan Aïssi at 2020-04-21T09:12:06+02:00
Update changelogs

- - - - -


8 changed files:

- debian/changelog
- debian/upstream.docs/upstream.changelog
- plink2.cc
- plink2_data.cc
- plink2_data.h
- plink2_glm.cc
- plink2_glm.h
- plink2_help.cc


Changes:

=====================================
debian/changelog
=====================================
@@ -1,4 +1,4 @@
-plink2 (2.00~a3-200321+dfsg-1) UNRELEASED; urgency=medium
+plink2 (2.00~a3-200409+dfsg-1) UNRELEASED; urgency=medium
 
   * New upstream release.
 


=====================================
debian/upstream.docs/upstream.changelog
=====================================
@@ -1,6 +1,10 @@
 # Copy/Paste from https://www.cog-genomics.org/plink/2.0/
 
-21 Mar 2020: Fixed --glm multiallelic-variant handling bugs that could occur when 'genotypic', 'hethom', 'dominant', 'recessive', 'interaction', or --tests was specified, and corrected 'dominant'/'recessive' documentation. It is no longer necessary to trim zero- (or other-constant-) dosage alleles from multiallelic variants to get --glm results for the other alleles.
+9 Apr 2020: Firth regression implementation now uses the same maxit=25 value as R logistf(). 'UNFINISHED' error code added to flag logistic/Firth regression results which would change with even more iterations.
+
+28 Mar: Fixed --glm bug in 21 Mar build that caused segfaults when zero-MAF biallelic variants were present. --glm now errors out when no covariate file is specified, unless the 'allow-no-covars' modifier is specified.
+
+21 Mar: Fixed --glm multiallelic-variant handling bugs that could occur when 'genotypic', 'hethom', 'dominant', 'recessive', 'interaction', or --tests was specified, and corrected 'dominant'/'recessive' documentation. It is no longer necessary to trim zero- (or other-constant-) dosage alleles from multiallelic variants to get --glm results for the other alleles.
 
 14 Mar: --make-pgen/--make-just-pvar 'vcfheader' column set added (this makes it possible to directly generate a valid sites-only VCF). Bgzipping of the .pvar file is not directly supported, but you can use a named pipe to accomplish that with low overhead.
 


=====================================
plink2.cc
=====================================
@@ -66,10 +66,10 @@ static const char ver_str[] = "PLINK v2.00a3"
 #ifdef USE_MKL
   " Intel"
 #endif
-  " (21 Mar 2020)";
+  " (9 Apr 2020)";
 static const char ver_str2[] =
   // include leading space if day < 10, so character length stays the same
-  ""
+  " "
 #ifndef LAPACK_ILP64
   "  "
 #endif
@@ -5171,6 +5171,7 @@ int main(int argc, char** argv) {
             goto main_ret_INVALID_CMDLINE_2A;
           }
           uint32_t explicit_firth_fallback = 0;
+          uint32_t glm_allow_no_covars = 0;
           for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
             const char* cur_modif = argvk[arg_idx + param_idx];
             const uint32_t cur_modif_slen = strlen(cur_modif);
@@ -5313,7 +5314,7 @@ int main(int argc, char** argv) {
                 goto main_ret_INVALID_CMDLINE_A;
               }
               pc.glm_info.flags |= kfGlmLocalCats1based;
-            } else if (likely(StrStartsWith(cur_modif, "local-cats0=", cur_modif_slen))) {
+            } else if (StrStartsWith(cur_modif, "local-cats0=", cur_modif_slen)) {
               if (unlikely(pc.glm_info.local_cat_ct)) {
                 logerrputs("Error: Multiple --glm local-cats[0]= modifiers.\n");
                 goto main_ret_INVALID_CMDLINE;
@@ -5322,6 +5323,8 @@ int main(int argc, char** argv) {
                 logerrputs("Error: Invalid --glm local-cats0= category count (must be in [2, 4095]).\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
+            } else if (likely(strequal_k(cur_modif, "allow-no-covars", cur_modif_slen))) {
+              glm_allow_no_covars = 1;
             } else {
               snprintf(g_logbuf, kLogbufSize, "Error: Invalid --glm argument '%s'.\n", cur_modif);
               goto main_ret_INVALID_CMDLINE_WWA;
@@ -5371,6 +5374,10 @@ int main(int argc, char** argv) {
               logerrputs("Error: --glm 'local-cats[0]=' must be used with 'local-covar='.\n");
               goto main_ret_INVALID_CMDLINE_A;
             }
+            if (unlikely((!pc.covar_fname) && (!pc.covar_range_list.name_ct) && (!glm_allow_no_covars))) {
+              logerrputs("Error: --glm invoked without --covar/--covar-name/--covar-col-nums; this is\nusually an analytical mistake.  Add the 'allow-no-covars' modifier if you are\nsure you want this.\n");
+              goto main_ret_INVALID_CMDLINE_A;
+            }
           } else {
             if (unlikely(pc.glm_local_pvar_fname && pc.glm_info.local_first_covar_col)) {
               logerrputs("Error: --glm local-pvar= and local-pos-cols= cannot be used together.\n");
@@ -6305,7 +6312,7 @@ int main(int argc, char** argv) {
           if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 0, 7))) {
             goto main_ret_INVALID_CMDLINE_2A;
           }
-          uint32_t vid_semicolon = 0;
+          uint32_t varid_semicolon = 0;
           for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
             const char* cur_modif = argvk[arg_idx + param_idx];
             const uint32_t cur_modif_slen = strlen(cur_modif);
@@ -6355,14 +6362,14 @@ int main(int argc, char** argv) {
               make_plink2_flags |= kfMakePlink2TrimAlts;
             } else if (strequal_k(cur_modif, "erase-alt2+", cur_modif_slen)) {
               make_plink2_flags |= kfMakePlink2EraseAlt2Plus;
-            } else if (strequal_k(cur_modif, "vid-split", cur_modif_slen)) {
-              vid_semicolon |= 1;
-            } else if (strequal_k(cur_modif, "vid-split-dup", cur_modif_slen)) {
-              vid_semicolon |= 2;
-            } else if (strequal_k(cur_modif, "vid-join", cur_modif_slen)) {
-              vid_semicolon |= 4;
-            } else if (strequal_k(cur_modif, "vid-dup", cur_modif_slen)) {
-              make_plink2_flags |= kfMakePlink2VidDup;
+            } else if (strequal_k(cur_modif, "varid-split", cur_modif_slen)) {
+              varid_semicolon |= 1;
+            } else if (strequal_k(cur_modif, "varid-split-dup", cur_modif_slen)) {
+              varid_semicolon |= 2;
+            } else if (strequal_k(cur_modif, "varid-join", cur_modif_slen)) {
+              varid_semicolon |= 4;
+            } else if (strequal_k(cur_modif, "varid-dup", cur_modif_slen)) {
+              make_plink2_flags |= kfMakePlink2VaridDup;
             } else if (strequal_k(cur_modif, "erase-phase", cur_modif_slen)) {
               make_plink2_flags |= kfMakePgenErasePhase;
             } else if (likely(strequal_k(cur_modif, "erase-dosage", cur_modif_slen))) {
@@ -6376,29 +6383,29 @@ int main(int argc, char** argv) {
             logerrputs("Error: --make-bpgen 'trim-alts' and 'erase-alt2+' modifiers cannot be used\ntogether.\n");
             goto main_ret_INVALID_CMDLINE_A;
           }
-          if (vid_semicolon) {
-            if (unlikely((make_plink2_flags & kfMakePlink2VidDup) || (vid_semicolon & (vid_semicolon - 1)))) {
-              logerrputs("Error: --make-bpgen 'vid-split', 'vid-split-dup', 'vid-dup', and 'vid-join'\nmodifiers are mutually exclusive.\n");
+          if (varid_semicolon) {
+            if (unlikely((make_plink2_flags & kfMakePlink2VaridDup) || (varid_semicolon & (varid_semicolon - 1)))) {
+              logerrputs("Error: --make-bpgen 'varid-split', 'varid-split-dup', 'varid-dup', and\n'varid-join' modifiers are mutually exclusive.\n");
               goto main_ret_INVALID_CMDLINE_A;
             }
-            if (vid_semicolon & 3) {
+            if (varid_semicolon & 3) {
               if (unlikely((make_plink2_flags & kfMakePlink2MJoin) || (!(make_plink2_flags & kfMakePlink2MMask)))) {
-                logerrputs("Error: --make-bpgen 'vid-split' must be used with a multiallelics= split mode.\n");
+                logerrputs("Error: --make-bpgen 'varid-split' must be used with a multiallelics= split\nmode.\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
             } else {
               if (unlikely(!(make_plink2_flags & kfMakePlink2MJoin))) {
-                logerrputs("Error: --make-bpgen 'vid-join' must be used with a multiallelics= join mode.\n");
+                logerrputs("Error: --make-bpgen 'varid-join' must be used with a multiallelics= join mode.\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
             }
-            make_plink2_flags |= kfMakePlink2VidSemicolon;
-            if (vid_semicolon == 2) {
-              make_plink2_flags |= kfMakePlink2VidDup;
+            make_plink2_flags |= kfMakePlink2VaridSemicolon;
+            if (varid_semicolon == 2) {
+              make_plink2_flags |= kfMakePlink2VaridDup;
             }
-          } else if (make_plink2_flags & kfMakePlink2VidDup) {
+          } else if (make_plink2_flags & kfMakePlink2VaridDup) {
             if (unlikely((make_plink2_flags & kfMakePlink2MJoin) || (!(make_plink2_flags & kfMakePlink2MMask)))) {
-              logerrputs("Error: --make-bpgen 'vid-dup' must be used with a multiallelics= split mode.\n");
+              logerrputs("Error: --make-bpgen 'varid-dup' must be used with a multiallelics= split mode.\n");
               goto main_ret_INVALID_CMDLINE_A;
             }
           }
@@ -6423,7 +6430,7 @@ int main(int argc, char** argv) {
           }
           uint32_t explicit_pvar_cols = 0;
           uint32_t explicit_psam_cols = 0;
-          uint32_t vid_semicolon = 0;
+          uint32_t varid_semicolon = 0;
           for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
             const char* cur_modif = argvk[arg_idx + param_idx];
             const uint32_t cur_modif_slen = strlen(cur_modif);
@@ -6486,14 +6493,14 @@ int main(int argc, char** argv) {
               make_plink2_flags |= kfMakePlink2TrimAlts;
             } else if (strequal_k(cur_modif, "erase-alt2+", cur_modif_slen)) {
               make_plink2_flags |= kfMakePlink2EraseAlt2Plus;
-            } else if (strequal_k(cur_modif, "vid-split", cur_modif_slen)) {
-              vid_semicolon |= 1;
-            } else if (strequal_k(cur_modif, "vid-split-dup", cur_modif_slen)) {
-              vid_semicolon |= 2;
-            } else if (strequal_k(cur_modif, "vid-join", cur_modif_slen)) {
-              vid_semicolon |= 4;
-            } else if (strequal_k(cur_modif, "vid-dup", cur_modif_slen)) {
-              make_plink2_flags |= kfMakePlink2VidDup;
+            } else if (strequal_k(cur_modif, "varid-split", cur_modif_slen)) {
+              varid_semicolon |= 1;
+            } else if (strequal_k(cur_modif, "varid-split-dup", cur_modif_slen)) {
+              varid_semicolon |= 2;
+            } else if (strequal_k(cur_modif, "varid-join", cur_modif_slen)) {
+              varid_semicolon |= 4;
+            } else if (strequal_k(cur_modif, "varid-dup", cur_modif_slen)) {
+              make_plink2_flags |= kfMakePlink2VaridDup;
             } else if (strequal_k(cur_modif, "erase-phase", cur_modif_slen)) {
               make_plink2_flags |= kfMakePgenErasePhase;
             } else if (strequal_k(cur_modif, "erase-dosage", cur_modif_slen)) {
@@ -6517,25 +6524,25 @@ int main(int argc, char** argv) {
             logerrputs("Error: --make-pgen 'trim-alts' and 'erase-alt2+' modifiers cannot be used\ntogether.\n");
             goto main_ret_INVALID_CMDLINE_A;
           }
-          if (vid_semicolon) {
-            if (unlikely((make_plink2_flags & kfMakePlink2VidDup) || (vid_semicolon & (vid_semicolon - 1)))) {
-              logerrputs("Error: --make-pgen 'vid-split', 'vid-split-dup', 'vid-dup', and 'vid-join'\nmodifiers are mutually exclusive.\n");
+          if (varid_semicolon) {
+            if (unlikely((make_plink2_flags & kfMakePlink2VaridDup) || (varid_semicolon & (varid_semicolon - 1)))) {
+              logerrputs("Error: --make-pgen 'varid-split', 'varid-split-dup', 'varid-dup', and\n'varid-join' modifiers are mutually exclusive.\n");
               goto main_ret_INVALID_CMDLINE_A;
             }
-            if (vid_semicolon & 3) {
+            if (varid_semicolon & 3) {
               if (unlikely((make_plink2_flags & kfMakePlink2MJoin) || (!(make_plink2_flags & kfMakePlink2MMask)))) {
-                logerrputs("Error: --make-pgen 'vid-split' must be used with a multiallelics= split mode.\n");
+                logerrputs("Error: --make-pgen 'varid-split' must be used with a multiallelics= split mode.\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
             } else {
               if (unlikely(!(make_plink2_flags & kfMakePlink2MJoin))) {
-                logerrputs("Error: --make-pgen 'vid-join' must be used with a multiallelics= join mode.\n");
+                logerrputs("Error: --make-pgen 'varid-join' must be used with a multiallelics= join mode.\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
             }
-            make_plink2_flags |= kfMakePlink2VidSemicolon;
-            if (vid_semicolon == 2) {
-              make_plink2_flags |= kfMakePlink2VidDup;
+            make_plink2_flags |= kfMakePlink2VaridSemicolon;
+            if (varid_semicolon == 2) {
+              make_plink2_flags |= kfMakePlink2VaridDup;
             }
           }
           if (!explicit_pvar_cols) {


=====================================
plink2_data.cc
=====================================
@@ -3218,7 +3218,7 @@ PglErr PlanMultiallelicSplit(const uintptr_t* variant_include, const uintptr_t*
 
 // Returns 1 iff there are exactly (allele_ct - 2) semicolons in
 // orig_variant_id, and no two are adjacent (or leading/trailing).
-uint32_t VidSplitOk(const char* orig_variant_id, uint32_t allele_ct) {
+uint32_t VaridSplitOk(const char* orig_variant_id, uint32_t allele_ct) {
   const char* id_iter = orig_variant_id;
   for (uint32_t aidx = 2; aidx != allele_ct; ++aidx) {
     const char* tok_end = strchr(id_iter, ';');
@@ -3233,7 +3233,7 @@ uint32_t VidSplitOk(const char* orig_variant_id, uint32_t allele_ct) {
 // Similar to WriteMapOrBim(), but there are enough small differences to
 // justify making this a separate function instead of clogging the original
 // with more conditionals.
-PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t vid_split, uint32_t vid_dup, MiscFlags misc_flags, uint32_t output_zst, uint32_t thread_ct) {
+PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t varid_split, uint32_t varid_dup, MiscFlags misc_flags, uint32_t output_zst, uint32_t thread_ct) {
   unsigned char* bigstack_mark = g_bigstack_base;
   char* cswritep = nullptr;
   CompressStreamState css;
@@ -3247,7 +3247,7 @@ PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, cons
       goto WriteBimSplit_ret_NOMEM;
     }
     const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
-    const uint32_t vid_dup_nosplit = vid_dup && (!vid_split);
+    const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
     VaridTemplate* varid_templatep = nullptr;
     uint32_t missing_varid_slen = 0;
     uint32_t missing_varid_match_blen = 0; // nonzero iff --set-missing-var-ids
@@ -3264,7 +3264,7 @@ PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, cons
       }
       const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
       VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
-      if (vid_dup) {
+      if (varid_dup) {
         for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
           const uint32_t insert_type = varid_templatep->insert_types[uii];
           if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
@@ -3313,16 +3313,16 @@ PglErr WriteBimSplit(const char* outname, const uintptr_t* variant_include, cons
       const char* ref_allele = cur_alleles[0];
       const uint32_t ref_allele_slen = strlen(ref_allele);
       uint32_t keep_orig_id = 1;
-      if ((orig_allele_ct > 2) && (!vid_dup_nosplit)) {
+      if ((orig_allele_ct > 2) && (!varid_dup_nosplit)) {
         keep_orig_id = 0;
         if (varid_templatep && (!missing_varid_match_blen)) {
           cur_varid_templatep = varid_templatep;
         } else {
           cur_varid_templatep = nullptr;
-          if (vid_split) {
-            if (VidSplitOk(orig_variant_id, orig_allele_ct)) {
+          if (varid_split) {
+            if (VaridSplitOk(orig_variant_id, orig_allele_ct)) {
               varid_token_start = orig_variant_id;
-            } else if (vid_dup) {
+            } else if (varid_dup) {
               keep_orig_id = 1;
             } else {
               varid_token_start = nullptr;
@@ -3584,7 +3584,7 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
       goto WritePvarSplit_ret_NOMEM;
     }
     const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
-    const uint32_t vid_dup = (make_plink2_flags / kfMakePlink2VidDup) & 1;
+    const uint32_t varid_dup = (make_plink2_flags / kfMakePlink2VaridDup) & 1;
     VaridTemplate* varid_templatep = nullptr;
     if (!missing_varid_match) {
       missing_varid_match = &(g_one_char_strs[92]); // '.'
@@ -3600,7 +3600,7 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
       }
       const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
       VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
-      if (vid_dup) {
+      if (varid_dup) {
         for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
           const uint32_t insert_type = varid_templatep->insert_types[uii];
           if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
@@ -3719,8 +3719,8 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
 
     const VaridTemplate* cur_varid_templatep = nullptr;
     const char* varid_token_start = nullptr; // for vid-split
-    const uint32_t vid_split = (make_plink2_flags / kfMakePlink2VidSemicolon) & 1;
-    const uint32_t vid_dup_nosplit = vid_dup && (!vid_split);
+    const uint32_t varid_split = (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1;
+    const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
     const uint32_t split_just_snps = ((make_plink2_flags & (kfMakePlink2MSplitBase * 3)) == kfMakePlink2MSplitSnps);
     uint32_t trs_variant_uidx = 0;
     uintptr_t variant_uidx_base = 0;
@@ -3766,16 +3766,16 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
       uint32_t split_ct_p1 = orig_allele_ct;
       uint32_t keep_orig_id = 1;
       if (orig_allele_ct > 2) {
-        if (!vid_dup_nosplit) {
+        if (!varid_dup_nosplit) {
           keep_orig_id = 0;
           if (varid_templatep && (!missing_varid_match_blen)) {
             cur_varid_templatep = varid_templatep;
           } else {
             cur_varid_templatep = nullptr;
-            if (vid_split) {
-              if (VidSplitOk(orig_variant_id, orig_allele_ct)) {
+            if (varid_split) {
+              if (VaridSplitOk(orig_variant_id, orig_allele_ct)) {
                 varid_token_start = orig_variant_id;
-              } else if (vid_dup) {
+              } else if (varid_dup) {
                 keep_orig_id = 1;
               } else {
                 varid_token_start = nullptr;
@@ -4230,7 +4230,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
       goto WritePvarJoin_ret_NOMEM;
     }
     const uint32_t new_variant_id_overflow_missing = (misc_flags / kfMiscNewVarIdOverflowMissing) & 1;
-    const uint32_t vid_dup = (make_plink2_flags / kfMakePlink2VidDup) & 1;
+    const uint32_t varid_dup = (make_plink2_flags / kfMakePlink2VaridDup) & 1;
     VaridTemplate* varid_templatep = nullptr;
     if (!missing_varid_match) {
       missing_varid_match = &(g_one_char_strs[92]); // '.'
@@ -4246,7 +4246,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
       }
       const uint32_t overflow_substitute_blen = new_variant_id_overflow_missing? (missing_varid_slen + 1) : 0;
       VaridTemplateInit(varid_template_str, missing_varid_match, chr_buf, new_variant_id_max_allele_slen, overflow_substitute_blen, varid_templatep);
-      if (vid_dup) {
+      if (varid_dup) {
         for (uint32_t uii = 0; uii != varid_templatep->insert_ct; ++uii) {
           const uint32_t insert_type = varid_templatep->insert_types[uii];
           if ((insert_type == 3) || ((insert_type == 2) && (varid_templatep->alleles_needed & 4))) {
@@ -4368,7 +4368,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
       }
       info_end_key_idx = IdHtableFind("END", info_keys, info_keys_htable, strlen("END"), info_keys_htable_size);
       if (info_end_key_idx != UINT32_MAX) {
-        const int32_t knum = const_container_of[info_keys[info_end_key_idx], InfoVtype, key)->num;
+        const int32_t knum = const_container_of(info_keys[info_end_key_idx], InfoVtype, key)->num;
         if ((knum != 1) && (knum != kInfoVtypeUnknown)) {
           // TODO: verify type instead.
           // but if number is not . or 1, this is not the INFO:END we're
@@ -4407,8 +4407,8 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
 
     const VaridTemplate* cur_varid_templatep = nullptr;
     const char* varid_token_start = nullptr; // for vid-split
-    const uint32_t vid_split = (make_plink2_flags / kfMakePlink2VidSemicolon) & 1;
-    const uint32_t vid_dup_nosplit = vid_dup && (!vid_split);
+    const uint32_t varid_split = (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1;
+    const uint32_t varid_dup_nosplit = varid_dup && (!varid_split);
     uint32_t next_variant_idx = 0;
     uint32_t trs_variant_uidx = 0;
     uint32_t next_variant_uidx = 0;
@@ -4468,8 +4468,8 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
         const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
         JoinVtype jvt = JoinCount(cur_alleles, allele_ct, &next_jc);
         // previously validated
-        if ((join_mode == kfMakePlink2MJoinSnps) && ) {
-        }
+        // if ((join_mode == kfMakePlink2MJoinSnps) && ()) {
+        // }
 
         // TODO
         jc.snp_ct += next_jc.snp_ct;
@@ -4482,75 +4482,56 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
         // No join needed.  This is usually the common case, so we duplicate a
         // bunch of code for the sake of avoiding slowdown here.
         cswritep = memcpya(cswritep, chr_buf, chr_buf_blen);
-        cswritep = u32toa_x(variant_bps[variant_uidx], '\t', cswritep);
-        cswritep = strcpyax(cswritep, variant_ids[variant_uidx], '\t');
+        cswritep = u32toa_x(variant_bps[bp_start_variant_uidx], '\t', cswritep);
+        cswritep = strcpyax(cswritep, variant_ids[bp_start_variant_uidx], '\t');
         uintptr_t allele_idx_offset_base;
         if (!allele_idx_offsets) {
-          allele_idx_offset_base = variant_uidx * 2;
+          allele_idx_offset_base = bp_start_variant_uidx * 2;
         } else {
-          allele_idx_offset_base = allele_idx_offsets[variant_uidx];
-          cur_allele_ct = allele_idx_offsets[variant_uidx + 1] - allele_idx_offset_base;
+          allele_idx_offset_base = allele_idx_offsets[bp_start_variant_uidx];
+          allele_ct = allele_idx_offsets[bp_start_variant_uidx + 1] - allele_idx_offset_base;
         }
         const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
-        if (refalt1_select) {
-          ref_allele_idx = refalt1_select[variant_uidx][0];
-          alt1_allele_idx = refalt1_select[variant_uidx][1];
-        }
-        cswritep = strcpyax(cswritep, cur_alleles[ref_allele_idx], '\t');
-        uint32_t alt_allele_written = 0;
-        if ((!allele_presents) || IsSet(allele_presents, allele_idx_offset_base + alt1_allele_idx)) {
-          cswritep = strcpya(cswritep, cur_alleles[alt1_allele_idx]);
-          alt_allele_written = 1;
-        }
+        cswritep = strcpyax(cswritep, cur_alleles[0], '\t');
+        cswritep = strcpya(cswritep, cur_alleles[1]);
         if (unlikely(Cswrite(&css, &cswritep))) {
-          goto WritePvar_ret_WRITE_FAIL;
+          goto WritePvarJoin_ret_WRITE_FAIL;
         }
-        if (cur_allele_ct > 2) {
-          for (uint32_t allele_idx = 0; allele_idx != cur_allele_ct; ++allele_idx) {
-            if ((allele_idx == ref_allele_idx) || (allele_idx == alt1_allele_idx) || (allele_presents && (!IsSet(allele_presents, allele_idx_offset_base + allele_idx)))) {
-              continue;
-            }
-            if (alt_allele_written) {
-              *cswritep++ = ',';
-            }
-            alt_allele_written = 1;
-            cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
-            if (unlikely(Cswrite(&css, &cswritep))) {
-              goto WritePvar_ret_WRITE_FAIL;
-            }
+        for (uint32_t allele_idx = 2; allele_idx != allele_ct; ++allele_idx) {
+          *cswritep++ = ',';
+          cswritep = strcpya(cswritep, cur_alleles[allele_idx]);
+          if (unlikely(Cswrite(&css, &cswritep))) {
+            goto WritePvarJoin_ret_WRITE_FAIL;
           }
         }
-        if (!alt_allele_written) {
-          *cswritep++ = output_missing_geno_char;
-        }
 
         if (write_qual) {
           *cswritep++ = '\t';
-          if ((!qual_present) || (!IsSet(qual_present, variant_uidx))) {
+          if ((!qual_present) || (!IsSet(qual_present, bp_start_variant_uidx))) {
             *cswritep++ = '.';
           } else {
-            cswritep = ftoa_g(quals[variant_uidx], cswritep);
+            cswritep = ftoa_g(quals[bp_start_variant_uidx], cswritep);
           }
         }
 
         if (write_filter) {
           *cswritep++ = '\t';
-          if ((!filter_present) || (!IsSet(filter_present, variant_uidx))) {
+          if ((!filter_present) || (!IsSet(filter_present, bp_start_variant_uidx))) {
             *cswritep++ = '.';
-          } else if (!IsSet(filter_npass, variant_uidx)) {
+          } else if (!IsSet(filter_npass, bp_start_variant_uidx)) {
             cswritep = strcpya_k(cswritep, "PASS");
           } else {
-            cswritep = strcpya(cswritep, filter_storage[variant_uidx]);
+            cswritep = strcpya(cswritep, filter_storage[bp_start_variant_uidx]);
           }
         }
 
         if (write_info) {
           *cswritep++ = '\t';
-          const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, variant_uidx));
+          const uint32_t is_pr = all_nonref || (nonref_flags && IsSet(nonref_flags, bp_start_variant_uidx));
           if (pvar_info_line_iter) {
-            reterr = PvarInfoReloadAndWrite(info_pr_flag_present, info_col_idx, variant_uidx, is_pr, &pvar_reload_txs, &pvar_info_line_iter, &cswritep, &trs_variant_uidx);
+            reterr = PvarInfoReloadAndWrite(info_pr_flag_present, info_col_idx, bp_start_variant_uidx, is_pr, &pvar_reload_txs, &pvar_info_line_iter, &cswritep, &trs_variant_uidx);
             if (unlikely(reterr)) {
-              goto WritePvar_ret_TSTREAM_FAIL;
+              goto WritePvarJoin_ret_TSTREAM_FAIL;
             }
           } else {
             if (is_pr) {
@@ -4566,50 +4547,26 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
           if (!variant_cms) {
             *cswritep++ = '0';
           } else {
-            cswritep = dtoa_g_p8(variant_cms[variant_uidx], cswritep);
+            cswritep = dtoa_g_p8(variant_cms[bp_start_variant_uidx], cswritep);
           }
         }
         AppendBinaryEoln(&cswritep);
-      } else {
+        // next_jc guaranteed to be zero-initialized
+      } else if (next_variant_idx) {
         // TODO
-        next_jc.snp_ct = 0;
-        next_jc.nonsnp_ct = 0;
-        next_jc.symbolic_ct = 0;
-        next_jc.missalt_snp_ct = 0;
-        next_jc.missalt_nonsnp_ct = 0;
-      }
-      if (next_variant_idx == variant_ct) {
-        break;
-      }
-      this_pos_write_variant_ct = 0;
-      jc = next_jc;
-      prev_bp = cur_bp;
-      bp_start_variant_idx = next_variant_idx;
-      bp_start_variant_uidx = next_variant_uidx;
-      if (next_variant_idx >= next_print_variant_idx) {
-        if (pct > 10) {
-          putc_unlocked('\b', stdout);
-        }
-        pct = (next_variant_idx * 100LLU) / variant_ct;
-        printf("\b\b%u%%", pct++);
-        fflush(stdout);
-        next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
-      }
-      ;;;
-
-
+        ;;;;
       const char* orig_variant_id = variant_ids[variant_uidx];
       const char* ref_allele = cur_alleles[0];
       const uint32_t ref_allele_slen = strlen(ref_allele);
       uint32_t split_ct_p1 = allele_ct;
       if (allele_ct > 2) {
-        if (!vid_dup) {
+        if (!varid_dup) {
           if (varid_templatep && (!missing_varid_match_blen)) {
             cur_varid_templatep = varid_templatep;
           } else {
             cur_varid_templatep = nullptr;
-            if (vid_split) {
-              if (VidSplitOk(orig_variant_id, allele_ct)) {
+            if (varid_split) {
+              if (VaridSplitOk(orig_variant_id, allele_ct)) {
                 varid_token_start = orig_variant_id;
               } else {
                 varid_token_start = nullptr;
@@ -4624,6 +4581,30 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
           }
         }
       }
+       ;;;;
+        next_jc.snp_ct = 0;
+        next_jc.nonsnp_ct = 0;
+        next_jc.symbolic_ct = 0;
+        next_jc.missalt_snp_ct = 0;
+        next_jc.missalt_nonsnp_ct = 0;
+      }
+      if (next_variant_idx == variant_ct) {
+        break;
+      }
+      // this_pos_write_variant_ct = 0;
+      jc = next_jc;
+      prev_bp = cur_bp;
+      bp_start_variant_idx = next_variant_idx;
+      bp_start_variant_uidx = next_variant_uidx;
+      if (next_variant_idx >= next_print_variant_idx) {
+        if (pct > 10) {
+          putc_unlocked('\b', stdout);
+        }
+        pct = (next_variant_idx * 100LLU) / variant_ct;
+        printf("\b\b%u%%", pct++);
+        fflush(stdout);
+        next_print_variant_idx = (pct * S_CAST(uint64_t, variant_ct)) / 100;
+      }
     }
     if (unlikely(CswriteCloseNull(&css, cswritep))) {
       goto WritePvarJoin_ret_WRITE_FAIL;
@@ -4638,7 +4619,7 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
     reterr = kPglRetNomem;
     break;
   WritePvarJoin_ret_TSTREAM_FAIL:
-    TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs, &reterr);
+    TextStreamErrPrint(pvar_info_reload, &pvar_reload_txs);
     break;
   WritePvarJoin_ret_WRITE_FAIL:
     reterr = kPglRetWriteFail;
@@ -4646,6 +4627,9 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
   WritePvarJoin_ret_INVALID_CMDLINE:
     reterr = kPglRetInvalidCmdline;
     break;
+  WritePvarJoin_ret_INCONSISTENT_INPUT:
+    reterr = kPglRetInconsistentInput;
+    break;
   }
  WritePvarJoin_ret_1:
   CswriteCloseCond(&css, cswritep);
@@ -6689,6 +6673,7 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
     uint32_t max_write_allele_ct = max_allele_ct;
     uint32_t max_missalt_ct = 0;
     if (make_plink2_flags & kfMakePlink2MMask) {
+      // TODO: enforce on command line
       assert((!refalt1_select) && (!allele_presents));
       if (make_plink2_flags & kfMakePlink2MJoin) {
         reterr = PlanMultiallelicJoin(variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, make_plink2_flags, &write_variant_ct, &write_allele_idx_offsets, &max_write_allele_ct, &max_missalt_ct);
@@ -6736,7 +6721,7 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
         reterr = WriteMapOrBim(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, variant_cms, variant_ct, max_allele_slen, '\t', bim_zst, max_thread_ct);
       } else {
         assert(write_variant_ct > variant_ct);
-        reterr = WriteBimSplit(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, variant_cms, varid_template_str, missing_varid_match, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, (make_plink2_flags / kfMakePlink2VidSemicolon) & 1, (make_plink2_flags / kfMakePlink2VidDup) & 1, misc_flags, bim_zst, max_thread_ct);
+        reterr = WriteBimSplit(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, variant_cms, varid_template_str, missing_varid_match, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, (make_plink2_flags / kfMakePlink2VaridSemicolon) & 1, (make_plink2_flags / kfMakePlink2VaridDup) & 1, misc_flags, bim_zst, max_thread_ct);
       }
       if (unlikely(reterr)) {
         goto MakePlink2NoVsort_ret_1;


=====================================
plink2_data.h
=====================================
@@ -45,8 +45,8 @@ FLAGSET_DEF_START()
   kfMakePlink2TrimAlts = (1 << 10),
   kfMakePlink2MMask = kfMakePlink2TrimAlts - kfMakePlink2MSplitBase,
   kfMakePlink2EraseAlt2Plus = (1 << 11),
-  kfMakePlink2VidSemicolon = (1 << 12),
-  kfMakePlink2VidDup = (1 << 13),
+  kfMakePlink2VaridSemicolon = (1 << 12),
+  kfMakePlink2VaridDup = (1 << 13),
 
   kfMakePlink2SetHhMissing = (1 << 14),
   kfMakePlink2SetHhMissingKeepDosage = (1 << 15),


=====================================
plink2_glm.cc
=====================================
@@ -1160,6 +1160,8 @@ ENUM_U31_DEF_START()
   kGlmErrcodeLogisticConvergeFail,
   kGlmErrcodeFirthConvergeFail,
   kGlmErrcodeInvalidResult,
+  // no codes for logistic-unfinished and firth-unfinished for now since we
+  // still report results there
 
   // only during initial scan
   kGlmErrcodeUnstableScale
@@ -2721,7 +2723,7 @@ void SolveLinearSystem(const float* ll, const float* yy, uint32_t predictor_ct,
   }
 }
 
-BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef) {
+BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef) {
   // Similar to first part of logistic.cpp fitLM(), but incorporates changes
   // from Pascal Pons et al.'s TopCoder code.
   //
@@ -2813,6 +2815,7 @@ BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct,
             return 1;
           }
         }
+        *is_unfinished_ptr = 1;
         return 0;
       }
     }
@@ -2874,7 +2877,7 @@ void FirthComputeWeights(const float* yy, const float* xx, const float* pp, cons
 }
 #endif
 
-BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* hh, double* half_inverted_buf, MatrixInvertBuf1* inv_1d_buf, double* dbl_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf) {
+BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* hh, double* half_inverted_buf, MatrixInvertBuf1* inv_1d_buf, double* dbl_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf) {
   // This is a port of Georg Heinze's logistf R function, adapted to use many
   // of plink 1.9's optimizations; see
   //   http://cemsiis.meduniwien.ac.at/en/kb/science-research/software/statistical-software/fllogistf/
@@ -2902,6 +2905,8 @@ BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, ui
   //         too)
   //
   // Returns 0 on success, 1 on convergence failure.
+  // is_unfinished assumed to be initialized to 0, and is set to 1 if we hit
+  // the iteration limit without satisfying other convergence criteria.
   const uintptr_t predictor_ctav = RoundUpPow2(predictor_ct, kFloatPerFVec);
   const uintptr_t sample_ctav = RoundUpPow2(sample_ct, kFloatPerFVec);
   uint32_t is_last_iter = 0;
@@ -2934,15 +2939,18 @@ BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, ui
   // bugfix (4 Nov 2017): grad[] trailing elements must be zeroed out
   ZeroFArr(predictor_ctav - predictor_ct, &(grad[predictor_ct]));
 
-  // start with 80% of logistf convergence defaults (some reduction is
-  // appropriate to be consistent with single-precision arithmetic); may tune
-  // later.
+  // start with 80% of most logistf convergence defaults (some reduction is
+  // appropriate to be consistent with single-precision arithmetic).  (Update,
+  // 9 Apr 2020: max_iter now matches logistf default since we've been shown a
+  // concrete example where it matters in a way that isn't masked by the
+  // single- vs. double-precision difference.)
+  //
   // see also the hs_bail condition: if we ever try all five halfsteps, when
   // dcoef_max and grad_max aren't that far from the normal convergence
   // conditions, it's probably pointless to continue with single-precision
   // arithmetic.  (possible todo: use a fully-double-precision routine to
   // finish the job when that happens.)
-  const uint32_t max_iter_m1 = 19;
+  const uint32_t max_iter_m1 = 24;
   const float gconv = S_CAST(float, 0.0001);
   const float xconv = S_CAST(float, 0.0001);
   const double lconv = 0.0001;
@@ -3049,7 +3057,12 @@ BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, ui
     }
     // printf("%.9g %.9g %g %g\n", loglik, loglik_old, dcoef_max, grad_max);
     const double loglik_change = loglik - loglik_old;
-    is_last_iter = (iter_idx == max_iter_m1) || ((fabs(loglik_change) <= lconv) && (delta_and_grad_converged || hs_bail));
+    if ((fabs(loglik_change) <= lconv) && (delta_and_grad_converged || hs_bail)) {
+      is_last_iter = 1;
+    } else if (iter_idx == max_iter_m1) {
+      is_last_iter = 1;
+      *is_unfinished_ptr = 1;
+    }
   }
 }
 
@@ -3159,7 +3172,8 @@ typedef struct {
   uint32_t allele_obs_ct;
   double a1_dosage;
 
-  uint32_t firth_fallback;
+  uint16_t firth_fallback;
+  uint16_t is_unfinished;
   uint32_t case_allele_obs_ct;
   double a1_case_dosage;
 
@@ -3671,12 +3685,13 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
           // --xchr-model 1 corner case.)
           if (!pgv.dosage_ct) {
             if ((genocounts[0] == nm_sample_ct) || (genocounts[1] == nm_sample_ct) || (genocounts[2] == nm_sample_ct)) {
-              // equivalent to SetBit(1 - omitted_allele_idx, const_alleles);
-              const_alleles[0] = 2 >> omitted_allele_idx;
+              // bugfix (28 Mar 2020): didn't set the bit that actually
+              // mattered last week...
+              const_alleles[0] = 3;
             }
           } else if (pgv.dosage_ct == nm_sample_ct) {
             if (DosageIsConstant(dosage_sum, dosage_ssq, nm_sample_ct)) {
-              const_alleles[0] = 2 >> omitted_allele_idx;
+              const_alleles[0] = 3;
             }
           }
           machr2_dosage_sums[1 - omitted_allele_idx] = dosage_sum;
@@ -4045,6 +4060,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
 
           block_aux_iter[nonomitted_allele_idx].a1_case_dosage = a1_case_dosage;
           block_aux_iter[nonomitted_allele_idx].firth_fallback = 0;
+          block_aux_iter[nonomitted_allele_idx].is_unfinished = 0;
           block_aux_iter[nonomitted_allele_idx].mach_r2 = mach_r2;
           ++nonomitted_allele_idx;
         }
@@ -4172,6 +4188,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
           for (uint32_t extra_regression_idx = 0; extra_regression_idx <= extra_regression_ct; ++extra_regression_idx) {
             float* main_vals = &(nm_predictors_pmaj_buf[nm_sample_ctav]);
             float* domdev_vals = nullptr;
+            uint32_t is_unfinished = 0;
             if (extra_regression_ct) {
               if (IsSet(const_alleles, extra_regression_idx + (extra_regression_idx >= omitted_allele_idx))) {
                 glm_err = SetGlmErr0(kGlmErrcodeConstAllele);
@@ -4308,7 +4325,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
                   goto GlmLogisticThread_skip_regression;
                 }
               }
-              if (LogisticRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, cholesky_decomp_return, pp_buf, sample_variance_buf, hh_return, gradient_buf, dcoef_buf)) {
+              if (LogisticRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, &is_unfinished, cholesky_decomp_return, pp_buf, sample_variance_buf, hh_return, gradient_buf, dcoef_buf)) {
                 if (is_sometimes_firth) {
                   ZeroFArr(cur_predictor_ctav, coef_return);
                   goto GlmLogisticThread_firth_fallback;
@@ -4357,7 +4374,7 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
                   }
                 }
               }
-              if (FirthRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, hh_return, inverse_corr_buf, inv_1d_buf, dbl_2d_buf, pp_buf, sample_variance_buf, gradient_buf, dcoef_buf, score_buf, tmpnxk_buf)) {
+              if (FirthRegression(nm_pheno_buf, nm_predictors_pmaj_buf, nm_sample_ct, cur_predictor_ct, coef_return, &is_unfinished, hh_return, inverse_corr_buf, inv_1d_buf, dbl_2d_buf, pp_buf, sample_variance_buf, gradient_buf, dcoef_buf, score_buf, tmpnxk_buf)) {
                 glm_err = SetGlmErr0(kGlmErrcodeFirthConvergeFail);
                 goto GlmLogisticThread_skip_regression;
               }
@@ -4384,6 +4401,14 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
                 }
               }
             }
+            if (is_unfinished) {
+              block_aux_iter[extra_regression_idx].is_unfinished = 1;
+              if (allele_ct_m2 && beta_se_multiallelic_fused) {
+                for (uint32_t uii = 1; uii != allele_ct - 1; ++uii) {
+                  block_aux_iter[uii].is_unfinished = 1;
+                }
+              }
+            }
             {
               double* beta_se_iter2 = beta_se_iter;
               for (uint32_t pred_uidx = reported_pred_uidx_start; pred_uidx != reported_pred_uidx_biallelic_end; ++pred_uidx) {
@@ -6183,7 +6208,11 @@ PglErr GlmLogistic(const char* cur_pheno_name, const char* const* test_names, co
                 if (err_col) {
                   *cswritep++ = '\t';
                   if (test_is_valid) {
-                    *cswritep++ = '.';
+                    if (!auxp->is_unfinished) {
+                      *cswritep++ = '.';
+                    } else {
+                      cswritep = strcpya_k(cswritep, "UNFINISHED");
+                    }
                   } else {
                     uint64_t glm_errcode;
                     memcpy(&glm_errcode, &(beta_se_iter[2 * test_idx]), 8);
@@ -6786,11 +6815,11 @@ THREAD_FUNC_DECL GlmLinearThread(void* raw_arg) {
           // --xchr-model 1 corner case.)
           if (!pgv.dosage_ct) {
             if ((genocounts[0] == nm_sample_ct) || (genocounts[1] == nm_sample_ct) || (genocounts[2] == nm_sample_ct)) {
-              const_alleles[0] = 2 >> omitted_allele_idx;
+              const_alleles[0] = 3;
             }
           } else if (pgv.dosage_ct == nm_sample_ct) {
             if (DosageIsConstant(dosage_sum, dosage_ssq, nm_sample_ct)) {
-              const_alleles[0] = 2 >> omitted_allele_idx;
+              const_alleles[0] = 3;
             }
           }
           machr2_dosage_sums[1 - omitted_allele_idx] = dosage_sum;
@@ -8849,11 +8878,11 @@ THREAD_FUNC_DECL GlmLinearSubbatchThread(void* raw_arg) {
           // --xchr-model 1 corner case.)
           if (!pgv.dosage_ct) {
             if ((genocounts[0] == nm_sample_ct) || (genocounts[1] == nm_sample_ct) || (genocounts[2] == nm_sample_ct)) {
-              const_alleles[0] = 2 >> omitted_allele_idx;
+              const_alleles[0] = 3;
             }
           } else if (pgv.dosage_ct == nm_sample_ct) {
             if (DosageIsConstant(dosage_sum, dosage_ssq, nm_sample_ct)) {
-              const_alleles[0] = 2 >> omitted_allele_idx;
+              const_alleles[0] = 3;
             }
           }
           machr2_dosage_sums[1 - omitted_allele_idx] = dosage_sum;
@@ -10645,6 +10674,7 @@ PglErr GlmMain(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, c
     }
     assert(orig_variant_ct);
     // common linear/logistic initialization
+    const GlmFlags glm_flags = glm_info_ptr->flags;
     const uintptr_t* early_variant_include = orig_variant_include;
     uint32_t* local_sample_uidx_order = nullptr;
     uintptr_t* local_variant_include = nullptr;
@@ -10659,7 +10689,6 @@ PglErr GlmMain(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, c
       }
     }
 
-    const GlmFlags glm_flags = glm_info_ptr->flags;
     common.glm_flags = glm_flags;
     common.dosage_presents = nullptr;
     common.dosage_mains = nullptr;


=====================================
plink2_glm.h
=====================================
@@ -115,9 +115,9 @@ void CleanupGlm(GlmInfo* glm_info_ptr);
 
 // for testing purposes
 // plink2_matrix.h must be included in this file
-// BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef);
+// BoolErr LogisticRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* ll, float* pp, float* vv, float* hh, float* grad, float* dcoef) {
 
-// BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, float* hh, matrix_finvert_buf1_t* inv_1d_buf, float* flt_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf);
+// BoolErr FirthRegression(const float* yy, const float* xx, uint32_t sample_ct, uint32_t predictor_ct, float* coef, uint32_t* is_unfinished_ptr, float* hh, double* half_inverted_buf, MatrixInvertBuf1* inv_1d_buf, double* dbl_2d_buf, float* pp, float* vv, float* grad, float* dcoef, float* ww, float* tmpnxk_buf) {
 
 PglErr GlmMain(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const PhenoCol* covar_cols, const char* covar_names, const uintptr_t* orig_variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const AlleleCode* maj_alleles, const char* const* allele_storage, const GlmInfo* glm_info_ptr, const AdjustInfo* adjust_info_ptr, const APerm* aperm_ptr, const char* local_covar_fname, const char* local_pvar_fname, const char* local_psam_fname, uint32_t raw_sample_ct, uint32_t orig_sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t orig_covar_ct, uintptr_t max_covar_name_blen, uint32_t raw_variant_ct, uint32_t orig_variant_ct, uint32_t max_variant_id_slen, uint32_t max_allele_slen, uint32_t xchr_model, double ci_size, double vif_thresh, double ln_pfilter, double output_min_ln, uint32_t max_thread_ct, uintptr_t pgr_alloc_cacheline_ct, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end);
 


=====================================
plink2_help.cc
=====================================
@@ -290,12 +290,12 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
                /*
 "  --make-pgen ['vzs'] ['format='<code>] [{trim-alts | erase-alt2+}]\n"
 "              ['erase-phase'] ['erase-dosage'] ['multiallelics='<mode>]\n"
-"              [{vid-split | vid-split-dup | vid-dup | vid-join}]\n"
+"              [{varid-split | varid-split-dup | varid-dup | varid-join}]\n"
 "              ['pvar-cols='<col set descriptor>]\n"
 "              ['psam-cols='<col set descriptor>]\n"
 "  --make-bpgen ['vzs'] ['format='<code>] [{trim-alts | erase-alt2+}]\n"
 "               ['erase-phase'] ['erase-dosage'] ['multiallelics='<mode>]\n"
-"               [{vid-split | vid-split-dup | vid-dup | vid-join}]\n"
+"               [{varid-split | varid-split-dup | varid-dup | varid-join}]\n"
 "  --make-bed ['vzs'] [{trim-alts | erase-alt2+}]\n"
 "             ['multiallelics='<split mode>]\n"
                */
@@ -338,19 +338,21 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "                alleles are joined into a single multiallelic variant.\n"
 "      Symbolic ALT alleles are joined separately.  Additional conditions apply\n"
 "      to them, and PLINK 2 usually errors out when they aren't met.\n"
-"      When splitting with 'vid-dup', the new variants keep the original variant\n"
+"      When splitting with 'varid-dup', the new variants keep the original\n"
+"      variant\n"
 "      ID.  Otherwise, new variant IDs are assigned as follows:\n"
 "      1. If --set-all-var-ids was specified, that template is applied.\n"
 "      2. Otherwise, if joining, and all source variant IDs are identical and\n"
 "         nonmissing, that ID is used.\n"
-"      3. Otherwise, if splitting with the 'vid-split' or 'vid-split-dup'\n"
+"      3. Otherwise, if splitting with the 'varid-split' or 'varid-split-dup'\n"
 "         modifier, the original variant ID is split on the ';' character when\n"
 "         the number of ';'s is correct.  (When the post-split ID is the missing\n"
 "         code, --set-missing-var-ids is applied to it.)  Similarly, if joining\n"
-"         with the 'vid-join' modifier, the new variant ID is set to the\n"
+"         with the 'varid-join' modifier, the new variant ID is set to the\n"
 "         original IDs joined with the ';' character when all original IDs are\n"
 "         nonmissing and contain exactly one ';' per extra ALT allele.\n"
-"      4. Otherwise, if splitting with 'vid-split-dup', the original ID is kept.\n"
+"      4. Otherwise, if splitting with 'varid-split-dup', the original ID is\n"
+"         kept.\n"
 "      5. Otherwise, if --set-missing-var-ids was specified, that template is\n"
 "         applied.\n"
 "      6. Otherwise, the variant ID is set to the missing code.\n"
@@ -992,10 +994,10 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
     HelpPrint("glm\0linear\0logistic\0assoc\0", &help_ctrl, 1,
 "  --glm ['zs'] ['omit-ref'] [{sex | no-x-sex}] ['log10'] ['pheno-ids']\n"
 "        [{genotypic | hethom | dominant | recessive}] ['interaction'] ['skip']\n"
-"        ['hide-covar'] ['intercept'] [{no-firth | firth-fallback | firth}]\n"
-"        ['cols='<col set desc.>] ['local-covar='<file>] ['local-psam='<file>]\n"
+"        ['hide-covar'] [{no-firth | firth-fallback | firth}] ['intercept']\n"
+"        ['cols='<col set desc>] ['local-covar='<file>] ['local-psam='<file>]\n"
 "        ['local-pos-cols='<key col #s> | 'local-pvar='<file>] ['local-haps']\n"
-"        ['local-omit-last' | 'local-cats[0]='<category ct>]\n"
+"        ['local-omit-last' | 'local-cats[0]='<category ct>] ['allow-no-covars']\n"
                // "        ['perm' | 'mperm='<value>] ['perm-count']\n"
 "    Basic association analysis on quantitative and/or case/control phenotypes.\n"
 "    For each variant, a linear (for quantitative traits) or logistic (for\n"
@@ -1037,6 +1039,10 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "      statistics are reported for all nonconstant predictors; 'hide-covar'\n"
 "      suppresses covariate-only results, while 'intercept' causes intercepts\n"
 "      to be reported.\n"
+"      Since running --glm without at least e.g. principal component covariates\n"
+"      is usually an analytical mistake, the 'allow-no-covars' modifier is now\n"
+"      required when you're intentionally running --glm without a covariate\n"
+"      file.\n"
 "    * By default, if the current phenotype and covariates are such that every\n"
 "      regression on a chromosome will fail, PLINK 2 errors out.  To just skip\n"
 "      the phenotype or chromosome instead, add the 'skip' modifier.\n"



View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/ac09b66d6e58edafb5a7d6c3bd202059933f3bdf...43c5befa322e49361c98fe3c6ee1423079948ba5

-- 
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/ac09b66d6e58edafb5a7d6c3bd202059933f3bdf...43c5befa322e49361c98fe3c6ee1423079948ba5
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200421/082118af/attachment-0001.html>


More information about the debian-med-commit mailing list