[med-svn] [Git][med-team/plink2][master] 3 commits: New upstream version 2.00~a3-210701+dfsg

Dylan Aïssi (@daissi) gitlab at salsa.debian.org
Fri Jul 23 20:53:57 BST 2021



Dylan Aïssi pushed to branch master at Debian Med / plink2


Commits:
dd37e023 by Dylan Aïssi at 2021-07-23T21:49:18+02:00
New upstream version 2.00~a3-210701+dfsg
- - - - -
52ff17d8 by Dylan Aïssi at 2021-07-23T21:49:21+02:00
Update upstream source from tag 'upstream/2.00_a3-210701+dfsg'

Update to upstream version '2.00~a3-210701+dfsg'
with Debian dir 1a532024f15d7baeaed7a5c3387acd594edc4880
- - - - -
d2773ed4 by Dylan Aïssi at 2021-07-23T21:50:27+02:00
Bump d/changelog

- - - - -


27 changed files:

- debian/changelog
- include/pgenlib_misc.cc
- include/pgenlib_misc.h
- include/pgenlib_read.cc
- include/pgenlib_read.h
- include/plink2_string.cc
- include/plink2_string.h
- include/plink2_text.cc
- include/plink2_text.h
- pgen_compress.cc
- plink2.cc
- plink2_cmdline.cc
- plink2_cmdline.h
- plink2_common.h
- plink2_data.cc
- plink2_data.h
- plink2_export.cc
- plink2_fasta.cc
- plink2_glm.cc
- plink2_help.cc
- plink2_import.cc
- plink2_merge.cc
- plink2_merge.h
- plink2_misc.cc
- plink2_psam.cc
- plink2_pvar.cc
- plink2_pvar.h


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+plink2 (2.00~a3-210701+dfsg-1) UNRELEASED; urgency=medium
+
+  * New upstream version
+
+ -- Dylan Aïssi <daissi at debian.org>  Fri, 23 Jul 2021 21:49:33 +0200
+
 plink2 (2.00~a3-210203+dfsg-1) unstable; urgency=medium
 
   * New upstream version


=====================================
include/pgenlib_misc.cc
=====================================
@@ -122,7 +122,7 @@ void CopyGenomatchSubset(const uintptr_t* __restrict raw_bitarr, const uintptr_t
 }
 
 // Variant of ExpandBytearr() which is based off a target 2-bit value instead
-// of single expand_mask bits.  expand_size must be the number of instance of
+// of single expand_mask bits.  expand_size must be the number of instances of
 // the target value in genovec.
 void ExpandBytearrFromGenoarr(const void* __restrict compact_bitarr, const uintptr_t* __restrict genoarr, uintptr_t match_word, uint32_t genoword_ct, uint32_t expand_size, uint32_t read_start_bit, uintptr_t* __restrict target) {
   const uint32_t expand_sizex_m1 = expand_size + read_start_bit - 1;
@@ -726,6 +726,23 @@ void GenoarrCountFreqsUnsafe(const uintptr_t* genoarr, uint32_t sample_ct, STD_A
   genocounts[3] = bothset_ct;
 }
 
+uintptr_t MostCommonGenoUnsafe(const uintptr_t* genoarr, uint32_t sample_ct) {
+  STD_ARRAY_DECL(uint32_t, 4, genocounts);
+  GenoarrCountFreqsUnsafe(genoarr, sample_ct, genocounts);
+  uint32_t most_common_geno_ct = genocounts[0];
+  if (most_common_geno_ct * 2 >= sample_ct) {
+    return 0;
+  }
+  uintptr_t most_common_geno = 0;
+  for (uintptr_t cur_geno = 1; cur_geno != 4; ++cur_geno) {
+    if (most_common_geno_ct < genocounts[cur_geno]) {
+      most_common_geno = cur_geno;
+      most_common_geno_ct = genocounts[cur_geno];
+    }
+  }
+  return most_common_geno;
+}
+
 // geno_vvec now allowed to be unaligned.
 void CountSubset3FreqVec6(const VecW* __restrict geno_vvec, const VecW* __restrict interleaved_mask_vvec, uint32_t vec_ct, uint32_t* __restrict even_ctp, uint32_t* __restrict odd_ctp, uint32_t* __restrict bothset_ctp) {
   assert(!(vec_ct % 6));


=====================================
include/pgenlib_misc.h
=====================================
@@ -232,6 +232,10 @@ void Count3FreqVec6(const VecW* geno_vvec, uint32_t vec_ct, uint32_t* __restrict
 // vector-alignment preferred.
 void GenoarrCountFreqsUnsafe(const uintptr_t* genoarr, uint32_t sample_ct, STD_ARRAY_REF(uint32_t, 4) genocounts);
 
+// GenoarrCountsFreqsUnsafe() wrapper that returns most common genotype,
+// breaking ties in favor of the lower value.
+uintptr_t MostCommonGenoUnsafe(const uintptr_t* genoarr, uint32_t sample_ct);
+
 // geno_vvec now allowed to be unaligned.
 void CountSubset3FreqVec6(const VecW* __restrict geno_vvec, const VecW* __restrict interleaved_mask_vvec, uint32_t vec_ct, uint32_t* __restrict even_ctp, uint32_t* __restrict odd_ctp, uint32_t* __restrict bothset_ctp);
 


=====================================
include/pgenlib_read.cc
=====================================
@@ -2696,8 +2696,7 @@ void CopyAndSubsetDifflist(const uintptr_t* __restrict sample_include, const uin
 // Populates pgrp->ldbase_genovec or
 // pgrp->ldbase_{raregeno,difflist_sample_ids,difflist_len}, depending on
 // storage type.
-// Currently just called by ReadDifflistOrGenovecSubsetUnsafe(), which isn't
-// exploited by plink2 yet.
+// Currently just called by ReadDifflistOrGenovecSubsetUnsafe().
 PglErr LdLoadMinimalSubsetIfNecessary(const uintptr_t* __restrict sample_include, const uint32_t* __restrict sample_include_cumulative_popcounts, uint32_t sample_ct, uint32_t vidx, PgenReaderMain* pgrp) {
   if (!LdLoadNecessary(vidx, pgrp)) {
     return kPglRetSuccess;


=====================================
include/pgenlib_read.h
=====================================
@@ -277,6 +277,14 @@ HEADER_INLINE unsigned char* PgrGetVrtypes(PgenReader* pgr_ptr) {
   return pgrp->fi.vrtypes;
 }
 
+HEADER_INLINE uint32_t PgrGetVrtype(const PgenReader* pgr_ptr, uint32_t vidx) {
+  const PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);
+  if (pgrp->fi.vrtypes) {
+    return pgrp->fi.vrtypes[vidx];
+  }
+  return pgrp->fi.const_vrtype;
+}
+
 HEADER_INLINE uintptr_t* PgrGetNonrefFlags(PgenReader* pgr_ptr) {
   PgenReaderMain* pgrp = &GET_PRIVATE(*pgr_ptr, m);
   return pgrp->fi.nonref_flags;


=====================================
include/plink2_string.cc
=====================================
@@ -219,52 +219,41 @@ void WordWrap(uint32_t suffix_len, char* strbuf) {
 }
 
 
-static const uint32_t kPow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
+// This implementation is from Kendall Willets.  See
+//   https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/
 
 #ifdef USE_AVX2
-static const unsigned char kLzUintSlenBase[] =
-{9, 9, 9, 8,
- 8, 8, 7, 7,
- 7, 6, 6, 6,
- 6, 5, 5, 5,
- 4, 4, 4, 3,
- 3, 3, 3, 2,
- 2, 2, 1, 1,
- 1, 0, 0, 0,
- 1};  // UintSlen(0) needs to be 1, not zero
+static const uint64_t kLzcntUintSlenTable[] =
+  {42949672960LL, 42949672960LL, 41949672960LL, 41949672960LL, 41949672960LL,
+   38554705664LL, 38554705664LL, 38554705664LL, 34349738368LL, 34349738368LL,
+   34349738368LL, 34349738368LL, 30063771072LL, 30063771072LL, 30063771072LL,
+   25769703776LL, 25769703776LL, 25769703776LL, 21474826480LL, 21474826480LL,
+   21474826480LL, 21474826480LL, 17179868184LL, 17179868184LL, 17179868184LL,
+   12884901788LL, 12884901788LL, 12884901788LL,  8589934582LL,  8589934582LL,
+    8589934582LL,  4294967296LL};
 
 uint32_t UintSlen(uint32_t num) {
   const uint32_t lz_ct = _lzcnt_u32(num);
-  const uint32_t slen_base = kLzUintSlenBase[lz_ct];
-  return slen_base + (num >= kPow10[slen_base]);
+  return (num + kLzcntUintSlenTable[lz_ct]) >> 32;
 }
 #else
-// could also use something like ((32 - lz_ct) * 77) >> 8, since 77/256 is a
-// sufficiently good approximation of ln(2)/ln(10), but that's a bit slower and
-// this table doesn't take much space
-//
-// bugfix (29 Mar 2018): this table was totally wrong, ugh
-static const unsigned char kBsrUintSlenBase[] =
-{1, 1, 1, 1,
- 2, 2, 2, 3,
- 3, 3, 4, 4,
- 4, 4, 5, 5,
- 5, 6, 6, 6,
- 7, 7, 7, 7,
- 8, 8, 8, 9,
- 9, 9, 9, 9};
+static const uint64_t kBsrUintSlenTable[] =
+  {4294967296LL,  8589934582LL,  8589934582LL,  8589934582LL,  12884901788LL,
+   12884901788LL, 12884901788LL, 17179868184LL, 17179868184LL, 17179868184LL,
+   21474826480LL, 21474826480LL, 21474826480LL, 21474826480LL, 25769703776LL,
+   25769703776LL, 25769703776LL, 30063771072LL, 30063771072LL, 30063771072LL,
+   34349738368LL, 34349738368LL, 34349738368LL, 34349738368LL, 38554705664LL,
+   38554705664LL, 38554705664LL, 41949672960LL, 41949672960LL, 41949672960LL,
+   42949672960LL, 42949672960LL};
 
 uint32_t UintSlen(uint32_t num) {
   // tried divide-by-10 and divide-by-100 loops, they were slower
   // also tried a hardcoded binary tree, it was better but still slower
 
   // bsru32(0) is undefined
-  if (num < 10) {
-    return 1;
-  }
+  num |= 1;
   const uint32_t top_bit_pos = bsru32(num);
-  const uint32_t slen_base = kBsrUintSlenBase[top_bit_pos];
-  return slen_base + (num >= kPow10[slen_base]);
+  return (num + kBsrUintSlenTable[top_bit_pos]) >> 32;
 }
 #endif
 


=====================================
include/plink2_string.h
=====================================
@@ -596,6 +596,11 @@ HEADER_INLINE void StrptrArrNsort(uintptr_t ct, const char** strptr_arr) {
   std::sort(R_CAST(StrNsortDeref*, strptr_arr), &(R_CAST(StrNsortDeref*, strptr_arr)[ct]));
 }
 
+// Considered adding stable-sort variants of StrptrArrSort, but looks like it's
+// usually better to define a more sophisticated comparator that removes the
+// need for the explicit stable-sort.  (Note that std::stable_sort can throw
+// std::bad_alloc, etc.)
+
 // need to expose these for plink2_cmdline bigstack-allocating
 // SortStrboxIndexed()'s use
 void SortStrbox32bFinish(uintptr_t str_ct, uintptr_t max_str_blen, uint32_t use_nsort, Strbuf28Ui* filled_wkspace, char* sorted_strbox, uint32_t* id_map);


=====================================
include/plink2_text.cc
=====================================
@@ -98,6 +98,7 @@ BoolErr GzRawInit(const void* buf, uint32_t nbytes, GzRawDecompressStream* gzp)
     return 1;
   }
   gzp->ds_initialized = 1;
+  gzp->eof = 0;
   return 0;
 }
 
@@ -316,45 +317,68 @@ BoolErr IsPathologicallyLongLineOrToken(const char* line_start, const char* load
 const char kShortErrRfileTruncatedGz[] = "GzRawStreamRead: gzipped file appears to be truncated";
 
 PglErr GzRawStreamRead(char* dst_end, FILE* ff, GzRawDecompressStream* gzp, char** dst_iterp, const char** errmsgp) {
-  z_stream* dsp = &gzp->ds;
-  if ((!dsp->avail_in) && feof_unlocked(ff)) {
+  if (gzp->eof) {
+    // (!dsp->avail_in) && feof_unlocked(ff)) {
     return kPglRetSuccess;
   }
+  z_stream* dsp = &gzp->ds;
   char* dst_iter = *dst_iterp;
   do {
-    int zerr = Z_OK;
     if (dsp->avail_in) {  // can be zero after TextRewind()
-      dsp->next_out = R_CAST(unsigned char*, dst_iter);
-      dsp->avail_out = dst_end - dst_iter;
-      zerr = inflate(dsp, Z_SYNC_FLUSH);
-      if (unlikely((zerr < 0) || (zerr == Z_NEED_DICT))) {
-        if (dsp->msg) {
-          *errmsgp = dsp->msg;
-        } else {
-          *errmsgp = zError(zerr);
+      while (1) {
+        dsp->next_out = R_CAST(unsigned char*, dst_iter);
+        dsp->avail_out = dst_end - dst_iter;
+        int zerr = inflate(dsp, Z_SYNC_FLUSH);
+        if (unlikely((zerr < 0) || (zerr == Z_NEED_DICT))) {
+          if (dsp->msg) {
+            *errmsgp = dsp->msg;
+          } else {
+            *errmsgp = zError(zerr);
+          }
+          return kPglRetDecompressFail;
+        }
+        dst_iter = R_CAST(char*, dsp->next_out);
+        if (zerr != Z_STREAM_END) {
+          break;
+        }
+        // bugfix (25 May 2021): may need to open a new stream, or skip
+        // trailing garbage
+        const uint32_t trailing_byte_ct = dsp->avail_in;
+        if (trailing_byte_ct < 2) {
+          if (trailing_byte_ct) {
+            gzp->in[0] = dsp->next_in[0];
+          }
+          const uint32_t nbytes = fread_unlocked(&(gzp->in[trailing_byte_ct]), 1, kDecompressChunkSize - trailing_byte_ct, ff);
+          dsp->next_in = gzp->in;
+          dsp->avail_in = trailing_byte_ct + nbytes;
         }
-        return kPglRetDecompressFail;
+        if ((dsp->avail_in < 2) || (dsp->next_in[0] != 31) || (dsp->next_in[1] != 139)) {
+          // EOF or trailing garbage
+          gzp->eof = 1;
+          *dst_iterp = dst_iter;
+          return kPglRetSuccess;
+        }
+#ifdef NDEBUG
+        inflateReset(dsp);
+#else
+        const int errcode = inflateReset(dsp);
+        assert(errcode == Z_OK);
+#endif
       }
-      dst_iter = R_CAST(char*, dsp->next_out);
       if (dsp->avail_in) {
-        assert(dst_iter == dst_end);
         break;
       }
     }
     const uint32_t nbytes = fread_unlocked(gzp->in, 1, kDecompressChunkSize, ff);
     dsp->next_in = gzp->in;
     dsp->avail_in = nbytes;
-    if (!nbytes) {
+    if (unlikely(!nbytes)) {
       if (unlikely(!feof_unlocked(ff))) {
         *errmsgp = strerror(errno);
         return kPglRetReadFail;
       }
-      if (unlikely(zerr == Z_OK)) {
-        *errmsgp = kShortErrRfileTruncatedGz;
-        return kPglRetDecompressFail;
-      }
-      // Normal EOF.
-      break;
+      *errmsgp = kShortErrRfileTruncatedGz;
+      return kPglRetDecompressFail;
     }
   } while (dst_iter != dst_end);
   *dst_iterp = dst_iter;
@@ -391,12 +415,12 @@ PglErr ZstRawStreamRead(char* dst_end, FILE* ff, ZstRawDecompressStream* zstp, c
       return kPglRetReadFail;
     }
     zstp->ib.pos = 0;
-    zstp->ib.size = nbytes + n_inbytes;
-    if (!nbytes) {
-      if (unlikely(n_inbytes)) {
-        *errmsgp = kShortErrZstdPrefixUnknown;
-        return kPglRetDecompressFail;
-      }
+    const uint32_t new_size = nbytes + n_inbytes;
+    zstp->ib.size = new_size;
+    // bugfix (28 Feb 2021): Possible for nbytes to be zero at the same time
+    // n_inbytes is positive when e.g. the input .zst file is the concatenation
+    // of multiple ordinary .zst files, which is permitted by the spec.
+    if (!new_size) {
       break;
     }
   }
@@ -678,6 +702,7 @@ void TextFileRewind(textFILE* txf_ptr) {
   if (basep->file_type != kFileUncompressed) {
     if (basep->file_type == kFileGzip) {
       txfp->rds.gz.ds.avail_in = 0;
+      txfp->rds.gz.eof = 0;
 #ifdef NDEBUG
       inflateReset(&txfp->rds.gz.ds);
 #else
@@ -1171,6 +1196,7 @@ THREAD_FUNC_DECL TextStreamThread(void* raw_arg) {
         if (file_type != kFileUncompressed) {
           if (file_type == kFileGzip) {
             rdsp->gz.ds.avail_in = 0;
+            rdsp->gz.eof = 0;
 #ifdef NDEBUG
             inflateReset(&rdsp->gz.ds);
 #else
@@ -1257,6 +1283,7 @@ THREAD_FUNC_DECL TextStreamThread(void* raw_arg) {
         case kFileGzip:
           {
             GzRawDecompressStream* gzp = &rdsp->gz;
+            gzp->eof = 0;
             z_stream* dsp = &gzp->ds;
 #ifdef NDEBUG
             inflateReset(dsp);


=====================================
include/plink2_text.h
=====================================
@@ -150,6 +150,7 @@ typedef struct GzRawDecompressStreamStruct {
   unsigned char* in;
   z_stream ds;
   uint32_t ds_initialized;
+  uint32_t eof;
 } GzRawDecompressStream;
 
 // BgzfRawDecompressStream declared in plink2_bgzf.h.


=====================================
pgen_compress.cc
=====================================
@@ -30,9 +30,9 @@ int32_t main(int32_t argc, char** argv) {
     if ((argc < 3) || (argc > 5)) {
       fputs(
 "Usage:\n"
-"pgen_compress [input .bed or .pgen] [output filename] {sample_ct}\n"
+"pgen_compress <input .bed or .pgen> <output filename> [sample_ct]\n"
 "  (sample_ct is required when loading a .bed file)\n"
-"pgen_compress -u [input .pgen] [output .bed]\n"
+"pgen_compress -u <input .pgen> <output .bed>\n"
             , stdout);
       goto main_ret_INVALID_CMDLINE;
     }


=====================================
plink2.cc
=====================================
@@ -71,7 +71,7 @@ static const char ver_str[] = "PLINK v2.00a3"
 #ifdef USE_MKL
   " Intel"
 #endif
-  " (3 Feb 2021)";
+  " (1 Jul 2021)";
 static const char ver_str2[] =
   // include leading space if day < 10, so character length stays the same
   " "
@@ -999,9 +999,17 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
         goto Plink2Core_ret_NOMEM;
       }
       const uint32_t nonref_flags_already_loaded = (nonref_flags != nullptr);
-      if ((!nonref_flags) && ((header_ctrl & 192) == 192)) {
-        if (unlikely(bigstack_alloc_w(raw_variant_ctl, &nonref_flags))) {
-          goto Plink2Core_ret_NOMEM;
+      if (!nonref_flags_already_loaded) {
+        const uint32_t nonref_flags_status_shifted = header_ctrl & 192;
+        if (nonref_flags_status_shifted == 192) {
+          if (unlikely(bigstack_alloc_w(raw_variant_ctl, &nonref_flags))) {
+            goto Plink2Core_ret_NOMEM;
+          }
+        } else if ((pgfi.const_vrtype != kPglVrtypePlink1) && (!nonref_flags_status_shifted)) {
+          // This should never happen with a chain of plink2-generated .pgen
+          // files, but it's permitted by the specification and makes future
+          // merge unsafe.
+          logerrputs("Warning: .pgen file indicates that provisional-REF information is stored in the\ncompanion .pvar, but that .pvar lacks either an INFO/PR header or an INFO\ncolumn.  Assuming all REF alleles are correct.\n");
         }
       }
       pgfi.nonref_flags = nonref_flags;
@@ -2884,8 +2892,6 @@ uint32_t CmdlineSingleChr(const ChrInfo* cip, MiscFlags misc_flags) {
 }
 
 void GetExportfTargets(const char* const* argvk, uint32_t param_ct, ExportfFlags* exportf_flags_ptr, IdpasteFlags* exportf_id_paste_ptr, uint64_t* format_param_idxs_ptr) {
-  // does not error out if no format present, since this is needed for --recode
-  // translation
   // supports multiple formats
   uint64_t format_param_idxs = 0;
   for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
@@ -2984,7 +2990,9 @@ void GetExportfTargets(const char* const* argvk, uint32_t param_ct, ExportfFlags
       }
     case 'o':
       if (!strcmp(cur_modif2, "xford")) {
-        cur_format = kfExportfOxGen;
+        cur_format = kfExportfOxGenV1;
+      } else if (!strcmp(cur_modif2, "xford-v2")) {
+        cur_format = kfExportfOxGenV2;
       }
       break;
     case 'p':
@@ -3014,7 +3022,7 @@ void GetExportfTargets(const char* const* argvk, uint32_t param_ct, ExportfFlags
         } else if (!strcmp(&(cur_modif2[2]), "-4.2")) {
           cur_format = kfExportfVcf42;
         } else if ((!strcmp(&(cur_modif2[2]), "-fid")) || (!strcmp(&(cur_modif2[2]), "-iid"))) {
-          snprintf(g_logbuf, kLogbufSize, "Note: --export 'v%s' modifier is deprecated.  Use 'vcf' + 'id-paste=%s'.\n", cur_modif2, &(cur_modif2[3]));
+          logprintf("Note: --export 'v%s' modifier is deprecated.  Use 'vcf' + 'id-paste=%s'.\n", cur_modif2, &(cur_modif2[3]));
           cur_format = kfExportfVcf43;
           *exportf_id_paste_ptr = (cur_modif2[3] == 'f')? kfIdpasteFid : kfIdpasteIid;
         }
@@ -3443,21 +3451,9 @@ int main(int argc, char** argv) {
           break;
         case 'r':
           if (strequal_k(flagname_p, "recode", flag_slen)) {
-            // special case: translate to "export ped" if no format specified
-            const uint32_t param_ct = GetParamCt(argvk, argc, arg_idx);
-            if (unlikely(param_ct > 4)) {
-              fputs("Error: --recode accepts at most 4 arguments.\n", stderr);
-              goto main_ret_INVALID_CMDLINE;
-            }
-            ExportfFlags dummy;
-            IdpasteFlags dummy2;
-            uint64_t format_param_idxs;
-            GetExportfTargets(&(argvk[arg_idx]), param_ct, &dummy, &dummy2, &format_param_idxs);
-            if (!format_param_idxs) {
-              snprintf(flagname_write_iter, kMaxFlagBlen, "export ped");
-            } else {
-              snprintf(flagname_write_iter, kMaxFlagBlen, "export");
-            }
+            // don't bother translating no-modifier --recode to "--export ped",
+            // just let it fail
+            snprintf(flagname_write_iter, kMaxFlagBlen, "export");
           } else if (strequal_k(flagname_p, "remove-founders", flag_slen)) {
             snprintf(flagname_write_iter, kMaxFlagBlen, "keep-founders");
           } else if (strequal_k(flagname_p, "remove-nonfounders", flag_slen)) {
@@ -4658,21 +4654,16 @@ int main(int argc, char** argv) {
             goto main_ret_1;
           }
           pc.filter_flags |= kfFilterPvarReq;
-        } else if (strequal_k_unsafe(flagname_p2, "xport") || strequal_k_unsafe(flagname_p2, "xport ped")) {
+        } else if (strequal_k_unsafe(flagname_p2, "xport")) {
           // todo: determine actual limit
           if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 0, 50))) {
             goto main_ret_INVALID_CMDLINE_2A;
           }
           uint64_t format_param_idxs = 0;
-          if (!flagname_p2[5]) {
-            GetExportfTargets(&(argvk[arg_idx]), param_ct, &pc.exportf_info.flags, &pc.exportf_info.idpaste_flags, &format_param_idxs);
-            if (unlikely(!format_param_idxs)) {
-              logerrputs("Error: --export requires at least one output format.  (Did you forget 'ped' or\n'vcf'?)\n");
-              goto main_ret_INVALID_CMDLINE_A;
-            }
-            logputsb();
-          } else {
-            pc.exportf_info.flags = kfExportfPed;
+          GetExportfTargets(&(argvk[arg_idx]), param_ct, &pc.exportf_info.flags, &pc.exportf_info.idpaste_flags, &format_param_idxs);
+          if (unlikely(!format_param_idxs)) {
+            logerrputs("Error: --export requires at least one output format.  (Did you forget 'ped' or\n'vcf'?)\n");
+            goto main_ret_INVALID_CMDLINE_A;
           }
           // can't have e.g. bgen-1.1 and bgen-1.2 simultaneously, since they
           // have the same extension and different content.
@@ -4689,6 +4680,10 @@ int main(int argc, char** argv) {
             logerrputs("Error: 'A' and 'AD' formats cannot be exported simultaneously.\n");
             goto main_ret_INVALID_CMDLINE;
           }
+          if (unlikely((pc.exportf_info.flags & (kfExportfOxGenV1 | kfExportfOxGenV2)) == (kfExportfOxGenV1 | kfExportfOxGenV2))) {
+            logerrputs("Error: 'oxford' and 'oxford-v2' formats cannot be exported simultaneously.\n");
+            goto main_ret_INVALID_CMDLINE;
+          }
           for (uint32_t param_idx = 1; param_idx <= param_ct; ++param_idx) {
             // could use AdvBoundedTo0Bit()...
             if ((format_param_idxs >> param_idx) & 1) {
@@ -5799,7 +5794,7 @@ int main(int argc, char** argv) {
                 logerrputs("Error: Multiple --het cols= modifiers.\n");
                 goto main_ret_INVALID_CMDLINE;
               }
-              reterr = ParseColDescriptor(&(cur_modif[5]), "maybefid\0fid\0maybesid\0sid\0hom\0het\0nobs\0f\0", "het", kfHardyColChrom, kfHardyColDefault, 1, &pc.hardy_flags);
+              reterr = ParseColDescriptor(&(cur_modif[5]), "maybefid\0fid\0maybesid\0sid\0hom\0het\0nobs\0f\0", "het", kfHetColMaybefid, kfHetColDefault, 1, &pc.het_flags);
               if (unlikely(reterr)) {
                 goto main_ret_1;
               }
@@ -7801,7 +7796,7 @@ int main(int argc, char** argv) {
           const char* cur_modif = argvk[arg_idx + 1];
           const uint32_t cur_modif_slen = strlen(cur_modif);
           if (strequal_k(cur_modif, "erase", cur_modif_slen)) {
-            if (unlikely(pmerge_info.merge_info_mode != kMergeQicModeErase)) {
+            if (unlikely(pmerge_info.merge_info_mode != kMergeInfoCmModeErase)) {
               logerrputs("Error: \"--merge-xheader-mode erase\" requires \"--merge-info-mode erase\".\n");
               goto main_ret_INVALID_CMDLINE_A;
             }
@@ -7815,34 +7810,24 @@ int main(int argc, char** argv) {
         } else if (unlikely(strequal_k_unsafe(flagname_p2, "erge-equal-pos"))) {
           logerrputs("Error: --merge-equal-pos has been retired.  Use e.g. --set-all-var-ids before\nmerging instead.\n");
           goto main_ret_INVALID_CMDLINE_A;
-        } else if (strequal_k_unsafe(flagname_p2, "erge-qual-mode") ||
-                   strequal_k_unsafe(flagname_p2, "erge-info-mode") ||
-                   strequal_k_unsafe(flagname_p2, "erge-cm-mode")) {
+        } else if (strequal_k_unsafe(flagname_p2, "erge-qual-mode")) {
           if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 1, 1))) {
             goto main_ret_INVALID_CMDLINE_2A;
           }
           const char* cur_modif = argvk[arg_idx + 1];
           const uint32_t cur_modif_slen = strlen(cur_modif);
-          MergeQicMode mode;
           if (strequal_k(cur_modif, "erase", cur_modif_slen)) {
-            mode = kMergeQicModeErase;
+            pmerge_info.merge_qual_mode = kMergeQualModeErase;
           } else if (strequal_k(cur_modif, "nm-match", cur_modif_slen)) {
-            mode = kMergeQicModeNmMatch;
+            pmerge_info.merge_qual_mode = kMergeQualModeNmMatch;
           } else if (strequal_k(cur_modif, "nm-first", cur_modif_slen)) {
-            mode = kMergeQicModeNmFirst;
-          } else if (likely(strequal_k(cur_modif, "first", cur_modif_slen))) {
-            mode = kMergeQicModeFirst;
-          } else {
-            snprintf(g_logbuf, kLogbufSize, "Error: Invalid --%s argument '%s'.\n", flagname_p, cur_modif);
+            pmerge_info.merge_qual_mode = kMergeQualModeNmFirst;
+          } else if (strequal_k(cur_modif, "first", cur_modif_slen)) {
+            pmerge_info.merge_qual_mode = kMergeQualModeFirst;
+          } else if (unlikely(!strequal_k(cur_modif, "min", cur_modif_slen))) {
+            snprintf(g_logbuf, kLogbufSize, "Error: Invalid --merge-qual-mode argument '%s'.\n", cur_modif);
             goto main_ret_INVALID_CMDLINE_WWA;
           }
-          if (flagname_p2[5] == 'q') {
-            pmerge_info.merge_qual_mode = mode;
-          } else if (flagname_p2[5] == 'i') {
-            pmerge_info.merge_info_mode = mode;
-          } else {
-            pmerge_info.merge_cm_mode = mode;
-          }
         } else if (strequal_k_unsafe(flagname_p2, "erge-filter-mode")) {
           if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 1, 1))) {
             goto main_ret_INVALID_CMDLINE_2A;
@@ -7861,6 +7846,31 @@ int main(int argc, char** argv) {
             snprintf(g_logbuf, kLogbufSize, "Error: Invalid --merge-filter-mode argument '%s'.\n", cur_modif);
             goto main_ret_INVALID_CMDLINE_WWA;
           }
+        } else if (strequal_k_unsafe(flagname_p2, "erge-info-mode") ||
+                   strequal_k_unsafe(flagname_p2, "erge-cm-mode")) {
+          if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 1, 1))) {
+            goto main_ret_INVALID_CMDLINE_2A;
+          }
+          const char* cur_modif = argvk[arg_idx + 1];
+          const uint32_t cur_modif_slen = strlen(cur_modif);
+          MergeInfoCmMode mode;
+          if (strequal_k(cur_modif, "erase", cur_modif_slen)) {
+            mode = kMergeInfoCmModeErase;
+          } else if (strequal_k(cur_modif, "nm-match", cur_modif_slen)) {
+            mode = kMergeInfoCmModeNmMatch;
+          } else if (strequal_k(cur_modif, "nm-first", cur_modif_slen)) {
+            mode = kMergeInfoCmModeNmFirst;
+          } else if (likely(strequal_k(cur_modif, "first", cur_modif_slen))) {
+            mode = kMergeInfoCmModeFirst;
+          } else {
+            snprintf(g_logbuf, kLogbufSize, "Error: Invalid --%s argument '%s'.\n", flagname_p, cur_modif);
+            goto main_ret_INVALID_CMDLINE_WWA;
+          }
+          if (flagname_p2[5] == 'i') {
+            pmerge_info.merge_info_mode = mode;
+          } else {
+            pmerge_info.merge_cm_mode = mode;
+          }
         } else if (strequal_k_unsafe(flagname_p2, "erge-info-sort") ||
                    strequal_k_unsafe(flagname_p2, "erge-pheno-sort")) {
           if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 1, 1))) {
@@ -8546,6 +8556,48 @@ int main(int argc, char** argv) {
             goto main_ret_1;
           }
           pc.command_flags1 |= kfCommand1Pmerge;
+        } else if (strequal_k_unsafe(flagname_p2, "merge-list-dir")) {
+          if (unlikely((!(pc.command_flags1 & kfCommand1Pmerge)) || (!pmerge_info.list_fname))) {
+            logerrputs("Error: --pmerge-list-dir must be used with --pmerge-list.\n");
+            goto main_ret_INVALID_CMDLINE_A;
+          }
+          if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 1, 1))) {
+            goto main_ret_INVALID_CMDLINE_2A;
+          }
+          const char* dir_str = argvk[arg_idx + 1];
+          const uint32_t dir_slen = strlen(dir_str);
+          uint32_t append_path_sep = 0;
+#ifdef _WIN32
+          const char path_sep = '\\';
+          if ((dir_str[dir_slen - 1] != '/') && (dir_str[dir_slen - 1] != '\\')) {
+            append_path_sep = 1;
+          }
+#else
+          const char path_sep = '/';
+          if (dir_str[dir_slen - 1] != '/') {
+            append_path_sep = 1;
+          }
+#endif
+          const uint32_t alloc_blen = dir_slen + 1 + append_path_sep;
+          if (unlikely(alloc_blen > (kPglFnamesize - 9))) {
+            logerrputs("Error: --pmerge-list-dir argument too long.\n");
+            goto main_ret_OPEN_FAIL;
+          }
+          if (unlikely(pgl_malloc(alloc_blen, &pmerge_info.list_base_dir))) {
+            goto main_ret_NOMEM;
+          }
+          char* write_iter = memcpya(pmerge_info.list_base_dir, dir_str, dir_slen);
+          if (append_path_sep) {
+            *write_iter++ = path_sep;
+          }
+          *write_iter = '\0';
+        } else if (strequal_k_unsafe(flagname_p2, "merge-output-vzs")) {
+          if (unlikely(!(pc.command_flags1 & kfCommand1Pmerge))) {
+            logerrputs("Error: --pmerge-output-vzs must be used with --pmerge or --pmerge-list.\n");
+            goto main_ret_INVALID_CMDLINE_A;
+          }
+          pmerge_info.flags |= kfPmergeOutputVzs;
+          goto main_param_zero;
         } else if (strequal_k_unsafe(flagname_p2, "gen-diff")) {
           if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 1, 7))) {
             goto main_ret_INVALID_CMDLINE_2A;
@@ -10565,7 +10617,7 @@ int main(int argc, char** argv) {
           const uint32_t no_samples_ok = !((pc.dependency_flags & (kfFilterAllReq | kfFilterPsamReq)) || (pc.command_flags1 & kfCommand1Pmerge));
           const uint32_t is_vcf = (xload / kfXloadVcf) & 1;
           pvar_is_compressed = ((import_flags & (kfImportKeepAutoconv | kfImportKeepAutoconvVzs)) != kfImportKeepAutoconv);
-          if (no_samples_ok && is_vcf && (!(import_flags & kfImportKeepAutoconv)) && pc.command_flags1) {
+          if (no_samples_ok && is_vcf && (!(import_flags & (kfImportKeepAutoconv | kfImportVcfRefNMissing))) && pc.command_flags1) {
             // special case: just treat the VCF as a .pvar file
             strcpy(pvarname, pgenname);
             pgenname[0] = '\0';
@@ -10609,7 +10661,6 @@ int main(int argc, char** argv) {
           goto main_ret_1;
         }
 
-        // todo: we have to skip this when merging is involved
         pc.hard_call_thresh = UINT32_MAX;
         pc.dosage_erase_thresh = 0;
 
@@ -10676,19 +10727,11 @@ int main(int argc, char** argv) {
       }
 
       if (pc.command_flags1 & kfCommand1Pmerge) {
-        const uint32_t zst_level = g_zst_level;
         char* merge_outname_end = outname_end;
-        if (pc.command_flags1 == kfCommand1Pmerge) {
-          import_flags |= kfImportKeepAutoconv;
-        } else {
-          if (make_plink2_flags & (kfMakePgen | kfMakePvar | kfMakePsam)) {
-            merge_outname_end = strcpya_k(merge_outname_end, "-merge");
-          }
-          if (!(import_flags & kfImportKeepAutoconv)) {
-            g_zst_level = 1;
-          }
+        if (make_plink2_flags & (kfMakePgen | kfMakePvar | kfMakePsam)) {
+          merge_outname_end = strcpya_k(merge_outname_end, "-merge");
         }
-        reterr = Pmerge(&pmerge_info, pc.sample_sort_fname, pc.misc_flags, import_flags, pc.sample_sort_mode, pc.fam_cols, pc.missing_pheno, pc.max_thread_ct, pc.sort_vars_mode, pgenname, psamname, pvarname, outname, merge_outname_end, &chr_info);
+        reterr = Pmerge(&pmerge_info, pc.sample_sort_fname, pc.misc_flags, pc.sample_sort_mode, pc.fam_cols, pc.missing_pheno, pc.max_thread_ct, pc.sort_vars_mode, pgenname, psamname, pvarname, outname, merge_outname_end, &chr_info);
         if (unlikely(reterr)) {
           goto main_ret_1;
         }
@@ -10702,10 +10745,9 @@ int main(int argc, char** argv) {
         // otherwise error out (since --real-ref-alleles does not make sense on
         // the merged .pgen).
         pc.misc_flags &= ~kfMiscRealRefAlleles;
-        g_zst_level = zst_level;
       }
 
-      if ((pc.command_flags1 & (~(kfCommand1MakePlink2 | kfCommand1Validate | kfCommand1WriteSnplist | kfCommand1WriteCovar | kfCommand1WriteSamples))) || ((pc.command_flags1 & kfCommand1MakePlink2) && (pc.sort_vars_mode > kSortNone))) {
+      if ((pc.command_flags1 & (~(kfCommand1MakePlink2 | kfCommand1Validate | kfCommand1WriteSnplist | kfCommand1WriteCovar | kfCommand1WriteSamples))) || ((pc.command_flags1 & kfCommand1MakePlink2) && (pc.sort_vars_mode <= kSortNone))) {
         // split-chromosome prohibited for all commands unless explicitly
         // permitted here
         pc.dependency_flags |= kfFilterNoSplitChr;


=====================================
plink2_cmdline.cc
=====================================
@@ -746,6 +746,15 @@ BoolErr bigstack_calloc_kcp(uintptr_t ct, const char*** kcp_arr_ptr) {
   return 0;
 }
 
+BoolErr bigstack_calloc_cpp(uintptr_t ct, char**** cpp_arr_ptr) {
+  *cpp_arr_ptr = S_CAST(char***, bigstack_alloc(ct * sizeof(intptr_t)));
+  if (unlikely(!(*cpp_arr_ptr))) {
+    return 1;
+  }
+  ZeroPtrArr(ct, *cpp_arr_ptr);
+  return 0;
+}
+
 BoolErr bigstack_end_calloc_uc(uintptr_t ct, unsigned char** uc_arr_ptr) {
   *uc_arr_ptr = S_CAST(unsigned char*, bigstack_end_alloc(ct));
   if (unlikely(!(*uc_arr_ptr))) {
@@ -1098,7 +1107,6 @@ void BitvecInvertCopy(const uintptr_t* __restrict source_bitvec, uintptr_t word_
 #endif
 }
 
-/*
 void BitvecXor(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* main_bitvec) {
   // main_bitvec := main_bitvec XOR arg_bitvec
 #ifdef __LP64__
@@ -1134,7 +1142,6 @@ void BitvecXor(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintpt
   }
 #endif
 }
-*/
 
 void BitvecInvertAndMask(const uintptr_t* __restrict include_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec) {
   // main_bitvec := (~main_bitvec) AND include_bitvec


=====================================
plink2_cmdline.h
=====================================
@@ -604,6 +604,11 @@ HEADER_INLINE BoolErr bigstack_alloc_wpp(uintptr_t ct, uintptr_t**** wpp_arr_ptr
   return !(*wpp_arr_ptr);
 }
 
+HEADER_INLINE BoolErr bigstack_alloc_cpp(uintptr_t ct, char**** cpp_arr_ptr) {
+  *cpp_arr_ptr = S_CAST(char***, bigstack_alloc(ct * sizeof(intptr_t)));
+  return !(*cpp_arr_ptr);
+}
+
 HEADER_INLINE BoolErr bigstack_alloc_u32pp(uintptr_t ct, uint32_t**** u32pp_arr_ptr) {
   *u32pp_arr_ptr = S_CAST(uint32_t***, bigstack_alloc(ct * sizeof(intptr_t)));
   return !(*u32pp_arr_ptr);
@@ -636,6 +641,8 @@ BoolErr bigstack_calloc_cp(uintptr_t ct, char*** cp_arr_ptr);
 
 BoolErr bigstack_calloc_kcp(uintptr_t ct, const char*** kcp_arr_ptr);
 
+BoolErr bigstack_calloc_cpp(uintptr_t ct, char**** cpp_arr_ptr);
+
 HEADER_INLINE BoolErr bigstack_calloc_c(uintptr_t ct, char** c_arr_ptr) {
   return bigstack_calloc_uc(ct, R_CAST(unsigned char**, c_arr_ptr));
 }
@@ -1177,7 +1184,7 @@ HEADER_INLINE void AlignedBitarrInvertCopy(const uintptr_t* __restrict source_bi
   }
 }
 
-// void BitvecXor(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* main_bitvec);
+void BitvecXor(const uintptr_t* __restrict arg_bitvec, uintptr_t word_ct, uintptr_t* main_bitvec);
 
 void BitvecInvertAndMask(const uintptr_t* __restrict include_bitvec, uintptr_t word_ct, uintptr_t* __restrict main_bitvec);
 


=====================================
plink2_common.h
=====================================
@@ -171,19 +171,21 @@ FLAGSET64_DEF_START()
   kfExportfLgenRef = (1 << 25),
   kfExportfList = (1 << 26),
   kfExportfRlist = (1 << 27),
-  kfExportfOxGen = (1 << 28),
-  kfExportfPed = (1 << 29),
-  kfExportfCompound = (1 << 30),
-  kfExportfStructure = (1U << 31),
-  kfExportfTranspose = (1LLU << 32),
-  kfExportfVcf42 = (1LLU << 33),
-  kfExportfVcf43 = (1LLU << 34),
+  kfExportfOxGenV1 = (1 << 28),
+  kfExportfOxGenV2 = (1 << 29),
+  kfExportfOxGen = kfExportfOxGenV1 | kfExportfOxGenV2,
+  kfExportfPed = (1 << 30),
+  kfExportfCompound = (1U << 31),
+  kfExportfStructure = (1LLU << 32),
+  kfExportfTranspose = (1LLU << 33),
+  kfExportfVcf42 = (1LLU << 34),
+  kfExportfVcf43 = (1LLU << 35),
   kfExportfVcf = kfExportfVcf42 | kfExportfVcf43,
   kfExportfTypemask = (2LLU * kfExportfVcf43) - kfExportf23,
-  kfExportfIncludeAlt = (1LLU << 35),
-  kfExportfBgz = (1LLU << 36),
-  kfExportfOmitNonmaleY = (1LLU << 37),
-  kfExportfSampleV2 = (1LLU << 38)
+  kfExportfIncludeAlt = (1LLU << 36),
+  kfExportfBgz = (1LLU << 37),
+  kfExportfOmitNonmaleY = (1LLU << 38),
+  kfExportfSampleV2 = (1LLU << 39)
 FLAGSET64_DEF_END(ExportfFlags);
 
 FLAGSET_DEF_START()
@@ -389,7 +391,7 @@ void PopulateRescaledDosageF(const uintptr_t* genoarr, const uintptr_t* dosage_p
 
 // assumes trailing bits of genoarr are zeroed out
 HEADER_INLINE uint32_t AtLeastOneHetUnsafe(const uintptr_t* genoarr, uint32_t sample_ct) {
-  const uint32_t sample_ctl2 = DivUp(sample_ct, kBitsPerWordD2);
+  const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
   for (uint32_t uii = 0; uii != sample_ctl2; ++uii) {
     const uintptr_t geno_word = genoarr[uii];
     if (Word01(geno_word)) {


=====================================
plink2_data.cc
=====================================
@@ -174,7 +174,7 @@ void PvarInfoWrite(uint32_t info_pr_flag_present, uint32_t is_pr, char* info_tok
   uint32_t info_token_slen = info_token_end - info_token;
   char* info_token_pr = nullptr;
   if (info_pr_flag_present) {
-    info_token_pr = PrInInfoToken(info_token_slen, info_token);
+    info_token_pr = InfoPrStart(info_token_slen, info_token);
   }
   char* write_iter = *write_iter_ptr;
   if (is_pr || (!info_token_pr))  {
@@ -553,14 +553,20 @@ PglErr WritePvar(const char* outname, const uintptr_t* variant_include, const Ch
     const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
     const uint32_t all_nonref = (nonref_flags_storage == 2);
     uint32_t write_info_pr = all_nonref;
-    uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
+    if (pvar_info_reload && (!(pvar_psam_flags & kfPvarColXinfo))) {
+      // bugfix (6 Feb 2021): don't write INFO column when INFO column
+      // explicitly excluded here, but pvar_info_reload is needed for some
+      // other reason
+      pvar_info_reload = nullptr;
+    }
+    const uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
     if (write_info && nonref_flags) {
       write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
     }
     write_info_pr = write_info_pr && write_info;
     if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
       logputs("\n");
-      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
+      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n\"provisional reference\" flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
       goto WritePvar_ret_INCONSISTENT_INPUT;
     }
 
@@ -1184,9 +1190,9 @@ PglErr WritePsam(const char* outname, const uintptr_t* sample_include, const Sam
 
 /*
 #ifdef NO_UNALIGNED
-#  error "Unaligned accesses in BitvecResort()."
+#  error "Unaligned accesses in BitvecPermute()."
 #endif
-void BitvecResort(const uintptr_t* bitvec, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, unsigned char* writebuf) {
+void BitvecPermute(const uintptr_t* bitvec, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, unsigned char* writebuf) {
   const uint32_t sample_ctl_m1 = BitCtToWordCt(sample_ct) - 1;
   uint32_t widx = 0;
   uint32_t cur_word_entry_ct = kBitsPerWord;
@@ -1210,37 +1216,111 @@ void BitvecResort(const uintptr_t* bitvec, const uint32_t* new_sample_idx_to_old
 }
 */
 
+// Now assumes trailing bits of genovec have been zeroed out.
+// writebuf need not be word-aligned.
 #ifdef NO_UNALIGNED
-#  error "Unaligned accesses in GenovecResort()."
+#  error "Unaligned accesses in GenovecPermute()."
 #endif
-void GenovecResort(const uintptr_t* genovec, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, void* writebuf) {
-  // writebuf need not be word-aligned
+void GenovecPermute(const uintptr_t* genovec, const uint32_t* old_sample_idx_to_new, uint32_t sample_ct, void* writebuf) {
+  const uintptr_t most_common_geno_word = MostCommonGenoUnsafe(genovec, sample_ct) * kMask5555;
   const uint32_t sample_ctl2_m1 = NypCtToWordCt(sample_ct) - 1;
-  const uint32_t* new_sample_idx_to_old_iter = new_sample_idx_to_old;
   uintptr_t* writebuf_walias = S_CAST(uintptr_t*, writebuf);
   for (uint32_t widx = 0; widx != sample_ctl2_m1; ++widx) {
-    uintptr_t cur_word = 0;
-    // this is noticeably better than the ascending loop
-    for (uint32_t uii = kBitsPerWordD2 - 1; ; --uii) {
-      cur_word |= GetNyparrEntry(genovec, new_sample_idx_to_old_iter[uii]);
-      if (!uii) {
-        break;
+    writebuf_walias[widx] = most_common_geno_word;
+  }
+  const uint32_t trailing_bit_ct = 2 * ModNz(sample_ct, kBitsPerWordD2);
+  SubwordStore(bzhi_max(most_common_geno_word, trailing_bit_ct), DivUp(trailing_bit_ct, CHAR_BIT), &(writebuf_walias[sample_ctl2_m1]));
+
+  unsigned char* writebuf_b = S_CAST(unsigned char*, writebuf);
+  for (uint32_t widx = 0; ; ++widx) {
+    uintptr_t geno_word_xor;
+    if (widx >= sample_ctl2_m1) {
+      if (widx > sample_ctl2_m1) {
+        return;
       }
-      cur_word = cur_word << 2;
+      geno_word_xor = bzhi_max(most_common_geno_word ^ genovec[widx], trailing_bit_ct);
+    } else {
+      geno_word_xor = most_common_geno_word ^ genovec[widx];
     }
-    writebuf_walias[widx] = cur_word;
-    new_sample_idx_to_old_iter = &(new_sample_idx_to_old_iter[kBitsPerWordD2]);
+    if (!geno_word_xor) {
+      continue;
+    }
+    const uint32_t* cur_old_sample_idx_to_new = &(old_sample_idx_to_new[widx * kBitsPerWordD2]);
+    do {
+      const uint32_t bit_read_shift_ct = ctzw(geno_word_xor) & (kBitsPerWord - 2);
+      const uintptr_t cur_geno_xor = (geno_word_xor >> bit_read_shift_ct) & 3;
+      const uint32_t new_sample_idx = cur_old_sample_idx_to_new[bit_read_shift_ct / 2];
+      const uint32_t new_byte_idx = new_sample_idx / 4;
+      const uint32_t bit_write_shift_ct = 2 * (new_sample_idx % 4);
+      // Value has been preset to most_common_geno, so if we xor it with
+      // (most_common_geno ^ actual_geno), the result is actual_geno.
+      writebuf_b[new_byte_idx] ^= cur_geno_xor << bit_write_shift_ct;
+      geno_word_xor &= (~(3 * k1LU)) << bit_read_shift_ct;
+    } while (geno_word_xor);
   }
-  const uint32_t cur_word_entry_ct = ModNz(sample_ct, kBitsPerWordD2);
-  uintptr_t cur_word = 0;
-  for (uint32_t uii = cur_word_entry_ct - 1; ; --uii) {
-    cur_word |= GetNyparrEntry(genovec, new_sample_idx_to_old_iter[uii]);
-    if (!uii) {
-      break;
+}
+
+void GenovecPermuteSubset(const uintptr_t* genovec, const uintptr_t* sample_include, const uintptr_t* sample_include_interleaved_vec, const uint32_t* old_sample_idx_to_new, uint32_t raw_sample_ct, uint32_t sample_ct, void* writebuf) {
+  if (sample_ct == raw_sample_ct) {
+    GenovecPermute(genovec, old_sample_idx_to_new, sample_ct, writebuf);
+    return;
+  }
+  STD_ARRAY_DECL(uint32_t, 4, genocounts);
+  GenoarrCountSubsetFreqs(genovec, sample_include_interleaved_vec, raw_sample_ct, sample_ct, genocounts);
+  uintptr_t most_common_geno = 0;
+  uint32_t most_common_geno_ct = genocounts[0];
+  if (most_common_geno_ct * 2 < sample_ct) {
+    for (uintptr_t cur_geno = 1; cur_geno != 4; ++cur_geno) {
+      if (most_common_geno_ct < genocounts[cur_geno]) {
+        most_common_geno = cur_geno;
+        most_common_geno_ct = genocounts[cur_geno];
+      }
     }
-    cur_word = cur_word << 2;
   }
-  SubwordStore(cur_word, NypCtToByteCt(cur_word_entry_ct), &(writebuf_walias[sample_ctl2_m1]));
+  const uintptr_t most_common_geno_word = most_common_geno * kMask5555;
+  const uint32_t sample_ctl2_m1 = NypCtToWordCt(sample_ct) - 1;
+  uintptr_t* writebuf_walias = S_CAST(uintptr_t*, writebuf);
+  for (uint32_t widx = 0; widx != sample_ctl2_m1; ++widx) {
+    writebuf_walias[widx] = most_common_geno_word;
+  }
+  const uint32_t write_trailing_bit_ct = 2 * ModNz(sample_ct, kBitsPerWordD2);
+  SubwordStore(bzhi_max(most_common_geno_word, write_trailing_bit_ct), DivUp(write_trailing_bit_ct, CHAR_BIT), &(writebuf_walias[sample_ctl2_m1]));
+  if (most_common_geno_ct == sample_ct) {
+    return;
+  }
+
+  const uint32_t raw_sample_ctl2_m1 = NypCtToWordCt(raw_sample_ct) - 1;
+  const Halfword* sample_include_alias = R_CAST(const Halfword*, sample_include);
+  unsigned char* writebuf_b = S_CAST(unsigned char*, writebuf);
+  for (uint32_t widx = 0; ; ++widx) {
+    uintptr_t geno_word_xor;
+    if (widx >= raw_sample_ctl2_m1) {
+      if (widx > raw_sample_ctl2_m1) {
+        return;
+      }
+      geno_word_xor = bzhi_max(most_common_geno_word ^ genovec[widx], 2 * ModNz(raw_sample_ct, kBitsPerWordD2));
+    } else {
+      geno_word_xor = most_common_geno_word ^ genovec[widx];
+    }
+    if (!geno_word_xor) {
+      continue;
+    }
+    geno_word_xor &= UnpackHalfwordToWord(sample_include_alias[widx]) * 3;
+    if (!geno_word_xor) {
+      continue;
+    }
+    const uint32_t* cur_old_sample_idx_to_new = &(old_sample_idx_to_new[widx * kBitsPerWordD2]);
+    do {
+      const uint32_t bit_read_shift_ct = ctzw(geno_word_xor) & (kBitsPerWord - 2);
+      const uintptr_t cur_geno_xor = (geno_word_xor >> bit_read_shift_ct) & 3;
+      const uint32_t new_sample_idx = cur_old_sample_idx_to_new[bit_read_shift_ct / 2];
+      // assert(new_sample_idx != UINT32_MAX);
+      const uint32_t new_byte_idx = new_sample_idx / 4;
+      const uint32_t bit_write_shift_ct = 2 * (new_sample_idx % 4);
+      writebuf_b[new_byte_idx] ^= cur_geno_xor << bit_write_shift_ct;
+      geno_word_xor &= (~(3 * k1LU)) << bit_read_shift_ct;
+    } while (geno_word_xor);
+  }
 }
 
 // Revised phaseraw:
@@ -1292,7 +1372,7 @@ void UnpackHphaseSubset(const uintptr_t* __restrict all_hets, const uintptr_t* _
   }
 }
 
-void UnpackAndResortHphase(const uintptr_t* __restrict all_hets, const uintptr_t* __restrict phaseraw, const uintptr_t* sample_include, const uint32_t* old_sample_idx_to_new, uint32_t raw_sample_ct, uint32_t sample_ct, uintptr_t** phasepresent_ptr, uintptr_t* __restrict phaseinfo) {
+void UnpackAndPermuteHphase(const uintptr_t* __restrict all_hets, const uintptr_t* __restrict phaseraw, const uintptr_t* sample_include, const uint32_t* old_sample_idx_to_new, uint32_t raw_sample_ct, uint32_t sample_ct, uintptr_t** phasepresent_ptr, uintptr_t* __restrict phaseinfo) {
   const uintptr_t* aux2a_iter = &(phaseraw[8 / kBytesPerWord]);
   const uint32_t* old_sample_idx_to_new_iter = old_sample_idx_to_new;
   const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
@@ -1509,44 +1589,122 @@ void CopyDosage(const uintptr_t* __restrict read_dosagepresent, const Dosage* re
   memcpy(write_dosagevals, read_dosagevals, dosage_ct * sizeof(Dosage));
 }
 
-uint32_t CopyAndResort8bit(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict new_sample_idx_to_old, uint32_t raw_sample_ct, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals, uint32_t* __restrict cumulative_popcount_buf) {
-  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
-  FillCumulativePopcounts(src_subset, raw_sample_ctl, cumulative_popcount_buf);
+uint32_t CopyAndPermute8bit(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict old_sample_idx_to_new, uint32_t sample_ct, uint32_t val_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals) {
   const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
   ZeroWArr(sample_ctl, dst_subset);
   const unsigned char* src_vals_uc = S_CAST(const unsigned char*, src_vals);
   unsigned char* dst_vals_uc = S_CAST(unsigned char*, dst_vals);
-  unsigned char* dst_vals_iter = dst_vals_uc;
-  // Tried word-based loop, was significantly worse
-  for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
-    const uint32_t old_sample_idx = new_sample_idx_to_old[new_sample_idx];
-    if (IsSet(src_subset, old_sample_idx)) {
+
+  uintptr_t cur_bits = src_subset[0];
+  uint32_t new_val_ct;
+  if (!sample_include) {
+    uintptr_t old_sample_idx_base = 0;
+    for (uint32_t old_val_idx = 0; old_val_idx != val_ct; ++old_val_idx) {
+      const uint32_t old_sample_idx = BitIter1(src_subset, &old_sample_idx_base, &cur_bits);
+      const uint32_t new_sample_idx = old_sample_idx_to_new[old_sample_idx];
+      SetBit(new_sample_idx, dst_subset);
+      dst_vals_uc[new_sample_idx] = src_vals_uc[old_val_idx];
+    }
+    new_val_ct = val_ct;
+  } else {
+    uintptr_t widx = 0;
+    new_val_ct = 0;
+    for (uint32_t old_val_idx = 0; old_val_idx != val_ct; ++old_val_idx) {
+      const uintptr_t old_sample_idx_lowbit = BitIter1y(src_subset, &widx, &cur_bits);
+      if (!(sample_include[widx] & old_sample_idx_lowbit)) {
+        continue;
+      }
+      const uint32_t old_sample_idx = widx * kBitsPerWord + ctzw(old_sample_idx_lowbit);
+      const uint32_t new_sample_idx = old_sample_idx_to_new[old_sample_idx];
       SetBit(new_sample_idx, dst_subset);
-      const uint32_t old_dosagevals_idx = RawToSubsettedPos(src_subset, cumulative_popcount_buf, old_sample_idx);
-      *dst_vals_iter++ = src_vals_uc[old_dosagevals_idx];
+      dst_vals_uc[new_sample_idx] = src_vals_uc[old_val_idx];
+      ++new_val_ct;
     }
   }
-  return dst_vals_iter - dst_vals_uc;
+  uintptr_t new_sample_idx_base = 0;
+  cur_bits = dst_subset[0];
+  for (uint32_t new_val_idx = 0; new_val_idx != new_val_ct; ++new_val_idx) {
+    const uint32_t new_sample_idx = BitIter1(dst_subset, &new_sample_idx_base, &cur_bits);
+    dst_vals_uc[new_val_idx] = dst_vals_uc[new_sample_idx];
+  }
+  return new_val_ct;
 }
 
-uint32_t CopyAndResort16bit(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict new_sample_idx_to_old, uint32_t raw_sample_ct, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals, uint32_t* __restrict cumulative_popcount_buf) {
-  const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
-  FillCumulativePopcounts(src_subset, raw_sample_ctl, cumulative_popcount_buf);
+uint32_t CopyAndPermute16bit(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict old_sample_idx_to_new, uint32_t sample_ct, uint32_t val_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals) {
   const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
   ZeroWArr(sample_ctl, dst_subset);
   const uint16_t* src_vals_u16 = S_CAST(const uint16_t*, src_vals);
   uint16_t* dst_vals_u16 = S_CAST(uint16_t*, dst_vals);
-  uint16_t* dst_vals_iter = dst_vals_u16;
-  // Tried word-based loop, was significantly worse
-  for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
-    const uint32_t old_sample_idx = new_sample_idx_to_old[new_sample_idx];
-    if (IsSet(src_subset, old_sample_idx)) {
+
+  uintptr_t cur_bits = src_subset[0];
+  uint32_t new_val_ct;
+  if (!sample_include) {
+    uintptr_t old_sample_idx_base = 0;
+    for (uint32_t old_val_idx = 0; old_val_idx != val_ct; ++old_val_idx) {
+      const uint32_t old_sample_idx = BitIter1(src_subset, &old_sample_idx_base, &cur_bits);
+      const uint32_t new_sample_idx = old_sample_idx_to_new[old_sample_idx];
+      SetBit(new_sample_idx, dst_subset);
+      dst_vals_u16[new_sample_idx] = src_vals_u16[old_val_idx];
+    }
+    new_val_ct = val_ct;
+  } else {
+    uintptr_t widx = 0;
+    new_val_ct = 0;
+    for (uint32_t old_val_idx = 0; old_val_idx != val_ct; ++old_val_idx) {
+      const uintptr_t old_sample_idx_lowbit = BitIter1y(src_subset, &widx, &cur_bits);
+      if (!(sample_include[widx] & old_sample_idx_lowbit)) {
+        continue;
+      }
+      const uint32_t old_sample_idx = widx * kBitsPerWord + ctzw(old_sample_idx_lowbit);
+      const uint32_t new_sample_idx = old_sample_idx_to_new[old_sample_idx];
       SetBit(new_sample_idx, dst_subset);
-      const uint32_t old_dosagevals_idx = RawToSubsettedPos(src_subset, cumulative_popcount_buf, old_sample_idx);
-      *dst_vals_iter++ = src_vals_u16[old_dosagevals_idx];
+      dst_vals_u16[new_sample_idx] = src_vals_u16[old_val_idx];
+      ++new_val_ct;
     }
   }
-  return dst_vals_iter - dst_vals_u16;
+  uintptr_t new_sample_idx_base = 0;
+  cur_bits = dst_subset[0];
+  for (uint32_t new_val_idx = 0; new_val_idx != new_val_ct; ++new_val_idx) {
+    const uint32_t new_sample_idx = BitIter1(dst_subset, &new_sample_idx_base, &cur_bits);
+    dst_vals_u16[new_val_idx] = dst_vals_u16[new_sample_idx];
+  }
+  return new_val_ct;
+}
+
+uint32_t Dense8bitToSparse(const uintptr_t* __restrict set, uint32_t sample_ctl, void* __restrict vals) {
+  unsigned char* vals_uc = S_CAST(unsigned char*, vals);
+  uint32_t write_idx = 0;
+  for (uint32_t widx = 0; widx != sample_ctl; ++widx) {
+    uintptr_t set_word = set[widx];
+    if (!set_word) {
+      continue;
+    }
+    unsigned char* cur_vals_read = &(vals_uc[widx * kBitsPerWord]);
+    do {
+      const uint32_t sample_idx_lowbits = ctzw(set_word);
+      vals_uc[write_idx++] = cur_vals_read[sample_idx_lowbits];
+      set_word &= set_word - 1;
+    } while (set_word);
+  }
+  return write_idx;
+}
+
+uint32_t Dense16bitToSparse(const uintptr_t* __restrict set, uint32_t sample_ctl, void* __restrict vals) {
+  uint16_t* vals_u16 = S_CAST(uint16_t*, vals);
+  uint32_t write_idx = 0;
+  for (uint32_t widx = 0; widx != sample_ctl; ++widx) {
+    uintptr_t set_word = set[widx];
+    if (!set_word) {
+      continue;
+    }
+    uint16_t* cur_vals_read = &(vals_u16[widx * kBitsPerWord]);
+    do {
+      const uint32_t sample_idx_lowbits = ctzw(set_word);
+      vals_u16[write_idx++] = cur_vals_read[sample_idx_lowbits];
+      set_word &= set_word - 1;
+    } while (set_word);
+  }
+  return write_idx;
 }
 
 // Requires trailing bits of genovec to be zeroed out.
@@ -3517,13 +3675,13 @@ BoolErr FillInfoVtypeNum(const char* num_str_start, int32_t* info_vtype_num_ptr)
     } else if (first_num_char == 'R') {
       *info_vtype_num_ptr = kInfoVtypeR;
     } else if (likely(first_num_char == 'G')) {
-      *info_vtype_num_ptr = kInfoVtypeG;
+      *info_vtype_num_ptr = kInfoVtypeUnknown;
     } else {
       return 1;
     }
   } else {
     uint32_t val;
-    if (unlikely(ScanmovPosintCapped(UINT32_MAX, &num_str_start, &val) || (num_str_start[0] != ','))) {
+    if (unlikely(ScanmovPosintCapped(0x3fffffff, &num_str_start, &val) || (num_str_start[0] != ','))) {
       return 1;
     }
     *info_vtype_num_ptr = val;
@@ -3681,14 +3839,17 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
     const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
     const uint32_t all_nonref = (nonref_flags_storage == 2);
     uint32_t write_info_pr = all_nonref;
-    uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
+    if (pvar_info_reload && (!(pvar_psam_flags & kfPvarColXinfo))) {
+      pvar_info_reload = nullptr;
+    }
+    const uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
     if (write_info && nonref_flags) {
       write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
     }
     write_info_pr = write_info_pr && write_info;
     if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
       logputs("\n");
-      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
+      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n\"provisional reference\" flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
       goto WritePvarSplit_ret_INCONSISTENT_INPUT;
     }
 
@@ -4041,24 +4202,6 @@ PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, con
                       const char* cur_info_cur = info_curs[kpos];
 
                       const char* subtoken_end = AdvToDelimOrEnd(cur_info_cur, cur_info_end, ',');
-                      if (knum == kInfoVtypeG) {
-                        if (unlikely(subtoken_end == cur_info_end)) {
-                          snprintf(g_logbuf, kLogbufSize, "Error: Too few values for INFO key '%s', variant ID '%s'.\n", cur_key_str, orig_variant_id);
-                          goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
-                        }
-                        cswritep = memcpya(cswritep, cur_info_cur, 1 + S_CAST(uintptr_t, subtoken_end - cur_info_cur));
-                        cur_info_cur = subtoken_end;
-                        const uint32_t skip_ct = alt_allele_idx - 1;
-                        if (skip_ct) {
-                          cur_info_cur = AdvToNthDelimChecked(&(cur_info_cur[1]), cur_info_end, skip_ct, ',');
-                          if (unlikely(!subtoken_end)) {
-                            snprintf(g_logbuf, kLogbufSize, "Error: Too few values for INFO key '%s', variant ID '%s'.\n", cur_key_str, orig_variant_id);
-                            goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
-                          }
-                        }
-                        ++cur_info_cur;
-                        subtoken_end = AdvToDelimOrEnd(cur_info_cur, cur_info_end, ',');
-                      }
                       if (unlikely((subtoken_end == cur_info_end) != is_last_allele)) {
                         snprintf(g_logbuf, kLogbufSize, "Error: Wrong number of values for INFO key '%s', variant ID '%s'.\n", cur_key_str, orig_variant_id);
                         goto WritePvarSplit_ret_MALFORMED_INPUT_WW;
@@ -4337,14 +4480,17 @@ PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, cons
     const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
     const uint32_t all_nonref = (nonref_flags_storage == 2);
     uint32_t write_info_pr = all_nonref;
-    uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
+    if (pvar_info_reload && (!(pvar_psam_flags & kfPvarColXinfo))) {
+      pvar_info_reload = nullptr;
+    }
+    const uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
     if (write_info && nonref_flags) {
       write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
     }
     write_info_pr = write_info_pr && write_info;
     if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
       logputs("\n");
-      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
+      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n\"provisional reference\" flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
       goto WritePvarJoin_ret_INCONSISTENT_INPUT;
     }
 
@@ -4736,7 +4882,8 @@ typedef struct MakeBedlikeCtxStruct {
 
   const uintptr_t* variant_include;
   uint32_t* sample_include_cumulative_popcounts;
-  const uint32_t* collapsed_sort_map;
+  // unlike MakePgenCtx, old_sample_idx is subsetted here
+  const uint32_t* old_sample_idx_to_new;
 
   PgenReader** pgr_ptrs;
 
@@ -4775,7 +4922,7 @@ THREAD_FUNC_DECL MakeBedlikeThread(void* raw_arg) {
   PgrSetSampleSubsetIndex(ctx->sample_include_cumulative_popcounts, pgrp, &pssi);
   const uintptr_t* sex_male_collapsed_interleaved = mcp->sex_male_collapsed_interleaved;
   const uintptr_t* sex_female_collapsed_interleaved = mcp->sex_female_collapsed_interleaved;
-  const uint32_t* collapsed_sort_map = ctx->collapsed_sort_map;
+  const uint32_t* old_sample_idx_to_new = ctx->old_sample_idx_to_new;
   const Plink2WriteFlags plink2_write_flags = mcp->plink2_write_flags;
   const uint32_t set_hh_missing = plink2_write_flags & kfPlink2WriteSetHhMissing;
   const uint32_t set_mixed_mt_missing = plink2_write_flags & kfPlink2WriteSetMixedMtMissing;
@@ -4878,12 +5025,11 @@ THREAD_FUNC_DECL MakeBedlikeThread(void* raw_arg) {
       if (write_plink1) {
         PgrPlink2ToPlink1InplaceUnsafe(sample_ct, genovec);
       }
-      // trailing bytes don't matter, but trailing bits of last byte may
       ZeroTrailingNyps(sample_ct, genovec);
-      if (!collapsed_sort_map) {
+      if (!old_sample_idx_to_new) {
         writebuf_iter = memcpyua(writebuf_iter, genovec, sample_ct4);
       } else {
-        GenovecResort(genovec, collapsed_sort_map, sample_ct, writebuf_iter);
+        GenovecPermute(genovec, old_sample_idx_to_new, sample_ct, writebuf_iter);
         writebuf_iter = &(writebuf_iter[sample_ct4]);
       }
     }
@@ -4957,7 +5103,7 @@ PglErr MakeBedlikeMain(const uintptr_t* sample_include, const uint32_t* new_samp
     // tried more threads, pointless since this is too I/O-bound
     // (exception: reordering samples)
     uint32_t calc_thread_ct = (max_thread_ct > 2)? (max_thread_ct - 1) : max_thread_ct;
-    ctx.collapsed_sort_map = new_sample_idx_to_old;
+    ctx.old_sample_idx_to_new = nullptr;
     if (!new_sample_idx_to_old) {
       // Without BMI2 instructions, subsetting is most expensive with
       // sample_ct near 2/3 of raw_sample_ct; up to ~7 compute threads are
@@ -4977,15 +5123,22 @@ PglErr MakeBedlikeMain(const uintptr_t* sample_include, const uint32_t* new_samp
       if (calc_thread_max < calc_thread_ct) {
         calc_thread_ct = calc_thread_max;
       }
-    } else if (sample_ct < raw_sample_ct) {
-      uint32_t* new_collapsed_sort_map;
-      if (unlikely(bigstack_alloc_u32(sample_ct, &new_collapsed_sort_map))) {
+    } else {
+      uint32_t* old_sample_idx_to_new;
+      if (unlikely(bigstack_alloc_u32(raw_sample_ct, &old_sample_idx_to_new))) {
         goto MakeBedlikeMain_ret_NOMEM;
       }
-      // bugfix (26 Mar 2018): forgot to initialize this
-      memcpy(new_collapsed_sort_map, new_sample_idx_to_old, sample_ct * sizeof(int32_t));
-      UidxsToIdxs(sample_include, ctx.sample_include_cumulative_popcounts, sample_ct, new_collapsed_sort_map);
-      ctx.collapsed_sort_map = new_collapsed_sort_map;
+      if (sample_ct == raw_sample_ct) {
+        for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
+          old_sample_idx_to_new[new_sample_idx_to_old[new_sample_idx]] = new_sample_idx;
+        }
+      } else {
+        for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
+          const uint32_t old_sample_uidx = new_sample_idx_to_old[new_sample_idx];
+          old_sample_idx_to_new[RawToSubsettedPos(sample_include, ctx.sample_include_cumulative_popcounts, old_sample_uidx)] = new_sample_idx;
+        }
+      }
+      ctx.old_sample_idx_to_new = old_sample_idx_to_new;
     }
 
     if (make_plink2_flags & kfMakeBed) {
@@ -5122,8 +5275,8 @@ PglErr MakeBedlikeMain(const uintptr_t* sample_include, const uint32_t* new_samp
 typedef struct MakePgenCtxStruct {
   MakeCommon* mcp;
 
-  const uint32_t* new_sample_idx_to_old;
   uint32_t* old_sample_idx_to_new;
+  uintptr_t* sample_include_interleaved_vec;
   // combine existing chr_mask/xymt_codes/haploid_mask/chr_idx_to_foidx with
   // new collapsed chromosome boundary table
   uint32_t* write_chr_fo_vidx_start;
@@ -5150,7 +5303,6 @@ typedef struct MakePgenCtxStruct {
   Dosage** thread_write_dosagevals;
   uintptr_t** thread_write_dphasepresents;
   SDosage** thread_write_dphasedeltas;
-  uint32_t** thread_cumulative_popcount_bufs;
   PglErr write_reterr;
   int32_t write_errno;
 } MakePgenCtx;
@@ -5168,7 +5320,6 @@ THREAD_FUNC_DECL MakePgenThread(void* raw_arg) {
   const uintptr_t tidx = arg->tidx;
   MakePgenCtx* ctx = S_CAST(MakePgenCtx*, arg->sharedp->context);
 
-  const uint32_t* new_sample_idx_to_old = ctx->new_sample_idx_to_old;
   const uint32_t* old_sample_idx_to_new = ctx->old_sample_idx_to_new;
   const MakeCommon* mcp = ctx->mcp;
   const ChrInfo* cip = mcp->cip;
@@ -5176,6 +5327,7 @@ THREAD_FUNC_DECL MakePgenThread(void* raw_arg) {
   const uintptr_t* write_allele_idx_offsets = ctx->write_allele_idx_offsets;
   const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select_iter) = mcp->refalt1_select;
   const uintptr_t* sample_include = mcp->sample_include;
+  const uintptr_t* sample_include_interleaved_vec = ctx->sample_include_interleaved_vec;
 
   const uintptr_t* sex_male_collapsed = ctx->sex_male_collapsed;
 
@@ -5216,7 +5368,7 @@ THREAD_FUNC_DECL MakePgenThread(void* raw_arg) {
   }
   uintptr_t* write_genovec = nullptr;
   // assumes sample_include == nullptr if sample_ct == raw_sample_ct
-  if (new_sample_idx_to_old || sample_include) {
+  if (old_sample_idx_to_new || sample_include) {
     write_genovec = ctx->thread_write_genovecs[tidx];
     write_genovec[sample_ctl2 - 1] = 0;
   }
@@ -5242,7 +5394,6 @@ THREAD_FUNC_DECL MakePgenThread(void* raw_arg) {
   uintptr_t* write_dphasepresent = nullptr;
   SDosage* write_dphasedeltas = nullptr;
   SDosage* tmp_dphasedeltas = nullptr;
-  uint32_t* cumulative_popcount_buf = nullptr;
   if (ctx->thread_write_dosagepresents) {
     write_dosagepresent = ctx->thread_write_dosagepresents[tidx];
     write_dosagevals = ctx->thread_write_dosagevals[tidx];
@@ -5252,9 +5403,6 @@ THREAD_FUNC_DECL MakePgenThread(void* raw_arg) {
       tmp_dphasedeltas = &(write_dphasedeltas[RoundUpPow2(sample_ct, kCacheline / 2)]);
     }
   }
-  if ((ctx->thread_write_mhc || ctx->thread_write_dosagepresents) && new_sample_idx_to_old) {
-    cumulative_popcount_buf = ctx->thread_cumulative_popcount_bufs[tidx];
-  }
   uint32_t variant_idx_offset = 0;
   uint32_t allele_ct = 2;
   uint32_t parity = 0;
@@ -5369,21 +5517,22 @@ THREAD_FUNC_DECL MakePgenThread(void* raw_arg) {
       uint32_t write_rare10_ct = 0;
       uint32_t write_dosage_ct = 0;
       uint32_t write_dphase_ct = 0;
-      if (new_sample_idx_to_old) {
-        GenovecResort(loadbuf_iter, new_sample_idx_to_old, sample_ct, write_genovec);
+      if (old_sample_idx_to_new) {
+        ZeroTrailingNyps(raw_sample_ct, loadbuf_iter);
+        GenovecPermuteSubset(loadbuf_iter, sample_include, sample_include_interleaved_vec, old_sample_idx_to_new, raw_sample_ct, sample_ct, write_genovec);
         if (read_rare01_ct) {
-          write_rare01_ct = CopyAndResort8bit(read_patch_01_set, read_patch_01_vals, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_patch_01_set, write_patch_01_vals, cumulative_popcount_buf);
+          write_rare01_ct = CopyAndPermute8bit(sample_include, read_patch_01_set, read_patch_01_vals, old_sample_idx_to_new, raw_sample_ct, read_rare01_ct, write_patch_01_set, write_patch_01_vals);
         }
         if (read_rare10_ct) {
-          write_rare10_ct = CopyAndResort16bit(read_patch_10_set, read_patch_10_vals, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_patch_10_set, write_patch_10_vals, cumulative_popcount_buf);
+          write_rare10_ct = CopyAndPermute16bit(sample_include, read_patch_10_set, read_patch_10_vals, old_sample_idx_to_new, raw_sample_ct, read_rare10_ct, write_patch_10_set, write_patch_10_vals);
         }
         if (is_hphase) {
-          UnpackAndResortHphase(all_hets, cur_phaseraw, sample_include, old_sample_idx_to_new, raw_sample_ct, sample_ct, &cur_write_phasepresent, write_phaseinfo);
+          UnpackAndPermuteHphase(all_hets, cur_phaseraw, sample_include, old_sample_idx_to_new, raw_sample_ct, sample_ct, &cur_write_phasepresent, write_phaseinfo);
         }
         if (is_dosage) {
-          write_dosage_ct = CopyAndResort16bit(cur_dosagepresent, cur_dosagevals, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_dosagepresent, write_dosagevals, cumulative_popcount_buf);
+          write_dosage_ct = CopyAndPermute16bit(sample_include, cur_dosagepresent, cur_dosagevals, old_sample_idx_to_new, raw_sample_ct, read_dosage_ct, write_dosagepresent, write_dosagevals);
           if (is_dphase) {
-            write_dphase_ct = CopyAndResort16bit(cur_dphasepresent, cur_dphasedelta, new_sample_idx_to_old, raw_sample_ct, sample_ct, write_dphasepresent, write_dphasedeltas, cumulative_popcount_buf);
+            write_dphase_ct = CopyAndPermute16bit(sample_include, cur_dphasepresent, cur_dphasedelta, old_sample_idx_to_new, raw_sample_ct, read_dphase_ct, write_dphasepresent, write_dphasedeltas);
           }
         }
       } else if (sample_include) {
@@ -5865,7 +6014,6 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
     const uint32_t subsetting_required = (sample_ct != raw_sample_ct);
     const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
     mcp->sample_include = subsetting_required? sample_include : nullptr;
-    ctx.new_sample_idx_to_old = new_sample_idx_to_old;
     ctx.sex_male_collapsed = sex_male_collapsed;
     ctx.sex_female_collapsed = sex_female_collapsed;
     ctx.write_reterr = kPglRetSuccess;
@@ -6017,15 +6165,25 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
       const uint32_t sample_ctl2 = NypCtToWordCt(sample_ct);
       const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
       ctx.thread_write_genovecs = nullptr;
+      ctx.old_sample_idx_to_new = nullptr;
+      ctx.sample_include_interleaved_vec = nullptr;
       uint32_t write_mhc_needed = 0;
       if (new_sample_idx_to_old || subsetting_required) {
         if (unlikely(bigstack_alloc_wp(1, &ctx.thread_write_genovecs))) {
           goto MakePgenRobust_ret_NOMEM;
         }
-        if (read_phase_present && new_sample_idx_to_old) {
+        if (new_sample_idx_to_old) {
           if (unlikely(bigstack_alloc_u32(raw_sample_ct, &ctx.old_sample_idx_to_new))) {
             goto MakePgenRobust_ret_NOMEM;
           }
+          if (subsetting_required) {
+            // SetAllU32Arr(raw_sample_ct, ctx.old_sample_idx_to_new);
+            const uint32_t raw_sample_ctv = BitCtToVecCt(raw_sample_ct);
+            if (unlikely(bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.sample_include_interleaved_vec))) {
+              goto MakePgenRobust_ret_NOMEM;
+            }
+            FillInterleavedMaskVec(sample_include, raw_sample_ctv, ctx.sample_include_interleaved_vec);
+          }
           for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
             ctx.old_sample_idx_to_new[new_sample_idx_to_old[new_sample_idx]] = new_sample_idx;
           }
@@ -6080,12 +6238,6 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
           }
         }
       }
-      if ((write_mhc_needed || read_dosage_present) && new_sample_idx_to_old) {
-        if (unlikely(bigstack_alloc_u32p(1, &ctx.thread_cumulative_popcount_bufs) ||
-                     bigstack_alloc_u32(raw_sample_ctl, &(ctx.thread_cumulative_popcount_bufs[0])))) {
-          goto MakePgenRobust_ret_NOMEM;
-        }
-      }
       mcp->refalt1_select = refalt1_select;
       if (refalt1_select) {
         if (write_allele_idx_offsets) {
@@ -6159,7 +6311,27 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
 
       const uint32_t raw_sample_ctv2 = NypCtToVecCt(raw_sample_ct);
       uintptr_t load_variant_vec_ct = raw_sample_ctv2;
-      uint32_t loaded_vrtypes_needed = (read_gflags & kfPgenGlobalMultiallelicHardcallFound)? 1 : 0;
+      // bugfix (8 Jun 2021): forgot to include multiallelic track in
+      // load_variant_vec_ct computation
+      uint32_t loaded_vrtypes_needed = 0;
+      if (read_gflags & kfPgenGlobalMultiallelicHardcallFound) {
+        loaded_vrtypes_needed = 1;
+        // raw format:
+        //   rare01_ct, padded out to a word
+        //   rare10_ct, padded out to a word
+        //   [round up to vector boundary, for patch_01_set]
+        //   aux1a, if not mode 15:
+        //     patch_01_set as bitarray, raw_sample_ctl words
+        //     patch_01_vals (raw_sample_ct * sizeof(AlleleCode)), round up to
+        //       word boundary
+        //     [round up to vector boundary, for patch_10_set]
+        //   aux1b, if not mode 15:
+        //     patch_10_set as bitarray, raw_sample_ctl words
+        //     patch_10_vals (raw_sample_ct * 2 * sizeof(AlleleCode)), round up
+        //       to word boundary
+        const uintptr_t multiallelic_raw_vec_ct = WordCtToVecCt(2) + WordCtToVecCt(raw_sample_ctl + DivUp(raw_sample_ct * sizeof(AlleleCode), kBytesPerWord)) + WordCtToVecCt(raw_sample_ctl + DivUp(raw_sample_ct * 2 * sizeof(AlleleCode), kBytesPerWord));
+        load_variant_vec_ct += WordCtToVecCt(multiallelic_raw_vec_ct);
+      }
       if (read_phase_present || read_dosage_present) {
         loaded_vrtypes_needed = 1;
         if (read_phase_present) {
@@ -6837,7 +7009,6 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
     }
     MakeCommon mc;
     mc.plink2_write_flags = kfPlink2Write0;
-    const uint32_t raw_sample_ctl = BitCtToWordCt(raw_sample_ct);
     const uint32_t sample_ctl = BitCtToWordCt(sample_ct);
     ctx.sex_male_collapsed = nullptr;  // defensive
     if (make_plink2_flags & kfMakePlink2SetHhMissing) {
@@ -7024,9 +7195,9 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
       if (!new_sample_idx_to_old) {
         // hphase doesn't seem to affect read:write ratio much
 #ifdef USE_AVX2
-        const uint32_t max_calc_thread_ct = 2;
+        const uint32_t max_calc_thread_ct = 3;
 #else
-        const uint32_t max_calc_thread_ct = 2 + subsetting_required;
+        const uint32_t max_calc_thread_ct = 3 + subsetting_required;
 #endif
         if (calc_thread_ct > max_calc_thread_ct) {
           calc_thread_ct = max_calc_thread_ct;
@@ -7099,7 +7270,6 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
         }
       }
       ctx.pwcs = &(mpgwp->pwcs[0]);
-      ctx.new_sample_idx_to_old = new_sample_idx_to_old;
       ctx.thread_write_genovecs = nullptr;
       ctx.thread_write_mhc = nullptr;
 
@@ -7107,15 +7277,26 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
       // the I/O thread loads the next (64k * thread_ct).
       uintptr_t other_per_thread_cacheline_ct = 2 * load_vblock_cacheline_ct;
 
+      ctx.old_sample_idx_to_new = nullptr;
+      ctx.sample_include_interleaved_vec = nullptr;
+
       uint32_t write_mhc_needed = 0;
       if (new_sample_idx_to_old || subsetting_required) {
         if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_genovecs)) {
           goto MakePlink2NoVsort_fallback;
         }
-        if (read_phase_present && new_sample_idx_to_old) {
-          if (bigstack_alloc_u32(raw_sample_ct, &ctx.old_sample_idx_to_new)) {
+        if (new_sample_idx_to_old) {
+          if (unlikely(bigstack_alloc_u32(raw_sample_ct, &ctx.old_sample_idx_to_new))) {
             goto MakePlink2NoVsort_fallback;
           }
+          if (subsetting_required) {
+            // SetAllU32Arr(raw_sample_ct, ctx.old_sample_idx_to_new);
+            const uint32_t raw_sample_ctv = BitCtToVecCt(raw_sample_ct);
+            if (unlikely(bigstack_alloc_w(raw_sample_ctv * kWordsPerVec, &ctx.sample_include_interleaved_vec))) {
+              goto MakePlink2NoVsort_fallback;
+            }
+            FillInterleavedMaskVec(sample_include, raw_sample_ctv, ctx.sample_include_interleaved_vec);
+          }
           for (uint32_t new_sample_idx = 0; new_sample_idx != sample_ct; ++new_sample_idx) {
             ctx.old_sample_idx_to_new[new_sample_idx_to_old[new_sample_idx]] = new_sample_idx;
           }
@@ -7134,13 +7315,6 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
         write_mhcraw_cacheline_ct = DivUp(mhcwrite_word_ct, kWordsPerCacheline);
         other_per_thread_cacheline_ct += write_mhcraw_cacheline_ct;
       }
-      if ((write_mhc_needed || read_dosage_present) && new_sample_idx_to_old) {
-        // ctx.thread_cumulative_popcount_bufs
-        other_per_thread_cacheline_ct += Int32CtToCachelineCt(raw_sample_ctl);
-        if (bigstack_alloc_u32p(calc_thread_ct, &ctx.thread_cumulative_popcount_bufs)) {
-          goto MakePlink2NoVsort_fallback;
-        }
-      }
       ctx.thread_write_phasepresents = nullptr;
       ctx.thread_all_hets = nullptr;
       ctx.thread_write_dosagepresents = nullptr;
@@ -7230,9 +7404,6 @@ PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo*
           if (write_mhc_needed) {
             ctx.thread_write_mhc[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(write_mhcraw_cacheline_ct * kCacheline));
           }
-          if ((write_mhc_needed || read_dosage_present) && new_sample_idx_to_old) {
-            ctx.thread_cumulative_popcount_bufs[tidx] = S_CAST(uint32_t*, bigstack_alloc_raw(Int32CtToCachelineCt(raw_sample_ctl) * kCacheline));
-          }
         }
       }
       snprintf(outname_end, kMaxOutfnameExtBlen, ".pgen");
@@ -7786,14 +7957,17 @@ PglErr WritePvarResorted(const char* outname, const uintptr_t* variant_include,
     const uint32_t raw_variant_ctl = BitCtToWordCt(raw_variant_ct);
     const uint32_t all_nonref = (nonref_flags_storage == 2);
     uint32_t write_info_pr = all_nonref;
-    uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
+    if (pvar_info_reload && (!(pvar_psam_flags & kfPvarColXinfo))) {
+      pvar_info_reload = nullptr;
+    }
+    const uint32_t write_info = (pvar_psam_flags & kfPvarColInfo) || pvar_info_reload;
     if (write_info && nonref_flags) {
       write_info_pr = !IntersectionIsEmpty(variant_include, nonref_flags, raw_variant_ctl);
     }
     write_info_pr = write_info_pr && write_info;
     if (unlikely(write_info_pr && (info_flags & kfInfoPrNonflagPresent))) {
       logputs("\n");
-      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n'provisional reference' flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
+      logerrputs("Error: Conflicting INFO/PR definitions.  Either fix all REF alleles so that the\n\"provisional reference\" flag is no longer needed, or remove/rename the other\nuse of the INFO/PR key.\n");
       goto WritePvarResorted_ret_INCONSISTENT_INPUT;
     }
 


=====================================
plink2_data.h
=====================================
@@ -71,13 +71,14 @@ CONSTI32(kMaxInfoKeySlen, kMaxIdSlen);
 #define MAX_INFO_KEY_SLEN_STR MAX_ID_SLEN_STR
 
 // We only need to distinguish between the following INFO-value-type cases:
-// Number=0 (flag), Number=<positive integer>, Number=., Number=A, Number=R,
-// and Number=G.  We use negative numbers to represent the last 4 cases in
+// Number=0 (flag), Number=<positive integer>, Number=., Number=A, and
+// Number=R.  (Number=G is being treated as Number=., since otherwise chrX is a
+// nightmare; see https://github.com/samtools/hts-specs/issues/272 for some
+// discussion.)  We use negative numbers to represent the last 3 cases in
 // InfoVtype.
 CONSTI32(kInfoVtypeUnknown, -1);
 CONSTI32(kInfoVtypeA, -2);
 CONSTI32(kInfoVtypeR, -3);
-CONSTI32(kInfoVtypeG, -4);
 
 // Main fixed data structure when splitting/joining INFO is a hashmap of keys.
 // Behavior when splitting:
@@ -87,17 +88,15 @@ CONSTI32(kInfoVtypeG, -4);
 // - Number=A and Number=R require splitting the value on ',' and verifying the
 //   comma count is correct, but is otherwise straightforward since alleles
 //   can't be permuted.
-// - Number=G requires a bit more work but isn't fundamentally different from
-//   A/R.
+// - Number=G is being treated as Number=.
 // When joining:
 // - Field order is determined by header line order.
-// - Number=. and Number>0 just require a buffer of size ~info_reload_slen, and
-//   a boolean indicating whether no mismatch has been found.
+// - Number=./G and Number>0 just require a buffer of size ~info_reload_slen,
+//   and a boolean indicating whether no mismatch has been found.
 // - Number=0 (Flag) requires a single boolean, we perform an or operation.
-// - Number=A/R/G are the messy ones: we need to have enough space for
+// - Number=A/R are the messy ones: we need to have enough space for
 //   max_write_allele_ct (or that minus 1) comma-separated values in the =A and
-//   =R cases, and max_write_allele_ct * (max_write_allele_ct + 1) / 2 in the
-//   diploid =G case.
+//   =R cases.
 //   Since we permit already-multiallelic variants to be part of a join, the =G
 //   case may require a lot of working memory to handle.  We reserve up to 1/16
 //   of remaining workspace memory for this when we cannot prove that we can
@@ -136,6 +135,14 @@ uint32_t DataParentalColsAreRequired(const uintptr_t* sample_include, const Samp
 
 char* AppendPhenoStr(const PhenoCol* pheno_col, const char* output_missing_pheno, uint32_t omp_slen, uint32_t sample_uidx, char* write_iter);
 
+uint32_t CopyAndPermute8bit(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict old_sample_idx_to_new, uint32_t sample_ct, uint32_t val_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals);
+
+uint32_t CopyAndPermute16bit(const uintptr_t* __restrict sample_include, const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uint32_t* __restrict old_sample_idx_to_new, uint32_t sample_ct, uint32_t val_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals);
+
+uint32_t Dense8bitToSparse(const uintptr_t* __restrict set, uint32_t sample_ctl, void* __restrict vals);
+
+uint32_t Dense16bitToSparse(const uintptr_t* __restrict set, uint32_t sample_ctl, void* __restrict vals);
+
 PglErr LoadAlleleAndGenoCounts(const uintptr_t* sample_include, const uintptr_t* founder_info, const uintptr_t* sex_nm, const uintptr_t* sex_male, const uintptr_t* variant_include, const ChrInfo* cip, const uintptr_t* allele_idx_offsets, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t founder_ct, uint32_t male_ct, uint32_t nosex_ct, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t first_hap_uidx, uint32_t is_minimac3_r2, uint32_t max_thread_ct, uintptr_t pgr_alloc_cacheline_ct, PgenFileInfo* pgfip, uintptr_t* allele_presents, uint64_t* allele_ddosages, uint64_t* founder_allele_ddosages, uint32_t* variant_missing_hc_cts, uint32_t* variant_missing_dosage_cts, uint32_t* variant_hethap_cts, STD_ARRAY_PTR_DECL(uint32_t, 3, raw_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, founder_raw_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, x_male_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_male_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, x_nosex_geno_cts), STD_ARRAY_PTR_DECL(uint32_t, 3, founder_x_nosex_geno_cts), double* imp_r2_vals);
 
 void ApplyHardCallThresh(const uintptr_t* dosage_present, const Dosage* dosage_main, uint32_t dosage_ct, uint32_t hard_call_halfdist, uintptr_t* genovec);


=====================================
plink2_export.cc
=====================================
@@ -964,6 +964,7 @@ PglErr ExportOxGen(const uintptr_t* sample_include, const uint32_t* sample_inclu
       }
     }
     const uint32_t max_chr_blen = GetMaxChrSlen(cip) + 1;
+    const uint32_t is_v2 = (exportf_flags / kfExportfOxGenV2) & 1;
     // if no dosages, all genotypes are 6 bytes (missing = " 0 0 0")
     // with dosages, we print up to 5 digits past the decimal point, so 7 bytes
     //   + space for each number, 24 bytes max
@@ -971,7 +972,7 @@ PglErr ExportOxGen(const uintptr_t* sample_include, const uint32_t* sample_inclu
     char* chr_buf;  // includes trailing space
     char* writebuf;
     if (unlikely(bigstack_alloc_c(max_chr_blen, &chr_buf) ||
-                 bigstack_alloc_c(kMaxMediumLine + max_chr_blen + kMaxIdSlen + 16 + 2 * max_allele_slen + max_geno_slen * sample_ct, &writebuf))) {
+                 bigstack_alloc_c(kMaxMediumLine + max_chr_blen + (kMaxIdSlen << is_v2) + 16 + 2 * max_allele_slen + max_geno_slen * sample_ct, &writebuf))) {
       goto ExportOxGen_ret_NOMEM;
     }
     {
@@ -1035,7 +1036,12 @@ PglErr ExportOxGen(const uintptr_t* sample_include, const uint32_t* sample_inclu
         is_y = (chr_idx == y_code);
       }
       write_iter = memcpya(write_iter, chr_buf, chr_blen);
-      write_iter = strcpyax(write_iter, variant_ids[variant_uidx], ' ');
+      const char* variant_id = variant_ids[variant_uidx];
+      const uint32_t variant_id_slen = strlen(variant_id);
+      write_iter = memcpyax(write_iter, variant_id, variant_id_slen, ' ');
+      if (is_v2) {
+        write_iter = memcpyax(write_iter, variant_id, variant_id_slen, ' ');
+      }
       write_iter = u32toa_x(variant_bps[variant_uidx], ' ', write_iter);
       uintptr_t allele_idx_offset_base = variant_uidx * 2;
       if (allele_idx_offsets) {
@@ -3158,12 +3164,13 @@ THREAD_FUNC_DECL ExportBgen13Thread(void* raw_arg) {
 // export an ID hash table as well (since such a hash table is always
 // constructed for the sake of detecting and warning about duplicate
 // post-id-paste IDs).
-BoolErr ExportIdpaste(const uintptr_t* sample_include, const SampleIdInfo* siip, const char* ftypename, uint32_t sample_ct, IdpasteFlags exportf_id_paste, char exportf_id_delim, uintptr_t* max_exported_sample_id_blen_ptr, char** exported_sample_ids_ptr) {
+BoolErr ExportIdpaste(const uintptr_t* sample_include, const SampleIdInfo* siip, const char* ftypename, const char* reimport_flagname, uint32_t sample_ct, IdpasteFlags exportf_id_paste, char exportf_id_delim, uintptr_t* max_exported_sample_id_blen_ptr, char** exported_sample_ids_ptr) {
   const uint32_t write_fid = DataFidColIsRequired(sample_include, siip, sample_ct, exportf_id_paste / kfIdpasteMaybefid);
   const char* sample_ids = siip->sample_ids;
   const char* sids = siip->sids;
   const uintptr_t max_sample_id_blen = siip->max_sample_id_blen;
   uintptr_t max_sid_blen = siip->max_sid_blen;
+  const uint32_t write_iid = (exportf_id_paste / kfIdpasteIid) & 1;
   const uint32_t write_sid = DataSidColIsRequired(sample_include, sids, sample_ct, max_sid_blen, exportf_id_paste / kfIdpasteMaybesid);
   if (write_sid && (!sids)) {
     max_sid_blen = 2;
@@ -3194,7 +3201,7 @@ BoolErr ExportIdpaste(const uintptr_t* sample_include, const SampleIdInfo* siip,
       }
       exported_sample_ids_iter = memcpyax(exported_sample_ids_iter, orig_sample_id, fid_slen, id_delim);
     }
-    if (exportf_id_paste & kfIdpasteIid) {
+    if (write_iid) {
       const char* orig_iid = &(orig_fid_end[1]);
       const uint32_t iid_slen = strlen(orig_iid);
       if ((!id_delim_warning) && memchr(orig_iid, id_delim, iid_slen)) {
@@ -3217,12 +3224,11 @@ BoolErr ExportIdpaste(const uintptr_t* sample_include, const SampleIdInfo* siip,
     }
     exported_sample_ids_iter[-1] = '\0';
   }
-  // todo: revise this warning condition?
-  if (id_delim_warning) {
+  if (id_delim_warning && (write_fid + write_iid + write_sid >= 2)) {
     if (exportf_id_delim) {
-      logerrprintfww("Warning: '%c' present in original sample IDs; --export %s will not be able to reconstruct them. Consider rerunning with a different --export id-delim= value.\n", exportf_id_delim, ftypename);
+      logerrprintfww("Warning: '%c' present in original sample IDs; --%s will not be able to reconstruct them. Consider rerunning with a different --export id-delim= value.\n", exportf_id_delim, reimport_flagname);
     } else {
-      logerrprintfww("Warning: '_' present in original sample IDs; --export %s will not be able to reconstruct them. Consider rerunning with a suitable --export id-delim= value.\n", ftypename);
+      logerrprintfww("Warning: '_' present in original sample IDs; --%s will not be able to reconstruct them. Consider rerunning with a suitable --export id-delim= value.\n", reimport_flagname);
     }
   }
   if (PopulateStrboxHtable(exported_sample_ids, sample_ct, max_exported_sample_id_blen, exported_id_htable_size, new_id_htable)) {
@@ -3333,7 +3339,7 @@ PglErr ExportBgen13(const char* outname, const uintptr_t* sample_include, uint32
     // always save sample IDs...
     char* exported_sample_ids;
     uintptr_t max_exported_sample_id_blen;
-    if (unlikely(ExportIdpaste(sample_include, siip, use_zstd_compression? "bgen-1.3" : "bgen-1.2", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
+    if (unlikely(ExportIdpaste(sample_include, siip, use_zstd_compression? "bgen-1.3" : "bgen-1.2", "bgen", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
       goto ExportBgen13_ret_NOMEM;
     }
     // Compute total length of sample ID block now; this is necessary to fill
@@ -3898,7 +3904,7 @@ PglErr ExportOxSampleV2(const char* outname, const uintptr_t* sample_include, co
 
     char* exported_sample_ids;
     uintptr_t max_exported_sample_id_blen;
-    if (unlikely(ExportIdpaste(sample_include, &piip->sii, "sample-v2", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
+    if (unlikely(ExportIdpaste(sample_include, &piip->sii, "sample-v2", "sample", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
       goto ExportOxSampleV2_ret_NOMEM;
     }
 
@@ -4607,7 +4613,7 @@ PglErr ExportVcf(const uintptr_t* sample_include, const uint32_t* sample_include
     write_iter = strcpya_k(write_iter, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">" EOLN_STR "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
     char* exported_sample_ids;
     uintptr_t max_exported_sample_id_blen;
-    if (unlikely(ExportIdpaste(sample_include, siip, "vcf", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
+    if (unlikely(ExportIdpaste(sample_include, siip, "vcf", "vcf", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
       goto ExportVcf_ret_NOMEM;
     }
     for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {
@@ -7976,7 +7982,7 @@ PglErr ExportBcf(const uintptr_t* sample_include, const uint32_t* sample_include
 
       char* exported_sample_ids;
       uintptr_t max_exported_sample_id_blen;
-      if (unlikely(ExportIdpaste(sample_include, siip, "bcf", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
+      if (unlikely(ExportIdpaste(sample_include, siip, "bcf", "bcf", sample_ct, exportf_id_paste, exportf_id_delim, &max_exported_sample_id_blen, &exported_sample_ids))) {
         goto ExportBcf_ret_NOMEM;
       }
       for (uint32_t sample_idx = 0; sample_idx != sample_ct; ++sample_idx) {


=====================================
plink2_fasta.cc
=====================================
@@ -542,7 +542,8 @@ PglErr ProcessFa(const uintptr_t* variant_include, const char* const* variant_id
       if (ucc < 'A') {
         // > = ascii 62
         // ; = ascii 59
-        if (ucc == ';') {
+        // bugfix (25 May 2021): newline is also valid
+        if ((ucc == ';') || ((ucc < 32) && (ucc != '\t'))) {
           continue;
         }
         is_first_noncomment_line = 0;


=====================================
plink2_glm.cc
=====================================
@@ -4383,7 +4383,9 @@ THREAD_FUNC_DECL GlmLogisticThread(void* raw_arg) {
               ZeromovFArr(nm_sample_ct_rem, &nm_predictors_pmaj_iter);
             }
             nm_predictors_pmaj_istart = nm_predictors_pmaj_iter;
-            prev_nm = !missing_ct;
+            // bugfix (13 Apr 2021): if local covariates are present, we can't
+            // optimize as aggressively
+            prev_nm = !(missing_ct || local_covar_ct);
           } else {
             // bugfix (15 Aug 2018): this was not handling --parameters
             // correctly when a covariate was only needed as part of an
@@ -7436,7 +7438,7 @@ THREAD_FUNC_DECL GlmLinearThread(void* raw_arg) {
                 }
               }
               nm_predictors_pmaj_istart = nm_predictors_pmaj_iter;
-              prev_nm = !missing_ct;
+              prev_nm = !(missing_ct || local_covar_ct);
             } else {
               // bugfix (15 Aug 2018): this was not handling --parameters
               // correctly when a covariate was only needed as part of an
@@ -9504,7 +9506,7 @@ THREAD_FUNC_DECL GlmLinearSubbatchThread(void* raw_arg) {
                 }
               }
               nm_predictors_pmaj_istart = nm_predictors_pmaj_iter;
-              prev_nm = !missing_ct;
+              prev_nm = !(missing_ct || local_covar_ct);
             } else {
               // bugfix (15 Aug 2018): this was not handling --parameters
               // correctly when a covariate was only needed as part of an
@@ -11140,6 +11142,8 @@ PglErr GlmMain(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, c
             if (new_max_covar_name_blen < condition_blen) {
               new_max_covar_name_blen = condition_blen;
             }
+            // TODO: this count will be different with a multiallelic variant
+            logprintf("--glm: One --condition covariate added.\n");
           } else {
             if (unlikely(ii == -2)) {
               logerrprintfww("Error: Duplicate --condition variant ID '%s'.\n", glm_info_ptr->condition_varname);


=====================================
plink2_help.cc
=====================================
@@ -463,8 +463,12 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "    * 'rlist': .rlist + .fam + .map fileset, where the .rlist file is a\n"
 "                genotype-based list which omits the most common genotype for\n"
 "                each variant.  Also supports 'omit-nonmale-y'.\n"
-"    * 'oxford': Oxford-format .gen + .sample.  When the 'bgz' modifier is\n"
-"                present, the .gen file is block-gzipped.\n"
+"    * 'oxford', 'oxford-v2': Oxford-format .gen + .sample.  When the 'bgz'\n"
+"                             modifier is present, the .gen file is\n"
+"                             block-gzipped.  'oxford' requests the original\n"
+"                             .gen file format with 5 leading columns\n"
+"                             (understood by older PLINK builds); 'oxford-v2'\n"
+"                             requests the current 6-leading-column flavor.\n"
 "    * 'ped': PLINK 1 sample-major (.ped + .map), loadable with --file.\n"
 "    * 'compound-genotypes': Same as 'ped', except that the space between each\n"
 "                            pair of same-variant allele codes is removed.\n"
@@ -1311,8 +1315,8 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "      scoresums: Score sums.\n"
 "    The default is maybefid,maybesid,phenos,nallele,dosagesum,scoreavgs.\n"
 "    For more sophisticated polygenic risk scoring, we recommend looking at the\n"
-"    LDpred (https://github.com/bvilhjal/ldpred ) and PRSice-2\n"
-"    (https://www.prsice.info/ ) software packages.\n\n"
+"    LDpred2 (https://privefl.github.io/bigsnpr/articles/LDpred2.html ) and\n"
+"    PRSice-2 (https://www.prsice.info/ ) software packages.\n\n"
                );
     HelpPrint("variant-score\0vscore\0", &help_ctrl, 1,
 "  --variant-score <filename> ['bin' | 'bin4' | 'cols='<col set descriptor>]\n"
@@ -1715,9 +1719,9 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "    Linearly transform named covariates (and quantitative phenotypes, if\n"
 "    --variance-standardize) to mean-zero, variance 1.  If no arguments are\n"
 "    provided, all possible phenotypes/covariates are affected.\n"
-"    This is frequently necessary to prevent multicollinearity when dealing with\n"
-"    covariates where abs(mean) is much larger than abs(standard deviation),\n"
-"    such as year of birth.\n"
+"    This is frequently necessary to prevent the multicollinearity check from\n"
+"    failing when dealing with covariates where abs(mean) is much larger than\n"
+"    the standard deviation, such as year of birth.\n"
 "  --quantile-normalize [...]       : Force named covariates and quantitative\n"
 "  --pheno-quantile-normalize [...]   phenotypes to a N(0,1) distribution,\n"
 "  --covar-quantile-normalize [...]   preserving only the original rank orders.\n"
@@ -2118,7 +2122,9 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "                            * 'file'/'f' uses the order in the given file\n"
 "                              (named in the last argument).\n"
                );
-    HelpPrint("pmerge\0pmerge-list\0sample-inner-join\0variant-inner-join\0pheno-inner-join\0merge-mode\0merge-parents-mode\0merge-sex-mode\0merge-pheno-mode\0merge-xheader-mode\0merge-qual-mode\0merge-filter-mode\0merge-info-mode\0merge-cm-mode\0", &help_ctrl, 0,
+    HelpPrint("pmerge\0pmerge-list\0pmerge-list-dir\0pmerge-output-vzs\0sample-inner-join\0variant-inner-join\0pheno-inner-join\0merge-mode\0merge-parents-mode\0merge-sex-mode\0merge-pheno-mode\0merge-xheader-mode\0merge-qual-mode\0merge-filter-mode\0merge-info-mode\0merge-cm-mode\0", &help_ctrl, 0,
+"  --pmerge-list-dir <dir>  : Specify base dir to join to --pmerge-list entries.\n"
+"  --pmerge-output-vzs      : Compress the .pvar file from --pmerge[-list].\n"
 "  --sample-inner-join      : By default, --pmerge[-list] performs an 'outer\n"
 "  --variant-inner-join       join': the merged fileset contains the union of\n"
 "  --pheno-inner-join         the samples in the input filesets, and ditto for\n"
@@ -2142,9 +2148,11 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "  --merge-filter-mode <m>    QUAL/FILTER/INFO/CM entries.\n"
 "  --merge-info-mode <mode>   * 'erase' = remove column\n"
 "  --merge-cm-mode <mode>     * 'nm-match' = nonmissing values must match\n"
-"                             * 'nm-first' = keep first nonmissing (qual/info/CM\n"
+"                             * 'nm-first' = keep first nonmissing (info/CM\n"
 "                                            default)\n"
 "                             * 'first' = keep first value, even if missing\n"
+"                             * 'min' = keep minimum value (--merge-qual-mode\n"
+"                                       default, not applicable to others)\n"
 "                             * 'np-union' = keep all non-PASS values\n"
 "                                            (--merge-filter-mode default, not\n"
 "                                            applicable to others)\n"


=====================================
plink2_import.cc
=====================================
@@ -3119,7 +3119,7 @@ PglErr VcfToPgen(const char* vcfname, const char* preexisting_psamname, const ch
       allele_idx_end += alt_ct + 1;
       const uint32_t variant_idx_lowbits = variant_ct % kBitsPerWord;
       if (info_pr_present) {
-        if (PrInInfoToken(info_end - info_start, info_start)) {
+        if (PrInInfo(info_end - info_start, info_start)) {
           nonref_word |= k1LU << variant_idx_lowbits;
         }
         if (variant_idx_lowbits == (kBitsPerWord - 1)) {
@@ -7881,65 +7881,68 @@ PglErr BcfToPgen(const char* bcfname, const char* preexisting_psamname, const ch
       }
       // INFO
       uintptr_t info_pr_here = 0;
-      uint64_t cur_info_slen_ubound = n_info - 1;
-      for (uint32_t uii = 0; uii != n_info; ++uii) {
-        uint32_t sidx;
-        if (unlikely(ScanBcfTypedInt(&parse_iter, &sidx) || (parse_iter > shared_end) || (sidx >= fif_string_idx_end))) {
-          goto BcfToPgen_ret_VREC_GENERIC;
-        }
-        const uint32_t key_slen = fif_slens[sidx];
-        if (unlikely((!key_slen) || ScanBcfType(&parse_iter, &value_type, &value_ct) || (parse_iter > shared_end))) {
-          goto BcfToPgen_ret_VREC_GENERIC;
-        }
-        cur_info_slen_ubound += key_slen;
-        if (value_ct) {
-          const uint32_t bytes_per_elem = kBcfBytesPerElem[value_type];
-          const uint64_t vec_byte_ct = bytes_per_elem * S_CAST(uint64_t, value_ct);
-          if (unlikely((!bytes_per_elem) || (S_CAST(uint64_t, shared_end - parse_iter) < vec_byte_ct))) {
+      if (n_info) {
+        // bugfix (1 Jul 2021): avoid underflow when n_info == 0
+        uint64_t cur_info_slen_ubound = n_info - 1;
+        for (uint32_t uii = 0; uii != n_info; ++uii) {
+          uint32_t sidx;
+          if (unlikely(ScanBcfTypedInt(&parse_iter, &sidx) || (parse_iter > shared_end) || (sidx >= fif_string_idx_end))) {
             goto BcfToPgen_ret_VREC_GENERIC;
           }
-          if (value_type == 1) {
-            // int8, max len 4 ("-127"), add 1 for comma
-            cur_info_slen_ubound += 5 * S_CAST(uint64_t, value_ct);
-          } else if (value_type == 2) {
-            // int16, max len 6
-            cur_info_slen_ubound += 7 * S_CAST(uint64_t, value_ct);
-          } else if (value_type == 3) {
-            // int32, max len 11
-            cur_info_slen_ubound += 12 * S_CAST(uint64_t, value_ct);
-          } else if (value_type == 5) {
-            cur_info_slen_ubound += (kMaxFloatGSlen + 1) * S_CAST(uint64_t, value_ct);
-          } else {
-            // string
-            cur_info_slen_ubound += value_ct + 1;
+          const uint32_t key_slen = fif_slens[sidx];
+          if (unlikely((!key_slen) || ScanBcfType(&parse_iter, &value_type, &value_ct) || (parse_iter > shared_end))) {
+            goto BcfToPgen_ret_VREC_GENERIC;
           }
-          parse_iter = &(parse_iter[vec_byte_ct]);
-        } else {
-          // tolerate value_type == value_ct == 0 "untyped-missing" special
-          // case, but only if field is of type Flag.  Note that this means the
-          // flag IS present (and yes, bcftools emits this since it's compact).
-          //
-          // value_ct == 0, value_type == 7 is also valid (string, missing).
-          //
-          // Otherwise, value_ct == 0 should not happen.
-          if (!IsSet(info_flags, sidx)) {
-            if (unlikely(value_ct != 7)) {
+          cur_info_slen_ubound += key_slen;
+          if (value_ct) {
+            const uint32_t bytes_per_elem = kBcfBytesPerElem[value_type];
+            const uint64_t vec_byte_ct = bytes_per_elem * S_CAST(uint64_t, value_ct);
+            if (unlikely((!bytes_per_elem) || (S_CAST(uint64_t, shared_end - parse_iter) < vec_byte_ct))) {
               goto BcfToPgen_ret_VREC_GENERIC;
             }
-            cur_info_slen_ubound += 2;
+            if (value_type == 1) {
+              // int8, max len 4 ("-127"), add 1 for comma
+              cur_info_slen_ubound += 5 * S_CAST(uint64_t, value_ct);
+            } else if (value_type == 2) {
+              // int16, max len 6
+              cur_info_slen_ubound += 7 * S_CAST(uint64_t, value_ct);
+            } else if (value_type == 3) {
+              // int32, max len 11
+              cur_info_slen_ubound += 12 * S_CAST(uint64_t, value_ct);
+            } else if (value_type == 5) {
+              cur_info_slen_ubound += (kMaxFloatGSlen + 1) * S_CAST(uint64_t, value_ct);
+            } else {
+              // string
+              cur_info_slen_ubound += value_ct + 1;
+            }
+            parse_iter = &(parse_iter[vec_byte_ct]);
+          } else {
+            // tolerate value_type == value_ct == 0 "untyped-missing" special
+            // case, but only if field is of type Flag.  Note that this means the
+            // flag IS present (and yes, bcftools emits this since it's compact).
+            //
+            // value_ct == 0, value_type == 7 is also valid (string, missing).
+            //
+            // Otherwise, value_ct == 0 should not happen.
+            if (!IsSet(info_flags, sidx)) {
+              if (unlikely(value_ct != 7)) {
+                goto BcfToPgen_ret_VREC_GENERIC;
+              }
+              cur_info_slen_ubound += 2;
+            }
           }
-        }
-        if (sidx == pr_sidx) {
-          if (unlikely(info_pr_here)) {
-            putc_unlocked('\n', stdout);
-            snprintf(g_logbuf, kLogbufSize, "Error: Variant record #%" PRIuPTR " in --bcf file has multiple INFO/PR entries.\n", vrec_idx);
-            goto BcfToPgen_ret_MALFORMED_INPUT_WW;
+          if (sidx == pr_sidx) {
+            if (unlikely(info_pr_here)) {
+              putc_unlocked('\n', stdout);
+              snprintf(g_logbuf, kLogbufSize, "Error: Variant record #%" PRIuPTR " in --bcf file has multiple INFO/PR entries.\n", vrec_idx);
+              goto BcfToPgen_ret_MALFORMED_INPUT_WW;
+            }
+            info_pr_here = 1;
           }
-          info_pr_here = 1;
         }
-      }
-      if (cur_info_slen_ubound > filter_info_slen_ubound) {
-        filter_info_slen_ubound = cur_info_slen_ubound;
+        if (cur_info_slen_ubound > filter_info_slen_ubound) {
+          filter_info_slen_ubound = cur_info_slen_ubound;
+        }
       }
       if (unlikely(parse_iter != shared_end)) {
         goto BcfToPgen_ret_VREC_GENERIC;
@@ -9199,7 +9202,7 @@ PglErr OxSampleToPsam(const char* samplename, const char* const_fid, const char*
       } else if (col_type_char == 'B') {
         at_least_one_binary_pheno = 1;
         cur_col_type = kOxSampleColBinary;
-      } else if (col_type_char == 'P') {
+      } else if ((col_type_char == 'C') || (col_type_char == 'P')) {
         cur_col_type = kOxSampleColQuantitative;
       } else if (likely(col_type_char == '0')) {
         ++initial_skip_col_ct;
@@ -9888,13 +9891,40 @@ PglErr OxGenToPgen(const char* genname, const char* samplename, const char* cons
     uintptr_t variant_skip_ct = 0;
     char* line_iter = TextLineEnd(&gen_txs);
     ++line_idx;
+    if (unlikely(!TextGetUnsafe2(&gen_txs, &line_iter))) {
+      if (TextStreamErrcode2(&gen_txs, &reterr)) {
+        goto OxGenToPgen_ret_TSTREAM_FAIL;
+      }
+      logerrputs("Error: Empty .gen file.\n");
+      goto OxGenToPgen_ret_INCONSISTENT_INPUT;
+    }
+    uint32_t is_v2 = 0;
+    {
+      const uint32_t token_ct = CountTokens(line_iter);
+      const uint32_t expected_v2_token_ct = 3 * sample_ct + 6;
+      if (token_ct == expected_v2_token_ct) {
+        is_v2 = 1;
+      } else if (unlikely(token_ct != expected_v2_token_ct - 1)) {
+        logerrprintfww("Error: Unexpected number of columns in .gen file (%u or %u expected).\n", expected_v2_token_ct - 1, expected_v2_token_ct);
+        goto OxGenToPgen_ret_INCONSISTENT_INPUT;
+      }
+    }
+    goto OxGenToPgen_loop_start;
     for (; TextGetUnsafe2(&gen_txs, &line_iter); ++line_idx) {
+    OxGenToPgen_loop_start:
+      ;  // make this work with plain C, as opposed to just C++, compilers
       char* chr_code_str = line_iter;
       char* chr_code_end = CurTokenEnd(chr_code_str);
       const char* variant_id_str = FirstNonTspace(chr_code_end);
       if (unlikely(IsEolnKns(*variant_id_str))) {
         goto OxGenToPgen_ret_MISSING_TOKENS;
       }
+      if (is_v2) {
+        variant_id_str = FirstNonTspace(CurTokenEnd(variant_id_str));
+        if (unlikely(IsEolnKns(*variant_id_str))) {
+          goto OxGenToPgen_ret_MISSING_TOKENS;
+        }
+      }
 
       if (!single_chr_str) {
         uint32_t cur_chr_code;
@@ -10055,10 +10085,6 @@ PglErr OxGenToPgen(const char* genname, const char* samplename, const char* cons
       goto OxGenToPgen_ret_WRITE_FAIL;
     }
     if (unlikely(!variant_ct)) {
-      if (!variant_skip_ct) {
-        logerrputs("Error: Empty .gen file.\n");
-        goto OxGenToPgen_ret_INCONSISTENT_INPUT;
-      }
       logerrprintfww("Error: All %" PRIuPTR " variant%s in .gen file skipped due to chromosome filter.\n", variant_skip_ct, (variant_skip_ct == 1)? "" : "s");
       goto OxGenToPgen_ret_INCONSISTENT_INPUT;
     }
@@ -10138,7 +10164,7 @@ PglErr OxGenToPgen(const char* genname, const char* samplename, const char* cons
           continue;
         }
       }
-      const char* linebuf_iter = NextTokenMult(FirstNonTspace(&(chr_code_end[1])), 4);
+      const char* linebuf_iter = NextTokenMult(FirstNonTspace(&(chr_code_end[1])), 4 + is_v2);
       uint32_t inner_loop_last = kBitsPerWordD2 - 1;
       Dosage* dosage_main_iter = dosage_main;
       for (uint32_t widx = 0; ; ++widx) {
@@ -13843,6 +13869,14 @@ PglErr OxHapslegendToPgen(const char* hapsname, const char* legendname, const ch
     }
     reterr = InitTextStream(hapsname, max_line_blen, ClipU32(max_thread_ct - 1, 1, 4), &haps_txs);
     if (unlikely(reterr)) {
+      if (reterr == kPglRetOpenFail) {
+        const uint32_t slen = strlen(hapsname);
+        if ((!StrEndsWith(hapsname, ".haps", slen)) &&
+            (!StrEndsWith(hapsname, ".haps.gz", slen))) {
+          logerrprintfww("Error: Failed to open %s : %s. (--haps expects a complete filename; did you forget '.haps' at the end?)\n", hapsname, strerror(errno));
+          goto OxHapslegendToPgen_ret_1;
+        }
+      }
       goto OxHapslegendToPgen_ret_TSTREAM_FAIL_HAPS;
     }
     unsigned char* bigstack_mark2 = g_bigstack_base;
@@ -13926,7 +13960,14 @@ PglErr OxHapslegendToPgen(const char* hapsname, const char* legendname, const ch
       }
       reterr = InitTextStream(legendname, max_line_blen, 1, &legend_txs);
       if (unlikely(reterr)) {
-        goto OxHapslegendToPgen_ret_1;
+        if (reterr == kPglRetOpenFail) {
+          const uint32_t slen = strlen(legendname);
+          if (!StrEndsWith(legendname, ".legend", slen)) {
+            logerrprintfww("Error: Failed to open %s : %s. (--legend expects a complete filename; did you forget '.legend' at the end?)\n", legendname, strerror(errno));
+            goto OxHapslegendToPgen_ret_1;
+          }
+        }
+        goto OxHapslegendToPgen_ret_TSTREAM_FAIL_LEGEND;
       }
       ++line_idx_legend;
       char* legend_line_start = TextGet(&legend_txs);


=====================================
plink2_merge.cc
=====================================
The diff for this file was not included because it is too large.

=====================================
plink2_merge.h
=====================================
@@ -36,7 +36,8 @@ FLAGSET_DEF_START()
   kfPmergeSampleInnerJoin = (1 << 0),
   kfPmergeVariantInnerJoin = (1 << 1),
   kfPmergePhenoInnerJoin = (1 << 2),
-  kfPmergeMultiallelicsAlreadyJoined = (1 << 3)
+  kfPmergeMultiallelicsAlreadyJoined = (1 << 3),
+  kfPmergeOutputVzs = (1 << 4)
 FLAGSET_DEF_END(PmergeFlags);
 
 ENUM_U31_DEF_START()
@@ -58,11 +59,12 @@ ENUM_U31_DEF_START()
 ENUM_U31_DEF_END(MergeXheaderMode);
 
 ENUM_U31_DEF_START()
-  kMergeQicModeErase,
-  kMergeQicModeNmMatch,
-  kMergeQicModeNmFirst,
-  kMergeQicModeFirst
-ENUM_U31_DEF_END(MergeQicMode);
+  kMergeQualModeErase,
+  kMergeQualModeNmMatch,
+  kMergeQualModeNmFirst,
+  kMergeQualModeFirst,
+  kMergeQualModeMin
+ENUM_U31_DEF_END(MergeQualMode);
 
 ENUM_U31_DEF_START()
   kMergeFilterModeErase,
@@ -72,6 +74,13 @@ ENUM_U31_DEF_START()
   kMergeFilterModeNonpassUnion
 ENUM_U31_DEF_END(MergeFilterMode);
 
+ENUM_U31_DEF_START()
+  kMergeInfoCmModeErase,
+  kMergeInfoCmModeNmMatch,
+  kMergeInfoCmModeNmFirst,
+  kMergeInfoCmModeFirst
+ENUM_U31_DEF_END(MergeInfoCmMode);
+
 ENUM_U31_DEF_START()
   kSort0,
   kSortNone,
@@ -108,10 +117,10 @@ typedef struct PmergeStruct {
   MergePhenoMode merge_sex_mode;
   MergePhenoMode merge_pheno_mode;
   MergeXheaderMode merge_xheader_mode;
-  MergeQicMode merge_qual_mode;
+  MergeQualMode merge_qual_mode;
   MergeFilterMode merge_filter_mode;
-  MergeQicMode merge_info_mode;
-  MergeQicMode merge_cm_mode;
+  MergeInfoCmMode merge_info_mode;
+  MergeInfoCmMode merge_cm_mode;
   SortMode merge_pheno_sort;
   SortMode merge_info_sort;
   uint32_t max_allele_ct;
@@ -119,6 +128,7 @@ typedef struct PmergeStruct {
   char* pvar_fname;
   char* psam_fname;
   char* list_fname;
+  char* list_base_dir;
 } PmergeInfo;
 
 typedef struct PgenDiffStruct {
@@ -138,7 +148,7 @@ void InitPgenDiff(PgenDiffInfo* pgen_diff_info_ptr);
 
 void CleanupPgenDiff(PgenDiffInfo* pgen_diff_info_ptr);
 
-PglErr Pmerge(const PmergeInfo* pmip, const char* sample_sort_fname, MiscFlags misc_flags, ImportFlags import_flags, SortMode sample_sort_mode, FamCol fam_cols, int32_t missing_pheno, uint32_t max_thread_ct, SortMode sort_vars_mode, char* pgenname, char* psamname, char* pvarname, char* outname, char* outname_end, ChrInfo* cip);
+PglErr Pmerge(const PmergeInfo* pmip, const char* sample_sort_fname, MiscFlags misc_flags, SortMode sample_sort_mode, FamCol fam_cols, int32_t missing_pheno, uint32_t max_thread_ct, SortMode sort_vars_mode, char* pgenname, char* psamname, char* pvarname, char* outname, char* outname_end, ChrInfo* cip);
 
 PglErr PgenDiff(const uintptr_t* orig_sample_include, const SampleIdInfo* siip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const PgenDiffInfo* pdip, uint32_t raw_sample_ct, uint32_t orig_sample_ct, uint32_t raw_variant_ct, uint32_t max_allele_ct1, uint32_t max_allele_slen, uint32_t max_thread_ct, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end);
 


=====================================
plink2_misc.cc
=====================================
@@ -1305,7 +1305,9 @@ PglErr Plink1ClusterImport(const char* within_fname, const char* catpheno_name,
           }
           char* cat_name_start = cat_name_iter;
           cat_name_iter = memcpya(cat_name_iter, main_token_start, main_token_slen + 1);
-          cur_cat_idx = nonnull_cat_ct + 1;
+          // bugfix (5 May 2021): dropped this increment in mid-Jan refactor
+          ++nonnull_cat_ct;
+          cur_cat_idx = nonnull_cat_ct;
           cur_cat_names[cur_cat_idx] = cat_name_start;
         }
         // permit duplicates if category is identical
@@ -6451,11 +6453,17 @@ PglErr SampleCounts(const uintptr_t* sample_include, const SampleIdInfo* siip, c
         y_variant_ct = CountChrVariantsUnsafe(variant_include, cip, y_code);
       }
       uintptr_t haploid_mask_lowbit = cip->haploid_mask[0] & 1;
-      if (male_ct) {
-        uint32_t x_code;
-        if ((!haploid_mask_lowbit) && XymtExists(cip, kChrOffsetX, &x_code)) {
+      // bugfix (28 Mar 2021): if chrX is present, no autosomal diploid
+      // chromosomes are present, and there are no males, we need to set the
+      // low bit of chr_types and increment diploid_chr_type_ct.
+      uint32_t x_code;
+      if ((!haploid_mask_lowbit) && XymtExists(cip, kChrOffsetX, &x_code)) {
+        if (male_ct) {
           chr_types |= 2;
           ++diploid_chr_type_ct;
+        } else if (!chr_types) {
+          chr_types = 1;
+          ++diploid_chr_type_ct;
         }
       }
       if (y_variant_ct) {


=====================================
plink2_psam.cc
=====================================
@@ -236,10 +236,6 @@ PglErr LoadPsam(const char* psamname, const RangeList* pheno_range_list_ptr, Fam
         snprintf(g_logbuf, kLogbufSize, "Error: IID column is not first or second in %s.\n", psamname);
         goto LoadPsam_ret_MALFORMED_INPUT_WW;
       }
-      if (unlikely((psam_cols_mask & 2) && ((col_types[1] != 1) || (col_skips[1] != 1)))) {
-        snprintf(g_logbuf, kLogbufSize, "Error: SID column does not immediately follow IID column in %s.\n", psamname);
-        goto LoadPsam_ret_MALFORMED_INPUT_WW;
-      }
       if (unlikely(in_interval)) {
         snprintf(g_logbuf, kLogbufSize, "Error: --pheno-name range is inconsistent with %s.\n", psamname);
         goto LoadPsam_ret_INCONSISTENT_INPUT_WW;
@@ -248,6 +244,10 @@ PglErr LoadPsam(const char* psamname, const RangeList* pheno_range_list_ptr, Fam
       for (rpf_col_idx = relevant_postfid_col_ct - 1; rpf_col_idx; --rpf_col_idx) {
         col_skips[rpf_col_idx] -= col_skips[rpf_col_idx - 1];
       }
+      if (unlikely((psam_cols_mask & 2) && ((col_types[1] != 1) || (col_skips[1] != 1)))) {
+        snprintf(g_logbuf, kLogbufSize, "Error: SID column does not immediately follow IID column in %s.\n", psamname);
+        goto LoadPsam_ret_MALFORMED_INPUT_WW;
+      }
 
       line_iter = AdvPastDelim(linebuf_iter, '\n');
       ++line_idx;
@@ -787,9 +787,10 @@ PglErr LoadPsam(const char* psamname, const RangeList* pheno_range_list_ptr, Fam
     if (fid_present) {
       piip->sii.flags |= kfSampleIdFidPresent;
     }
-    // special case: if there's exactly one phenotype and it's all-missing,
-    // discard it
-    if ((pheno_ct == 1) && (!PopcountWords(pheno_cols[0].nonmiss, raw_sample_ctl))) {
+    // special case: if there's exactly one phenotype, it has the default name,
+    // and it's all-missing, discard it.  This removes forced .fam-derived and
+    // similar phenotype columns, and is unlikely to break anything else..
+    if ((pheno_ct == 1) && (!strcmp(*pheno_names_ptr, "PHENO1")) && (!PopcountWords(pheno_cols[0].nonmiss, raw_sample_ctl))) {
       free(*pheno_names_ptr);
       *pheno_names_ptr = nullptr;
       CleanupPhenoCols(1, pheno_cols);


=====================================
plink2_pvar.cc
=====================================
@@ -450,7 +450,19 @@ void BackfillChrIdxs(const ChrInfo* cip, uint32_t chrs_encountered_m1, uint32_t
   }
 }
 
-char* PrInInfoToken(uint32_t info_slen, char* info_token) {
+uint32_t PrInInfo(uint32_t info_slen, char* info_token) {
+  if (memequal_k(info_token, "PR", 2) && ((info_slen == 2) || (info_token[2] == ';'))) {
+    return 1;
+  }
+  if (memequal_k(&(info_token[S_CAST(int32_t, info_slen) - 3]), ";PR", 3)) {
+    return 1;
+  }
+  info_token[info_slen] = '\0';
+  char* first_info_end = strchr(info_token, ';');
+  return first_info_end && (strstr(info_token, ";PR;") != nullptr);
+}
+
+char* InfoPrStart(uint32_t info_slen, char* info_token) {
   if (memequal_k(info_token, "PR", 2) && ((info_slen == 2) || (info_token[2] == ';'))) {
     return info_token;
   }


=====================================
plink2_pvar.h
=====================================
@@ -82,10 +82,12 @@ void VaridTemplateInit(const char* varid_template_str, const char* missing_id_ma
 
 char* VaridTemplateWrite(const VaridTemplate* vtp, const char* ref_start, const char* alt1_start, uint32_t cur_bp, uint32_t ref_token_slen, uint32_t extra_alt_ct, uint32_t alt_token_slen, char* dst);
 
-// assumes info_token[-1] is safe to read
-// may set info_token[info_slen] to \0, since it needs to use strstr()
+// These functions assume info_token[-1] is safe to read
+// They may set info_token[info_slen] to \0, since they need to use strstr()
 // (todo: try memmem()... except it isn't available on 32-bit mingw?)
-char* PrInInfoToken(uint32_t info_slen, char* info_token);
+uint32_t PrInInfo(uint32_t info_slen, char* info_token);
+
+char* InfoPrStart(uint32_t info_slen, char* info_token);
 
 // cip, max_variant_id_slen, and info_reload are in/out parameters.
 // Chromosome filtering is performed if cip requests it.



View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/cce84c08bbabda72d56f0ef78c4056523175bb52...d2773ed43c23f74d0db64432442fb669681849cf

-- 
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/compare/cce84c08bbabda72d56f0ef78c4056523175bb52...d2773ed43c23f74d0db64432442fb669681849cf
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210723/0fc74038/attachment-0001.htm>


More information about the debian-med-commit mailing list