[med-svn] [Git][med-team/plink2][upstream] New upstream version 2.00~a3-200321+dfsg

Dylan Aïssi gitlab at salsa.debian.org
Fri Mar 27 07:45:04 GMT 2020



Dylan Aïssi pushed to branch upstream at Debian Med / plink2


Commits:
2aaa4872 by Dylan Aïssi at 2020-03-27T08:42:52+01:00
New upstream version 2.00~a3-200321+dfsg
- - - - -


28 changed files:

- include/pgenlib_misc.cc
- include/pgenlib_misc.h
- include/pgenlib_read.cc
- include/plink2_base.cc
- include/plink2_base.h
- include/plink2_bits.cc
- include/plink2_bits.h
- include/plink2_string.cc
- include/plink2_string.h
- pgenlibr/src/Makevars
- plink2.cc
- plink2_cmdline.cc
- plink2_cmdline.h
- plink2_common.cc
- plink2_common.h
- plink2_data.cc
- plink2_data.h
- plink2_decompress.h
- plink2_export.cc
- plink2_export.h
- plink2_filter.cc
- plink2_glm.cc
- plink2_help.cc
- plink2_import.cc
- plink2_import.h
- plink2_matrix.h
- plink2_matrix_calc.cc
- plink2_pvar.cc


Changes:

=====================================
include/pgenlib_misc.cc
=====================================
@@ -1181,12 +1181,12 @@ void TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, ui
 #    else
         VecW tmp_lo = vecw_unpacklo16(loader0, loader1);
         VecW tmp_hi = vecw_unpackhi16(loader0, loader1);
-        loader0 = (tmp_lo & m16) | vecw_and_notfirst(m16, vecw_slli(tmp_hi, 16));
-        loader1 = (vecw_srli(tmp_lo, 16) & m16) | (vecw_and_notfirst(m16, tmp_hi));
+        loader0 = vecw_blendv(vecw_slli(tmp_hi, 16), tmp_lo, m16);
+        loader1 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 16), m16);
         tmp_lo = vecw_unpacklo16(loader2, loader3);
         tmp_hi = vecw_unpackhi16(loader2, loader3);
-        loader2 = (tmp_lo & m16) | vecw_and_notfirst(m16, vecw_slli(tmp_hi, 16));
-        loader3 = (vecw_srli(tmp_lo, 16) & m16) | (vecw_and_notfirst(m16, tmp_hi));
+        loader2 = vecw_blendv(vecw_slli(tmp_hi, 16), tmp_lo, m16);
+        loader3 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 16), m16);
 #    endif
         //    (0,0) (0,1) (1,0) ... (7,1) (0,2) ... (7,3) (8,0) ... (31,3)
         //  + (0,4) ... (31,7)
@@ -1232,7 +1232,7 @@ void TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, ui
       // and target_4567 to
       //   _ (4, 0) _ (5, 0) _ (6, 0) _ (7, 0) _ (4, 1) _ (5, 1) _ (6, 1) ...
       // This is perfectly arranged for movemask.
-      VecW target_4567 = (m8 & vecw_srli(loader, 7)) | vecw_and_notfirst(m8, loader);
+      VecW target_4567 = vecw_blendv(loader, vecw_srli(loader, 7), m8);
       target_iter7[vidx] = vecw_movemask(target_4567);
       target_4567 = vecw_slli(target_4567, 2);
       target_iter6[vidx] = vecw_movemask(target_4567);
@@ -1240,7 +1240,7 @@ void TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, ui
       target_iter5[vidx] = vecw_movemask(target_4567);
       target_4567 = vecw_slli(target_4567, 2);
       target_iter4[vidx] = vecw_movemask(target_4567);
-      VecW target_0123 = (vecw_slli(loader, 1) & m8) | vecw_and_notfirst(m8, vecw_slli(loader, 8));
+      VecW target_0123 = vecw_blendv(vecw_slli(loader, 8), vecw_slli(loader, 1), m8);
       target_iter3[vidx] = vecw_movemask(target_0123);
       target_0123 = vecw_slli(target_0123, 2);
       target_iter2[vidx] = vecw_movemask(target_0123);
@@ -1260,7 +1260,7 @@ void TransposeNypblock64(const uintptr_t* read_iter, uint32_t read_ul_stride, ui
   Vec8thUint* target_iter3 = &(target_iter2[write_v8ui_stride]);
   for (uint32_t vidx = 0; vidx != vec_ct; ++vidx) {
     const VecW loader = source_iter[vidx];
-    VecW target_0123 = (vecw_slli(loader, 1) & m8) | vecw_and_notfirst(m8, vecw_slli(loader, 8));
+    VecW target_0123 = vecw_blendv(vecw_slli(loader, 8), vecw_slli(loader, 1), m8);
     target_iter3[vidx] = vecw_movemask(target_0123);
     target_0123 = vecw_slli(target_0123, 2);
     target_iter2[vidx] = vecw_movemask(target_0123);
@@ -1420,8 +1420,9 @@ void SplitHomRef2het(const uintptr_t* genoarr, uint32_t sample_ct, uintptr_t* __
   }
 }
 
-
-/*
+// Note that once the result elements are only 4 bits wide, shuffle8 should
+// beat a 256x4 lookup table when SSE4 is available.  (possible todo: see if
+// shuffle8-based loop also wins here.)
 void GenoarrLookup256x1bx4(const uintptr_t* genoarr, const void* table256x1bx4, uint32_t sample_ct, void* __restrict result) {
   const uint32_t* table_alias = S_CAST(const uint32_t*, table256x1bx4);
   const unsigned char* genoarr_alias = R_CAST(const unsigned char*, genoarr);
@@ -1440,7 +1441,6 @@ void GenoarrLookup256x1bx4(const uintptr_t* genoarr, const void* table256x1bx4,
     }
   }
 }
-*/
 
 void GenoarrLookup16x4bx2(const uintptr_t* genoarr, const void* table16x4bx2, uint32_t sample_ct, void* __restrict result) {
   const uint64_t* table_alias = S_CAST(const uint64_t*, table16x4bx2);
@@ -1666,6 +1666,54 @@ void InitLookup16x8bx2(void* table16x8bx2) {
   }
 }
 
+void InitLookup256x1bx4(void* table256x1bx4) {
+  unsigned char* table_iter = S_CAST(unsigned char*, table256x1bx4);
+  unsigned char vals[4];
+  vals[0] = table_iter[0];
+  vals[1] = table_iter[4];
+  vals[2] = table_iter[8];
+  vals[3] = table_iter[12];
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = vals[high_idx];
+    for (uint32_t second_idx = 0; second_idx != 4; ++second_idx) {
+      const uint32_t cur_second = vals[second_idx];
+      for (uint32_t third_idx = 0; third_idx != 4; ++third_idx) {
+        const uint32_t cur_third = vals[third_idx];
+        for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+          *table_iter++ = vals[low_idx];
+          *table_iter++ = cur_third;
+          *table_iter++ = cur_second;
+          *table_iter++ = cur_high;
+        }
+      }
+    }
+  }
+}
+
+void InitLookup256x2bx4(void* table256x2bx4) {
+  uint16_t* table_iter = S_CAST(uint16_t*, table256x2bx4);
+  uint16_t vals[4];
+  vals[0] = table_iter[0];
+  vals[1] = table_iter[4];
+  vals[2] = table_iter[8];
+  vals[3] = table_iter[12];
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = vals[high_idx];
+    for (uint32_t second_idx = 0; second_idx != 4; ++second_idx) {
+      const uint32_t cur_second = vals[second_idx];
+      for (uint32_t third_idx = 0; third_idx != 4; ++third_idx) {
+        const uint32_t cur_third = vals[third_idx];
+        for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+          *table_iter++ = vals[low_idx];
+          *table_iter++ = cur_third;
+          *table_iter++ = cur_second;
+          *table_iter++ = cur_high;
+        }
+      }
+    }
+  }
+}
+
 void InitLookup256x4bx4(void* table256x4bx4) {
   uint32_t* table_iter = S_CAST(uint32_t*, table256x4bx4);
   uint32_t vals[4];
@@ -1798,6 +1846,158 @@ void InitPhaseLookup4b(void* table56x4bx2) {
   }
 }
 
+#ifdef __LP64__
+void PhaseLookup8b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x8bx2, uint32_t sample_ct, void* __restrict result) {
+  const __m128i* table_alias = S_CAST(const __m128i*, table56x8bx2);
+  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;
+  const Halfword* phasepresent_alias = R_CAST(const Halfword*, phasepresent);
+  const Halfword* phaseinfo_alias = R_CAST(const Halfword*, phaseinfo);
+  __m128i* result_iter = S_CAST(__m128i*, result);
+  uint32_t loop_len = kBitsPerWordD4;
+  uintptr_t geno_word = 0;
+  uintptr_t phasepresent_hw_shifted = 0;
+  uintptr_t phaseinfo_hw_shifted = 0;
+  for (uint32_t widx = 0; ; ++widx) {
+    if (widx >= sample_ctl2_m1) {
+      if (widx > sample_ctl2_m1) {
+        if (sample_ct % 2) {
+          uintptr_t cur_idx = (geno_word & 3);
+          if (phasepresent_hw_shifted & 16) {
+            cur_idx ^= 16 | (phaseinfo_hw_shifted & 2);
+          }
+          memcpy(result_iter, &(table_alias[cur_idx]), 8);
+        }
+        return;
+      }
+      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;
+    }
+    geno_word = genoarr[widx];
+    phasepresent_hw_shifted = phasepresent_alias[widx];
+    if (!phasepresent_hw_shifted) {
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        _mm_storeu_si128(result_iter, table_alias[geno_word & 15]);
+        ++result_iter;
+        geno_word >>= 4;
+      }
+    } else {
+      phasepresent_hw_shifted = phasepresent_hw_shifted << 4;
+      phaseinfo_hw_shifted = phaseinfo_alias[widx];
+      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 1;
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        const uintptr_t cur_idx = ((geno_word & 15) | (phasepresent_hw_shifted & 48)) ^ (phaseinfo_hw_shifted & 6);
+        _mm_storeu_si128(result_iter, table_alias[cur_idx]);
+        ++result_iter;
+        geno_word >>= 4;
+        phasepresent_hw_shifted >>= 2;
+        phaseinfo_hw_shifted >>= 2;
+      }
+    }
+  }
+}
+#else
+void PhaseLookup8b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x8bx2, uint32_t sample_ct, void* __restrict result) {
+  const uint64_t* table_alias = S_CAST(const uint64_t*, table56x8bx2);
+  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;
+  const Halfword* phasepresent_alias = R_CAST(const Halfword*, phasepresent);
+  const Halfword* phaseinfo_alias = R_CAST(const Halfword*, phaseinfo);
+  uint64_t* result_iter = S_CAST(uint64_t*, result);
+  uint32_t loop_len = kBitsPerWordD4;
+  uintptr_t geno_word = 0;
+  uintptr_t phasepresent_hw_shifted = 0;
+  uintptr_t phaseinfo_hw_shifted = 0;
+  for (uint32_t widx = 0; ; ++widx) {
+    if (widx >= sample_ctl2_m1) {
+      if (widx > sample_ctl2_m1) {
+        if (sample_ct % 2) {
+          uintptr_t cur_idx = (geno_word & 3);
+          if (phasepresent_hw_shifted & 16) {
+            cur_idx ^= 16 | (phaseinfo_hw_shifted & 2);
+          }
+          memcpy(result_iter, &(table_alias[cur_idx * 2]), 8);
+        }
+        return;
+      }
+      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;
+    }
+    geno_word = genoarr[widx];
+    phasepresent_hw_shifted = phasepresent_alias[widx];
+    if (!phasepresent_hw_shifted) {
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        memcpy(result_iter, &(table_alias[(geno_word & 15) * 2]), 16);
+        result_iter = &(result_iter[2]);
+        geno_word >>= 4;
+      }
+    } else {
+      phasepresent_hw_shifted = phasepresent_hw_shifted << 4;
+      phaseinfo_hw_shifted = phaseinfo_alias[widx];
+      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 1;
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        const uintptr_t cur_idx = ((geno_word & 15) | (phasepresent_hw_shifted & 48)) ^ (phaseinfo_hw_shifted & 6);
+        memcpy(result_iter, &(table_alias[cur_idx * 2]), 16);
+        geno_word >>= 4;
+        phasepresent_hw_shifted >>= 2;
+        phaseinfo_hw_shifted >>= 2;
+      }
+    }
+  }
+}
+#endif
+
+void InitPhaseLookup8b(void* table56x8bx2) {
+  uint64_t* table_iter = S_CAST(uint64_t*, table56x8bx2);
+  uint64_t vals[4];
+  vals[0] = table_iter[0];
+  table_iter[1] = vals[0];
+  vals[1] = table_iter[2];
+  table_iter[3] = vals[0];
+  vals[2] = table_iter[4];
+  table_iter[5] = vals[0];
+  vals[3] = table_iter[6];
+  table_iter[7] = vals[0];
+  table_iter = &(table_iter[8]);
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint64_t cur_high = vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [16][0]..[31][1]: bit 4 is set
+  // low bits must be 01 or 11
+  const uint64_t val_phaseinfo0 = table_iter[2];
+  table_iter[3] = vals[0];
+  const uint64_t val_phaseinfo1 = table_iter[6];
+  table_iter[7] = vals[0];
+  table_iter = &(table_iter[8]);
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint64_t cur_high = vals[high_idx];
+    table_iter[2] = val_phaseinfo0;
+    table_iter[3] = cur_high;
+    table_iter[6] = val_phaseinfo1;
+    table_iter[7] = cur_high;
+    table_iter = &(table_iter[8]);
+  }
+  // [32][0]..[39][1]: bit 5 set, bit 4 unset
+  // high bits must be 00 or 01
+  for (uint32_t high_idx = 0; high_idx != 2; ++high_idx) {
+    const uint64_t cur_high = high_idx? val_phaseinfo0 : val_phaseinfo1;
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  table_iter = &(table_iter[16]);
+  // [48][0]..[55][1]: bits 4 and 5 set
+  for (uint32_t high_idx = 0; high_idx != 2; ++high_idx) {
+    const uint64_t cur_high = high_idx? val_phaseinfo0 : val_phaseinfo1;
+    table_iter[2] = val_phaseinfo0;
+    table_iter[3] = cur_high;
+    table_iter[6] = val_phaseinfo1;
+    table_iter[7] = cur_high;
+    table_iter = &(table_iter[8]);
+  }
+}
+
 // bits 0..3: two genotypes
 // bits 4..5: two (phasepresent | sex_male) bits
 // bits 1,3: unpacked phaseinfo xor
@@ -1925,6 +2125,393 @@ void GenoarrSexLookup4b(const uintptr_t* genoarr, const uintptr_t* sex_male, con
   }
 }
 
+void InitPhaseXNohhLookup8b(void* table64x8bx2) {
+  uint64_t* table_iter = S_CAST(uint64_t*, table64x8bx2);
+  uint64_t vals[4];
+  vals[0] = table_iter[0];
+  table_iter[1] = vals[0];
+  vals[1] = table_iter[2];
+  table_iter[3] = vals[0];
+  vals[2] = table_iter[4];
+  table_iter[5] = vals[0];
+  vals[3] = table_iter[6];
+  table_iter[7] = vals[0];
+  table_iter = &(table_iter[8]);
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint64_t cur_high = vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [16][0]..[31][1]: bit 4 is set
+  uint64_t male_or_phasepresent_vals[4];
+  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+    male_or_phasepresent_vals[low_idx] = *table_iter++;
+    *table_iter++ = vals[0];
+  }
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint64_t cur_high = vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = male_or_phasepresent_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [32][0]..[47][1]: bit 5 set, bit 4 unset
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint64_t cur_high = male_or_phasepresent_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [48][0]..[63][1]: bits 4 and 5 set
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint64_t cur_high = male_or_phasepresent_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = male_or_phasepresent_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+}
+
+#ifdef __LP64__
+void GenoarrSexLookup8b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x8bx2, uint32_t sample_ct, void* result) {
+  const __m128i* table_alias = S_CAST(const __m128i*, table64x8bx2);
+  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;
+  const Halfword* sex_male_alias = R_CAST(const Halfword*, sex_male);
+  __m128i* result_iter = S_CAST(__m128i*, result);
+  uint32_t loop_len = kBitsPerWordD4;
+  uintptr_t geno_word = 0;
+  uintptr_t male_hw_shifted = 0;
+  for (uint32_t widx = 0; ; ++widx) {
+    if (widx >= sample_ctl2_m1) {
+      if (widx > sample_ctl2_m1) {
+        if (sample_ct % 2) {
+          uintptr_t cur_idx = (geno_word & 3) | (male_hw_shifted & 16);
+          memcpy(result_iter, &(table_alias[cur_idx]), 8);
+        }
+        return;
+      }
+      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;
+    }
+    geno_word = genoarr[widx];
+    male_hw_shifted = sex_male_alias[widx];
+    male_hw_shifted <<= 4;
+    for (uint32_t uii = 0; uii != loop_len; ++uii) {
+      _mm_storeu_si128(result_iter, table_alias[(geno_word & 15) | (male_hw_shifted & 48)]);
+      ++result_iter;
+      geno_word >>= 4;
+      male_hw_shifted >>= 2;
+    }
+  }
+}
+#else
+void GenoarrSexLookup8b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x8bx2, uint32_t sample_ct, void* result) {
+  const uint64_t* table_alias = S_CAST(const uint64_t*, table64x8bx2);
+  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;
+  const Halfword* sex_male_alias = R_CAST(const Halfword*, sex_male);
+  uint64_t* result_iter = S_CAST(uint64_t*, result);
+  uint32_t loop_len = kBitsPerWordD4;
+  uintptr_t geno_word = 0;
+  uintptr_t male_hw_shifted = 0;
+  for (uint32_t widx = 0; ; ++widx) {
+    if (widx >= sample_ctl2_m1) {
+      if (widx > sample_ctl2_m1) {
+        if (sample_ct % 2) {
+          uintptr_t cur_idx = (geno_word & 3) | (male_hw_shifted & 16);
+          memcpy(result_iter, &(table_alias[cur_idx * 2]), 8);
+        }
+        return;
+      }
+      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;
+    }
+    geno_word = genoarr[widx];
+    male_hw_shifted = sex_male_alias[widx];
+    male_hw_shifted <<= 4;
+    for (uint32_t uii = 0; uii != loop_len; ++uii) {
+      memcpy(result_iter, &(table_alias[((geno_word & 15) | (male_hw_shifted & 48)) * 2]), 16);
+      result_iter = &(result_iter[2]);
+      geno_word >>= 4;
+      male_hw_shifted >>= 2;
+    }
+  }
+}
+#endif
+
+void VcfPhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x4bx2, uint32_t sample_ct, void* __restrict result) {
+  const uint64_t* table_alias = S_CAST(const uint64_t*, table246x4bx2);
+  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;
+  const Halfword* cur_phased_alias = R_CAST(const Halfword*, cur_phased);
+  const Halfword* phaseinfo_alias = R_CAST(const Halfword*, phaseinfo);
+  uint64_t* result_iter = S_CAST(uint64_t*, result);
+  uint32_t loop_len = kBitsPerWordD4;
+  uintptr_t geno_word = 0;
+  uintptr_t cur_phased_hw_shifted = 0;
+  uintptr_t phaseinfo_hw_shifted = 0;
+  for (uint32_t widx = 0; ; ++widx) {
+    if (widx >= sample_ctl2_m1) {
+      if (widx > sample_ctl2_m1) {
+        if (sample_ct % 2) {
+          uintptr_t cur_idx = (geno_word & 3) | (cur_phased_hw_shifted & 16) | (phaseinfo_hw_shifted & 64);
+          memcpy(result_iter, &(table_alias[cur_idx]), 4);
+        }
+        return;
+      }
+      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;
+    }
+    geno_word = genoarr[widx];
+    cur_phased_hw_shifted = cur_phased_alias[widx];
+    if (!cur_phased_hw_shifted) {
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        *result_iter++ = table_alias[geno_word & 15];
+        geno_word >>= 4;
+      }
+    } else {
+      cur_phased_hw_shifted = cur_phased_hw_shifted << 4;
+      phaseinfo_hw_shifted = phaseinfo_alias[widx];
+
+      // note that this must be on a separate line (or we have to static_cast)
+      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 6;
+
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        const uintptr_t cur_idx = (geno_word & 15) | (cur_phased_hw_shifted & 48) | (phaseinfo_hw_shifted & 192);
+        *result_iter++ = table_alias[cur_idx];
+        geno_word >>= 4;
+        cur_phased_hw_shifted >>= 2;
+        phaseinfo_hw_shifted >>= 2;
+      }
+    }
+  }
+}
+
+void InitVcfPhaseLookup4b(void* table246x4bx2) {
+  uint32_t* table_iter = S_CAST(uint32_t*, table246x4bx2);
+  uint32_t unphased_vals[4];
+  unphased_vals[0] = table_iter[0];
+  table_iter[1] = unphased_vals[0];
+  unphased_vals[1] = table_iter[2];
+  table_iter[3] = unphased_vals[0];
+  unphased_vals[2] = table_iter[4];
+  table_iter[5] = unphased_vals[0];
+  unphased_vals[3] = table_iter[6];
+  table_iter[7] = unphased_vals[0];
+  table_iter = &(table_iter[8]);
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = unphased_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = unphased_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [16][0]..[31][1]: first entry is phased and unflipped, second is unphased
+  uint32_t phased_unflipped_vals[4];
+  phased_unflipped_vals[0] = table_iter[0];
+  table_iter[1] = unphased_vals[0];
+  phased_unflipped_vals[1] = table_iter[2];
+  table_iter[3] = unphased_vals[0];
+  phased_unflipped_vals[2] = table_iter[4];
+  table_iter[5] = unphased_vals[0];
+  phased_unflipped_vals[3] = table_iter[6];
+  table_iter[7] = unphased_vals[0];
+  table_iter = &(table_iter[8]);
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = unphased_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = phased_unflipped_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [32][0]..[63][1]: second entry is phased and unflipped
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = phased_unflipped_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = unphased_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = phased_unflipped_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = phased_unflipped_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [64][0]..[79][1] should be impossible
+  table_iter = &(table_iter[32]);
+  // [80][0]..[95][1]: first entry is phased and flipped, second is unphased
+  // genotype must be 01
+  const uint32_t phased_flipped_01 = table_iter[2];
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    table_iter[2] = phased_flipped_01;
+    table_iter[3] = unphased_vals[high_idx];
+    table_iter = &(table_iter[8]);
+  }
+  // [96][0]..[111][1] should be impossible
+  table_iter = &(table_iter[32]);
+  // [112][0]..[127][1]: first entry phased-flipped, second phased-unflipped
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    table_iter[2] = phased_flipped_01;
+    table_iter[3] = phased_unflipped_vals[high_idx];
+    table_iter = &(table_iter[8]);
+  }
+  // [128][0]..[163][1] should be impossible
+  table_iter = &(table_iter[72]);
+  // [164][0]..[167][1]: second entry phased-flipped, first entry unphased
+  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+    *table_iter++ = unphased_vals[low_idx];
+    *table_iter++ = phased_flipped_01;
+  }
+  // [168][0]..[179][1] should be impossible
+  table_iter = &(table_iter[24]);
+  // [180][0]..[183][1]: second entry phased-flipped, first phased-unflipped
+  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+    *table_iter++ = phased_unflipped_vals[low_idx];
+    *table_iter++ = phased_flipped_01;
+  }
+  // [184][0]..[244][1] should be impossible
+  // [245][0]..[245][1]: both phased-flipped
+  table_iter[122] = phased_flipped_01;
+  table_iter[123] = phased_flipped_01;
+}
+
+void VcfPhaseLookup2b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x2bx2, uint32_t sample_ct, void* __restrict result) {
+  const uint32_t* table_alias = S_CAST(const uint32_t*, table246x2bx2);
+  const uint32_t sample_ctl2_m1 = (sample_ct - 1) / kBitsPerWordD2;
+  const Halfword* cur_phased_alias = R_CAST(const Halfword*, cur_phased);
+  const Halfword* phaseinfo_alias = R_CAST(const Halfword*, phaseinfo);
+  uint32_t* result_iter = S_CAST(uint32_t*, result);
+  uint32_t loop_len = kBitsPerWordD4;
+  uintptr_t geno_word = 0;
+  uintptr_t cur_phased_hw_shifted = 0;
+  uintptr_t phaseinfo_hw_shifted = 0;
+  for (uint32_t widx = 0; ; ++widx) {
+    if (widx >= sample_ctl2_m1) {
+      if (widx > sample_ctl2_m1) {
+        if (sample_ct % 2) {
+          uintptr_t cur_idx = (geno_word & 3) | (cur_phased_hw_shifted & 16) | (phaseinfo_hw_shifted & 64);
+          memcpy(result_iter, &(table_alias[cur_idx]), 2);
+        }
+        return;
+      }
+      loop_len = ModNz(sample_ct, kBitsPerWordD2) / 2;
+    }
+    geno_word = genoarr[widx];
+    cur_phased_hw_shifted = cur_phased_alias[widx];
+    if (!cur_phased_hw_shifted) {
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        *result_iter++ = table_alias[geno_word & 15];
+        geno_word >>= 4;
+      }
+    } else {
+      cur_phased_hw_shifted = cur_phased_hw_shifted << 4;
+      phaseinfo_hw_shifted = phaseinfo_alias[widx];
+
+      // note that this must be on a separate line (or we have to static_cast)
+      phaseinfo_hw_shifted = phaseinfo_hw_shifted << 6;
+
+      for (uint32_t uii = 0; uii != loop_len; ++uii) {
+        const uintptr_t cur_idx = (geno_word & 15) | (cur_phased_hw_shifted & 48) | (phaseinfo_hw_shifted & 192);
+        *result_iter++ = table_alias[cur_idx];
+        geno_word >>= 4;
+        cur_phased_hw_shifted >>= 2;
+        phaseinfo_hw_shifted >>= 2;
+      }
+    }
+  }
+}
+
+void InitVcfPhaseLookup2b(void* table246x2bx2) {
+  uint16_t* table_iter = S_CAST(uint16_t*, table246x2bx2);
+  uint16_t unphased_vals[4];
+  unphased_vals[0] = table_iter[0];
+  table_iter[1] = unphased_vals[0];
+  unphased_vals[1] = table_iter[2];
+  table_iter[3] = unphased_vals[0];
+  unphased_vals[2] = table_iter[4];
+  table_iter[5] = unphased_vals[0];
+  unphased_vals[3] = table_iter[6];
+  table_iter[7] = unphased_vals[0];
+  table_iter = &(table_iter[8]);
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = unphased_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = unphased_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [16][0]..[31][1]: first entry is phased and unflipped, second is unphased
+  uint16_t phased_unflipped_vals[4];
+  phased_unflipped_vals[0] = table_iter[0];
+  table_iter[1] = unphased_vals[0];
+  phased_unflipped_vals[1] = table_iter[2];
+  table_iter[3] = unphased_vals[0];
+  phased_unflipped_vals[2] = table_iter[4];
+  table_iter[5] = unphased_vals[0];
+  phased_unflipped_vals[3] = table_iter[6];
+  table_iter[7] = unphased_vals[0];
+  table_iter = &(table_iter[8]);
+  for (uint32_t high_idx = 1; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = unphased_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = phased_unflipped_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [32][0]..[63][1]: second entry is phased and unflipped
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = phased_unflipped_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = unphased_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    const uint32_t cur_high = phased_unflipped_vals[high_idx];
+    for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+      *table_iter++ = phased_unflipped_vals[low_idx];
+      *table_iter++ = cur_high;
+    }
+  }
+  // [64][0]..[79][1] should be impossible
+  table_iter = &(table_iter[32]);
+  // [80][0]..[95][1]: first entry is phased and flipped, second is unphased
+  // genotype must be 01
+  const uint32_t phased_flipped_01 = table_iter[2];
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    table_iter[2] = phased_flipped_01;
+    table_iter[3] = unphased_vals[high_idx];
+    table_iter = &(table_iter[8]);
+  }
+  // [96][0]..[111][1] should be impossible
+  table_iter = &(table_iter[32]);
+  // [112][0]..[127][1]: first entry phased-flipped, second phased-unflipped
+  for (uint32_t high_idx = 0; high_idx != 4; ++high_idx) {
+    table_iter[2] = phased_flipped_01;
+    table_iter[3] = phased_unflipped_vals[high_idx];
+    table_iter = &(table_iter[8]);
+  }
+  // [128][0]..[163][1] should be impossible
+  table_iter = &(table_iter[72]);
+  // [164][0]..[167][1]: second entry phased-flipped, first entry unphased
+  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+    *table_iter++ = unphased_vals[low_idx];
+    *table_iter++ = phased_flipped_01;
+  }
+  // [168][0]..[179][1] should be impossible
+  table_iter = &(table_iter[24]);
+  // [180][0]..[183][1]: second entry phased-flipped, first phased-unflipped
+  for (uint32_t low_idx = 0; low_idx != 4; ++low_idx) {
+    *table_iter++ = phased_unflipped_vals[low_idx];
+    *table_iter++ = phased_flipped_01;
+  }
+  // [184][0]..[244][1] should be impossible
+  // [245][0]..[245][1]: both phased-flipped
+  table_iter[122] = phased_flipped_01;
+  table_iter[123] = phased_flipped_01;
+}
+
+
 void ClearGenoarrMissing1bit8Unsafe(const uintptr_t* __restrict genoarr, uint32_t* subset_sizep, uintptr_t* __restrict subset, void* __restrict sparse_vals) {
   const uint32_t orig_subset_size = *subset_sizep;
   Halfword* subset_alias = R_CAST(Halfword*, subset);


=====================================
include/pgenlib_misc.h
=====================================
@@ -79,7 +79,7 @@
 // 10000 * major + 100 * minor + patch
 // Exception to CONSTI32, since we want the preprocessor to have access to this
 // value.  Named with all caps as a consequence.
-#define PGENLIB_INTERNAL_VERNUM 1501
+#define PGENLIB_INTERNAL_VERNUM 1504
 
 #ifdef __cplusplus
 namespace plink2 {
@@ -472,6 +472,8 @@ void SplitHomRef2het(const uintptr_t* genoarr, uint32_t sample_ct, uintptr_t* __
 // so they aren't always the right choice.
 // When lookup table rows are 16 bytes, they are assumed to be 16-byte aligned
 // in 64-bit builds.  result[] is not assumed to be aligned.
+void GenoarrLookup256x1bx4(const uintptr_t* genoarr, const void* table256x1bx4, uint32_t sample_ct, void* __restrict result);
+
 void GenoarrLookup16x4bx2(const uintptr_t* genoarr, const void* table16x4bx2, uint32_t sample_ct, void* result);
 
 void GenoarrLookup256x2bx4(const uintptr_t* genoarr, const void* table256x2bx4, uint32_t sample_ct, void* result);
@@ -515,6 +517,10 @@ void InitLookup16x4bx2(void* table16x4bx2);
 
 void InitLookup16x8bx2(void* table16x8bx2);
 
+void InitLookup256x1bx4(void* table256x1bx4);
+
+void InitLookup256x2bx4(void* table256x2bx4);
+
 void InitLookup256x4bx4(void* table256x4bx4);
 
 void PhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x4bx2, uint32_t sample_ct, void* result);
@@ -522,6 +528,10 @@ void PhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* phasepresent, cons
 // [0][0]..[3][0], [17][0], and [19][0] should contain the relevant values
 void InitPhaseLookup4b(void* table56x4bx2);
 
+void PhaseLookup8b(const uintptr_t* genoarr, const uintptr_t* phasepresent, const uintptr_t* phaseinfo, const void* table56x8bx2, uint32_t sample_ct, void* result);
+
+void InitPhaseLookup8b(void* table56x8bx2);
+
 // het-haploid prohibited.  64-entry table suffices: we use the same bits for
 // phasepresent and sex_male since they can't be true simultaneously.
 // phaseinfo is xor'd with bits 1 and 3 instead of 1 and 2.
@@ -533,6 +543,25 @@ void InitPhaseXNohhLookup4b(void* table64x4bx2);
 // uses same table as PhaseXNohhLookup
 void GenoarrSexLookup4b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x4bx2, uint32_t sample_ct, void* result);
 
+void InitPhaseXNohhLookup8b(void* table64x8bx2);
+
+void GenoarrSexLookup8b(const uintptr_t* genoarr, const uintptr_t* sex_male, const void* table64x8bx2, uint32_t sample_ct, void* result);
+
+// Unlike PhaseLookup4b(), this allows the cur_phased bit to be set when the
+// genoarr entry is not 01 (het).
+void VcfPhaseLookup4b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x4bx2, uint32_t sample_ct, void* __restrict result);
+
+// Precondition:
+//   [0], [2], [4], [6] initialized with unphased entries
+//   [32], [34], [36], [38] initialized with phased-unflipped entries
+//   [162] initialized with phased-flipped case
+void InitVcfPhaseLookup4b(void* table246x4bx2);
+
+void VcfPhaseLookup2b(const uintptr_t* genoarr, const uintptr_t* cur_phased, const uintptr_t* phaseinfo, const void* table246x2bx2, uint32_t sample_ct, void* __restrict result);
+
+void InitVcfPhaseLookup2b(void* table246x2bx2);
+
+
 // Analogue of BitIter1x.
 HEADER_INLINE uint32_t GenoIter1x(const uintptr_t* __restrict genoarr, uintptr_t match_word, uintptr_t* __restrict widxp, uintptr_t* __restrict cur_bitsp) {
   uintptr_t cur_bits = *cur_bitsp;


=====================================
include/pgenlib_read.cc
=====================================
@@ -6636,39 +6636,39 @@ PglErr ParseAux2Subset(const unsigned char* fread_end, const uintptr_t* __restri
       }
       ExpandThenSubsetBytearr(aux2_start, all_hets, sample_include, het_ct, sample_ct, 1, phaseinfo);
     }
-    *phasepresent_ct_ptr = PopcountWords(phasepresent, sample_ctl);
-    return kPglRetSuccess;
-  }
-  const uint32_t het_ctdl = het_ct / kBitsPerWord;
+    // bugfix (25 Feb 2020): forgot to mask out subsetted_10het here
+  } else {
+    const uint32_t het_ctdl = het_ct / kBitsPerWord;
 
-  // explicit phasepresent
-  const uintptr_t* aux2_first_part = R_CAST(const uintptr_t*, aux2_start);
-  uintptr_t* aux2_first_part_copy = workspace_subset;
-  aux2_first_part_copy[het_ctdl] = 0;
-  memcpy(aux2_first_part_copy, aux2_first_part, 1 + (het_ct / CHAR_BIT));
-  const uint32_t raw_phasepresent_ct = PopcountWords(aux2_first_part_copy, het_ctdl + 1) - 1;
-  if (unlikely(!raw_phasepresent_ct)) {
-    // there shouldn't be a hphase track at all in this case
-    return kPglRetMalformedInput;
-  }
-  const unsigned char* aux2_second_part = &(aux2_start[1 + (het_ct / CHAR_BIT)]);
-  *fread_pp = aux2_second_part;
-  if (PtrAddCk(fread_end, DivUp(raw_phasepresent_ct, CHAR_BIT), fread_pp)) {
-    return kPglRetMalformedInput;
-  }
-  if (!phaseinfo) {
-    return kPglRetSuccess;
-  }
-  if (!sample_include) {
-    ExpandBytearrNested(aux2_second_part, aux2_first_part_copy, all_hets, sample_ctl, raw_phasepresent_ct, 1, phasepresent, phaseinfo);
-    if (!subsetted_10het) {
-      *phasepresent_ct_ptr = raw_phasepresent_ct;
+    // explicit phasepresent
+    const uintptr_t* aux2_first_part = R_CAST(const uintptr_t*, aux2_start);
+    uintptr_t* aux2_first_part_copy = workspace_subset;
+    aux2_first_part_copy[het_ctdl] = 0;
+    memcpy(aux2_first_part_copy, aux2_first_part, 1 + (het_ct / CHAR_BIT));
+    const uint32_t raw_phasepresent_ct = PopcountWords(aux2_first_part_copy, het_ctdl + 1) - 1;
+    if (unlikely(!raw_phasepresent_ct)) {
+      // there shouldn't be a hphase track at all in this case
+      return kPglRetMalformedInput;
+    }
+    const unsigned char* aux2_second_part = &(aux2_start[1 + (het_ct / CHAR_BIT)]);
+    *fread_pp = aux2_second_part;
+    if (PtrAddCk(fread_end, DivUp(raw_phasepresent_ct, CHAR_BIT), fread_pp)) {
+      return kPglRetMalformedInput;
+    }
+    if (!phaseinfo) {
       return kPglRetSuccess;
     }
-  } else {
-    // could skip if intersection of phasepresent with sample_include is empty,
-    // but this function call should be fast enough there anyway?
-    ExpandThenSubsetBytearrNested(aux2_second_part, aux2_first_part_copy, all_hets, sample_include, sample_ct, raw_phasepresent_ct, 1, phasepresent, phaseinfo);
+    if (!sample_include) {
+      ExpandBytearrNested(aux2_second_part, aux2_first_part_copy, all_hets, sample_ctl, raw_phasepresent_ct, 1, phasepresent, phaseinfo);
+      if (!subsetted_10het) {
+        *phasepresent_ct_ptr = raw_phasepresent_ct;
+        return kPglRetSuccess;
+      }
+    } else {
+      // could skip if intersection of phasepresent with sample_include is
+      // empty, but this function call should be fast enough there anyway?
+      ExpandThenSubsetBytearrNested(aux2_second_part, aux2_first_part_copy, all_hets, sample_include, sample_ct, raw_phasepresent_ct, 1, phasepresent, phaseinfo);
+    }
   }
   if (subsetted_10het) {
     BitvecInvmask(subsetted_10het, sample_ctl, phasepresent);


=====================================
include/plink2_base.cc
=====================================
@@ -173,11 +173,11 @@ BoolErr ScanIntAbsBounded(const char* str_iter, uint64_t bound, int32_t* valp) {
   if (ctou32(*valp) >= 10) {
     if (*valp == -3) {
       sign = -1;
-    } else if (*valp != -5) {
+    } else if (unlikely(*valp != -5)) {
       return 1;
     }
     *valp = ctou32(*str_iter++) - 48;
-    if (unlikely(ctou32(*valp) >= 10)) {
+    if (unlikely(*valp >= 10)) {
       return 1;
     }
   }
@@ -260,7 +260,7 @@ BoolErr ScanIntAbsBounded32(const char* str_iter, uint32_t bound_div_10, uint32_
   if (val >= 10) {
     if (val == 0xfffffffdU) {
       sign = -1;
-    } else if (val != 0xfffffffbU) {
+    } else if (unlikely(val != 0xfffffffbU)) {
       return 1;
     }
     val = ctou32(*str_iter++) - 48;


=====================================
include/plink2_base.h
=====================================
@@ -1019,6 +1019,10 @@ HEADER_INLINE void vecw_storeu(void* mem_addr, VecW vv) {
   _mm256_storeu_si256(S_CAST(__m256i*, mem_addr), R_CAST(__m256i, vv));
 }
 
+HEADER_INLINE void vecu32_storeu(void* mem_addr, VecU32 vv) {
+  _mm256_storeu_si256(S_CAST(__m256i*, mem_addr), R_CAST(__m256i, vv));
+}
+
 HEADER_INLINE void veci32_storeu(void* mem_addr, VecI32 vv) {
   _mm256_storeu_si256(S_CAST(__m256i*, mem_addr), R_CAST(__m256i, vv));
 }
@@ -1059,6 +1063,18 @@ HEADER_INLINE VecUc vecuc_min(VecUc v1, VecUc v2) {
   return R_CAST(VecUc, _mm256_min_epu8(R_CAST(__m256i, v1), R_CAST(__m256i, v2)));
 }
 
+HEADER_INLINE VecW vecw_blendv(VecW aa, VecW bb, VecW mask) {
+  return R_CAST(VecW, _mm256_blendv_epi8(R_CAST(__m256i, aa), R_CAST(__m256i, bb), R_CAST(__m256i, mask)));
+}
+
+HEADER_INLINE VecU32 vecu32_blendv(VecU32 aa, VecU32 bb, VecU32 mask) {
+  return R_CAST(VecU32, _mm256_blendv_epi8(R_CAST(__m256i, aa), R_CAST(__m256i, bb), R_CAST(__m256i, mask)));
+}
+
+HEADER_INLINE VecU16 vecu16_blendv(VecU16 aa, VecU16 bb, VecU16 mask) {
+  return R_CAST(VecU16, _mm256_blendv_epi8(R_CAST(__m256i, aa), R_CAST(__m256i, bb), R_CAST(__m256i, mask)));
+}
+
 #  else  // !USE_AVX2
 
 #    define VCONST_W(xx) {xx, xx}
@@ -1238,6 +1254,10 @@ HEADER_INLINE void vecw_storeu(void* mem_addr, VecW vv) {
   _mm_storeu_si128(S_CAST(__m128i*, mem_addr), R_CAST(__m128i, vv));
 }
 
+HEADER_INLINE void vecu32_storeu(void* mem_addr, VecU32 vv) {
+  _mm_storeu_si128(S_CAST(__m128i*, mem_addr), R_CAST(__m128i, vv));
+}
+
 HEADER_INLINE void veci32_storeu(void* mem_addr, VecI32 vv) {
   _mm_storeu_si128(S_CAST(__m128i*, mem_addr), R_CAST(__m128i, vv));
 }
@@ -1368,6 +1388,18 @@ HEADER_INLINE uintptr_t vecw_extract64_0(VecW vv) {
 HEADER_INLINE uintptr_t vecw_extract64_1(VecW vv) {
   return _mm_extract_epi64(R_CAST(__m128i, vv), 1);
 }
+
+HEADER_INLINE VecW vecw_blendv(VecW aa, VecW bb, VecW mask) {
+  return R_CAST(VecW, _mm_blendv_epi8(R_CAST(__m128i, aa), R_CAST(__m128i, bb), R_CAST(__m128i, mask)));
+}
+
+HEADER_INLINE VecU32 vecu32_blendv(VecU32 aa, VecU32 bb, VecU32 mask) {
+  return R_CAST(VecU32, _mm_blendv_epi8(R_CAST(__m128i, aa), R_CAST(__m128i, bb), R_CAST(__m128i, mask)));
+}
+
+HEADER_INLINE VecU16 vecu16_blendv(VecU16 aa, VecU16 bb, VecU16 mask) {
+  return R_CAST(VecU16, _mm_blendv_epi8(R_CAST(__m128i, aa), R_CAST(__m128i, bb), R_CAST(__m128i, mask)));
+}
 #    else
 HEADER_INLINE uintptr_t vecw_extract64_0(VecW vv) {
   return R_CAST(uintptr_t, _mm_movepi64_pi64(R_CAST(__m128i, vv)));
@@ -1377,6 +1409,20 @@ HEADER_INLINE uintptr_t vecw_extract64_1(VecW vv) {
   const __m128i v0 = _mm_srli_si128(R_CAST(__m128i, vv), 8);
   return R_CAST(uintptr_t, _mm_movepi64_pi64(v0));
 }
+
+// N.B. we do *not* enforce the low bits of each mask byte matching the high
+// bit.
+HEADER_INLINE VecW vecw_blendv(VecW aa, VecW bb, VecW mask) {
+  return vecw_and_notfirst(mask, aa) | (mask & bb);
+}
+
+HEADER_INLINE VecU32 vecu32_blendv(VecU32 aa, VecU32 bb, VecU32 mask) {
+  return vecu32_and_notfirst(mask, aa) | (mask & bb);
+}
+
+HEADER_INLINE VecU16 vecu16_blendv(VecU16 aa, VecU16 bb, VecU16 mask) {
+  return vecu16_and_notfirst(mask, aa) | (mask & bb);
+}
 #    endif
 
 HEADER_INLINE VecI16 veci16_max(VecI16 v1, VecI16 v2) {
@@ -1398,7 +1444,6 @@ HEADER_INLINE VecU16 vecu16_min8(VecU16 v1, VecU16 v2) {
 HEADER_INLINE VecUc vecuc_min(VecUc v1, VecUc v2) {
   return R_CAST(VecUc, _mm_min_epu8(R_CAST(__m128i, v1), R_CAST(__m128i, v2)));
 }
-
 #  endif  // !USE_AVX2
 
 HEADER_INLINE VecW vecw_bytesum(VecW src, VecW m0) {


=====================================
include/plink2_bits.cc
=====================================
@@ -1682,12 +1682,12 @@ void TransposeBitblock64(const uintptr_t* read_iter, uintptr_t read_ul_stride, u
 #    else
         VecW tmp_lo = vecw_unpacklo8(loader0, loader1);
         VecW tmp_hi = vecw_unpackhi8(loader0, loader1);
-        loader0 = (tmp_lo & m8) | vecw_and_notfirst(m8, vecw_slli(tmp_hi, 8));
-        loader1 = (vecw_srli(tmp_lo, 8) & m8) | vecw_and_notfirst(m8, tmp_hi);
+        loader0 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);
+        loader1 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);
         tmp_lo = vecw_unpacklo8(loader2, loader3);
         tmp_hi = vecw_unpackhi8(loader2, loader3);
-        loader2 = (tmp_lo & m8) | vecw_and_notfirst(m8, vecw_slli(tmp_hi, 8));
-        loader3 = (vecw_srli(tmp_lo, 8) & m8) | vecw_and_notfirst(m8, tmp_hi);
+        loader2 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);
+        loader3 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);
 #    endif
         // -> (0,0) (1,0) (2,0) (3,0) (0,1) (1,1) (2,1) (3,1) (0,2) ...
         const VecW lo_0123 = vecw_unpacklo16(loader0, loader1);
@@ -2086,20 +2086,20 @@ void TransposeNybbleblock(const uintptr_t* read_iter, uint32_t read_ul_stride, u
 #    else
       VecW tmp_lo = vecw_unpacklo8(even_nybbles0, odd_nybbles0);
       VecW tmp_hi = vecw_unpackhi8(even_nybbles0, odd_nybbles0);
-      even_nybbles0 = (tmp_lo & m8) | vecw_and_notfirst(m8, vecw_slli(tmp_hi, 8));
-      odd_nybbles0 = (vecw_srli(tmp_lo, 8) & m8) | vecw_and_notfirst(m8, tmp_hi);
+      even_nybbles0 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);
+      odd_nybbles0 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);
       tmp_lo = vecw_unpacklo8(even_nybbles1, odd_nybbles1);
       tmp_hi = vecw_unpackhi8(even_nybbles1, odd_nybbles1);
-      even_nybbles1 = (tmp_lo & m8) | vecw_and_notfirst(m8, vecw_slli(tmp_hi, 8));
-      odd_nybbles1 = (vecw_srli(tmp_lo, 8) & m8) | vecw_and_notfirst(m8, tmp_hi);
+      even_nybbles1 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);
+      odd_nybbles1 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);
       tmp_lo = vecw_unpacklo8(even_nybbles2, odd_nybbles2);
       tmp_hi = vecw_unpackhi8(even_nybbles2, odd_nybbles2);
-      even_nybbles2 = (tmp_lo & m8) | vecw_and_notfirst(m8, vecw_slli(tmp_hi, 8));
-      odd_nybbles2 = (vecw_srli(tmp_lo, 8) & m8) | vecw_and_notfirst(m8, tmp_hi);
+      even_nybbles2 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);
+      odd_nybbles2 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);
       tmp_lo = vecw_unpacklo8(even_nybbles3, odd_nybbles3);
       tmp_hi = vecw_unpackhi8(even_nybbles3, odd_nybbles3);
-      even_nybbles3 = (tmp_lo & m8) | vecw_and_notfirst(m8, vecw_slli(tmp_hi, 8));
-      odd_nybbles3 = (vecw_srli(tmp_lo, 8) & m8) | vecw_and_notfirst(m8, tmp_hi);
+      even_nybbles3 = vecw_blendv(vecw_slli(tmp_hi, 8), tmp_lo, m8);
+      odd_nybbles3 = vecw_blendv(tmp_hi, vecw_srli(tmp_lo, 8), m8);
 #    endif
 
       const VecW even_lo = vecw_unpacklo16(even_nybbles0, even_nybbles1);


=====================================
include/plink2_bits.h
=====================================
@@ -504,6 +504,8 @@ uintptr_t CountByte(const void* bytearr, unsigned char ucc, uintptr_t byte_ct);
 
 uintptr_t CountU16(const void* u16arr, uint16_t usii, uintptr_t u16_ct);
 
+
+// Applies sample_include to {src_subset, src_vals}.
 uint32_t Copy1bit8Subset(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uintptr_t* __restrict sample_include, uint32_t src_subset_size, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals);
 
 uint32_t Copy1bit16Subset(const uintptr_t* __restrict src_subset, const void* __restrict src_vals, const uintptr_t* __restrict sample_include, uint32_t src_subset_size, uint32_t sample_ct, uintptr_t* __restrict dst_subset, void* __restrict dst_vals);


=====================================
include/plink2_string.cc
=====================================
@@ -1047,6 +1047,31 @@ BoolErr ScanmovUintCapped(uint64_t cap, const char** str_iterp, uint32_t* valp)
   *str_iterp = str_iter;
   return ScanmovUintCappedFinish(cap, str_iterp, valp);
 }
+
+// 2^{-31} < floor <= 0 <= cap < 2^31
+BoolErr ScanmovIntBounded(uint64_t abs_floor, uint64_t cap, const char** str_iterp, int32_t* valp) {
+  const char* str_iter = *str_iterp;
+  *valp = ctou32(*str_iter++) - 48;
+  int32_t sign = 1;
+  if (ctou32(*valp) >= 10) {
+    if (*valp == -3) {
+      sign = -1;
+      cap = abs_floor;
+    } else if (unlikely(*valp != -5)) {  // accept leading '+'
+      return 1;
+    }
+    *valp = ctou32(*str_iter++) - 48;
+    if (unlikely(*valp >= 10)) {
+      return 1;
+    }
+  }
+  *str_iterp = str_iter;
+  if (ScanmovUintCappedFinish(cap, str_iterp, R_CAST(uint32_t*, valp))) {
+    return 1;
+  }
+  *valp *= sign;
+  return 0;
+}
 #else
 BoolErr ScanmovPosintCapped32(uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, uint32_t* valp) {
   const char* str_iter = *str_iterp;
@@ -1111,6 +1136,37 @@ BoolErr ScanmovUintCapped32(uint32_t cap_div_10, uint32_t cap_mod_10, const char
     val = val * 10 + cur_digit;
   }
 }
+
+BoolErr ScanmovIntBounded32(uint32_t abs_floor_div_10, uint32_t abs_floor_mod_10, uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, int32_t* valp) {
+  const char* str_iter = *str_iterp;
+  uint32_t val = ctou32(*str_iter++) - 48;
+  int32_t sign = 1;
+  if (val >= 10) {
+    if (val == 0xfffffffdU) {
+      sign = -1;
+      cap_div_10 = abs_floor_div_10;
+      cap_mod_10 = abs_floor_mod_10;
+    } else if (unlikely(val != 0xfffffffbU)) {
+      return 1;
+    }
+    val = ctou32(*str_iter++) - 48;
+    if (unlikely(val >= 10)) {
+      return 1;
+    }
+  }
+  for (; ; ++str_iter) {
+    const uint32_t cur_digit = ctou32(*str_iter) - 48;
+    if (cur_digit >= 10) {
+      *valp = sign * S_CAST(int32_t, val);
+      *str_iterp = str_iter;
+      return 0;
+    }
+    if (unlikely((val >= cap_div_10) && ((val > cap_div_10) || (cur_digit > cap_mod_10)))) {
+      return 1;
+    }
+    val = val * 10 + cur_digit;
+  }
+}
 #endif
 
 static const double kPositivePow10[] = {1, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, 1.0e7, 1.0e8, 1.0e9, 1.0e10, 1.0e11, 1.0e12, 1.0e13, 1.0e14, 1.0e15};
@@ -3091,6 +3147,30 @@ uintptr_t ExpsearchNsortStrLb(const char* idbuf, const char* nsorted_strbox, uin
   return start_idx;
 }
 
+uint32_t IsInfStr(const char* ss, uint32_t slen, uint32_t* is_neg_ptr) {
+  const char first_char = *ss;
+  if (first_char == '-') {
+    *is_neg_ptr = 1;
+    ++ss;
+    --slen;
+  } else if (first_char == '+') {
+    ++ss;
+    --slen;
+  }
+  if (slen == 3) {
+    uint32_t four_chars;
+    memcpy(&four_chars, ss, 4);  // OVERREAD
+    // assumes little-endian
+    return ((four_chars & 0xdfdfdf) == 0x464e49);
+  }
+  if (slen != 8) {
+    return 0;
+  }
+  uint64_t eight_chars;
+  memcpy(&eight_chars, ss, 8);
+  return ((eight_chars & 0xdfdfdfdfdfdfdfdfLLU) == 0x5954494e49464e49LLU);
+}
+
 
 #ifdef __LP64__
 CXXCONST_CP Memrchr(const char* str_start, char needle, uintptr_t slen) {


=====================================
include/plink2_string.h
=====================================
@@ -921,11 +921,16 @@ BoolErr ScanPosintptr(const char* str_iter, uintptr_t* valp);
 BoolErr ScanmovPosintCapped(uint64_t cap, const char** str_iterp, uint32_t* valp);
 
 BoolErr ScanmovUintCapped(uint64_t cap, const char** str_iterp, uint32_t* valp);
+
+// 2^{-31} < -abs_floor <= 0 <= cap < 2^31
+BoolErr ScanmovIntBounded(uint64_t abs_floor, uint64_t cap, const char** str_iterp, int32_t* valp);
 #else
 BoolErr ScanmovPosintCapped32(uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, uint32_t* valp);
 
 BoolErr ScanmovUintCapped32(uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, uint32_t* valp);
 
+BoolErr ScanmovIntBounded32(uint32_t abs_floor_div_10, uint32_t abs_floor_mod_10, uint32_t cap_div_10, uint32_t cap_mod_10, const char** str_iterp, int32_t* valp);
+
 HEADER_INLINE BoolErr ScanmovPosintCapped(uint32_t cap, const char** str_iterp, uint32_t* valp) {
  return ScanmovPosintCapped32(cap / 10, cap % 10, str_iterp, valp);
 }
@@ -933,6 +938,10 @@ HEADER_INLINE BoolErr ScanmovPosintCapped(uint32_t cap, const char** str_iterp,
 HEADER_INLINE BoolErr ScanmovUintCapped(uint32_t cap, const char** str_iterp, uint32_t* valp) {
  return ScanmovUintCapped32(cap / 10, cap % 10, str_iterp, valp);
 }
+
+HEADER_INLINE BoolErr ScanmovIntBounded(uint32_t abs_floor, uint32_t cap, const char** str_iterp, int32_t* valp) {
+  return ScanmovIntBounded32(abs_floor / 10, abs_floor % 10, cap / 10, cap % 10, str_iterp, valp);
+}
 #endif
 
 HEADER_INLINE BoolErr ScanmovUintDefcap(const char** str_iterp, uint32_t* valp) {
@@ -1524,6 +1533,11 @@ HEADER_INLINE uint32_t IsNanStr(const char* ss, uint32_t slen) {
   return (slen == 2) || ((ctou32(ss[2]) & 0xdf) == 78);
 }
 
+// Matches "inf"/"infinity", any capitalization, can have sign in front.
+// Assumes is_neg zero-initialized.
+// Assumes one-char overread is ok.
+uint32_t IsInfStr(const char* ss, uint32_t slen, uint32_t* is_neg_ptr);
+
 
 HEADER_INLINE CXXCONST_CP AdvToDelimOrEnd(const char* str_iter, const char* str_end, char delim) {
   CXXCONST_CP memchr_result = S_CAST(CXXCONST_CP, memchr(str_iter, delim, str_end - str_iter));


=====================================
pgenlibr/src/Makevars
=====================================
@@ -1,5 +1,5 @@
 SOURCES = $(wildcard libdeflate/lib/*.c) $(wildcard libdeflate/lib/x86/*.c)
-OBJECTS = include/plink2_base.o include/pgenlib_misc.o include/pgenlib_read.o pvar_ffi_support.o pgenlib_ffi_support.o include/plink2_bgzf.o include/plink2_string.o include/plink2_text.o include/plink2_thread.o include/plink2_zstfile.o pvar.o pgenlibr.o RcppExports.o $(SOURCES:.c=.o)
+OBJECTS = include/plink2_base.o include/plink2_bits.o include/pgenlib_misc.o include/pgenlib_read.o pvar_ffi_support.o pgenlib_ffi_support.o include/plink2_bgzf.o include/plink2_string.o include/plink2_text.o include/plink2_thread.o include/plink2_zstfile.o pvar.o pgenlibr.o RcppExports.o $(SOURCES:.c=.o)
 PKG_CFLAGS = -Ilibdeflate -Ilibdeflate/common
 PKG_CPPFLAGS =
 PKG_LIBS = -lpthread -lzstd


=====================================
plink2.cc
=====================================
@@ -66,7 +66,7 @@ static const char ver_str[] = "PLINK v2.00a3"
 #ifdef USE_MKL
   " Intel"
 #endif
-  " (17 Feb 2020)";
+  " (21 Mar 2020)";
 static const char ver_str2[] =
   // include leading space if day < 10, so character length stays the same
   ""
@@ -2523,12 +2523,12 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
         if (pcp->command_flags1 & kfCommand1MakePlink2) {
           // todo: unsorted case (--update-chr, etc.)
           if (pcp->sort_vars_flags != kfSort0) {
-            reterr = MakePlink2Vsort(xheader, sample_include, &pii, sex_nm, sex_male, pheno_cols, pheno_names, new_sample_idx_to_old, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, info_reload_slen? pvarname : nullptr, variant_cms, chr_idxs, xheader_blen, info_flags, raw_sample_ct, sample_ct, pheno_ct, max_pheno_name_blen, raw_variant_ct, variant_ct, max_allele_ct, max_allele_slen, max_filter_slen, info_reload_slen, pcp->max_thread_ct, pcp->hard_call_thresh, pcp->dosage_erase_thresh, make_plink2_flags, (pcp->sort_vars_flags == kfSortNatural), pcp->pvar_psam_flags, &simple_pgr, outname, outname_end);
+            reterr = MakePlink2Vsort(sample_include, &pii, sex_nm, sex_male, pheno_cols, pheno_names, new_sample_idx_to_old, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, info_reload_slen? pvarname : nullptr, variant_cms, chr_idxs, xheader_blen, info_flags, raw_sample_ct, sample_ct, pheno_ct, max_pheno_name_blen, raw_variant_ct, variant_ct, max_allele_ct, max_allele_slen, max_filter_slen, info_reload_slen, pcp->max_thread_ct, pcp->hard_call_thresh, pcp->dosage_erase_thresh, make_plink2_flags, (pcp->sort_vars_flags == kfSortNatural), pcp->pvar_psam_flags, xheader, &simple_pgr, outname, outname_end);
           } else {
             if (vpos_sortstatus & kfUnsortedVarBp) {
               logerrputs("Warning: Variants are not sorted by position.  Consider rerunning with the\n--sort-vars flag added to remedy this.\n");
             }
-            reterr = MakePlink2NoVsort(xheader, sample_include, &pii, sex_nm, sex_male, pheno_cols, pheno_names, new_sample_idx_to_old, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, info_reload_slen? pvarname : nullptr, variant_cms, pcp->varid_template_str, pcp->varid_multi_template_str, pcp->varid_multi_nonsnp_template_str, pcp->missing_varid_match, xheader_blen, info_flags, raw_sample_ct, sample_ct, pheno_ct, max_pheno_name_blen, raw_variant_ct, variant_ct, max_allele_ct, max_allele_slen, max_filter_slen, info_reload_slen, pcp->max_thread_ct, pcp->hard_call_thresh, pcp->dosage_erase_thresh, pcp->new_variant_id_max_allele_slen, pcp->misc_flags, make_plink2_flags, pcp->pvar_psam_flags, pgr_alloc_cacheline_ct, &pgfi, &simple_pgr, outname, outname_end);
+            reterr = MakePlink2NoVsort(sample_include, &pii, sex_nm, sex_male, pheno_cols, pheno_names, new_sample_idx_to_old, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, info_reload_slen? pvarname : nullptr, variant_cms, pcp->varid_template_str, pcp->varid_multi_template_str, pcp->varid_multi_nonsnp_template_str, pcp->missing_varid_match, xheader_blen, info_flags, raw_sample_ct, sample_ct, pheno_ct, max_pheno_name_blen, raw_variant_ct, variant_ct, max_allele_ct, max_allele_slen, max_filter_slen, info_reload_slen, vpos_sortstatus, pcp->max_thread_ct, pcp->hard_call_thresh, pcp->dosage_erase_thresh, pcp->new_variant_id_max_allele_slen, pcp->misc_flags, make_plink2_flags, pcp->pvar_psam_flags, pgr_alloc_cacheline_ct, xheader, &pgfi, &simple_pgr, outname, outname_end);
           }
           if (unlikely(reterr)) {
             goto Plink2Core_ret_1;
@@ -2538,7 +2538,7 @@ PglErr Plink2Core(const Plink2Cmdline* pcp, MakePlink2Flags make_plink2_flags, c
         }
 
         if (pcp->command_flags1 & kfCommand1Exportf) {
-          reterr = Exportf(sample_include, &pii, sex_nm, sex_male, pheno_cols, pheno_names, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, info_reload_slen? pvarname : nullptr, variant_cms, &(pcp->exportf_info), xheader_blen, info_flags, raw_sample_ct, sample_ct, pheno_ct, max_pheno_name_blen, raw_variant_ct, variant_ct, max_variant_id_slen, max_allele_slen, max_filter_slen, info_reload_slen, pcp->max_thread_ct, make_plink2_flags, pgr_alloc_cacheline_ct, xheader, &pgfi, &simple_pgr, outname, outname_end);
+          reterr = Exportf(sample_include, &pii, sex_nm, sex_male, pheno_cols, pheno_names, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, info_reload_slen? pvarname : nullptr, variant_cms, &(pcp->exportf_info), xheader_blen, info_flags, raw_sample_ct, sample_ct, pheno_ct, max_pheno_name_blen, raw_variant_ct, variant_ct, max_variant_id_slen, max_allele_slen, max_filter_slen, info_reload_slen, vpos_sortstatus, pcp->max_thread_ct, make_plink2_flags, pgr_alloc_cacheline_ct, xheader, &pgfi, &simple_pgr, outname, outname_end);
           if (unlikely(reterr)) {
             goto Plink2Core_ret_1;
           }
@@ -5943,8 +5943,19 @@ int main(int argc, char** argv) {
           }
           pc.filter_flags |= kfFilterPsamReq;
         } else if (strequal_k_unsafe(flagname_p2, "eep-autoconv")) {
+          if (unlikely(EnforceParamCtRange(argvk[arg_idx], param_ct, 0, 1))) {
+            goto main_ret_INVALID_CMDLINE_2A;
+          }
+          if (param_ct) {
+            const char* cur_modif = argvk[arg_idx + 1];
+            if (likely(!strcmp(cur_modif, "vzs"))) {
+              import_flags |= kfImportKeepAutoconvVzs;
+            } else {
+              snprintf(g_logbuf, kLogbufSize, "Error: Invalid --keep-autoconv argument '%s'.\n", cur_modif);
+              goto main_ret_INVALID_CMDLINE_WWA;
+            }
+          }
           import_flags |= kfImportKeepAutoconv;
-          goto main_param_zero;
         } else if (strequal_k_unsafe(flagname_p2, "eep-females")) {
           pc.filter_flags |= kfFilterPsamReq | kfFilterExclMales | kfFilterExclNosex;
           goto main_param_zero;
@@ -6424,12 +6435,12 @@ int main(int argc, char** argv) {
                 goto main_ret_INVALID_CMDLINE;
               }
               explicit_pvar_cols = 1;
-              reterr = ParseColDescriptor(&(cur_modif[strlen("pvar-cols=")]), "xheader\0maybequal\0qual\0maybefilter\0filter\0maybeinfo\0info\0maybecm\0cm\0", "--make-pgen pvar-cols", kfPvarColXheader, kfPvarColDefault, 0, &pc.pvar_psam_flags);
+              reterr = ParseColDescriptor(&(cur_modif[strlen("pvar-cols=")]), "xheader\0vcfheader\0maybequal\0qual\0maybefilter\0filter\0maybeinfo\0info\0maybecm\0cm\0", "--make-pgen pvar-cols", kfPvarColXheader, kfPvarColDefault, 0, &pc.pvar_psam_flags);
               if (unlikely(reterr)) {
                 goto main_ret_1;
               }
-              if (unlikely((pc.pvar_psam_flags & kfPvarColXinfo) && (!(pc.pvar_psam_flags & kfPvarColXheader)))) {
-                logerrputs("Error: --make-pgen pvar-cols= expression cannot exclude xheader when info is\npresent.\n");
+              if (unlikely((pc.pvar_psam_flags & kfPvarColXinfo) && (!(pc.pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader))))) {
+                logerrputs("Error: --make-pgen pvar-cols= expression cannot exclude xheader/vcfheader when\ninfo is present.\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
             } else if (StrStartsWith(cur_modif, "format=", cur_modif_slen)) {
@@ -6585,12 +6596,12 @@ int main(int argc, char** argv) {
                 goto main_ret_INVALID_CMDLINE;
               }
               explicit_cols = 1;
-              reterr = ParseColDescriptor(&(cur_modif[5]), "xheader\0maybequal\0qual\0maybefilter\0filter\0maybeinfo\0info\0maybecm\0cm\0", "make-just-pvar", kfPvarColXheader, kfPvarColDefault, 0, &pc.pvar_psam_flags);
+              reterr = ParseColDescriptor(&(cur_modif[5]), "xheader\0vcfheader\0maybequal\0qual\0maybefilter\0filter\0maybeinfo\0info\0maybecm\0cm\0", "make-just-pvar", kfPvarColXheader, kfPvarColDefault, 0, &pc.pvar_psam_flags);
               if (unlikely(reterr)) {
                 goto main_ret_1;
               }
-              if (unlikely((pc.pvar_psam_flags & kfPvarColXinfo) && (!(pc.pvar_psam_flags & kfPvarColXheader)))) {
-                logerrputs("Error: --make-just-pvar cols= expression cannot exclude xheader when info is\npresent.\n");
+              if (unlikely((pc.pvar_psam_flags & kfPvarColXinfo) && (!(pc.pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader))))) {
+                logerrputs("Error: --make-just-pvar cols= expression cannot exclude xheader/vcfheader when\ninfo is present.\n");
                 goto main_ret_INVALID_CMDLINE_A;
               }
               if (pc.pvar_psam_flags & kfPvarColInfo) {
@@ -9858,29 +9869,53 @@ int main(int argc, char** argv) {
         const uint32_t convname_slen = convname_end - outname;
         uint32_t pgen_generated = 1;
         uint32_t psam_generated = 1;
+
+        // Compress by default for VCF/BCF due to potentially large INFO
+        // section; otherwise only do it in "--keep-autoconv vzs" case.
+        uint32_t pvar_is_compressed;
         if (xload & (kfXloadVcf | kfXloadBcf)) {
           const uint32_t no_samples_ok = !(pc.dependency_flags & (kfFilterAllReq | kfFilterPsamReq));
           const uint32_t is_vcf = (xload / kfXloadVcf) & 1;
+          pvar_is_compressed = ((import_flags & (kfImportKeepAutoconv | kfImportKeepAutoconvVzs)) != kfImportKeepAutoconv);
           if (no_samples_ok && is_vcf && (!(import_flags & kfImportKeepAutoconv)) && pc.command_flags1) {
             // special case: just treat the VCF as a .pvar file
             strcpy(pvarname, pgenname);
             pgenname[0] = '\0';
             goto main_reinterpret_vcf_instead_of_converting;
-          } else if (is_vcf) {
+          }
+          // Default to compression level 1 for temporary .pvar files for now.
+          //
+          // Level 1 may actually be best in a much wider variety of scenarios
+          // as of this writing, but I won't try to tune any other compression
+          // defaults for now.  Interestingly, the current setup is actually a
+          // bit backwards given observed zstd behavior: the long INFO fields
+          // motivating automatic compression here are best handled with level
+          // 3, while the other simpler text files generated by plink2 are
+          // likely to compress *better*, not just faster, with level 1 for
+          // some reason.
+          const uint32_t zst_level = g_zst_level;
+          if (!(import_flags & kfImportKeepAutoconv)) {
+            g_zst_level = 1;
+          }
+          if (is_vcf) {
             reterr = VcfToPgen(pgenname, (load_params & kfLoadParamsPsam)? psamname : nullptr, const_fid, vcf_dosage_import_field, pc.misc_flags, import_flags, no_samples_ok, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, id_delim, idspace_to, vcf_min_gq, vcf_min_dp, vcf_max_dp, vcf_half_call, pc.fam_cols, pc.max_thread_ct, outname, convname_end, &chr_info, &pgen_generated, &psam_generated);
           } else {
             reterr = BcfToPgen(pgenname, (load_params & kfLoadParamsPsam)? psamname : nullptr, const_fid, vcf_dosage_import_field, pc.misc_flags, import_flags, no_samples_ok, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, id_delim, idspace_to, vcf_min_gq, vcf_min_dp, vcf_max_dp, vcf_half_call, pc.fam_cols, pc.max_thread_ct, outname, convname_end, &chr_info, &pgen_generated, &psam_generated);
           }
-        } else if (xload & kfXloadOxGen) {
-          reterr = OxGenToPgen(pgenname, psamname, import_single_chr_str, ox_missing_code, pc.misc_flags, import_flags, oxford_import_flags, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, pc.max_thread_ct, outname, convname_end, &chr_info);
-        } else if (xload & kfXloadOxBgen) {
-          reterr = OxBgenToPgen(pgenname, psamname, const_fid, import_single_chr_str, ox_missing_code, pc.misc_flags, import_flags, oxford_import_flags, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, id_delim, idspace_to, pc.max_thread_ct, outname, convname_end, &chr_info);
-        } else if (xload & kfXloadOxHaps) {
-          reterr = OxHapslegendToPgen(pgenname, pvarname, psamname, import_single_chr_str, ox_missing_code, pc.misc_flags, import_flags, oxford_import_flags, pc.max_thread_ct, outname, convname_end, &chr_info);
-        } else if (xload & kfXloadPlink1Dosage) {
-          reterr = Plink1DosageToPgen(pgenname, psamname, (xload & kfXloadMap)? pvarname : nullptr, import_single_chr_str, &plink1_dosage_info, pc.misc_flags, import_flags, pc.fam_cols, pc.missing_pheno, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, pc.max_thread_ct, outname, convname_end, &chr_info);
-        } else if (xload & kfXloadGenDummy) {
-          reterr = GenerateDummy(&gendummy_info, pc.misc_flags, import_flags, pc.hard_call_thresh, pc.dosage_erase_thresh, pc.max_thread_ct, &main_sfmt, outname, convname_end, &chr_info);
+          g_zst_level = zst_level;
+        } else {
+          pvar_is_compressed = (import_flags / kfImportKeepAutoconvVzs) & 1;
+          if (xload & kfXloadOxGen) {
+            reterr = OxGenToPgen(pgenname, psamname, import_single_chr_str, ox_missing_code, pc.misc_flags, import_flags, oxford_import_flags, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, pc.max_thread_ct, outname, convname_end, &chr_info);
+          } else if (xload & kfXloadOxBgen) {
+            reterr = OxBgenToPgen(pgenname, psamname, const_fid, import_single_chr_str, ox_missing_code, pc.misc_flags, import_flags, oxford_import_flags, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, id_delim, idspace_to, pc.max_thread_ct, outname, convname_end, &chr_info);
+          } else if (xload & kfXloadOxHaps) {
+            reterr = OxHapslegendToPgen(pgenname, pvarname, psamname, import_single_chr_str, ox_missing_code, pc.misc_flags, import_flags, oxford_import_flags, pc.max_thread_ct, outname, convname_end, &chr_info);
+          } else if (xload & kfXloadPlink1Dosage) {
+            reterr = Plink1DosageToPgen(pgenname, psamname, (xload & kfXloadMap)? pvarname : nullptr, import_single_chr_str, &plink1_dosage_info, pc.misc_flags, import_flags, pc.fam_cols, pc.missing_pheno, pc.hard_call_thresh, pc.dosage_erase_thresh, import_dosage_certainty, pc.max_thread_ct, outname, convname_end, &chr_info);
+          } else if (xload & kfXloadGenDummy) {
+            reterr = GenerateDummy(&gendummy_info, pc.misc_flags, import_flags, pc.hard_call_thresh, pc.dosage_erase_thresh, pc.max_thread_ct, &main_sfmt, outname, convname_end, &chr_info);
+          }
         }
         if (reterr || (!pc.command_flags1)) {
           goto main_ret_1;
@@ -9893,7 +9928,7 @@ int main(int argc, char** argv) {
         if (pgen_generated) {
           snprintf(memcpya(pgenname, outname, convname_slen), kMaxOutfnameExtBlen - 10, ".pgen");
         }
-        snprintf(memcpya(pvarname, outname, convname_slen), kMaxOutfnameExtBlen - 10, ".pvar");
+        snprintf(memcpya(pvarname, outname, convname_slen), kMaxOutfnameExtBlen - 10, pvar_is_compressed? ".pvar.zst" : ".pvar");
         if (psam_generated) {
           snprintf(memcpya(psamname, outname, convname_slen), kMaxOutfnameExtBlen - 10, ".psam");
         }


=====================================
plink2_cmdline.cc
=====================================
@@ -3856,6 +3856,10 @@ PglErr CmdlineParsePhase2(const char* ver_str, const char* errstr_append, const
       uint32_t arg_idx = flag_map[cur_flag_idx] + 1;
       while ((arg_idx < S_CAST(uint32_t, argc)) && (!IsCmdlineFlag(argvk[arg_idx]))) {
         logputs(" ");
+        // Thought about special-casing argvk[arg_idx] == " ", so that
+        // "--id-delim ' '" actually works with --rerun, but that adds too much
+        // complexity to the rereader to be worth it.  Instead we just document
+        // the incompatibility.
         logputs(argvk[arg_idx++]);
       }
       logputs("\n");


=====================================
plink2_cmdline.h
=====================================
@@ -1761,12 +1761,35 @@ typedef struct CmpExprStruct {
   // arguably too small for noncopyable to be a great idea, but the next
   // iteration of this struct probably won't have that issue.
   NONCOPYABLE(CmpExprStruct);
-  // Restrict to [pheno/covar name] [operator] [pheno val] for now.  could
-  // support or/and, parentheses, etc. later.
-
-  // Currently stores null-terminated pheno/covar name, followed by
-  // null-terminated value string.  Storage format needs to be synced with
-  // ValidateAndAllocCmpExpr().
+  // Restrict to <key> <operator> <value> for now; key=INFO/pheno/covar.
+  //
+  // Almost certainly want to support conjunctions/disjunctions later; but
+  // there are complications regarding the command-line interface:
+  // * Phenotype/covariate names and VCFv4.2 INFO keys can contain parenthesis
+  //   and mathematical-operator characters.  And we actually need to permit at
+  //   least the latter, since autogenerated categorical-covariate names
+  //   contain '=' for a good reason.  Some sort of escaping is probably in
+  //   in order, but...
+  // * Escaping is not a big deal in a shell script, but most
+  //   conjunctions/disjunctions can already be emulated easily enough in a
+  //   shell script by calling plink2 multiple times.  While there's some
+  //   execution-time improvement, the main value-add from directly supporting
+  //   logical operations in --keep-if/--extract-if-info is reduced
+  //   interactive-use cognitive load.
+  // * We can't treat multiple command-line arguments differently from the
+  //   corresponding space-containing single command-line argument; otherwise
+  //   --rerun breaks.  So "multiple-argument-form supports keys with special
+  //   characters but not conjunctions/disjunctions; single-argument-form is
+  //   the other way around" is not an option.
+  // The least-bad solution I've thought of is to add --keep-if-file/etc. flags
+  // which read the expression from a short text file.  In that case, we'd at
+  // least be able to define normal quoting and escaping rules without worrying
+  // about confusing interactions with bash.  Deferring implementation for now
+  // in hopes of coming up with a better idea, but this should go in before
+  // beta testing begins.
+
+  // Currently stores null-terminated key, followed by null-terminated value
+  // string.  Storage format needs to be synced with ValidateAndAllocCmpExpr().
   char* pheno_name;
   CmpBinaryOp binary_op;
 } CmpExpr;


=====================================
plink2_common.cc
=====================================
@@ -260,6 +260,56 @@ void PopulateRescaledDosage(const uintptr_t* genoarr, const uintptr_t* dosage_pr
   }
 }
 
+static_assert(sizeof(AlleleCode) == 1, "AtLeastOneMultiallelicHet() needs to be updated.");
+uint32_t AtLeastOneMultiallelicHet(const PgenVariant* pgvp, uint32_t sample_ct) {
+  if (pgvp->patch_01_ct) {
+    return 1;
+  }
+  {
+    const uintptr_t* genoarr = pgvp->genovec;
+    const uint32_t fullvec_ct = sample_ct / kBitsPerWordD2;
+    for (uint32_t uii = 0; uii != fullvec_ct; ++uii) {
+      const uintptr_t geno_word = genoarr[uii];
+      if (Word01(geno_word)) {
+        return 1;
+      }
+    }
+    const uint32_t remainder = sample_ct % kBitsPerWordD2;
+    if (remainder) {
+      if (Word01(bzhi(genoarr[fullvec_ct], 2 * remainder))) {
+        return 1;
+      }
+    }
+  }
+  const uint32_t patch_10_ct = pgvp->patch_10_ct;
+  if (patch_10_ct) {
+    const AlleleCode* patch_10_vals = pgvp->patch_10_vals;
+#ifdef __LP64__
+    const uint32_t fullvec_ct = patch_10_ct / (kBytesPerVec / 2);
+    const VecU16* patch_10_valias = R_CAST(const VecU16*, patch_10_vals);
+    const VecU16 m8 = vecu16_set1(0xff);
+    for (uint32_t vidx = 0; vidx != fullvec_ct; ++vidx) {
+      const VecU16 vv_orig = vecu16_loadu(&(patch_10_valias[vidx]));
+      const VecU16 vv_hi = vecu16_srli(vv_orig, 8);
+      const VecU16 vv_lo = vv_orig & m8;
+      const VecU16 vv_eq = (vv_hi == vv_lo);
+      if (vecu16_movemask(vv_eq) != kVec8thUintMax) {
+        return 1;
+      }
+    }
+    uint32_t uii = fullvec_ct * (kBytesPerVec / 2);
+#else
+    uint32_t uii = 0;
+#endif
+    for (; uii != patch_10_ct; ++uii) {
+      if (patch_10_vals[2 * uii] != patch_10_vals[2 * uii + 1]) {
+        return 1;
+      }
+    }
+  }
+  return 0;
+}
+
 void SetHetMissing(uintptr_t word_ct, uintptr_t* genovec) {
   // 01 -> 11, nothing else changes
 #ifdef __LP64__
@@ -1401,9 +1451,8 @@ char* ChrNameStd(const ChrInfo* cip, uint32_t chr_idx, char* buf) {
 char* chrtoa(const ChrInfo* cip, uint32_t chr_idx, char* buf) {
   // assumes chr_idx is valid
   if (!chr_idx) {
-    // TODO: probably add 'chr' in front here when output encoding calls for
-    // it, but wait till beta since this would technically be
-    // compatibility-breaking
+    // Note that this never has a 'chr' prefix.  Which is probably fine, it
+    // isn't a real chromosome.
     *buf++ = '0';
     return buf;
   }
@@ -1581,9 +1630,12 @@ uint32_t GetChrCode(const char* chr_name, const ChrInfo* cip, uint32_t name_slen
 }
 
 uint32_t GetChrCodeCounted(const ChrInfo* cip, uint32_t name_slen, char* chr_name) {
-  // when the chromosome name isn't null-terminated
-  // (yeah, probably want to revise the called functions so that chr_name
-  // doesn't need to be mutable here)
+  // When the chromosome name isn't null-terminated.
+  // Yeah, probably want to revise this so that chr_name doesn't need to be
+  // mutable here.  However, that currently requires new substitutes for both
+  // GetChrCodeRaw AND IdHtableFind (IdHtableFindNnt doesn't work due to
+  // overread); when either of those are needed for some other reason, that's a
+  // good time to revise this function.
   char* s_end = &(chr_name[name_slen]);
   const char tmpc = *s_end;
   *s_end = '\0';


=====================================
plink2_common.h
=====================================
@@ -201,26 +201,27 @@ FLAGSET_DEF_START()
   kfPvarZs = (1 << 0),
 
   kfPvarColXheader = (1 << 1),
-  kfPvarColMaybequal = (1 << 2),
-  kfPvarColQual = (1 << 3),
-  kfPvarColMaybefilter = (1 << 4),
-  kfPvarColFilter = (1 << 5),
-  kfPvarColMaybeinfo = (1 << 6),
-  kfPvarColInfo = (1 << 7),
+  kfPvarColVcfheader = (1 << 2),
+  kfPvarColMaybequal = (1 << 3),
+  kfPvarColQual = (1 << 4),
+  kfPvarColMaybefilter = (1 << 5),
+  kfPvarColFilter = (1 << 6),
+  kfPvarColMaybeinfo = (1 << 7),
+  kfPvarColInfo = (1 << 8),
   kfPvarColXinfo = (kfPvarColInfo * 2) - kfPvarColMaybeinfo,
-  kfPvarColMaybecm = (1 << 8),
-  kfPvarColCm = (1 << 9),
+  kfPvarColMaybecm = (1 << 9),
+  kfPvarColCm = (1 << 10),
   kfPvarColDefault = (kfPvarColXheader | kfPvarColMaybequal | kfPvarColMaybefilter | kfPvarColMaybeinfo | kfPvarColMaybecm),
   kfPvarColAll = ((kfPvarColCm * 2) - kfPvarColXheader),
-  kfPsamColMaybefid = (1 << 10),
-  kfPsamColFid = (1 << 11),
-  kfPsamColMaybesid = (1 << 12),
-  kfPsamColSid = (1 << 13),
-  kfPsamColMaybeparents = (1 << 14),
-  kfPsamColParents = (1 << 15),
-  kfPsamColSex = (1 << 16),
-  kfPsamColPheno1 = (1 << 17),
-  kfPsamColPhenos = (1 << 18),
+  kfPsamColMaybefid = (1 << 11),
+  kfPsamColFid = (1 << 12),
+  kfPsamColMaybesid = (1 << 13),
+  kfPsamColSid = (1 << 14),
+  kfPsamColMaybeparents = (1 << 15),
+  kfPsamColParents = (1 << 16),
+  kfPsamColSex = (1 << 17),
+  kfPsamColPheno1 = (1 << 18),
+  kfPsamColPhenos = (1 << 19),
   kfPsamColDefault = (kfPsamColMaybefid | kfPsamColMaybesid | kfPsamColMaybeparents | kfPsamColSex | kfPsamColPhenos),
   kfPsamColAll = ((kfPsamColPhenos * 2) - kfPsamColMaybefid)
 FLAGSET_DEF_END(PvarPsamFlags);
@@ -382,6 +383,20 @@ void PopulateDenseDosage(const uintptr_t* genoarr, const uintptr_t* dosage_prese
 
 void PopulateRescaledDosage(const uintptr_t* genoarr, const uintptr_t* dosage_present, const Dosage* dosage_main, double slope, double intercept, double missing_val, uint32_t sample_ct, uint32_t dosage_ct, double* expanded_dosages);
 
+// assumes trailing bits of genoarr are zeroed out
+HEADER_INLINE uint32_t AtLeastOneHetUnsafe(const uintptr_t* genoarr, uint32_t sample_ct) {
+  const uint32_t sample_ctl2 = DivUp(sample_ct, kBitsPerWordD2);
+  for (uint32_t uii = 0; uii != sample_ctl2; ++uii) {
+    const uintptr_t geno_word = genoarr[uii];
+    if (Word01(geno_word)) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+uint32_t AtLeastOneMultiallelicHet(const PgenVariant* pgvp, uint32_t sample_ct);
+
 void SetHetMissing(uintptr_t word_ct, uintptr_t* genovec);
 
 void SetHetMissingCleardosage(uintptr_t word_ct, uintptr_t* genovec, uint32_t* write_dosage_ct_ptr, uintptr_t* dosagepresent, Dosage* dosage_main);
@@ -691,6 +706,7 @@ PglErr FinalizeChrInfo(ChrInfo* cip);
 void CleanupChrInfo(ChrInfo* cip);
 
 // assumes chr_idx is valid
+// note that chr_idx == 0 is always rendered as '0', never 'chr0'
 char* chrtoa(const ChrInfo* cip, uint32_t chr_idx, char* buf);
 
 uint32_t GetMaxChrSlen(const ChrInfo* cip);
@@ -1068,6 +1084,17 @@ HEADER_INLINE BoolErr StoreStringAtEndK(unsigned char* arena_bottom, const char*
   return 0;
 }
 
+HEADER_INLINE BoolErr StoreStringAndPrecharAtEnd(unsigned char* arena_bottom, const char* src, unsigned char prechar, uintptr_t slen, unsigned char** arena_top_ptr, char** dst) {
+  if (PtrWSubCk(arena_bottom, slen + 2, arena_top_ptr)) {
+    return 1;
+  }
+  **arena_top_ptr = prechar;
+  char* dst_write = 1 + R_CAST(char*, *arena_top_ptr);
+  memcpyx(dst_write, src, slen, '\0');
+  *dst = dst_write;
+  return 0;
+}
+
 // These use g_textbuf.
 PglErr WriteSampleIdsOverride(const uintptr_t* sample_include, const SampleIdInfo* siip, const char* outname, uint32_t sample_ct, SampleIdFlags override_flags);
 HEADER_INLINE PglErr WriteSampleIds(const uintptr_t* sample_include, const SampleIdInfo* siip, const char* outname, uint32_t sample_ct) {


=====================================
plink2_data.cc
=====================================
@@ -20,6 +20,8 @@
 #include "plink2_data.h"
 #include "plink2_pvar.h"
 
+#include <time.h>
+
 #ifdef __cplusplus
 namespace plink2 {
 #endif
@@ -266,7 +268,262 @@ void AppendChrsetLine(const ChrInfo* cip, char** write_iter_ptr) {
   AppendBinaryEoln(write_iter_ptr);
 }
 
-PglErr WritePvar(const char* outname, const char* xheader, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct) {
+// fileformat, fileDate, source
+void AppendVcfHeaderStart(uint32_t v43, char** cswritepp) {
+  char* cswritep = *cswritepp;
+  cswritep = strcpya_k(cswritep, "##fileformat=VCFv4.");
+  *cswritep++ = v43 + '2';
+  cswritep = strcpya_k(cswritep, EOLN_STR "##fileDate=");
+  time_t rawtime;
+  time(&rawtime);
+  const struct tm* loctime = localtime(&rawtime);
+  cswritep += strftime(cswritep, kMaxMediumLine, "%Y%m%d", loctime);
+  cswritep = strcpya_k(cswritep, EOLN_STR "##source=PLINKv2.00" EOLN_STR);
+  *cswritepp = cswritep;
+  return;
+}
+
+// Note that the order-of-operations page lists this as happening right after
+// the filtering performed by LoadPvar().  Which is effectively true, since we
+// ignore variant_include (this is safe since LoadPvar() always initializes
+// all variant_bps[] and allele_storage[] entries appropriately).
+// possible todo: ChrInfo can have a length field, which is initialized by the
+// ##contig header line when possible, but when that doesn't exist LoadPvar()
+// can conditionally detect INFO:END and take that into account.  (Or a reason
+// to keep the entire info_end array in memory may emerge.)
+uint32_t ChrLenLbound(const ChrInfo* cip, const uint32_t* variant_bps, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uint32_t* new_variant_idx_to_old, uint32_t chr_fo_idx, uint32_t max_allele_slen, UnsortedVar vpos_sortstatus) {
+  const uint32_t vidx_start = cip->chr_fo_vidx_start[chr_fo_idx];
+  const uint32_t vidx_end = cip->chr_fo_vidx_start[chr_fo_idx + 1];
+  assert(vidx_start != vidx_end);
+  if (!(vpos_sortstatus & kfUnsortedVarBp)) {
+    if (!new_variant_idx_to_old) {
+      if (max_allele_slen == 1) {
+        return variant_bps[vidx_end - 1];
+      }
+      uint32_t bp_end = 0;
+      for (uint32_t vidx = vidx_end; vidx != vidx_start; ) {
+        --vidx;
+        const uint32_t cur_bp = variant_bps[vidx];
+        if (cur_bp + max_allele_slen <= bp_end) {
+          break;
+        }
+        uintptr_t allele_idx_offset_base = vidx * 2;
+        if (allele_idx_offsets) {
+          allele_idx_offset_base = allele_idx_offsets[vidx];
+        }
+        // We only care about reference-allele length.
+        const uint32_t cur_bp_end = cur_bp + strlen(allele_storage[allele_idx_offset_base]) - 1;
+        if (cur_bp_end > bp_end) {
+          bp_end = cur_bp_end;
+        }
+      }
+      return bp_end;
+    }
+    if (max_allele_slen == 1) {
+      return variant_bps[new_variant_idx_to_old[vidx_end - 1]];
+    }
+    uint32_t bp_end = 0;
+    for (uint32_t new_vidx = vidx_end; new_vidx != vidx_start; ) {
+      --new_vidx;
+      const uint32_t old_vidx = new_variant_idx_to_old[new_vidx];
+      const uint32_t cur_bp = variant_bps[old_vidx];
+      if (cur_bp + max_allele_slen <= bp_end) {
+        break;
+      }
+      uintptr_t allele_idx_offset_base = old_vidx * 2;
+      if (allele_idx_offsets) {
+        allele_idx_offset_base = allele_idx_offsets[old_vidx];
+      }
+      const uint32_t cur_bp_end = cur_bp + strlen(allele_storage[allele_idx_offset_base]) - 1;
+      if (cur_bp_end > bp_end) {
+        bp_end = cur_bp_end;
+      }
+    }
+    return bp_end;
+  }
+  uint32_t bp_end = U32ArrMax(&(variant_bps[vidx_start]), vidx_end - vidx_start);
+  if (max_allele_slen == 1) {
+    return bp_end;
+  }
+  uint32_t min_check_bp = 0;
+  if (bp_end >= max_allele_slen) {
+    min_check_bp = bp_end + 1 - max_allele_slen;
+  }
+  for (uint32_t vidx = vidx_start; vidx != vidx_end; ++vidx) {
+    const uint32_t cur_bp = variant_bps[vidx];
+    if (cur_bp < min_check_bp) {
+      continue;
+    }
+    uintptr_t allele_idx_offset_base = vidx * 2;
+    if (allele_idx_offsets) {
+      allele_idx_offset_base = allele_idx_offsets[vidx];
+    }
+    const uint32_t cur_bp_end = cur_bp + strlen(allele_storage[allele_idx_offset_base]) - 1;
+    if (cur_bp_end > bp_end) {
+      bp_end = cur_bp_end;
+    }
+  }
+  return bp_end;
+}
+
+PglErr PvarXheaderWrite(const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uint32_t* new_variant_idx_to_old, uintptr_t xheader_blen, uint32_t vcfheader, uint32_t write_filter, uint32_t write_info, uint32_t append_info_pr_header_line, uint32_t max_allele_slen, UnsortedVar vpos_sortstatus, char* xheader, CompressStreamState* css_ptr, char** cswritepp) {
+  unsigned char* bigstack_mark = g_bigstack_base;
+  PglErr reterr = kPglRetSuccess;
+  {
+    if (!vcfheader) {
+      if (write_filter && write_info) {
+        if (unlikely(CsputsStd(xheader, xheader_blen, css_ptr, cswritepp))) {
+          goto PvarXheaderWrite_ret_WRITE_FAIL;
+        }
+      } else {
+        // Filter out FILTER/INFO definitions iff the corresponding column has
+        // been removed.
+        const char* copy_start = xheader;
+        const char* xheader_end = &(xheader[xheader_blen]);
+        for (const char* xheader_iter = xheader; xheader_iter != xheader_end; ) {
+          const char* next_line_start = AdvPastDelim(xheader_iter, '\n');
+          if (((!write_filter) && StrStartsWithUnsafe(xheader_iter, "##FILTER=<ID=")) ||
+              ((!write_info) && StrStartsWithUnsafe(xheader_iter, "##INFO=<ID="))) {
+            if (copy_start != xheader_iter) {
+              if (unlikely(CsputsStd(copy_start, xheader_iter - copy_start, css_ptr, cswritepp))) {
+                goto PvarXheaderWrite_ret_WRITE_FAIL;
+              }
+            }
+            copy_start = next_line_start;
+          }
+          xheader_iter = next_line_start;
+        }
+        if (copy_start != xheader_end) {
+          if (unlikely(CsputsStd(copy_start, xheader_end - copy_start, css_ptr, cswritepp))) {
+            goto PvarXheaderWrite_ret_WRITE_FAIL;
+          }
+        }
+      }
+    } else {
+      // See the start of ExportVcf().
+      AppendVcfHeaderStart(1, cswritepp);
+      const uint32_t chr_ctl = BitCtToWordCt(cip->chr_ct);
+      uintptr_t* written_contig_header_lines;
+      if (unlikely(bigstack_calloc_w(chr_ctl, &written_contig_header_lines))) {
+        goto PvarXheaderWrite_ret_NOMEM;
+      }
+      uint32_t contig_zero_written = 0;
+      char* cswritep = *cswritepp;
+      // ExportVcf() has to perform a customized --merge-par operation, so it
+      // has special handling of chrX/PAR1/PAR2 ##contig header lines.  We omit
+      // that here.
+      char* xheader_end = &(xheader[xheader_blen]);
+      for (char* line_end = xheader; line_end != xheader_end; ) {
+        char* line_start = line_end;
+        line_end = AdvPastDelim(line_start, '\n');
+        const uint32_t slen = line_end - line_start;
+        if ((slen > 14) && StrStartsWithUnsafe(line_start, "##contig=<ID=")) {
+          char* contig_name_start = &(line_start[13]);
+          char* contig_name_end = S_CAST(char*, memchr(contig_name_start, ',', slen - 14));
+          if (!contig_name_end) {
+            // if this line is technically well-formed (ends in '>'), it's
+            // useless anyway, throw it out
+            continue;
+          }
+          const uint32_t chr_idx = GetChrCodeCounted(cip, contig_name_end - contig_name_start, contig_name_start);
+          if (IsI32Neg(chr_idx) || (!IsSet(cip->chr_mask, chr_idx))) {
+            continue;
+          }
+          const uint32_t chr_fo_idx = cip->chr_idx_to_foidx[chr_idx];
+          if (unlikely(IsSet(written_contig_header_lines, chr_fo_idx))) {
+            logerrputs("Error: Duplicate ##contig line in .pvar file.\n");
+            goto PvarXheaderWrite_ret_MALFORMED_INPUT;
+          }
+          SetBit(chr_fo_idx, written_contig_header_lines);
+          // if --output-chr was used at some point, we need to sync the
+          // ##contig chromosome code with the code in the .pvar body.
+          char* chr_name_write_start = strcpya_k(cswritep, "##contig=<ID=");
+          char* chr_name_write_end = chrtoa(cip, chr_idx, chr_name_write_start);
+          if ((*chr_name_write_start == '0') && (chr_name_write_end == &(chr_name_write_start[1]))) {
+            // --allow-extra-chr 0 special case
+            // note that cswritep has *not* been advanced
+            contig_zero_written = 1;  // technically we write this a bit later
+            continue;
+          }
+          cswritep = chr_name_write_end;
+          if (unlikely(Cswrite(css_ptr, &cswritep))) {
+            goto PvarXheaderWrite_ret_WRITE_FAIL;
+          }
+          if (unlikely(CsputsStd(contig_name_end, line_end - contig_name_end, css_ptr, &cswritep))) {
+            goto PvarXheaderWrite_ret_WRITE_FAIL;
+          }
+        } else {
+          if (!write_filter) {
+            if (StrStartsWithUnsafe(line_start, "##FILTER=<ID=")) {
+              continue;
+            }
+          }
+          if (!write_info) {
+            if (StrStartsWithUnsafe(line_start, "##INFO=<ID=")) {
+              continue;
+            }
+          }
+          if (unlikely(CsputsStd(line_start, slen, css_ptr, &cswritep))) {
+            goto PvarXheaderWrite_ret_WRITE_FAIL;
+          }
+        }
+      }
+      // fill in the missing ##contig lines
+      if (contig_zero_written) {
+        cswritep = strcpya_k(cswritep, "##contig=<ID=0,length=2147483645>" EOLN_STR);
+      }
+      for (uint32_t chr_fo_idx = 0; chr_fo_idx != cip->chr_ct; ++chr_fo_idx) {
+        if (IsSet(written_contig_header_lines, chr_fo_idx)) {
+          continue;
+        }
+        const uint32_t chr_idx = cip->chr_file_order[chr_fo_idx];
+        // AllBitsAreZero() doesn't do what we want in the --sort-vars case,
+        // but fortunately we don't need it there.
+        if ((!IsSet(cip->chr_mask, chr_idx)) || (variant_include && AllBitsAreZero(variant_include, cip->chr_fo_vidx_start[chr_fo_idx], cip->chr_fo_vidx_start[chr_fo_idx + 1]))) {
+          continue;
+        }
+        char* chr_name_write_start = strcpya_k(cswritep, "##contig=<ID=");
+        char* chr_name_write_end = chrtoa(cip, chr_idx, chr_name_write_start);
+        if ((*chr_name_write_start == '0') && (chr_name_write_end == &(chr_name_write_start[1]))) {
+          // --allow-extra-chr 0 special case
+          if (contig_zero_written) {
+            continue;
+          }
+          contig_zero_written = 1;
+          cswritep = strcpya_k(chr_name_write_end, ",length=2147483645");
+        } else {
+          cswritep = strcpya_k(chr_name_write_end, ",length=");
+          const uint32_t pos_end = ChrLenLbound(cip, variant_bps, allele_idx_offsets, allele_storage, new_variant_idx_to_old, chr_fo_idx, max_allele_slen, vpos_sortstatus);
+          cswritep = u32toa(pos_end, cswritep);
+        }
+        *cswritep++ = '>';
+        AppendBinaryEoln(&cswritep);
+        if (unlikely(Cswrite(css_ptr, &cswritep))) {
+          goto PvarXheaderWrite_ret_WRITE_FAIL;
+        }
+      }
+      *cswritepp = cswritep;
+    }
+    if (append_info_pr_header_line) {
+      *cswritepp = strcpya_k(*cswritepp, "##INFO=<ID=PR,Number=0,Type=Flag,Description=\"Provisional reference allele, may not be based on real reference genome\">" EOLN_STR);
+    }
+  }
+  while (0) {
+  PvarXheaderWrite_ret_NOMEM:
+    reterr = kPglRetNomem;
+    break;
+  PvarXheaderWrite_ret_WRITE_FAIL:
+    reterr = kPglRetWriteFail;
+    break;
+  PvarXheaderWrite_ret_MALFORMED_INPUT:
+    reterr = kPglRetMalformedInput;
+    break;
+  }
+  BigstackReset(bigstack_mark);
+  return reterr;
+}
+
+PglErr WritePvar(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
   // allele_presents must be nullptr unless we're trimming alt alleles
   // split/join cases handled by WritePvarSplit() and WritePvarJoin()
   unsigned char* bigstack_mark = g_bigstack_base;
@@ -316,37 +573,10 @@ PglErr WritePvar(const char* outname, const char* xheader, const uintptr_t* vari
     char* pvar_info_line_iter = nullptr;
     uint32_t info_col_idx = 0;  // could save this during first load instead
     const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
-    if (pvar_psam_flags & kfPvarColXheader) {
-      if (write_filter && write_info) {
-        if (unlikely(CsputsStd(xheader, xheader_blen, &css, &cswritep))) {
-          goto WritePvar_ret_WRITE_FAIL;
-        }
-      } else {
-        // Filter out FILTER/INFO definitions iff the corresponding column has
-        // been removed.
-        const char* copy_start = xheader;
-        const char* xheader_end = &(xheader[xheader_blen]);
-        for (const char* xheader_iter = xheader; xheader_iter != xheader_end; ) {
-          const char* next_line_start = AdvPastDelim(xheader_iter, '\n');
-          if (((!write_filter) && StrStartsWithUnsafe(xheader_iter, "##FILTER=<ID=")) ||
-              ((!write_info) && StrStartsWithUnsafe(xheader_iter, "##INFO=<ID="))) {
-            if (copy_start != xheader_iter) {
-              if (unlikely(CsputsStd(copy_start, xheader_iter - copy_start, &css, &cswritep))) {
-                goto WritePvar_ret_WRITE_FAIL;
-              }
-            }
-            copy_start = next_line_start;
-          }
-          xheader_iter = next_line_start;
-        }
-        if (copy_start != xheader_end) {
-          if (unlikely(CsputsStd(copy_start, xheader_end - copy_start, &css, &cswritep))) {
-            goto WritePvar_ret_WRITE_FAIL;
-          }
-        }
-      }
-      if (write_info_pr && (!info_pr_flag_present)) {
-        cswritep = strcpya_k(cswritep, "##INFO=<ID=PR,Number=0,Type=Flag,Description=\"Provisional reference allele, may not be based on real reference genome\">" EOLN_STR);
+    if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
+      reterr = PvarXheaderWrite(variant_include, cip, variant_bps, allele_idx_offsets, allele_storage, nullptr, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, vpos_sortstatus, xheader, &css, &cswritep);
+      if (unlikely(reterr)) {
+        goto WritePvar_ret_1;
       }
     }
     // bugfix (30 Jul 2017): may be necessary to reload INFO when no ## lines
@@ -2586,7 +2816,7 @@ uintptr_t InitWriteAlleleIdxOffsets(const uintptr_t* variant_include, const uint
 //     is printed if INFO:END isn't defined, and an error occurs if either REF
 //     is multi-character, or there's an INFO:END mismatch.
 //   - Error out if REF alleles aren't all consistent, or any ALT allele is
-//     duplicated (necessary to use --pmerge to merge the latter).
+//     duplicated (note that --pmerge must support the latter).
 //   - For joined not-entirely-SNP non-symbolic variants, the final REF is the
 //     longest of the original REFs; ALT alleles have bases added to the end if
 //     necessary.  (Yes, this causes SNPs to stop being visible to a strlen ==
@@ -2675,9 +2905,10 @@ typedef struct JoinCountsStruct {
 JoinVtype JoinCount(const char* const* cur_alleles, uintptr_t allele_ct, JoinCounts* jcp) {
   jcp->snp_ct = 0;
   jcp->symbolic_ct = 0;
+  jcp->missalt_snp_ct = 0;
+  jcp->missalt_nonsnp_ct = 0;
   if (cur_alleles[0][1] == '\0') {
     jcp->nonsnp_ct = 0;
-    jcp->missalt_nonsnp_ct = 0;
     for (uintptr_t allele_idx = 1; allele_idx != allele_ct; ++allele_idx) {
       const char* cur_allele = cur_alleles[allele_idx];
       if (cur_allele[0] == '<') {
@@ -2703,12 +2934,12 @@ JoinVtype JoinCount(const char* const* cur_alleles, uintptr_t allele_ct, JoinCou
     }
     return kJoinVtypeSnp;
   }
-  jcp->missalt_snp_ct = 0;
   for (uint32_t allele_idx = 1; allele_idx != allele_ct; ++allele_idx) {
     const char* cur_allele = cur_alleles[allele_idx];
     if (cur_allele[0] == '<') {
       return kJoinVtypeError;
-    } else if (memequal_k(cur_allele, ".", 2)) {
+    }
+    if (memequal_k(cur_allele, ".", 2)) {
       if (allele_ct == 2) {
         jcp->nonsnp_ct = 0;
         jcp->missalt_nonsnp_ct = 1;
@@ -3336,7 +3567,7 @@ PglErr ParseInfoHeader(const char* xheader, uintptr_t xheader_blen, const char*
   return reterr;
 }
 
-PglErr WritePvarSplit(const char* outname, const char* xheader, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, const char* const* info_keys, const uint32_t* info_keys_htable, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t info_key_ct, uint32_t info_keys_htable_size, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct) {
+PglErr WritePvarSplit(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, const char* const* info_keys, const uint32_t* info_keys_htable, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t info_key_ct, uint32_t info_keys_htable_size, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
   unsigned char* bigstack_mark = g_bigstack_base;
   char* cswritep = nullptr;
   PglErr reterr = kPglRetSuccess;
@@ -3407,14 +3638,18 @@ PglErr WritePvarSplit(const char* outname, const char* xheader, const uintptr_t*
     }
 
     char* pvar_info_line_iter = nullptr;
+    uint32_t write_filter = 0;
+    if (pvar_psam_flags & kfPvarColFilter) {
+      write_filter = 1;
+    } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
+      write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
+    }
     uint32_t info_col_idx = 0;  // could save this during first load instead
     const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
-    if (pvar_psam_flags & kfPvarColXheader) {
-      if (unlikely(CsputsStd(xheader, xheader_blen, &css, &cswritep))) {
-        goto WritePvarSplit_ret_WRITE_FAIL;
-      }
-      if (write_info_pr && (!info_pr_flag_present)) {
-        cswritep = strcpya_k(cswritep, "##INFO=<ID=PR,Number=0,Type=Flag,Description=\"Provisional reference allele, may not be based on real reference genome\">" EOLN_STR);
+    if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
+      reterr = PvarXheaderWrite(variant_include, cip, variant_bps, allele_idx_offsets, allele_storage, nullptr, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, vpos_sortstatus, xheader, &css, &cswritep);
+      if (unlikely(reterr)) {
+        goto WritePvarSplit_ret_1;
       }
     }
     // could also make this an array-of-structs
@@ -3451,17 +3686,9 @@ PglErr WritePvarSplit(const char* outname, const char* xheader, const uintptr_t*
     if (write_qual) {
       cswritep = strcpya_k(cswritep, "\tQUAL");
     }
-
-    uint32_t write_filter = 0;
-    if (pvar_psam_flags & kfPvarColFilter) {
-      write_filter = 1;
-    } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
-      write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
-    }
     if (write_filter) {
       cswritep = strcpya_k(cswritep, "\tFILTER");
     }
-
     if (write_info) {
       cswritep = strcpya_k(cswritep, "\tINFO");
     }
@@ -3986,7 +4213,7 @@ PglErr MakeFilterHtable(const uintptr_t* variant_include, const uintptr_t* filte
 }
 
 /*
-PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, const char* const* info_keys, const uint32_t* info_keys_htable, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t max_write_allele_ct, uint32_t max_missalt_ct, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t info_key_ct, uint32_t info_keys_htable_size, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct) {
+PglErr WritePvarJoin(const char* outname, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, const char* missing_varid_match, const char* const* info_keys, const uint32_t* info_keys_htable, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uint32_t new_variant_id_max_allele_slen, uint32_t max_write_allele_ct, uint32_t max_missalt_ct, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t info_key_ct, uint32_t info_keys_htable_size, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
   unsigned char* bigstack_mark = g_bigstack_base;
   char* cswritep = nullptr;
   PglErr reterr = kPglRetSuccess;
@@ -4057,14 +4284,18 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
     }
 
     char* pvar_info_line_iter = nullptr;
+    uint32_t write_filter = 0;
+    if (pvar_psam_flags & kfPvarColFilter) {
+      write_filter = 1;
+    } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
+      write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
+    }
     uint32_t info_col_idx = 0;  // could save this during first load instead
     const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
-    if (pvar_psam_flags & kfPvarColXheader) {
-      if (unlikely(CsputsStd(xheader, xheader_blen, &css, &cswritep))) {
-        goto WritePvarJoin_ret_WRITE_FAIL;
-      }
-      if (write_info_pr && (!info_pr_flag_present)) {
-        cswritep = strcpya_k(cswritep, "##INFO=<ID=PR,Number=0,Type=Flag,Description=\"Provisional reference allele, may not be based on real reference genome\">" EOLN_STR);
+    if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
+      reterr = PvarXheaderWrite(variant_include, cip, variant_bps, allele_idx_offsets, allele_storage, nullptr, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, vpos_sortstatus, xheader, &css, &cswritep);
+      if (unlikely(reterr)) {
+        goto WritePvarJoin_ret_1;
       }
     }
     const uint32_t join_mode = (make_plink2_flags & (kfMakePlink2MSplitBase * 7));
@@ -4092,13 +4323,6 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
     if (write_qual) {
       cswritep = strcpya_k(cswritep, "\tQUAL");
     }
-
-    uint32_t write_filter = 0;
-    if (pvar_psam_flags & kfPvarColFilter) {
-      write_filter = 1;
-    } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
-      write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
-    }
     const char** filter_keys = nullptr;
     uint32_t* filter_keys_htable = nullptr;
     uintptr_t* cur_filter_keys = nullptr;
@@ -4127,8 +4351,9 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
 
     char** info_bufs = nullptr;
     const char** info_starts = nullptr;
-    const char** info_ends = nullptr;
+    const char** info_ends = nullptr;  // ugh, this is not related to INFO:END
     const char** info_curs = nullptr;
+    uint32_t info_end_key_idx = UINT32_MAX;
     if (pvar_info_reload) {
       if (unlikely(
               bigstack_alloc_cp(info_cache_size, &info_bufs) ||
@@ -4141,6 +4366,16 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
       if (unlikely(reterr)) {
         goto WritePvarJoin_ret_TSTREAM_FAIL;
       }
+      info_end_key_idx = IdHtableFind("END", info_keys, info_keys_htable, strlen("END"), info_keys_htable_size);
+      if (info_end_key_idx != UINT32_MAX) {
+        const int32_t knum = const_container_of[info_keys[info_end_key_idx], InfoVtype, key)->num;
+        if ((knum != 1) && (knum != kInfoVtypeUnknown)) {
+          // TODO: verify type instead.
+          // but if number is not . or 1, this is not the INFO:END we're
+          // looking for.
+          info_end_key_idx = UINT32_MAX;
+        }
+      }
     }
     if (write_info) {
       cswritep = strcpya_k(cswritep, "\tINFO");
@@ -4192,13 +4427,12 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
     uint32_t pct = 0;
     uint32_t next_print_variant_idx = variant_ct / 100;
     JoinCounts jc;
-    JoinCounts next_jc;
     jc.snp_ct = 0;
     jc.nonsnp_ct = 0;
     jc.symbolic_ct = 0;
     jc.missalt_snp_ct = 0;
     jc.missalt_nonsnp_ct = 0;
-    ;;;
+    JoinCounts next_jc = jc;
     fputs("0%", stdout);
     fflush(stdout);
     while (1) {
@@ -4232,6 +4466,12 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
           allele_ct = allele_idx_offsets[next_variant_uidx + 1] - allele_idx_offset_base;
         }
         const char* const* cur_alleles = &(allele_storage[allele_idx_offset_base]);
+        JoinVtype jvt = JoinCount(cur_alleles, allele_ct, &next_jc);
+        // previously validated
+        if ((join_mode == kfMakePlink2MJoinSnps) && ) {
+        }
+
+        // TODO
         jc.snp_ct += next_jc.snp_ct;
         jc.nonsnp_ct += next_jc.nonsnp_ct;
         jc.symbolic_ct += next_jc.symbolic_ct;
@@ -4331,12 +4571,18 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
         }
         AppendBinaryEoln(&cswritep);
       } else {
+        // TODO
+        next_jc.snp_ct = 0;
+        next_jc.nonsnp_ct = 0;
+        next_jc.symbolic_ct = 0;
+        next_jc.missalt_snp_ct = 0;
+        next_jc.missalt_nonsnp_ct = 0;
       }
       if (next_variant_idx == variant_ct) {
         break;
       }
       this_pos_write_variant_ct = 0;
-      jt = next_jt;
+      jc = next_jc;
       prev_bp = cur_bp;
       bp_start_variant_idx = next_variant_idx;
       bp_start_variant_uidx = next_variant_uidx;
@@ -4377,6 +4623,7 @@ PglErr WritePvarJoin(const char* outname, const char* xheader, const uintptr_t*
             }
           }
         }
+      }
     }
     if (unlikely(CswriteCloseNull(&css, cswritep))) {
       goto WritePvarJoin_ret_WRITE_FAIL;
@@ -5605,20 +5852,24 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
       // bugfix (25 Jul 2018): left expression needs ||, not &&
       mcp->hard_call_halfdist = ((hard_call_thresh == UINT32_MAX) || (!read_dosage_present))? 0 : (kDosage4th - hard_call_thresh);
       ctx.dosage_erase_halfdist = kDosage4th - dosage_erase_thresh;
-      const uint32_t read_hphase_present = (read_gflags / kfPgenGlobalHardcallPhasePresent) & 1;
+      // bugfix/simplification (10 Mar 2020): it is possible for dosage-phase
+      // to be present in the input without hardcall-phase.  Don't try to treat
+      // that differently than the usual scenario where hardcall-phase is
+      // present.
+      const uint32_t read_phase_present = !!(read_gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent));
       const uint32_t read_dphase_present = (read_gflags / kfPgenGlobalDosagePhasePresent) & 1;
       PgenGlobalFlags write_gflags = read_gflags;
       // When --hard-call-threshold is specified, if either hphase or dphase
       // values exist, the other can be generated.
-      uint32_t read_or_write_hphase_present = read_hphase_present;
+      uint32_t read_or_write_phase_present = read_phase_present;
       uint32_t read_or_write_dphase_present = read_dphase_present;
-      if (mcp->hard_call_halfdist && (read_hphase_present || read_or_write_dphase_present)) {
-        read_or_write_hphase_present = 1;
+      if (mcp->hard_call_halfdist && (read_phase_present || read_or_write_dphase_present)) {
+        read_or_write_phase_present = 1;
         read_or_write_dphase_present = 1;
         write_gflags |= kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent;
       } else if (dosage_erase_thresh && read_dosage_present) {
         // need write_phasepresent, pretty harmless to allocate write_phaseinfo
-        read_or_write_hphase_present = 1;
+        read_or_write_phase_present = 1;
       }
       uint32_t read_or_write_dosage_present = read_dosage_present;
       if (mcp->plink2_write_flags & kfPlink2WriteLateDosageErase) {
@@ -5708,7 +5959,7 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
         if (unlikely(bigstack_alloc_wp(1, &ctx.thread_write_genovecs))) {
           goto MakePgenRobust_ret_NOMEM;
         }
-        if (read_hphase_present && new_sample_idx_to_old) {
+        if (read_phase_present && new_sample_idx_to_old) {
           if (unlikely(bigstack_alloc_u32(raw_sample_ct, &ctx.old_sample_idx_to_new))) {
             goto MakePgenRobust_ret_NOMEM;
           }
@@ -5734,7 +5985,7 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
       }
       ctx.thread_write_phasepresents = nullptr;
       ctx.thread_all_hets = nullptr;
-      if (read_or_write_hphase_present) {
+      if (read_or_write_phase_present) {
         if (unlikely(
                 bigstack_alloc_wp(1, &ctx.thread_write_phasepresents) ||
                 bigstack_alloc_wp(1, &ctx.thread_write_phaseinfos) ||
@@ -5742,7 +5993,7 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
                 bigstack_alloc_w(sample_ctl, &(ctx.thread_write_phaseinfos[0])))) {
           goto MakePgenRobust_ret_NOMEM;
         }
-        if (read_hphase_present) {
+        if (read_phase_present) {
           if (unlikely(
                   bigstack_alloc_wp(1, &ctx.thread_all_hets) ||
                   bigstack_alloc_w(raw_sample_ctl, &(ctx.thread_all_hets[0])))) {
@@ -5834,7 +6085,7 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
                 bigstack_alloc_u32p(max_read_allele_ct + 1, &alt_two_sample_idx_starts))) {
           goto MakePgenRobust_ret_NOMEM;
         }
-        if (read_hphase_present) {
+        if (read_phase_present) {
           if (unlikely(
                   bigstack_alloc_w(raw_sample_ctl, &pgv.phasepresent) ||
                   bigstack_alloc_w(raw_sample_ctl, &pgv.phaseinfo) ||
@@ -5853,9 +6104,9 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
       const uint32_t raw_sample_ctv2 = NypCtToVecCt(raw_sample_ct);
       uintptr_t load_variant_vec_ct = raw_sample_ctv2;
       uint32_t loaded_vrtypes_needed = (read_gflags & kfPgenGlobalMultiallelicHardcallFound)? 1 : 0;
-      if (read_hphase_present || read_dosage_present) {
+      if (read_phase_present || read_dosage_present) {
         loaded_vrtypes_needed = 1;
-        if (read_hphase_present) {
+        if (read_phase_present) {
           // phaseraw has three parts:
           // 1. het_ct as uint32_t, and explicit_phasepresent_ct as uint32_t.
           // 2. vec-aligned bitarray of up to (raw_sample_ct + 1) bits.  first
@@ -5985,7 +6236,7 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
             } else if (cur_write_allele_ct == 2) {
               if (write_aidx == 1) {
                 // 1. read into normal, not raw representation
-                if (read_hphase_present) {
+                if (read_phase_present) {
                   reterr = PgrGetMDp(nullptr, null_pssi, raw_sample_ct, read_variant_uidx, simple_pgrp, &pgv);
                 } else {
                   reterr = PgrGetMD(nullptr, null_pssi, raw_sample_ct, read_variant_uidx, simple_pgrp, &pgv);
@@ -6395,7 +6646,7 @@ PglErr MakePgenRobust(const uintptr_t* sample_include, const uint32_t* new_sampl
 }
 
 // allele_presents should be nullptr iff trim_alts not true
-PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, __maybe_unused const char* varid_multi_template_str, __maybe_unused const char* varid_multi_nonsnp_template_str, const char* missing_varid_match, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, uint32_t new_variant_id_max_allele_slen, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uintptr_t pgr_alloc_cacheline_ct, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end) {
+PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, __maybe_unused const char* varid_multi_template_str, __maybe_unused const char* varid_multi_nonsnp_template_str, const char* missing_varid_match, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, uint32_t new_variant_id_max_allele_slen, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uintptr_t pgr_alloc_cacheline_ct, char* xheader, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end) {
   unsigned char* bigstack_mark = g_bigstack_base;
   FILE* outfile = nullptr;
   PglErr reterr = kPglRetSuccess;
@@ -6501,7 +6752,7 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
         nonref_flags_storage = (pgfip->gflags & kfPgenGlobalAllNonref)? 2 : 1;
       }
       if (write_variant_ct == variant_ct) {
-        reterr = WritePvar(outname, xheader, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, raw_variant_ct, variant_ct, max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, pvar_psam_flags, max_thread_ct);
+        reterr = WritePvar(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, raw_variant_ct, variant_ct, max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, vpos_sortstatus, pvar_psam_flags, max_thread_ct, xheader);
       } else {
         const char* const* info_keys = nullptr;
         uint32_t info_key_ct = 0;
@@ -6514,12 +6765,12 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
           }
         }
         if (write_variant_ct > variant_ct) {
-          reterr = WritePvarSplit(outname, xheader, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, varid_template_str, missing_varid_match, info_keys, info_keys_htable, raw_variant_ct, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, info_key_ct, info_keys_htable_size, misc_flags, make_plink2_flags, pvar_psam_flags, max_thread_ct);
+          reterr = WritePvarSplit(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, varid_template_str, missing_varid_match, info_keys, info_keys_htable, raw_variant_ct, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, vpos_sortstatus, info_key_ct, info_keys_htable_size, misc_flags, make_plink2_flags, pvar_psam_flags, max_thread_ct, xheader);
         } else {
           logerrputs("Error: Multiallelic join is under development.\n");
           reterr = kPglRetNotYetSupported;
           goto MakePlink2NoVsort_ret_1;
-          // reterr = WritePvarJoin(outname, xheader, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, varid_template_str, missing_varid_match, info_keys, info_keys_htable, raw_variant_ct, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, max_write_allele_ct, max_missalt_ct, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, info_key_ct, info_keys_htable_size, misc_flags, make_plink2_flags, pvar_psam_flags, max_thread_ct);
+          // reterr = WritePvarJoin(outname, variant_include, cip, variant_bps, variant_ids, allele_idx_offsets, allele_storage, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, pgfip->nonref_flags, pvar_info_reload, variant_cms, varid_template_str, missing_varid_match, info_keys, info_keys_htable, raw_variant_ct, variant_ct, max_allele_slen, new_variant_id_max_allele_slen, max_write_allele_ct, max_missalt_ct, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, vpos_sortstatus, info_key_ct, info_keys_htable_size, misc_flags, make_plink2_flags, pvar_psam_flags, max_thread_ct, xheader);
         }
       }
       if (unlikely(reterr)) {
@@ -6620,17 +6871,17 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
       const uint32_t read_dosage_present = (read_gflags / kfPgenGlobalDosagePresent) & 1;
       mc.hard_call_halfdist = ((hard_call_thresh == UINT32_MAX) || (!read_dosage_present))? 0 : (kDosage4th - hard_call_thresh);
       ctx.dosage_erase_halfdist = kDosage4th - dosage_erase_thresh;
-      const uint32_t read_hphase_present = (read_gflags / kfPgenGlobalHardcallPhasePresent) & 1;
+      const uint32_t read_phase_present = !!(read_gflags & (kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent));
       const uint32_t read_dphase_present = (read_gflags / kfPgenGlobalDosagePhasePresent) & 1;
       PgenGlobalFlags write_gflags = read_gflags;
-      uint32_t read_or_write_hphase_present = read_hphase_present;
+      uint32_t read_or_write_phase_present = read_phase_present;
       uint32_t read_or_write_dphase_present = read_dphase_present;
-      if (mc.hard_call_halfdist && (read_hphase_present || read_or_write_dphase_present)) {
-        read_or_write_hphase_present = 1;
+      if (mc.hard_call_halfdist && (read_phase_present || read_or_write_dphase_present)) {
+        read_or_write_phase_present = 1;
         read_or_write_dphase_present = 1;
         write_gflags |= kfPgenGlobalHardcallPhasePresent | kfPgenGlobalDosagePhasePresent;
       } else if (dosage_erase_thresh && read_dosage_present) {
-        read_or_write_hphase_present = 1;
+        read_or_write_phase_present = 1;
       }
       uint32_t read_or_write_dosage_present = read_dosage_present;
       if (mc.plink2_write_flags & kfPlink2WriteLateDosageErase) {
@@ -6672,7 +6923,7 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
         const uintptr_t mhcraw_word_ct = RoundUpPow2(2, kWordsPerVec) + GetMhcWordCt(raw_sample_ct);
         load_vblock_cacheline_ct += WordCtToCachelineCtU64(S_CAST(uint64_t, mhcraw_word_ct) * max_vblock_size);
       }
-      if (read_hphase_present) {
+      if (read_phase_present) {
         // could make this bound tighter when lots of unphased variants are
         // mixed in among the phased variants, but this isn't nearly as
         // important as the analogous multiallelic optimization
@@ -6801,7 +7052,7 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
         if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_genovecs)) {
           goto MakePlink2NoVsort_fallback;
         }
-        if (read_hphase_present && new_sample_idx_to_old) {
+        if (read_phase_present && new_sample_idx_to_old) {
           if (bigstack_alloc_u32(raw_sample_ct, &ctx.old_sample_idx_to_new)) {
             goto MakePlink2NoVsort_fallback;
           }
@@ -6834,13 +7085,13 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
       ctx.thread_all_hets = nullptr;
       ctx.thread_write_dosagepresents = nullptr;
       ctx.thread_write_dphasepresents = nullptr;
-      if (read_or_write_hphase_present || read_or_write_dosage_present) {
-        if (read_or_write_hphase_present) {
+      if (read_or_write_phase_present || read_or_write_dosage_present) {
+        if (read_or_write_phase_present) {
           if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_phasepresents) ||
               bigstack_alloc_wp(calc_thread_ct, &ctx.thread_write_phaseinfos)) {
             goto MakePlink2NoVsort_fallback;
           }
-          if (read_hphase_present) {
+          if (read_phase_present) {
             if (bigstack_alloc_wp(calc_thread_ct, &ctx.thread_all_hets)) {
               goto MakePlink2NoVsort_fallback;
             }
@@ -6869,7 +7120,7 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
           // todo: multiallelic dosage
         }
       }
-      if (read_or_write_hphase_present || read_dosage_present || (read_gflags & kfPgenGlobalMultiallelicHardcallFound)) {
+      if (read_or_write_phase_present || read_dosage_present || (read_gflags & kfPgenGlobalMultiallelicHardcallFound)) {
         // ctx.loaded_vrtypes
         other_per_thread_cacheline_ct += 2 * (kPglVblockSize / kCacheline);
       }
@@ -6885,19 +7136,19 @@ PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, c
       main_loadbufs[1] = S_CAST(uintptr_t*, bigstack_alloc_raw(load_vblock_cacheline_ct * calc_thread_ct * kCacheline));
       ctx.loaded_vrtypes[0] = nullptr;
       ctx.loaded_vrtypes[1] = nullptr;
-      if (read_or_write_hphase_present || read_dosage_present || (read_gflags & kfPgenGlobalMultiallelicHardcallFound)) {
+      if (read_or_write_phase_present || read_dosage_present || (read_gflags & kfPgenGlobalMultiallelicHardcallFound)) {
         ctx.loaded_vrtypes[0] = S_CAST(unsigned char*, bigstack_alloc_raw(kPglVblockSize * calc_thread_ct));
         ctx.loaded_vrtypes[1] = S_CAST(unsigned char*, bigstack_alloc_raw(kPglVblockSize * calc_thread_ct));
       }
-      if (read_or_write_hphase_present || read_or_write_dosage_present) {
+      if (read_or_write_phase_present || read_or_write_dosage_present) {
         const uint32_t bitvec_writebuf_byte_ct = BitCtToCachelineCt(sample_ct) * kCacheline;
         const uintptr_t dosagevals_writebuf_byte_ct = DivUp(sample_ct, (kCacheline / 2)) * kCacheline;
         for (uint32_t tidx = 0; tidx != calc_thread_ct; ++tidx) {
-          if (read_or_write_hphase_present) {
+          if (read_or_write_phase_present) {
             ctx.thread_write_phasepresents[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitvec_writebuf_byte_ct));
             ctx.thread_write_phaseinfos[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(bitvec_writebuf_byte_ct));
 
-            if (read_hphase_present) {
+            if (read_phase_present) {
               ctx.thread_all_hets[tidx] = S_CAST(uintptr_t*, bigstack_alloc_raw(BitCtToCachelineCt(raw_sample_ct) * kCacheline));
             }
           }
@@ -7438,7 +7689,7 @@ PglErr WritePvarResortedInterval(const ChrInfo* write_cip, const uint32_t* varia
 // allocation of buffers, and generating the header line occurs directly in
 // this function, while loading the next pvar_info_strs batch and writing the
 // next .pvar line batch are one level down.
-PglErr WritePvarResorted(const char* outname, const char* xheader, const uintptr_t* variant_include, const ChrInfo* write_cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const uint32_t* new_variant_idx_to_old, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct) {
+PglErr WritePvarResorted(const char* outname, const uintptr_t* variant_include, const ChrInfo* write_cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* qual_present, const float* quals, const uintptr_t* filter_present, const uintptr_t* filter_npass, const char* const* filter_storage, const uintptr_t* nonref_flags, const char* pvar_info_reload, const double* variant_cms, const uint32_t* new_variant_idx_to_old, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_slen, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t nonref_flags_storage, uint32_t max_filter_slen, uint32_t info_reload_slen, PvarPsamFlags pvar_psam_flags, uint32_t thread_ct, char* xheader) {
   unsigned char* bigstack_mark = g_bigstack_base;
   char* cswritep = nullptr;
   PglErr reterr = kPglRetSuccess;
@@ -7478,13 +7729,17 @@ PglErr WritePvarResorted(const char* outname, const char* xheader, const uintptr
       goto WritePvarResorted_ret_INCONSISTENT_INPUT;
     }
 
+    uint32_t write_filter = 0;
+    if (pvar_psam_flags & kfPvarColFilter) {
+      write_filter = 1;
+    } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
+      write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
+    }
     const uint32_t info_pr_flag_present = (info_flags / kfInfoPrFlagPresent) & 1;
-    if (pvar_psam_flags & kfPvarColXheader) {
-      if (unlikely(CsputsStd(xheader, xheader_blen, &css, &cswritep))) {
-        goto WritePvarResorted_ret_WRITE_FAIL;
-      }
-      if (write_info_pr && (!info_pr_flag_present)) {
-        cswritep = strcpya_k(cswritep, "##INFO=<ID=PR,Number=0,Type=Flag,Description=\"Provisional reference allele, may not be based on real reference genome\">" EOLN_STR);
+    if (pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) {
+      reterr = PvarXheaderWrite(nullptr, write_cip, variant_bps, allele_idx_offsets, allele_storage, new_variant_idx_to_old, xheader_blen, (pvar_psam_flags / kfPvarColVcfheader) & 1, write_filter, write_info, write_info_pr && (!info_pr_flag_present), max_allele_slen, kfUnsortedVar0, xheader, &css, &cswritep);
+      if (unlikely(reterr)) {
+        goto WritePvarResorted_ret_1;
       }
     }
     if (write_cip->chrset_source) {
@@ -7502,12 +7757,6 @@ PglErr WritePvarResorted(const char* outname, const char* xheader, const uintptr
       cswritep = strcpya_k(cswritep, "\tQUAL");
     }
 
-    uint32_t write_filter = 0;
-    if (pvar_psam_flags & kfPvarColFilter) {
-      write_filter = 1;
-    } else if ((pvar_psam_flags & kfPvarColMaybefilter) && filter_present) {
-      write_filter = !IntersectionIsEmpty(variant_include, filter_present, raw_variant_ctl);
-    }
     if (write_filter) {
       cswritep = strcpya_k(cswritep, "\tFILTER");
     }
@@ -7637,7 +7886,7 @@ PglErr WritePvarResorted(const char* outname, const char* xheader, const uintptr
   return reterr;
 }
 
-PglErr MakePlink2Vsort(const char* xheader, const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const ChrIdx* chr_idxs, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, MakePlink2Flags make_plink2_flags, uint32_t use_nsort, PvarPsamFlags pvar_psam_flags, PgenReader* simple_pgrp, char* outname, char* outname_end) {
+PglErr MakePlink2Vsort(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const ChrIdx* chr_idxs, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, MakePlink2Flags make_plink2_flags, uint32_t use_nsort, PvarPsamFlags pvar_psam_flags, char* xheader, PgenReader* simple_pgrp, char* outname, char* outname_end) {
   unsigned char* bigstack_mark = g_bigstack_base;
   unsigned char* bigstack_end_mark = g_bigstack_end;
   PglErr reterr = kPglRetSuccess;
@@ -7843,7 +8092,7 @@ PglErr MakePlink2Vsort(const char* xheader, const uintptr_t* sample_include, con
       if (!PgrGetNonrefFlags(simple_pgrp)) {
         nonref_flags_storage = (PgrGetGflags(simple_pgrp) & kfPgenGlobalAllNonref)? 2 : 1;
       }
-      reterr = WritePvarResorted(outname, xheader, variant_include, &write_chr_info, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, PgrGetNonrefFlags(simple_pgrp), pvar_info_reload, variant_cms, new_variant_idx_to_old, raw_variant_ct, variant_ct, max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, pvar_psam_flags, max_thread_ct);
+      reterr = WritePvarResorted(outname, variant_include, &write_chr_info, variant_bps, variant_ids, allele_idx_offsets, allele_storage, allele_presents, refalt1_select, pvar_qual_present, pvar_quals, pvar_filter_present, pvar_filter_npass, pvar_filter_storage, PgrGetNonrefFlags(simple_pgrp), pvar_info_reload, variant_cms, new_variant_idx_to_old, raw_variant_ct, variant_ct, max_allele_slen, xheader_blen, info_flags, nonref_flags_storage, max_filter_slen, info_reload_slen, pvar_psam_flags, max_thread_ct, xheader);
       if (unlikely(reterr)) {
         goto MakePlink2Vsort_ret_1;
       }


=====================================
plink2_data.h
=====================================
@@ -64,10 +64,18 @@ PglErr WriteMapOrBim(const char* outname, const uintptr_t* variant_include, cons
 
 PglErr PvarInfoOpenAndReloadHeader(const char* pvar_info_reload, uint32_t max_thread_ct, TextStream* pvar_reload_txsp, char** line_iterp, uint32_t* info_col_idx_ptr);
 
+PglErr PvarInfoReload(uint32_t info_col_idx, uint32_t variant_uidx, TextStream* pvar_reload_txsp, char** line_iterp, uint32_t* trs_variant_uidx_ptr);
+
 PglErr PvarInfoReloadAndWrite(uint32_t info_pr_flag_present, uint32_t info_col_idx, uint32_t variant_uidx, uint32_t is_pr, TextStream* pvar_reload_txsp, char** line_iterp, char** write_iter_ptr, uint32_t* rls_variant_uidx_ptr);
 
 void AppendChrsetLine(const ChrInfo* cip, char** write_iter_ptr);
 
+uint32_t ChrLenLbound(const ChrInfo* cip, const uint32_t* variant_bps, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uint32_t* new_variant_idx_to_old, uint32_t chr_fo_idx, uint32_t max_allele_slen, UnsortedVar vpos_sortstatus);
+
+// fileformat, fileDate, source
+// assumes adequate buffer space
+void AppendVcfHeaderStart(uint32_t v43, char** cswritepp);
+
 PglErr WriteFam(const char* outname, const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const uint32_t* new_sample_idx_to_old, uint32_t sample_ct, uint32_t pheno_ct, char delim);
 
 uint32_t DataFidColIsRequired(const uintptr_t* sample_include, const SampleIdInfo* siip, uint32_t sample_ct, uint32_t maybe_modifier);
@@ -84,9 +92,9 @@ void ApplyHardCallThresh(const uintptr_t* dosage_present, const Dosage* dosage_m
 
 uint32_t ApplyHardCallThreshPhased(const uintptr_t* dosage_present, const Dosage* dosage_main, uint32_t dosage_ct, uint32_t hard_call_halfdist, uintptr_t* genovec, uintptr_t* phasepresent, uintptr_t* phaseinfo, uintptr_t* dphase_present, SDosage* dphase_delta, SDosage* tmp_dphase_delta);
 
-PglErr MakePlink2NoVsort(const char* xheader, const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, const char* varid_multi_template_str, const char* varid_multi_nonsnp_template_str, const char* missing_varid_match, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, uint32_t new_variant_id_max_allele_slen, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uintptr_t pgr_alloc_cacheline_ct, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end);
+PglErr MakePlink2NoVsort(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const char* varid_template_str, __maybe_unused const char* varid_multi_template_str, __maybe_unused const char* varid_multi_nonsnp_template_str, const char* missing_varid_match, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, uint32_t new_variant_id_max_allele_slen, MiscFlags misc_flags, MakePlink2Flags make_plink2_flags, PvarPsamFlags pvar_psam_flags, uintptr_t pgr_alloc_cacheline_ct, char* xheader, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end);
 
-PglErr MakePlink2Vsort(const char* xheader, const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const ChrIdx* chr_idxs, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, MakePlink2Flags make_plink2_flags, uint32_t use_nsort, PvarPsamFlags pvar_psam_flags, PgenReader* simple_pgrp, char* outname, char* outname_end);
+PglErr MakePlink2Vsort(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uint32_t* new_sample_idx_to_old, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const uintptr_t* allele_presents, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const ChrIdx* chr_idxs, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_allele_ct, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, uint32_t hard_call_thresh, uint32_t dosage_erase_thresh, MakePlink2Flags make_plink2_flags, uint32_t use_nsort, PvarPsamFlags pvar_psam_flags, char* xheader, PgenReader* simple_pgrp, char* outname, char* outname_end);
 
 PglErr SampleSortFileMap(const uintptr_t* sample_include, const SampleIdInfo* siip, const char* sample_sort_fname, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t** new_sample_idx_to_old_ptr);
 


=====================================
plink2_decompress.h
=====================================
@@ -88,6 +88,7 @@ HEADER_INLINE unsigned char* TextStreamMemStart(TextStream* txs_ptr) {
   return R_CAST(unsigned char*, GET_PRIVATE(*txs_ptr, m).base.dst);
 }
 
+// TODO: logputs("\n") first when necessary
 void TextErrPrint(const char* file_descrip, const char* errmsg, PglErr reterr);
 
 HEADER_INLINE void TextFileErrPrint(const char* file_descrip, const textFILE* txfp) {


=====================================
plink2_export.cc
=====================================
The diff for this file was not included because it is too large.

=====================================
plink2_export.h
=====================================
@@ -56,7 +56,7 @@ void InitExportf(ExportfInfo* exportf_info_ptr);
 
 void CleanupExportf(ExportfInfo* exportf_info_ptr);
 
-PglErr Exportf(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const ExportfInfo* eip, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_variant_id_slen, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, uint32_t max_thread_ct, MakePlink2Flags make_plink2_flags, uintptr_t pgr_alloc_cacheline_ct, char* xheader, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end);
+PglErr Exportf(const uintptr_t* sample_include, const PedigreeIdInfo* piip, const uintptr_t* sex_nm, const uintptr_t* sex_male, const PhenoCol* pheno_cols, const char* pheno_names, const uintptr_t* variant_include, const ChrInfo* cip, const uint32_t* variant_bps, const char* const* variant_ids, const uintptr_t* allele_idx_offsets, const char* const* allele_storage, const STD_ARRAY_PTR_DECL(AlleleCode, 2, refalt1_select), const uintptr_t* pvar_qual_present, const float* pvar_quals, const uintptr_t* pvar_filter_present, const uintptr_t* pvar_filter_npass, const char* const* pvar_filter_storage, const char* pvar_info_reload, const double* variant_cms, const ExportfInfo* eip, uintptr_t xheader_blen, InfoFlags info_flags, uint32_t raw_sample_ct, uint32_t sample_ct, uint32_t pheno_ct, uintptr_t max_pheno_name_blen, uint32_t raw_variant_ct, uint32_t variant_ct, uint32_t max_variant_id_slen, uint32_t max_allele_slen, uint32_t max_filter_slen, uint32_t info_reload_slen, UnsortedVar vpos_sortstatus, uint32_t max_thread_ct, MakePlink2Flags make_plink2_flags, uintptr_t pgr_alloc_cacheline_ct, char* xheader, PgenFileInfo* pgfip, PgenReader* simple_pgrp, char* outname, char* outname_end);
 
 #ifdef __cplusplus
 }  // namespace plink2


=====================================
plink2_filter.cc
=====================================
@@ -4215,8 +4215,12 @@ PglErr SetRefalt1FromFile(const uintptr_t* variant_include, const char* const* v
           char* write_iter = strcpya_k(g_logbuf, "Warning: ");
           // strlen("--ref-allele") == 12, strlen("--alt1-allele") == 13
           write_iter = memcpya(write_iter, flagstr, 12 + is_alt1);
-          write_iter = strcpya_k(write_iter, " mismatch for multiallelic variant '");
-          write_iter = strcpya(write_iter, variant_ids[variant_uidx]);
+          write_iter = strcpya_k(write_iter, " mismatch for multiallelic variant");
+          // If we put this all on one line, its length would be
+          // 60 + is_alt1 + variant_id_slen.  Split into two if this is >79.
+          *write_iter++ = (variant_id_slen + is_alt1 < 20)? ' ' : '\n';
+          *write_iter++ = '\'';
+          write_iter = memcpya(write_iter, variant_ids[variant_uidx], variant_id_slen);
           strcpy_k(write_iter, "'.\n");
           if (allele_mismatch_warning_ct < 3) {
             logerrputsb();


=====================================
plink2_glm.cc
=====================================
The diff for this file was not included because it is too large.

=====================================
plink2_help.cc
=====================================
@@ -132,8 +132,8 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "  --bpfile <prefix> ['vzs'] : Specify .pgen + .bim[.zst] + .fam prefix.\n\n"
                );
     HelpPrint("vcf\0bcf\0keep-autoconv\0", &help_ctrl, 1,
-"  --keep-autoconv    : When importing non-PLINK-binary data, don't delete\n"
-"                       autogenerated binary fileset at end of run.\n\n"
+"  --keep-autoconv ['vzs']   : When importing non-PLINK-binary data, don't\n"
+"                              delete autogenerated fileset at end of run.\n\n"
                );
     HelpPrint("bfile\0fam\0no-fid\0no-parents\0no-sex\0", &help_ctrl, 1,
 "  --no-fid           : .fam file does not contain column 1 (family ID).\n"
@@ -299,7 +299,7 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "  --make-bed ['vzs'] [{trim-alts | erase-alt2+}]\n"
 "             ['multiallelics='<split mode>]\n"
                */
-"    Create a new PLINK binary fileset (--make-pgen = .pgen + .pvar[.zst] +\n"
+"    Create a new PLINK 2 binary fileset (--make-pgen = .pgen + .pvar[.zst] +\n"
 "    .psam, --make-bpgen = .pgen + .bim[.zst] + .fam).\n"
 "    * Unlike the automatic text-to-binary converters (which only heed\n"
 "      chromosome filters), this supports all of PLINK's filtering flags.\n"
@@ -337,7 +337,7 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "      * '+any': All variants with identical CHROM/POS/REF and no symbolic ALT\n"
 "                alleles are joined into a single multiallelic variant.\n"
 "      Symbolic ALT alleles are joined separately.  Additional conditions apply\n"
-"      to them, and PLINK usually errors out when they aren't met.\n"
+"      to them, and PLINK 2 usually errors out when they aren't met.\n"
 "      When splitting with 'vid-dup', the new variants keep the original variant\n"
 "      ID.  Otherwise, new variant IDs are assigned as follows:\n"
 "      1. If --set-all-var-ids was specified, that template is applied.\n"
@@ -372,6 +372,7 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "                 except for possibly FILTER/INFO definitions when those\n"
 "                 column(s) have been removed.  Without this, only the #CHROM\n"
 "                 header line is kept.\n"
+"        vcfheader: xheader, with additions to make it a valid VCF header.\n"
 "        maybequal: QUAL.  Omitted if all remaining values are missing.\n"
 "        qual: Force QUAL column to be written even when empty.\n"
 "        maybefilter: FILTER.  Omitted if all remaining values are missing.\n"
@@ -414,8 +415,8 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "           ['bits='<#>]\n"
 "    Create a new fileset with all filters applied.  The following output\n"
 "    formats are supported:\n"
-"    (actually, only A, AD, A-transpose, bgen-1.x, ind-major-bed, haps,\n"
-"    hapslegend, oxford, and vcf are implemented for now)\n"
+"    (actually, only A, AD, A-transpose, bcf, bgen-1.x, haps, hapslegend,\n"
+"    ind-major-bed, oxford, and vcf are implemented for now)\n"
 "    * '23': 23andMe 4-column format.  This can only be used on a single\n"
 "            sample's data (--keep may be handy), and does not support\n"
 "            multicharacter allele codes.\n"
@@ -466,15 +467,18 @@ PglErr DispHelp(const char* const* argvk, uint32_t param_ct) {
 "    * 'transpose': PLINK 1 variant-major (.tped + .tfam), loadable with\n"
 "                   --tfile.\n"
 "    * 'vcf',     : VCF (default version 4.3).  If PAR1 and PAR2 are present,\n"
-"      'vcf-4.2'    they are automatically merged with chrX, with proper\n"
-"                   handling of chromosome codes and male ploidy.\n"
-"                   When the 'bgz' modifier is present, the VCF file is\n"
-"                   block-gzipped.\n"
+"      'vcf-4.2',   they are automatically merged with chrX, with proper\n"
+"      'bcf',       handling of chromosome codes and male ploidy.\n"
+"      'bcf-4.2'    When the 'bgz' modifier is present, the VCF file is\n"
+"                   block-gzipped.  (This always happens with BCF output.)\n"
 "                   The 'id-paste' modifier controls which .psam columns are\n"
 "                   used to construct sample IDs (choices are maybefid, fid,\n"
 "                   iid, maybesid, and sid; default is maybefid,iid,maybesid),\n"
 "                   while the 'id-delim' modifier sets the character between the\n"
 "                   ID pieces (default '_').\n"
+"                   Genotypes are always exported.  If you want to export a\n"
+"                   sites-only VCF instead, see --make-pgen/--make-just-pvar's\n"
+"                   'vcfheader' column set.\n"
 "                   Dosages are not exported unless the 'vcf-dosage=' modifier\n"
 "                   is present.  The following five dosage export modes are\n"
 "                   supported:\n"


=====================================
plink2_import.cc
=====================================
The diff for this file was not included because it is too large.

=====================================
plink2_import.h
=====================================
@@ -28,8 +28,9 @@ namespace plink2 {
 FLAGSET_DEF_START()
   kfImport0,
   kfImportKeepAutoconv = (1 << 0),
-  kfImportDoubleId = (1 << 1),
-  kfImportVcfRequireGt = (1 << 2)
+  kfImportKeepAutoconvVzs = (1 << 1),
+  kfImportDoubleId = (1 << 2),
+  kfImportVcfRequireGt = (1 << 3)
 FLAGSET_DEF_END(ImportFlags);
 
 ENUM_U31_DEF_START()


=====================================
plink2_matrix.h
=====================================
@@ -447,10 +447,19 @@ HEADER_INLINE void PrintSymmFmatrix(const float* matrix, uint32_t dim) {
   }
 }
 
-HEADER_INLINE void PrintMatrix(const double* matrix, uint32_t dim) {
-  for (uint32_t uii = 0; uii != dim; ++uii) {
-    for (uint32_t ujj = 0; ujj != dim; ++ujj) {
-      printf("%g ", matrix[uii * dim + ujj]);
+HEADER_INLINE void PrintMatrix(const double* matrix, uintptr_t row_ct, uintptr_t col_ct) {
+  for (uintptr_t ulii = 0; ulii != row_ct; ++ulii) {
+    for (uintptr_t uljj = 0; uljj != col_ct; ++uljj) {
+      printf("%g ", matrix[ulii * col_ct + uljj]);
+    }
+    printf("\n");
+  }
+}
+
+HEADER_INLINE void PrintFmatrix(const float* matrix, uintptr_t row_ct, uintptr_t col_ct, uintptr_t stride) {
+  for (uintptr_t ulii = 0; ulii != row_ct; ++ulii) {
+    for (uintptr_t uljj = 0; uljj != col_ct; ++uljj) {
+      printf("%g ", S_CAST(double, matrix[ulii * stride + uljj]));
     }
     printf("\n");
   }


=====================================
plink2_matrix_calc.cc
=====================================
@@ -442,7 +442,12 @@ PglErr KingCutoffBatch(const SampleIdInfo* siip, uint32_t raw_sample_ct, double
       }
     } else {
       if (unlikely(fsize != (fsize_double_expected / 2))) {
-        logerrprintfww("Error: Invalid --king-cutoff .bin file size (expected %" PRIu64 " or %" PRIu64 " bytes).\n", fsize_double_expected / 2, fsize_double_expected);
+        const uint64_t fsize_double_square = king_id_ct * S_CAST(uint64_t, king_id_ct) * sizeof(double);
+        if ((fsize == fsize_double_square) || (fsize == fsize_double_square / 2)) {
+          logerrputs("Error: --king-cutoff currently requires a *triangular* .bin file; the provided\nfile appears to be square.\n");
+        } else {
+          logerrprintfww("Error: Invalid --king-cutoff .bin file size (expected %" PRIu64 " or %" PRIu64 " bytes).\n", fsize_double_expected / 2, fsize_double_expected);
+        }
         goto KingCutoffBatch_ret_MALFORMED_INPUT;
       }
       assert(king_id_ct <= ((0x7ffff000 / sizeof(float)) + 1));


=====================================
plink2_pvar.cc
=====================================
@@ -787,7 +787,8 @@ PglErr LoadPvar(const char* pvarname, const char* var_filter_exceptions_flattene
   // probable todo: load INFO:END.  (does this allow the CNV module to be
   //   unified with the rest of the program?)  but this will probably wait
   //   until I need to analyze some sort of CNV data, and that day keeps
-  //   getting postponed...
+  //   getting postponed... for now, the BCF exporter performs its own parsing
+  //   of INFO:END so that it can fill each variant's rlen field correctly.
   // possible todo: require FILTER to only contain values declared in header,
   //   and modify its storage accordingly?  (pointless for now, but worthwhile
   //   to keep an eye on what typical VCF files look like.)
@@ -833,7 +834,7 @@ PglErr LoadPvar(const char* pvarname, const char* var_filter_exceptions_flattene
     unsigned char* tmp_alloc_base = g_bigstack_base;
     g_bigstack_base = bigstack_mark;
 
-    char* xheader_end = ((pvar_psam_flags & kfPvarColXheader) || xheader_needed)? R_CAST(char*, bigstack_mark) : nullptr;
+    char* xheader_end = ((pvar_psam_flags & (kfPvarColXheader | kfPvarColVcfheader)) || xheader_needed)? R_CAST(char*, bigstack_mark) : nullptr;
     uint32_t chrset_present = 0;
     uint32_t info_pr_present = 0;
     uint32_t info_pr_nonflag_present = 0;
@@ -906,7 +907,6 @@ PglErr LoadPvar(const char* pvarname, const char* var_filter_exceptions_flattene
           line_iter = line_end;
         }
       }
-      *info_flags_ptr = S_CAST(InfoFlags, (info_pr_present * kfInfoPrFlagPresent) | (info_pr_nonflag_present * kfInfoPrNonflagPresent) | (info_nonpr_present * kfInfoNonprPresent));
       line_iter = AdvPastDelim(line_iter, '\n');
     }
     if (xheader_end) {
@@ -927,6 +927,7 @@ PglErr LoadPvar(const char* pvarname, const char* var_filter_exceptions_flattene
     uint32_t info_col_present = 0;
     uint32_t cm_col_present = 0;
     if (line_start[0] == '#') {
+      *info_flags_ptr = S_CAST(InfoFlags, (info_pr_present * kfInfoPrFlagPresent) | (info_pr_nonflag_present * kfInfoPrNonflagPresent) | (info_nonpr_present * kfInfoNonprPresent));
       // parse header
       // [-1] = #CHROM (must be first column)
       // [0] = POS
@@ -1388,10 +1389,14 @@ PglErr LoadPvar(const char* pvarname, const char* var_filter_exceptions_flattene
             cip->chr_file_order[++chrs_encountered_m1] = cur_chr_code;
             cip->chr_fo_vidx_start[chrs_encountered_m1] = raw_variant_ct;
             cip->chr_idx_to_foidx[cur_chr_code] = chrs_encountered_m1;
-            last_bp = 0;
             last_cm = -DBL_MAX;
           }
         }
+        // always need to set this, if we want to avoid chromosome-length
+        // overestimation in --sort-vars + early-variant-filter +
+        // pvar-cols=+vcfheader case.
+        last_bp = 0;
+
         SetBit(cur_chr_code, loaded_chr_mask);
         if (chr_output_name_buf) {
           char* chr_name_end = chrtoa(cip, cur_chr_code, chr_output_name_buf);



View it on GitLab: https://salsa.debian.org/med-team/plink2/-/commit/2aaa4872560daa25e2010f56b88dbad02cf67fc6

-- 
View it on GitLab: https://salsa.debian.org/med-team/plink2/-/commit/2aaa4872560daa25e2010f56b88dbad02cf67fc6
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200327/56405380/attachment-0001.html>


More information about the debian-med-commit mailing list