[med-svn] [plink1.9] 01/03: Imported Upstream version 1.90~b3.28-151216

Dylan Aïssi bob.dybian-guest at moszumanska.debian.org
Sun Dec 20 13:51:34 UTC 2015


This is an automated email from the git hooks/post-receive script.

bob.dybian-guest pushed a commit to branch master
in repository plink1.9.

commit 597ab217cb3659a3acadf6664e7f03164cc23dcd
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date:   Fri Dec 18 19:55:23 2015 +0100

    Imported Upstream version 1.90~b3.28-151216
---
 Makefile        |    2 +-
 plink.c         | 1020 +++++++++++++++++++---------------
 plink_assoc.c   |  742 +++----------------------
 plink_assoc.h   |   10 -
 plink_calc.c    |   33 +-
 plink_cluster.c |   15 +-
 plink_cluster.h |    2 +-
 plink_common.c  |  175 +++---
 plink_common.h  |   90 +--
 plink_data.c    | 1654 +++++++++++++++++++++++++++++++++----------------------
 plink_data.h    |    8 +-
 plink_dosage.c  |   90 +--
 plink_dosage.h  |    7 +-
 plink_family.c  | 1270 +++++++++++++++++++++++++++++++++++++++---
 plink_family.h  |    2 +-
 plink_filter.c  |  237 ++++----
 plink_filter.h  |   24 +-
 plink_glm.c     |  111 ++--
 plink_help.c    |   39 +-
 plink_lasso.c   |    1 +
 plink_ld.c      |  104 ++--
 plink_misc.c    |  244 +++++---
 plink_misc.h    |    1 +
 plink_perm.c    |  333 +++++++++++
 plink_perm.h    |  196 +++++++
 plink_set.c     |   85 ++-
 plink_set.h     |    4 +-
 27 files changed, 4186 insertions(+), 2313 deletions(-)

diff --git a/Makefile b/Makefile
index c7ef5ee..e204486 100644
--- a/Makefile
+++ b/Makefile
@@ -49,7 +49,7 @@ ifdef NO_LAPACK
   BLASFLAGS=
 endif
 
-SRC = plink.c plink_assoc.c plink_calc.c plink_cluster.c plink_cnv.c plink_common.c plink_data.c plink_dosage.c plink_family.c plink_filter.c plink_glm.c plink_help.c plink_homozyg.c plink_lasso.c plink_ld.c plink_matrix.c plink_misc.c plink_rserve.c plink_set.c plink_stats.c SFMT.c dcdflib.c pigz.c yarn.c Rconnection.cc hfile.c bgzf.c
+SRC = plink.c plink_assoc.c plink_calc.c plink_cluster.c plink_cnv.c plink_common.c plink_data.c plink_dosage.c plink_family.c plink_filter.c plink_glm.c plink_help.c plink_homozyg.c plink_lasso.c plink_ld.c plink_matrix.c plink_misc.c plink_perm.c plink_rserve.c plink_set.c plink_stats.c SFMT.c dcdflib.c pigz.c yarn.c Rconnection.cc hfile.c bgzf.c
 
 # In the event that you are still concurrently using PLINK 1.07, we suggest
 # renaming that binary to "plink107" and "plink1".  (Previously,
diff --git a/plink.c b/plink.c
index 2a006fa..de1f23c 100644
--- a/plink.c
+++ b/plink.c
@@ -91,7 +91,7 @@
 
 const char ver_str[] =
 #ifdef STABLE_BUILD
-  "PLINK v1.90b3w"
+  "PLINK v1.90b3.28"
 #else
   "PLINK v1.90p"
 #endif
@@ -103,19 +103,19 @@ const char ver_str[] =
 #else
   " 32-bit"
 #endif
-  " (3 Sep 2015)";
+  " (16 Dec 2015)";
 const char ver_str2[] =
   // include leading space if day < 10, so character length stays the same
-  " "
+  ""
 #ifdef STABLE_BUILD
   "" // (don't want this when version number has a trailing letter)
 #else
-  "  " // (don't want this when version number has e.g. "b3" before "p")
+  "    " // (don't want this when version number has e.g. "b3" before "p")
 #endif
 #ifndef NOLAPACK
   "  "
 #endif
-  "      https://www.cog-genomics.org/plink2\n"
+  "    https://www.cog-genomics.org/plink2\n"
   "(C) 2005-2015 Shaun Purcell, Christopher Chang   GNU General Public License v3\n";
 const char errstr_append[] = "For more information, try '" PROG_NAME_STR " --help [flag name]' or '" PROG_NAME_STR " --help | more'.\n";
 #ifdef STABLE_BUILD
@@ -236,7 +236,7 @@ void calc_marker_reverse_bin(uintptr_t* marker_reverse, uintptr_t* marker_exclud
   uint32_t markers_done = 0;
   uint32_t marker_uidx_stop;
   double dxx;
-  do {
+  while (markers_done < marker_ct) {
     marker_uidx = next_unset_unsafe(marker_exclude, marker_uidx);
     marker_uidx_stop = next_set(marker_exclude, marker_uidx, unfiltered_marker_ct);
     markers_done += marker_uidx_stop - marker_uidx;
@@ -247,7 +247,7 @@ void calc_marker_reverse_bin(uintptr_t* marker_reverse, uintptr_t* marker_exclud
 	set_allele_freqs[marker_uidx] = 1.0 - dxx;
       }
     }
-  } while (markers_done < marker_ct);
+  }
 }
 
 void swap_reversed_marker_alleles(uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, char** marker_allele_ptrs) {
@@ -288,7 +288,7 @@ static inline int32_t relationship_or_ibc_req(uint64_t calculation_type) {
   return (relationship_req(calculation_type) || (calculation_type & CALC_IBC));
 }
 
-int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, char* famname, char* cm_map_fname, char* cm_map_chrname, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* freqname, char* distance_wts_fname, char* read_dists_fname, char* read_dists_id_fname, char* evecname, char* mergename1, char* mergename2, char* mergename3, char* missing_mid_template, char* missing_marke [...]
+int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, char* famname, char* cm_map_fname, char* cm_map_chrname, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* freqname, char* distance_wts_fname, char* read_dists_fname, char* read_dists_id_fname, char* evecname, char* mergename1, char* mergename2, char* mergename3, char* missing_mid_template, char* missing_marke [...]
   FILE* bedfile = NULL;
   FILE* phenofile = NULL;
   uintptr_t unfiltered_marker_ct = 0;
@@ -320,6 +320,8 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   uint32_t uii = 0;
   int64_t llxx = 0;
   uint32_t nonfounders = (misc_flags / MISC_NONFOUNDERS) & 1;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   uint32_t pheno_all = pheno_modifier & PHENO_ALL;
   char* marker_ids = NULL;
   uint32_t* marker_id_htable = NULL;
@@ -386,6 +388,9 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   uint32_t plink_maxfid = 0;
   uint32_t plink_maxiid = 0;
   uint32_t max_bim_linelen = 0;
+  uint32_t sample_male_ct = 0;
+  uint32_t sample_f_ct = 0;
+  uint32_t sample_f_male_ct = 0;
   unsigned char* wkspace_mark2 = NULL;
   unsigned char* wkspace_mark_precluster = NULL;
   unsigned char* wkspace_mark_postcluster = NULL;
@@ -411,9 +416,6 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   int32_t ii;
   int64_t llyy;
   int64_t llzz;
-  uint32_t sample_male_ct;
-  uint32_t sample_f_ct;
-  uint32_t sample_f_male_ct;
   Pedigree_rel_info pri;
   uintptr_t marker_uidx;
 
@@ -533,9 +535,9 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	goto plink_ret_1;
       }
       if (ulii > 80) {
-	// only warn on long new marker ID, since if there's a long old marker ID
-	// and no long new one, it's reasonable to infer that the user is fixing
-	// the problem, so we shouldn't spam them.
+	// only warn on long new marker ID, since if there's a long old marker
+	// ID and no long new one, it's reasonable to infer that the user is
+	// fixing the problem, so we shouldn't spam them.
 	logerrprint("Warning: Unusually long new variant ID(s) in --update-name file.  Double-check\nyour file and command-line parameters, and consider changing your naming\nscheme if you encounter memory problems.\n");
       }
       if (ulii > max_marker_id_len) {
@@ -573,7 +575,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       }
     }
 
-    retval = load_fam(famname, fam_cols, uii, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, &unfiltered_sample_ct, &sample_ids, &max_sample_id_len, &paternal_ids, &max_paternal_id_len, &maternal_ids, &max_maternal_id_len, &sex_nm, &sex_male, &affection, &pheno_nm, &pheno_c, &pheno_d, &founder_info, &sample_exclude);
+    retval = load_fam(famname, fam_cols, uii, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, &unfiltered_sample_ct, &sample_ids, &max_sample_id_len, &paternal_ids, &max_paternal_id_len, &maternal_ids, &max_maternal_id_len, &sex_nm, &sex_male, &affection, &pheno_nm, &pheno_c, &pheno_d, &founder_info, &sample_exclude, allow_no_samples);
     if (retval) {
       goto plink_ret_1;
     }
@@ -615,131 +617,133 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     } else {
       LOGPRINTF("%" PRIuPTR " %s (%d male%s, %d female%s) loaded from .fam.\n", unfiltered_sample_ct, species_str(unfiltered_sample_ct), uii, (uii == 1)? "" : "s", ujj, (ujj == 1)? "" : "s");
     }
-    uii = popcount_longs(pheno_nm, unfiltered_sample_ctl);
-    if (uii) {
-      LOGPRINTF("%u phenotype value%s loaded from .fam.\n", uii, (uii == 1)? "" : "s");
-    }
-
-    if (phenoname && fopen_checked(&phenofile, phenoname, "r")) {
-      goto plink_ret_OPEN_FAIL;
-    }
+    if (unfiltered_sample_ct) {
+      uii = popcount_longs(pheno_nm, unfiltered_sample_ctl);
+      if (uii) {
+	LOGPRINTF("%u phenotype value%s loaded from .fam.\n", uii, (uii == 1)? "" : "s");
+      }
 
-    if (phenofile || update_ids_fname || update_parents_fname || update_sex_fname || (filter_flags & FILTER_TAIL_PHENO)) {
-      wkspace_mark = wkspace_base;
-      retval = sort_item_ids(&cptr, &uiptr, unfiltered_sample_ct, sample_exclude, 0, sample_ids, max_sample_id_len, 0, 0, strcmp_deref);
-      if (retval) {
-	goto plink_ret_1;
+      if (phenoname && fopen_checked(&phenofile, phenoname, "r")) {
+	goto plink_ret_OPEN_FAIL;
       }
 
-      if (makepheno_str) {
-	retval = makepheno_load(phenofile, makepheno_str, unfiltered_sample_ct, cptr, max_sample_id_len, uiptr, pheno_nm, &pheno_c);
+      if (phenofile || update_ids_fname || update_parents_fname || update_sex_fname || (filter_flags & FILTER_TAIL_PHENO)) {
+	wkspace_mark = wkspace_base;
+	retval = sort_item_ids(&cptr, &uiptr, unfiltered_sample_ct, sample_exclude, 0, sample_ids, max_sample_id_len, 0, 0, strcmp_deref);
 	if (retval) {
 	  goto plink_ret_1;
 	}
-      } else if (phenofile) {
-	retval = load_pheno(phenofile, unfiltered_sample_ct, 0, cptr, max_sample_id_len, uiptr, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, NULL, 0);
-	if (retval) {
-	  if (retval == LOAD_PHENO_LAST_COL) {
-	    logerrprintb();
-	    retval = RET_INVALID_FORMAT;
-	    wkspace_reset(wkspace_mark);
+
+	if (makepheno_str) {
+	  retval = makepheno_load(phenofile, makepheno_str, unfiltered_sample_ct, cptr, max_sample_id_len, uiptr, pheno_nm, &pheno_c);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	} else if (phenofile) {
+	  retval = load_pheno(phenofile, unfiltered_sample_ct, 0, cptr, max_sample_id_len, uiptr, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, NULL, 0);
+	  if (retval) {
+	    if (retval == LOAD_PHENO_LAST_COL) {
+	      logerrprintb();
+	      retval = RET_INVALID_FORMAT;
+	      wkspace_reset(wkspace_mark);
+	    }
+	    goto plink_ret_1;
 	  }
-	  goto plink_ret_1;
 	}
-      }
-      if (filter_flags & FILTER_TAIL_PHENO) {
-	retval = convert_tail_pheno(unfiltered_sample_ct, pheno_nm, &pheno_c, &pheno_d, tail_bottom, tail_top, missing_phenod);
-	if (retval) {
-	  goto plink_ret_1;
+	if (filter_flags & FILTER_TAIL_PHENO) {
+	  retval = convert_tail_pheno(unfiltered_sample_ct, pheno_nm, &pheno_c, &pheno_d, tail_bottom, tail_top, missing_phenod);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
 	}
+	wkspace_reset(wkspace_mark);
       }
-      wkspace_reset(wkspace_mark);
-    }
 
-    if (pheno_c) {
-      /*
-      if (calculation_type & (CALC_REGRESS_PCS | CALC_REGRESS_PCS_DISTANCE)) {
-	sprintf(logbuf, "Error: --regress-pcs%s requires a scalar phenotype.\n", (calculation_type & CALC_REGRESS_PCS_DISTANCE)? "-distance" : "");
-	goto plink_ret_INVALID_CMDLINE_2;
-      */
-      if (calculation_type & (CALC_REGRESS_REL | CALC_REGRESS_DISTANCE | CALC_UNRELATED_HERITABILITY | CALC_GXE)) {
-	if (calculation_type & CALC_REGRESS_REL) {
-	  logerrprint("Error: --regress-rel calculation requires a scalar phenotype.\n");
-	} else if (calculation_type & CALC_REGRESS_DISTANCE) {
-	  logerrprint("Error: --regress-distance calculation requires a scalar phenotype.\n");
-	} else if (calculation_type & CALC_UNRELATED_HERITABILITY) {
-	  logerrprint("Error: --unrelated-heritability requires a scalar phenotype.\n");
-	} else if (calculation_type & CALC_GXE) {
-	  logerrprint("Error: --gxe requires a scalar phenotype.\n");
-	}
-	goto plink_ret_INVALID_CMDLINE;
-      }
-    } else {
-      if (calculation_type & CALC_CLUSTER) {
-	if (cluster_ptr->modifier & CLUSTER_CC) {
-	  logerrprint("Error: --cc requires a case/control phenotype.\n");
-	  goto plink_ret_INVALID_CMDLINE;
-	} else if ((cluster_ptr->max_cases != 0xffffffffU) || (cluster_ptr->max_ctrls != 0xffffffffU)) {
-	  logerrprint("Error: --mcc requires a case/control phenotype.\n");
+      if (pheno_c) {
+	/*
+	if (calculation_type & (CALC_REGRESS_PCS | CALC_REGRESS_PCS_DISTANCE)) {
+	  sprintf(logbuf, "Error: --regress-pcs%s requires a scalar phenotype.\n", (calculation_type & CALC_REGRESS_PCS_DISTANCE)? "-distance" : "");
+	  goto plink_ret_INVALID_CMDLINE_2;
+	*/
+	if (calculation_type & (CALC_REGRESS_REL | CALC_REGRESS_DISTANCE | CALC_UNRELATED_HERITABILITY | CALC_GXE)) {
+	  if (calculation_type & CALC_REGRESS_REL) {
+	    logerrprint("Error: --regress-rel calculation requires a scalar phenotype.\n");
+	  } else if (calculation_type & CALC_REGRESS_DISTANCE) {
+	    logerrprint("Error: --regress-distance calculation requires a scalar phenotype.\n");
+	  } else if (calculation_type & CALC_UNRELATED_HERITABILITY) {
+	    logerrprint("Error: --unrelated-heritability requires a scalar phenotype.\n");
+	  } else if (calculation_type & CALC_GXE) {
+	    logerrprint("Error: --gxe requires a scalar phenotype.\n");
+	  }
 	  goto plink_ret_INVALID_CMDLINE;
 	}
-      } else if ((calculation_type & CALC_EPI) && (epi_ip->modifier & EPI_FAST)) {
-	logerrprint("Error: --fast-epistasis requires a case/control phenotype.\n");
-	goto plink_ret_INVALID_CMDLINE;
-      } else if (calculation_type & (CALC_IBS_TEST | CALC_GROUPDIST | CALC_FLIPSCAN)) {
-	if (calculation_type & (CALC_IBS_TEST | CALC_GROUPDIST)) {
-	  logerrprint("Error: --ibs-test and --groupdist calculations require a case/control\nphenotype.\n");
-	} else if (calculation_type & CALC_FLIPSCAN) {
-	  logerrprint("Error: --flip-scan requires a case/control phenotype.\n");
-	}
-	goto plink_ret_INVALID_CMDLINE;
-      } else if ((calculation_type & CALC_RECODE) && (recode_modifier & (RECODE_HV | RECODE_HV_1CHR))) {
-	logerrprint("Error: --recode HV{-1chr} requires a case/control phenotype.\n");
-	goto plink_ret_INVALID_CMDLINE;
-      } else if ((calculation_type & CALC_FST) && (misc_flags & MISC_FST_CC)) {
-	logerrprint("Error: '--fst case-control' requires a case/control phenotype.\n");
-	goto plink_ret_INVALID_CMDLINE;
-      } else if ((calculation_type & CALC_FREQ) && (misc_flags & MISC_FREQ_CC)) {
-	logerrprint("Error: '--freq case-control' requires a case/control phenotype.\n");
-	goto plink_ret_INVALID_CMDLINE;
-      }
-    }
-
-    if (!pheno_all) {
-      if (loop_assoc_fname || (!pheno_d)) {
-	if ((calculation_type & CALC_GLM) && (!(glm_modifier & GLM_LOGISTIC))) {
-	  logerrprint("Error: --linear without --all-pheno requires a scalar phenotype.\n");
+      } else {
+	if (calculation_type & CALC_CLUSTER) {
+	  if (cluster_ptr->modifier & CLUSTER_CC) {
+	    logerrprint("Error: --cc requires a case/control phenotype.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  } else if ((cluster_ptr->max_cases != 0xffffffffU) || (cluster_ptr->max_ctrls != 0xffffffffU)) {
+	    logerrprint("Error: --mcc requires a case/control phenotype.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  }
+	} else if ((calculation_type & CALC_EPI) && (epi_ip->modifier & EPI_FAST)) {
+	  logerrprint("Error: --fast-epistasis requires a case/control phenotype.\n");
 	  goto plink_ret_INVALID_CMDLINE;
-	} else if (calculation_type & CALC_QFAM) {
-	  logerrprint("Error: QFAM test requires a scalar phenotype.\n");
+	} else if (calculation_type & (CALC_IBS_TEST | CALC_GROUPDIST | CALC_FLIPSCAN)) {
+	  if (calculation_type & (CALC_IBS_TEST | CALC_GROUPDIST)) {
+	    logerrprint("Error: --ibs-test and --groupdist calculations require a case/control\nphenotype.\n");
+	  } else if (calculation_type & CALC_FLIPSCAN) {
+	    logerrprint("Error: --flip-scan requires a case/control phenotype.\n");
+	  }
 	  goto plink_ret_INVALID_CMDLINE;
-	}
-      } else if (!pheno_c) {
-	if ((calculation_type & CALC_MODEL) && (!(model_modifier & MODEL_ASSOC))) {
-	  logerrprint("Error: --model requires a case/control phenotype.\n");
+	} else if ((calculation_type & CALC_RECODE) && (recode_modifier & (RECODE_HV | RECODE_HV_1CHR))) {
+	  logerrprint("Error: --recode HV{-1chr} requires a case/control phenotype.\n");
 	  goto plink_ret_INVALID_CMDLINE;
-	} else if ((calculation_type & CALC_GLM) && (glm_modifier & GLM_LOGISTIC)) {
-	  logerrprint("Error: --logistic without --all-pheno requires a case/control phenotype.\n");
+	} else if ((calculation_type & CALC_FST) && (misc_flags & MISC_FST_CC)) {
+	  logerrprint("Error: '--fst case-control' requires a case/control phenotype.\n");
 	  goto plink_ret_INVALID_CMDLINE;
-	} else if (calculation_type & (CALC_CMH | CALC_HOMOG | CALC_TESTMISS | CALC_TDT | CALC_DFAM)) {
-	  if (calculation_type & CALC_CMH) {
-	    logerrprint("Error: --mh and --mh2 require a case/control phenotype.\n");
-	  } else if (calculation_type & CALC_HOMOG) {
-	    logerrprint("Error: --homog requires a case/control phenotype.\n");
-	  } else if (calculation_type & CALC_TESTMISS) {
-	    logerrprint("Error: --test-missing requires a case/control phenotype.\n");
-	  } else if (calculation_type & CALC_TDT) {
-	    logerrprint("Error: --tdt requires a case/control phenotype.\n");
-	  } else {
-	    logerrprint("Error: --dfam requires a case/control phenotype.\n");
-	  }
+	} else if ((calculation_type & CALC_FREQ) && (misc_flags & MISC_FREQ_CC)) {
+	  logerrprint("Error: '--freq case-control' requires a case/control phenotype.\n");
 	  goto plink_ret_INVALID_CMDLINE;
 	}
       }
+
+      if (!pheno_all) {
+	if (loop_assoc_fname || (!pheno_d)) {
+	  if ((calculation_type & CALC_GLM) && (!(glm_modifier & GLM_LOGISTIC))) {
+	    logerrprint("Error: --linear without --all-pheno requires a scalar phenotype.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  } else if (calculation_type & CALC_QFAM) {
+	    logerrprint("Error: QFAM test requires a scalar phenotype.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  }
+	} else if (!pheno_c) {
+	  if ((calculation_type & CALC_MODEL) && (!(model_modifier & MODEL_ASSOC))) {
+	    logerrprint("Error: --model requires a case/control phenotype.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  } else if ((calculation_type & CALC_GLM) && (glm_modifier & GLM_LOGISTIC)) {
+	    logerrprint("Error: --logistic without --all-pheno requires a case/control phenotype.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  } else if (calculation_type & (CALC_CMH | CALC_HOMOG | CALC_TESTMISS | CALC_TDT | CALC_DFAM)) {
+	    if (calculation_type & CALC_CMH) {
+	      logerrprint("Error: --mh and --mh2 require a case/control phenotype.\n");
+	    } else if (calculation_type & CALC_HOMOG) {
+	      logerrprint("Error: --homog requires a case/control phenotype.\n");
+	    } else if (calculation_type & CALC_TESTMISS) {
+	      logerrprint("Error: --test-missing requires a case/control phenotype.\n");
+	    } else if (calculation_type & CALC_TDT) {
+	      logerrprint("Error: --tdt requires a case/control phenotype.\n");
+	    } else {
+	      logerrprint("Error: --dfam requires a case/control phenotype.\n");
+	    }
+	    goto plink_ret_INVALID_CMDLINE;
+	  }
+	}
+      }
     }
   }
 
-  if (cm_map_fname) {
+  if (cm_map_fname && unfiltered_marker_ct) {
     // need sorted bps, but not marker IDs
     if (map_is_unsorted & UNSORTED_BP) {
       logerrprint("Error: --cm-map requires a sorted .bim file.  Retry this command after using\n--make-bed to sort your data.\n");
@@ -751,112 +755,120 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
   }
 
-  uii = update_cm || update_map || update_name || (marker_alleles_needed && (update_alleles_fname || (flip_fname && (!flip_subset_fname)))) || filter_attrib_fname || qual_filter;
-  if (uii || extractname || excludename) {
-    // only permit duplicate marker IDs for --extract/--exclude
-    wkspace_mark = wkspace_base;
-    retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, !uii, &marker_id_htable, &marker_id_htable_size);
-    if (retval) {
-      goto plink_ret_1;
-    }
-    if (update_cm) {
-      retval = update_marker_cms(update_cm, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_cms);
-      if (retval) {
-	goto plink_ret_1;
-      }
-    }
-    if (update_map) {
-      retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
-    } else if (update_name) {
-      retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct);
+  if (unfiltered_marker_ct != marker_exclude_ct) {
+    uii = update_cm || update_map || update_name || (marker_alleles_needed && (update_alleles_fname || (flip_fname && (!flip_subset_fname)))) || filter_attrib_fname || qual_filter;
+    if (uii || extractname || excludename) {
+      // only permit duplicate marker IDs for --extract/--exclude
+      wkspace_mark = wkspace_base;
+      retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, !uii, &marker_id_htable, &marker_id_htable_size);
       if (retval) {
 	goto plink_ret_1;
       }
-      if (update_alleles_fname || (marker_alleles_needed && flip_fname && (!flip_subset_fname)) || extractname || excludename) {
-	wkspace_reset(wkspace_mark);
-        retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable, &marker_id_htable_size);
-      }
-    }
-    if (marker_alleles_needed) {
-      if (update_alleles_fname) {
-        retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_len, outname, outname_end);
-        if (retval) {
-	  goto plink_ret_1;
-        }
-      }
-      if (flip_fname && (!flip_subset_fname)) {
-        retval = flip_strand(flip_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs);
-        if (retval) {
+      if (update_cm) {
+	retval = update_marker_cms(update_cm, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_cms);
+	if (retval) {
 	  goto plink_ret_1;
-        }
+	}
       }
-    }
-    if (extractname) {
-      if (!(misc_flags & MISC_EXTRACT_RANGE)) {
-        retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+      if (update_map) {
+	retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
 	if (retval) {
 	  goto plink_ret_1;
 	}
-      } else {
-	if (map_is_unsorted & UNSORTED_BP) {
-	  logerrprint("Error: '--extract range' requires a sorted .bim.  Retry this command after\nusing --make-bed to sort your data.\n");
-	  goto plink_ret_INVALID_CMDLINE;
-	}
-        retval = extract_exclude_range(extractname, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0, chrom_info_ptr);
+      } else if (update_name) {
+	retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct);
 	if (retval) {
 	  goto plink_ret_1;
 	}
-	uljj = unfiltered_marker_ct - marker_exclude_ct;
-	LOGPRINTF("--extract range: %" PRIuPTR " variant%s remaining.\n", uljj, (uljj == 1)? "" : "s");
+	if (update_alleles_fname || (marker_alleles_needed && flip_fname && (!flip_subset_fname)) || extractname || excludename) {
+	  wkspace_reset(wkspace_mark);
+	  retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable, &marker_id_htable_size);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	}
       }
-    }
-    if (excludename) {
-      if (!(misc_flags & MISC_EXCLUDE_RANGE)) {
-	retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+      if (marker_alleles_needed) {
+	if (update_alleles_fname) {
+	  retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_len, outname, outname_end);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	}
+	if (flip_fname && (!flip_subset_fname)) {
+	  retval = flip_strand(flip_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	}
+      }
+      if (extractname) {
+	if (!(misc_flags & MISC_EXTRACT_RANGE)) {
+	  retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	} else {
+	  if (map_is_unsorted & UNSORTED_BP) {
+	    logerrprint("Error: '--extract range' requires a sorted .bim.  Retry this command after\nusing --make-bed to sort your data.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  }
+	  retval = extract_exclude_range(extractname, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0, allow_no_variants, chrom_info_ptr);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	  uljj = unfiltered_marker_ct - marker_exclude_ct;
+	  LOGPRINTF("--extract range: %" PRIuPTR " variant%s remaining.\n", uljj, (uljj == 1)? "" : "s");
+	}
+      }
+      if (excludename) {
+	if (!(misc_flags & MISC_EXCLUDE_RANGE)) {
+	  retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	} else {
+	  if (map_is_unsorted & UNSORTED_BP) {
+	    logerrprint("Error: '--exclude range' requires a sorted .bim.  Retry this command after\nusing --make-bed to sort your data.\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  }
+	  retval = extract_exclude_range(excludename, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 1, allow_no_variants, chrom_info_ptr);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	  uljj = unfiltered_marker_ct - marker_exclude_ct;
+	  LOGPRINTF("--exclude range: %" PRIuPTR " variant%s remaining.\n", uljj, (uljj == 1)? "" : "s");
+	}
+      }
+      if (filter_attrib_fname) {
+	retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink_ret_1;
 	}
-      } else {
-	if (map_is_unsorted & UNSORTED_BP) {
-	  logerrprint("Error: '--exclude range' requires a sorted .bim.  Retry this command after\nusing --make-bed to sort your data.\n");
-	  goto plink_ret_INVALID_CMDLINE;
-	}
-        retval = extract_exclude_range(excludename, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 1, chrom_info_ptr);
+      }
+      if (qual_filter) {
+	retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink_ret_1;
 	}
-	uljj = unfiltered_marker_ct - marker_exclude_ct;
-	LOGPRINTF("--exclude range: %" PRIuPTR " variant%s remaining.\n", uljj, (uljj == 1)? "" : "s");
       }
+      wkspace_reset(wkspace_mark);
     }
-    if (filter_attrib_fname) {
-      retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
-      if (retval) {
-	goto plink_ret_1;
-      }
+
+    if (allelexxxx) {
+      allelexxxx_recode(allelexxxx, marker_allele_ptrs, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct);
     }
-    if (qual_filter) {
-      retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+
+    if (thin_keep_prob != 1.0) {
+      if (random_thin_markers(thin_keep_prob, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants)) {
+	goto plink_ret_ALL_MARKERS_EXCLUDED;
+      }
+    } else if (thin_keep_ct != 0xffffffffU) {
+      retval = random_thin_markers_ct(thin_keep_ct, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
       if (retval) {
-        goto plink_ret_1;
+	goto plink_ret_1;
       }
     }
-    wkspace_reset(wkspace_mark);
-  }
-
-  if (allelexxxx) {
-    allelexxxx_recode(allelexxxx, marker_allele_ptrs, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct);
-  }
-
-  if (thin_keep_prob != 1.0) {
-    if (random_thin_markers(thin_keep_prob, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct)) {
-      goto plink_ret_ALL_MARKERS_EXCLUDED;
-    }
-  } else if (thin_keep_ct) {
-    retval = random_thin_markers_ct(thin_keep_ct, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
-    if (retval) {
-      goto plink_ret_1;
-    }
   }
 
   if (bedfile) {
@@ -888,13 +900,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       bed_offset = 2;
     } else {
       // pre-v0.99, sample-major, no header bytes
-      llyy = llzz;
-      if (llxx != llyy) {
+      if (llxx != llzz) {
 	// probably not PLINK-format at all, so give this error instead of
 	// "invalid file size"
 	logerrprint("Error: Invalid header bytes in .bed file.\n");
 	goto plink_ret_INVALID_FORMAT;
       }
+      llyy = llzz;
       bed_offset = 2;
     }
     if (llxx != llyy) {
@@ -924,63 +936,63 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
   }
 
-  if (update_ids_fname || update_parents_fname || update_sex_fname || keepname || keepfamname || removename || removefamname || filter_attrib_sample_fname || om_ip->marker_fname || filtername) {
+  if (unfiltered_sample_ct && (update_ids_fname || update_parents_fname || update_sex_fname || keepname || keepfamname || removename || removefamname || filter_attrib_sample_fname || om_ip->marker_fname || filtername)) {
     wkspace_mark = wkspace_base;
-    retval = sort_item_ids(&cptr, &uiptr, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, 0, 0, strcmp_deref);
+    retval = sort_item_ids(&cptr, &uiptr, unfiltered_sample_ct, sample_exclude, 0, sample_ids, max_sample_id_len, 0, 0, strcmp_deref);
     if (retval) {
       goto plink_ret_1;
     }
-    ulii = unfiltered_sample_ct - sample_exclude_ct;
     if (update_ids_fname) {
-      retval = update_sample_ids(update_ids_fname, cptr, ulii, max_sample_id_len, uiptr, sample_ids);
+      retval = update_sample_ids(update_ids_fname, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, sample_ids);
       if (retval) {
 	goto plink_ret_1;
       }
       wkspace_reset(wkspace_base);
-      retval = sort_item_ids(&cptr, &uiptr, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, 0, 0, strcmp_deref);
+      retval = sort_item_ids(&cptr, &uiptr, unfiltered_sample_ct, sample_exclude, 0, sample_ids, max_sample_id_len, 0, 0, strcmp_deref);
       if (retval) {
 	goto plink_ret_1;
       }
     } else {
       if (update_parents_fname) {
-	retval = update_sample_parents(update_parents_fname, cptr, ulii, max_sample_id_len, uiptr, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info);
+	retval = update_sample_parents(update_parents_fname, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info);
 	if (retval) {
 	  goto plink_ret_1;
 	}
       }
       if (update_sex_fname) {
-        retval = update_sample_sexes(update_sex_fname, update_sex_col, cptr, ulii, max_sample_id_len, uiptr, sex_nm, sex_male);
+        retval = update_sample_sexes(update_sex_fname, update_sex_col, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, sex_nm, sex_male);
 	if (retval) {
 	  goto plink_ret_1;
 	}
       }
     }
+    // sample_exclude_ct assumed to be 0 before this point
     if (keepfamname) {
-      retval = keep_or_remove(keepfamname, cptr, ulii, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 2);
+      retval = keep_or_remove(keepfamname, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 2, allow_no_samples);
       if (retval) {
 	goto plink_ret_1;
       }
     }
     if (keepname) {
-      retval = keep_or_remove(keepname, cptr, ulii, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 0);
+      retval = keep_or_remove(keepname, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 0, allow_no_samples);
       if (retval) {
 	goto plink_ret_1;
       }
     }
     if (removefamname) {
-      retval = keep_or_remove(removefamname, cptr, ulii, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 3);
+      retval = keep_or_remove(removefamname, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 3, allow_no_samples);
       if (retval) {
 	goto plink_ret_1;
       }
     }
     if (removename) {
-      retval = keep_or_remove(removename, cptr, ulii, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 1);
+      retval = keep_or_remove(removename, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 1, allow_no_samples);
       if (retval) {
 	goto plink_ret_1;
       }
     }
     if (filter_attrib_sample_fname) {
-      retval = filter_attrib_sample(filter_attrib_sample_fname, filter_attrib_sample_liststr, cptr, ulii, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct);
+      retval = filter_attrib_sample(filter_attrib_sample_fname, filter_attrib_sample_liststr, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, allow_no_samples, sample_exclude, &sample_exclude_ct);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -988,16 +1000,23 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     if (om_ip->marker_fname) {
       // would rather do this with pre-sorted markers, but that might break
       // order-of-operations assumptions in existing pipelines
-      retval = load_oblig_missing(bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, cptr, ulii, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, sex_male, chrom_info_ptr, om_ip);
-      if (retval) {
-	goto plink_ret_1;
+      if ((unfiltered_sample_ct == sample_exclude_ct) || (unfiltered_marker_ct == marker_exclude_ct)) {
+	// don't need this if everything that refers to om_ip is skipped
+        oblig_missing_cleanup(om_ip);
+	om_ip->cluster_ct = 0;
+	om_ip->entry_ct = 0;
+      } else {
+	retval = load_oblig_missing(bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sex_male, chrom_info_ptr, om_ip);
+	if (retval) {
+	  goto plink_ret_1;
+	}
       }
     }
     if (filtername) {
       if (!mfilter_col) {
 	mfilter_col = 1;
       }
-      retval = filter_samples_file(filtername, cptr, ulii, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, filtervals_flattened, mfilter_col);
+      retval = filter_samples_file(filtername, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, filtervals_flattened, mfilter_col, allow_no_samples);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1005,7 +1024,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     wkspace_reset(wkspace_mark);
   }
 
-  if (famname[0]) {
+  if (famname[0] && (unfiltered_sample_ct != sample_exclude_ct)) {
     if (gender_unk_ct && (!(sex_missing_pheno & ALLOW_NO_SEX))) {
       uii = popcount_longs_exclude(pheno_nm, sex_nm, unfiltered_sample_ctl);
       if (uii) {
@@ -1028,11 +1047,11 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       bitfield_ornot(sample_exclude, pheno_nm, unfiltered_sample_ctl);
       zero_trailing_bits(sample_exclude, unfiltered_sample_ct);
       sample_exclude_ct = popcount_longs(sample_exclude, unfiltered_sample_ctl);
-      if (sample_exclude_ct == unfiltered_sample_ct) {
+      if ((sample_exclude_ct == unfiltered_sample_ct) && (!allow_no_samples)) {
 	LOGERRPRINTF("Error: All %s removed by --prune.\n", g_species_plural);
 	goto plink_ret_ALL_SAMPLES_EXCLUDED;
       }
-      LOGPRINTF("--prune: %" PRIuPTR " %s remaining.\n", unfiltered_sample_ct - sample_exclude_ct, species_str(unfiltered_sample_ct == sample_exclude_ct + 1));
+      LOGPRINTF("--prune: %" PRIuPTR " %s remaining.\n", unfiltered_sample_ct - sample_exclude_ct, species_str(unfiltered_sample_ct - sample_exclude_ct));
     }
 
     if (filter_flags & (FILTER_BINARY_CASES | FILTER_BINARY_CONTROLS)) {
@@ -1045,7 +1064,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       // fcc == 2: exclude all ones in pheno_c
       // -> flip on fcc == 1
       filter_samples_bitfields(unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, pheno_c, (filter_flags / FILTER_BINARY_CASES) & 1, pheno_nm);
-      if (sample_exclude_ct == unfiltered_sample_ct) {
+      if ((sample_exclude_ct == unfiltered_sample_ct) && (!allow_no_samples)) {
 	LOGERRPRINTF("Error: All %s removed due to case/control status (--filter-%s).\n", g_species_plural, (filter_flags & FILTER_BINARY_CASES)? "cases" : "controls");
 	goto plink_ret_ALL_SAMPLES_EXCLUDED;
       }
@@ -1055,7 +1074,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     if (filter_flags & (FILTER_BINARY_FEMALES | FILTER_BINARY_MALES)) {
       ii = sample_exclude_ct;
       filter_samples_bitfields(unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sex_male, (filter_flags / FILTER_BINARY_MALES) & 1, sex_nm);
-      if (sample_exclude_ct == unfiltered_sample_ct) {
+      if ((sample_exclude_ct == unfiltered_sample_ct) && (!allow_no_samples)) {
 	LOGERRPRINTF("Error: All %s removed due to gender filter (--filter-%s).\n", g_species_plural, (filter_flags & FILTER_BINARY_MALES)? "males" : "females");
 	goto plink_ret_ALL_SAMPLES_EXCLUDED;
       }
@@ -1065,7 +1084,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     if (filter_flags & (FILTER_BINARY_FOUNDERS | FILTER_BINARY_NONFOUNDERS)) {
       ii = sample_exclude_ct;
       filter_samples_bitfields(unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, (filter_flags / FILTER_BINARY_FOUNDERS) & 1, NULL);
-      if (sample_exclude_ct == unfiltered_sample_ct) {
+      if ((sample_exclude_ct == unfiltered_sample_ct) && (!allow_no_samples)) {
 	LOGERRPRINTF("Error: All %s removed due to founder status (--filter-%s).\n", g_species_plural, (filter_flags & FILTER_BINARY_FOUNDERS)? "founders" : "nonfounders");
 	goto plink_ret_ALL_SAMPLES_EXCLUDED;
       }
@@ -1074,10 +1093,10 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
 
     if (thin_keep_sample_prob != 1.0) {
-      if (random_thin_samples(thin_keep_sample_prob, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct)) {
+      if (random_thin_samples(thin_keep_sample_prob, unfiltered_sample_ct, allow_no_samples, sample_exclude, &sample_exclude_ct)) {
         goto plink_ret_ALL_SAMPLES_EXCLUDED;
       }
-    } else if (thin_keep_sample_ct) {
+    } else if (thin_keep_sample_ct != 0xffffffffU) {
       retval = random_thin_samples_ct(thin_keep_sample_ct, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct);
       if (retval) {
         goto plink_ret_1;
@@ -1085,7 +1104,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
 
     if (mind_thresh < 1.0) {
-      retval = mind_filter(bedfile, bed_offset, outname, outname_end, mind_thresh, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, sex_male, chrom_info_ptr, om_ip);
+      retval = mind_filter(bedfile, bed_offset, outname, outname_end, mind_thresh, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, sex_male, chrom_info_ptr, om_ip, allow_no_samples);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1094,27 +1113,25 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       // could save off wkspace_mark here and free immediately after
       // load_clusters(), if clusters are *only* used for filtering.  But not a
       // big deal.
-      retval = load_clusters(cluster_ptr->fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, cluster_ptr->keep_fname, cluster_ptr->keep_flattened, cluster_ptr->remove_fname, cluster_ptr->remove_flattened);
+      retval = load_clusters(cluster_ptr->fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, cluster_ptr->keep_fname, cluster_ptr->keep_flattened, cluster_ptr->remove_fname, cluster_ptr->remove_flattened, allow_no_samples);
       if (retval) {
 	goto plink_ret_1;
       }
     }
     sample_ct = unfiltered_sample_ct - sample_exclude_ct;
-    if (!sample_ct) {
-      // defensive; currently shouldn't happen since we're actually checking at
-      // every filter
+    if ((!sample_ct) && (!allow_no_samples)) {
+      // defensive; currently shouldn't happen since we're checking at every
+      // filter
       LOGERRPRINTF("Error: No %s pass QC.\n", g_species_plural);
       goto plink_ret_ALL_SAMPLES_EXCLUDED;
     }
 
-    if ((sample_ct == 1) && (relationship_or_ibc_req(calculation_type) || distance_req(calculation_type, read_dists_fname) || (calculation_type & (CALC_GENOME | CALC_CLUSTER | CALC_NEIGHBOR)))) {
-      sprintf(logbuf, "Error: More than 1 %s required for pairwise analysis.\n", g_species_singular);
+    if ((sample_ct < 2) && (relationship_or_ibc_req(calculation_type) || distance_req(calculation_type, read_dists_fname) || (calculation_type & (CALC_GENOME | CALC_CLUSTER | CALC_NEIGHBOR)))) {
+      sprintf(logbuf, "Error: At least 2 %s required for pairwise analysis.\n", g_species_plural);
       goto plink_ret_INVALID_CMDLINE_2;
     }
 
-    // er, this needs to check marker_ct instead of sample_ct for --r/--r2,
-    // --fast-epistasis
-    if ((parallel_tot > 1) && (parallel_tot > sample_ct / 2)) {
+    if ((parallel_tot > 1) && (calculation_type & (CALC_DISTANCE | CALC_GENOME | CALC_RELATIONSHIP)) && (parallel_tot > sample_ct / 2)) {
       sprintf(logbuf, "Error: Too many --parallel jobs (maximum %" PRIuPTR "/2 = %" PRIuPTR ").\n", sample_ct, sample_ct / 2);
       goto plink_ret_INVALID_CMDLINE_2;
     }
@@ -1123,7 +1140,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   // Multithreaded BLAS/LAPACK call?  If yes, and either user requested <=
   // [0.5 * nprocs] threads or nprocs is unknown, warn that BLAS/LAPACK
   // multithreading is not under PLINK's control.
-  uii = ((!known_procs) || (known_procs * 2 >= g_thread_ct)) && ((calculation_type & (CALC_LASSO | CALC_PCA | CALC_UNRELATED_HERITABILITY)) || ((calculation_type & CALC_GLM) && pheno_d) || cluster_ptr->mds_dim_ct || ((calculation_type & CALC_LD_PRUNE) && (!(ldip->modifier & (LD_PRUNE_PAIRWISE | LD_PRUNE_PAIRPHASE)))));
+  uii = ((!known_procs) || (known_procs >= 2 * ((int32_t)g_thread_ct))) && ((calculation_type & (CALC_LASSO | CALC_PCA | CALC_UNRELATED_HERITABILITY)) || ((calculation_type & CALC_GLM) && pheno_d) || cluster_ptr->mds_dim_ct || ((calculation_type & CALC_LD_PRUNE) && (!(ldip->modifier & (LD_PRUNE_PAIRWISE | LD_PRUNE_PAIRPHASE)))));
 #endif
   if (g_thread_ct > 1) {
     if ((calculation_type & (CALC_RELATIONSHIP | CALC_REL_CUTOFF | CALC_GDISTANCE_MASK | CALC_IBS_TEST | CALC_GROUPDIST | CALC_REGRESS_DISTANCE | CALC_GENOME | CALC_REGRESS_REL | CALC_UNRELATED_HERITABILITY | CALC_LD | CALC_PCA | CALC_MAKE_PERM_PHENO | CALC_QFAM)) || ((calculation_type & CALC_MODEL) && (model_modifier & (MODEL_PERM | MODEL_MPERM))) || ((calculation_type & CALC_GLM) && (glm_modifier & (GLM_PERM | GLM_MPERM))) || ((calculation_type & CALC_TESTMISS) && (testmiss_modifier &  [...]
@@ -1231,40 +1248,36 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 
   }
 
-  if (bimname[0]) {
-    if (unfiltered_marker_ct == marker_exclude_ct) {
-      // defensive
-      logerrprint("Error: No variants remaining.\n");
-      goto plink_ret_ALL_MARKERS_EXCLUDED;
-    }
+  if (bimname[0] && (unfiltered_marker_ct != marker_exclude_ct)) {
     plink_maxsnp = calc_plink_maxsnp(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len);
     uii = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
     if (wkspace_alloc_ul_checked(&marker_reverse, uii * sizeof(intptr_t))) {
       goto plink_ret_NOMEM;
     }
     fill_ulong_zero(marker_reverse, uii);
-    if (bedfile) {
+    if (bedfile && sample_ct) {
       retval = calc_freqs_and_hwe(bedfile, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, founder_info, nonfounders, (misc_flags / MISC_MAF_SUCC) & 1, set_allele_freqs, bed_offset, (hwe_thresh > 0.0) || (calculation_type & CALC_HARDY), hwe_modifier & HWE_THRESH_ALL, (pheno_nm_ct && pheno_c)? ((calculation_type / CALC [...]
       if (retval) {
 	goto plink_ret_1;
       }
-
-      if (freqname) {
-	retval = read_external_freqs(freqname, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr, marker_allele_ptrs, set_allele_freqs, nchrobs, (misc_flags / MISC_MAF_SUCC) & 1);
-	if (retval) {
-	  goto plink_ret_1;
-	}
-      }
-
-      if (!(misc_flags & MISC_KEEP_ALLELE_ORDER)) {
-	// after this, set_allele_freqs[] has A2 freqs
-	calc_marker_reverse_bin(marker_reverse, marker_exclude, unfiltered_marker_ct, unfiltered_marker_ct - marker_exclude_ct, set_allele_freqs);
-      }
     } else {
       for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
         set_allele_freqs[marker_uidx] = 1.0;
       }
     }
+
+    if (freqname) {
+      retval = read_external_freqs(freqname, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr, marker_allele_ptrs, set_allele_freqs, nchrobs, (misc_flags / MISC_MAF_SUCC) & 1);
+      if (retval) {
+	goto plink_ret_1;
+      }
+    }
+
+    if (!(misc_flags & MISC_KEEP_ALLELE_ORDER)) {
+      // after this, set_allele_freqs[] has A2 freqs
+      calc_marker_reverse_bin(marker_reverse, marker_exclude, unfiltered_marker_ct, unfiltered_marker_ct - marker_exclude_ct, set_allele_freqs);
+    }
+
     if (a1alleles || a2alleles) {
       retval = load_ax_alleles(a1alleles? a1alleles : a2alleles, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_allele_ptrs, &max_marker_allele_len, marker_reverse, marker_ids, max_marker_id_len, set_allele_freqs, a2alleles? 1 : 0);
       if (retval) {
@@ -1277,58 +1290,62 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 
     // contrary to the PLINK 1.07 flowchart, --freq effectively resolves before
     // --geno.
-    if (calculation_type & CALC_FREQ) {
-      if (cluster_ct && (!(misc_flags & MISC_FREQX))) {
-	if (misc_flags & MISC_FREQ_COUNTS) {
-	  logprint("Note: --freq 'counts' modifier has no effect on cluster-stratified report.\n");
+    if (sample_ct) {
+      if (calculation_type & CALC_FREQ) {
+	if (cluster_ct && (!(misc_flags & MISC_FREQX))) {
+	  if (misc_flags & MISC_FREQ_COUNTS) {
+	    logprint("Note: --freq 'counts' modifier has no effect on cluster-stratified report.\n");
+	  }
+	  retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
+	} else if (misc_flags & MISC_FREQ_CC) {
+	  retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
+	} else {
+	  retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
+	}
+	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ))))) {
+	  goto plink_ret_1;
 	}
-	retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
-      } else if (misc_flags & MISC_FREQ_CC) {
-	retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
-      } else {
-	retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
-      }
-      if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ))))) {
-	goto plink_ret_1;
       }
-    }
-    if (calculation_type & CALC_MISSING_REPORT) {
-      retval = write_missingness_reports(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_MISSING_GZ) & 1, plink_maxfid, plink_maxiid, plink_maxsnp, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, chrom_info_ptr, om_ip, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, sex_male, sample_male_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, hh_exists);
-      if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT))))) {
-	goto plink_ret_1;
+      if (calculation_type & CALC_MISSING_REPORT) {
+	retval = write_missingness_reports(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_MISSING_GZ) & 1, plink_maxfid, plink_maxiid, plink_maxsnp, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, chrom_info_ptr, om_ip, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, sex_male, sample_male_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, hh_exists);
+	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT))))) {
+	  goto plink_ret_1;
+	}
       }
-    }
 
-    if (geno_excl_bitfield) {
-      ulii = marker_exclude_ct;
-      uljj = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
-      bitfield_or(marker_exclude, geno_excl_bitfield, uljj);
-      marker_exclude_ct = popcount_longs(marker_exclude, uljj);
-      if (marker_exclude_ct == unfiltered_marker_ct) {
-	logerrprint("Error: All variants excluded due to missing genotype data (--geno).\n");
-	goto plink_ret_ALL_MARKERS_EXCLUDED;
+      if (geno_excl_bitfield) {
+	ulii = marker_exclude_ct;
+	uljj = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
+	bitfield_or(marker_exclude, geno_excl_bitfield, uljj);
+	marker_exclude_ct = popcount_longs(marker_exclude, uljj);
+	if ((marker_exclude_ct == unfiltered_marker_ct) && (!allow_no_variants)) {
+	  logerrprint("Error: All variants excluded due to missing genotype data (--geno).\n");
+	  goto plink_ret_ALL_MARKERS_EXCLUDED;
+	}
+	ulii = marker_exclude_ct - ulii;
+	LOGPRINTF("%" PRIuPTR " variant%s removed due to missing genotype data (--geno).\n", ulii, (ulii == 1)? "" : "s");
       }
-      ulii = marker_exclude_ct - ulii;
-      LOGPRINTF("%" PRIuPTR " variant%s removed due to missing genotype data (--geno).\n", ulii, (ulii == 1)? "" : "s");
     }
     oblig_missing_cleanup(om_ip);
-    if (calculation_type & CALC_HARDY) {
-      retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
-      if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_HARDY))))) {
-	goto plink_ret_1;
+    if (sample_ct) {
+      if (calculation_type & CALC_HARDY) {
+	retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
+	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_HARDY))))) {
+	  goto plink_ret_1;
+	}
       }
-    }
-    if (hwe_thresh > 0.0) {
-      if (enforce_hwe_threshold(hwe_thresh, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, chrom_info_ptr)) {
-	goto plink_ret_ALL_MARKERS_EXCLUDED;
+      if (hwe_thresh > 0.0) {
+	if (enforce_hwe_threshold(hwe_thresh, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, allow_no_variants, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, chrom_info_ptr)) {
+	  goto plink_ret_ALL_MARKERS_EXCLUDED;
+	}
       }
     }
     if ((min_maf != 0.0) || (max_maf != 0.5) || ac_excl_bitfield) {
-      if (enforce_minor_allele_thresholds(min_maf, max_maf, unfiltered_marker_ct, marker_exclude, ac_excl_bitfield, &marker_exclude_ct, set_allele_freqs)) {
+      if (enforce_minor_allele_thresholds(min_maf, max_maf, unfiltered_marker_ct, marker_exclude, ac_excl_bitfield, &marker_exclude_ct, set_allele_freqs, allow_no_variants)) {
 	goto plink_ret_ALL_MARKERS_EXCLUDED;
       }
     }
-    if (min_bp_space) {
+    if (min_bp_space && (unfiltered_marker_ct > marker_exclude_ct)) {
       if (map_is_unsorted & UNSORTED_BP) {
 	logerrprint("Error: --bp-space requires a sorted .bim file.  Retry this command after using\n--make-bed to sort your data.\n");
 	goto plink_ret_INVALID_FORMAT;
@@ -1336,9 +1353,9 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       enforce_min_bp_space(min_bp_space, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, chrom_info_ptr);
     }
 
-    if (bedfile) {
+    if (bedfile && sample_ct && (unfiltered_marker_ct > marker_exclude_ct)) {
       if ((calculation_type & CALC_MENDEL) || (fam_ip->mendel_modifier & MENDEL_FILTER)) {
-	retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists, chrom_info_ptr, (calc [...]
+	retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, allow_no_variants, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists, ch [...]
 	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT | CALC_MENDEL))))) {
 	  goto plink_ret_1;
 	}
@@ -1365,25 +1382,33 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	logerrprint("Error: --set/--make-set requires a sorted .bim file.  Retry this command after\nusing --make-bed to sort your data.\n");
 	goto plink_ret_INVALID_FORMAT;
       }
-      retval = define_sets(sip, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr);
+      retval = define_sets(sip, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr, allow_no_variants);
       if (retval) {
 	goto plink_ret_1;
       }
     }
 
     marker_ct = unfiltered_marker_ct - marker_exclude_ct;
-    if (!marker_ct) {
+    if ((!marker_ct) && (!allow_no_variants)) {
       // defensive
       logerrprint("Error: All variants fail QC.\n");
       goto plink_ret_ALL_MARKERS_EXCLUDED;
     }
+    // could add --parallel + --r/--r2/--{fast-}epistasis check here.  (this
+    // currently happens downstream)
+
     if (bedfile) {
       LOGPRINTFWW("%" PRIuPTR " variant%s and %" PRIuPTR " %s pass filters and QC%s.\n", marker_ct, (marker_ct == 1)? "" : "s", sample_ct, species_str(sample_ct), (calculation_type & CALC_REL_CUTOFF)? " (before --rel-cutoff)": "");
     } else {
       LOGPRINTFWW("%" PRIuPTR " variant%s filters and QC.\n", marker_ct, (marker_ct == 1)? " passes" : "s pass");
     }
+  } else if (!allow_no_variants) {
+    // defensive
+    logerrprint("Error: No variants remaining.\n");
+    goto plink_ret_ALL_MARKERS_EXCLUDED;
   }
-  if (famname[0]) {
+
+  if (famname[0] && sample_ct) {
     if (!pheno_nm_ct) {
       logprint("Note: No phenotypes present.\n");
     } else if (pheno_c) {
@@ -1399,134 +1424,141 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
   }
 
-  if (relationship_or_ibc_req(calculation_type)) {
-    if (relip->pca_cluster_names_flattened || relip->pca_clusters_fname) {
-      retval = extract_clusters(unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, relip->pca_cluster_names_flattened, relip->pca_clusters_fname, &pca_sample_exclude, &pca_sample_ct);
-      if (retval) {
-	goto plink_ret_1;
-      }
-      if (pca_sample_ct < 2) {
-	logerrprint("Error: Too few samples specified by --pca-cluster-names/--pca-clusters.\n");
-	goto plink_ret_1;
-      }
-      if (pca_sample_ct == sample_ct) {
-	logerrprint("Warning: --pca-cluster-names/--pca-clusters has no effect since all samples are\nin the named clusters.\n");
-	pca_sample_exclude = NULL;
-      } else {
-	LOGPRINTF("--pca-cluster-names/--pca-clusters: %" PRIuPTR " samples specified.\n", pca_sample_ct);
-	ulii = unfiltered_sample_ct - pca_sample_ct;
-      }
-    }
-    retval = calc_rel(threads, parallel_idx, parallel_tot, calculation_type, relip, bedfile, bed_offset, outname, outname_end, distance_wts_fname, (dist_calc_type & DISTANCE_WTS_NOHEADER), unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? (&ulii) : (&sample_exclude_ct), sample_ids, max_sample_id_len, set_allele_freqs, &rel_ibc, chrom_info_ptr);
-    if (retval) {
-      goto plink_ret_1;
-    }
-    if ((!pca_sample_exclude) && (sample_ct != unfiltered_sample_ct + sample_exclude_ct)) {
-      sample_ct = unfiltered_sample_ct - sample_exclude_ct;
-      if ((sample_ct < 2) && (distance_req(calculation_type, read_dists_fname) || (calculation_type & (CALC_REGRESS_REL | CALC_PCA | CALC_GENOME | CALC_CLUSTER | CALC_NEIGHBOR)))) {
-	// pathological case
-        sprintf(logbuf, "Error: Too many %s pruned for additional pairwise analysis steps.\n", g_species_plural);
-        goto plink_ret_INVALID_CMDLINE_2;
+  if (sample_ct) {
+    if (marker_ct) {
+      if (relationship_or_ibc_req(calculation_type)) {
+	if (relip->pca_cluster_names_flattened || relip->pca_clusters_fname) {
+	  retval = extract_clusters(unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, relip->pca_cluster_names_flattened, relip->pca_clusters_fname, &pca_sample_exclude, &pca_sample_ct);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	  if (pca_sample_ct < 2) {
+	    logerrprint("Error: Too few samples specified by --pca-cluster-names/--pca-clusters.\n");
+	    goto plink_ret_1;
+	  }
+	  if (pca_sample_ct == sample_ct) {
+	    logerrprint("Warning: --pca-cluster-names/--pca-clusters has no effect since all samples are\nin the named clusters.\n");
+	    pca_sample_exclude = NULL;
+	  } else {
+	    LOGPRINTF("--pca-cluster-names/--pca-clusters: %" PRIuPTR " samples specified.\n", pca_sample_ct);
+	    ulii = unfiltered_sample_ct - pca_sample_ct;
+	  }
+	}
+	retval = calc_rel(threads, parallel_idx, parallel_tot, calculation_type, relip, bedfile, bed_offset, outname, outname_end, distance_wts_fname, (dist_calc_type & DISTANCE_WTS_NOHEADER), unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? (&ulii) : (&sample_exclude_ct), sample_ids, max_sample_id_len, set_allele_freqs, &rel_ibc, chrom_info_ptr);
+	if (retval) {
+	  goto plink_ret_1;
+	}
+	if ((!pca_sample_exclude) && (sample_ct != unfiltered_sample_ct + sample_exclude_ct)) {
+	  sample_ct = unfiltered_sample_ct - sample_exclude_ct;
+	  if ((sample_ct < 2) && (distance_req(calculation_type, read_dists_fname) || (calculation_type & (CALC_REGRESS_REL | CALC_PCA | CALC_GENOME | CALC_CLUSTER | CALC_NEIGHBOR)))) {
+	    // pathological case
+	    sprintf(logbuf, "Error: Too many %s pruned for additional pairwise analysis steps.\n", g_species_plural);
+	    goto plink_ret_INVALID_CMDLINE_2;
+	  }
+	}
+	if (calculation_type & CALC_REL_CUTOFF) {
+	  // ugh, probably better to just stop supporting this
+	  bitfield_andnot(founder_info, sample_exclude, unfiltered_sample_ctl);
+	  bitfield_andnot(sex_nm, sample_exclude, unfiltered_sample_ctl);
+	  bitfield_and(sex_male, sex_nm, unfiltered_sample_ctl);
+	  if (pheno_nm_ct) {
+	    bitfield_andnot(pheno_nm, sample_exclude, unfiltered_sample_ctl);
+	    pheno_nm_ct = popcount_longs(pheno_nm, unfiltered_sample_ctl);
+	    if (pheno_c) {
+	      bitfield_and(pheno_c, pheno_nm, unfiltered_sample_ctl);
+	      pheno_ctrl_ct = pheno_nm_ct - popcount_longs(pheno_c, unfiltered_sample_ctl);
+	    }
+	  }
+	}
+
+	if (calculation_type & CALC_REGRESS_REL) {
+	  retval = regress_rel_main(unfiltered_sample_ct, sample_exclude, sample_ct, relip, threads, pheno_d);
+	  if (retval) {
+	    goto plink_ret_1;
+	  }
+	}
+#ifndef NOLAPACK
+	if (calculation_type & CALC_PCA) {
+	  retval = calc_pca(bedfile, bed_offset, outname, outname_end, calculation_type, relip, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? pca_sample_ct : sample_ct, sample_ids, max_sample_id_len, set_allele_freqs, chrom_info_ptr, rel_ibc);
+	} else if (calculation_type & CALC_UNRELATED_HERITABILITY) {
+	  if (sample_ct != pheno_nm_ct) {
+	    logerrprint("Error: --unrelated-heritability requires phenotype data for all samples.\n(--prune should help.)\n");
+	    goto plink_ret_INVALID_CMDLINE;
+	  }
+	  retval = calc_unrelated_herit(calculation_type, relip, unfiltered_sample_ct, sample_exclude, sample_ct, pheno_d, rel_ibc);
+	}
+#endif
+	wkspace_reset(g_sample_missing_unwt);
+	if (retval) {
+	  goto plink_ret_1;
+	}
+	g_sample_missing_unwt = NULL;
+	g_missing_dbl_excluded = NULL;
       }
-    }
-    if (calculation_type & CALC_REL_CUTOFF) {
-      // ugh, probably better to just stop supporting this
-      bitfield_andnot(founder_info, sample_exclude, unfiltered_sample_ctl);
-      bitfield_andnot(sex_nm, sample_exclude, unfiltered_sample_ctl);
-      bitfield_and(sex_male, sex_nm, unfiltered_sample_ctl);
-      if (pheno_nm_ct) {
-	bitfield_andnot(pheno_nm, sample_exclude, unfiltered_sample_ctl);
-        pheno_nm_ct = popcount_longs(pheno_nm, unfiltered_sample_ctl);
-	if (pheno_c) {
-	  bitfield_and(pheno_c, pheno_nm, unfiltered_sample_ctl);
-          pheno_ctrl_ct = pheno_nm_ct - popcount_longs(pheno_c, unfiltered_sample_ctl);
+
+      if (calculation_type & CALC_SEXCHECK) {
+	retval = sexcheck(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, sex_nm, sex_male, misc_flags, check_sex_fthresh, check_sex_mthresh, check_sex_f_yobs, check_sex_m_yobs, chrom_info_ptr, set_allele_freqs, &gender_unk_ct);
+	if (retval) {
+	  goto plink_ret_1;
 	}
       }
     }
 
-    if (calculation_type & CALC_REGRESS_REL) {
-      retval = regress_rel_main(unfiltered_sample_ct, sample_exclude, sample_ct, relip, threads, pheno_d);
+    if (calculation_type & CALC_MAKE_PERM_PHENO) {
+      retval = make_perm_pheno(threads, outname, outname_end, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, pheno_nm_ct, pheno_nm, pheno_c, pheno_d, output_missing_pheno, permphe_ct);
       if (retval) {
 	goto plink_ret_1;
       }
     }
-#ifndef NOLAPACK
-    if (calculation_type & CALC_PCA) {
-      retval = calc_pca(bedfile, bed_offset, outname, outname_end, calculation_type, relip, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? pca_sample_ct : sample_ct, sample_ids, max_sample_id_len, set_allele_freqs, chrom_info_ptr, rel_ibc);
-    } else if (calculation_type & CALC_UNRELATED_HERITABILITY) {
-      if (sample_ct != pheno_nm_ct) {
-	logerrprint("Error: --unrelated-heritability requires phenotype data for all samples.\n(--prune should help.)\n");
-	goto plink_ret_INVALID_CMDLINE;
-      }
-      retval = calc_unrelated_herit(calculation_type, relip, unfiltered_sample_ct, sample_exclude, sample_ct, pheno_d, rel_ibc);
-    }
-#endif
-    wkspace_reset(g_sample_missing_unwt);
-    if (retval) {
-      goto plink_ret_1;
-    }
-    g_sample_missing_unwt = NULL;
-    g_missing_dbl_excluded = NULL;
-  }
 
-  if (calculation_type & CALC_SEXCHECK) {
-    retval = sexcheck(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, sex_nm, sex_male, misc_flags, check_sex_fthresh, check_sex_mthresh, check_sex_f_yobs, check_sex_m_yobs, chrom_info_ptr, set_allele_freqs, &gender_unk_ct);
-    if (retval) {
-      goto plink_ret_1;
-    }
-  }
-
-  if (calculation_type & CALC_MAKE_PERM_PHENO) {
-    retval = make_perm_pheno(threads, outname, outname_end, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, pheno_nm_ct, pheno_nm, pheno_c, pheno_d, output_missing_pheno, permphe_ct);
-    if (retval) {
-      goto plink_ret_1;
+    if ((calculation_type & CALC_GENOME) || genome_skip_write) {
+      // er, this probably should be moved inside calc_genome(), since we're
+      // using get_trios_and_families() instead of pri elsewhere
+      retval = populate_pedigree_rel_info(&pri, unfiltered_sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info);
+      if (retval) {
+	goto plink_ret_1;
+      }
     }
   }
 
-  if ((calculation_type & CALC_GENOME) || genome_skip_write) {
-    // er, this probably should be moved inside calc_genome(), since we're
-    // using get_trios_and_families() instead of pri elsewhere
-    retval = populate_pedigree_rel_info(&pri, unfiltered_sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info);
-    if (retval) {
-      goto plink_ret_1;
+  if (marker_ct) {
+    if (calculation_type & CALC_WRITE_SET) {
+      retval = write_set(sip, outname, outname_end, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr);
+      if (retval) {
+	goto plink_ret_1;
+      }
     }
-  }
 
-  if (calculation_type & CALC_WRITE_SET) {
-    retval = write_set(sip, outname, outname_end, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr);
-    if (retval) {
-      goto plink_ret_1;
+    if (calculation_type & CALC_WRITE_SNPLIST) {
+      retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, NULL, 0);
+      if (retval) {
+	goto plink_ret_1;
+      }
     }
-  }
 
-  if (calculation_type & CALC_WRITE_SNPLIST) {
-    retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, NULL, 0);
-    if (retval) {
-      goto plink_ret_1;
-    }
-  }
-  if (calculation_type & CALC_WRITE_VAR_RANGES) {
-    retval = write_var_ranges(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, write_var_range_ct);
-    if (retval) {
-      goto plink_ret_1;
+    if (calculation_type & CALC_WRITE_VAR_RANGES) {
+      retval = write_var_ranges(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, write_var_range_ct);
+      if (retval) {
+	goto plink_ret_1;
+      }
     }
-  }
 
-  if (calculation_type & CALC_LIST_23_INDELS) {
-    retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, 1);
-    if (retval) {
-      goto plink_ret_1;
+    if (calculation_type & CALC_LIST_23_INDELS) {
+      retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, 1);
+      if (retval) {
+	goto plink_ret_1;
+      }
     }
-  }
 
-  if (calculation_type & CALC_DUPVAR) {
-    if (map_is_unsorted & UNSORTED_BP) {
-      logerrprint("Error: --list-duplicate-vars requires a sorted .bim file.  Retry this command\nafter using --make-bed to sort your data.\n");
-      goto plink_ret_INVALID_FORMAT;
-    }
-    retval = list_duplicate_vars(outname, outname_end, dupvar_modifier, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, marker_allele_ptrs);
-    if (retval) {
-      goto plink_ret_1;
+    if (calculation_type & CALC_DUPVAR) {
+      if (map_is_unsorted & UNSORTED_BP) {
+	logerrprint("Error: --list-duplicate-vars requires a sorted .bim file.  Retry this command\nafter using --make-bed to sort your data.\n");
+	goto plink_ret_INVALID_FORMAT;
+      }
+      retval = list_duplicate_vars(outname, outname_end, dupvar_modifier, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, marker_allele_ptrs);
+      if (retval) {
+	goto plink_ret_1;
+      }
     }
   }
 
@@ -1538,7 +1570,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       memcpy(pheno_nm_datagen, pheno_nm, unfiltered_sample_ctl * sizeof(intptr_t));
       bitfield_and(pheno_nm_datagen, sex_nm, unfiltered_sample_ctl);
     }
-    if (covar_fname && (calculation_type & (CALC_WRITE_COVAR | CALC_MAKE_BED | CALC_MAKE_FAM | CALC_RECODE))) {
+    if (covar_ct && (calculation_type & (CALC_WRITE_COVAR | CALC_MAKE_BED | CALC_MAKE_FAM | CALC_RECODE)) && sample_ct) {
       retval = write_covars(outname, outname_end, write_covar_modifier, write_covar_dummy_max_categories, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, missing_phenod, output_missing_pheno, covar_ct, covar_names, max_covar_name_len, covar_nm, covar_d);
       if (retval) {
 	goto plink_ret_1;
@@ -1559,6 +1591,10 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     aligned_free_cond_null(&pheno_nm_datagen);
   }
 
+  if ((!marker_ct) || (!sample_ct)) {
+    goto plink_ret_1;
+  }
+
   if ((calculation_type & CALC_EPI) && epi_ip->twolocus_mkr1) {
     retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, sex_male, outname, outname_end, hh_exists);
     if (retval) {
@@ -1851,7 +1887,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     uii = 0; // phenotype/cluster number
     *outname_end = '.';
     if (loop_assoc_fname) {
-      retval = load_clusters(loop_assoc_fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, NULL, NULL, NULL, NULL);
+      retval = load_clusters(loop_assoc_fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, NULL, NULL, NULL, NULL, 0);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1914,6 +1950,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	retval = load_pheno(phenofile, unfiltered_sample_ct, sample_exclude_ct, cptr, max_sample_id_len, uiptr, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, uii, NULL, pheno_nm, &pheno_c, &pheno_d, &(outname_end[1]), (uintptr_t)((&(outname[FNAMESIZE - 32])) - outname_end));
 	if (retval == LOAD_PHENO_LAST_COL) {
 	  wkspace_reset(wkspace_mark);
+	  retval = 0; // exit code bugfix
 	  break;
 	} else if (retval) {
 	  goto plink_ret_1;
@@ -2112,8 +2149,8 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   return retval;
 }
 
-// output-missing-phenotype + terminating null, or 'recode 01 fastphase-1chr'
-#define MAX_FLAG_LEN 25
+// meta-analysis-report-dups + terminating null, or 'recode 01 fastphase-1chr'
+#define MAX_FLAG_LEN 26
 
 static inline int32_t is_flag(char* param) {
   unsigned char ucc = param[1];
@@ -3216,8 +3253,8 @@ int32_t main(int32_t argc, char** argv) {
   uint64_t filter_flags = 0;
   double thin_keep_prob = 1.0;
   double thin_keep_sample_prob = 1.0;
-  uint32_t thin_keep_ct = 0;
-  uint32_t thin_keep_sample_ct = 0;
+  uint32_t thin_keep_ct = 0xffffffffU;
+  uint32_t thin_keep_sample_ct = 0xffffffffU;
   uint32_t min_bp_space = 0;
   uint32_t check_sex_f_yobs = 0;
   uint32_t check_sex_m_yobs = 0;
@@ -3339,7 +3376,11 @@ int32_t main(int32_t argc, char** argv) {
   double lasso_minlambda = -1;
   uint32_t testmiss_modifier = 0;
   uint32_t testmiss_mperm_val = 0;
+
+  // this default limit plays well with e.g. fbstring small-string optimization
   uint32_t new_id_max_allele_len = 23;
+
+  uint32_t aperm_present = 0;
   char* segment_spanning_fname = NULL;
   char* missing_code = NULL;
   char range_delim = '-';
@@ -3372,6 +3413,7 @@ int32_t main(int32_t argc, char** argv) {
   time_t rawtime;
   char* argptr;
   char* sptr;
+  const char* csptr;
   int32_t ii;
   int32_t jj;
   int32_t kk;
@@ -3400,7 +3442,7 @@ int32_t main(int32_t argc, char** argv) {
   char* flagptr;
   double dxx;
   char cc;
-  uint32_t known_procs;
+  int32_t known_procs;
   uint32_t uii;
   uint32_t ujj;
   uint32_t ukk;
@@ -3995,7 +4037,7 @@ int32_t main(int32_t argc, char** argv) {
   ii = sysconf(_SC_NPROCESSORS_ONLN);
   if (ii == -1) {
     g_thread_ct = 1;
-    known_procs = 0;
+    known_procs = -1;
   } else {
     g_thread_ct = ii;
     known_procs = ii;
@@ -4244,6 +4286,18 @@ int32_t main(int32_t argc, char** argv) {
       } else if (!memcmp(argptr2, "llow-no-sex", 12)) {
         sex_missing_pheno |= ALLOW_NO_SEX;
 	goto main_param_zero;
+      } else if (!memcmp(argptr2, "llow-no-samples", 16)) {
+	UNSTABLE("allow-no-samples");
+	misc_flags |= MISC_ALLOW_NO_SAMPLES;
+	goto main_param_zero;
+      } else if (!memcmp(argptr2, "llow-no-vars", 13)) {
+	UNSTABLE("allow-no-vars");
+	misc_flags |= MISC_ALLOW_NO_VARS;
+	goto main_param_zero;
+      } else if (!memcmp(argptr2, "llow-no-covars", 15)) {
+	UNSTABLE("allow-no-covars");
+	covar_modifier |= COVAR_ALLOW_NONE;
+	goto main_param_zero;
       } else if (!memcmp(argptr2, "ll", 3)) {
 	logprint("Note: --all flag has no effect.\n");
 	goto main_param_zero;
@@ -4429,6 +4483,7 @@ int32_t main(int32_t argc, char** argv) {
 	    }
 	  }
 	}
+	aperm_present = 1;
       } else if (!memcmp(argptr2, "1-allele", 9)) {
 	if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 4)) {
 	  goto main_ret_INVALID_CMDLINE_2A;
@@ -4592,20 +4647,20 @@ int32_t main(int32_t argc, char** argv) {
 	  goto main_ret_INVALID_CMDLINE_2A;
 	}
 	if (param_ct) {
-	  sptr = argv[cur_arg + 1];
-	  if (strlen(sptr) > (FNAMESIZE - 5)) {
+	  csptr = argv[cur_arg + 1];
+	  if (strlen(csptr) > (FNAMESIZE - 5)) {
 	    logerrprint("Error: --bfile parameter too long.\n");
 	    goto main_ret_OPEN_FAIL;
 	  }
 	} else {
-	  sptr = (char*)PROG_NAME_STR;
+	  csptr = PROG_NAME_STR;
 	}
 	if (!(load_params & LOAD_PARAMS_BED)) {
-	  memcpy(strcpya(pedname, sptr), ".bed", 5);
+	  memcpy(strcpya(pedname, csptr), ".bed", 5);
 	  load_params |= LOAD_PARAMS_BED;
 	}
-	memcpy(strcpya(mapname, sptr), ".bim", 5);
-	memcpy(strcpya(famname, sptr), ".fam", 5);
+	memcpy(strcpya(mapname, csptr), ".bim", 5);
+	memcpy(strcpya(famname, csptr), ".fam", 5);
 	load_params |= LOAD_PARAMS_BIM | LOAD_PARAMS_FAM;
       } else if (!memcmp(argptr2, "ed", 3)) {
 	if (load_rare) {
@@ -5976,20 +6031,20 @@ int32_t main(int32_t argc, char** argv) {
 	  goto main_ret_INVALID_CMDLINE_2A;
 	}
 	if (param_ct) {
-	  sptr = argv[cur_arg + 1];
-	  if (strlen(sptr) > (FNAMESIZE - 8)) {
+	  csptr = argv[cur_arg + 1];
+	  if (strlen(csptr) > (FNAMESIZE - 8)) {
 	    logerrprint("Error: --data parameter too long.\n");
 	    goto main_ret_OPEN_FAIL;
 	  }
 	} else {
-	  sptr = (char*)PROG_NAME_STR;
+	  csptr = PROG_NAME_STR;
 	}
 	if (!(load_params & LOAD_PARAMS_OXBGEN)) {
-	  memcpy(strcpya(pedname, sptr), ".gen", 5);
+	  memcpy(strcpya(pedname, csptr), ".gen", 5);
 	  load_params |= LOAD_PARAMS_OXGEN;
 	}
 	// cheating: this is of course more like a .fam file
-	memcpy(strcpya(mapname, sptr), ".sample", 8);
+	memcpy(strcpya(mapname, csptr), ".sample", 8);
 	load_params |= LOAD_PARAMS_OXSAMPLE;
       } else if (!memcmp(argptr2, "ecompress", 10)) {
 	logerrprint("Error: --decompress flag retired.  Use e.g. 'gunzip [filename]'.\n");
@@ -6133,11 +6188,11 @@ int32_t main(int32_t argc, char** argv) {
         if (enforce_param_ct_range(param_ct, argv[cur_arg], 2, 6)) {
           goto main_ret_INVALID_CMDLINE_2A;
 	}
-	if (scan_posint_defcap(argv[cur_arg + 1], &dummy_sample_ct)) {
+	if (scan_uint_defcap(argv[cur_arg + 1], &dummy_sample_ct) || ((!dummy_sample_ct) && (!(misc_flags & MISC_ALLOW_NO_SAMPLES)))) {
 	  logerrprint("Error: Invalid --dummy sample count.\n");
 	  goto main_ret_INVALID_CMDLINE_A;
 	}
-	if (scan_posint_defcap(argv[cur_arg + 2], &dummy_marker_ct)) {
+	if (scan_uint_defcap(argv[cur_arg + 2], &dummy_marker_ct) || ((!dummy_marker_ct) && (!(misc_flags & MISC_ALLOW_NO_VARS)))) {
 	  logerrprint("Error: Invalid --dummy variant count.\n");
 	  goto main_ret_INVALID_CMDLINE_A;
 	}
@@ -6233,6 +6288,9 @@ int32_t main(int32_t argc, char** argv) {
 	} else if (condition_mname || condition_fname) {
 	  logerrprint("Error: --dosage does not support --condition/--condition-list.\n");
 	  goto main_ret_INVALID_CMDLINE_A;
+	} else if (misc_flags & (MISC_ALLOW_NO_SAMPLES | MISC_ALLOW_NO_VARS)) {
+	  logerrprint("Error: --dosage does not support --allow-no-samples/--allow-no-vars.\n");
+	  goto main_ret_INVALID_CMDLINE_A;
 	}
 	if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 13)) {
           goto main_ret_INVALID_CMDLINE_2A;
@@ -6529,16 +6587,16 @@ int32_t main(int32_t argc, char** argv) {
 	  goto main_ret_INVALID_CMDLINE_2A;
 	}
 	if (param_ct) {
-	  sptr = argv[cur_arg + 1];
-	  if (strlen(sptr) > (FNAMESIZE - 5)) {
+	  csptr = argv[cur_arg + 1];
+	  if (strlen(csptr) > (FNAMESIZE - 5)) {
 	    logerrprint("Error: --file parameter too long.\n");
 	    goto main_ret_OPEN_FAIL;
 	  }
 	} else {
-	  sptr = (char*)PROG_NAME_STR;
+	  csptr = PROG_NAME_STR;
 	}
-	memcpy(strcpya(pedname, sptr), ".ped", 5);
-	memcpy(strcpya(mapname, sptr), ".map", 5);
+	memcpy(strcpya(pedname, csptr), ".ped", 5);
+	memcpy(strcpya(mapname, csptr), ".map", 5);
       } else if (!memcmp(argptr2, "am", 3)) {
 	if (load_params & (LOAD_PARAMS_TEXT_ALL | LOAD_PARAMS_OX_ALL)) {
 	  goto main_ret_INVALID_CMDLINE_INPUT_CONFLICT;
@@ -7838,22 +7896,44 @@ int32_t main(int32_t argc, char** argv) {
 
     case 'l':
       if (!memcmp(argptr2, "file", 5)) {
-	if (load_rare || load_params) {
+	if (load_rare || (load_params & (~LOAD_PARAMS_FAM))) {
 	  goto main_ret_INVALID_CMDLINE_INPUT_CONFLICT;
 	}
 	if (enforce_param_ct_range(param_ct, argv[cur_arg], 0, 1)) {
 	  goto main_ret_INVALID_CMDLINE_2A;
 	}
 	if (param_ct) {
-	  if (strlen(argv[cur_arg + 1]) > FNAMESIZE - 6) {
+	  csptr = argv[cur_arg + 1];
+	  if (strlen(csptr) > FNAMESIZE - 6) {
 	    logerrprint("Error: --lfile filename prefix too long.\n");
 	    goto main_ret_OPEN_FAIL;
 	  }
-	  strcpy(pedname, argv[cur_arg + 1]);
 	} else {
-	  memcpy(pedname, PROG_NAME_STR, 6);
+	  csptr = PROG_NAME_STR;
+	}
+	memcpy(strcpya(pedname, csptr), ".lgen", 6);
+	memcpy(strcpya(mapname, csptr), ".map", 5);
+	if (!famname[0]) {
+	  memcpy(strcpya(famname, csptr), ".fam", 5);
 	}
 	load_rare = LOAD_RARE_LGEN;
+      } else if (!memcmp(argptr2, "gen", 4)) {
+	if ((load_rare & (~LOAD_RARE_LGEN)) || (load_params & (~LOAD_PARAMS_FAM))) {
+	  goto main_ret_INVALID_CMDLINE_INPUT_CONFLICT;
+	}
+	if ((load_params != LOAD_PARAMS_FAM) && (!load_rare)) {
+	  logerrprint("Error: --lgen must be used with --fam or --lfile.\n");
+	  goto main_ret_INVALID_CMDLINE_A;
+	}
+	if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
+	  goto main_ret_INVALID_CMDLINE_2A;
+	}
+	if (strlen(argv[cur_arg + 1]) > (FNAMESIZE - 1)) {
+	  logerrprint("Error: --lgen parameter too long.\n");
+	  goto main_ret_OPEN_FAIL;
+	}
+	strcpy(pedname, argv[cur_arg + 1]);
+	load_rare = LOAD_RARE_LGEN;
       } else if (!memcmp(argptr2, "oop-assoc", 10)) {
 	if (pheno_modifier & PHENO_ALL) {
 	  logerrprint("Error: --loop-assoc cannot be used with --all-pheno.\n");
@@ -8015,10 +8095,6 @@ int32_t main(int32_t argc, char** argv) {
 	      }
 	      glm_modifier |= GLM_STANDARD_BETA;
 	    } else if (!strcmp(argv[cur_arg + uii], "intercept")) {
-	      if (glm_modifier & GLM_LOGISTIC) {
-		logerrprint("Error: --logistic does not currently have a 'intercept' modifier.  (Did you\nmean --linear or 'beta'?)\n");
-		goto main_ret_INVALID_CMDLINE_A;
-	      }
 	      glm_modifier |= GLM_INTERCEPT;
 	    } else if (!strcmp(argv[cur_arg + uii], "beta")) {
 	      glm_modifier |= GLM_BETA;
@@ -8200,7 +8276,7 @@ int32_t main(int32_t argc, char** argv) {
 
     case 'm':
       if (!memcmp(argptr2, "ap", 3)) {
-	if (((load_params & (LOAD_PARAMS_BFILE_ALL | LOAD_PARAMS_OX_ALL)) || (load_rare & (~(LOAD_RARE_CNV | LOAD_RARE_GVAR)))) && ((load_rare != LOAD_RARE_DOSAGE) || (load_params != LOAD_PARAMS_FAM))) {
+	if (((load_params & (LOAD_PARAMS_BFILE_ALL | LOAD_PARAMS_OX_ALL)) || (load_rare & (~(LOAD_RARE_CNV | LOAD_RARE_GVAR)))) && ((load_rare != LOAD_RARE_DOSAGE) || (load_params != LOAD_PARAMS_FAM)) && (load_rare != LOAD_RARE_LGEN)) {
 	  goto main_ret_INVALID_CMDLINE_INPUT_CONFLICT;
 	}
 	load_params |= LOAD_PARAMS_MAP;
@@ -9379,6 +9455,14 @@ int32_t main(int32_t argc, char** argv) {
 	if (retval) {
 	  goto main_ret_NOMEM;
 	}
+      } else if (!memcmp(argptr2, "eta-analysis-report-dups", 25)) {
+	// modifier flag: abort instead of continuing when no --meta-analysis input was given
+	if (!metaanal_fnames) {
+	  logerrprint("Error: --meta-analysis-report-dups must be used with --meta-analysis.\n");
+	  goto main_ret_INVALID_CMDLINE_A;
+	}
+	metaanal_flags |= METAANAL_REPORT_DUPS;
+	goto main_param_zero;
       } else if (!memcmp(argptr2, "ac", 3)) {
 	UNSTABLE("mac");
 	if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
@@ -9536,6 +9618,13 @@ int32_t main(int32_t argc, char** argv) {
 	logprint("Note: --nop flag deprecated.  Use '--fast-epistasis nop'.\n");
         epi_info.modifier |= EPI_FAST_NO_P_VALUE;
         goto main_param_zero;
+      } else if (!memcmp(argptr2, "o-const-covar", 14)) {
+	if (!covar_fname) {
+	  logerrprint("Error: --no-const-covar must be used with --covar.\n");
+	  goto main_ret_INVALID_CMDLINE;
+	}
+	covar_modifier |= COVAR_NO_CONST;
+	goto main_param_zero;
       } else if (!memcmp(argptr2, "oweb", 5)) {
         logprint("Note: --noweb has no effect since no web check is implemented yet.\n");
 	goto main_param_zero;
@@ -11157,7 +11246,7 @@ int32_t main(int32_t argc, char** argv) {
 	goto main_param_zero;
       } else if (!memcmp(argptr2, "tandard-beta", 13)) {
 	if (((!(calculation_type & CALC_GLM)) || (glm_modifier & GLM_LOGISTIC)) && (!(dosage_info.modifier & DOSAGE_GLM))) {
-	  logerrprint("Error: --standard-beta must be used wtih --linear or --dosage.\n");
+	  logerrprint("Error: --standard-beta must be used with --linear or --dosage.\n");
 	  goto main_ret_INVALID_CMDLINE_A;
 	}
 	logprint("Note: --standard-beta flag deprecated.  Use e.g. '--linear standard-beta'.\n");
@@ -11391,7 +11480,7 @@ int32_t main(int32_t argc, char** argv) {
 	  }
 	  dosage_info.modifier += (DOSAGE_SCORE - DOSAGE_GLM);
 	}
-        if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 8)) {
+        if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 9)) {
           goto main_ret_INVALID_CMDLINE_2A;
 	}
 	retval = alloc_fname(&score_info.fname, argv[cur_arg + 1], argptr, 0);
@@ -11434,6 +11523,8 @@ int32_t main(int32_t argc, char** argv) {
 	    dosage_info.modifier |= DOSAGE_SCORE_NOSUM;
 	  } else if (!strcmp(argv[cur_arg + uii], "include-cnt")) {
             dosage_info.modifier |= DOSAGE_SCORE_CNT;
+	  } else if (!strcmp(argv[cur_arg + uii], "double-dosage")) {
+	    dosage_info.modifier |= DOSAGE_SCORE_DOUBLE;
 	  } else if (ujj == 3) {
             logerrprint("Error: --score takes at most three numeric parameters.\n");
             goto main_ret_INVALID_CMDLINE_A;
@@ -11553,6 +11644,9 @@ int32_t main(int32_t argc, char** argv) {
 	if (g_thread_ct > MAX_THREADS) {
 	  LOGPRINTF("Note: Reducing --threads parameter to %u.  (If this is not large enough,\nrecompile with a larger MAX_THREADS setting.)\n", MAX_THREADS);
 	  g_thread_ct = MAX_THREADS;
+	} else if (known_procs == -1) {
+	  // trigger BLAS/LAPACK warning
+	  known_procs = 0;
 	}
       } else if (!memcmp(argptr2, "ab", 3)) {
 	logprint("Note: --tab flag deprecated.  Use '--recode tab ...'.\n");
@@ -11737,7 +11831,7 @@ int32_t main(int32_t argc, char** argv) {
 	if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
 	  goto main_ret_INVALID_CMDLINE_2A;
 	}
-	if (scan_posint_defcap(argv[cur_arg + 1], &thin_keep_ct)) {
+	if (scan_uint_defcap(argv[cur_arg + 1], &thin_keep_ct) || ((!thin_keep_ct) && (!(misc_flags & MISC_ALLOW_NO_VARS)))) {
 	  sprintf(logbuf, "Error: Invalid --thin-count parameter '%s'.\n", argv[cur_arg + 1]);
 	  goto main_ret_INVALID_CMDLINE_WWA;
 	}
@@ -11767,7 +11861,7 @@ int32_t main(int32_t argc, char** argv) {
         if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
           goto main_ret_INVALID_CMDLINE_2A;
         }
-        if (scan_posint_defcap(argv[cur_arg + 1], &thin_keep_sample_ct)) {
+        if (scan_uint_defcap(argv[cur_arg + 1], &thin_keep_sample_ct) || ((!thin_keep_sample_ct) && (!(misc_flags & MISC_ALLOW_NO_SAMPLES)))) {
           sprintf(logbuf, "Error: Invalid --thin-indiv-count parameter '%s'.\n", argv[cur_arg + 1]);
           goto main_ret_INVALID_CMDLINE_WWA;
         }
@@ -12377,6 +12471,13 @@ int32_t main(int32_t argc, char** argv) {
 	  sprintf(logbuf, "Error: '%s' is not a valid mode for --vcf-half-call.\n", argv[cur_arg + 1]);
 	  goto main_ret_INVALID_CMDLINE_WWA;
 	}
+      } else if (!memcmp(argptr2, "cf-require-gt", 14)) {
+        if (!(load_rare & (LOAD_RARE_VCF | LOAD_RARE_BCF))) {
+	  logerrprint("Error: --vcf-require-gt must be used with --vcf/--bcf.\n");
+	  goto main_ret_INVALID_CMDLINE;
+	}
+	misc_flags |= MISC_VCF_REQUIRE_GT;
+	goto main_param_zero;
       } else {
 	goto main_ret_INVALID_CMDLINE_UNRECOGNIZED;
       }
@@ -13054,6 +13155,12 @@ int32_t main(int32_t argc, char** argv) {
     if ((calculation_type & CALC_TDT) && ((family_info.tdt_modifier & (TDT_MPERM | TDT_SET_TEST)) == TDT_MPERM)) {
       uii++;
     }
+    if ((calculation_type & CALC_DFAM) && ((family_info.dfam_modifier & (DFAM_MPERM | DFAM_SET_TEST)) == DFAM_MPERM)) {
+      uii++;
+    }
+
+    // no qfam since that's a nonstandard permutation test
+
     if ((calculation_type & CALC_CMH) && ((cluster.modifier & (CLUSTER_CMH_MPERM | CLUSTER_CMH_SET_TEST)) == CLUSTER_CMH_MPERM)) {
       uii++;
     }
@@ -13078,6 +13185,21 @@ int32_t main(int32_t argc, char** argv) {
       model_modifier |= MODEL_PERM;
     }
   }
+  if (aperm_present && (calculation_type & (CALC_MODEL | CALC_GLM | CALC_TESTMISS | CALC_TDT | CALC_DFAM | CALC_QFAM | CALC_CMH)) &&
+      (!(model_modifier & MODEL_PERM)) &&
+      (!(glm_modifier & GLM_PERM)) &&
+      (!(testmiss_modifier & TESTMISS_PERM)) &&
+      (!(family_info.tdt_modifier & TDT_PERM)) &&
+      (!(family_info.dfam_modifier & DFAM_PERM)) &&
+      (!(family_info.qfam_modifier & QFAM_PERM)) &&
+      (!(cluster.modifier & (CLUSTER_CMH_PERM | CLUSTER_CMH_PERM_BD)))) {
+    // If --aperm is present and at least one association analysis command
+    // which supports adaptive permutation testing was also specified, but no
+    // adaptive permutation test is actually requested, the user is likely to
+    // be confused.  Produce a warning.  (Not an error, since a sophisticated
+    // user may want to use --script with different --aperm defaults.)
+    logerrprint("Warning: --aperm only controls the settings for adaptive permutation tests; it\ndoes not cause such a test to be performed.  (Did you forget to add the 'perm'\nmodifier to an association analysis flag?)\n");
+  }
   if ((mtest_adjust & (ADJUST_LAMBDA + 1)) == ADJUST_LAMBDA) {
     logerrprint("Error: --lambda must be used with --adjust.\n");
     goto main_ret_INVALID_CMDLINE_A;
@@ -13113,6 +13235,10 @@ int32_t main(int32_t argc, char** argv) {
     goto main_ret_INVALID_CMDLINE_A;
   }
 
+  if ((load_rare == LOAD_RARE_LGEN) && (!mapname[0])) {
+    logerrprint("Error: --lgen must be used with --lfile or --map.\n");
+    goto main_ret_INVALID_CMDLINE_A;
+  }
   uii = load_params & LOAD_PARAMS_OX_ALL;
   if ((uii == LOAD_PARAMS_OXGEN) || (uii == LOAD_PARAMS_OXBGEN)) {
     logerrprint("Error: --gen/--bgen cannot be used without --data or --sample.\n");
@@ -13309,7 +13435,7 @@ int32_t main(int32_t argc, char** argv) {
       }
       uii = (sptr - outname);
       if (load_rare == LOAD_RARE_LGEN) {
-        retval = lgen_to_bed(pedname, outname, sptr, missing_pheno, misc_flags, lgen_modifier, lgen_reference_fname, &chrom_info);
+        retval = lgen_to_bed(pedname, mapname, famname, outname, sptr, missing_pheno, misc_flags, lgen_modifier, lgen_reference_fname, &chrom_info);
       } else if (load_rare & LOAD_RARE_TRANSPOSE_MASK) {
         retval = transposed_to_bed(pedname, famname, outname, sptr, misc_flags, &chrom_info);
       } else if (load_rare & LOAD_RARE_VCF) {
@@ -13317,10 +13443,12 @@ int32_t main(int32_t argc, char** argv) {
       } else if (load_rare & LOAD_RARE_BCF) {
 	retval = bcf_to_bed(pedname, outname, sptr, missing_pheno, misc_flags, const_fid, id_delim, vcf_idspace_to, vcf_min_qual, vcf_filter_exceptions_flattened, &chrom_info);
       } else if (load_rare == LOAD_RARE_23) {
-        retval = bed_from_23(pedname, outname, sptr, modifier_23, fid_23, iid_23, (pheno_23 == HUGE_DOUBLE)? ((double)missing_pheno) : pheno_23, paternal_id_23, maternal_id_23, &chrom_info);
+        retval = bed_from_23(pedname, outname, sptr, modifier_23, fid_23, iid_23, (pheno_23 == HUGE_DOUBLE)? ((double)missing_pheno) : pheno_23, misc_flags, paternal_id_23, maternal_id_23, &chrom_info);
       } else if (load_rare & LOAD_RARE_DUMMY) {
 	retval = generate_dummy(outname, sptr, dummy_flags, dummy_marker_ct, dummy_sample_ct, dummy_missing_geno, dummy_missing_pheno, missing_pheno);
       } else if (load_rare & LOAD_RARE_SIMULATE) {
+	// no need to support zero samples/variants here since --dummy takes
+	// care of generating those test cases
 	retval = simulate_dataset(outname, sptr, simulate_flags, simulate_fname, simulate_cases, simulate_controls, simulate_prevalence, simulate_qt_samples, simulate_missing, simulate_label);
 	free(simulate_fname);
 	simulate_fname = NULL;
diff --git a/plink_assoc.c b/plink_assoc.c
index c597a3f..14030dc 100644
--- a/plink_assoc.c
+++ b/plink_assoc.c
@@ -4,6 +4,7 @@
 #include "plink_cluster.h"
 #include "plink_ld.h"
 #include "plink_matrix.h"
+#include "plink_perm.h"
 #include "plink_stats.h"
 
 void aperm_init(Aperm_info* apip) {
@@ -562,335 +563,6 @@ int32_t multcomp(char* outname, char* outname_end, uint32_t* marker_uidxs, uintp
   return retval;
 }
 
-void generate_cc_perm_vec(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp) {
-  // Assumes tot_quotient is 2^32 / tot_ct, and
-  // totq_magic/totq_preshift/totq_postshift/totq_incr have been precomputed
-  // from magic_num().
-  uint32_t num_set = 0;
-  uint32_t upper_bound = tot_ct * tot_quotient - 1;
-  uintptr_t widx;
-  uintptr_t wcomp;
-  uintptr_t pv_val;
-  uint32_t urand;
-  uint32_t uii;
-  if (set_ct * 2 < tot_ct) {
-    fill_ulong_zero(perm_vec, 2 * ((tot_ct + (BITCT - 1)) / BITCT));
-    for (; num_set < set_ct; num_set++) {
-      do {
-	do {
-	  urand = sfmt_genrand_uint32(sfmtp);
-	} while (urand > upper_bound);
-	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift;
-        widx = uii / BITCT2;
-	wcomp = ONELU << (2 * (uii % BITCT2));
-	pv_val = perm_vec[widx];
-      } while (pv_val & wcomp);
-      perm_vec[widx] = pv_val | wcomp;
-    }
-  } else {
-    fill_vec_55(perm_vec, tot_ct);
-    set_ct = tot_ct - set_ct;
-    for (; num_set < set_ct; num_set++) {
-      do {
-	do {
-	  urand = sfmt_genrand_uint32(sfmtp);
-	} while (urand > upper_bound);
-	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift;
-        widx = uii / BITCT2;
-	wcomp = ONELU << (2 * (uii % BITCT2));
-	pv_val = perm_vec[widx];
-      } while (!(pv_val & wcomp));
-      perm_vec[widx] = pv_val - wcomp;
-    }
-  }
-}
-
-void generate_cc_perm1(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp) {
-  // generate_cc_perm_vec() variant which uses 1-bit packing instead of 2.
-  uint32_t num_set = 0;
-  uint32_t upper_bound = tot_ct * tot_quotient - 1;
-  uintptr_t widx;
-  uintptr_t wcomp;
-  uintptr_t pv_val;
-  uint32_t urand;
-  uint32_t uii;
-  if (set_ct * 2 < tot_ct) {
-    fill_ulong_zero(perm_vec, (tot_ct + (BITCT - 1)) / BITCT);
-    for (; num_set < set_ct; num_set++) {
-      do {
-	do {
-	  urand = sfmt_genrand_uint32(sfmtp);
-	} while (urand > upper_bound);
-	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift;
-        widx = uii / BITCT;
-	wcomp = ONELU << (uii % BITCT);
-	pv_val = perm_vec[widx];
-      } while (pv_val & wcomp);
-      perm_vec[widx] = pv_val | wcomp;
-    }
-  } else {
-    fill_all_bits(perm_vec, tot_ct);
-    set_ct = tot_ct - set_ct;
-    for (; num_set < set_ct; num_set++) {
-      do {
-	do {
-	  urand = sfmt_genrand_uint32(sfmtp);
-	} while (urand > upper_bound);
-	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift;
-        widx = uii / BITCT;
-	wcomp = ONELU << (uii % BITCT);
-	pv_val = perm_vec[widx];
-      } while (!(pv_val & wcomp));
-      perm_vec[widx] = pv_val - wcomp;
-    }
-  }
-}
-
-void generate_cc_cluster_perm_vec(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp) {
-  uint32_t tot_ctl2 = 2 * ((tot_ct + (BITCT - 1)) / BITCT);
-  uint32_t cluster_idx;
-  uint32_t target_ct;
-  uint32_t cluster_end;
-  uint32_t* map_ptr;
-  uint32_t num_swapped;
-  uint32_t cluster_size;
-  uint32_t upper_bound;
-  uint64_t totq_magic;
-  uint32_t totq_preshift;
-  uint32_t totq_postshift;
-  uint32_t totq_incr;
-  uintptr_t widx;
-  uintptr_t wcomp;
-  uintptr_t pv_val;
-  uint32_t urand;
-  uint32_t uii;
-  memcpy(perm_vec, preimage, tot_ctl2 * sizeof(intptr_t));
-  for (cluster_idx = 0; cluster_idx < cluster_ct; cluster_idx++) {
-    target_ct = cluster_case_cts[cluster_idx];
-    cluster_end = cluster_starts[cluster_idx + 1];
-    cluster_size = cluster_end - cluster_starts[cluster_idx];
-    if (target_ct && (target_ct != cluster_size)) {
-      upper_bound = cluster_size * tot_quotients[cluster_idx] - 1;
-      totq_magic = totq_magics[cluster_idx];
-      totq_preshift = totq_preshifts[cluster_idx];
-      totq_postshift = totq_postshifts[cluster_idx];
-      totq_incr = totq_incrs[cluster_idx];
-      map_ptr = &(cluster_map[cluster_starts[cluster_idx]]);
-      if (target_ct * 2 < cluster_size) {
-	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
-	  do {
-	    do {
-	      urand = sfmt_genrand_uint32(sfmtp);
-	    } while (urand > upper_bound);
-	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)];
-	    widx = uii / BITCT2;
-	    wcomp = ONELU << (2 * (uii % BITCT2));
-	    pv_val = perm_vec[widx];
-	  } while (pv_val & wcomp);
-	  perm_vec[widx] = pv_val | wcomp;
-	}
-      } else {
-	target_ct = cluster_size - target_ct;
-	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
-	  do {
-	    do {
-	      urand = sfmt_genrand_uint32(sfmtp);
-	    } while (urand > upper_bound);
-	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)];
-	    widx = uii / BITCT2;
-	    wcomp = ONELU << (2 * (uii % BITCT2));
-	    pv_val = perm_vec[widx];
-	  } while (!(pv_val & wcomp));
-	  perm_vec[widx] = pv_val - wcomp;
-	}
-      }
-    }
-  }
-}
-
-void generate_cc_cluster_perm1(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp) {
-  uint32_t tot_ctl = (tot_ct + (BITCT - 1)) / BITCT;
-  uint32_t cluster_idx;
-  uint32_t target_ct;
-  uint32_t cluster_end;
-  uint32_t cluster_size;
-  uint32_t* map_ptr;
-  uint32_t num_swapped;
-  uint32_t upper_bound;
-  uint64_t totq_magic;
-  uint32_t totq_preshift;
-  uint32_t totq_postshift;
-  uint32_t totq_incr;
-  uintptr_t widx;
-  uintptr_t wcomp;
-  uintptr_t pv_val;
-  uint32_t urand;
-  uint32_t uii;
-  memcpy(perm_vec, preimage, tot_ctl * sizeof(intptr_t));
-  for (cluster_idx = 0; cluster_idx < cluster_ct; cluster_idx++) {
-    target_ct = cluster_case_cts[cluster_idx];
-    cluster_end = cluster_starts[cluster_idx + 1];
-    cluster_size = cluster_end - cluster_starts[cluster_idx];
-    if (target_ct && (target_ct != cluster_size)) {
-      upper_bound = cluster_size * tot_quotients[cluster_idx] - 1;
-      totq_magic = totq_magics[cluster_idx];
-      totq_preshift = totq_preshifts[cluster_idx];
-      totq_postshift = totq_postshifts[cluster_idx];
-      totq_incr = totq_incrs[cluster_idx];
-      map_ptr = &(cluster_map[cluster_starts[cluster_idx]]);
-      if (target_ct * 2 < cluster_size) {
-	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
-	  do {
-	    do {
-	      urand = sfmt_genrand_uint32(sfmtp);
-	    } while (urand > upper_bound);
-	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)];
-	    widx = uii / BITCT;
-	    wcomp = ONELU << (uii % BITCT);
-	    pv_val = perm_vec[widx];
-	  } while (pv_val & wcomp);
-	  perm_vec[widx] = pv_val | wcomp;
-	}
-      } else {
-	target_ct = cluster_size - target_ct;
-	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
-	  do {
-	    do {
-	      urand = sfmt_genrand_uint32(sfmtp);
-	    } while (urand > upper_bound);
-	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)];
-	    widx = uii / BITCT;
-	    wcomp = ONELU << (uii % BITCT);
-	    pv_val = perm_vec[widx];
-	  } while (!(pv_val & wcomp));
-	  perm_vec[widx] = pv_val - wcomp;
-	}
-      }
-    }
-  }
-}
-
-void transpose_perms(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_nm_ct, uint32_t* perm_vecst) {
-  // Transpose permutations so PRESTO/PERMORY-style genotype indexing can work.
-  //
-  // We used a 32-ply interleaved format, to allow counts up to the uint32_t
-  // limit without giving up highly parallel adds in the calc_git() inner loop.
-  // The index order used here is:
-  // 64-bit build:
-  //   first 16 bytes: 0 32 64 96 16 48 80 112 4 36 68 100 20 52 84 116
-  //     8 40 72 104 24 56 88 120 12 44 76 108 28 60 92 124 1...
-  //   next 16 bytes: 128 160 192...
-  //
-  // 32-bit build:
-  //   first 4 bytes: 0 8 16 24 4 12 20 28 1 9 17 25 5 13 21 29 2 10 18...
-  //   next 4 bytes: 32 40 48...
-  uintptr_t sample_idx = 0;
-  uintptr_t pheno_nm_ctl2 = 2 * ((pheno_nm_ct + (BITCT - 1)) / BITCT);
-#ifdef __LP64__
-  uint32_t wbuf[4];
-  uint32_t* wbptr;
-#else
-  uint32_t wval;
-#endif
-  uint32_t rshift;
-  uint32_t wshift;
-  uintptr_t* pvptr;
-  uintptr_t perm_idx;
-  for (; sample_idx < pheno_nm_ct; sample_idx++) {
-    perm_idx = 0;
-    pvptr = &(perm_vecs[sample_idx / BITCT2]);
-    rshift = 2 * (sample_idx % BITCT2);
-    goto transpose_perms_loop_start;
-#ifdef __LP64__
-    do {
-      if (!(perm_idx % 4)) {
-	if (perm_idx % 128) {
-	  wshift = ((perm_idx & 96) >> 5) | ((perm_idx & 16) >> 2) | ((perm_idx & 12) << 1);
-	} else {
-	  memcpy(perm_vecst, wbuf, 16);
-	  perm_vecst = &(perm_vecst[4]);
-	transpose_perms_loop_start:
-	  fill_uint_zero(wbuf, 4);
-	  wshift = 0;
-	}
-	wbptr = wbuf;
-      }
-      *wbptr |= ((pvptr[perm_idx * pheno_nm_ctl2] >> rshift) & 1) << wshift;
-      wbptr++;
-    } while (++perm_idx < perm_vec_ct);
-    memcpy(perm_vecst, wbuf, 16);
-    perm_vecst = &(perm_vecst[4]);
-#else
-    do {
-      if (perm_idx % 32) {
-	wshift = ((perm_idx & 24) >> 3) | (perm_idx & 4) | ((perm_idx & 3) << 3);
-      } else {
-	*perm_vecst++ = wval;
-      transpose_perms_loop_start:
-	wval = 0;
-	wshift = 0;
-      }
-      wval |= ((pvptr[perm_idx * pheno_nm_ctl2] >> rshift) & 1) << wshift;
-    } while (++perm_idx < perm_vec_ct);
-    *perm_vecst++ = wval;
-#endif
-  }
-}
-
-void transpose_perm1s(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_nm_ct, uint32_t* perm_vecst) {
-  uintptr_t sample_idx = 0;
-  uintptr_t pheno_nm_ctl = (pheno_nm_ct + (BITCT - 1)) / BITCT;
-#ifdef __LP64__
-  uint32_t wbuf[4];
-  uint32_t* wbptr;
-#else
-  uint32_t wval;
-#endif
-  uint32_t rshift;
-  uint32_t wshift;
-  uintptr_t* pvptr;
-  uintptr_t perm_idx;
-  for (; sample_idx < pheno_nm_ct; sample_idx++) {
-    perm_idx = 0;
-    pvptr = &(perm_vecs[sample_idx / BITCT]);
-    rshift = sample_idx % BITCT;
-    goto transpose_perm1s_loop_start;
-#ifdef __LP64__
-    do {
-      if (!(perm_idx % 4)) {
-	if (perm_idx % 128) {
-	  wshift = ((perm_idx & 96) >> 5) | ((perm_idx & 16) >> 2) | ((perm_idx & 12) << 1);
-	} else {
-	  memcpy(perm_vecst, wbuf, 16);
-	  perm_vecst = &(perm_vecst[4]);
-	transpose_perm1s_loop_start:
-	  fill_uint_zero(wbuf, 2);
-	  wshift = 0;
-	}
-	wbptr = wbuf;
-      }
-      *wbptr |= ((pvptr[perm_idx * pheno_nm_ctl] >> rshift) & 1) << wshift;
-      wbptr++;
-    } while (++perm_idx < perm_vec_ct);
-    memcpy(perm_vecst, wbuf, 16);
-    perm_vecst = &(perm_vecst[4]);
-#else
-    do {
-      if (perm_idx % 32) {
-	wshift = ((perm_idx & 24) >> 3) | (perm_idx & 4) | ((perm_idx & 3) << 3);
-      } else {
-	*perm_vecst++ = wval;
-      transpose_perm1s_loop_start:
-	wval = 0;
-	wshift = 0;
-      }
-      wval |= ((pvptr[perm_idx * pheno_nm_ctl] >> rshift) & 1) << wshift;
-    } while (++perm_idx < perm_vec_ct);
-    *perm_vecst++ = wval;
-#endif
-  }
-}
-
 char* model_assoc_tna(uint32_t model_fisher, char* wptr) {
   // write terminal NAs to buffer
   if (model_fisher) {
@@ -924,33 +596,19 @@ void calc_git(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __restrict_
   uint32_t perm_ct128x4 = perm_ct128 * 4;
   uint32_t perm_ct32 = (perm_vec_ct + 31) / 32;
   uint32_t perm_ct16x4 = 4 * perm_ct16;
-  const __m128i m1x4 = {0x1111111111111111LLU, 0x1111111111111111LLU};
-  const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
-  const __m128i m8x32 = {0x000000ff000000ffLLU, 0x000000ff000000ffLLU};
   __m128i* permsv = (__m128i*)perm_vecst;
   __m128i* gitv[9];
-  __m128i* __restrict__ git_merge4; // no conflicts, please
-  __m128i* __restrict__ git_merge8;
-  __m128i* __restrict__ git_write;
-  __m128i* __restrict__ perm_ptr;
-  __m128i loader;
 #else
   uint32_t perm_ct32 = (perm_vec_ct + 31) / 32;
   uint32_t perm_ct32x4 = perm_ct32 * 4;
   uint32_t perm_ct8 = (perm_vec_ct + 7) / 8;
   uint32_t perm_ct4 = (perm_vec_ct + 3) / 4;
   uint32_t perm_ct16x16 = 16 * perm_ct16;
-  uint32_t* permsv = perm_vecst;
-  uint32_t* gitv[9];
-  uint32_t* git_merge4;
-  uint32_t* git_merge8;
-  uint32_t* git_write;
-  uint32_t* perm_ptr;
-  uintptr_t loader;
+  uintptr_t* permsv = (uintptr_t*)perm_vecst;
+  uintptr_t* gitv[9];
 #endif
   uint32_t cur_cts[3];
   uintptr_t ulii;
-  uint32_t pbidx;
   uint32_t uii;
   uint32_t ujj;
   uint32_t ukk;
@@ -967,15 +625,15 @@ void calc_git(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __restrict_
   gitv[7] = &(((__m128i*)results_bufs)[perm_ct16x4]);
   gitv[8] = (__m128i*)results_bufs;
 #else
-  gitv[0] = thread_wkspace;
-  gitv[1] = &(thread_wkspace[perm_ct32x4]);
-  gitv[2] = &(thread_wkspace[2 * perm_ct32x4]);
-  gitv[3] = &(thread_wkspace[3 * perm_ct32x4]);
-  gitv[4] = &(thread_wkspace[3 * perm_ct32x4 + 2 * perm_ct8]);
-  gitv[5] = &(thread_wkspace[3 * perm_ct32x4 + 4 * perm_ct8]);
-  gitv[6] = &(results_bufs[2 * perm_ct16x16]);
-  gitv[7] = &(results_bufs[perm_ct16x16]);
-  gitv[8] = results_bufs;
+  gitv[0] = (uintptr_t*)thread_wkspace;
+  gitv[1] = (uintptr_t*)(&(thread_wkspace[perm_ct32x4]));
+  gitv[2] = (uintptr_t*)(&(thread_wkspace[2 * perm_ct32x4]));
+  gitv[3] = (uintptr_t*)(&(thread_wkspace[3 * perm_ct32x4]));
+  gitv[4] = (uintptr_t*)(&(thread_wkspace[3 * perm_ct32x4 + 2 * perm_ct8]));
+  gitv[5] = (uintptr_t*)(&(thread_wkspace[3 * perm_ct32x4 + 4 * perm_ct8]));
+  gitv[6] = (uintptr_t*)(&(results_bufs[2 * perm_ct16x16]));
+  gitv[7] = (uintptr_t*)(&(results_bufs[perm_ct16x16]));
+  gitv[8] = (uintptr_t*)results_bufs;
 #endif
   cur_cts[0] = 0;
   cur_cts[1] = 0;
@@ -991,77 +649,22 @@ void calc_git(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __restrict_
     while (ulii) {
       ujj = CTZLU(ulii) & (BITCT - 2); // get pos of next non-[hom A2] sample
       sample_type = ((ulii >> ujj) & 3) - 1;
-      git_merge4 = gitv[sample_type];
-#ifdef __LP64__
-      perm_ptr = &(permsv[(ujj / 2) * perm_ct128]);
-      for (pbidx = 0; pbidx < perm_ct128; pbidx++) {
-	loader = *perm_ptr++;
-	git_merge4[0] = _mm_add_epi64(git_merge4[0], _mm_and_si128(loader, m1x4));
-	git_merge4[1] = _mm_add_epi64(git_merge4[1], _mm_and_si128(_mm_srli_epi64(loader, 1), m1x4));
-	git_merge4[2] = _mm_add_epi64(git_merge4[2], _mm_and_si128(_mm_srli_epi64(loader, 2), m1x4));
-	git_merge4[3] = _mm_add_epi64(git_merge4[3], _mm_and_si128(_mm_srli_epi64(loader, 3), m1x4));
-	git_merge4 = &(git_merge4[4]);
-      }
       ukk = cur_cts[sample_type] + 1;
       cur_cts[sample_type] = ukk;
+#ifdef __LP64__
+      unroll_incr_1_4(&(permsv[(ujj / 2) * perm_ct128]), gitv[sample_type], perm_ct128);
       if (!(ukk % 15)) {
-	git_merge4 = gitv[sample_type];
-	git_merge8 = gitv[sample_type + 3];
-	for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-	  loader = *git_merge4;
-	  git_merge8[0] = _mm_add_epi64(git_merge8[0], _mm_and_si128(loader, m4));
-	  git_merge8[1] = _mm_add_epi64(git_merge8[1], _mm_and_si128(_mm_srli_epi64(loader, 4), m4));
-	  git_merge8 = &(git_merge8[2]);
-	  *git_merge4++ = _mm_setzero_si128();
-	}
+	unroll_zero_incr_4_8(gitv[sample_type], gitv[sample_type + 3], perm_ct32);
 	if (!(ukk % 255)) {
-	  git_merge8 = gitv[sample_type + 3];
-	  git_write = gitv[sample_type + 6];
-	  for (pbidx = 0; pbidx < perm_ct16; pbidx++) {
-	    loader = *git_merge8;
-	    git_write[0] = _mm_add_epi64(git_write[0], _mm_and_si128(loader, m8x32));
-	    git_write[1] = _mm_add_epi64(git_write[1], _mm_and_si128(_mm_srli_epi64(loader, 8), m8x32));
-	    git_write[2] = _mm_add_epi64(git_write[2], _mm_and_si128(_mm_srli_epi64(loader, 16), m8x32));
-	    git_write[3] = _mm_add_epi64(git_write[3], _mm_and_si128(_mm_srli_epi64(loader, 24), m8x32));
-	    git_write = &(git_write[4]);
-	    *git_merge8++ = _mm_setzero_si128();
-	  }
+	  unroll_zero_incr_8_32(gitv[sample_type + 3], gitv[sample_type + 6], perm_ct16);
 	}
       }
 #else
-      perm_ptr = &(permsv[(ujj / 2) * perm_ct32]);
-      for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-	loader = *perm_ptr++;
-	git_merge4[0] += loader & 0x11111111;
-	git_merge4[1] += (loader >> 1) & 0x11111111;
-	git_merge4[2] += (loader >> 2) & 0x11111111;
-	git_merge4[3] += (loader >> 3) & 0x11111111;
-	git_merge4 = &(git_merge4[4]);
-      }
-      ukk = cur_cts[sample_type] + 1;
-      cur_cts[sample_type] = ukk;
+      unroll_incr_1_4(&(permsv[(ujj / 2) * perm_ct32]), gitv[sample_type], perm_ct32);
       if (!(ukk % 15)) {
-	git_merge4 = gitv[sample_type];
-	git_merge8 = gitv[sample_type + 3];
-	for (pbidx = 0; pbidx < perm_ct8; pbidx++) {
-	  loader = *git_merge4;
-	  git_merge8[0] += loader & 0x0f0f0f0f;
-	  git_merge8[1] += (loader >> 4) & 0x0f0f0f0f;
-	  git_merge8 = &(git_merge8[2]);
-	  *git_merge4++ = 0;
-	}
+	unroll_zero_incr_4_8(gitv[sample_type], gitv[sample_type + 3], perm_ct8);
 	if (!(ukk % 255)) {
-	  git_merge8 = gitv[sample_type + 3];
-	  git_write = gitv[sample_type + 6];
-	  for (pbidx = 0; pbidx < perm_ct4; pbidx++) {
-	    loader = *git_merge8;
-	    git_write[0] += loader & 0x000000ff;
-	    git_write[1] += (loader >> 8) & 0x000000ff;
-	    git_write[2] += (loader >> 16) & 0x000000ff;
-	    git_write[3] += loader >> 24;
-	    git_write = &(git_write[4]);
-	    *git_merge8++ = 0;
-	  }
+	  unroll_zero_incr_8_32(gitv[sample_type + 3], gitv[sample_type + 6], perm_ct4);
 	}
       }
 #endif
@@ -1077,49 +680,17 @@ void calc_git(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __restrict_
     uii = cur_cts[sample_type];
 #ifdef __LP64__
     if (uii % 15) {
-      git_merge4 = gitv[sample_type];
-      git_merge8 = gitv[sample_type + 3];
-      for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-	loader = *git_merge4++;
-	git_merge8[0] = _mm_add_epi64(git_merge8[0], _mm_and_si128(loader, m4));
-	git_merge8[1] = _mm_add_epi64(git_merge8[1], _mm_and_si128(_mm_srli_epi64(loader, 4), m4));
-	git_merge8 = &(git_merge8[2]);
-      }
+      unroll_incr_4_8(gitv[sample_type], gitv[sample_type + 3], perm_ct32);
     }
     if (uii % 255) {
-      git_merge8 = gitv[sample_type + 3];
-      git_write = gitv[sample_type + 6];
-      for (pbidx = 0; pbidx < perm_ct16; pbidx++) {
-	loader = *git_merge8++;
-	git_write[0] = _mm_add_epi64(git_write[0], _mm_and_si128(loader, m8x32));
-	git_write[1] = _mm_add_epi64(git_write[1], _mm_and_si128(_mm_srli_epi64(loader, 8), m8x32));
-	git_write[2] = _mm_add_epi64(git_write[2], _mm_and_si128(_mm_srli_epi64(loader, 16), m8x32));
-	git_write[3] = _mm_add_epi64(git_write[3], _mm_and_si128(_mm_srli_epi64(loader, 24), m8x32));
-	git_write = &(git_write[4]);
-      }
+      unroll_incr_8_32(gitv[sample_type + 3], gitv[sample_type + 6], perm_ct16);
     }
 #else
     if (uii % 15) {
-      git_merge4 = gitv[sample_type];
-      git_merge8 = gitv[sample_type + 3];
-      for (pbidx = 0; pbidx < perm_ct8; pbidx++) {
-	loader = *git_merge4++;
-	git_merge8[0] += loader & 0x0f0f0f0f;
-	git_merge8[1] += (loader >> 4) & 0x0f0f0f0f;
-	git_merge8 = &(git_merge8[2]);
-      }
+      unroll_incr_4_8(gitv[sample_type], gitv[sample_type + 3], perm_ct8);
     }
     if (uii % 255) {
-      git_merge8 = gitv[sample_type + 3];
-      git_write = gitv[sample_type + 6];
-      for (pbidx = 0; pbidx < perm_ct4; pbidx++) {
-	loader = *git_merge8++;
-	git_write[0] += loader & 0x000000ff;
-	git_write[1] += (loader >> 8) & 0x000000ff;
-	git_write[2] += (loader >> 16) & 0x000000ff;
-	git_write[3] += loader >> 24;
-	git_write = &(git_write[4]);
-      }
+      unroll_incr_8_32(gitv[sample_type + 3], gitv[sample_type + 6], perm_ct4);
     }
 #endif
   }
@@ -1621,20 +1192,6 @@ uintptr_t qrem_cost2(uintptr_t sample_ctl2, uintptr_t* loadbuf1, uintptr_t* load
 }
 
 #ifdef __LP64__
-static inline void calc_rem_merge4_one(uint32_t perm_ct128, __m128i* __restrict__ perm_ptr, __m128i* __restrict__ rem_merge4) {
-  const __m128i m1x4 = {0x1111111111111111LLU, 0x1111111111111111LLU};
-  __m128i loader;
-  uint32_t pbidx;
-  for (pbidx = 0; pbidx < perm_ct128; pbidx++) {
-    loader = *perm_ptr++;
-    rem_merge4[0] = _mm_add_epi64(rem_merge4[0], _mm_and_si128(loader, m1x4));
-    rem_merge4[1] = _mm_add_epi64(rem_merge4[1], _mm_and_si128(_mm_srli_epi64(loader, 1), m1x4));
-    rem_merge4[2] = _mm_add_epi64(rem_merge4[2], _mm_and_si128(_mm_srli_epi64(loader, 2), m1x4));
-    rem_merge4[3] = _mm_add_epi64(rem_merge4[3], _mm_and_si128(_mm_srli_epi64(loader, 3), m1x4));
-    rem_merge4 = &(rem_merge4[4]);
-  }
-}
-
 static inline void calc_rem_merge4_two(uint32_t perm_ct128, __m128i* __restrict__ perm_ptr, __m128i* __restrict__ rem_merge4a, __m128i* __restrict__ rem_merge4b) {
   const __m128i m1x4 = {0x1111111111111111LLU, 0x1111111111111111LLU};
   __m128i loader;
@@ -1645,13 +1202,16 @@ static inline void calc_rem_merge4_two(uint32_t perm_ct128, __m128i* __restrict_
     loader2 = _mm_and_si128(loader, m1x4);
     rem_merge4a[0] = _mm_add_epi64(rem_merge4a[0], loader2);
     rem_merge4b[0] = _mm_add_epi64(rem_merge4b[0], loader2);
-    loader2 = _mm_and_si128(_mm_srli_epi64(loader, 1), m1x4);
+    loader = _mm_srli_epi64(loader, 1);
+    loader2 = _mm_and_si128(loader, m1x4);
     rem_merge4a[1] = _mm_add_epi64(rem_merge4a[1], loader2);
     rem_merge4b[1] = _mm_add_epi64(rem_merge4b[1], loader2);
-    loader2 = _mm_and_si128(_mm_srli_epi64(loader, 2), m1x4);
+    loader = _mm_srli_epi64(loader, 1);
+    loader2 = _mm_and_si128(loader, m1x4);
     rem_merge4a[2] = _mm_add_epi64(rem_merge4a[2], loader2);
     rem_merge4b[2] = _mm_add_epi64(rem_merge4b[2], loader2);
-    loader2 = _mm_and_si128(_mm_srli_epi64(loader, 3), m1x4);
+    loader = _mm_srli_epi64(loader, 1);
+    loader2 = _mm_and_si128(loader, m1x4);
     rem_merge4a[3] = _mm_add_epi64(rem_merge4a[3], loader2);
     rem_merge4b[3] = _mm_add_epi64(rem_merge4b[3], loader2);
     rem_merge4a = &(rem_merge4a[4]);
@@ -1659,34 +1219,6 @@ static inline void calc_rem_merge4_two(uint32_t perm_ct128, __m128i* __restrict_
   }
 }
 
-static inline void calc_rem_merge8(uint32_t perm_ct32, __m128i* __restrict__ rem_merge4, __m128i* __restrict__ rem_merge8) {
-  const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
-  __m128i loader;
-  uint32_t pbidx;
-  for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-    loader = *rem_merge4;
-    rem_merge8[0] = _mm_add_epi64(rem_merge8[0], _mm_and_si128(loader, m4));
-    rem_merge8[1] = _mm_add_epi64(rem_merge8[1], _mm_and_si128(_mm_srli_epi64(loader, 4), m4));
-    rem_merge8 = &(rem_merge8[2]);
-    *rem_merge4++ = _mm_setzero_si128();
-  }
-}
-
-static inline void calc_rem_merge32_plus(uint32_t perm_ct16, __m128i* __restrict__ rem_merge8, __m128i* rem_write) {
-  const __m128i m8x32 = {0x000000ff000000ffLLU, 0x000000ff000000ffLLU};
-  __m128i loader;
-  uint32_t pbidx;
-  for (pbidx = 0; pbidx < perm_ct16; pbidx++) {
-    loader = *rem_merge8;
-    rem_write[0] = _mm_add_epi64(rem_write[0], _mm_and_si128(loader, m8x32));
-    rem_write[1] = _mm_add_epi64(rem_write[1], _mm_and_si128(_mm_srli_epi64(loader, 8), m8x32));
-    rem_write[2] = _mm_add_epi64(rem_write[2], _mm_and_si128(_mm_srli_epi64(loader, 16), m8x32));
-    rem_write[3] = _mm_add_epi64(rem_write[3], _mm_and_si128(_mm_srli_epi64(loader, 24), m8x32));
-    rem_write = &(rem_write[4]);
-    *rem_merge8++ = _mm_setzero_si128();
-  }
-}
-
 static inline void calc_rem_merge32_minus(uint32_t perm_ct16, __m128i* __restrict__ rem_merge8, __m128i* rem_write) {
   // temporary integer underflow is possible here, but by the end of the
   // calculation it should be reversed
@@ -1696,27 +1228,17 @@ static inline void calc_rem_merge32_minus(uint32_t perm_ct16, __m128i* __restric
   for (pbidx = 0; pbidx < perm_ct16; pbidx++) {
     loader = *rem_merge8;
     rem_write[0] = _mm_sub_epi64(rem_write[0], _mm_and_si128(loader, m8x32));
-    rem_write[1] = _mm_sub_epi64(rem_write[1], _mm_and_si128(_mm_srli_epi64(loader, 8), m8x32));
-    rem_write[2] = _mm_sub_epi64(rem_write[2], _mm_and_si128(_mm_srli_epi64(loader, 16), m8x32));
-    rem_write[3] = _mm_sub_epi64(rem_write[3], _mm_and_si128(_mm_srli_epi64(loader, 24), m8x32));
+    loader = _mm_srli_epi64(loader, 8);
+    rem_write[1] = _mm_sub_epi64(rem_write[1], _mm_and_si128(loader, m8x32));
+    loader = _mm_srli_epi64(loader, 8);
+    rem_write[2] = _mm_sub_epi64(rem_write[2], _mm_and_si128(loader, m8x32));
+    loader = _mm_srli_epi64(loader, 8);
+    rem_write[3] = _mm_sub_epi64(rem_write[3], _mm_and_si128(loader, m8x32));
     rem_write = &(rem_write[4]);
     *rem_merge8++ = _mm_setzero_si128();
   }
 }
 #else
-static inline void calc_rem_merge4_one(uint32_t perm_ct32, uintptr_t* __restrict__ perm_ptr, uintptr_t* __restrict__ rem_merge4) {
-  uintptr_t loader;
-  uint32_t pbidx;
-  for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-    loader = *perm_ptr++;
-    rem_merge4[0] += loader & 0x11111111;
-    rem_merge4[1] += (loader >> 1) & 0x11111111;
-    rem_merge4[2] += (loader >> 2) & 0x11111111;
-    rem_merge4[3] += (loader >> 3) & 0x11111111;
-    rem_merge4 = &(rem_merge4[4]);
-  }
-}
-
 static inline void calc_rem_merge4_two(uint32_t perm_ct32, uintptr_t* __restrict__ perm_ptr, uintptr_t* __restrict__ rem_merge4a, uintptr_t* __restrict__ rem_merge4b) {
   uintptr_t loader;
   uintptr_t loader2;
@@ -1740,41 +1262,18 @@ static inline void calc_rem_merge4_two(uint32_t perm_ct32, uintptr_t* __restrict
   }
 }
 
-static inline void calc_rem_merge8(uint32_t perm_ct8, uintptr_t* __restrict__ rem_merge4, uintptr_t* __restrict__ rem_merge8) {
-  uintptr_t loader;
-  uint32_t pbidx;
-  for (pbidx = 0; pbidx < perm_ct8; pbidx++) {
-    loader = *rem_merge4;
-    rem_merge8[0] += loader & 0x0f0f0f0f;
-    rem_merge8[1] += (loader >> 4) & 0x0f0f0f0f;
-    rem_merge8 = &(rem_merge8[2]);
-    *rem_merge4++ = 0;
-  }
-}
-
-static inline void calc_rem_merge32_plus(uint32_t perm_ct4, uintptr_t* __restrict__ rem_merge8, uintptr_t* __restrict__ rem_write) {
-  uintptr_t loader;
-  uint32_t pbidx;
-  for (pbidx = 0; pbidx < perm_ct4; pbidx++) {
-    loader = *rem_merge8;
-    rem_write[0] += loader & 0x000000ff;
-    rem_write[1] += (loader >> 8) & 0x000000ff;
-    rem_write[2] += (loader >> 16) & 0x000000ff;
-    rem_write[3] += loader >> 24;
-    rem_write = &(rem_write[4]);
-    *rem_merge8++ = 0;
-  }
-}
-
 static inline void calc_rem_merge32_minus(uint32_t perm_ct4, uintptr_t* __restrict__ rem_merge8, uintptr_t* __restrict__ rem_write) {
   uintptr_t loader;
   uint32_t pbidx;
   for (pbidx = 0; pbidx < perm_ct4; pbidx++) {
     loader = *rem_merge8;
-    rem_write[0] -= loader & 0x000000ff;
-    rem_write[1] -= (loader >> 8) & 0x000000ff;
-    rem_write[2] -= (loader >> 16) & 0x000000ff;
-    rem_write[3] -= loader >> 24;
+    rem_write[0] -= (uint8_t)loader;
+    loader >>= 8;
+    rem_write[1] -= (uint8_t)loader;
+    loader >>= 8;
+    rem_write[2] -= (uint8_t)loader;
+    loader >>= 8;
+    rem_write[3] -= loader;
     rem_write = &(rem_write[4]);
     *rem_merge8++ = 0;
   }
@@ -1863,13 +1362,13 @@ void calc_rem(uint32_t pheno_nm_ct, uintptr_t perm_vec_ct, uintptr_t* loadbuf, u
 #ifdef __LP64__
       perm_ptr = &(permsv[(ujj / 2) * perm_ct128]);
       if (!idx2) {
-	calc_rem_merge4_one(perm_ct128, perm_ptr, remv[idx1]);
+	unroll_incr_1_4(perm_ptr, remv[idx1], perm_ct128);
       } else {
 	calc_rem_merge4_two(perm_ct128, perm_ptr, remv[idx1], remv[idx2]);
 	ukk = cur_cts[idx2] + 1;
 	cur_cts[idx2] = ukk;
 	if (!(ukk % 15)) {
-	  calc_rem_merge8(perm_ct32, remv[idx2], remv[idx2 + 6]);
+	  unroll_zero_incr_4_8(remv[idx2], remv[idx2 + 6], perm_ct32);
 	  if (!(ukk % 255)) {
 	    calc_rem_merge32_minus(perm_ct16, remv[idx2 + 6], remv[(idx2 / 2) + 12]);
 	  }
@@ -1878,10 +1377,10 @@ void calc_rem(uint32_t pheno_nm_ct, uintptr_t perm_vec_ct, uintptr_t* loadbuf, u
       ukk = cur_cts[idx1] + 1;
       cur_cts[idx1] = ukk;
       if (!(ukk % 15)) {
-	calc_rem_merge8(perm_ct32, remv[idx1], remv[idx1 + 6]);
+	unroll_zero_incr_4_8(remv[idx1], remv[idx1 + 6], perm_ct32);
 	if (!(ukk % 255)) {
 	  if (!(idx1 & 1)) {
-	    calc_rem_merge32_plus(perm_ct16, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
+	    unroll_zero_incr_8_32(remv[idx1 + 6], remv[(idx1 / 2) + 12], perm_ct16);
 	  } else {
 	    calc_rem_merge32_minus(perm_ct16, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
 	  }
@@ -1890,13 +1389,13 @@ void calc_rem(uint32_t pheno_nm_ct, uintptr_t perm_vec_ct, uintptr_t* loadbuf, u
 #else
       perm_ptr = &(permsv[(ujj / 2) * perm_ct32]);
       if (!idx2) {
-	calc_rem_merge4_one(perm_ct32, perm_ptr, remv[idx1]);
+	unroll_incr_1_4(perm_ptr, remv[idx1], perm_ct32);
       } else {
 	calc_rem_merge4_two(perm_ct32, perm_ptr, remv[idx1], remv[idx2]);
 	ukk = cur_cts[idx2] + 1;
 	cur_cts[idx2] = ukk;
 	if (!(ukk % 15)) {
-	  calc_rem_merge8(perm_ct8, remv[idx2], remv[idx2 + 6]);
+	  unroll_zero_incr_4_8(remv[idx2], remv[idx2 + 6], perm_ct8);
 	  if (!(ukk % 255)) {
 	    calc_rem_merge32_minus(perm_ct4, remv[idx2 + 6], remv[(idx2 / 2) + 12]);
 	  }
@@ -1905,10 +1404,10 @@ void calc_rem(uint32_t pheno_nm_ct, uintptr_t perm_vec_ct, uintptr_t* loadbuf, u
       ukk = cur_cts[idx1] + 1;
       cur_cts[idx1] = ukk;
       if (!(ukk % 15)) {
-	calc_rem_merge8(perm_ct8, remv[idx1], remv[idx1 + 6]);
+	unroll_zero_incr_4_8(remv[idx1], remv[idx1 + 6], perm_ct8);
 	if (!(ukk % 255)) {
 	  if (!(idx1 & 1)) {
-	    calc_rem_merge32_plus(perm_ct4, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
+	    unroll_zero_incr_8_32(remv[idx1 + 6], remv[(idx1 / 2) + 12], perm_ct4);
 	  } else {
 	    calc_rem_merge32_minus(perm_ct4, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
 	  }
@@ -1927,22 +1426,23 @@ void calc_rem(uint32_t pheno_nm_ct, uintptr_t perm_vec_ct, uintptr_t* loadbuf, u
     uii = cur_cts[idx1];
 #ifdef __LP64__
     if (uii % 15) {
-      calc_rem_merge8(perm_ct32, remv[idx1], remv[idx1 + 6]);
+      // todo: check if zeroing needed
+      unroll_zero_incr_4_8(remv[idx1], remv[idx1 + 6], perm_ct32);
     }
     if (uii % 255) {
       if (!(idx1 & 1)) {
-	calc_rem_merge32_plus(perm_ct16, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
+	unroll_zero_incr_8_32(remv[idx1 + 6], remv[(idx1 / 2) + 12], perm_ct16);
       } else {
 	calc_rem_merge32_minus(perm_ct16, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
       }
     }
 #else
     if (uii % 15) {
-      calc_rem_merge8(perm_ct8, remv[idx1], remv[idx1 + 6]);
+      unroll_zero_incr_4_8(remv[idx1], remv[idx1 + 6], perm_ct8);
     }
     if (uii % 255) {
       if (!(idx1 & 1)) {
-	calc_rem_merge32_plus(perm_ct4, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
+	unroll_zero_incr_8_32(remv[idx1 + 6], remv[(idx1 / 2) + 12], perm_ct4);
       } else {
 	calc_rem_merge32_minus(perm_ct4, remv[idx1 + 6], remv[(idx1 / 2) + 12]);
       }
@@ -10438,32 +9938,18 @@ void calc_git_missing(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __r
   uint32_t perm_ct128 = (perm_vec_ct + 127) / 128;
   uint32_t perm_ct128x4 = perm_ct128 * 4;
   uint32_t perm_ct32 = (perm_vec_ct + 31) / 32;
-  const __m128i m1x4 = {0x1111111111111111LLU, 0x1111111111111111LLU};
-  const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU};
-  const __m128i m8x32 = {0x000000ff000000ffLLU, 0x000000ff000000ffLLU};
   __m128i* permsv = (__m128i*)perm_vecst;
   __m128i* gitv[3];
-  __m128i* __restrict__ git_merge4;
-  __m128i* __restrict__ git_merge8;
-  __m128i* __restrict__ git_write;
-  __m128i* __restrict__ perm_ptr;
-  __m128i loader;
 #else
   uint32_t perm_ct32 = (perm_vec_ct + 31) / 32;
   uint32_t perm_ct32x4 = perm_ct32 * 4;
   uint32_t perm_ct8 = (perm_vec_ct + 7) / 8;
   uint32_t perm_ct4 = (perm_vec_ct + 3) / 4;
-  uint32_t* permsv = perm_vecst;
-  uint32_t* gitv[3];
-  uint32_t* git_merge4;
-  uint32_t* git_merge8;
-  uint32_t* git_write;
-  uint32_t* perm_ptr;
-  uintptr_t loader;
+  uintptr_t* permsv = (uintptr_t*)perm_vecst;
+  uintptr_t* gitv[3];
 #endif
   uint32_t cur_ct;
   uintptr_t ulii;
-  uint32_t pbidx;
   uint32_t uii;
   uint32_t ujj;
 #ifdef __LP64__
@@ -10472,9 +9958,9 @@ void calc_git_missing(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __r
   gitv[1] = &(((__m128i*)thread_wkspace)[9 * perm_ct128x4]);
   gitv[2] = (__m128i*)thread_wkspace;
 #else
-  gitv[0] = &(thread_wkspace[8 * perm_ct32x4]);
-  gitv[1] = &(thread_wkspace[9 * perm_ct32x4]);
-  gitv[2] = thread_wkspace;
+  gitv[0] = (uintptr_t*)(&(thread_wkspace[8 * perm_ct32x4]));
+  gitv[1] = (uintptr_t*)(&(thread_wkspace[9 * perm_ct32x4]));
+  gitv[2] = (uintptr_t*)thread_wkspace;
 #endif
   cur_ct = 0;
   for (uii = 0; uii < pheno_nm_ctl; uii++) {
@@ -10487,75 +9973,21 @@ void calc_git_missing(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __r
     }
     while (ulii) {
       ujj = CTZLU(ulii);
-      git_merge4 = gitv[0];
-#ifdef __LP64__
-      perm_ptr = &(permsv[ujj * perm_ct128]);
-      for (pbidx = 0; pbidx < perm_ct128; pbidx++) {
-	loader = *perm_ptr++;
-	git_merge4[0] = _mm_add_epi64(git_merge4[0], _mm_and_si128(loader, m1x4));
-	git_merge4[1] = _mm_add_epi64(git_merge4[1], _mm_and_si128(_mm_srli_epi64(loader, 1), m1x4));
-	git_merge4[2] = _mm_add_epi64(git_merge4[2], _mm_and_si128(_mm_srli_epi64(loader, 2), m1x4));
-	git_merge4[3] = _mm_add_epi64(git_merge4[3], _mm_and_si128(_mm_srli_epi64(loader, 3), m1x4));
-	git_merge4 = &(git_merge4[4]);
-      }
       cur_ct++;
+#ifdef __LP64__
+      unroll_incr_1_4(&(permsv[ujj * perm_ct128]), gitv[0], perm_ct128);
       if (!(cur_ct % 15)) {
-	git_merge4 = gitv[0];
-	git_merge8 = gitv[1];
-	for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-	  loader = *git_merge4;
-	  git_merge8[0] = _mm_add_epi64(git_merge8[0], _mm_and_si128(loader, m4));
-	  git_merge8[1] = _mm_add_epi64(git_merge8[1], _mm_and_si128(_mm_srli_epi64(loader, 4), m4));
-	  git_merge8 = &(git_merge8[2]);
-	  *git_merge4++ = _mm_setzero_si128();
-	}
+	unroll_zero_incr_4_8(gitv[0], gitv[1], perm_ct32);
 	if (!(cur_ct % 255)) {
-	  git_merge8 = gitv[1];
-	  git_write = gitv[2];
-	  for (pbidx = 0; pbidx < perm_ct16; pbidx++) {
-	    loader = *git_merge8;
-	    git_write[0] = _mm_add_epi64(git_write[0], _mm_and_si128(loader, m8x32));
-	    git_write[1] = _mm_add_epi64(git_write[1], _mm_and_si128(_mm_srli_epi64(loader, 8), m8x32));
-	    git_write[2] = _mm_add_epi64(git_write[2], _mm_and_si128(_mm_srli_epi64(loader, 16), m8x32));
-	    git_write[3] = _mm_add_epi64(git_write[3], _mm_and_si128(_mm_srli_epi64(loader, 24), m8x32));
-	    git_write = &(git_write[4]);
-	    *git_merge8++ = _mm_setzero_si128();
-	  }
+	  unroll_zero_incr_8_32(gitv[1], gitv[2], perm_ct16);
 	}
       }
 #else
-      perm_ptr = &(permsv[ujj * perm_ct32]);
-      for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-	loader = *perm_ptr++;
-	git_merge4[0] += loader & 0x11111111;
-	git_merge4[1] += (loader >> 1) & 0x11111111;
-	git_merge4[2] += (loader >> 2) & 0x11111111;
-	git_merge4[3] += (loader >> 3) & 0x11111111;
-	git_merge4 = &(git_merge4[4]);
-      }
-      cur_ct++;
+      unroll_incr_1_4(&(permsv[ujj * perm_ct32]), gitv[0], perm_ct32);
       if (!(cur_ct % 15)) {
-	git_merge4 = gitv[0];
-	git_merge8 = gitv[1];
-	for (pbidx = 0; pbidx < perm_ct8; pbidx++) {
-	  loader = *git_merge4;
-	  git_merge8[0] += loader & 0x0f0f0f0f;
-	  git_merge8[1] += (loader >> 4) & 0x0f0f0f0f;
-	  git_merge8 = &(git_merge8[2]);
-	  *git_merge4++ = 0;
-	}
+	unroll_zero_incr_4_8(gitv[0], gitv[1], perm_ct8);
 	if (!(cur_ct % 255)) {
-	  git_merge8 = gitv[1];
-	  git_write = gitv[2];
-	  for (pbidx = 0; pbidx < perm_ct4; pbidx++) {
-	    loader = *git_merge8;
-	    git_write[0] += loader & 0x000000ff;
-	    git_write[1] += (loader >> 8) & 0x000000ff;
-	    git_write[2] += (loader >> 16) & 0x000000ff;
-	    git_write[3] += loader >> 24;
-	    git_write = &(git_write[4]);
-	    *git_merge8++ = 0;
-	  }
+	  unroll_zero_incr_8_32(gitv[1], gitv[2], perm_ct4);
 	}
       }
 #endif
@@ -10569,49 +10001,17 @@ void calc_git_missing(uint32_t pheno_nm_ct, uint32_t perm_vec_ct, uintptr_t* __r
   }
 #ifdef __LP64__
   if (cur_ct % 15) {
-    git_merge4 = gitv[0];
-    git_merge8 = gitv[1];
-    for (pbidx = 0; pbidx < perm_ct32; pbidx++) {
-      loader = *git_merge4++;
-      git_merge8[0] = _mm_add_epi64(git_merge8[0], _mm_and_si128(loader, m4));
-      git_merge8[1] = _mm_add_epi64(git_merge8[1], _mm_and_si128(_mm_srli_epi64(loader, 4), m4));
-      git_merge8 = &(git_merge8[2]);
-    }
+    unroll_incr_4_8(gitv[0], gitv[1], perm_ct32);
   }
   if (cur_ct % 255) {
-    git_merge8 = gitv[1];
-    git_write = gitv[2];
-    for (pbidx = 0; pbidx < perm_ct16; pbidx++) {
-      loader = *git_merge8++;
-      git_write[0] = _mm_add_epi64(git_write[0], _mm_and_si128(loader, m8x32));
-      git_write[1] = _mm_add_epi64(git_write[1], _mm_and_si128(_mm_srli_epi64(loader, 8), m8x32));
-      git_write[2] = _mm_add_epi64(git_write[2], _mm_and_si128(_mm_srli_epi64(loader, 16), m8x32));
-      git_write[3] = _mm_add_epi64(git_write[3], _mm_and_si128(_mm_srli_epi64(loader, 24), m8x32));
-      git_write = &(git_write[4]);
-    }
+    unroll_incr_8_32(gitv[1], gitv[2], perm_ct16);
   }
 #else
   if (cur_ct % 15) {
-    git_merge4 = gitv[0];
-    git_merge8 = gitv[1];
-    for (pbidx = 0; pbidx < perm_ct8; pbidx++) {
-      loader = *git_merge4++;
-      git_merge8[0] += loader & 0x0f0f0f0f;
-      git_merge8[1] += (loader >> 4) & 0x0f0f0f0f;
-      git_merge8 = &(git_merge8[2]);
-    }
+    unroll_incr_4_8(gitv[0], gitv[1], perm_ct8);
   }
   if (cur_ct % 255) {
-    git_merge8 = gitv[1];
-    git_write = gitv[2];
-    for (pbidx = 0; pbidx < perm_ct4; pbidx++) {
-      loader = *git_merge8++;
-      git_write[0] += loader & 0x000000ff;
-      git_write[1] += (loader >> 8) & 0x000000ff;
-      git_write[2] += (loader >> 16) & 0x000000ff;
-      git_write[3] += loader >> 24;
-      git_write = &(git_write[4]);
-    }
+    unroll_incr_8_32(gitv[1], gitv[2], perm_ct4);
   }
 #endif
 }
diff --git a/plink_assoc.h b/plink_assoc.h
index 476e667..97aeae3 100644
--- a/plink_assoc.h
+++ b/plink_assoc.h
@@ -12,16 +12,6 @@ void aperm_init(Aperm_info* apip);
 
 int32_t multcomp(char* outname, char* outname_end, uint32_t* marker_uidxs, uintptr_t chi_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, double* chi, double pfilter, double output_min_p, uint32_t mtest_adjust, uint32_t skip_gc, double adjust_lambda, uint32_t* tcnt, double* pvals);
 
-void generate_cc_perm_vec(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp);
-
-// void generate_cc_perm1(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp);
-
-void generate_cc_cluster_perm_vec(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp);
-
-// void generate_cc_cluster_perm1(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp);
-
-void transpose_perm1s(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_nm_ct, uint32_t* perm_vecst);
-
 int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t model_modifier, uint32_t model_cell_ct, uint32_t model_mperm_val, double ci_size, double ci_zt, double pfilter, double output_min_p, uint32_t mtest_adjust, double adjust_lambda, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, uintptr_t marker_ct_orig, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, char** marke [...]
 
 int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t model_modifier, uint32_t model_mperm_val, double pfilter, double output_min_p, uint32_t mtest_adjust, double adjust_lambda, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, uintptr_t marker_ct_orig, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, Chrom_info* chrom [...]
diff --git a/plink_calc.c b/plink_calc.c
index 48e81dc..3595665 100644
--- a/plink_calc.c
+++ b/plink_calc.c
@@ -1496,19 +1496,20 @@ void incr_dists_rm_inv(uint32_t* idists, uintptr_t* mmasks, uintptr_t sample_ct_
   uintptr_t uljj;
   uint32_t uii;
   uint32_t ujj;
+  uint32_t ukk;
   for (uii = start_idx; uii < end_idx; uii++) {
     ulii = mmasks[uii];
+    ukk = sample_ct_m1 - uii;
     if (ulii) {
       glptr = &(mmasks[uii + 1]);
-      // ujj is deliberately biased down by 1
-      for (ujj = uii; ujj < sample_ct_m1; ujj++) {
+      for (ujj = 0; ujj < ukk; ujj++) {
         uljj = (*glptr++) & ulii;
 	if (uljj) {
 	  idists[ujj] += popcount_long(uljj);
 	}
       }
     }
-    idists = &(idists[sample_ct_m1 - uii - 1]);
+    idists = &(idists[ukk]);
   }
 }
 
@@ -1520,8 +1521,8 @@ THREAD_RET_TYPE calc_genome_thread(void* arg) {
   uintptr_t uljj = g_thread_start[0];
   // this is different from the regular offset because incr_dists_rm_inv() has
   // custom arithmetic
-  uintptr_t offsetm = ((uint64_t)sample_ct) * (ulii - uljj) - ((((uint64_t)(ulii + 1)) * (ulii + 2) - ((uint64_t)(uljj + 1)) * (uljj + 2)) / 2);
-  uintptr_t offset = (((uint64_t)sample_ct) * (ulii - uljj) - ((((uint64_t)ulii) * (ulii + 1) - ((uint64_t)uljj) * (uljj + 1)) / 2)) * 5;
+  uintptr_t offsetm = ((uint64_t)sample_ct) * (ulii - uljj) - ((((uint64_t)ulii) * (ulii + 1) - ((uint64_t)uljj) * (uljj + 1)) / 2);
+  uintptr_t offset = offsetm * 5;
   uint32_t* missing_ptr = &(g_missing_dbl_excluded[offsetm]);
   uint32_t* genome_main_ptr = &(g_genome_main[offset]);
   uintptr_t* geno_ptr = (uintptr_t*)g_geno;
@@ -4251,7 +4252,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
 	fputs("Writing...", stdout);
 	fflush(stdout);
         if (!bin4) {
-	  if (fwrite_checkedz(dists, sample_idx_ct * sizeof(double), *outfile_ptr)) {
+	  if (fwrite_checked(dists, sample_idx_ct * sizeof(double), *outfile_ptr)) {
 	    goto distance_d_write_ret_WRITE_FAIL;
 	  }
 	} else {
@@ -4322,12 +4323,12 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
 	dxx = 0.0;
 	dist_ptr = dists;
 	for (ii = first_sample_idx; ii < end_sample_idx; ii++) {
-	  if (fwrite_checkedz(dist_ptr, ii * sizeof(double), *outfile_ptr)) {
+	  if (fwrite_checked(dist_ptr, ii * sizeof(double), *outfile_ptr)) {
 	    goto distance_d_write_ret_WRITE_FAIL;
 	  }
 	  dist_ptr = &(dist_ptr[(uint32_t)ii]);
 	  if (shape == DISTANCE_SQ0) {
-	    if (fwrite_checkedz(membuf, (sample_ct - ii) * sizeof(double), *outfile_ptr)) {
+	    if (fwrite_checked(membuf, (sample_ct - ii) * sizeof(double), *outfile_ptr)) {
 	      goto distance_d_write_ret_WRITE_FAIL;
 	    }
 	  } else {
@@ -4364,7 +4365,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
 	    }
 	  }
 	  if (shape == DISTANCE_SQ0) {
-	    if (fwrite_checkedz(membuf, (sample_ct - ii) * sizeof(double), *outfile2_ptr)) {
+	    if (fwrite_checked(membuf, (sample_ct - ii) * sizeof(double), *outfile2_ptr)) {
 	      goto distance_d_write_ret_WRITE_FAIL;
 	    }
 	  } else {
@@ -4402,7 +4403,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
 	    }
 	  }
 	  if (shape == DISTANCE_SQ0) {
-	    if (fwrite_checkedz(membuf, (sample_ct - ii) * sizeof(double), *outfile3_ptr)) {
+	    if (fwrite_checked(membuf, (sample_ct - ii) * sizeof(double), *outfile3_ptr)) {
 	      goto distance_d_write_ret_WRITE_FAIL;
 	    }
 	  } else {
@@ -4442,7 +4443,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
 	  }
 	  dist_ptr = &(dist_ptr[(uint32_t)ii]);
 	  if (shape == DISTANCE_SQ0) {
-	    if (fwrite_checkedz(membuf, (sample_ct - ii) * sizeof(float), *outfile_ptr)) {
+	    if (fwrite_checked(membuf, (sample_ct - ii) * sizeof(float), *outfile_ptr)) {
 	      goto distance_d_write_ret_WRITE_FAIL;
 	    }
 	  } else {
@@ -4478,7 +4479,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
 	    }
 	  }
 	  if (shape == DISTANCE_SQ0) {
-	    if (fwrite_checkedz(membuf, (sample_ct - ii) * sizeof(float), *outfile2_ptr)) {
+	    if (fwrite_checked(membuf, (sample_ct - ii) * sizeof(float), *outfile2_ptr)) {
 	      goto distance_d_write_ret_WRITE_FAIL;
 	    }
 	  } else {
@@ -4514,7 +4515,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
 	    }
 	  }
 	  if (shape == DISTANCE_SQ0) {
-	    if (fwrite_checkedz(membuf, (sample_ct - ii) * sizeof(float), *outfile3_ptr)) {
+	    if (fwrite_checked(membuf, (sample_ct - ii) * sizeof(float), *outfile3_ptr)) {
 	      goto distance_d_write_ret_WRITE_FAIL;
 	    }
 	  } else {
@@ -7166,7 +7167,7 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
 	goto calc_rel_ret_OPEN_FAIL;
       }
       for (sample_idx = min_sample; sample_idx < max_parallel_sample; sample_idx++) {
-	if (fwrite_checkedz(&(rel_dists[((int64_t)sample_idx * (sample_idx - 1)) / 2 - start_offset]), sample_idx * sizeof(double), outfile)) {
+	if (fwrite_checked(&(rel_dists[((int64_t)sample_idx * (sample_idx - 1)) / 2 - start_offset]), sample_idx * sizeof(double), outfile)) {
 	  goto calc_rel_ret_WRITE_FAIL;
 	}
 	if (fwrite_checked(dptr2++, sizeof(double), outfile)) {
@@ -7180,7 +7181,7 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
 	  }
 	} else {
 	  if (rel_shape == REL_CALC_SQ0) {
-	    if (fwrite_checkedz(geno, (sample_ct - sample_idx - 1) * sizeof(double), outfile)) {
+	    if (fwrite_checked(geno, (sample_ct - sample_idx - 1) * sizeof(double), outfile)) {
 	      goto calc_rel_ret_WRITE_FAIL;
 	    }
 	  } else {
@@ -7276,7 +7277,7 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
 	  }
 	} else {
 	  if (rel_shape == REL_CALC_SQ0) {
-	    if (fwrite_checkedz(geno, (sample_ct - sample_idx - 1) * sizeof(float), outfile)) {
+	    if (fwrite_checked(geno, (sample_ct - sample_idx - 1) * sizeof(float), outfile)) {
 	      goto calc_rel_ret_WRITE_FAIL;
 	    }
 	  } else {
diff --git a/plink_cluster.c b/plink_cluster.c
index d1cf1d8..f0741be 100644
--- a/plink_cluster.c
+++ b/plink_cluster.c
@@ -42,7 +42,7 @@ void cluster_cleanup(Cluster_info* cluster_ptr) {
   free_cond(cluster_ptr->zerofname);
 }
 
-int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uint32_t mwithin_col, uint32_t keep_na, uintptr_t* cluster_ct_ptr, uint32_t** cluster_map_ptr, uint32_t** cluster_starts_ptr, char** cluster_ids_ptr, uintptr_t* max_cluster_id_len_ptr, char* keep_fname, char* keep_flattened, char* remove_fname, char* remove_flattened) {
+int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uint32_t mwithin_col, uint32_t keep_na, uintptr_t* cluster_ct_ptr, uint32_t** cluster_map_ptr, uint32_t** cluster_starts_ptr, char** cluster_ids_ptr, uintptr_t* max_cluster_id_len_ptr, char* keep_fname, char* keep_flattened, char* remove_fname, char* remove_flattened, uint32_t allow_no_samples) {
   unsigned char* wkspace_mark = wkspace_base;
   FILE* infile = NULL;
   uintptr_t* sample_exclude_new = NULL;
@@ -196,7 +196,6 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
 	}
         uii = next_set(already_seen, 0, cluster_kr_ct);
 	if (uii < cluster_kr_ct) {
-	  read_idx = uii + 1;
           for (read_idx = uii + 1; read_idx < cluster_kr_ct; read_idx++) {
             if (!IS_SET(already_seen, read_idx)) {
               strcpy(&(sorted_keep_ids[uii * max_cluster_kr_len]), &(sorted_keep_ids[read_idx * max_cluster_kr_len]));
@@ -455,11 +454,15 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
       LOGPRINTF("--within: %" PRIuPTR " cluster%s loaded, covering a total of %" PRIuPTR " %s.\n", cluster_ct, (cluster_ct == 1)? "" : "s", assigned_ct, species_str(assigned_ct));
     } else {
       if (sorted_keep_ids) {
-	logerrprint("Error: No samples named in --within file remain in the current analysis, so\n--keep-clusters/--keep-cluster-names excludes everyone.\n");
-	goto load_clusters_ret_INVALID_FORMAT;
+        if (!allow_no_samples) {
+	  logerrprint("Error: No samples named in --within file remain in the current analysis, so\n--keep-clusters/--keep-cluster-names excludes everyone.\n");
+	  goto load_clusters_ret_INVALID_FORMAT;
+	}
       }
       logerrprint("Warning: No samples named in --within file remain in the current analysis.\n");
-      goto load_clusters_ret_1;
+      if (!sorted_keep_ids) {
+        goto load_clusters_ret_1;
+      }
     }
   } else {
     // --family
@@ -496,7 +499,7 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
       }
       assigned_ct++;
     }
-    if (!assigned_ct) {
+    if ((!assigned_ct) && (!allow_no_samples)) {
       logerrprint("Error: --keep-clusters/--keep-cluster-names excludes everyone.\n");
       goto load_clusters_ret_INVALID_FORMAT;
     }
diff --git a/plink_cluster.h b/plink_cluster.h
index 6046e6f..40ecdbd 100644
--- a/plink_cluster.h
+++ b/plink_cluster.h
@@ -45,7 +45,7 @@ void cluster_init(Cluster_info* cluster_ptr);
 
 void cluster_cleanup(Cluster_info* cluster_ptr);
 
-int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uint32_t mwithin_col, uint32_t keep_na, uintptr_t* cluster_ct_ptr, uint32_t** cluster_map_ptr, uint32_t** cluster_starts_ptr, char** cluster_ids_ptr, uintptr_t* max_cluster_id_len_ptr, char* keep_fname, char* keep_flattened, char* remove_fname, char* remove_flattened);
+int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uint32_t mwithin_col, uint32_t keep_na, uintptr_t* cluster_ct_ptr, uint32_t** cluster_map_ptr, uint32_t** cluster_starts_ptr, char** cluster_ids_ptr, uintptr_t* max_cluster_id_len_ptr, char* keep_fname, char* keep_flattened, char* remove_fname, char* remove_flattened, uint32_t allow_no_samples);
 
 void fill_unfiltered_sample_to_cluster(uintptr_t unfiltered_sample_ct, uintptr_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* sample_to_cluster);
 
diff --git a/plink_common.c b/plink_common.c
index f27d740..9bbc9f9 100644
--- a/plink_common.c
+++ b/plink_common.c
@@ -3306,6 +3306,7 @@ void magic_num(uint32_t divisor, uint64_t* multp, uint32_t* pre_shiftp, uint32_t
 }
 
 void fill_bits(uintptr_t* bit_arr, uintptr_t loc_start, uintptr_t len) {
+  // requires bit_arr to be nonempty
   uintptr_t maj_start = loc_start / BITCT;
   uintptr_t maj_end = (loc_start + len) / BITCT;
   uintptr_t minor;
@@ -3322,6 +3323,7 @@ void fill_bits(uintptr_t* bit_arr, uintptr_t loc_start, uintptr_t len) {
 }
 
 void clear_bits(uintptr_t* bit_arr, uintptr_t loc_start, uintptr_t len) {
+  // requires bit_arr to be nonempty
   uintptr_t maj_start = loc_start / BITCT;
   uintptr_t maj_end = (loc_start + len) / BITCT;
   uintptr_t minor;
@@ -3667,7 +3669,7 @@ uintptr_t geqprime(uintptr_t floor) {
   return floor;
 }
 
-int32_t populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t item_ct, const char* item_ids, uintptr_t max_id_len, uint32_t allow_dups, uint32_t* id_htable, uint32_t id_htable_size) {
+int32_t populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t item_ct, const char* item_ids, uintptr_t max_id_len, uint32_t store_dups, uint32_t* id_htable, uint32_t id_htable_size) {
   // While unique IDs are normally assumed (and enforced) here, --extract and
   // --exclude are an exception, since we want to be able to e.g. exclude all
   // variants named '.'.  Since there could be millions of them, ordinary
@@ -3691,7 +3693,7 @@ int32_t populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uint
   uint32_t hash_result;
   uint32_t cur_dup;
   fill_uint_one(id_htable, id_htable_size);
-  if (!allow_dups) {
+  if (!store_dups) {
     for (; item_idx < item_ct; item_uidx++, item_idx++) {
       next_unset_ul_unsafe_ck(exclude_arr, &item_uidx);
       sptr = &(item_ids[item_uidx * max_id_len]);
@@ -3704,6 +3706,8 @@ int32_t populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uint
 	  id_htable[hashval] = item_uidx;
 	  break;
 	} else if (!memcmp(sptr, &(item_ids[hash_result * max_id_len]), slen + 1)) {
+	  // could add an allow_dups parameter which controls whether this is
+	  // an error
 	  LOGERRPRINTFWW("Error: Duplicate ID '%s'.\n", sptr);
 	  return RET_INVALID_FORMAT;
 	}
@@ -3781,7 +3785,7 @@ int32_t populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uint
 }
 
 uint32_t id_htable_find(const char* id_buf, uintptr_t cur_id_len, const uint32_t* id_htable, uint32_t id_htable_size, const char* item_ids, uintptr_t max_id_len) {
-  // assumes no duplicate entries
+  // assumes no duplicate entries, and nonzero id_htable_size
   // returns 0xffffffffU on failure
   if (cur_id_len >= max_id_len) {
     return 0xffffffffU;
@@ -3814,26 +3818,26 @@ void fill_idx_to_uidx(uintptr_t* exclude_arr, uintptr_t unfiltered_item_ct, uint
   uint32_t* idx_to_uidx_end = &(idx_to_uidx[item_ct]);
   uint32_t item_uidx = 0;
   uint32_t item_uidx_stop;
-  do {
+  while (idx_to_uidx < idx_to_uidx_end) {
     item_uidx = next_unset_unsafe(exclude_arr, item_uidx);
     item_uidx_stop = next_set(exclude_arr, item_uidx, unfiltered_item_ct);
     do {
       *idx_to_uidx++ = item_uidx++;
     } while (item_uidx < item_uidx_stop);
-  } while (idx_to_uidx < idx_to_uidx_end);
+  }
 }
 
 void fill_idx_to_uidx_incl(uintptr_t* include_arr, uintptr_t unfiltered_item_ct, uintptr_t item_ct, uint32_t* idx_to_uidx) {
   uint32_t* idx_to_uidx_end = &(idx_to_uidx[item_ct]);
   uint32_t item_uidx = 0;
   uint32_t item_uidx_stop;
-  do {
+  while (idx_to_uidx < idx_to_uidx_end) {
     item_uidx = next_set_unsafe(include_arr, item_uidx);
     item_uidx_stop = next_unset(include_arr, item_uidx, unfiltered_item_ct);
     do {
       *idx_to_uidx++ = item_uidx++;
     } while (item_uidx < item_uidx_stop);
-  } while (idx_to_uidx < idx_to_uidx_end);
+  }
 }
 
 void fill_uidx_to_idx(uintptr_t* exclude_arr, uint32_t unfiltered_item_ct, uint32_t item_ct, uint32_t* uidx_to_idx) {
@@ -3841,7 +3845,7 @@ void fill_uidx_to_idx(uintptr_t* exclude_arr, uint32_t unfiltered_item_ct, uint3
   uint32_t item_idx = 0;
   uint32_t* uidx_to_idx_ptr;
   uint32_t* uidx_to_idx_stop;
-  do {
+  while (item_idx < item_ct) {
     item_uidx = next_unset_unsafe(exclude_arr, item_uidx);
     uidx_to_idx_ptr = &(uidx_to_idx[item_uidx]);
     item_uidx = next_set(exclude_arr, item_uidx, unfiltered_item_ct);
@@ -3849,7 +3853,7 @@ void fill_uidx_to_idx(uintptr_t* exclude_arr, uint32_t unfiltered_item_ct, uint3
     do {
       *uidx_to_idx_ptr++ = item_idx++;
     } while (uidx_to_idx_ptr < uidx_to_idx_stop);
-  } while (item_idx < item_ct);
+  }
 }
 
 void fill_uidx_to_idx_incl(uintptr_t* include_arr, uint32_t unfiltered_item_ct, uint32_t item_ct, uint32_t* uidx_to_idx) {
@@ -3857,7 +3861,7 @@ void fill_uidx_to_idx_incl(uintptr_t* include_arr, uint32_t unfiltered_item_ct,
   uint32_t item_idx = 0;
   uint32_t* uidx_to_idx_ptr;
   uint32_t* uidx_to_idx_stop;
-  do {
+  while (item_idx < item_ct) {
     item_uidx = next_set_unsafe(include_arr, item_uidx);
     uidx_to_idx_ptr = &(uidx_to_idx[item_uidx]);
     item_uidx = next_unset(include_arr, item_uidx, unfiltered_item_ct);
@@ -3865,7 +3869,7 @@ void fill_uidx_to_idx_incl(uintptr_t* include_arr, uint32_t unfiltered_item_ct,
     do {
       *uidx_to_idx_ptr++ = item_idx++;
     } while (uidx_to_idx_ptr < uidx_to_idx_stop);
-  } while (item_idx < item_ct);
+  }
 }
 
 void fill_midx_to_idx(uintptr_t* exclude_arr_orig, uintptr_t* exclude_arr, uint32_t item_ct, uint32_t* midx_to_idx) {
@@ -3885,28 +3889,40 @@ void fill_midx_to_idx(uintptr_t* exclude_arr_orig, uintptr_t* exclude_arr, uint3
 }
 
 void fill_vec_55(uintptr_t* vec, uint32_t ct) {
-  uint32_t ctl = 2 * ((ct + (BITCT - 1)) / BITCT);
   uint32_t rem = ct & (BITCT - 1);
-  uintptr_t* second_to_last = &(vec[ctl - 2]);
 #ifdef __LP64__
   const __m128i m1 = {FIVEMASK, FIVEMASK};
   __m128i* vecp = (__m128i*)vec;
-  __m128i* vec_end = (__m128i*)(&(vec[ctl]));
-  do {
+  __m128i* vec_end = (__m128i*)(&(vec[2 * (ct / BITCT)]));
+  uintptr_t* second_to_last;
+  while (vecp < vec_end) {
     *vecp++ = m1;
-  } while (vecp < vec_end);
+  }
+  if (rem) {
+    second_to_last = (uintptr_t*)vecp;
+    if (rem > BITCT2) {
+      second_to_last[0] = FIVEMASK;
+      second_to_last[1] = FIVEMASK >> ((BITCT - rem) * 2);
+    } else {
+      second_to_last[0] = FIVEMASK >> ((BITCT2 - rem) * 2);
+      second_to_last[1] = 0;
+    }
+  }
 #else
-  uintptr_t* vec_end = &(vec[ctl]);
-  do {
+  uintptr_t* vec_end = &(vec[2 * (ct / BITCT)]);
+  while (vec < vec_end) {
     *vec++ = FIVEMASK;
-  } while (vec < vec_end);
-#endif
-  if (rem > BITCT2) {
-    second_to_last[1] &= (~ZEROLU) >> ((BITCT - rem) * 2);
-  } else if (rem) {
-    *second_to_last &= (~ZEROLU) >> ((BITCT2 - rem) * 2);
-    second_to_last[1] = 0;
   }
+  if (rem) {
+    if (rem > BITCT2) {
+      vec[0] = FIVEMASK;
+      vec[1] = FIVEMASK >> ((BITCT - rem) * 2);
+    } else {
+      vec[0] = FIVEMASK >> ((BITCT2 - rem) * 2);
+      vec[1] = 0;
+    }
+  }
+#endif
 }
 
 void vec_collapse_init(uintptr_t* unfiltered_bitarr, uint32_t unfiltered_ct, uintptr_t* filter_bitarr, uint32_t filtered_ct, uintptr_t* output_vec) {
@@ -3918,7 +3934,7 @@ void vec_collapse_init(uintptr_t* unfiltered_bitarr, uint32_t unfiltered_ct, uin
   uint32_t write_bit = 0;
   uint32_t item_idx = 0;
   uint32_t item_uidx_stop;
-  do {
+  while (item_idx < filtered_ct) {
     item_uidx = next_set_unsafe(filter_bitarr, item_uidx);
     item_uidx_stop = next_unset(filter_bitarr, item_uidx, unfiltered_ct);
     item_idx += item_uidx_stop - item_uidx;
@@ -3930,7 +3946,7 @@ void vec_collapse_init(uintptr_t* unfiltered_bitarr, uint32_t unfiltered_ct, uin
 	write_bit = 0;
       }
     } while (++item_uidx < item_uidx_stop);
-  } while (item_idx < filtered_ct);
+  }
   if (write_bit) {
     *output_vec++ = cur_write;
   }
@@ -3945,7 +3961,7 @@ void vec_collapse_init_exclude(uintptr_t* unfiltered_bitarr, uint32_t unfiltered
   uint32_t write_bit = 0;
   uint32_t item_idx = 0;
   uint32_t item_uidx_stop;
-  do {
+  while (item_idx < filtered_ct) {
     item_uidx = next_unset_unsafe(filter_exclude_bitarr, item_uidx);
     item_uidx_stop = next_set(filter_exclude_bitarr, item_uidx, unfiltered_ct);
     item_idx += item_uidx_stop - item_uidx;
@@ -3957,7 +3973,7 @@ void vec_collapse_init_exclude(uintptr_t* unfiltered_bitarr, uint32_t unfiltered
 	write_bit = 0;
       }
     } while (++item_uidx < item_uidx_stop);
-  } while (item_idx < filtered_ct);
+  }
   if (write_bit) {
     *output_vec++ = cur_write;
   }
@@ -4405,6 +4421,7 @@ int32_t resolve_or_add_chrom_name(Chrom_info* chrom_info_ptr, char* bufptr, int3
 }
 
 void refresh_chrom_info(Chrom_info* chrom_info_ptr, uintptr_t marker_uidx, uint32_t* chrom_end_ptr, uint32_t* chrom_fo_idx_ptr, uint32_t* is_x_ptr, uint32_t* is_y_ptr, uint32_t* is_mt_ptr, uint32_t* is_haploid_ptr) {
+  // assumes marker_uidx < unfiltered_marker_ct
   int32_t chrom_idx;
   *chrom_end_ptr = chrom_info_ptr->chrom_file_order_marker_idx[(*chrom_fo_idx_ptr) + 1];
   while (marker_uidx >= (*chrom_end_ptr)) {
@@ -5237,9 +5254,9 @@ void bitfield_and(uintptr_t* vv, uintptr_t* include_vec, uintptr_t word_ct) {
   }
 #else
   uintptr_t* vec_end = &(vv[word_ct]);
-  do {
+  while (vv < vec_end) {
     *vv++ &= *include_vec++;
-  } while (vv < vec_end);
+  }
 #endif
 }
 
@@ -5261,15 +5278,16 @@ void bitfield_andnot(uintptr_t* vv, uintptr_t* exclude_vec, uintptr_t word_ct) {
   }
 #else
   uintptr_t* vec_end = &(vv[word_ct]);
-  do {
+  while (vv < vec_end) {
     *vv++ &= ~(*exclude_vec++);
-  } while (vv < vec_end);
+  }
 #endif
 }
 
 void bitfield_andnot_reversed_args(uintptr_t* vv, uintptr_t* include_vec, uintptr_t word_ct) {
   // vv := (~vv) AND include_vec
   // on 64-bit systems, assumes vv and exclude_vec are 16-byte aligned
+  // assumes word_ct is nonzero
 #ifdef __LP64__
   __m128i* vv128 = (__m128i*)vv;
   __m128i* iv128 = (__m128i*)include_vec;
@@ -5284,10 +5302,10 @@ void bitfield_andnot_reversed_args(uintptr_t* vv, uintptr_t* include_vec, uintpt
   }
 #else
   uintptr_t* vec_end = &(vv[word_ct]);
-  do {
+  while (vv < vec_end) {
     *vv = (~(*vv)) & (*include_vec++);
     vv++;
-  } while (vv < vec_end);
+  }
 #endif
 }
 
@@ -5308,9 +5326,9 @@ void bitfield_or(uintptr_t* vv, uintptr_t* or_vec, uintptr_t word_ct) {
   }
 #else
   uintptr_t* vec_end = &(vv[word_ct]);
-  do {
+  while (vv < vec_end) {
     *vv++ |= *or_vec++;
-  } while (vv < vec_end);
+  }
 #endif
 }
 
@@ -5336,9 +5354,9 @@ void bitfield_ornot(uintptr_t* vv, uintptr_t* inverted_or_vec, uintptr_t word_ct
   }
 #else
   uintptr_t* vec_end = &(vv[word_ct]);
-  do {
+  while (vv < vec_end) {
     *vv++ |= ~(*inverted_or_vec++);
-  } while (vv < vec_end);
+  }
 #endif
 }
 
@@ -5359,9 +5377,9 @@ void bitfield_xor(uintptr_t* bit_arr, uintptr_t* xor_arr, uintptr_t word_ct) {
   }
 #else
   uintptr_t* bit_arr_end = &(bit_arr[word_ct]);
-  do {
+  while (bit_arr < bit_arr_end) {
     *bit_arr++ ^= *xor_arr++;
-  } while (bit_arr < bit_arr_end);
+  }
 #endif
 }
 
@@ -7519,6 +7537,7 @@ uintptr_t count_01(uintptr_t* lptr, uintptr_t word_ct) {
 
 void fill_all_bits(uintptr_t* bit_arr, uintptr_t ct) {
   // leaves bits beyond the end unset
+  // ok for ct == 0
   uintptr_t quotient = ct / BITCT;
   uintptr_t remainder = ct % BITCT;
   fill_ulong_one(bit_arr, quotient);
@@ -7781,6 +7800,7 @@ uint32_t get_max_chrom_size(Chrom_info* chrom_info_ptr, uintptr_t* marker_exclud
 }
 
 void count_genders(uintptr_t* sex_nm, uintptr_t* sex_male, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uint32_t* male_ct_ptr, uint32_t* female_ct_ptr, uint32_t* unk_ct_ptr) {
+  // unfiltered_sample_ct can be zero
   uint32_t male_ct = 0;
   uint32_t female_ct = 0;
   uint32_t unk_ct = 0;
@@ -7810,6 +7830,7 @@ void count_genders(uintptr_t* sex_nm, uintptr_t* sex_male, uintptr_t unfiltered_
 }
 
 void reverse_loadbuf(unsigned char* loadbuf, uintptr_t unfiltered_sample_ct) {
+  // unfiltered_sample_ct can be zero
   uintptr_t sample_bidx = 0;
   unsigned char* loadbuf_end = &(loadbuf[(unfiltered_sample_ct + 3) / 4]);
   unsigned char ucc;
@@ -7892,7 +7913,7 @@ void collapse_copy_2bitarr(uintptr_t* rawbuf, uintptr_t* mainbuf, uint32_t unfil
     sample_idx += sample_uidx_stop - sample_uidx;
     do {
       // er, this can totally be sped up
-      cur_write |= (((rawbuf[sample_uidx / BITCT2] >> ((sample_uidx % BITCT2) * 2)) & 3) << (ii_rem * 2));
+      cur_write |= EXTRACT_2BIT_GENO(rawbuf, sample_uidx) << (ii_rem * 2);
       if (++ii_rem == BITCT2) {
         *mainbuf++ = cur_write;
         cur_write = 0;
@@ -7906,6 +7927,7 @@ void collapse_copy_2bitarr(uintptr_t* rawbuf, uintptr_t* mainbuf, uint32_t unfil
 }
 
 uint32_t load_and_collapse(FILE* bedfile, uintptr_t* rawbuf, uint32_t unfiltered_sample_ct, uintptr_t* mainbuf, uint32_t sample_ct, uintptr_t* sample_exclude, uintptr_t final_mask, uint32_t do_reverse) {
+  // assumes unfiltered_sample_ct is positive
   uint32_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
   if (unfiltered_sample_ct == sample_ct) {
     rawbuf = mainbuf;
@@ -7942,7 +7964,7 @@ void collapse_copy_2bitarr_incl(uintptr_t* rawbuf, uintptr_t* mainbuf, uint32_t
     sample_uidx_stop = next_unset(sample_include, sample_uidx, unfiltered_sample_ct);
     sample_idx += sample_uidx_stop - sample_uidx;
     do {
-      cur_write |= (((rawbuf[sample_uidx / BITCT2] >> ((sample_uidx % BITCT2) * 2)) & 3) << (ii_rem * 2));
+      cur_write |= EXTRACT_2BIT_GENO(rawbuf, sample_uidx) << (ii_rem * 2);
       if (++ii_rem == BITCT2) {
         *mainbuf++ = cur_write;
         cur_write = 0;
@@ -8032,13 +8054,14 @@ uint32_t load_and_split(FILE* bedfile, uintptr_t* rawbuf, uint32_t unfiltered_sa
 }
 
 void vec_include_init(uintptr_t unfiltered_sample_ct, uintptr_t* new_include2, uintptr_t* old_include) {
+  // allows unfiltered_sample_ct == 0
   uint32_t unfiltered_sample_ctl = (unfiltered_sample_ct + (BITCT - 1)) / BITCT;
   uintptr_t ulii;
   uintptr_t uljj;
   uintptr_t ulkk;
   uintptr_t ulmm;
   uint32_t bit_idx;
-  do {
+  while (unfiltered_sample_ctl) {
     ulii = ~(*old_include++);
     ulkk = FIVEMASK;
     ulmm = FIVEMASK;
@@ -8066,7 +8089,8 @@ void vec_include_init(uintptr_t unfiltered_sample_ct, uintptr_t* new_include2, u
     }
     *new_include2++ = ulkk;
     *new_include2++ = ulmm;
-  } while (--unfiltered_sample_ctl);
+    --unfiltered_sample_ctl;
+  }
   ulii = unfiltered_sample_ct & (BITCT - 1);
   if (ulii) {
     new_include2--;
@@ -8080,13 +8104,14 @@ void vec_include_init(uintptr_t unfiltered_sample_ct, uintptr_t* new_include2, u
 }
 
 void exclude_to_vec_include(uintptr_t unfiltered_sample_ct, uintptr_t* include_vec, uintptr_t* exclude_arr) {
+  // allows unfiltered_sample_ct == 0
   uint32_t unfiltered_sample_ctl = (unfiltered_sample_ct + (BITCT - 1)) / BITCT;
   uintptr_t ulii;
   uintptr_t uljj;
   uintptr_t ulkk;
   uintptr_t ulmm;
   uint32_t bit_idx;
-  do {
+  while (unfiltered_sample_ctl) {
     ulii = *exclude_arr++;
     ulkk = FIVEMASK;
     ulmm = FIVEMASK;
@@ -8114,7 +8139,8 @@ void exclude_to_vec_include(uintptr_t unfiltered_sample_ct, uintptr_t* include_v
     }
     *include_vec++ = ulkk;
     *include_vec++ = ulmm;
-  } while (--unfiltered_sample_ctl);
+    --unfiltered_sample_ctl;
+  }
   ulii = unfiltered_sample_ct & (BITCT - 1);
   if (ulii) {
     include_vec--;
@@ -8131,31 +8157,44 @@ void vec_init_invert(uintptr_t entry_ct, uintptr_t* target_arr, uintptr_t* sourc
   // Initializes a half-bitfield as the inverse of another.  Assumes target_arr
   // and source_arr are doubleword-aligned.
   uint32_t vec_wsize = 2 * ((entry_ct + (BITCT - 1)) / BITCT);
-  uintptr_t* second_to_last = &(target_arr[vec_wsize - 2]);
   uint32_t rem = entry_ct & (BITCT - 1);
 #ifdef __LP64__
   const __m128i m1 = {FIVEMASK, FIVEMASK};
   __m128i* tptr = (__m128i*)target_arr;
   __m128i* sptr = (__m128i*)source_arr;
   __m128i* tptr_end = (__m128i*)(&(target_arr[vec_wsize]));
-  do {
+  uintptr_t* second_to_last;
+  while (tptr < tptr_end) {
     *tptr++ = _mm_andnot_si128(*sptr++, m1);
-  } while (tptr < tptr_end);
+  }
+  if (rem) {
+    second_to_last = &(((uintptr_t*)tptr_end)[-2]);
+    if (rem > BITCT2) {
+      second_to_last[1] &= (~ZEROLU) >> ((BITCT - rem) * 2);
+    } else {
+      *second_to_last &= (~ZEROLU) >> ((BITCT2 - rem) * 2);
+      second_to_last[1] = 0;
+    }
+  }
 #else
   uintptr_t* tptr_end = &(target_arr[vec_wsize]);
-  do {
+  while (target_arr < tptr_end) {
     *target_arr++ = FIVEMASK & (~(*source_arr++));
-  } while (target_arr < tptr_end);
-#endif
-  if (rem > BITCT2) {
-    second_to_last[1] &= (~ZEROLU) >> ((BITCT - rem) * 2);
-  } else if (rem) {
-    *second_to_last &= (~ZEROLU) >> ((BITCT2 - rem) * 2);
-    second_to_last[1] = 0;
   }
+  if (rem) {
+    if (rem > BITCT2) {
+      target_arr[-1] &= (~ZEROLU) >> ((BITCT - rem) * 2);
+    } else {
+      target_arr[-2] &= (~ZEROLU) >> ((BITCT2 - rem) * 2);
+      target_arr[-1] = 0;
+    }
+  }
+
+#endif
 }
 
 void bitfield_andnot_copy(uintptr_t word_ct, uintptr_t* target_arr, uintptr_t* source_arr, uintptr_t* exclude_arr) {
+  // assumes word_ct is positive
   // target_arr := source_arr ANDNOT exclude_arr
   // may write an extra word
 #ifdef __LP64__
@@ -8175,13 +8214,14 @@ void bitfield_andnot_copy(uintptr_t word_ct, uintptr_t* target_arr, uintptr_t* s
 }
 
 void vec_include_mask_in(uintptr_t unfiltered_sample_ct, uintptr_t* include_arr, uintptr_t* mask_arr) {
+  // allows unfiltered_sample_ct == 0
   uint32_t unfiltered_sample_ctl = (unfiltered_sample_ct + (BITCT - 1)) / BITCT;
   uintptr_t ulii;
   uintptr_t uljj;
   uintptr_t ulkk;
   uintptr_t ulmm;
   uint32_t bit_idx;
-  do {
+  while (unfiltered_sample_ctl) {
     ulii = ~(*mask_arr++);
     ulkk = *include_arr;
     ulmm = include_arr[1];
@@ -8209,10 +8249,12 @@ void vec_include_mask_in(uintptr_t unfiltered_sample_ct, uintptr_t* include_arr,
     }
     *include_arr++ = ulkk;
     *include_arr++ = ulmm;
-  } while (--unfiltered_sample_ctl);
+    --unfiltered_sample_ctl;
+  }
 }
 
 void vec_include_mask_out(uintptr_t unfiltered_sample_ct, uintptr_t* include_arr, uintptr_t* mask_arr) {
+  // assumes unfiltered_sample_ct is positive
   uint32_t unfiltered_sample_ctl = (unfiltered_sample_ct + (BITCT - 1)) / BITCT;
   uintptr_t ulii;
   uintptr_t uljj;
@@ -8251,6 +8293,7 @@ void vec_include_mask_out(uintptr_t unfiltered_sample_ct, uintptr_t* include_arr
 }
 
 void vec_include_mask_out_intersect(uintptr_t unfiltered_sample_ct, uintptr_t* include_arr, uintptr_t* mask_arr, uintptr_t* mask2_arr) {
+  // assumes unfiltered_sample_ct is positive
   uint32_t unfiltered_sample_ctl = (unfiltered_sample_ct + (BITCT - 1)) / BITCT;
   uintptr_t ulii;
   uintptr_t uljj;
@@ -8289,6 +8332,7 @@ void vec_include_mask_out_intersect(uintptr_t unfiltered_sample_ct, uintptr_t* i
 }
 
 void vec_init_01(uintptr_t unfiltered_sample_ct, uintptr_t* data_ptr, uintptr_t* result_ptr) {
+  // assumes unfiltered_sample_ct is positive
   // initializes result_ptr bits 01 iff data_ptr bits are 01
 #ifdef __LP64__
   const __m128i m1 = {FIVEMASK, FIVEMASK};
@@ -8338,6 +8382,7 @@ void vec_invert(uintptr_t unfiltered_sample_ct, uintptr_t* vec2) {
 }
 
 void vec_datamask(uintptr_t unfiltered_sample_ct, uint32_t matchval, uintptr_t* data_ptr, uintptr_t* mask_ptr, uintptr_t* result_ptr) {
+  // assumes unfiltered_sample_ct is positive
   // vec_ptr assumed to be standard 00/01 bit vector
   // sets result_vec bits to 01 iff data_ptr bits are equal to matchval and
   // vec_ptr bit is set, 00 otherwise.
@@ -8424,6 +8469,7 @@ void vec_rotate_plink1_to_plink2(uintptr_t* lptr, uint32_t word_ct) {
 */
 
 void rotate_plink1_to_plink2_and_copy(uintptr_t* loadbuf, uintptr_t* writebuf, uintptr_t word_ct) {
+  // assumes positive word_ct
   uintptr_t* loadbuf_end = &(loadbuf[word_ct]);
   uintptr_t ulii;
   uintptr_t uljj;
@@ -9149,14 +9195,14 @@ char* alloc_and_init_collapsed_arr(char* item_arr, uintptr_t item_len, uintptr_t
   }
   wptr = new_arr;
   wptr_end = &(new_arr[filtered_ct * item_len]);
-  do {
+  while (wptr < wptr_end) {
     item_uidx = next_unset_ul_unsafe(exclude_arr, item_uidx);
     item_uidx_stop = next_set_ul(exclude_arr, item_uidx, unfiltered_ct);
     delta = item_uidx_stop - item_uidx;
     memcpy(wptr, &(item_arr[item_uidx * item_len]), delta * item_len);
     wptr = &(wptr[delta * item_len]);
     item_uidx = item_uidx_stop;
-  } while (wptr < wptr_end);
+  }
   return new_arr;
 }
 
@@ -9418,9 +9464,12 @@ uint32_t collapse_duplicate_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max
   // Collapses array of sorted IDs to remove duplicates, and writes
   // pre-collapse positions to id_starts (so e.g. duplication count of any
   // sample ID can be determined via subtraction) if it isn't NULL.
-  // Assumes id_ct is positive.  Returns id_ct of collapsed array.
+  // Returns id_ct of collapsed array.
   uintptr_t read_idx;
   uintptr_t write_idx;
+  if (!id_ct) {
+    return 0;
+  }
   if (id_starts) {
     id_starts[0] = 0;
     for (read_idx = 1; read_idx < id_ct; read_idx++) {
diff --git a/plink_common.h b/plink_common.h
index e1b6e94..c9794f4 100644
--- a/plink_common.h
+++ b/plink_common.h
@@ -83,7 +83,18 @@
   #include <algorithm>
 #endif
 
+// It would be useful to disable compilation on big-endian platforms, but I
+// don't see a decent portable way to do this (see e.g. discussion at
+// http://esr.ibiblio.org/?p=5095 ).
+
 #ifdef __LP64__
+  #ifndef __SSE2__
+    // It's obviously possible to support this by writing 64-bit non-SSE2 code
+    // shadowing each SSE2 intrinsic, but this almost certainly isn't worth the
+    // development/testing effort until regular PLINK 2.0 development is
+    // complete.  No researcher has ever asked me for this feature.
+    #error "64-bit builds currently require SSE2.  Try producing a 32-bit build instead."
+  #endif
   #include <emmintrin.h>
   #define FIVEMASK 0x5555555555555555LLU
   typedef union {
@@ -223,6 +234,9 @@
 #define MISC_MISSING_GZ 0x4000000000LLU
 #define MISC_FREQ_GZ 0x8000000000LLU
 #define MISC_HET_GZ 0x10000000000LLU
+#define MISC_ALLOW_NO_SAMPLES 0x20000000000LLU
+#define MISC_ALLOW_NO_VARS 0x40000000000LLU
+#define MISC_VCF_REQUIRE_GT 0x80000000000LLU
 
 // assume for now that .bed must always be accompanied by both .bim and .fam
 #define FILTER_ALL_REQ 1LLU
@@ -357,6 +371,8 @@
 #define COVAR_KEEP_PHENO_ON_MISSING_COV 1
 #define COVAR_NAME 2
 #define COVAR_NUMBER 4
+#define COVAR_NO_CONST 8
+#define COVAR_ALLOW_NONE 0x10
 
 #define DISTANCE_SQ 1
 #define DISTANCE_SQ0 2
@@ -566,25 +582,34 @@
 #define WKSPACE_MIN_MB 64
 #define WKSPACE_DEFAULT_MB 2048
 
+#ifdef __LP64__
+  #define BITCT 64
+#else
+  #define BITCT 32
+#endif
+
+#define BITCT2 (BITCT / 2)
+#define BYTECT (BITCT / 8)
+
 #define CACHELINE 64 // assumed number of bytes per cache line, for alignment
 #define CACHELINE_INT32 (CACHELINE / 4)
 #define CACHELINE_INT64 (CACHELINE / 8)
 #define CACHELINE_WORD (CACHELINE / BYTECT)
 #define CACHELINE_DBL (CACHELINE / 8)
 
-#define CACHEALIGN(val) ((val + (CACHELINE - 1)) & (~(CACHELINE - ONELU)))
-#define CACHEALIGN_INT32(val) ((val + (CACHELINE_INT32 - 1)) & (~(CACHELINE_INT32 - ONELU)))
-#define CACHEALIGN_WORD(val) ((val + (CACHELINE_WORD - 1)) & (~(CACHELINE_WORD - ONELU)))
-#define CACHEALIGN_DBL(val) ((val + (CACHELINE_DBL - 1)) & (~(CACHELINE_DBL - ONELU)))
+#define CACHEALIGN(val) (((val) + (CACHELINE - 1)) & (~(CACHELINE - ONELU)))
+#define CACHEALIGN_INT32(val) (((val) + (CACHELINE_INT32 - 1)) & (~(CACHELINE_INT32 - ONELU)))
+#define CACHEALIGN_WORD(val) (((val) + (CACHELINE_WORD - 1)) & (~(CACHELINE_WORD - ONELU)))
+#define CACHEALIGN_DBL(val) (((val) + (CACHELINE_DBL - 1)) & (~(CACHELINE_DBL - ONELU)))
 
 // 32-bit instead of word-length bitwise not here, when val can be assumed to
 // be 32-bit.
 // (note that the sizeof operator "returns" an uintptr_t, not a uint32_t; hence
 // the lack of sizeof in the CACHELINE_INT32, etc. definitions.)
-#define CACHEALIGN32(val) ((val + (CACHELINE - 1)) & (~(CACHELINE - 1)))
-#define CACHEALIGN32_INT32(val) ((val + (CACHELINE_INT32 - 1)) & (~(CACHELINE_INT32 - 1)))
-#define CACHEALIGN32_WORD(val) ((val + (CACHELINE_WORD - 1)) & (~(CACHELINE_WORD - 1)))
-#define CACHEALIGN32_DBL(val) ((val + (CACHELINE_DBL - 1)) & (~(CACHELINE_DBL - 1)))
+#define CACHEALIGN32(val) (((val) + (CACHELINE - 1)) & (~(CACHELINE - 1)))
+#define CACHEALIGN32_INT32(val) (((val) + (CACHELINE_INT32 - 1)) & (~(CACHELINE_INT32 - 1)))
+#define CACHEALIGN32_WORD(val) (((val) + (CACHELINE_WORD - 1)) & (~(CACHELINE_WORD - 1)))
+#define CACHEALIGN32_DBL(val) (((val) + (CACHELINE_DBL - 1)) & (~(CACHELINE_DBL - 1)))
 
 #define MAXV(aa, bb) (((bb) > (aa))? (bb) : (aa))
 #define MINV(aa, bb) (((aa) > (bb))? (bb) : (aa))
@@ -601,14 +626,9 @@
   #define MAX_THREADS_P1 513
 #endif
 
-#ifdef __LP64__
-  #define BITCT 64
-#else
-  #define BITCT 32
-#endif
-
-#define BITCT2 (BITCT / 2)
-#define BYTECT (BITCT / 8)
+// defined as a macro since type of idx can vary; might want a debug
+// compilation mode which performs type-checking, though
+#define EXTRACT_2BIT_GENO(ulptr, idx) (((ulptr)[(idx) / BITCT2] >> (2 * ((idx) % BITCT2))) & 3)
 
 // generic maximum line length.  .ped/.vcf/etc. lines can of course be longer
 #define MAXLINELEN 131072
@@ -713,7 +733,7 @@ static inline const char* cond_replace(const char* ss, const char* match_str, co
 
 uint32_t aligned_malloc(uintptr_t** aligned_pp, uintptr_t size);
 
-void aligned_free(uintptr_t* aligned_ptr);
+void aligned_free(uintptr_t* aligned_pp);
 
 static inline void aligned_free_cond(uintptr_t* aligned_ptr) {
   if (aligned_ptr) {
@@ -837,13 +857,6 @@ static inline int32_t fputs_checked(const char* ss, FILE* outfile) {
 
 int32_t fwrite_checked(const void* buf, size_t len, FILE* outfile);
 
-static inline int32_t fwrite_checkedz(const void* buf, size_t len, FILE* outfile) {
-  if (len) {
-    return fwrite_checked(buf, len, outfile);
-  }
-  return ferror(outfile);
-}
-
 static inline int32_t fread_checked(char* buf, uintptr_t len, FILE* infile, uintptr_t* bytes_read_ptr) {
   *bytes_read_ptr = fread(buf, 1, len, infile);
   return ferror(infile);
@@ -1475,9 +1488,9 @@ static inline uint32_t tri_coord_no_diag_32(uint32_t small_coord, uint32_t big_c
 }
 
 // let the compiler worry about the second argument's bit width here
-#define SET_BIT(aa, bb) (aa[(bb) / BITCT] |= ONELU << ((bb) % BITCT))
+#define SET_BIT(aa, bb) ((aa)[(bb) / BITCT] |= ONELU << ((bb) % BITCT))
 
-#define SET_BIT_DBL(aa, bb) (aa[bb / BITCT2] |= ONELU << (2 * (bb % BITCT2)))
+#define SET_BIT_DBL(aa, bb) ((aa)[(bb) / BITCT2] |= ONELU << (2 * ((bb) % BITCT2)))
 
 static inline void set_bit(uintptr_t* bit_arr, uint32_t loc) {
   bit_arr[loc / BITCT] |= (ONELU << (loc % BITCT));
@@ -1491,9 +1504,9 @@ void fill_bits(uintptr_t* bit_arr, uintptr_t loc_start, uintptr_t len);
 
 void clear_bits(uintptr_t* bit_arr, uintptr_t loc_start, uintptr_t len);
 
-#define CLEAR_BIT(aa, bb) (aa[bb / BITCT] &= ~(ONELU << (bb % BITCT)))
+#define CLEAR_BIT(aa, bb) ((aa)[(bb) / BITCT] &= ~(ONELU << ((bb) % BITCT)))
 
-#define CLEAR_BIT_DBL(aa, bb) (aa[bb / BITCT2] &= ~(ONELU << (2 * (bb % BITCT2))))
+#define CLEAR_BIT_DBL(aa, bb) ((aa)[(bb) / BITCT2] &= ~(ONELU << (2 * ((bb) % BITCT2))))
 
 static inline void clear_bit(uintptr_t* bit_arr, uint32_t loc) {
   bit_arr[loc / BITCT] &= ~(ONELU << (loc % BITCT));
@@ -1503,20 +1516,20 @@ static inline void clear_bit_ul(uintptr_t* bit_arr, uintptr_t loc) {
   bit_arr[loc / BITCT] &= ~(ONELU << (loc % BITCT));
 }
 
-#define IS_SET(aa, bb) ((aa[bb / BITCT] >> (bb % BITCT)) & 1)
+#define IS_SET(aa, bb) (((aa)[(bb) / BITCT] >> ((bb) % BITCT)) & 1)
 
-#define IS_SET_DBL(aa, bb) ((aa[bb / BITCT2] >> (2 * (bb % BITCT2))) & 1)
+#define IS_SET_DBL(aa, bb) (((aa)[(bb) / BITCT2] >> (2 * ((bb) % BITCT2))) & 1)
 
 // use this instead of IS_SET() for signed 32-bit integers
-static inline uint32_t is_set(uintptr_t* exclude_arr, uint32_t loc) {
+static inline uint32_t is_set(const uintptr_t* exclude_arr, uint32_t loc) {
   return (exclude_arr[loc / BITCT] >> (loc % BITCT)) & 1;
 }
 
-static inline uint32_t is_set_ul(uintptr_t* exclude_arr, uintptr_t loc) {
+static inline uint32_t is_set_ul(const uintptr_t* exclude_arr, uintptr_t loc) {
   return (exclude_arr[loc / BITCT] >> (loc % BITCT)) & 1;
 }
 
-#define IS_NONNULL_AND_SET(aa, bb) (aa && IS_SET(aa, bb))
+#define IS_NONNULL_AND_SET(aa, bb) ((aa) && IS_SET(aa, bb))
 
 uint32_t next_unset_unsafe(uintptr_t* bit_arr, uint32_t loc);
 
@@ -1643,6 +1656,13 @@ static inline void fill_ulong_zero(uintptr_t* ularr, size_t size) {
 static inline void fill_ull_zero(uint64_t* ullarr, size_t size) {
   fill_ulong_zero((uintptr_t*)ullarr, size);
 }
+
+static inline void fill_v128_zero(__m128i* v128arr, size_t size) {
+  size_t ulii;
+  for (ulii = 0; ulii < size; ulii++) {
+    *v128arr++ = _mm_setzero_si128();
+  }
+}
 #else
 static inline void fill_ull_zero(uint64_t* ullarr, size_t size) {
   fill_ulong_zero((uintptr_t*)ullarr, size * 2);
@@ -1727,7 +1747,7 @@ static inline uint32_t get_id_htable_size(uintptr_t item_ct) {
   return (item_ct < 32761)? 65521 : geqprime(item_ct * 2 + 1);
 }
 
-int32_t populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t item_ct, const char* item_ids, uintptr_t max_id_len, uint32_t allow_dups, uint32_t* id_htable, uint32_t id_htable_size);
+int32_t populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t item_ct, const char* item_ids, uintptr_t max_id_len, uint32_t store_dups, uint32_t* id_htable, uint32_t id_htable_size);
 
 static inline int32_t alloc_and_populate_id_htable(uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t item_ct, const char* item_ids, uintptr_t max_id_len, uint32_t allow_dups, uint32_t** id_htable_ptr, uint32_t* id_htable_size_ptr) {
   uint32_t id_htable_size = get_id_htable_size(item_ct);
@@ -1795,7 +1815,7 @@ void get_set_wrange_align(uintptr_t* bitfield, uintptr_t word_ct, uintptr_t* fir
 
 // Maximum accepted chromosome index is this minus 1.  Currently cannot exceed
 // 2^14 due to SMALL_INTERVAL_BITS setting in plink_cnv.c...
-#define MAX_POSSIBLE_CHROM 5120
+#define MAX_POSSIBLE_CHROM 64000
 // ...unless this is uncommented (it removes the entire CNV module).
 // #define HIGH_MAX_CHROM
 
diff --git a/plink_data.c b/plink_data.c
index fe07a9c..6bcc946 100644
--- a/plink_data.c
+++ b/plink_data.c
@@ -54,17 +54,17 @@ int32_t sort_item_ids_nx(char** sorted_ids_ptr, uint32_t** id_map_ptr, uintptr_t
   return 0;
 }
 
-int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t sample_ct, uint64_t fsize) {
-  // See below for old mmap() code.  Turns out this is more portable without
-  // being noticeably slower.
+int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t unfiltered_sample_ct, uint64_t fsize) {
+  // previously used mmap(); turns out this is more portable without being
+  // noticeably slower.
   unsigned char* wkspace_mark = wkspace_base;
   FILE* infile = NULL;
   FILE* outfile = NULL;
   uintptr_t unfiltered_marker_ct4 = (unfiltered_marker_ct + 3) / 4;
   uintptr_t unfiltered_marker_ctl2 = (unfiltered_marker_ct + (BITCT2 - 1)) / BITCT2;
-  uintptr_t unfiltered_sample_ct4 = (sample_ct + 3) / 4;
+  uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
   uintptr_t marker_idx_end = 0;
-  uint32_t bed_offset = fsize - sample_ct * ((uint64_t)unfiltered_marker_ct4);
+  uint32_t bed_offset = fsize - unfiltered_sample_ct * ((uint64_t)unfiltered_marker_ct4);
   int32_t retval = 0;
   uintptr_t* loadbuf;
   uintptr_t* lptr;
@@ -81,80 +81,82 @@ int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintp
   uintptr_t cur_word1;
   uintptr_t cur_word2;
   uintptr_t cur_word3;
-  // could make this allocation a bit smaller in multipass case, but whatever
-  if (wkspace_alloc_ul_checked(&loadbuf, unfiltered_marker_ctl2 * 4 * sizeof(intptr_t))) {
-    goto sample_major_to_snp_major_ret_NOMEM;
-  }
-  if (wkspace_left < unfiltered_sample_ct4) {
-    goto sample_major_to_snp_major_ret_NOMEM;
-  }
-  writebuf = (unsigned char*)wkspace_base;
-  write_marker_ct = BITCT2 * (wkspace_left / (unfiltered_sample_ct4 * BITCT2));
-  loadbuf[unfiltered_marker_ctl2 - 1] = 0;
-  loadbuf[2 * unfiltered_marker_ctl2 - 1] = 0;
-  loadbuf[3 * unfiltered_marker_ctl2 - 1] = 0;
-  loadbuf[4 * unfiltered_marker_ctl2 - 1] = 0;
-  if (fopen_checked(&infile, sample_major_fname, "rb")) {
-    goto sample_major_to_snp_major_ret_OPEN_FAIL;
-  }
   if (fopen_checked(&outfile, outname, "wb")) {
     goto sample_major_to_snp_major_ret_OPEN_FAIL;
   }
   if (fwrite_checked("l\x1b\x01", 3, outfile)) {
     goto sample_major_to_snp_major_ret_WRITE_FAIL;
   }
-  do {
-    marker_idx_base = marker_idx_end;
-    marker_idx_end += write_marker_ct;
-    if (marker_idx_end > unfiltered_marker_ct) {
-      marker_idx_end = unfiltered_marker_ct;
-    }
-    if (fseeko(infile, bed_offset, SEEK_SET)) {
-      goto sample_major_to_snp_major_ret_READ_FAIL;
-    }
-    for (sample_idx_end = 0; sample_idx_end < sample_ct;) {
-      sample_idx_base = sample_idx_end;
-      sample_idx_end = sample_idx_base + 4;
-      if (sample_idx_end > sample_ct) {
-	fill_ulong_zero(&(loadbuf[(sample_ct % 4) * unfiltered_marker_ctl2]), (4 - (sample_ct % 4)) * unfiltered_marker_ctl2);
-	sample_idx_end = sample_ct;
-      }
-      lptr = loadbuf;
-      for (sample_idx = sample_idx_base; sample_idx < sample_idx_end; sample_idx++) {
-        if (load_raw(infile, lptr, unfiltered_marker_ct4)) {
-	  goto sample_major_to_snp_major_ret_READ_FAIL;
-        }
-	lptr = &(lptr[unfiltered_marker_ctl2]);
-      }
-      lptr = &(loadbuf[marker_idx_base / BITCT2]);
-      for (marker_idx_block_end = marker_idx_base; marker_idx_block_end < marker_idx_end; lptr++) {
-	marker_idx = marker_idx_block_end;
-        cur_word0 = *lptr;
-	cur_word1 = lptr[unfiltered_marker_ctl2];
-	cur_word2 = lptr[2 * unfiltered_marker_ctl2];
-	cur_word3 = lptr[3 * unfiltered_marker_ctl2];
-	marker_idx_block_end = marker_idx + BITCT2;
-	if (marker_idx_block_end > marker_idx_end) {
-          marker_idx_block_end = marker_idx_end;
-	}
-	ucptr = &(writebuf[(marker_idx - marker_idx_base) * unfiltered_sample_ct4 + (sample_idx_base / 4)]);
-	while (1) {
-	  *ucptr = (unsigned char)((cur_word0 & 3) | ((cur_word1 & 3) << 2) | ((cur_word2 & 3) << 4) | ((cur_word3 & 3) << 6));
-	  if (++marker_idx == marker_idx_block_end) {
-	    break;
+  if (unfiltered_marker_ct && unfiltered_sample_ct) {
+    // could make this allocation a bit smaller in multipass case, but whatever
+    if (wkspace_alloc_ul_checked(&loadbuf, unfiltered_marker_ctl2 * 4 * sizeof(intptr_t))) {
+      goto sample_major_to_snp_major_ret_NOMEM;
+    }
+    if ((wkspace_left / BITCT2) < unfiltered_sample_ct4) { // writebuf must hold >= BITCT2 variants; else write_marker_ct is 0 and the do-loop below never advances
+      goto sample_major_to_snp_major_ret_NOMEM;
+    }
+    writebuf = (unsigned char*)wkspace_base;
+    write_marker_ct = BITCT2 * (wkspace_left / (unfiltered_sample_ct4 * BITCT2));
+    if (fopen_checked(&infile, sample_major_fname, "rb")) {
+      goto sample_major_to_snp_major_ret_OPEN_FAIL;
+    }
+    loadbuf[unfiltered_marker_ctl2 - 1] = 0;
+    loadbuf[2 * unfiltered_marker_ctl2 - 1] = 0;
+    loadbuf[3 * unfiltered_marker_ctl2 - 1] = 0;
+    loadbuf[4 * unfiltered_marker_ctl2 - 1] = 0;
+    do {
+      marker_idx_base = marker_idx_end;
+      marker_idx_end += write_marker_ct;
+      if (marker_idx_end > unfiltered_marker_ct) {
+	marker_idx_end = unfiltered_marker_ct;
+      }
+      if (fseeko(infile, bed_offset, SEEK_SET)) {
+	goto sample_major_to_snp_major_ret_READ_FAIL;
+      }
+      for (sample_idx_end = 0; sample_idx_end < unfiltered_sample_ct;) {
+	sample_idx_base = sample_idx_end;
+	sample_idx_end = sample_idx_base + 4;
+	if (sample_idx_end > unfiltered_sample_ct) {
+	  fill_ulong_zero(&(loadbuf[(unfiltered_sample_ct % 4) * unfiltered_marker_ctl2]), (4 - (unfiltered_sample_ct % 4)) * unfiltered_marker_ctl2);
+	  sample_idx_end = unfiltered_sample_ct;
+	}
+	lptr = loadbuf;
+	for (sample_idx = sample_idx_base; sample_idx < sample_idx_end; sample_idx++) {
+	  if (load_raw(infile, lptr, unfiltered_marker_ct4)) {
+	    goto sample_major_to_snp_major_ret_READ_FAIL;
+	  }
+	  lptr = &(lptr[unfiltered_marker_ctl2]);
+	}
+	lptr = &(loadbuf[marker_idx_base / BITCT2]);
+	for (marker_idx_block_end = marker_idx_base; marker_idx_block_end < marker_idx_end; lptr++) {
+	  marker_idx = marker_idx_block_end;
+	  cur_word0 = *lptr;
+	  cur_word1 = lptr[unfiltered_marker_ctl2];
+	  cur_word2 = lptr[2 * unfiltered_marker_ctl2];
+	  cur_word3 = lptr[3 * unfiltered_marker_ctl2];
+	  marker_idx_block_end = marker_idx + BITCT2;
+	  if (marker_idx_block_end > marker_idx_end) {
+	    marker_idx_block_end = marker_idx_end;
+	  }
+	  ucptr = &(writebuf[(marker_idx - marker_idx_base) * unfiltered_sample_ct4 + (sample_idx_base / 4)]);
+	  while (1) {
+	    *ucptr = (unsigned char)((cur_word0 & 3) | ((cur_word1 & 3) << 2) | ((cur_word2 & 3) << 4) | ((cur_word3 & 3) << 6));
+	    if (++marker_idx == marker_idx_block_end) {
+	      break;
+	    }
+	    cur_word0 >>= 2;
+	    cur_word1 >>= 2;
+	    cur_word2 >>= 2;
+	    cur_word3 >>= 2;
+	    ucptr = &(ucptr[unfiltered_sample_ct4]);
 	  }
-	  cur_word0 >>= 2;
-	  cur_word1 >>= 2;
-	  cur_word2 >>= 2;
-	  cur_word3 >>= 2;
-	  ucptr = &(ucptr[unfiltered_sample_ct4]);
 	}
       }
-    }
-    if (fwrite_checked(writebuf, (marker_idx_end - marker_idx_base) * unfiltered_sample_ct4, outfile)) {
-      goto sample_major_to_snp_major_ret_WRITE_FAIL;
-    }
-  } while (marker_idx_end < unfiltered_marker_ct);
+      if (fwrite_checked(writebuf, (marker_idx_end - marker_idx_base) * unfiltered_sample_ct4, outfile)) {
+	goto sample_major_to_snp_major_ret_WRITE_FAIL;
+      }
+    } while (marker_idx_end < unfiltered_marker_ct);
+  }
   if (fclose_null(&outfile)) {
     goto sample_major_to_snp_major_ret_WRITE_FAIL;
   }
@@ -208,7 +210,8 @@ uint32_t chrom_error(const char* extension, Chrom_info* chrom_info_ptr, char* ch
   return 1;
 }
 
-int32_t load_map(FILE** mapfile_ptr, char* mapname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_len_ptr, uintptr_t** marker_exclude_ptr, char** marker_ids_ptr, Chrom_info* chrom_info_ptr, uint32_t** marker_pos_ptr, uint32_t* map_is_unsorted_ptr, uint32_t allow_extra_chroms) {
+int32_t load_map(FILE** mapfile_ptr, char* mapname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_len_ptr, uintptr_t** marker_exclude_ptr, char** marker_ids_ptr, Chrom_info* chrom_info_ptr, uint32_t** marker_pos_ptr, uint32_t* map_is_unsorted_ptr, uint32_t allow_extra_chroms, uint32_t allow_no_vars) {
+  // currently only used by lgen_to_bed()
   // todo: some cleanup
   uintptr_t marker_exclude_ct = *marker_exclude_ct_ptr;
   uintptr_t max_marker_id_len = 0;
@@ -267,7 +270,7 @@ int32_t load_map(FILE** mapfile_ptr, char* mapname, uint32_t* map_cols_ptr, uint
   if (!feof(*mapfile_ptr)) {
     goto load_map_ret_READ_FAIL;
   }
-  if (!unfiltered_marker_ct) {
+  if ((!unfiltered_marker_ct) && (!allow_no_vars)) {
     logerrprint("Error: No variants in .map file.\n");
     goto load_map_ret_INVALID_FORMAT;
   }
@@ -367,14 +370,16 @@ int32_t load_map(FILE** mapfile_ptr, char* mapname, uint32_t* map_cols_ptr, uint
       }
     }
   }
-  chrom_info_ptr->chrom_end[last_chrom] = marker_uidx;
   chrom_info_ptr->chrom_ct = ++chroms_encountered_m1;
-  chrom_info_ptr->chrom_file_order_marker_idx[chroms_encountered_m1] = marker_uidx;
   *marker_exclude_ct_ptr = marker_exclude_ct;
-  if (*marker_exclude_ct_ptr == unfiltered_marker_ct) {
-    logerrprint("Error: All variants excluded from .map file.\n");
-    goto load_map_ret_ALL_MARKERS_EXCLUDED;
+  if (unfiltered_marker_ct) {
+    chrom_info_ptr->chrom_end[last_chrom] = marker_uidx;
+    if (marker_exclude_ct == unfiltered_marker_ct) {
+      logerrprint("Error: All variants excluded from .map file.\n");
+      goto load_map_ret_ALL_MARKERS_EXCLUDED;
+    }
   }
+  chrom_info_ptr->chrom_file_order_marker_idx[chroms_encountered_m1] = marker_uidx;
   while (0) {
   load_map_ret_NOMEM:
     retval = RET_NOMEM;
@@ -509,6 +514,7 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
   int32_t prev_chrom = -1;
   uint32_t last_pos = 0;
   uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   uint32_t exclude_snp = (filter_flags / FILTER_EXCLUDE_MARKERNAME_SNP) & 1;
   uint32_t snps_only = (filter_flags / FILTER_SNPS_ONLY) & 1;
   uint32_t snps_only_no_di = (misc_flags / MISC_SNPS_ONLY_NO_DI) & 1;
@@ -948,7 +954,7 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
   if (!feof(bimfile)) {
     goto load_bim_ret_READ_FAIL;
   }
-  if (!unfiltered_marker_ct) {
+  if ((!unfiltered_marker_ct) && (!allow_no_variants)) {
     sprintf(logbuf, "Error: No variants in %s.\n", ftype_str);
     goto load_bim_ret_INVALID_FORMAT_2;
   } else if (unfiltered_marker_ct > 2147483645) {
@@ -1281,7 +1287,7 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
       }
     }
   }
-  if (unfiltered_marker_ct == marker_exclude_ct) {
+  if ((unfiltered_marker_ct == marker_exclude_ct) && (!allow_no_variants)) {
     logerrprint("Error: All variants excluded.\n");
     goto load_bim_ret_ALL_MARKERS_EXCLUDED;
   }
@@ -1357,6 +1363,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
   // similar to load_clusters() in plink_cluster.c
   // sex_nm and sex_male should be NULL unless sex is supposed to be added as
   // an extra covariate
+  // covar_range_list_ptr is NULL iff --gxe was specified
   unsigned char* wkspace_mark = wkspace_base;
   unsigned char* wkspace_mark2 = NULL;
   FILE* covar_file = NULL;
@@ -1471,15 +1478,41 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
     goto load_covars_ret_NOMEM;
   }
   loadbuf = (char*)wkspace_base;
-  retval = open_and_load_to_first_token(&covar_file, covar_fname, loadbuf_size, '\0', "--covar file", loadbuf, &bufptr, &line_idx);
-  if (retval) {
-    goto load_covars_ret_1;
+  // was using open_and_load_to_first_token(), but we now don't want to
+  // automatically print an error message on an empty file.
+  loadbuf[loadbuf_size - 1] = ' ';
+  if (fopen_checked(&covar_file, covar_fname, "r")) {
+    goto load_covars_ret_OPEN_FAIL;
   }
+  line_idx = 0;
+  do {
+    if (!fgets(loadbuf, loadbuf_size, covar_file)) {
+      if (!feof(covar_file)) {
+	goto load_covars_ret_READ_FAIL;
+      }
+      strcpy(tbuf, "Empty --covar file.\n");
+      goto load_covars_none;
+    }
+    line_idx++;
+    if (!(loadbuf[loadbuf_size - 1])) {
+      if (loadbuf_size == MAXLINEBUFLEN) {
+	LOGERRPRINTF("Error: Line %" PRIuPTR " of --covar file is pathologically long.\n", line_idx);
+	goto load_covars_ret_INVALID_FORMAT;
+      } else {
+	goto load_covars_ret_NOMEM;
+      }
+    }
+    bufptr = skip_initial_spaces(loadbuf);
+  } while (is_eoln_kns(*bufptr));
   covar_raw_ct = count_tokens(bufptr);
-  if ((covar_raw_ct < 3) || (covar_raw_ct < 2 + gxe_mcovar)) {
+  if ((covar_raw_ct < 2) || (covar_raw_ct < 2 + gxe_mcovar)) {
     goto load_covars_ret_MISSING_TOKENS;
   }
   covar_raw_ct -= 2;
+  if ((!covar_raw_ct) && (!sex_nm)) {
+    strcpy(tbuf, "No covariate columns in --covar file.\n");
+    goto load_covars_none;
+  }
   covar_raw_ctl = (covar_raw_ct + (BITCT - 1)) / BITCT;
   covars_active = (uintptr_t*)top_alloc(&topsize, covar_raw_ctl * sizeof(intptr_t));
 
@@ -1488,13 +1521,13 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
   header_absent = (strcmp_se(bufptr, "FID", 3) || strcmp_se(bufptr2, "IID", 3));
   bufptr = next_token(bufptr2);
 
-  if (covar_modifier & (COVAR_NAME | COVAR_NUMBER)) {
+  if ((covar_modifier & (COVAR_NAME | COVAR_NUMBER)) && covar_raw_ct) {
     fill_ulong_zero(covars_active, covar_raw_ctl);
     if (covar_modifier & COVAR_NUMBER) {
       if (numeric_range_list_to_bitfield(covar_range_list_ptr, covar_raw_ct, covars_active, 1, 0)) {
 	goto load_covars_ret_MISSING_TOKENS;
       }
-    } else {
+    } else if (covar_modifier & COVAR_NAME) {
       if (header_absent) {
 	logerrprint("Error: --covar file doesn't have a header line for --covar-name.\n");
 	goto load_covars_ret_INVALID_FORMAT;
@@ -1512,17 +1545,22 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
     fill_all_bits(covars_active, covar_raw_ct);
     covar_ct = covar_raw_ct;
   } else {
+    // --gxe only
     fill_ulong_zero(covars_active, covar_raw_ctl);
     covar_ct = 0;
   }
   covar_ctx = covar_ct + (sex_nm? 1 : 0);
-  min_covar_col_ct = last_set_bit(covars_active, covar_raw_ctl) + 1;
+  if ((!covar_ctx) && (!gxe_mcovar)) {
+    strcpy(tbuf, "No --covar values loaded.\n");
+    goto load_covars_none;
+  }
+  min_covar_col_ct = covar_ct? (last_set_bit(covars_active, covar_raw_ctl) + 1) : 0;
   if (min_covar_col_ct < gxe_mcovar) {
     min_covar_col_ct = gxe_mcovar;
   }
   if (header_absent) {
     max_covar_name_len = 4 + intlen(min_covar_col_ct);
-  } else {
+  } else if (min_covar_col_ct) {
     uii = 0;
     while (1) {
       bufptr2 = token_endnn(bufptr);
@@ -1593,14 +1631,13 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
   rewind(covar_file);
   if (header_absent) {
     if (covar_range_list_ptr) {
-      covar_uidx = 0;
-      for (covar_idx = 0; covar_idx < covar_ct; covar_idx++) {
+      for (covar_uidx = 0, covar_idx = 0; covar_idx < covar_ct; covar_idx++) {
 	covar_uidx = next_set_ul_unsafe(covars_active, covar_uidx);
 	uint32_writex(memcpyl3a(&(covar_names[covar_idx * max_covar_name_len]), "COV"), ++covar_uidx, '\0');
       }
     }
     line_idx = 0;
-  } else {
+  } else if (covar_ct) {
     covar_idx = 0;
     retval = load_to_first_token(covar_file, loadbuf_size, '\0', "--covar file", loadbuf, &bufptr, &line_idx);
     if (retval) {
@@ -1648,11 +1685,13 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
     set_bit(already_seen, ii);
     sample_idx = id_map[(uint32_t)ii];
     bufptr = bufptr2;
-    if (min_covar_col_ct > 1) {
-      bufptr = next_token_mult(bufptr, min_covar_col_ct - 1);
-    }
-    if (no_more_tokens_kns(bufptr)) {
-      goto load_covars_ret_MISSING_TOKENS;
+    if (min_covar_col_ct) {
+      if (min_covar_col_ct > 1) {
+	bufptr = next_token_mult(bufptr, min_covar_col_ct - 1);
+      }
+      if (no_more_tokens_kns(bufptr)) {
+	goto load_covars_ret_MISSING_TOKENS;
+      }
     }
     if (covar_range_list_ptr) {
       dptr = &(covar_d[sample_idx * covar_ctx]);
@@ -1696,6 +1735,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
 	  *dptr++ = (double)((int32_t)is_set(sex_male, sample_uidx));
 	} else {
 	  covar_missing = 1;
+	  *dptr++ = missing_phenod;
 	}
       }
       if (!covar_missing) {
@@ -1715,12 +1755,8 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
   if (!feof(covar_file)) {
     goto load_covars_ret_READ_FAIL;
   }
-  if (loaded_sample_ct == missing_cov_ct) {
-    logerrprint("Error: No --covar values loaded.\n");
-    goto load_covars_ret_INVALID_FORMAT;
-  }
   if (covar_range_list_ptr) {
-    if ((covar_ct < covar_raw_ct - 1) || ((covar_ct == covar_raw_ct - 1) && ((!gxe_mcovar) || is_set(covars_active, gxe_mcovar - 1)))) {
+    if ((covar_ct + 1 < covar_raw_ct) || ((covar_ct + 1 == covar_raw_ct) && ((!gxe_mcovar) || is_set(covars_active, gxe_mcovar - 1)))) {
       if (gxe_mcovar && (!is_set(covars_active, gxe_mcovar - 1))) {
         sprintf(logbuf, "--covar: 1 C/C cov. loaded for --gxe, %" PRIuPTR "/%" PRIuPTR " for other operations.\n", covar_ct, covar_raw_ct);
       } else {
@@ -1741,6 +1777,78 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
     LOGPRINTF("%" PRIuPTR " %s %s not seen in the covariate file.\n", ulii, species_str(ulii), (ulii == 1)? "was" : "were");
   }
 
+  if (covar_modifier & COVAR_NO_CONST) {
+    if (gxe_mcovar) {
+      uii = popcount_longs(gxe_covar_c, sample_ctl);
+      if ((!uii) || (uii == popcount_longs(gxe_covar_nm, sample_ctl))) {
+	logerrprint("Error: --gxe covariate is constant and --no-const-covar was specified.\n");
+	goto load_covars_ret_INVALID_FORMAT;
+      }
+    }
+    if (covar_range_list_ptr) {
+      // redefinition
+      covar_raw_ctl = (covar_ctx + BITCT - 1) / BITCT;
+      if (wkspace_alloc_ul_checked(&already_seen, covar_raw_ctl * sizeof(intptr_t))) {
+	goto load_covars_ret_NOMEM;
+      }
+      // is covariate nonconstant?
+      fill_ulong_zero(already_seen, covar_raw_ctl);
+      for (covar_idx = 0; covar_idx < covar_ctx; covar_idx++) {
+	dptr = &(covar_d[covar_idx]);
+	dxx = missing_phenod;
+	for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+	  if (dptr[sample_idx * covar_ctx] != missing_phenod) {
+	    dxx = dptr[sample_idx * covar_ctx];
+	    break;
+	  }
+	}
+	for (; sample_idx < sample_ct; sample_idx++) {
+	  if ((dptr[sample_idx * covar_ctx] != missing_phenod) && (dptr[sample_idx * covar_ctx] != dxx)) {
+	    break;
+	  }
+	}
+	if (sample_idx < sample_ct) {
+	  SET_BIT(already_seen, covar_idx);
+	}
+      }
+      uii = popcount_longs(already_seen, covar_raw_ctl);
+      if (!uii) {
+	strcpy(tbuf, "All covariates are constant.\n");
+	goto load_covars_none;
+      } else if (uii < covar_ctx) {
+	LOGPRINTF("--no-const-covar: %" PRIuPTR " constant covariate%s excluded.\n", covar_ctx - uii, (covar_ctx - uii == 1)? "" : "s");
+	*covar_ctx_ptr = uii;
+	dptr = covar_d;
+        for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+	  uii = 0;
+	  for (covar_idx = 0; covar_idx < covar_ctx; covar_idx++) {
+	    if (IS_SET(already_seen, covar_idx)) {
+	      dxx = covar_d[sample_idx * covar_ctx + covar_idx];
+	      if (dxx == missing_phenod) {
+		uii = 1;
+	      }
+	      *dptr++ = dxx;
+	    }
+	  }
+	  if (!uii) {
+	    // if this sample had some missing covariate values, but all those
+	    // covariates were excluded by --no-const-covar, set covar_nm bit
+	    SET_BIT(covar_nm, sample_idx);
+	  }
+	}
+	covar_idx = next_unset_unsafe(already_seen, 0);
+	uii = covar_idx;
+	for (; covar_idx < covar_ctx; covar_idx++) {
+	  if (IS_SET(already_seen, covar_idx)) {
+	    strcpy(&(covar_names[uii * max_covar_name_len]), &(covar_names[covar_idx * max_covar_name_len]));
+	    uii++;
+	  }
+	}
+	// don't worry about memory overallocation for now
+      }
+    }
+  }
+
   wkspace_reset(wkspace_mark2);
   while (0) {
   load_covars_ret_NOMEM2:
@@ -1748,6 +1856,9 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
   load_covars_ret_NOMEM:
     retval = RET_NOMEM;
     break;
+  load_covars_ret_OPEN_FAIL:
+    retval = RET_OPEN_FAIL;
+    break;
   load_covars_ret_READ_FAIL:
     retval = RET_READ_FAIL;
     break;
@@ -1758,6 +1869,21 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
   load_covars_ret_INVALID_FORMAT:
     retval = RET_INVALID_FORMAT;
     break;
+  load_covars_none:
+    if (covar_modifier & COVAR_ALLOW_NONE) {
+      *covar_ctx_ptr = 0;
+      *covar_names_ptr = NULL;
+      *max_covar_name_len_ptr = 1;
+      *covar_nm_ptr = NULL;
+      *covar_d_ptr = NULL;
+      // --gxe not possible
+      wkspace_reset(wkspace_mark);
+      logerrprint("Warning: ");
+    } else {
+      retval = RET_INVALID_FORMAT;
+      logerrprint("Error: ");
+    }
+    logerrprint(tbuf);
   }
  load_covars_ret_1:
   if (retval) {
@@ -2491,8 +2617,8 @@ int32_t load_bim_split_chrom(char* bimname, uintptr_t* marker_exclude, uintptr_t
   FILE* infile = NULL;
   char* loadbuf = tbuf;
   uint32_t marker_uidx = 0xffffffffU; // deliberate overflow
-  uintptr_t marker_idx = 0;
   int32_t retval = 0;
+  uintptr_t marker_idx;
   char* bufptr;
   uint64_t chrom_idx;
   if (max_bim_linelen > MAXLINELEN) {
@@ -2503,7 +2629,7 @@ int32_t load_bim_split_chrom(char* bimname, uintptr_t* marker_exclude, uintptr_t
   if (fopen_checked(&infile, bimname, "r")) {
     goto load_bim_split_chrom_ret_OPEN_FAIL;
   }
-  do {
+  for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
   load_bim_split_chrom_reread:
     if (!fgets(loadbuf, max_bim_linelen, infile)) {
       goto load_bim_split_chrom_ret_READ_FAIL;
@@ -2519,7 +2645,7 @@ int32_t load_bim_split_chrom(char* bimname, uintptr_t* marker_exclude, uintptr_t
     // already validated
     chrom_idx = ((uint32_t)get_chrom_code(chrom_info_ptr, bufptr));
     ll_buf[marker_idx] = (int64_t)((chrom_idx << 32) | ((uint64_t)marker_idx));
-  } while ((++marker_idx) < marker_ct);
+  }
   while (0) {
   load_bim_split_chrom_ret_NOMEM:
     retval = RET_NOMEM;
@@ -2703,6 +2829,11 @@ void sort_marker_chrom_pos(int64_t* ll_buf, uintptr_t marker_ct, uint32_t* pos_b
   uint32_t uii;
   uint32_t cur_chrom;
   uint32_t chrom_ct;
+  if (!marker_ct) {
+    chrom_start[0] = 0;
+    *chrom_ct_ptr = 0;
+    return;
+  }
 #ifdef __cplusplus
   std::sort(ll_buf, &(ll_buf[marker_ct]));
 #else
@@ -2766,15 +2897,15 @@ int32_t sort_and_write_bim(uint32_t* map_reverse, uint32_t map_cols, char* outna
   uint32_t chrom_ct;
   uint32_t uii;
   uint32_t ujj;
-  // There can be a LOT of markers (some 1000 Genomes files we've been offered
-  // have ~40 million), so speeding up the sorting step over just calling
-  // qsort_ext() may not be a complete waste of effort.
+  // There can be a LOT of markers (1000 Genomes files can have ~40-80
+  // million), so speeding up the sorting step over just calling qsort_ext()
+  // may not be a complete waste of effort.
   // Strategy:
   // 1. fill ll_buf with chromosome idx in high-order bits, original position
-  // in low-order.
+  //    in low-order.
   // 2. std::sort() ll_buf, read off chromosome boundaries
   // 3. then replace high-order bits in ll_buf with marker positions, and
-  // std::sort() each chromosome separately.
+  //    std::sort() each chromosome separately.
   // Would be even faster if this was performed in a single sort, in the
   // super-common case where all three numbers can be squeezed together in 64
   // bits.  But we care most about performance when this can't be done, so I
@@ -2997,8 +3128,8 @@ int32_t flip_subset_init(char* flip_fname, char* flip_subset_fname, uintptr_t un
     }
     a1ptr = marker_allele_ptrs[2 * marker_uidx];
     a2ptr = marker_allele_ptrs[2 * marker_uidx + 1];
-    ucc = a1ptr[0];
-    if (a1ptr[1] || a2ptr[1] || (ucc < 'A') || (ucc > 'T') || (reverse_complements[ucc - 'A'] != a2ptr[0])) {
+    ucc = ((unsigned char)a1ptr[0]) - 'A';
+    if (a1ptr[1] || a2ptr[1] || (ucc > 19) || (reverse_complements[ucc] != a2ptr[0])) {
       sprintf(logbuf, "Error: Invalid alleles (not reverse complement single bases) on line\n%" PRIuPTR " of --flip file.\n", line_idx);
       goto flip_subset_init_ret_INVALID_FORMAT_2;
     }
@@ -3139,7 +3270,7 @@ int32_t make_bed_one_marker(FILE* bedfile, uintptr_t* loadbuf, uint32_t unfilter
       do {
 	sample_uidx2 = sample_sort_map[sample_uidx++];
       } while (IS_SET(sample_exclude, sample_uidx2));
-      cur_word |= (((loadbuf[sample_uidx2 / BITCT2] >> ((sample_uidx2 % BITCT2) * 2)) & 3) << (ii_rem * 2));
+      cur_word |= EXTRACT_2BIT_GENO(loadbuf, sample_uidx2) << (ii_rem * 2);
       if (++ii_rem == BITCT2) {
 	*writeptr++ = cur_word;
 	cur_word = 0;
@@ -3186,7 +3317,7 @@ int32_t make_bed_me_missing_one_marker(FILE* bedfile, uintptr_t* loadbuf, uint32
       do {
 	sample_uidx2 = sample_sort_map[sample_uidx++];
       } while (IS_SET(sample_exclude, sample_uidx2));
-      cur_word |= (((loadbuf[sample_uidx2 / BITCT2] >> ((sample_uidx2 % BITCT2) * 2)) & 3) << (ii_rem * 2));
+      cur_word |= EXTRACT_2BIT_GENO(loadbuf, sample_uidx2) << (ii_rem * 2);
       if (++ii_rem == BITCT2) {
 	*writeptr++ = cur_word;
 	cur_word = 0;
@@ -3252,22 +3383,22 @@ void reverse_subset(uintptr_t* writebuf, uintptr_t* subset_vec2, uintptr_t word_
   __m128i* wvec_end = (__m128i*)(&(writebuf[word_ct]));
   __m128i vii;
   __m128i vjj;
-  do {
+  while (wvec < wvec_end) {
     vii = *wvec;
     vjj = _mm_andnot_si128(_mm_xor_si128(vii, _mm_srli_epi64(vii, 1)), *svec++);
     vjj = _mm_or_si128(vjj, _mm_slli_epi64(vjj, 1));
     *wvec++ = _mm_xor_si128(vii, vjj);
-  } while (wvec < wvec_end);
+  }
 #else
   uintptr_t* writebuf_end = &(writebuf[word_ct]);
   uintptr_t ulii;
   uintptr_t uljj;
-  do {
+  while (writebuf < writebuf_end) {
     ulii = *writebuf;
     uljj = (*subset_vec2++) & (~(ulii ^ (ulii >> 1)));
     uljj *= 3;
     *writebuf++ = ulii ^ uljj;
-  } while (writebuf < writebuf_end);
+  }
 #endif
 }
 
@@ -3326,7 +3457,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
   uint32_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
   uint32_t family_ct = 0;
   uint32_t set_hh_missing = (misc_flags / MISC_SET_HH_MISSING) & 1;
-  uint32_t set_me_missing = (misc_flags / MISC_SET_ME_MISSING) & 1;
+  uint32_t set_me_missing = ((misc_flags / MISC_SET_ME_MISSING) & 1) && sample_ct;
   uint32_t fill_missing_a2 = (misc_flags / MISC_FILL_MISSING_A2) & 1;
   uint32_t mendel_include_duos = (mendel_modifier / MENDEL_DUOS) & 1;
   uint32_t mendel_multigen = (mendel_modifier / MENDEL_MULTIGEN) & 1;
@@ -3465,59 +3596,61 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
       *outname_end = '\0';
       LOGPRINTFWW5("--make-bed to %s.bed + %s.bim + %s.fam ... ", outname, outname, outname);
       fputs("0%", stdout);
-      loop_end = marker_ct / 100;
-      markers_done = 0;
-      for (pass_idx = 0; pass_idx < pass_ct; pass_idx++) {
-        pass_start = pass_idx * pass_size;
-	pass_end = (pass_idx + 1) * pass_size;
-	if (pass_idx + 1 == pass_ct) {
-	  pass_end = marker_ct;
-	}
-	seek_needed = 1;
-	for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
-	  if (IS_SET(marker_exclude, marker_uidx)) {
-	    marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
-	    seek_needed = 1;
-	  }
-	  if ((map_reverse[marker_uidx] < pass_start) || (map_reverse[marker_uidx] >= pass_end)) {
-	    seek_needed = 1;
-	    continue;
-	  }
-	  writebuf_ptr = &(writebuf[sample_ctv2 * (map_reverse[marker_uidx] - pass_start)]);
-	  if (seek_needed) {
-	    if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
-	      goto make_bed_ret_READ_FAIL;
+      if (sample_ct) {
+	loop_end = marker_ct / 100;
+	markers_done = 0;
+	for (pass_idx = 0; pass_idx < pass_ct; pass_idx++) {
+	  pass_start = pass_idx * pass_size;
+	  pass_end = (pass_idx + 1) * pass_size;
+	  if (pass_idx + 1 == pass_ct) {
+	    pass_end = marker_ct;
+	  }
+	  seek_needed = 1;
+	  for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
+	    if (IS_SET(marker_exclude, marker_uidx)) {
+	      marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
+	      seek_needed = 1;
 	    }
-	    seek_needed = 0;
-	  }
-	  retval = make_bed_one_marker(bedfile, loadbuf, unfiltered_sample_ct, unfiltered_sample_ct4, sample_exclude, sample_ct, sample_sort_map, final_mask, IS_SET(marker_reverse, marker_uidx), writebuf_ptr);
-	  if (retval) {
-	    goto make_bed_ret_1;
-	  }
-	  if (zcdefs) {
-	    zeropatch(sample_ctv2, cluster_ct, cluster_zc_masks, zcdefs, patchbuf, marker_idx, writebuf_ptr);
-	  }
-	  if (flip_subset_markers && is_set(flip_subset_markers, marker_uidx)) {
-	    reverse_subset(writebuf_ptr, flip_subset_vec2, sample_ctv2);
-	  }
-	  if (markers_done >= loop_end) {
-	    if (pct > 10) {
-	      putchar('\b');
+	    if ((map_reverse[marker_uidx] < pass_start) || (map_reverse[marker_uidx] >= pass_end)) {
+	      seek_needed = 1;
+	      continue;
+	    }
+	    writebuf_ptr = &(writebuf[sample_ctv2 * (map_reverse[marker_uidx] - pass_start)]);
+	    if (seek_needed) {
+	      if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
+		goto make_bed_ret_READ_FAIL;
+	      }
+	      seek_needed = 0;
+	    }
+	    retval = make_bed_one_marker(bedfile, loadbuf, unfiltered_sample_ct, unfiltered_sample_ct4, sample_exclude, sample_ct, sample_sort_map, final_mask, IS_SET(marker_reverse, marker_uidx), writebuf_ptr);
+	    if (retval) {
+	      goto make_bed_ret_1;
+	    }
+	    if (zcdefs) {
+	      zeropatch(sample_ctv2, cluster_ct, cluster_zc_masks, zcdefs, patchbuf, marker_idx, writebuf_ptr);
+	    }
+	    if (flip_subset_markers && is_set(flip_subset_markers, marker_uidx)) {
+	      reverse_subset(writebuf_ptr, flip_subset_vec2, sample_ctv2);
+	    }
+	    if (markers_done >= loop_end) {
+	      if (pct > 10) {
+		putchar('\b');
+	      }
+	      pct = (markers_done * 100LLU) / marker_ct;
+	      printf("\b\b%u%%", pct);
+	      fflush(stdout);
+	      pct++;
+	      loop_end = (pct * ((uint64_t)marker_ct)) / 100;
 	    }
-	    pct = (markers_done * 100LLU) / marker_ct;
-	    printf("\b\b%u%%", pct);
-	    fflush(stdout);
-	    pct++;
-	    loop_end = (pct * ((uint64_t)marker_ct)) / 100;
+	    markers_done++;
 	  }
-	  markers_done++;
-	}
-	writebuf_ptr = writebuf;
-	for (marker_idx = pass_start; marker_idx < pass_end; marker_idx++) {
-	  if (fwrite_checked(writebuf_ptr, sample_ct4, bedoutfile)) {
-	    goto make_bed_ret_WRITE_FAIL;
+	  writebuf_ptr = writebuf;
+	  for (marker_idx = pass_start; marker_idx < pass_end; marker_idx++) {
+	    if (fwrite_checked(writebuf_ptr, sample_ct4, bedoutfile)) {
+	      goto make_bed_ret_WRITE_FAIL;
+	    }
+	    writebuf_ptr = &(writebuf_ptr[sample_ctv2]);
 	  }
-	  writebuf_ptr = &(writebuf_ptr[sample_ctv2]);
 	}
       }
     } else {
@@ -3701,7 +3834,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
   return retval;
 }
 
-int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32_t missing_pheno, uint32_t affection_01, uintptr_t* unfiltered_sample_ct_ptr, char** sample_ids_ptr, uintptr_t* max_sample_id_len_ptr, char** paternal_ids_ptr, uintptr_t* max_paternal_id_len_ptr, char** maternal_ids_ptr, uintptr_t* max_maternal_id_len_ptr, uintptr_t** sex_nm_ptr, uintptr_t** sex_male_ptr, uint32_t* affection_ptr, uintptr_t** pheno_nm_ptr, uintptr_t** pheno_c_ptr, double** pheno_d_ptr, uintptr [...]
+int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32_t missing_pheno, uint32_t affection_01, uintptr_t* unfiltered_sample_ct_ptr, char** sample_ids_ptr, uintptr_t* max_sample_id_len_ptr, char** paternal_ids_ptr, uintptr_t* max_paternal_id_len_ptr, char** maternal_ids_ptr, uintptr_t* max_maternal_id_len_ptr, uintptr_t** sex_nm_ptr, uintptr_t** sex_male_ptr, uint32_t* affection_ptr, uintptr_t** pheno_nm_ptr, uintptr_t** pheno_c_ptr, double** pheno_d_ptr, uintptr [...]
   unsigned char* wkspace_mark = wkspace_base;
   double missing_phenod = (double)missing_pheno;
   uintptr_t* pheno_c = NULL;
@@ -3747,8 +3880,9 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
     goto load_fam_ret_OPEN_FAIL;
   }
   // ----- .fam read, first pass -----
-  // count number of people, determine maximum person/father/mother ID lengths,
-  // affection status, verify all floating point phenotype values are valid
+  // count number of samples, determine maximum person/father/mother ID
+  // lengths, affection status, verify all floating point phenotype values are
+  // valid
   while (fgets(loadbuf, loadbuf_size, famfile)) {
     line_idx++;
     if (!loadbuf[loadbuf_size - 1]) {
@@ -3811,7 +3945,7 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
   if (ferror(famfile)) {
     goto load_fam_ret_READ_FAIL;
   }
-  if (!unfiltered_sample_ct) {
+  if ((!unfiltered_sample_ct) && (!allow_no_samples)) {
     logerrprint("Error: Nobody in .fam file.\n");
     goto load_fam_ret_INVALID_FORMAT;
   }
@@ -4006,6 +4140,8 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
 
   uint32_t snpid_chr = (misc_flags / MISC_OXFORD_SNPID_CHR) & 1;
   uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   uint32_t sample_ct = 0;
   uint32_t col_ct = 3;
   uint32_t is_binary_pheno = 0;
@@ -4338,7 +4474,7 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
     }
     sample_ct++;
   }
-  if (!sample_ct) {
+  if ((!sample_ct) && (!allow_no_samples)) {
     logerrprint("Error: No samples in .sample file.\n");
     goto oxford_to_bed_ret_INVALID_FORMAT;
   }
@@ -4458,156 +4594,158 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
       if (putc_checked('\n', outfile_bim)) {
 	goto oxford_to_bed_ret_WRITE_FAIL;
       }
-      cur_word = 0;
-      shiftval = 0;
-      ulptr = writebuf;
-      bufptr = skip_initial_spaces(&(bufptr4[1]));
-      for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
-	if (is_eoln_kns(*bufptr)) {
-	  goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
-	}
-	// fast handling of common cases
-	cc = bufptr[1];
-	if ((cc == ' ') || (cc == '\t')) {
-	  cc = bufptr[3];
-	  cc2 = bufptr[5];
-	  if (((cc == ' ') || (cc == '\t')) && ((cc2 == ' ') || (cc2 == '\t'))) {
-	    cc = *bufptr;
-	    if (cc == '0') {
-	      bufptr2 = &(bufptr[2]);
-	      cc = *bufptr2;
-	      cc2 = bufptr2[2];
+      if (sample_ct) {
+	cur_word = 0;
+	shiftval = 0;
+	ulptr = writebuf;
+	bufptr = skip_initial_spaces(bufptr4);
+	for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+	  if (is_eoln_kns(*bufptr)) {
+	    goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
+	  }
+	  // fast handling of common cases
+	  cc = bufptr[1];
+	  if ((cc == ' ') || (cc == '\t')) {
+	    cc = bufptr[3];
+	    cc2 = bufptr[5];
+	    if (((cc == ' ') || (cc == '\t')) && ((cc2 == ' ') || (cc2 == '\t'))) {
+	      cc = *bufptr;
 	      if (cc == '0') {
-		if (cc2 == '1') {
-		  ulii = 3;
-		} else if (cc2 == '0') {
-		  ulii = 1;
+		bufptr2 = &(bufptr[2]);
+		cc = *bufptr2;
+		cc2 = bufptr2[2];
+		if (cc == '0') {
+		  if (cc2 == '1') {
+		    ulii = 3;
+		  } else if (cc2 == '0') {
+		    ulii = 1;
+		  } else {
+		    // could be a space...
+		    goto oxford_to_bed_full_parse_2;
+		  }
+		} else if ((cc == '1') && (cc2 == '0')) {
+		  ulii = 2;
 		} else {
-		  // could be a space...
 		  goto oxford_to_bed_full_parse_2;
 		}
-	      } else if ((cc == '1') && (cc2 == '0')) {
-		ulii = 2;
+	      } else if ((cc == '1') && (bufptr[2] == '0') && (bufptr[4] == '0')) {
+		ulii = 0;
 	      } else {
-		goto oxford_to_bed_full_parse_2;
+		goto oxford_to_bed_full_parse;
 	      }
-	    } else if ((cc == '1') && (bufptr[2] == '0') && (bufptr[4] == '0')) {
-	      ulii = 0;
+	      bufptr = &(bufptr[6]);
 	    } else {
 	      goto oxford_to_bed_full_parse;
 	    }
-	    bufptr = &(bufptr[6]);
 	  } else {
-	    goto oxford_to_bed_full_parse;
-	  }
-	} else {
-	  // okay, gotta do things the slow way
-	oxford_to_bed_full_parse:
-	  bufptr2 = token_endnn(bufptr);
-	oxford_to_bed_full_parse_2:
-	  bufptr2 = skip_initial_spaces(bufptr2);
-	  if (is_eoln_kns(*bufptr2)) {
-	    goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
-	  }
-	  bufptr3 = token_endnn(bufptr2);
-	  dzz = strtod(bufptr3, &bufptr4);
-	  if (!is_randomized) {
-	    if (dzz >= hard_call_floor) {
-	      ulii = 3;
-	    } else {
-	      if (bufptr3 == bufptr4) {
-		goto oxford_to_bed_ret_INVALID_DOSAGE;
-	      }
-	      dyy = strtod(bufptr2, &bufptr3);
-	      if (dyy >= hard_call_floor) {
-		ulii = 2;
+	    // okay, gotta do things the slow way
+	  oxford_to_bed_full_parse:
+	    bufptr2 = token_endnn(bufptr);
+	  oxford_to_bed_full_parse_2:
+	    bufptr2 = skip_initial_spaces(bufptr2);
+	    if (is_eoln_kns(*bufptr2)) {
+	      goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
+	    }
+	    bufptr3 = token_endnn(bufptr2);
+	    dzz = strtod(bufptr3, &bufptr4);
+	    if (!is_randomized) {
+	      if (dzz >= hard_call_floor) {
+		ulii = 3;
 	      } else {
-		if (bufptr2 == bufptr3) {
+		if (bufptr3 == bufptr4) {
 		  goto oxford_to_bed_ret_INVALID_DOSAGE;
 		}
-		dxx = strtod(bufptr, &bufptr2);
-		if (dxx >= hard_call_floor) {
-		  ulii = 0;
+		dyy = strtod(bufptr2, &bufptr3);
+		if (dyy >= hard_call_floor) {
+		  ulii = 2;
 		} else {
-		  if (bufptr == bufptr2) {
+		  if (bufptr2 == bufptr3) {
 		    goto oxford_to_bed_ret_INVALID_DOSAGE;
 		  }
-		  ulii = 1;
+		  dxx = strtod(bufptr, &bufptr2);
+		  if (dxx >= hard_call_floor) {
+		    ulii = 0;
+		  } else {
+		    if (bufptr == bufptr2) {
+		      goto oxford_to_bed_ret_INVALID_DOSAGE;
+		    }
+		    ulii = 1;
+		  }
 		}
 	      }
-	    }
-	  } else {
-	    drand = rand_unif();
-	    if (drand < dzz) {
-	      ulii = 3;
 	    } else {
-	      if (bufptr3 == bufptr4) {
-		goto oxford_to_bed_ret_INVALID_DOSAGE;
-	      }
-	      dyy = strtod(bufptr2, &bufptr3) + dzz;
-	      if (drand < dyy) {
-		ulii = 2;
+	      drand = rand_unif();
+	      if (drand < dzz) {
+		ulii = 3;
 	      } else {
-		if (bufptr2 == bufptr3) {
+		if (bufptr3 == bufptr4) {
 		  goto oxford_to_bed_ret_INVALID_DOSAGE;
 		}
-		dxx = strtod(bufptr, &bufptr2) + dyy;
+		dyy = strtod(bufptr2, &bufptr3) + dzz;
 		if (drand < dyy) {
-		  ulii = 0;
-		} else if (dxx < 1 - D_EPSILON) {
-		  ulii = 1;
+		  ulii = 2;
 		} else {
-		  // fully called genotype probabilities may add up to less
-		  // than one due to rounding error.  If this appears to have
-		  // happened, do NOT make a missing call; instead rescale
-		  // everything to add to one and reinterpret the random
-		  // number.  (D_EPSILON is currently set to make 4 decimal
-		  // place precision safe to use.)
-		  drand *= dxx;
-		  if (drand < dzz) {
-		    ulii = 3;
-		  } else if (drand < dyy) {
-		    ulii = 2;
-		  } else {
+		  if (bufptr2 == bufptr3) {
+		    goto oxford_to_bed_ret_INVALID_DOSAGE;
+		  }
+		  dxx = strtod(bufptr, &bufptr2) + dyy;
+		  if (drand < dyy) {
 		    ulii = 0;
+		  } else if (dxx < 1 - D_EPSILON) {
+		    ulii = 1;
+		  } else {
+		    // fully called genotype probabilities may add up to less
+		    // than one due to rounding error.  If this appears to have
+		    // happened, do NOT make a missing call; instead rescale
+		    // everything to add to one and reinterpret the random
+		    // number.  (D_EPSILON is currently set to make 4 decimal
+		    // place precision safe to use.)
+		    drand *= dxx;
+		    if (drand < dzz) {
+		      ulii = 3;
+		    } else if (drand < dyy) {
+		      ulii = 2;
+		    } else {
+		      ulii = 0;
+		    }
 		  }
 		}
 	      }
 	    }
+	    bufptr = skip_initial_spaces(bufptr4);
+	  }
+	  cur_word |= ulii << shiftval;
+	  shiftval += 2;
+	  if (shiftval == BITCT) {
+	    *ulptr++ = cur_word;
+	    cur_word = 0;
+	    shiftval = 0;
 	  }
-	  bufptr = skip_initial_spaces(bufptr4);
 	}
-	cur_word |= ulii << shiftval;
-	shiftval += 2;
-	if (shiftval == BITCT) {
+	if (shiftval) {
 	  *ulptr++ = cur_word;
-	  cur_word = 0;
-	  shiftval = 0;
 	}
-      }
-      if (shiftval) {
-	*ulptr++ = cur_word;
-      }
-      if (identical_alleles) {
-	// keep missing calls, but convert hom/het A1 to hom A2.
-	for (ulptr = writebuf; ulptr < (&(writebuf[sample_ctl2])); ulptr++) {
-	  ulii = *ulptr;
-	  *ulptr = ((~ulii) << 1) | ulii | FIVEMASK;
+	if (identical_alleles) {
+	  // keep missing calls, but convert hom/het A1 to hom A2.
+	  for (ulptr = writebuf; ulptr < (&(writebuf[sample_ctl2])); ulptr++) {
+	    ulii = *ulptr;
+	    *ulptr = ((~ulii) << 1) | ulii | FIVEMASK;
+	  }
+	  if (sample_ct % 4) {
+	    writebuf[sample_ctl2 - 1] &= (ONELU << (2 * (sample_ct % BITCT2))) - ONELU;
+	  }
 	}
-	if (sample_ct % 4) {
-	  writebuf[sample_ctl2 - 1] &= (ONELU << (2 * (sample_ct % BITCT2))) - ONELU;
+	if (fwrite_checked(writebuf, sample_ct4, outfile)) {
+	  goto oxford_to_bed_ret_WRITE_FAIL;
 	}
       }
-      if (fwrite_checked(writebuf, sample_ct4, outfile)) {
-	goto oxford_to_bed_ret_WRITE_FAIL;
-      }
       marker_ct++;
       if (!(marker_ct % 1000)) {
 	printf("\r--data: %uk variants converted.", marker_ct / 1000);
 	fflush(stdout);
       }
     }
-    if (!marker_ct) {
+    if ((!marker_ct) && (!allow_no_variants)) {
       logerrprint("Error: Empty .gen file.\n");
       goto oxford_to_bed_ret_INVALID_FORMAT;
     }
@@ -4623,7 +4761,6 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
     loadbuf = (char*)wkspace_base;
     loadbuf_size = wkspace_left;
     if (loadbuf_size > MAXLINEBUFLEN) {
-      // halve the limit since there are two alleles
       loadbuf_size = MAXLINEBUFLEN;
     } else if (loadbuf_size < 3 * 65536) {
       goto oxford_to_bed_ret_NOMEM;
@@ -4636,8 +4773,8 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
       goto oxford_to_bed_ret_INVALID_FORMAT;
     }
     raw_marker_ct = uint_arr[2];
-    if (!raw_marker_ct) {
-      logerrprint("Error: .bgen file contains no markers.\n");
+    if ((!raw_marker_ct) && (!allow_no_variants)) {
+      logerrprint("Error: .bgen file contains no variants.\n");
       goto oxford_to_bed_ret_INVALID_FORMAT;
     }
     if (uint_arr[3] != sample_ct) {
@@ -4687,6 +4824,7 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
 	goto oxford_to_bed_ret_INVALID_FORMAT;
       }
       if (bgen_multichar_alleles) {
+	// v1.1
         if (fread(&usii, 1, 2, infile) < 2) {
 	  goto oxford_to_bed_ret_READ_FAIL;
 	}
@@ -4791,6 +4929,9 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
         fwrite(bufptr, 1, usjj, outfile_bim);
 	bufptr = uint32_writex(&(tbuf[3]), uint_arr[0], ' ');
 	fwrite(tbuf, 1, bufptr - tbuf, outfile_bim);
+
+        // halve the limit since there are two alleles
+	// (may want to enforce NON_WKSPACE_MIN allele length limit?)
         if (uint_arr[1] >= loadbuf_size / 2) {
 	  if (loadbuf_size < MAXLINEBUFLEN) {
 	    goto oxford_to_bed_ret_NOMEM;
@@ -4829,6 +4970,7 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
 	  }
 	}
       } else {
+	// v1.0
 	uii = 0;
 	if (fread(&uii, 1, 1, infile) < 1) {
 	  goto oxford_to_bed_ret_READ_FAIL;
@@ -4842,8 +4984,10 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
 	  ii = ((unsigned char)(loadbuf[2 * uii + 2]));
 	  if (ii > 24) {
 	    if (ii == 255) {
+	      // unknown
 	      ii = 0;
 	    } else if (ii > 252) {
+	      // XY or MT
 	      ii = ii - 228;
 	    } else {
 	      logerrprint("Error: Invalid chromosome code in BGEN v1.0 file.\n");
@@ -4911,7 +5055,7 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
 	  goto oxford_to_bed_ret_READ_FAIL;
 	}
 	if (uii > loadbuf_size) {
-	  if (loadbuf_size < MAXLINEBUFLEN / 2) {
+	  if (loadbuf_size < MAXLINEBUFLEN) {
 	    goto oxford_to_bed_ret_NOMEM;
 	  }
 	  logerrprint("Error: Excessively long compressed SNP block in .bgen file.\n");
@@ -5097,7 +5241,7 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
 }
 
 // side effect: initializes tbuf to first nonempty line of .map/.bim
-int32_t check_cm_col(FILE* bimfile, char* tbuf, uint32_t is_binary, uint32_t bufsize, uint32_t* gd_col_ptr, uintptr_t* line_idx_ptr) {
+int32_t check_cm_col(FILE* bimfile, char* tbuf, uint32_t is_binary, uint32_t allow_no_variants, uint32_t bufsize, uint32_t* cm_col_exists_ptr, uintptr_t* line_idx_ptr) {
   uintptr_t line_idx = 0;
   char* bufptr;
   while (fgets(tbuf, bufsize, bimfile)) {
@@ -5112,14 +5256,14 @@ int32_t check_cm_col(FILE* bimfile, char* tbuf, uint32_t is_binary, uint32_t buf
       return -1;
     }
     if (no_more_tokens_kns(next_token(bufptr))) {
-      *gd_col_ptr = 0;
+      *cm_col_exists_ptr = 0;
     } else {
-      *gd_col_ptr = 1;
+      *cm_col_exists_ptr = 1;
     }
     return 0;
   }
   *line_idx_ptr = 0;
-  return -1;
+  return allow_no_variants? 0 : -1;
 }
 
 int32_t incr_text_allele0(char cc, char* marker_alleles, uint32_t* marker_allele_cts) {
@@ -5242,19 +5386,20 @@ char* get_llstr(Ll_str* llptr, uint32_t allele_idx) {
 
 static inline char* write_token_nt(char* read_ptr, FILE* outfile) {
   // assumes read_ptr is at the beginning of an item to write
+  // nt = "no tab"
   uint32_t slen = strlen_se(read_ptr);
   fwrite(read_ptr, 1, slen, outfile);
-  return skip_initial_spaces(&(read_ptr[slen + 1]));
+  return skip_initial_spaces(&(read_ptr[slen]));
 }
 
 static inline char* write_token(char* read_ptr, FILE* outfile) {
   uint32_t slen = strlen_se(read_ptr);
   fwrite(read_ptr, 1, slen, outfile);
   putc('\t', outfile);
-  return skip_initial_spaces(&(read_ptr[slen + 1]));
+  return skip_initial_spaces(&(read_ptr[slen]));
 }
 
-int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char* outname, char* outname_end, FILE** mapfile_ptr, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_alleles_f, uint32_t map_is_unsorted, uint32_t fam_cols, uint32_t ped_col_skip_iid, uint32_t ped_col_skip, uint32_t gd_col, uint32_t* map_reverse, int64_t ped_size, char* missing_pheno_str) {
+int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char* outname, char* outname_end, FILE** mapfile_ptr, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_alleles_f, uint32_t map_is_unsorted, uint32_t fam_cols, uint32_t ped_col_skip_iid, uint32_t ped_col_skip, uint32_t cm_col_exists, uint32_t* map_reverse, int64_t ped_size, char* missing_pheno_str) {
   // maintain allele counts and linked lists of observed alleles at FAR end of
   // wkspace.
   int32_t retval = 0;
@@ -5410,7 +5555,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
       }
       if ((*aptr1 == missing_geno) && (alen1 == 1)) {
 	if ((alen2 != 1) || (*aptr2 != missing_geno)) {
-          goto ped_to_bed_multichar_allele_ret_INVALID_FORMAT_4;
+	  goto ped_to_bed_multichar_allele_ret_INVALID_FORMAT_4;
 	}
 	marker_idx++;
 	continue;
@@ -5451,10 +5596,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
   }
   putchar('\r');
   logprint(".ped scan complete (for binary autoconversion).\n");
-  if (!sample_ct) {
-    sprintf(logbuf, "Error: No %s in .ped file.\n", g_species_plural);
-    goto ped_to_bed_multichar_allele_ret_INVALID_FORMAT_2;
-  }
+  // sample_ct == 0 impossible
   if (fclose_null(outfile_ptr)) {
     goto ped_to_bed_multichar_allele_ret_WRITE_FAIL;
   }
@@ -5535,7 +5677,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
       putc('\t', outfile);
       bufptr = skip_initial_spaces(&(bufptr[uii + 1]));
       bufptr = write_token(bufptr, outfile);
-      if (gd_col) {
+      if (cm_col_exists) {
         ucc = (unsigned char)(*bufptr);
 	// should be good enough at detecting nonnumeric values...
 	if (((ucc >= '0') && (ucc <= '9')) || (ucc == '-') || (ucc == '+')) {
@@ -5788,13 +5930,15 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
   uintptr_t marker_ct = 0;
   uintptr_t sample_ct = 0;
   uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   uint32_t map_is_unsorted = 0;
   int32_t last_chrom = 0;
   uint32_t last_mpos = 0;
   uint32_t ped_buflen = 1;
   int32_t retval = 0;
-  uint32_t ped_col_skip_iid = 1 + 2 * ((fam_cols & FAM_COL_34) / FAM_COL_34) + ((fam_cols & FAM_COL_5) / FAM_COL_5) + ((fam_cols & FAM_COL_6) / FAM_COL_6);
-  uint32_t ped_col_skip = ped_col_skip_iid + ((fam_cols & FAM_COL_1) / FAM_COL_1);
+  uint32_t ped_col_skip_iid_m1 = ((fam_cols & FAM_COL_34) / (FAM_COL_34 / 2)) + ((fam_cols & FAM_COL_5) / FAM_COL_5) + ((fam_cols & FAM_COL_6) / FAM_COL_6);
+  uint32_t ped_col_skip = ped_col_skip_iid_m1 + 1 + ((fam_cols & FAM_COL_1) / FAM_COL_1);
   uint32_t last_pass = 0;
   int64_t* line_starts = NULL;
 
@@ -5814,7 +5958,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
   uintptr_t line_idx;
   uintptr_t sample_idx;
   uintptr_t ulii;
-  uint32_t cm_col;
+  uint32_t cm_col_exists;
   uint32_t markers_per_pass;
   uint32_t marker_start;
   uint32_t marker_end;
@@ -5855,7 +5999,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
     goto ped_to_bed_ret_OPEN_FAIL;
   }
   tbuf[MAXLINELEN - 6] = ' ';
-  if (check_cm_col(mapfile, tbuf, 0, MAXLINELEN - 5, &cm_col, &line_idx)) {
+  if (check_cm_col(mapfile, tbuf, 0, allow_no_variants, MAXLINELEN - 5, &cm_col_exists, &line_idx)) {
     if (line_idx) {
       goto ped_to_bed_ret_MISSING_TOKENS_MAP;
     } else {
@@ -5863,6 +6007,10 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
       goto ped_to_bed_ret_INVALID_FORMAT;
     }
   }
+  if (!line_idx) {
+    // no variants
+    goto ped_to_bed_empty_map_with_allow_no_vars;
+  }
   line_idx--;
   do {
     line_idx++;
@@ -5875,7 +6023,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
       continue;
     }
     col2_ptr = next_token(col1_ptr);
-    bufptr = next_token_mult(col2_ptr, 1 + cm_col);
+    bufptr = next_token_mult(col2_ptr, 1 + cm_col_exists);
     if (no_more_tokens_kns(bufptr)) {
       goto ped_to_bed_ret_MISSING_TOKENS_MAP;
     }
@@ -5931,18 +6079,19 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
     goto ped_to_bed_ret_READ_FAIL;
   }
   marker_ct = unfiltered_marker_ct - marker_exclude_ct;
-  if (!marker_ct) {
+  if ((!marker_ct) && (!allow_no_variants)) {
     logprint("Error: No variants in current analysis.\n");
     goto ped_to_bed_ret_ALL_MARKERS_EXCLUDED;
   }
+ ped_to_bed_empty_map_with_allow_no_vars:
   marker_exclude = (uintptr_t*)wkspace_alloc(((unfiltered_marker_ct + (BITCT - 1)) / BITCT) * sizeof(intptr_t));
 
   if (map_is_unsorted) {
-    retval = load_sort_and_write_map(&map_reverse, mapfile, 3 + cm_col, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, max_marker_id_len, 1, chrom_info_ptr);
+    retval = load_sort_and_write_map(&map_reverse, mapfile, 3 + cm_col_exists, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, max_marker_id_len, 1, chrom_info_ptr);
     if (retval) {
       goto ped_to_bed_ret_1;
     }
-    cm_col = 1;
+    cm_col_exists = 1;
     fclose_null(&mapfile);
   }
   // provisionally assume max_marker_allele_len == 1
@@ -6005,10 +6154,11 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
     } else {
       col2_ptr = col1_ptr;
     }
-    bufptr = next_token_mult(col2_ptr, ped_col_skip_iid);
+    bufptr = next_token_multz(col2_ptr, ped_col_skip_iid_m1);
     if (no_more_tokens_kns(bufptr)) {
       goto ped_to_bed_ret_MISSING_TOKENS_PED;
     }
+    bufptr = token_endnn(bufptr);
     if ((bufptr - col1_ptr) > (MAXLINELEN / 2) - 4) {
       logprint("\n");
       sprintf(logbuf, "Error: Line %" PRIuPTR " of .ped file has a pathologically long token.\n", line_idx);
@@ -6040,10 +6190,11 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
       goto ped_to_bed_ret_WRITE_FAIL;
     }
     marker_idx = 0;
+    bufptr = skip_initial_spaces(bufptr);
     for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
       cc = *bufptr++;
       if (!cc) {
-        goto ped_to_bed_ret_MISSING_TOKENS_PED;
+	goto ped_to_bed_ret_MISSING_TOKENS_PED;
       }
       bufptr = skip_initial_spaces(bufptr);
       cc2 = *bufptr++;
@@ -6077,6 +6228,10 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
       // either multi-character alleles, or invalid format.  Restart scan.
       putchar('\r');
       logstr("\n");
+      if (!marker_ct) {
+        sprintf(logbuf, "Error: Line %" PRIuPTR " of .ped file has more tokens than expected.\n", line_idx);
+        goto ped_to_bed_ret_INVALID_FORMAT_2;
+      }
       logprint("Possibly irregular .ped line.  Restarting scan, assuming multichar alleles.\n");
       is_single_char_alleles = 0;
       break;
@@ -6100,7 +6255,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
     if (!feof(pedfile)) {
       goto ped_to_bed_ret_READ_FAIL;
     }
-    if (!sample_ct) {
+    if ((!sample_ct) && (!allow_no_samples)) {
       logprint("\n");
       sprintf(logbuf, "Error: No %s in .ped file.\n", g_species_plural);
       goto ped_to_bed_ret_INVALID_FORMAT_2;
@@ -6185,7 +6340,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
       } else {
 	bufptr = write_token(bufptr, outfile);
 	bufptr = write_token(bufptr, outfile);
-	if (cm_col) {
+	if (cm_col_exists) {
 	  ucc = (unsigned char)(*bufptr);
 	  if (((ucc >= '0') && (ucc <= '9')) || (ucc == '-') || (ucc == '+')) {
 	    bufptr = write_token_nt(bufptr, outfile);
@@ -6221,7 +6376,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
     if (wkspace_left >= marker_ct * sample_ct4) {
       markers_per_pass = marker_ct;
       sprintf(logbuf, "Performing single-pass .bed write (%" PRIuPTR " variant%s, %" PRIuPTR " %s).\n", marker_ct, (marker_ct == 1)? "" : "s", sample_ct, species_str(sample_ct));
-      pass_ct = 1;
+      pass_ct = (marker_ct * sample_ct4)? 1 : 0;
     } else {
       if (!map_is_unsorted) {
 	if (wkspace_alloc_ll_checked(&line_starts, sample_ct * sizeof(int64_t))) {
@@ -6371,7 +6526,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
       }
     }
   } else {
-    retval = ped_to_bed_multichar_allele(&pedfile, &outfile, outname, outname_end, &mapfile, unfiltered_marker_ct, marker_exclude, marker_ct, marker_alleles_f, map_is_unsorted, fam_cols, ped_col_skip_iid, ped_col_skip, cm_col, map_reverse, ped_size, missing_pheno_str);
+    retval = ped_to_bed_multichar_allele(&pedfile, &outfile, outname, outname_end, &mapfile, unfiltered_marker_ct, marker_exclude, marker_ct, marker_alleles_f, map_is_unsorted, fam_cols, ped_col_skip_iid_m1 + 1, ped_col_skip, cm_col_exists, map_reverse, ped_size, missing_pheno_str);
     if (retval) {
       goto ped_to_bed_ret_1;
     }
@@ -6422,13 +6577,14 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
   return retval;
 }
 
-int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, uint32_t lgen_modifier, char* lgen_reference_fname, Chrom_info* chrom_info_ptr) {
+int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, uint32_t lgen_modifier, char* lgen_reference_fname, Chrom_info* chrom_info_ptr) {
   unsigned char* wkspace_mark = wkspace_base;
   FILE* infile = NULL;
   FILE* outfile = NULL;
-  char* name_end = (char*)memchr(lgen_namebuf, 0, FNAMESIZE);
   uint32_t lgen_allele_count = lgen_modifier & LGEN_ALLELE_COUNT;
   uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_vars = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   uint32_t affection_01 = (misc_flags / MISC_AFFECTION_01) & 1;
   uint32_t map_cols = 3;
   uintptr_t* marker_exclude = NULL;
@@ -6496,8 +6652,7 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
     goto lgen_to_bed_ret_INVALID_CMDLINE;
   }
 
-  memcpy(name_end, ".map", 5);
-  retval = load_map(&infile, lgen_namebuf, &map_cols, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, &marker_ids, chrom_info_ptr, &marker_pos, &map_is_unsorted, allow_extra_chroms);
+  retval = load_map(&infile, mapname, &map_cols, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, &marker_ids, chrom_info_ptr, &marker_pos, &map_is_unsorted, allow_extra_chroms, allow_no_vars);
   if (retval) {
     goto lgen_to_bed_ret_1;
   }
@@ -6520,16 +6675,17 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
   if (wkspace_alloc_ui_checked(&sample_id_map, unfiltered_marker_ct * sizeof(int32_t))) {
     goto lgen_to_bed_ret_NOMEM;
   }
-  fill_uidx_to_idx(marker_exclude, unfiltered_marker_ct, marker_ct, sample_id_map);
-  for (uii = 0; uii < marker_ct; uii++) {
-    marker_id_map[uii] = sample_id_map[marker_id_map[uii]];
+  if (marker_ct) {
+    fill_uidx_to_idx(marker_exclude, unfiltered_marker_ct, marker_ct, sample_id_map);
+    for (uii = 0; uii < marker_ct; uii++) {
+      marker_id_map[uii] = sample_id_map[marker_id_map[uii]];
+    }
   }
   fclose_null(&infile);
   memcpy(marker_ids, sorted_marker_ids, marker_ct * max_marker_id_len);
   wkspace_reset(sorted_marker_ids);
 
-  memcpy(name_end, ".fam", 5);
-  retval = load_fam(lgen_namebuf, FAM_COL_13456, 1, missing_pheno, affection_01, &sample_ct, &sample_ids, &max_sample_id_len, &paternal_ids, &max_paternal_id_len, &maternal_ids, &max_maternal_id_len, &sex_nm, &sex_male, &affection, &pheno_nm, &pheno_c, &pheno_d, &founder_info, &sample_exclude);
+  retval = load_fam(famname, FAM_COL_13456, 1, missing_pheno, affection_01, &sample_ct, &sample_ids, &max_sample_id_len, &paternal_ids, &max_paternal_id_len, &maternal_ids, &max_maternal_id_len, &sex_nm, &sex_male, &affection, &pheno_nm, &pheno_c, &pheno_d, &founder_info, &sample_exclude, allow_no_samples);
   if (retval) {
     goto lgen_to_bed_ret_1;
   }
@@ -6644,8 +6800,7 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
   if (fwrite_checked("l\x1b\x01", 3, outfile)) {
     goto lgen_to_bed_ret_WRITE_FAIL;
   }
-  memcpy(name_end, ".lgen", 6);
-  if (fopen_checked(&infile, lgen_namebuf, "r")) {
+  if (fopen_checked(&infile, lgenname, "r")) {
     goto lgen_to_bed_ret_OPEN_FAIL;
   }
   if (fseeko(infile, 0, SEEK_END)) {
@@ -6830,14 +6985,11 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
       if (ii != -1) {
 	marker_idx = marker_id_map[(uint32_t)ii];
 	a1len = strlen_se(a1ptr);
-	ucc = (unsigned char)(*a1ptr);
-	if ((a1len != 1) || (ucc < 48) || (ucc > 50)) {
+	uii = ((uint32_t)((unsigned char)(*a1ptr))) - 48;
+	if ((a1len != 1) || (uii > 2)) {
 	  uii = 1;
-	} else {
-	  uii = ucc - 48;
-	  if (uii) {
-	    uii++;
-	  }
+	} else if (uii) {
+	  uii++;
 	}
 	ulii = marker_idx * sample_ct4 + (sample_idx / 4);
 	ujj = (sample_idx % 4) * 2;
@@ -6887,8 +7039,7 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
       goto lgen_to_bed_ret_OPEN_FAIL;
     }
   } else {
-    memcpy(name_end, ".map", 5);
-    if (fopen_checked(&infile, lgen_namebuf, "r")) {
+    if (fopen_checked(&infile, mapname, "r")) {
       goto lgen_to_bed_ret_OPEN_FAIL;
     }
   }
@@ -6941,13 +7092,12 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
   if (fclose_null(&outfile)) {
     goto lgen_to_bed_ret_WRITE_FAIL;
   }
-  memcpy(name_end, ".fam", 5);
   memcpy(outname_end, ".fam", 5);
 #ifdef _WIN32
-  uii = GetFullPathName(lgen_namebuf, FNAMESIZE, tbuf, NULL);
+  uii = GetFullPathName(famname, FNAMESIZE, tbuf, NULL);
   if ((!uii) || (uii > FNAMESIZE))
 #else
-  if (!realpath(lgen_namebuf, tbuf))
+  if (!realpath(famname, tbuf))
 #endif
   {
     LOGERRPRINTFWW("Error: Failed to open %s.\n", outname);
@@ -6961,7 +7111,7 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
   if (!(cptr && (!strcmp(tbuf, &(tbuf[FNAMESIZE + 64])))))
 #endif
   {
-    if (fopen_checked(&infile, lgen_namebuf, "r")) {
+    if (fopen_checked(&infile, famname, "r")) {
       goto lgen_to_bed_ret_OPEN_FAIL;
     }
     if (fopen_checked(&outfile, outname, "w")) {
@@ -7022,7 +7172,8 @@ int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_
     retval = RET_INVALID_FORMAT;
     break;
   lgen_to_bed_ret_NOT_BIALLELIC:
-    LOGERRPRINTFWW("Error: Variant '%s' in .lgen file has 3+ different alleles.\n", id_buf);
+    *cptr4 = '\0';
+    LOGERRPRINTFWW("Error: Variant '%s' in .lgen file has 3+ different alleles.\n", cptr3);
     retval = RET_INVALID_FORMAT;
     break;
   lgen_to_bed_ret_INVALID_CMDLINE:
@@ -7094,6 +7245,8 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
   uintptr_t sample_ct = 0;
   uintptr_t line_idx = 0;
   uint32_t no_extra_cols = 1;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   int32_t retval = 0;
   uint32_t pct = 0;
   uint32_t map_is_unsorted = 0;
@@ -7189,7 +7342,7 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
   if (!feof(infile)) {
     goto transposed_to_bed_ret_READ_FAIL;
   }
-  if (!sample_ct) {
+  if ((!sample_ct) && (!allow_no_samples)) {
     sprintf(logbuf, "Error: No %s in .tfam file.\n", g_species_plural);
     goto transposed_to_bed_ret_INVALID_FORMAT_2R;
   }
@@ -7222,9 +7375,6 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
     goto transposed_to_bed_ret_WRITE_FAIL;
   }
 
-  // given e.g. 6MB indels in real datasets, there's legitimate reason for a
-  // .tped line to be even longer than 2GB, so we use ftoken_...() over
-  // fgets().
   if (fopen_checked(&infile, tpedname, "r")) {
     goto transposed_to_bed_ret_OPEN_FAIL;
   }
@@ -7246,7 +7396,9 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
       break;
     }
     // assume first four fields are within MAXLINELEN characters, but after
-    // that, anything goes
+    // that, anything goes.  Given e.g. 6MB indels in real datasets, there's
+    // legitimate reason for a .tped line to be even longer than 2GB, so we use
+    // a custom loading loop.
     cptr = skip_initial_spaces(tbuf);
     if (is_eoln_kns(*cptr)) {
       if (!tbuf[MAXLINELEN - 1]) {
@@ -7355,13 +7507,13 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
       }
       axptr = cptr2;
       axlen = strlen_se(cptr2);
+      if (!axlen) {
+	goto transposed_to_bed_ret_MISSING_TOKENS;
+      }
       cptr2 = &(axptr[axlen]);
       // only way for this to happen if it isn't at end of buffer is if we're
       // at EOF, which is an error anyway
       if (!(*cptr2)) {
-	if (!axlen) {
-	  goto transposed_to_bed_ret_MISSING_TOKENS;
-	}
 	cptr3 = memcpya(allele_buf, axptr, axlen);
         axptr = allele_buf;
 	do {
@@ -7605,6 +7757,11 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
   if (fclose_null(&outfile)) {
     goto transposed_to_bed_ret_WRITE_FAIL;
   }
+  if ((!marker_ct) && (!allow_no_variants)) {
+    fputs("\b\b\b\b\b     \r", stdout);
+    logerrprint("Error: Empty .tped file.\n");
+    goto transposed_to_bed_ret_INVALID_FORMAT;
+  }
 
   chrom_info_ptr->zero_extra_chroms = 0;
   if (map_is_unsorted) {
@@ -7846,7 +8003,7 @@ int32_t vcf_sample_line(char* outname, char* outname_end, int32_t missing_pheno,
       } while (bufptr2);
     }
   }
-  do {
+  while (((unsigned char)bufptr[0]) >= ' ') {
     sample_ct++;
     bufptr2 = strchr(bufptr, '\t');
     if (bufptr2) {
@@ -7914,14 +8071,10 @@ int32_t vcf_sample_line(char* outname, char* outname_end, int32_t missing_pheno,
       break;
     }
     bufptr = &(bufptr2[1]);
-  } while (((unsigned char)bufptr[0]) > ' ');
+  }
   if (fclose_null(&outfile)) {
     goto vcf_sample_line_ret_WRITE_FAIL;
   }
-  if (!sample_ct) {
-    sprintf(logbuf, "Error: No samples in .%ccf file.\n", flag_char);
-    goto vcf_sample_line_ret_INVALID_FORMAT_2;
-  }
   *sample_ct_ptr = sample_ct;
   while (0) {
   vcf_sample_line_ret_OPEN_FAIL:
@@ -7997,14 +8150,18 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
   uintptr_t fexcept_ct = 0;
   uintptr_t max_fexcept_len = 5;
   uintptr_t sample_ct = 0;
+  uintptr_t marker_skip_ct = 0;
+  uintptr_t missing_gt_ct = 0;
   uint32_t double_id = (misc_flags / MISC_DOUBLE_ID) & 1;
   uint32_t check_qual = (vcf_min_qual != -1);
   uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
   uint32_t biallelic_only = (misc_flags / MISC_BIALLELIC_ONLY) & 1;
   uint32_t biallelic_strict = (misc_flags / MISC_BIALLELIC_ONLY_STRICT) & 1;
   uint32_t skip3_list = (misc_flags / MISC_BIALLELIC_ONLY_LIST) & 1;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
+  uint32_t require_gt = (misc_flags / MISC_VCF_REQUIRE_GT) & 1;
   uint32_t marker_ct = 0;
-  uint32_t marker_skip_ct = 0;
   uint32_t gq_field_pos = 0;
   uint32_t gp_field_pos = 0;
   uint32_t vcf_half_call_explicit_error = (vcf_half_call == VCF_HALF_CALL_ERROR);
@@ -8119,13 +8276,23 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
     goto vcf_to_bed_ret_INVALID_FORMAT;
   }
   bufptr = &(bufptr[38]);
-  if (memcmp(bufptr, "\tFORMAT\t", 8) || (((unsigned char)bufptr[8]) <= ' ')) {
-    logerrprint("Error: No genotype data in .vcf file.\n");
-    goto vcf_to_bed_ret_INVALID_FORMAT;
+  if (!memcmp(bufptr, "\tFORMAT\t", 8)) {
+    retval = vcf_sample_line(outname, outname_end, missing_pheno, &(bufptr[8]), const_fid, double_id, id_delim, vcf_idspace_to, 'v', &sample_ct);
+    if (retval) {
+      goto vcf_to_bed_ret_1;
+    }
+  } else if (allow_no_samples) {
+    memcpy(outname_end, ".fam", 5);
+    if (fopen_checked(&outfile, outname, "w")) {
+      goto vcf_to_bed_ret_OPEN_FAIL;
+    }
+    if (fclose_null(&outfile)) {
+      goto vcf_to_bed_ret_WRITE_FAIL;
+    }
   }
-  retval = vcf_sample_line(outname, outname_end, missing_pheno, &(bufptr[8]), const_fid, double_id, id_delim, vcf_idspace_to, 'v', &sample_ct);
-  if (retval) {
-    goto vcf_to_bed_ret_1;
+  if ((!sample_ct) && (!allow_no_samples)) {
+    logerrprint("Error: No samples in .vcf file.\n");
+    goto vcf_to_bed_ret_INVALID_FORMAT;
   }
   sample_ct4 = (sample_ct + 3) / 4;
   sample_ctl2 = (sample_ct + BITCT2 - 1) / BITCT2;
@@ -8292,6 +8459,10 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
       }
       bufptr2[-1] = '\t';
     }
+    if (!sample_ct) {
+      alt_allele_idx = 1;
+      goto vcf_to_bed_skip_genotype_write;
+    }
     bufptr = bufptr2;
     bufptr2 = strchr(bufptr, '\t');
     if (!bufptr2) {
@@ -8303,8 +8474,16 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
       goto vcf_to_bed_ret_MISSING_TOKENS;
     }
     if (memcmp(bufptr, "GT", 2)) {
-      marker_skip_ct++;
-      continue;
+      // We previously always skipped this case, but that's inconsistent with
+      // how we now handle zero-sample VCFs.
+      if (require_gt) {
+	marker_skip_ct++;
+	continue;
+      }
+      fill_vec_55(base_bitfields, sample_ct);
+      missing_gt_ct++;
+      alt_allele_idx = 1;
+      goto vcf_to_bed_genotype_write;
     }
     bufptr2++;
     if (vcf_min_gq != -1) {
@@ -8711,9 +8890,11 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
       *ref_ptr++ = (uljj & ulii) | (((~ulkk) >> 1) & FIVEMASK);
     }
     ref_ptr[-1] &= final_mask;
+  vcf_to_bed_genotype_write:
     if (fwrite_checked(base_bitfields, sample_ct4, outfile)) {
       goto vcf_to_bed_ret_WRITE_FAIL;
     }
+  vcf_to_bed_skip_genotype_write:
     chrom_ptr[chrom_len] = '\0';
     fputs(chrom_ptr, bimfile);
     putc('\t', bimfile);
@@ -8775,10 +8956,23 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
     }
   }
   putchar('\r');
+  if ((!marker_ct) && (!allow_no_variants)) {
+    if (marker_skip_ct) {
+      logerrprint("Error: All variants in VCF skipped.\n");
+      retval = RET_ALL_MARKERS_EXCLUDED;
+      goto vcf_to_bed_ret_1;
+    } else {
+      logerrprint("Error: No variants in VCF file.\n");
+      goto vcf_to_bed_ret_INVALID_FORMAT;
+    }
+  }
   *outname_end = '\0';
   LOGPRINTFWW("--vcf: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
   if (marker_skip_ct) {
-    LOGPRINTF("(%u variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
+    LOGPRINTF("(%" PRIuPTR " variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
+  }
+  if (missing_gt_ct) {
+    LOGERRPRINTF("Warning: %" PRIuPTR " variant record%s had no GT field.\n", missing_gt_ct, (missing_gt_ct == 1)? "" : "s");
   }
   while (0) {
   vcf_to_bed_ret_NOMEM:
@@ -8834,44 +9028,47 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
   return retval;
 }
 
-int32_t read_bcf_typed_integer(gzFile gz_infile, uint32_t* int_ptr) {
+int32_t read_bcf_typed_nonnegative_integer(gzFile gz_infile, uint32_t* int_ptr) {
+  // errors out on missing and negative values.
   int32_t retval = 0;
   int32_t ii = gzgetc(gz_infile);
   uint32_t uii;
   if (ii == -1) {
-    goto read_bcf_typed_integer_ret_READ_OR_FORMAT_FAIL;
+    goto read_bcf_typed_nonnegative_integer_ret_READ_OR_FORMAT_FAIL;
   }
   if (ii == 0x11) {
     ii = gzgetc(gz_infile);
-    if (ii == -1) {
-      goto read_bcf_typed_integer_ret_READ_OR_FORMAT_FAIL;
-    } else if (((uint32_t)ii) > 127) {
-      goto read_bcf_typed_integer_ret_INVALID_FORMAT_GENERIC;
+    if (((uint32_t)ii) > 127) {
+      if (ii == -1) {
+	goto read_bcf_typed_nonnegative_integer_ret_READ_OR_FORMAT_FAIL;
+      }
+      goto read_bcf_typed_nonnegative_integer_ret_INVALID_FORMAT_GENERIC;
     }
     *int_ptr = (uint32_t)ii;
   } else if (ii == 0x12) {
     uii = gzgetc(gz_infile);
     ii = gzgetc(gz_infile);
-    if (ii == -1) {
-      goto read_bcf_typed_integer_ret_READ_OR_FORMAT_FAIL;
-    } else if (((uint32_t)ii) > 127) {
-      goto read_bcf_typed_integer_ret_INVALID_FORMAT_GENERIC;
+    if (((uint32_t)ii) > 127) {
+      if (ii == -1) {
+	goto read_bcf_typed_nonnegative_integer_ret_READ_OR_FORMAT_FAIL;
+      }
+      goto read_bcf_typed_nonnegative_integer_ret_INVALID_FORMAT_GENERIC;
     }
     *int_ptr = uii | (((uint32_t)ii) << 8);
   } else if (ii == 0x13) {
     if (gzread(gz_infile, int_ptr, 4) < 4) {
-      goto read_bcf_typed_integer_ret_READ_OR_FORMAT_FAIL;
+      goto read_bcf_typed_nonnegative_integer_ret_READ_OR_FORMAT_FAIL;
     }
   } else {
-    goto read_bcf_typed_integer_ret_INVALID_FORMAT_GENERIC;
+    goto read_bcf_typed_nonnegative_integer_ret_INVALID_FORMAT_GENERIC;
   }
   while (0) {
-  read_bcf_typed_integer_ret_READ_OR_FORMAT_FAIL:
+  read_bcf_typed_nonnegative_integer_ret_READ_OR_FORMAT_FAIL:
     if (!gzeof(gz_infile)) {
       retval = RET_READ_FAIL;
       break;
     }
-  read_bcf_typed_integer_ret_INVALID_FORMAT_GENERIC:
+  read_bcf_typed_nonnegative_integer_ret_INVALID_FORMAT_GENERIC:
     logerrprint("Error: Improperly formatted .bcf file.\n");
     retval = RET_INVALID_FORMAT;
     break;
@@ -8888,7 +9085,7 @@ int32_t read_bcf_typed_string(gzFile gz_infile, char* readbuf, uint32_t maxlen,
     goto read_bcf_typed_string_ret_READ_OR_FORMAT_FAIL;
   }
   if (((uint32_t)ii) == 0xf7) {
-    retval = read_bcf_typed_integer(gz_infile, &slen);
+    retval = read_bcf_typed_nonnegative_integer(gz_infile, &slen);
     if (retval) {
       goto read_bcf_typed_string_ret_1;
     }
@@ -8940,6 +9137,8 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
   uintptr_t max_contig_len = 0;
   uintptr_t max_fexcept_len = 0;
   uintptr_t fexcept_ct = 0;
+  uintptr_t marker_skip_ct = 0;
+  uintptr_t missing_gt_ct = 0;
   uintptr_t topsize = 0;
   uint32_t double_id = (misc_flags / MISC_DOUBLE_ID) & 1;
   uint32_t check_qual = (vcf_min_qual != -1);
@@ -8948,11 +9147,13 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
   uint32_t biallelic_strict = (misc_flags / MISC_BIALLELIC_ONLY_STRICT) & 1;
   uint32_t skip3_list = (misc_flags / MISC_BIALLELIC_ONLY_LIST) & 1;
   uint32_t vcf_filter = (misc_flags / MISC_VCF_FILTER) & 1;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
+  uint32_t require_gt = (misc_flags / MISC_VCF_REQUIRE_GT) & 1;
+  uint32_t sample_ct = 0;
   uint32_t stringdict_ct = 1;
   uint32_t gt_idx = 0;
   uint32_t marker_ct = 0;
-  uint32_t marker_skip_ct = 0;
-  uint32_t sample_ct = 0;
   uint32_t umm = 0;
   int32_t retval = 0;
   float vcf_min_qualf = vcf_min_qual;
@@ -9015,7 +9216,7 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
     if (memcmp(tbuf, "BCF\4", 4)) {
       LOGPREPRINTFWW("Error: %s is not a BCF2 file.\n", bcfname);
     } else {
-      LOGPREPRINTFWW("Error: %s appears to be a BCF1 file; --bcf only supports BCF2. Use 'bcftools view' to convert to a readable VCF.\n", bcfname);
+      LOGPREPRINTFWW("Error: %s appears to be a BCF1 file; --bcf only supports BCF2. Use 'bcftools view' to convert it to a PLINK-readable VCF.\n", bcfname);
     }
     goto bcf_to_bed_ret_INVALID_FORMAT_2;
   }
@@ -9028,8 +9229,10 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
   if (gzread(gz_infile, &header_size, 4) < 4) {
     goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
   }
-  // must have at least fileformat, GT, and one contig
-  if (header_size < 96) {
+  // must have at least fileformat, and first eight fields of #CHROM line.  GT
+  // not required with --allow-no-samples, contig not required with
+  // --allow-no-vars.
+  if (header_size < 59) {
     goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
   }
   if (vcf_filter_exceptions_flattened) {
@@ -9140,23 +9343,39 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
     }
   }
-  if (!gt_idx) {
-    logerrprint("Error: No GT field in .bcf header.\n");
-    goto bcf_to_bed_ret_INVALID_FORMAT;
-  }
-  if (!contig_ct) {
+  if ((!allow_no_variants) && (!contig_ct)) {
     logerrprint("Error: No contig fields in .bcf header.\n");
     goto bcf_to_bed_ret_INVALID_FORMAT;
   }
-  if (memcmp(linebuf, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t", 46)) {
+  if (memcmp(linebuf, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", 38)) {
     goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
   }
-  *linebuf_end = '\0';
-  retval = vcf_sample_line(outname, outname_end, missing_pheno, &(linebuf[46]), const_fid, double_id, id_delim, vcf_idspace_to, 'b', &ulii);
-  if (retval) {
-    goto bcf_to_bed_ret_1;
+  if (!memcmp(&(linebuf[38]), "\tFORMAT\t", 8)) {
+    *linebuf_end = '\0';
+    retval = vcf_sample_line(outname, outname_end, missing_pheno, &(linebuf[46]), const_fid, double_id, id_delim, vcf_idspace_to, 'b', &ulii);
+    if (retval) {
+      goto bcf_to_bed_ret_1;
+    }
+    if (ulii >= 0x1000000) {
+      // variant records only have 24 bits allocated for n_sample
+      logerrprint("Error: .bcf file contains >= 2^24 sample IDs.\n");
+      goto bcf_to_bed_ret_INVALID_FORMAT;
+    }
+    sample_ct = ulii;
+  } else if (allow_no_samples) {
+    gt_idx = 0;
+    memcpy(outname_end, ".fam", 5);
+    if (fopen_checked(&outfile, outname, "w")) {
+      goto bcf_to_bed_ret_OPEN_FAIL;
+    }
+    if (fclose_null(&outfile)) {
+      goto bcf_to_bed_ret_WRITE_FAIL;
+    }
+  }
+  if ((!sample_ct) && (!allow_no_samples)) {
+    logerrprint("Error: No samples in .bcf file.\n");
+    goto bcf_to_bed_ret_INVALID_FORMAT;
   }
-  sample_ct = ulii;
   sample_ct4 = (sample_ct + 3) / 4;
   sample_ctl2 = (sample_ct + (BITCT2 - 1)) / BITCT2;
   sample_ctv2 = 2 * ((sample_ct + (BITCT - 1)) / BITCT);
@@ -9169,7 +9388,7 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
   }
   fill_ulong_zero(contig_bitfield, ulii);
   ulii = contig_ct;
-  do {
+  while (ulii) {
     ulii--;
     ii = get_chrom_code(chrom_info_ptr, contig_list->ss);
     if (ii < 0) {
@@ -9186,7 +9405,7 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       strcpy(&(contigdict[ulii * max_contig_len]), contig_list->ss);
     }
     contig_list = contig_list->next;
-  } while (ulii);
+  }
   if (vcf_filter) {
     uii = (stringdict_ct + (BITCT - 1)) / BITCT;
     if (wkspace_alloc_ul_checked(&fexcept_bitfield, uii * sizeof(intptr_t))) {
@@ -9232,6 +9451,18 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
   if (fwrite_checked("l\x1b\x01", 3, outfile)) {
     goto bcf_to_bed_ret_WRITE_FAIL;
   }
+  if ((!gt_idx) && require_gt) {
+    if (!allow_no_variants) {
+      logerrprint("Error: .bcf header doesn't define FORMAT:GT.\n");
+      retval = RET_ALL_MARKERS_EXCLUDED;
+      goto bcf_to_bed_ret_1;
+    }
+    logerrprint("Warning: Skipping all variants since .bcf header doesn't define FORMAT:GT.\n");
+    goto bcf_to_bed_skip_all_variants;
+  }
+  // possible todo: optimize other no-GT cases.  e.g. if no sample information
+  // is needed, don't write the .bed or .fam.
+
   memcpyl3(tbuf2, "\t0\t");
   while (1) {
     lastloc = gztell(gz_infile) + 8;
@@ -9258,10 +9489,6 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       goto bcf_to_bed_ret_1;
     }
     n_allele = bcf_var_header[6] >> 16;
-    if (!n_allele) {
-      // skip instead of error out on zero alleles?
-      goto bcf_to_bed_marker_skip;
-    }
     if (biallelic_strict && (n_allele > 2)) {
       goto bcf_to_bed_skip3;
     }
@@ -9270,18 +9497,30 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
     }
     ujj = NON_WKSPACE_MIN; // remaining allele name buffer space
     bufptr = allele_buf;
-    for (uii = 0; uii < n_allele; uii++) {
-      retval = read_bcf_typed_string(gz_infile, bufptr, ujj, &ukk);
-      if (retval) {
-	goto bcf_to_bed_ret_1;
-      }
-      if ((!uii) && (!ukk)) {
-	// skip instead of error out on missing ref allele?
-        goto bcf_to_bed_marker_skip;
+    if (n_allele) {
+      for (uii = 0; uii < n_allele; uii++) {
+	retval = read_bcf_typed_string(gz_infile, bufptr, ujj, &ukk);
+	if (retval) {
+	  goto bcf_to_bed_ret_1;
+	}
+	if ((!uii) && ((!ukk) || ((ukk == 1) && (*bufptr == 'N')))) {
+	  // convert ref 'N' or '.' to missing genotype.  ('.' case was skipped
+	  // in the past, and 'N' was not converted.)
+	  allele_lens[0] = 1;
+	  allele_ptrs[0] = bufptr;
+	  *bufptr++ = missing_geno;
+	} else {
+	  allele_lens[uii] = ukk;
+	  allele_ptrs[uii] = bufptr;
+	  bufptr = &(bufptr[ukk]);
+	}
       }
-      allele_lens[uii] = ukk;
-      allele_ptrs[uii] = bufptr;
-      bufptr = &(bufptr[ukk]);
+    } else {
+      // n_allele == 0 case was previously skipped, but it might have a place
+      // with --allow-no-samples.
+      allele_lens[0] = 1;
+      allele_ptrs[0] = bufptr;
+      *bufptr = missing_geno;
     }
     if (vcf_filter) {
       ii = gzgetc(gz_infile);
@@ -9290,7 +9529,7 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       } else {
 	ujj = ((uint32_t)ii) >> 4;
 	if (ujj == 15) {
-          retval = read_bcf_typed_integer(gz_infile, &ujj);
+          retval = read_bcf_typed_nonnegative_integer(gz_infile, &ujj);
 	  if (retval) {
 	    goto bcf_to_bed_ret_1;
 	  }
@@ -9359,15 +9598,29 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
 	}
       }
     }
+    alt_allele_idx = 1;
+    if ((!gt_idx) || (!bcf_var_header[1])) {
+      if (require_gt) {
+	goto bcf_to_bed_marker_skip;
+      }
+      ulljj = gztell(gz_infile);
+      ullii = lastloc + bcf_var_header[0] + bcf_var_header[1];
+      if (!sample_ct) {
+	goto bcf_to_bed_skip_genotype_write;
+      }
+      missing_gt_ct++;
+      fill_vec_55(base_bitfields, sample_ct);
+      goto bcf_to_bed_genotype_write;
+    }
+
     // skip INFO
     ullii = lastloc + bcf_var_header[0];
     if (gzseek(gz_infile, ullii, SEEK_SET) == -1) {
       goto bcf_to_bed_ret_READ_FAIL;
     }
-
     ullii += bcf_var_header[1];
     while (1) {
-      retval = read_bcf_typed_integer(gz_infile, &uii);
+      retval = read_bcf_typed_nonnegative_integer(gz_infile, &uii);
       if (retval) {
 	goto bcf_to_bed_ret_1;
       }
@@ -9377,7 +9630,7 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       }
       ujj = ((uint32_t)ii) >> 4;
       if (ujj == 15) {
-	retval = read_bcf_typed_integer(gz_infile, &ujj);
+	retval = read_bcf_typed_nonnegative_integer(gz_infile, &ujj);
 	if (retval) {
 	  goto bcf_to_bed_ret_1;
 	}
@@ -9386,17 +9639,17 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
         ukk = ((uint32_t)ii) & 0x0f;
 	if ((ukk == 3) || (ukk == 5)) {
 	  umm = 4; // int32, float = 4 bytes
-	} else if (ukk && (ukk > 2)) {
+	} else if ((!ukk) || (ukk > 2)) {
 	  logerrprint("Error: Unrecognized type in .bcf file.\n");
 	  goto bcf_to_bed_ret_INVALID_FORMAT;
 	} else {
 	  umm = ukk;
 	}
       }
-      ulljj = gztell(gz_infile) + ujj * umm * sample_ct;
-      // uii = format code
-      // ujj = vector length
-      // ukk = type code
+      ulljj = gztell(gz_infile) + ((uint64_t)ujj) * umm * sample_ct;
+      // uii = format key
+      // ujj = for GT, max ploidy
+      // ukk = integer/float/character type code
       // umm = bytes per entry
       if (ulljj > ullii) {
 	goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
@@ -9404,17 +9657,26 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       if (uii == gt_idx) {
 	break;
       }
+      // possible todo: --vcf-min-gq and --vcf-min-gp support
       if (ujj) {
-	if (gzseek(gz_infile, ujj * umm * sample_ct, SEEK_CUR) == -1) {
+	if (gzseek(gz_infile, ((uint64_t)ujj) * umm * sample_ct, SEEK_CUR) == -1) {
 	  goto bcf_to_bed_ret_READ_FAIL;
 	}
 	if (ulljj == ullii) {
-	  goto bcf_to_bed_marker_skip2;
+	  if (require_gt) {
+	    goto bcf_to_bed_marker_skip2;
+	  } else {
+	    missing_gt_ct++;
+	    fill_vec_55(base_bitfields, sample_ct);
+	    goto bcf_to_bed_genotype_write;
+	  }
 	}
       }
     }
     if (!ujj) {
-      goto bcf_to_bed_marker_skip;
+      // ploidy zero previously caused the variant to be skipped
+      fill_vec_55(base_bitfields, sample_ct);
+      goto bcf_to_bed_genotype_write;
     }
     if (ukk == 5) {
       logerrprint("Error: GT field cannot contain floating point values.\n");
@@ -9426,6 +9688,7 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       logerrprint("Error: --bcf does not support GT vectors requiring >12 bytes per sample.\n");
       goto bcf_to_bed_ret_INVALID_FORMAT;
     }
+    // ujj * umm <= 12 and sample_ct < 2^24, so no uint64_t cast needed there
     if ((uint32_t)((uint64_t)gzread(gz_infile, loadbuf, ujj * umm * sample_ct)) < ujj * umm * sample_ct) {
       goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
     }
@@ -9568,7 +9831,6 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
 	}
       }
     }
-    alt_allele_idx = 1;
     if (n_allele > 2) {
       ulii = popcount2_longs(&(base_bitfields[sample_ctv2]), sample_ctl2);
       for (ulkk = 2; ulkk < n_allele; ulkk++) {
@@ -9600,9 +9862,11 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       *ref_ptr++ = (uljj & ulii) | (((~ulkk) >> 1) & FIVEMASK);
     }
     ref_ptr[-1] &= final_mask;
+  bcf_to_bed_genotype_write:
     if (fwrite_checked(base_bitfields, sample_ct4, outfile)) {
       goto bcf_to_bed_ret_WRITE_FAIL;
     }
+  bcf_to_bed_skip_genotype_write:
     fputs(&(contigdict[bcf_var_header[2] * max_contig_len]), bimfile);
     putc('\t', bimfile);
     if (marker_id_len) {
@@ -9611,8 +9875,8 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
       putc('.', bimfile);
     }
     // bcf2 coordinates are 0-based while vcf is 1-based... (seriously, whose
-    // idea was this?  this is basically a bug in the spec, but we have to play
-    // along)
+    // idea was this?  this is basically a bug in the spec due to how e.g.
+    // telomeres are supposed to be encoded, but we have to play along)
     bufptr = uint32_writex(&(tbuf2[3]), bcf_var_header[3] + 1, '\t');
     if (fwrite_checked(tbuf2, bufptr - tbuf2, bimfile)) {
       goto bcf_to_bed_ret_WRITE_FAIL;
@@ -9664,9 +9928,15 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
   bcf_to_bed_marker_skip2:
     marker_skip_ct++;
   }
-  if (!marker_ct) {
-    logerrprint("Error: No variants in .bcf file.\n");
-    goto bcf_to_bed_ret_INVALID_FORMAT;
+  if ((!marker_ct) && (!allow_no_variants)) {
+    if (marker_skip_ct) {
+      logerrprint("Error: All variants in .bcf file skipped.\n");
+      retval = RET_ALL_MARKERS_EXCLUDED;
+      goto bcf_to_bed_ret_1;
+    } else {
+      logerrprint("Error: No variants in .bcf file.\n");
+      goto bcf_to_bed_ret_INVALID_FORMAT;
+    }
   }
   if (gzclose(gz_infile) != Z_OK) {
     gz_infile = NULL;
@@ -9680,10 +9950,14 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
     goto bcf_to_bed_ret_WRITE_FAIL;
   }
   putchar('\r');
+ bcf_to_bed_skip_all_variants:
   *outname_end = '\0';
   LOGPRINTFWW("--bcf: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
   if (marker_skip_ct) {
-    LOGPRINTF("(%u variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
+    LOGPRINTF("(%" PRIuPTR " variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
+  }
+  if (missing_gt_ct) {
+    LOGERRPRINTF("Warning: %" PRIuPTR " variant record%s had no GT field.\n", missing_gt_ct, (missing_gt_ct == 1)? "" : "s");
   }
   while (0) {
   bcf_to_bed_ret_NOMEM2:
@@ -9743,7 +10017,7 @@ uint32_t write_23_cached_chrom(char* write_cache, uint32_t markers_left, char ch
   return 0;
 }
 
-int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_t modifier_23, char* fid_23, char* iid_23, double pheno_23, char* paternal_id_23, char* maternal_id_23, Chrom_info* chrom_info_ptr) {
+int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_t modifier_23, char* fid_23, char* iid_23, double pheno_23, uint64_t misc_flags, char* paternal_id_23, char* maternal_id_23, Chrom_info* chrom_info_ptr) {
   unsigned char* wkspace_mark = wkspace_base;
   FILE* infile_23 = NULL;
   FILE* outfile_bed = NULL;
@@ -9751,6 +10025,7 @@ int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_
   uintptr_t line_idx = 0;
   uint32_t is_male = modifier_23 & M23_MALE;
   uint32_t is_female = modifier_23 & M23_FEMALE;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   uint32_t x_present = 0;
   uint32_t haploid_x_present = 0;
   uint32_t y_present = 0;
@@ -9921,7 +10196,7 @@ int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_
   if (!feof(infile_23)) {
     goto bed_from_23_ret_READ_FAIL;
   }
-  if ((writebuf_cur == &(writebuf[3])) && (writebuf[0] == 'l')) {
+  if ((writebuf_cur == &(writebuf[3])) && (writebuf[0] == 'l') && (!allow_no_variants)) {
     if (chrom_mask_23 == 0x7ffffff) {
       logerrprint("Error: No --23file variants.\n");
       goto bed_from_23_ret_INVALID_FORMAT;
@@ -11190,22 +11465,28 @@ int32_t simulate_dataset(char* outname, char* outname_end, uint32_t flags, char*
 int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_allele_name, char*** allele_missing_ptr, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* recode_allele_reverse, char* recode_allele_extra) {
   FILE* rafile = NULL;
   uint32_t missing_allele = 0;
+  uint32_t marker_id_htable_size = get_id_htable_size(marker_ct);
   uintptr_t rae_size = 0;
   uintptr_t line_idx = 0;
-  char* sorted_ids;
-  uint32_t* id_map;
+  uintptr_t topsize = 0;
+  uint32_t* marker_id_htable;
   char* bufptr;
   char* bufptr2;
   int32_t retval;
   uint32_t slen;
   uint32_t alen;
-  int32_t ii;
   uintptr_t marker_uidx;
   if (fopen_checked(&rafile, recode_allele_name, "r")) {
     goto recode_allele_load_ret_OPEN_FAIL;
   }
-  retval = sort_item_ids(&sorted_ids, &id_map, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_ct, marker_ids, max_marker_id_len, 0, 0, strcmp_deref);
+  marker_id_htable = (uint32_t*)top_alloc(&topsize, marker_id_htable_size * sizeof(int32_t));
+  if (!marker_id_htable) {
+    goto recode_allele_load_ret_NOMEM;
+  }
+  wkspace_left -= topsize;
+  retval = populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, marker_id_htable, marker_id_htable_size);
   if (retval) {
+    wkspace_left += topsize;
     goto recode_allele_load_ret_1;
   }
   loadbuf[loadbuf_size - 1] = ' ';
@@ -11213,7 +11494,7 @@ int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_a
     line_idx++;
     if (!loadbuf[loadbuf_size - 1]) {
       sprintf(logbuf, "Error: Line %" PRIuPTR " of --recode-allele file is pathologically long.\n", line_idx);
-      goto recode_allele_load_ret_INVALID_FORMAT_2;
+      goto recode_allele_load_ret_INVALID_FORMAT_3;
     }
     bufptr = skip_initial_spaces(loadbuf);
     if (is_eoln_kns(*bufptr)) {
@@ -11223,12 +11504,11 @@ int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_a
     bufptr2 = skip_initial_spaces(&(bufptr[slen]));
     if (is_eoln_kns(*bufptr2)) {
       sprintf(logbuf, "Error: Line %" PRIuPTR " of --recode-allele file has fewer tokens than expected.\n", line_idx);
-      goto recode_allele_load_ret_INVALID_FORMAT_2;
+      goto recode_allele_load_ret_INVALID_FORMAT_3;
     }
     alen = strlen_se(bufptr2);
-    ii = bsearch_str(bufptr, slen, sorted_ids, max_marker_id_len, marker_ct);
-    if (ii != -1) {
-      marker_uidx = id_map[(uint32_t)ii];
+    marker_uidx = id_htable_find(bufptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
+    if (marker_uidx != 0xffffffffU) {
       bufptr2[alen++] = '\0';
       if (!strcmp(bufptr2, marker_allele_ptrs[2 * marker_uidx])) {
 	CLEAR_BIT(recode_allele_reverse, marker_uidx);
@@ -11236,7 +11516,7 @@ int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_a
 	SET_BIT(recode_allele_reverse, marker_uidx);
       } else {
 	if (rae_size + alen > wkspace_left) {
-	  goto recode_allele_load_ret_NOMEM;
+	  goto recode_allele_load_ret_NOMEM2;
 	}
 	missing_allele = 1;
 	(*allele_missing_ptr)[marker_uidx] = &(recode_allele_extra[rae_size]);
@@ -11245,10 +11525,13 @@ int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_a
       }
     }
   }
+  wkspace_left += topsize;
   if (!feof(rafile)) {
     goto recode_allele_load_ret_READ_FAIL;
   }
   while (0) {
+  recode_allele_load_ret_NOMEM2:
+    wkspace_left += topsize;
   recode_allele_load_ret_NOMEM:
     retval = RET_NOMEM;
     break;
@@ -11258,7 +11541,8 @@ int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_a
   recode_allele_load_ret_READ_FAIL:
     retval = RET_READ_FAIL;
     break;
-  recode_allele_load_ret_INVALID_FORMAT_2:
+  recode_allele_load_ret_INVALID_FORMAT_3:
+    wkspace_left += topsize;
     logerrprintb();
     retval = RET_INVALID_FORMAT;
   }
@@ -11592,9 +11876,13 @@ uint32_t write_ped_lines(FILE* outfile, unsigned char* loadbuf, uintptr_t* marke
 	}
 	bufptr = &(bufptr[unfiltered_sample_ct4]);
       }
-      wbufptr[-1] = '\n';
-      if (fwrite_checked(writebuf, wbufptr - writebuf, outfile)) {
-	return 1;
+      if (marker_ct) {
+	wbufptr[-1] = '\n';
+	if (fwrite_checked(writebuf, wbufptr - writebuf, outfile)) {
+	  return 1;
+	}
+      } else {
+	putc('\n', outfile);
       }
     }
   }
@@ -11623,7 +11911,7 @@ uint32_t valid_vcf_allele_code(const char* allele_code) {
   // returns 1 if probably valid (angle-bracket case is not exhaustively
   // checked), 0 if definitely not
   uint32_t uii = (unsigned char)(*allele_code);
-  if (uii == '<') {
+  if ((uii == '<') || ((uii == '*') && (!allele_code[1]))) {
     return 1;
   }
   do {
@@ -12047,8 +12335,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
       goto recode_ret_NOMEM;
     }
   } else if (recode_modifier & RECODE_STRUCTURE) {
-    sample_uidx = 0;
-    for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
+    for (sample_uidx = 0, sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
       next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
       cptr = &(sample_ids[sample_uidx * max_sample_id_len]);
       aptr = (char*)memchr(cptr, '\t', max_sample_id_len);
@@ -12063,8 +12350,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
     if (wkspace_alloc_c_checked(&writebuf3, max_fid_len * sample_ct)) {
       goto recode_ret_NOMEM;
     }
-    sample_uidx = 0;
-    for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
+    for (sample_uidx = 0, sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
       next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
       cptr = &(sample_ids[sample_uidx * max_sample_id_len]);
       aptr = (char*)memchr(cptr, '\t', max_fid_len);
@@ -12079,7 +12365,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	break;
       }
     }
-    fid_ct = ulii;
+    fid_ct = MINV(ulii, sample_ct);
     while (++ulii < sample_ct) {
       if (strcmp(&(writebuf3[(fid_ct - 1) * max_fid_len]), &(writebuf3[ulii * max_fid_len]))) {
         strcpy(&(writebuf3[fid_ct * max_fid_len]), &(writebuf3[ulii * max_fid_len]));
@@ -12175,7 +12461,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	  if (wkspace_alloc_c_checked(&writebuf, max_chrom_size * ulii)) {
 	    goto recode_ret_NOMEM;
 	  }
-	  if (recode_modifier & RECODE_COMPOUND) {
+	  if ((recode_modifier & RECODE_COMPOUND) && max_chrom_size) {
 	    memset(writebuf, delimiter, max_chrom_size * 3 - 1);
 	    writebuf[max_chrom_size * 3 - 1] = '\n';
 	  }
@@ -12250,8 +12536,16 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
   }
   loadbuf = wkspace_base;
   chrom_fo_idx = 0;
-  refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
-  chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+  if (unfiltered_marker_ct) {
+    refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
+    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+  } else {
+    chrom_end = 0;
+    is_x = 0;
+    is_y = 0;
+    is_mt = 0;
+    is_haploid = 0;
+  }
   if (recode_modifier & RECODE_TRANSPOSE) {
     strcpy(outname_end, ".tped");
     if (fopen_checked(&outfile, outname, "w")) {
@@ -12297,30 +12591,32 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	  goto recode_ret_WRITE_FAIL;
 	}
 
-	if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
-	  goto recode_ret_READ_FAIL;
-	}
-	if (is_haploid && set_hh_missing) {
-          haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
-	}
-	init_recode_cmax0(mk_allele_ptrs[2 * marker_uidx], mk_allele_ptrs[2 * marker_uidx + 1], cur_mk_allelesx, cmalen, delimiter, delim2);
-	ulptr = loadbuf_collapsed;
-	ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
-	shiftmax = BITCT2;
-	while (1) {
-	  while (ulptr < ulptr_end) {
-	    cur_word = *ulptr++;
-	    for (shiftval = 0; shiftval < shiftmax; shiftval++) {
-	      ulii = cur_word & 3;
-	      fwrite(cur_mk_allelesx[ulii], 1, cmalen[ulii], outfile);
-	      cur_word >>= 2;
-	    }
+	if (sample_ct) {
+	  if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
+	    goto recode_ret_READ_FAIL;
 	  }
-	  if (ulptr == loadbuf_collapsed_end) {
-	    break;
+	  if (is_haploid && set_hh_missing) {
+	    haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	  }
+	  init_recode_cmax0(mk_allele_ptrs[2 * marker_uidx], mk_allele_ptrs[2 * marker_uidx + 1], cur_mk_allelesx, cmalen, delimiter, delim2);
+	  ulptr = loadbuf_collapsed;
+	  ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
+	  shiftmax = BITCT2;
+	  while (1) {
+	    while (ulptr < ulptr_end) {
+	      cur_word = *ulptr++;
+	      for (shiftval = 0; shiftval < shiftmax; shiftval++) {
+		ulii = cur_word & 3;
+		fwrite(cur_mk_allelesx[ulii], 1, cmalen[ulii], outfile);
+		cur_word >>= 2;
+	      }
+	    }
+	    if (ulptr == loadbuf_collapsed_end) {
+	      break;
+	    }
+	    ulptr_end++;
+	    shiftmax = sample_ct % BITCT2;
 	  }
-	  ulptr_end++;
-	  shiftmax = sample_ct % BITCT2;
 	}
 	if (putc_checked('\n', outfile)) {
 	  goto recode_ret_WRITE_FAIL;
@@ -12402,40 +12698,42 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	  putc(delimiter, outfile);
 	}
 	fputs(mk_allele_ptrs[2 * marker_uidx + 1 - uii], outfile);
-	if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, uii ^ IS_SET(marker_reverse, marker_uidx))) {
-	  goto recode_ret_READ_FAIL;
-	}
-	if (is_haploid && set_hh_missing) {
-          haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
-	}
-	ulptr = loadbuf_collapsed;
-	ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
-	shiftmax = BITCT2;
 	wbufptr = writebuf;
-	if (allele_missing && allele_missing[marker_uidx]) {
-	  // all 0s and NAs
-	  memcpy(cur_dosage_chars, "0N00", 4);
-	} else {
-	  memcpy(cur_dosage_chars, "2N10", 4);
-	}
-	while (1) {
-	  while (ulptr < ulptr_end) {
-	    cur_word = *ulptr++;
-	    for (shiftval = 0; shiftval < shiftmax; shiftval++) {
-	      ulii = cur_word & 3;
-	      *wbufptr++ = delimiter;
-	      *wbufptr++ = cur_dosage_chars[ulii];
-	      if (ulii == 1) {
-		*wbufptr++ = 'A';
+	if (sample_ct) {
+	  if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, uii ^ IS_SET(marker_reverse, marker_uidx))) {
+	    goto recode_ret_READ_FAIL;
+	  }
+	  if (is_haploid && set_hh_missing) {
+	    haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	  }
+	  ulptr = loadbuf_collapsed;
+	  ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
+	  shiftmax = BITCT2;
+	  if (allele_missing && allele_missing[marker_uidx]) {
+	    // all 0s and NAs
+	    memcpy(cur_dosage_chars, "0N00", 4);
+	  } else {
+	    memcpy(cur_dosage_chars, "2N10", 4);
+	  }
+	  while (1) {
+	    while (ulptr < ulptr_end) {
+	      cur_word = *ulptr++;
+	      for (shiftval = 0; shiftval < shiftmax; shiftval++) {
+		ulii = cur_word & 3;
+		*wbufptr++ = delimiter;
+		*wbufptr++ = cur_dosage_chars[ulii];
+		if (ulii == 1) {
+		  *wbufptr++ = 'A';
+		}
+		cur_word >>= 2;
 	      }
-	      cur_word >>= 2;
 	    }
+	    if (ulptr == loadbuf_collapsed_end) {
+	      break;
+	    }
+	    ulptr_end++;
+	    shiftmax = sample_ct % BITCT2;
 	  }
-	  if (ulptr == loadbuf_collapsed_end) {
-	    break;
-	  }
-	  ulptr_end++;
-	  shiftmax = sample_ct % BITCT2;
 	}
 	*wbufptr++ = '\n';
 	if (fwrite_checked(writebuf, wbufptr - writebuf, outfile)) {
@@ -12520,11 +12818,12 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
       goto recode_ret_WRITE_FAIL;
     }
     chrom_fo_idx = 0;
-    refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
-    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
-    sample_uidx = 0;
+    if (unfiltered_marker_ct) {
+      refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
+      chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+    }
     shiftval = 0; // repurposed: underscore seen in ID?
-    for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
+    for (sample_uidx = 0, sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
       next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
       cptr = &(sample_ids[sample_uidx * max_sample_id_len]);
       ulii = strlen_se(cptr);
@@ -12605,11 +12904,13 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	  goto recode_ret_WRITE_FAIL;
 	}
 
-	if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
-	  goto recode_ret_READ_FAIL;
-	}
-	if (is_haploid && set_hh_missing) {
-	  haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	if (sample_ct) {
+	  if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
+	    goto recode_ret_READ_FAIL;
+	  }
+	  if (is_haploid && set_hh_missing) {
+	    haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	  }
 	}
 
 	cptr = mk_allele_ptrs[2 * marker_uidx];
@@ -12731,33 +13032,35 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	pzwritep = uint32_writex(pzwritep, marker_pos[marker_uidx], ' ');
 	pzwritep = strcpyax(pzwritep, mk_allele_ptrs[2 * marker_uidx], ' ');
 	pzwritep = strcpya(pzwritep, mk_allele_ptrs[2 * marker_uidx + 1]);
-	if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
-	  goto recode_ret_READ_FAIL;
-	}
-	if (is_haploid && set_hh_missing) {
-	  haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
-	}
-	ulptr = loadbuf_collapsed;
-	ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
-	sample_idx = 0;
-	sample_uidx = BITCT2; // repurposed as stop value
-	while (1) {
-	  while (ulptr < ulptr_end) {
-	    cur_word = *ulptr++;
-	    for (; sample_idx < sample_uidx; sample_idx++, cur_word >>= 2) {
-	      ulii = cur_word & 3;
-	      if (ulii == 1) {
-                missing_cts[sample_idx] += 1;
+	if (sample_ct) {
+	  if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
+	    goto recode_ret_READ_FAIL;
+	  }
+	  if (is_haploid && set_hh_missing) {
+	    haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	  }
+	  ulptr = loadbuf_collapsed;
+	  ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
+	  sample_idx = 0;
+	  sample_uidx = BITCT2; // repurposed as stop value
+	  while (1) {
+	    while (ulptr < ulptr_end) {
+	      cur_word = *ulptr++;
+	      for (; sample_idx < sample_uidx; sample_idx++, cur_word >>= 2) {
+		ulii = cur_word & 3;
+		if (ulii == 1) {
+		  missing_cts[sample_idx] += 1;
+		}
+		pzwritep = memcpya(pzwritep, &(cur_mk_allelesx_buf[ulii * 8]), 6);
 	      }
-	      pzwritep = memcpya(pzwritep, &(cur_mk_allelesx_buf[ulii * 8]), 6);
+	      sample_uidx += BITCT2;
 	    }
-	    sample_uidx += BITCT2;
-	  }
-	  if (ulptr == loadbuf_collapsed_end) {
-	    break;
+	    if (ulptr == loadbuf_collapsed_end) {
+	      break;
+	    }
+	    ulptr_end++;
+	    sample_uidx = sample_ct;
 	  }
-	  ulptr_end++;
-	  sample_uidx = sample_ct;
 	}
 	append_binary_eoln(&pzwritep);
 	if (flex_pzwrite(&ps, &pzwritep)) {
@@ -12896,6 +13199,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
       autosomal_marker_ct -= count_chrom_markers(chrom_info_ptr, chrom_info_ptr->xy_code, marker_exclude);
     }
     if (!autosomal_marker_ct) {
+      // could allow this?
       logerrprint("Error: No autosomal variants for --recode beagle.\n");
       goto recode_ret_ALL_MARKERS_EXCLUDED;
     }
@@ -13022,6 +13326,10 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
     }
   } else if (recode_modifier & (RECODE_BIMBAM | RECODE_BIMBAM_1CHR)) {
     if (recode_modifier & RECODE_BIMBAM_1CHR) {
+      if (!marker_ct) {
+	logerrprint("Error: No variants for --recode bimbam-1chr.\n");
+	goto recode_ret_ALL_MARKERS_EXCLUDED;
+      }
       ii = single_chrom_start(chrom_info_ptr, unfiltered_marker_ct, marker_exclude);
       if (ii == -1) {
         logerrprint("Error: --recode bimbam-1chr requires a single-chromosome dataset.  Did you mean\n'--recode bimbam'?  (Note the lack of a dash in the middle.)\n");
@@ -13119,8 +13427,10 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
     marker_uidx = 0;
     marker_idx = 0;
     chrom_fo_idx = 0;
-    refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
-    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+    if (unfiltered_marker_ct) {
+      refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
+      chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+    }
     writebuf2[0] = ',';
     memcpy(&(writebuf2[4]), ",??", 4);
     writebuf2[8] = ',';
@@ -13139,44 +13449,46 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
           refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
           chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
 	}
-	if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
-	  goto recode_ret_READ_FAIL;
-	}
-	if (is_haploid && set_hh_missing) {
-	  haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
-	}
-        ucc = mk_allele_ptrs[2 * marker_uidx][0];
-        ucc2 = mk_allele_ptrs[2 * marker_uidx + 1][0];
-        writebuf2[1] = ucc;
-        writebuf2[2] = ucc;
-        writebuf2[9] = ucc;
-        writebuf2[10] = ucc2;
-        writebuf2[13] = ucc2;
-        writebuf2[14] = ucc2;
 	if (fputs_checked(&(marker_ids[marker_uidx * max_marker_id_len]), outfile)) {
 	  goto recode_ret_WRITE_FAIL;
 	}
-	wbufptr = writebuf;
-	ulptr = loadbuf_collapsed;
-	ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
-        shiftmax = BITCT2;
-        while (1) {
-	  while (ulptr < ulptr_end) {
-            cur_word = *ulptr++;
-            for (shiftval = 0; shiftval < shiftmax; shiftval++) {
-	      ulii = cur_word & 3;
-	      wbufptr = memcpyl3a(wbufptr, &(writebuf2[4 * ulii]));
-	      cur_word >>= 2;
+	if (sample_ct) {
+	  if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
+	    goto recode_ret_READ_FAIL;
+	  }
+	  if (is_haploid && set_hh_missing) {
+	    haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	  }
+	  ucc = mk_allele_ptrs[2 * marker_uidx][0];
+	  ucc2 = mk_allele_ptrs[2 * marker_uidx + 1][0];
+	  writebuf2[1] = ucc;
+	  writebuf2[2] = ucc;
+	  writebuf2[9] = ucc;
+	  writebuf2[10] = ucc2;
+	  writebuf2[13] = ucc2;
+	  writebuf2[14] = ucc2;
+	  wbufptr = writebuf;
+	  ulptr = loadbuf_collapsed;
+	  ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
+	  shiftmax = BITCT2;
+	  while (1) {
+	    while (ulptr < ulptr_end) {
+	      cur_word = *ulptr++;
+	      for (shiftval = 0; shiftval < shiftmax; shiftval++) {
+		ulii = cur_word & 3;
+		wbufptr = memcpyl3a(wbufptr, &(writebuf2[4 * ulii]));
+		cur_word >>= 2;
+	      }
 	    }
+	    if (ulptr == loadbuf_collapsed_end) {
+	      break;
+	    }
+	    ulptr_end++;
+	    shiftmax = sample_ct % BITCT2;
 	  }
-	  if (ulptr == loadbuf_collapsed_end) {
-	    break;
+	  if (fwrite_checked(writebuf, 3 * sample_ct, outfile)) {
+	    goto recode_ret_WRITE_FAIL;
 	  }
-	  ulptr_end++;
-	  shiftmax = sample_ct % BITCT2;
-	}
-	if (fwrite_checked(writebuf, 3 * sample_ct, outfile)) {
-	  goto recode_ret_WRITE_FAIL;
 	}
 	putc('\n', outfile);
       }
@@ -13189,6 +13501,14 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
       }
     }
   } else if (recode_modifier & (RECODE_FASTPHASE | RECODE_FASTPHASE_1CHR)) {
+    if (!marker_ct) {
+      // why bother
+      logerrprint("Error: No variants for --recode fastphase{-1chr}.\n");
+      goto recode_ret_ALL_MARKERS_EXCLUDED;
+    } else if (!sample_ct) {
+      logerrprint("Error: No samples for --recode fastphase{-1chr}.\n");
+      goto recode_ret_ALL_SAMPLES_EXCLUDED;
+    }
     if (recode_modifier & RECODE_FASTPHASE) {
       memcpy(outname_end, ".chr-*", 7);
     } else {
@@ -13347,11 +13667,13 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	  chrom_fo_idx++;
 	  refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
 	}
-        if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
-	  goto recode_ret_READ_FAIL;
-	}
-	if (is_haploid && set_hh_missing) {
-	  haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	if (sample_ct) {
+	  if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
+	    goto recode_ret_READ_FAIL;
+	  }
+	  if (is_haploid && set_hh_missing) {
+	    haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	  }
 	}
 	wbufptr = &(marker_ids[marker_uidx * max_marker_id_len]);
 	cptr = strcpya(&(writebuf[1]), wbufptr);
@@ -13483,7 +13805,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
     if (recode_load_to(loadbuf, bedfile, bed_offset, unfiltered_marker_ct, 0, marker_ct, marker_exclude, recode_allele_reverse, &marker_uidx, unfiltered_sample_ct)) {
       goto recode_ret_READ_FAIL;
     }
-    if (set_hh_missing) {
+    if (set_hh_missing && marker_ct) {
       haploid_fix_multiple(marker_exclude, 0, marker_ct, chrom_info_ptr, hh_exists, sample_include2, sample_male_include2, unfiltered_sample_ct, unfiltered_sample_ct4, loadbuf);
     }
     fputs("0%", stdout);
@@ -13496,50 +13818,54 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	if (recode_write_first_cols(outfile, sample_uidx, delimiter, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm, pheno_c, pheno_d, output_missing_pheno)) {
 	  goto recode_ret_WRITE_FAIL;
 	}
-	bufptr = &(loadbuf[sample_uidx / 4]);
-	wbufptr = writebuf;
-	shiftval = (sample_uidx % 4) * 2;
-	marker_uidx = 0;
-	marker_idx = 0;
-	if (recode_modifier & RECODE_A) {
-	  do {
-	    marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
-	    ulii = next_set_ul(marker_exclude, marker_uidx, unfiltered_marker_ct);
-	    marker_idx += ulii - marker_uidx;
+	if (marker_ct) {
+	  bufptr = &(loadbuf[sample_uidx / 4]);
+	  wbufptr = writebuf;
+	  shiftval = (sample_uidx % 4) * 2;
+	  marker_uidx = 0;
+	  marker_idx = 0;
+	  if (recode_modifier & RECODE_A) {
 	    do {
-	      ucc = ((*bufptr) >> shiftval) & 3;
-	      if (allele_missing && allele_missing[marker_uidx]) {
-		*wbufptr++ = "0N00"[ucc];
-	      } else {
-		*wbufptr++ = "2N10"[ucc];
-	      }
-	      if (ucc == 1) {
-		*wbufptr++ = 'A';
-	      }
-	      *wbufptr++ = delimiter;
-	      bufptr = &(bufptr[unfiltered_sample_ct4]);
-	    } while (++marker_uidx < ulii);
-	  } while (marker_idx < marker_ct);
-	} else {
-	  do {
-	    marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
-	    ulii = next_set_ul(marker_exclude, marker_uidx, unfiltered_marker_ct);
-	    marker_idx += ulii - marker_uidx;
+	      marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
+	      ulii = next_set_ul(marker_exclude, marker_uidx, unfiltered_marker_ct);
+	      marker_idx += ulii - marker_uidx;
+	      do {
+		ucc = ((*bufptr) >> shiftval) & 3;
+		if (allele_missing && allele_missing[marker_uidx]) {
+		  *wbufptr++ = "0N00"[ucc];
+		} else {
+		  *wbufptr++ = "2N10"[ucc];
+		}
+		if (ucc == 1) {
+		  *wbufptr++ = 'A';
+		}
+		*wbufptr++ = delimiter;
+		bufptr = &(bufptr[unfiltered_sample_ct4]);
+	      } while (++marker_uidx < ulii);
+	    } while (marker_idx < marker_ct);
+	  } else {
 	    do {
-	      ucc = ((*bufptr) >> shiftval) & 3;
-	      if (ucc != 1) {
-		wbufptr = memcpya(wbufptr, &(writebuf2[4 * ((allele_missing && allele_missing[marker_uidx])? 3 : ucc)]), 4);
-	      } else {
-		wbufptr = memcpya(wbufptr, &(writebuf2[16]), 6);
-	      }
-	      bufptr = &(bufptr[unfiltered_sample_ct4]);
-	    } while (++marker_uidx < ulii);
-	  } while (marker_idx < marker_ct);
-	}
-	wbufptr[-1] = '\n';
-	ulii = (uintptr_t)(wbufptr - writebuf);
-	if (fwrite_checked(writebuf, ulii, outfile)) {
-	  goto recode_ret_WRITE_FAIL;
+	      marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
+	      ulii = next_set_ul(marker_exclude, marker_uidx, unfiltered_marker_ct);
+	      marker_idx += ulii - marker_uidx;
+	      do {
+		ucc = ((*bufptr) >> shiftval) & 3;
+		if (ucc != 1) {
+		  wbufptr = memcpya(wbufptr, &(writebuf2[4 * ((allele_missing && allele_missing[marker_uidx])? 3 : ucc)]), 4);
+		} else {
+		  wbufptr = memcpya(wbufptr, &(writebuf2[16]), 6);
+		}
+		bufptr = &(bufptr[unfiltered_sample_ct4]);
+	      } while (++marker_uidx < ulii);
+	    } while (marker_idx < marker_ct);
+	  }
+	  wbufptr[-1] = '\n';
+	  ulii = (uintptr_t)(wbufptr - writebuf);
+	  if (fwrite_checked(writebuf, ulii, outfile)) {
+	    goto recode_ret_WRITE_FAIL;
+	  }
+	} else {
+	  putc('\n', outfile);
 	}
       }
       if (pct < 100) {
@@ -13610,11 +13936,13 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
 	  }
 	  chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
 	}
-	if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, cur_sample_ct, cur_sample_exclude, cur_final_mask, IS_SET(marker_reverse, marker_uidx))) {
-	  goto recode_ret_READ_FAIL;
-	}
-	if (is_haploid && set_hh_missing) {
-          haploid_fix(hh_exists, cur_sample_include2, cur_sample_male_include2, cur_sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	if (unfiltered_sample_ct) {
+	  if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, cur_sample_ct, cur_sample_exclude, cur_final_mask, IS_SET(marker_reverse, marker_uidx))) {
+	    goto recode_ret_READ_FAIL;
+	  }
+	  if (is_haploid && set_hh_missing) {
+	    haploid_fix(hh_exists, cur_sample_include2, cur_sample_male_include2, cur_sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+	  }
 	}
 	init_recode_cmax(mk_allele_ptrs[2 * marker_uidx], mk_allele_ptrs[2 * marker_uidx + 1], cur_mk_allelesx, cmalen, '\0', delimiter);
 	cmalen[0] -= 1;
@@ -13741,6 +14069,10 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
     if (wkspace_left < ((uint64_t)unfiltered_sample_ct4) * max_chrom_size) {
       goto recode_ret_NO_MULTIPASS_YET;
     }
+    if (!marker_ct) {
+      logerrprint("Error: No variants for --recode HV{-1chr}.\n");
+      goto recode_ret_ALL_MARKERS_EXCLUDED;
+    }
     if (recode_modifier & RECODE_HV) {
       memcpy(outname_end, ".chr-", 5);
       sprintf(logbuf, "--recode HV to %s*.ped + .info... ", outname);
@@ -13779,7 +14111,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
       if (recode_load_to(loadbuf, bedfile, bed_offset, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1], 0, ulii, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
 	goto recode_ret_READ_FAIL;
       }
-      if (set_hh_missing) {
+      if (set_hh_missing && marker_ct) {
         haploid_fix_multiple(marker_exclude, marker_uidx_start, ulii, chrom_info_ptr, hh_exists, sample_include2, sample_male_include2, unfiltered_sample_ct, unfiltered_sample_ct4, loadbuf);
       }
       sample_uidx = 0;
@@ -13852,7 +14184,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
     if (recode_load_to(loadbuf, bedfile, bed_offset, unfiltered_marker_ct, 0, marker_ct, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
       goto recode_ret_READ_FAIL;
     }
-    if (set_hh_missing) {
+    if (set_hh_missing && marker_ct) {
       haploid_fix_multiple(marker_exclude, 0, marker_ct, chrom_info_ptr, hh_exists, sample_include2, sample_male_include2, unfiltered_sample_ct, unfiltered_sample_ct4, loadbuf);
     }
     sample_uidx = 0;
@@ -13912,7 +14244,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
     if (recode_load_to(loadbuf, bedfile, bed_offset, unfiltered_marker_ct, 0, marker_ct, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
       goto recode_ret_READ_FAIL;
     }
-    if (set_hh_missing) {
+    if (set_hh_missing && marker_ct) {
       haploid_fix_multiple(marker_exclude, 0, marker_ct, chrom_info_ptr, hh_exists, sample_include2, sample_male_include2, unfiltered_sample_ct, unfiltered_sample_ct4, loadbuf);
     }
     sample_uidx = 0;
@@ -13967,7 +14299,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
   if (!(recode_modifier & (RECODE_BEAGLE | RECODE_FASTPHASE | RECODE_FASTPHASE_1CHR | RECODE_HV))) {
     logprint("done.\n");
     if (invalid_allele_code_seen) {
-      logerrprint("Warning: At least one VCF allele code violates the official specification;\nother tools may not accept the file.  (Valid codes must either start with a\n'<', only contain characters in {A,C,G,T,N,a,c,g,t,n}, or represent a\nbreakend.)\n");
+      logerrprint("Warning: At least one VCF allele code violates the official specification;\nother tools may not accept the file.  (Valid codes must either start with a\n'<', only contain characters in {A,C,G,T,N,a,c,g,t,n}, be an isolated '*', or\nrepresent a breakend.)\n");
     }
   } else {
     fputs("done.\n", stdout);
@@ -14007,6 +14339,9 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
   recode_ret_ALL_MARKERS_EXCLUDED:
     retval = RET_ALL_MARKERS_EXCLUDED;
     break;
+  recode_ret_ALL_SAMPLES_EXCLUDED:
+    retval = RET_ALL_SAMPLES_EXCLUDED;
+    break;
   }
  recode_ret_1:
   wkspace_reset(wkspace_mark);
@@ -14171,7 +14506,7 @@ static inline Ll_entry2* top_alloc_ll2(uintptr_t* topsize_ptr, uint32_t size) {
   return (Ll_entry2*)top_alloc(topsize_ptr, size + sizeof(Ll_entry2));
 }
 
-int32_t merge_fam_id_scan(char* bedname, char* famname, uintptr_t* max_sample_id_len_ptr, uint32_t* max_sample_full_len_ptr, uint32_t* is_dichot_pheno_ptr, Ll_entry** htable, uintptr_t* topsize_ptr, uint64_t* tot_sample_ct_ptr, uint32_t* ped_buflen_ptr, uint32_t* cur_sample_ct_ptr, uint32_t* orig_idx_ptr) {
+int32_t merge_fam_id_scan(char* bedname, char* famname, uint32_t allow_no_samples, uintptr_t* max_sample_id_len_ptr, uint32_t* max_sample_full_len_ptr, uint32_t* is_dichot_pheno_ptr, Ll_entry** htable, uintptr_t* topsize_ptr, uint64_t* tot_sample_ct_ptr, uint32_t* ped_buflen_ptr, uint32_t* cur_sample_ct_ptr, uint32_t* orig_idx_ptr) {
   uint64_t tot_sample_ct = *tot_sample_ct_ptr;
   uintptr_t max_sample_id_len = *max_sample_id_len_ptr;
   uintptr_t topsize = *topsize_ptr;
@@ -14335,7 +14670,7 @@ int32_t merge_fam_id_scan(char* bedname, char* famname, uintptr_t* max_sample_id
   if (!feof(infile)) {
     goto merge_fam_id_scan_ret_READ_FAIL;
   }
-  if (!cur_sample_ct) {
+  if ((!cur_sample_ct) && (!allow_no_samples)) {
     LOGPREPRINTFWW("Error: No %s in %s.\n", g_species_plural, famname);
     goto merge_fam_id_scan_ret_INVALID_FORMAT_2;
   }
@@ -14377,7 +14712,7 @@ int32_t merge_sample_sortf(char* sample_sort_fname, char* sample_fids, uintptr_t
   return retval;
 }
 
-int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uintptr_t* max_marker_id_len_ptr, Ll_entry2** htable2, uintptr_t* topsize_ptr, uint32_t* max_bim_linelen_ptr, uint64_t* tot_marker_ct_ptr, uint32_t* cur_marker_ct_ptr, uint64_t* position_warning_ct_ptr, Ll_str** non_biallelics_ptr, uint32_t allow_extra_chroms, Chrom_info* chrom_info_ptr) {
+int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uint32_t allow_no_variants, uintptr_t* max_marker_id_len_ptr, Ll_entry2** htable2, uintptr_t* topsize_ptr, uint32_t* max_bim_linelen_ptr, uint64_t* tot_marker_ct_ptr, uint32_t* cur_marker_ct_ptr, uint64_t* position_warning_ct_ptr, Ll_str** non_biallelics_ptr, uint32_t allow_extra_chroms, Chrom_info* chrom_info_ptr) {
   unsigned char* wkspace_mark = wkspace_base;
   uintptr_t max_marker_id_len = *max_marker_id_len_ptr;
   uintptr_t topsize = *topsize_ptr;
@@ -14404,7 +14739,7 @@ int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uintptr_t* max_marker_
   Ll_str* ll_string_new;
   int64_t llxx;
   uintptr_t line_idx;
-  uint32_t cm_col;
+  uint32_t cm_col_exists;
   uint32_t allele_ct;
   uint32_t name_match;
   uint32_t uii;
@@ -14426,9 +14761,14 @@ int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uintptr_t* max_marker_
   }
   loadbuf = (char*)wkspace_alloc(loadbuf_size);
   loadbuf[loadbuf_size - 1] = ' ';
-  if (check_cm_col(infile, loadbuf, is_binary, loadbuf_size, &cm_col, &line_idx)) {
+  if (check_cm_col(infile, loadbuf, is_binary, allow_no_variants, loadbuf_size, &cm_col_exists, &line_idx)) {
     goto merge_bim_scan_ret_MISSING_TOKENS;
   }
+  if (!line_idx) {
+    // no variants
+    *cur_marker_ct_ptr = 0;
+    goto merge_bim_scan_ret_1;
+  }
   line_idx--;
   do {
     line_idx++;
@@ -14466,7 +14806,7 @@ int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uintptr_t* max_marker_
     if (no_more_tokens_kns(bufptr2)) {
       goto merge_bim_scan_ret_MISSING_TOKENS;
     }
-    if (cm_col) {
+    if (cm_col_exists) {
       if (scan_double(bufptr2, &cm)) {
 	cm = 0;
       }
@@ -14945,7 +15285,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
   uintptr_t uljj = 0;
   uintptr_t* mbufptr2;
   uintptr_t* rbufptr;
-  uint32_t cm_col;
+  uint32_t cm_col_exists;
   char* aptr1;
   char* aptr2;
   char* bufptr;
@@ -15015,9 +15355,12 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
   if (fopen_checked(&infile2, bimname, "r")) {
     goto merge_main_ret_OPEN_FAIL;
   }
-  if (check_cm_col(infile2, bim_loadbuf, is_binary, max_bim_linelen, &cm_col, &ulii)) {
+  if (check_cm_col(infile2, bim_loadbuf, is_binary, 1, max_bim_linelen, &cm_col_exists, &ulii)) {
     goto merge_main_ret_READ_FAIL;
   }
+  if (!ulii) {
+    bim_loadbuf[0] = '\0';
+  }
   if (fopen_checked(&bedfile, bedname, is_binary? "rb" : "r")) {
     goto merge_main_ret_OPEN_FAIL;
   }
@@ -15045,7 +15388,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
     }
     ++marker_in_idx;
     bufptr = next_token(bufptr);
-    bufptr2 = next_token_mult(bufptr, 1 + cm_col);
+    bufptr2 = next_token_mult(bufptr, 1 + cm_col_exists);
     if (!bufptr2) {
       goto merge_main_ret_READ_FAIL;
     }
@@ -15086,6 +15429,9 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
       bufptr5 = marker_allele_ptrs[((uint32_t)ii) * 2 + 1];
 
       last_marker_in_idx = marker_in_idx;
+      if (!cur_sample_ct) {
+	continue;
+      }
       if (load_raw(bedfile, readbuf_w, cur_sample_ct4)) {
 	goto merge_main_ret_READ_FAIL;
       }
@@ -15298,6 +15644,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
       if (is_eoln_or_comment(cc)) {
 	continue;
       }
+      // only possible to get here if sample_ct and marker_ct are positive
       bufptr2 = token_endnn(bufptr);
       uii = (bufptr2 - bufptr);
       bufptr3 = skip_initial_spaces(bufptr2);
@@ -15566,6 +15913,8 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
   uint32_t merge_mode = merge_type & MERGE_MODE_MASK;
   uint32_t merge_nsort = ((!sample_sort) || (sample_sort == SAMPLE_SORT_NATURAL))? 1 : 0;
   uint32_t merge_equal_pos = (merge_type & MERGE_EQUAL_POS)? 1 : 0;
+  uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
+  uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
   Ll_entry** htable = (Ll_entry**)(&(wkspace_base[wkspace_left - HASHMEM_S]));
   Ll_entry2** htable2 = (Ll_entry2**)(&(wkspace_base[wkspace_left - HASHMEM]));
   Ll_str* non_biallelics = NULL;
@@ -15793,13 +16142,13 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
   ullxx = 0;
   mlpos = 0;
   for (mlpos = 0; mlpos < merge_ct; mlpos++) {
-    retval = merge_fam_id_scan(mergelist_bed[mlpos], mergelist_fam[mlpos], &max_sample_id_len, &max_sample_full_len, &is_dichot_pheno, htable, &topsize, &ullxx, &ped_buflen, &cur_sample_ct, &orig_idx);
+    retval = merge_fam_id_scan(mergelist_bed[mlpos], mergelist_fam[mlpos], allow_no_samples, &max_sample_id_len, &max_sample_full_len, &is_dichot_pheno, htable, &topsize, &ullxx, &ped_buflen, &cur_sample_ct, &orig_idx);
     if (retval) {
       goto merge_datasets_ret_1;
     }
     if ((!merge_list) && mlpos) {
       LOGPRINTFWW("%u %s loaded from %s.\n", max_cur_sample_ct, species_str(max_cur_sample_ct), mergelist_fam[0]);
-      LOGPRINTFWW("%u %s to be merged from %s.\n", cur_sample_ct, species_str(cur_sample_ct), mergelist_fam[1]);
+      LOGPRINTFWW("%u %s to be merged from %s.\n", cur_sample_ct, species_str(cur_sample_ct), (merge_type & MERGE_BINARY)? mergelist_fam[1] : mergelist_bed[1]);
       uii = ullxx - max_cur_sample_ct;
       LOGPRINTF("Of these, %u %s new, while %u %s present in the base dataset.\n", uii, (uii == 1)? "is" : "are", cur_sample_ct - uii, (cur_sample_ct - uii == 1)? "is" : "are");
     }
@@ -16018,7 +16367,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
 
   ullxx = 0;
   for (mlpos = 0; mlpos < merge_ct; ++mlpos) {
-    retval = merge_bim_scan(mergelist_bim[mlpos], (mergelist_fam[mlpos])? 1 : 0, &max_marker_id_len, htable2, &topsize, &max_bim_linelen, &ullxx, &cur_marker_ct, &position_warning_ct, &non_biallelics, allow_extra_chroms, chrom_info_ptr);
+    retval = merge_bim_scan(mergelist_bim[mlpos], (mergelist_fam[mlpos])? 1 : 0, allow_no_variants, &max_marker_id_len, htable2, &topsize, &max_bim_linelen, &ullxx, &cur_marker_ct, &position_warning_ct, &non_biallelics, allow_extra_chroms, chrom_info_ptr);
     if (retval) {
       goto merge_datasets_ret_1;
     }
@@ -16148,7 +16497,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
   if (merge_post_msort_update_maps(marker_ids, max_marker_id_len, marker_map, marker_cms, marker_cms_tmp, pos_buf, ll_buf, chrom_start, chrom_id, chrom_ct, &dedup_marker_ct, merge_equal_pos, marker_allele_ptrs, chrom_info_ptr)) {
     goto merge_datasets_ret_INVALID_FORMAT;
   }
-  if (!dedup_marker_ct) {
+  if ((!dedup_marker_ct) && (!allow_no_variants)) {
     logerrprint("Error: No variants in merged file.\n");
     goto merge_datasets_ret_INVALID_FORMAT;
   }
@@ -16173,27 +16522,36 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
   } else {
     ulii = ped_buflen;
   }
-  // don't need to enforce >= 3 since wkspace_alloc guarantees >= 64
-  if (wkspace_alloc_uc_checked(&readbuf, ulii)) {
+  if (wkspace_alloc_uc_checked(&readbuf, MAXV(ulii, 3))) {
     goto merge_datasets_ret_NOMEM;
   }
   if (merge_must_track_write(merge_mode)) {
     ulii = (tot_sample_ct + (BITCT - 1)) / BITCT;
-    markers_per_pass = wkspace_left / (3 * sizeof(intptr_t) * ulii);
-    if (markers_per_pass > dedup_marker_ct) {
+    if (ulii) {
+      markers_per_pass = wkspace_left / (3 * sizeof(intptr_t) * ulii);
+      if (markers_per_pass > dedup_marker_ct) {
+	markers_per_pass = dedup_marker_ct;
+      }
+    } else {
       markers_per_pass = dedup_marker_ct;
     }
     markbuf = (uintptr_t*)wkspace_alloc(markers_per_pass * ulii * sizeof(intptr_t));
-  } else {
+  } else if (tot_sample_ct4) {
     markers_per_pass = wkspace_left / tot_sample_ct4;
     if (markers_per_pass > dedup_marker_ct) {
       markers_per_pass = dedup_marker_ct;
     }
+  } else {
+    markers_per_pass = dedup_marker_ct;
   }
-  if (!markers_per_pass) {
-    goto merge_datasets_ret_NOMEM;
+  if (dedup_marker_ct) {
+    if (!markers_per_pass) {
+      goto merge_datasets_ret_NOMEM;
+    }
+    pass_ct = 1 + ((dedup_marker_ct - 1) / markers_per_pass);
+  } else {
+    pass_ct = 0;
   }
-  pass_ct = 1 + ((dedup_marker_ct - 1) / markers_per_pass);
 
   writebuf = wkspace_base;
   pcptr = (uintptr_t*)wkspace_base;
diff --git a/plink_data.h b/plink_data.h
index 94ae693..0dd15ac 100644
--- a/plink_data.h
+++ b/plink_data.h
@@ -1,7 +1,7 @@
 #ifndef __PLINK_DATA_H__
 #define __PLINK_DATA_H__
 
-int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t sample_ct, uint64_t fsize);
+int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t unfiltered_sample_ct, uint64_t fsize);
 
 int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_len_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_len_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_len, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_ [...]
 
@@ -11,13 +11,13 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
 
 int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t map_cols, char* outname, char* outname_end, uint64_t calculation_type, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_i [...]
 
-int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32_t missing_pheno, uint32_t affection_01, uintptr_t* unfiltered_sample_ct_ptr, char** sample_ids_ptr, uintptr_t* max_sample_id_len_ptr, char** paternal_ids_ptr, uintptr_t* max_paternal_id_len_ptr, char** maternal_ids_ptr, uintptr_t* max_maternal_id_len_ptr, uintptr_t** sex_nm_ptr, uintptr_t** sex_male_ptr, uint32_t* affection_ptr, uintptr_t** pheno_nm_ptr, uintptr_t** pheno_c_ptr, double** pheno_d_ptr, uintptr [...]
+int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32_t missing_pheno, uint32_t affection_01, uintptr_t* unfiltered_sample_ct_ptr, char** sample_ids_ptr, uintptr_t* max_sample_id_len_ptr, char** paternal_ids_ptr, uintptr_t* max_paternal_id_len_ptr, char** maternal_ids_ptr, uintptr_t* max_maternal_id_len_ptr, uintptr_t** sex_nm_ptr, uintptr_t** sex_male_ptr, uint32_t* affection_ptr, uintptr_t** pheno_nm_ptr, uintptr_t** pheno_c_ptr, double** pheno_d_ptr, uintptr [...]
 
 int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outname_end, char* single_chr, char* pheno_name, double hard_call_threshold, char* missing_code, int32_t missing_pheno, uint64_t misc_flags, uint32_t is_bgen, Chrom_info* chrom_info_ptr);
 
 int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_end, uint32_t fam_cols, uint64_t misc_flags, int32_t missing_pheno, Chrom_info* chrom_info_ptr);
 
-int32_t lgen_to_bed(char* lgen_namebuf, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, uint32_t lgen_modifier, char* lgen_reference_fname, Chrom_info* chrom_info_ptr);
+int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, uint32_t lgen_modifier, char* lgen_reference_fname, Chrom_info* chrom_info_ptr);
 
 int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* outname_end, uint64_t misc_flags, Chrom_info* chrom_info_ptr);
 
@@ -25,7 +25,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
 
 int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, char* const_fid, char id_delim, char vcf_idspace_to, double vcf_min_qual, char* vcf_filter_exceptions_flattened, Chrom_info* chrom_info_ptr);
 
-int32_t bed_from_23(char* fname, char* outname, char* outname_end, uint32_t modifier_23, char* fid_23, char* iid_23, double pheno_23, char* paternal_id_23, char* maternal_id_23, Chrom_info* chrom_info_ptr);
+int32_t bed_from_23(char* fname, char* outname, char* outname_end, uint32_t modifier_23, char* fid_23, char* iid_23, double pheno_23, uint64_t misc_flags, char* paternal_id_23, char* maternal_id_23, Chrom_info* chrom_info_ptr);
 
 int32_t generate_dummy(char* outname, char* outname_end, uint32_t flags, uintptr_t marker_ct, uintptr_t sample_ct, double geno_mrate, double pheno_mrate, int32_t missing_pheno);
 
diff --git a/plink_dosage.c b/plink_dosage.c
index 602a504..71b0c3a 100644
--- a/plink_dosage.c
+++ b/plink_dosage.c
@@ -24,7 +24,7 @@ void dosage_cleanup(Dosage_info* doip) {
 
 #define DOSAGE_EPSILON 0.000244140625
 
-int32_t dosage_load_score_files(Score_info* sc_ip, char* outname, char* outname_end, uintptr_t* score_marker_ct_ptr, uintptr_t* max_score_marker_id_len_ptr, char** score_marker_ids_ptr, char*** score_allele_codes_ptr, double** score_effect_sizes_ptr, uintptr_t** score_qrange_key_exists_ptr, double** score_qrange_keys_ptr, uintptr_t* qrange_ct_ptr, uintptr_t* max_qrange_name_len_ptr, char** score_qrange_names_ptr, double** score_qrange_bounds_ptr) {
+int32_t dosage_load_score_files(Score_info* sc_ip, char* outname, char* outname_end, uint32_t double_dosage, uintptr_t* score_marker_ct_ptr, uintptr_t* max_score_marker_id_len_ptr, char** score_marker_ids_ptr, char*** score_allele_codes_ptr, double** score_effect_sizes_ptr, uintptr_t** score_qrange_key_exists_ptr, double** score_qrange_keys_ptr, uintptr_t* qrange_ct_ptr, uintptr_t* max_qrange_name_len_ptr, char** score_qrange_names_ptr, double** score_qrange_bounds_ptr) {
   // We don't necessarily have the whole variant ID list in advance, so it
   // makes sense to deviate a bit from score_report().
   //
@@ -268,6 +268,9 @@ int32_t dosage_load_score_files(Score_info* sc_ip, char* outname, char* outname_
       // guaranteed to succeed unless the user is overwriting the file between
       // load passes, which we won't bother defending against
       marker_idx = (uint32_t)bsearch_str(bufptr_arr[varid_idx], strlen_se(bufptr_arr[varid_idx]), score_marker_ids, max_score_marker_id_len, score_marker_ct);
+      if (double_dosage) {
+	dxx *= 2;
+      }
       score_effect_sizes[marker_idx] = dxx;
       slen = strlen_se(bufptr_arr[allele_idx]);
       if (slen == 1) {
@@ -461,7 +464,7 @@ int32_t dosage_load_score_files(Score_info* sc_ip, char* outname, char* outname_
   return retval;
 }
 
-int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* outname, char* outname_end, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* makepheno_str, char* phenoname_str, char* covar_fname, Two_col_params* qual_filter, Two_col_params* update_map, Two_col_params* update_name, char* update_ids_fname, char* update_parents_fname, char* update_sex_fname, char* filtervals_ [...]
+int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* outname, char* outname_end, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* makepheno_str, char* phenoname_str, char* covar_fname, Two_col_params* qual_filter, Two_col_params* update_map, Two_col_params* update_name, char* update_ids_fname, char* update_parents_fname, char* update_sex_fname, char* filtervals_ [...]
   // sucks to duplicate so much, but this code will be thrown out later so
   // there's no long-term maintenance problem
   FILE* phenofile = NULL;
@@ -701,7 +704,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       goto plink1_dosage_ret_1;
     }
   }
-  retval = load_fam(famname, fam_cols, uii, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, &unfiltered_sample_ct, &sample_ids, &max_sample_id_len, &paternal_ids, &max_paternal_id_len, &maternal_ids, &max_maternal_id_len, &sex_nm, &sex_male, &affection, &pheno_nm, &pheno_c, &pheno_d, &founder_info, &sample_exclude);
+  retval = load_fam(famname, fam_cols, uii, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, &unfiltered_sample_ct, &sample_ids, &max_sample_id_len, &paternal_ids, &max_paternal_id_len, &maternal_ids, &max_maternal_id_len, &sex_nm, &sex_male, &affection, &pheno_nm, &pheno_c, &pheno_d, &founder_info, &sample_exclude, 0);
   if (retval) {
     goto plink1_dosage_ret_1;
   }
@@ -786,7 +789,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       }
       if (extractname) {
 	if (!(misc_flags & MISC_EXTRACT_RANGE)) {
-	  retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	  retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
 	  if (retval) {
 	    goto plink1_dosage_ret_1;
 	  }
@@ -795,7 +798,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
 	    logerrprint("Error: '--extract range' requires a sorted .bim.  Retry this command after\nusing --make-bed to sort your data.\n");
 	    goto plink1_dosage_ret_INVALID_CMDLINE;
 	  }
-	  retval = extract_exclude_range(extractname, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0, chrom_info_ptr);
+	  retval = extract_exclude_range(extractname, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0, 0, chrom_info_ptr);
 	  if (retval) {
 	    goto plink1_dosage_ret_1;
 	  }
@@ -805,7 +808,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       }
       if (excludename) {
 	if (!(misc_flags & MISC_EXCLUDE_RANGE)) {
-	  retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	  retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
 	  if (retval) {
 	    goto plink1_dosage_ret_1;
 	  }
@@ -814,7 +817,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
 	    logerrprint("Error: '--exclude range' requires a sorted .bim.  Retry this command after\nusing --make-bed to sort your data.\n");
 	    goto plink1_dosage_ret_INVALID_CMDLINE;
 	  }
-	  retval = extract_exclude_range(excludename, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 1, chrom_info_ptr);
+	  retval = extract_exclude_range(excludename, marker_pos, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 1, 0, chrom_info_ptr);
 	  if (retval) {
 	    goto plink1_dosage_ret_1;
 	  }
@@ -823,13 +826,13 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
 	}
       }
       if (filter_attrib_fname) {
-	retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink1_dosage_ret_1;
 	}
       }
       if (qual_filter) {
-	retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink1_dosage_ret_1;
 	}
@@ -837,10 +840,10 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       wkspace_reset(wkspace_mark);
     }
     if (thin_keep_prob != 1.0) {
-      if (random_thin_markers(thin_keep_prob, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct)) {
+      if (random_thin_markers(thin_keep_prob, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0)) {
 	goto plink1_dosage_ret_ALL_MARKERS_EXCLUDED;
       }
-    } else if (thin_keep_ct) {
+    } else if (thin_keep_ct != 0xffffffffU) {
       retval = random_thin_markers_ct(thin_keep_ct, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
       if (retval) {
 	goto plink1_dosage_ret_1;
@@ -874,31 +877,31 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       }
     }
     if (keepfamname) {
-      retval = keep_or_remove(keepfamname, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 2);
+      retval = keep_or_remove(keepfamname, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 2, 0);
       if (retval) {
 	goto plink1_dosage_ret_1;
       }
     }
     if (keepname) {
-      retval = keep_or_remove(keepname, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 0);
+      retval = keep_or_remove(keepname, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 0, 0);
       if (retval) {
 	goto plink1_dosage_ret_1;
       }
     }
     if (removefamname) {
-      retval = keep_or_remove(removefamname, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 3);
+      retval = keep_or_remove(removefamname, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 3, 0);
       if (retval) {
 	goto plink1_dosage_ret_1;
       }
     }
     if (removename) {
-      retval = keep_or_remove(removename, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 1);
+      retval = keep_or_remove(removename, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, 1, 0);
       if (retval) {
 	goto plink1_dosage_ret_1;
       }
     }
     if (filter_attrib_sample_fname) {
-      retval = filter_attrib_sample(filter_attrib_sample_fname, filter_attrib_sample_liststr, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct);
+      retval = filter_attrib_sample(filter_attrib_sample_fname, filter_attrib_sample_liststr, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, 0, sample_exclude, &sample_exclude_ct);
       if (retval) {
 	goto plink1_dosage_ret_1;
       }
@@ -907,7 +910,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       if (!mfilter_col) {
 	mfilter_col = 1;
       }
-      retval = filter_samples_file(filtername, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, filtervals_flattened, mfilter_col);
+      retval = filter_samples_file(filtername, sorted_sample_ids, ulii, max_sample_id_len, sample_id_map, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, filtervals_flattened, mfilter_col, 0);
       if (retval) {
 	goto plink1_dosage_ret_1;
       }
@@ -971,9 +974,13 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
     LOGPRINTF("%d %s removed due to founder status (--filter-%s).\n", ii, species_str(ii), (filter_flags & FILTER_BINARY_FOUNDERS)? "founders" : "nonfounders");
   }
   if (cluster_ptr->fname || (misc_flags & MISC_FAMILY_CLUSTERS)) {
-    retval = load_clusters(cluster_ptr->fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, cluster_ptr->keep_fname, cluster_ptr->keep_flattened, cluster_ptr->remove_fname, cluster_ptr->remove_flattened);
-    if (retval) {
-      goto plink1_dosage_ret_1;
+    if (cluster_ptr->keep_fname || cluster_ptr->keep_flattened || cluster_ptr->remove_fname || cluster_ptr->remove_flattened) {
+      retval = load_clusters(cluster_ptr->fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, cluster_ptr->keep_fname, cluster_ptr->keep_flattened, cluster_ptr->remove_fname, cluster_ptr->remove_flattened, 0);
+      if (retval) {
+        goto plink1_dosage_ret_1;
+      }
+    } else {
+      logerrprint("Warning: Ignoring --within/--family since it has no effect.  (PLINK 1.07's\nundocumented Huber-White standard error computation is currently disabled.)\n");
     }
   }
   sample_ct = unfiltered_sample_ct - sample_exclude_ct;
@@ -998,7 +1005,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
     logprint("Using 1 thread.\n");
   }
 #ifndef NOLAPACK
-  if (uii && ((!known_procs) || (known_procs * 2 >= g_thread_ct))) {
+  if (uii && ((!known_procs) || (known_procs >= 2 * ((int32_t)g_thread_ct)))) {
     logerrprint("Warning: This run includes BLAS/LAPACK linear algebra operations which\ncurrently disregard the --threads limit.  If this is problematic, you may want\nto recompile against single-threaded BLAS/LAPACK.\n");
   }
 #endif
@@ -1102,7 +1109,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
 #endif
   }
   if (do_score) {
-    retval = dosage_load_score_files(sc_ip, outname, outname_end, &score_marker_ct, &max_score_marker_id_len, &score_marker_ids, &score_allele_codes, &score_effect_sizes, &score_qrange_key_exists, &score_qrange_keys, &qrange_ct, &max_qrange_name_len, &score_qrange_names, &score_qrange_bounds);
+    retval = dosage_load_score_files(sc_ip, outname, outname_end, (doip->modifier & DOSAGE_SCORE_DOUBLE), &score_marker_ct, &max_score_marker_id_len, &score_marker_ids, &score_allele_codes, &score_effect_sizes, &score_qrange_key_exists, &score_qrange_keys, &qrange_ct, &max_qrange_name_len, &score_qrange_names, &score_qrange_bounds);
     if (retval) {
       goto plink1_dosage_ret_1;
     }
@@ -1516,15 +1523,15 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
     }
     bufptr2 = memcpyb(outname_end, ".out.dosage", 12);
   }
-  if (output_gz) {
-    memcpy(bufptr2, ".gz", 4);
-  }
-  if (flex_pzwrite_init(output_gz, outname, overflow_buf, 0, &ps)) {
-    goto plink1_dosage_ret_OPEN_FAIL;
-  }
-  pzwritep = (char*)overflow_buf;
 
   if (!do_score) {
+    if (output_gz) {
+      memcpy(bufptr2, ".gz", 4);
+    }
+    if (flex_pzwrite_init(output_gz, outname, overflow_buf, 0, &ps)) {
+      goto plink1_dosage_ret_OPEN_FAIL;
+    }
+    pzwritep = (char*)overflow_buf;
     if (do_glm) {
       pzwritep = memcpya(pzwritep, tbuf, bufptr - tbuf);
     } else if (!count_occur) {
@@ -1922,19 +1929,26 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
           dxx = dzz * (1.0 - dzz); // now dxx = theoretical var
 	  dyy = 2 * dyy * sample_valid_ct_recip; // and dyy = empirical
 	  rsq = (dxx > 0.0)? (dyy / dxx) : 0.0;
+	  if ((dxx >= 0.0098999999999999) && (rsq >= 0.1) && (rsq <= 2.0)) {
 #ifndef NOLAPACK
-	  if (pheno_d) {
-	    is_valid = glm_linear_dosage(sample_ct, cur_samples, sample_valid_ct, pheno_nm_collapsed, pheno_d_collapsed, perm_fails, covar_ct, covar_d, cur_dosages, pheno_d2, covars_cov_major_buf, covars_sample_major_buf, param_2d_buf, mi_buf, param_2d_buf2, regression_results, dgels_a, dgels_b, dgels_work, dgels_lwork, standard_beta, glm_vif_thresh, &beta, &se, &pval);
-	    if (is_valid == 2) {
-	      // NOMEM special case
-	      goto plink1_dosage_ret_NOMEM;              
-	    }
-	  } else {
+	    if (pheno_d) {
+	      is_valid = glm_linear_dosage(sample_ct, cur_samples, sample_valid_ct, pheno_nm_collapsed, pheno_d_collapsed, perm_fails, covar_ct, covar_d, cur_dosages, pheno_d2, covars_cov_major_buf, covars_sample_major_buf, param_2d_buf, mi_buf, param_2d_buf2, regression_results, dgels_a, dgels_b, dgels_work, dgels_lwork, standard_beta, glm_vif_thresh, &beta, &se, &pval);
+	      if (is_valid == 2) {
+		// NOMEM special case
+		goto plink1_dosage_ret_NOMEM;              
+	      }
+	    } else {
 #endif
-	    is_valid = glm_logistic_dosage(sample_ct, cur_samples, sample_valid_ct, pheno_nm_collapsed, pheno_c_collapsed, perm_vec, perm_fails, covar_ct, covar_f, cur_dosages, coef_f, pp_f, pheno_buf_f, covars_cov_major_f_buf, param_1d_buf_f, param_1d_buf2_f, param_2d_buf_f, param_2d_buf2_f, regression_results_f, sample_1d_buf_f, &beta, &se, &pval);
+	      is_valid = glm_logistic_dosage(sample_ct, cur_samples, sample_valid_ct, pheno_nm_collapsed, pheno_c_collapsed, perm_vec, perm_fails, covar_ct, covar_f, cur_dosages, coef_f, pp_f, pheno_buf_f, covars_cov_major_f_buf, param_1d_buf_f, param_1d_buf2_f, param_2d_buf_f, param_2d_buf2_f, regression_results_f, sample_1d_buf_f, &beta, &se, &pval);
 #ifndef NOLAPACK
-	  }
+	    }
 #endif
+	  } else {
+	    is_valid = 0;
+	    if (rsq > 2.0) {
+	      rsq = 2.0;
+	    }
+	  }
 	  if (load_map) {
 	    pzwritep = width_force(4, pzwritep, chrom_name_write(pzwritep, chrom_info_ptr, get_marker_chrom(chrom_info_ptr, marker_idx)));
 	    *pzwritep++ = ' ';
diff --git a/plink_dosage.h b/plink_dosage.h
index 7530915..5df6173 100644
--- a/plink_dosage.h
+++ b/plink_dosage.h
@@ -15,8 +15,9 @@
 #define DOSAGE_SCORE 0x100
 #define DOSAGE_SCORE_NOSUM 0x200
 #define DOSAGE_SCORE_CNT 0x400
-#define DOSAGE_SEX 0x800
-#define DOSAGE_FREQ_CC 0x1000
+#define DOSAGE_SCORE_DOUBLE 0x800
+#define DOSAGE_SEX 0x1000
+#define DOSAGE_FREQ_CC 0x2000
 
 typedef struct {
   char* fname;
@@ -31,6 +32,6 @@ void dosage_init(Dosage_info* doip);
 
 void dosage_cleanup(Dosage_info* doip);
 
-int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* outname, char* outname_end, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* makepheno_str, char* phenoname_str, char* covar_fname, Two_col_params* qual_filter, Two_col_params* update_map, Two_col_params* update_name, char* update_ids_fname, char* update_parents_fname, char* update_sex_fname, char* filtervals_ [...]
+int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* outname, char* outname_end, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* makepheno_str, char* phenoname_str, char* covar_fname, Two_col_params* qual_filter, Two_col_params* update_map, Two_col_params* update_name, char* update_ids_fname, char* update_parents_fname, char* update_sex_fname, char* filtervals_ [...]
 
 #endif
diff --git a/plink_family.c b/plink_family.c
index 53806a2..12cee9e 100644
--- a/plink_family.c
+++ b/plink_family.c
@@ -3,6 +3,7 @@
 #include "plink_assoc.h"
 #include "plink_cluster.h"
 #include "plink_family.h"
+#include "plink_perm.h"
 #include "plink_stats.h"
 
 void family_init(Family_info* fam_ip) {
@@ -482,10 +483,10 @@ uint32_t erase_mendel_errors(uintptr_t unfiltered_sample_ct, uintptr_t* loadbuf,
       uii = *uiptr++;
       ujj = *uiptr++;
       ukk = *uiptr++;
-      umm = (workbuf[uii / BITCT2] >> (2 * (uii % BITCT2))) & 3;
-      unn = (workbuf[ukk / BITCT2] >> (2 * (ukk % BITCT2))) & 3;
+      umm = EXTRACT_2BIT_GENO(workbuf, uii);
+      unn = EXTRACT_2BIT_GENO(workbuf, ukk);
       if ((!is_x) || (!is_set(sex_male, uii))) {
-        umm = mendel_error_table[umm | (((workbuf[ujj / BITCT2] >> (2 * (ujj % BITCT2))) & 3) << 2) | (unn << 4)];
+        umm = mendel_error_table[umm | (EXTRACT_2BIT_GENO(workbuf, ujj) << 2) | (unn << 4)];
       } else {
 	umm = mendel_error_table_male_x[umm | (unn << 2)];
       }
@@ -684,7 +685,7 @@ void fill_mendel_errstr(uint32_t error_code, char** allele_ptrs, uint32_t* alens
   *len_ptr = (uintptr_t)(wptr - wbuf);
 }
 
-int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t plink_maxfid, uint32_t plink_maxiid, uint32_t plink_maxsnp, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_e [...]
+int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t plink_maxfid, uint32_t plink_maxiid, uint32_t plink_maxsnp, uint32_t allow_no_variants, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t* sample_ [...]
   unsigned char* wkspace_mark = wkspace_base;
   FILE* outfile = NULL;
   FILE* outfile_l = NULL;
@@ -887,10 +888,10 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
 	  uii = *uiptr++;
 	  ujj = *uiptr++;
 	  ukk = *uiptr++;
-	  umm = (loadbuf[uii / BITCT2] >> (2 * (uii % BITCT2))) & 3;
-	  unn = (loadbuf[ukk / BITCT2] >> (2 * (ukk % BITCT2))) & 3;
+	  umm = EXTRACT_2BIT_GENO(loadbuf, uii);
+	  unn = EXTRACT_2BIT_GENO(loadbuf, ukk);
 	  if ((!is_x) || (!is_set(sex_male, uii))) {
-            umm = mendel_error_table[umm | (((loadbuf[ujj / BITCT2] >> (2 * (ujj % BITCT2))) & 3) << 2) | (unn << 4)];
+            umm = mendel_error_table[umm | (EXTRACT_2BIT_GENO(loadbuf, ujj) << 2) | (unn << 4)];
 	  } else {
 	    umm = mendel_error_table_male_x[umm | (unn << 2)];
 	  }
@@ -924,7 +925,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
 	  ujj = *uiptr++;
 	  ukk = *uiptr++;
           trio_idx = *uiptr++;
-          uljj = ((loadbuf[ujj / BITCT2] >> (2 * (ujj % BITCT2))) & 3) | (((loadbuf[ukk / BITCT2] >> (2 * (ukk % BITCT2))) & 3) << 2);
+          uljj = EXTRACT_2BIT_GENO(loadbuf, ujj) | (EXTRACT_2BIT_GENO(loadbuf, ukk) << 2);
 	  umm = uii / BITCT2;
 	  ujj = 2 * (uii % BITCT2);
 	  ulii = (loadbuf[umm] >> ujj) & 3;
@@ -1179,7 +1180,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
   }
   if (fam_ip->mendel_modifier & MENDEL_FILTER) {
     *marker_exclude_ct_ptr += new_marker_exclude_ct;
-    if (unfiltered_marker_ct == *marker_exclude_ct_ptr) {
+    if ((unfiltered_marker_ct == *marker_exclude_ct_ptr) && (!allow_no_variants)) {
       logerrprint("Error: All variants excluded by --me.\n");
       goto mendel_error_scan_ret_ALL_MARKERS_EXCLUDED;
     }
@@ -1847,8 +1848,8 @@ int32_t tdt_poo(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* o
         uii = *lookup_ptr++;
         ujj = *lookup_ptr++;
         cur_child_ct = *lookup_ptr++;
-        ulii = (loadbuf[uii / BITCT2] >> (2 * (uii % BITCT2))) & 3;
-        uljj = (loadbuf[ujj / BITCT2] >> (2 * (ujj % BITCT2))) & 3;
+        ulii = EXTRACT_2BIT_GENO(loadbuf, uii);
+        uljj = EXTRACT_2BIT_GENO(loadbuf, ujj);
         ukk = ulii | (uljj << 2);
 	if ((0x4d04 >> ukk) & 1) {
 	  // 1+ het parents, no missing
@@ -1857,7 +1858,7 @@ int32_t tdt_poo(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* o
 	  poo_table_ptr = &(poo_table[4 * ukk]);
           for (child_idx = 0; child_idx < cur_child_ct; child_idx++) {
             ukk = *lookup_ptr++;
-            poo_acc += poo_table_ptr[(loadbuf[ukk / BITCT2] >> (2 * (ukk % BITCT2))) & 3];
+            poo_acc += poo_table_ptr[EXTRACT_2BIT_GENO(loadbuf, ukk)];
 	    if (++poo_acc_ct == 127) {
 	      // accumulator about to overflow, unpack it
               poo_obs_pat_x2 += (unsigned char)poo_acc;
@@ -2296,8 +2297,8 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
 	uii = *lookup_ptr++;
 	ujj = *lookup_ptr++;
 	cur_child_ct = *lookup_ptr++;
-        ulii = (loadbuf[uii / BITCT2] >> (2 * (uii % BITCT2))) & 3;
-        uljj = (loadbuf[ujj / BITCT2] >> (2 * (ujj % BITCT2))) & 3;
+        ulii = EXTRACT_2BIT_GENO(loadbuf, uii);
+        uljj = EXTRACT_2BIT_GENO(loadbuf, ujj);
         ukk = ulii | (uljj << 2);
 	if (cur_child_ct & 0x80000000U) {
           // discordant
@@ -2326,7 +2327,7 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
           tdt_table_ptr = &(tdt_table[4 * (ulii ^ uljj)]);
           for (child_idx = 0; child_idx < cur_child_ct; child_idx++) {
             ukk = *lookup_ptr++;
-	    umm = tdt_table_ptr[(loadbuf[ukk / BITCT2] >> (2 * (ukk % BITCT2))) & 3];
+	    umm = tdt_table_ptr[EXTRACT_2BIT_GENO(loadbuf, ukk)];
 	    tdt_obs_ct += (uint16_t)umm;
             tdt_a1_trans_ct += umm >> 16;
 	  }
@@ -2866,12 +2867,27 @@ int32_t get_sibship_info(uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclu
 
 // multithread globals
 static double* g_maxt_extreme_stat;
-// static double* g_maxt_thread_results;
+static double* g_maxt_thread_results;
 static double* g_mperm_save_all;
 static uintptr_t* g_pheno_c;
-// static uintptr_t* g_dfam_flipa;
-// static uintptr_t* g_dfam_perm_vecs;
-// static uintptr_t* g_dfam_perm_vecst;
+static uintptr_t* g_dfam_flipa;
+#ifdef __LP64__
+static uintptr_t* g_dfam_flipa_shuffled;
+#endif
+static uintptr_t* g_dfam_perm_vecs;
+static uintptr_t* g_dfam_perm_vecst; // sample-major, shuffled
+static double* g_dfam_numers;
+static double* g_dfam_denoms;
+static uintptr_t* g_dfam_acc;
+static int32_t* g_dfam_twice_numers;
+static uint32_t* g_dfam_total_counts;
+static uint32_t* g_dfam_iteration_order;
+static uintptr_t g_perm_vec_ct;
+static uint32_t g_dfam_family_all_case_children_ct;
+static uint32_t g_dfam_family_mixed_ct;
+static uint32_t g_dfam_sibship_mixed_ct;
+static uint32_t g_dfam_unrelated_cluster_ct;
+static uint32_t g_dfam_sample_ct;
 
 static uintptr_t* g_loadbuf;
 static uintptr_t* g_lm_eligible;
@@ -2899,7 +2915,7 @@ static uintptr_t g_cur_perm_ct;
 static double g_qt_sum_all;
 static double g_qt_ssq_all;
 static uint32_t g_test_type;
-static uint32_t g_qfam_thread_ct;
+static uint32_t g_xfam_thread_ct;
 static uint32_t g_fs_ct;
 static uint32_t g_singleton_ct;
 static uint32_t g_lm_ct;
@@ -2921,6 +2937,832 @@ const uint8_t dfam_allele_ct_table[] =
  3, 0, 2, 1,
  0, 0, 1, 0};
 
+void dfam_sibship_or_unrelated_perm_calc(uintptr_t* loadbuf_ptr, const uint32_t* cur_dfam_ptr, const uintptr_t* perm_vecst, const uintptr_t* orig_pheno_c, uint32_t sibling_ct, uint32_t is_unrelated_calc, uintptr_t perm_vec_ct,
+#ifdef __LP64__
+					 __m128i* acc4, __m128i* acc8,
+#else
+					 uintptr_t* acc4, uintptr_t* acc8,
+#endif
+					 uint32_t* cur_case_a1_cts, uint32_t* cur_case_missing_cts, int32_t* twice_numers, double* numers, double* denoms, uint32_t* total_counts) {
+  // okay, compute array of familial/sibship case_a1_ct values.  Most
+  // families/sibships should have 7 or fewer children, so it makes sense
+  // to use 4-bit accumulators in the inner loop (similar to calc_git()
+  // in plink_assoc.c).
+  uintptr_t perm_vec_ct128 = (perm_vec_ct + 127) / 128;
+  const uintptr_t perm_vec_wcta = perm_vec_ct128 * (128 / BITCT);
+  uint32_t cur_genotype_cts[4];
+#ifdef __LP64__
+  const __m128i m1x4 = {0x1111111111111111LLU, 0x1111111111111111LLU};
+  const __m128i m1x4ls1 = {0x2222222222222222LLU, 0x2222222222222222LLU};
+  uintptr_t acc4_word_ct = perm_vec_ct128 * 8;
+  // uintptr_t acc8_word_ct = perm_vec_ct128 * 16;
+  uintptr_t acc4_vec_ct = perm_vec_ct128 * 4;
+  uintptr_t acc8_vec_ct = acc4_word_ct;
+  const __m128i* pheno_perm_ptr;
+  __m128i* acc4_ptr;
+  __m128i loader;
+#else
+  uintptr_t acc4_word_ct = perm_vec_ct128 * 16;
+  uintptr_t acc8_word_ct = perm_vec_ct128 * 32;
+  uintptr_t perm_vec_wct = (perm_vec_ct + (BITCT - 1)) / BITCT;
+  const uintptr_t* pheno_perm_ptr;
+  uintptr_t* acc4_ptr;
+  uintptr_t loader;
+#endif
+  uint32_t case_ct_base = 0;
+  uintptr_t perm_idx;
+  double total_ctd;
+  double total_ct_recip;
+  double xxm1_recip;
+  double hom_a1_ctd;
+  double het_ctd;
+  double case_ctd;
+  double ctrl_ctd;
+  double case_proportion;
+  double case_expected_hom_a1;
+  double case_expected_het;
+  double case_ctrl_div_xxxm1;
+  double case_var_hom_a1;
+  double case_var_het;
+  double case_neg_covar;
+  double case_expected_a1_ct;
+  double case_var_a1_ct;
+  double case_a1_ctd;
+  double dbl_total_ctd;
+  uint32_t sib_idx;
+  uint32_t sample_idx;
+  uint32_t cur_geno;
+  uint32_t geno_match;
+  uint32_t cur_case_ct;
+  uint32_t case_missing_ct;
+  uint32_t case_a1_ct;
+  uint32_t total_ct;
+  uint32_t cur_ctrl_ct;
+  uint32_t max_incr4;
+  uint32_t max_incr8;
+  uint32_t uii;
+  // first check if all genotypes are identical
+  fill_uint_zero(cur_genotype_cts, 4);
+  for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+    sample_idx = cur_dfam_ptr[sib_idx];
+    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+    cur_genotype_cts[cur_geno] += 1;
+    case_ct_base += IS_SET(orig_pheno_c, sample_idx);
+  }
+  cur_geno = 4;
+  for (geno_match = 0; geno_match < 4; geno_match++) {
+    if (cur_genotype_cts[geno_match]) {
+      if (cur_geno != 4) {
+	break;
+      }
+      cur_geno = geno_match;
+    }
+  }
+  if (geno_match == 4) {
+    if ((!is_unrelated_calc) && (!(cur_geno % 2))) {
+      if (!cur_geno) {
+	uii = cur_genotype_cts[0] * 2;
+      } else {
+	uii = cur_genotype_cts[0];
+      }
+      for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+	total_counts[perm_idx] += uii;
+      }
+    }
+    return;
+  }
+
+#ifdef __LP64__
+  fill_v128_zero(acc4, acc4_vec_ct);
+  fill_v128_zero(acc8, acc8_vec_ct);
+#else
+  fill_ulong_zero(acc4, acc4_word_ct);
+  fill_ulong_zero(acc8, acc8_word_ct);
+#endif
+  fill_uint_zero(cur_case_a1_cts, perm_vec_ct);
+  max_incr4 = 0;
+  max_incr8 = 0;
+  for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+    sample_idx = cur_dfam_ptr[sib_idx];
+    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+    if (cur_geno & 1) {
+      continue;
+    }
+    uii = 2 - (cur_geno / 2);
+#ifdef __LP64__
+    if (max_incr4 + uii > 15) {
+      unroll_zero_incr_4_8(acc4, acc8, acc4_vec_ct);
+      max_incr8 += max_incr4;
+      if (max_incr8 > 240) {
+	unroll_zero_incr_8_32(acc8, (__m128i*)cur_case_a1_cts, acc8_vec_ct);
+	max_incr8 = 0;
+      }
+      max_incr4 = 0;
+    }
+    max_incr4 += uii;
+    pheno_perm_ptr = (const __m128i*)(&(perm_vecst[sample_idx * perm_vec_wcta]));
+    if (cur_geno) {
+      unroll_incr_1_4(pheno_perm_ptr, acc4, perm_vec_ct128);
+    } else {
+      // add 2 whenever this sample x permutation is a case
+      acc4_ptr = acc4;
+      for (uii = 0; uii < acc4_vec_ct; uii++) {
+	loader = *pheno_perm_ptr++;
+	acc4_ptr[0] = _mm_add_epi64(acc4_ptr[0], _mm_slli_epi64(_mm_and_si128(loader, m1x4), 1));
+	acc4_ptr[1] = _mm_add_epi64(acc4_ptr[1], _mm_and_si128(loader, m1x4ls1));
+	loader = _mm_srli_epi64(loader, 1);
+	acc4_ptr[2] = _mm_add_epi64(acc4_ptr[2], _mm_and_si128(loader, m1x4ls1));
+	loader = _mm_srli_epi64(loader, 1);
+	acc4_ptr[3] = _mm_add_epi64(acc4_ptr[3], _mm_and_si128(loader, m1x4ls1));
+	acc4_ptr = &(acc4_ptr[4]);
+      }
+    }
+#else
+    if (max_incr4 + uii > 15) {
+      unroll_zero_incr_4_8(acc4, acc8, acc4_word_ct);
+      max_incr8 += max_incr4;
+      if (max_incr8 > 240) {
+	unroll_zero_incr_8_32(acc8, (uintptr_t*)cur_case_a1_cts, acc8_word_ct);
+	max_incr8 = 0;
+      }
+      max_incr4 = 0;
+    }
+    max_incr4 += uii;
+    pheno_perm_ptr = &(perm_vecst[sample_idx * perm_vec_wcta]);
+    if (cur_geno) {
+      unroll_incr_1_4(pheno_perm_ptr, acc4, perm_vec_wct);
+    } else {
+      acc4_ptr = acc4;
+      for (uii = 0; uii < perm_vec_wct; uii++) {
+	loader = *pheno_perm_ptr++;
+	acc4_ptr[0] += (loader & 0x11111111U) << 1;
+	acc4_ptr[1] += loader & 0x22222222U;
+	acc4_ptr[2] += (loader >> 1) & 0x22222222U;
+	acc4_ptr[3] += (loader >> 2) & 0x22222222U;
+	acc4_ptr = &(acc4_ptr[4]);
+      }
+    }
+#endif
+  }
+#ifdef __LP64__
+  unroll_incr_4_8(acc4, acc8, acc4_vec_ct);
+  unroll_incr_8_32(acc8, (__m128i*)cur_case_a1_cts, acc8_vec_ct);
+#else
+  unroll_incr_4_8(acc4, acc8, acc4_word_ct);
+  unroll_incr_8_32(acc8, (uintptr_t*)cur_case_a1_cts, acc8_word_ct);
+#endif
+
+  if (!cur_genotype_cts[1]) {
+    // optimize the common no-missing-genotypes case
+    total_ctd = (double)((int32_t)sibling_ct);
+    total_ct_recip = 1.0 / total_ctd;
+    case_ctd = (double)((int32_t)case_ct_base);
+    case_proportion = case_ctd * total_ct_recip;
+    cur_ctrl_ct = sibling_ct - case_ct_base;
+    ctrl_ctd = (double)((int32_t)cur_ctrl_ct);
+    if (!is_unrelated_calc) {
+      // actually ctrl_ct/(x(x-1)), not 1/(x(x-1))
+      xxm1_recip = ctrl_ctd * total_ct_recip / ((double)((int32_t)(sibling_ct - 1)));
+      hom_a1_ctd = (double)((int32_t)cur_genotype_cts[0]);
+      het_ctd = (double)((int32_t)cur_genotype_cts[2]);
+      case_expected_hom_a1 = case_proportion * hom_a1_ctd;
+      case_expected_het = case_proportion * het_ctd;
+      case_ctrl_div_xxxm1 = case_proportion * xxm1_recip;
+      case_var_hom_a1 = case_ctrl_div_xxxm1 * hom_a1_ctd * (total_ctd - hom_a1_ctd);
+      case_var_het = case_ctrl_div_xxxm1 * het_ctd * (total_ctd - het_ctd);
+      case_neg_covar = case_ctrl_div_xxxm1 * het_ctd;
+      case_expected_a1_ct = 2 * case_expected_hom_a1 + case_expected_het;
+      case_var_a1_ct = 4 * (case_var_hom_a1 + case_neg_covar) + case_var_het;
+      for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+	case_a1_ct = cur_case_a1_cts[perm_idx];
+	total_counts[perm_idx] += case_a1_ct;
+	numers[perm_idx] += (double)((int32_t)case_a1_ct) - case_expected_a1_ct;
+	denoms[perm_idx] += case_var_a1_ct;
+      }
+    } else {
+      // actually ctrl_ct/(x(2x-1)), not 1/(x(x-1))
+      xxm1_recip = ctrl_ctd * total_ct_recip / ((double)((int32_t)(2 * sibling_ct - 1)));
+      dbl_total_ctd = 2 * total_ctd;
+      for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+	case_a1_ct = cur_case_a1_cts[perm_idx];
+	case_a1_ctd = (double)((int32_t)case_a1_ct);
+	case_expected_a1_ct = case_proportion * case_a1_ctd;
+	case_var_a1_ct = case_expected_a1_ct * (dbl_total_ctd - case_a1_ctd) * xxm1_recip;
+	total_counts[perm_idx] += case_a1_ct;
+	numers[perm_idx] += case_a1_ctd - case_expected_a1_ct;
+	denoms[perm_idx] += case_var_a1_ct;
+      }
+    }
+    return;
+  }
+
+#ifdef __LP64__
+  fill_v128_zero(acc4, acc4_vec_ct);
+  fill_v128_zero(acc8, acc8_vec_ct);
+#else
+  fill_ulong_zero(acc4, acc4_word_ct);
+  fill_ulong_zero(acc8, acc8_word_ct);
+#endif
+  fill_uint_zero(cur_case_missing_cts, perm_vec_ct);
+  uii = 0;
+  for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+    sample_idx = cur_dfam_ptr[sib_idx];
+    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+    if (cur_geno != geno_match) {
+      continue;
+    }
+
+#ifdef __LP64__
+    pheno_perm_ptr = (const __m128i*)(&(perm_vecst[sample_idx * perm_vec_wcta]));
+    unroll_incr_1_4(pheno_perm_ptr, acc4, perm_vec_ct128);
+    if (!(uii % 15)) {
+      unroll_zero_incr_4_8(acc4, acc8, acc4_vec_ct);
+      if (!(uii % 255)) {
+	unroll_zero_incr_8_32(acc8, (__m128i*)cur_case_missing_cts, acc8_vec_ct);
+      }
+    }
+#else
+    pheno_perm_ptr = &(perm_vecst[sample_idx * perm_vec_wcta]);
+    unroll_incr_1_4(pheno_perm_ptr, acc4, perm_vec_wct);
+    if (!(uii % 15)) {
+      unroll_zero_incr_4_8(acc4, acc8, acc4_word_ct);
+      if (!(uii % 255)) {
+	unroll_zero_incr_8_32(acc8, (uintptr_t*)cur_case_missing_cts, acc8_word_ct);
+      }
+    }
+#endif
+  }
+  if (uii % 255) {
+#ifdef __LP64__
+    if (uii % 15) {
+      unroll_incr_4_8(acc4, acc8, acc4_vec_ct);
+    }
+    unroll_incr_8_32(acc8, (__m128i*)cur_case_missing_cts, acc8_vec_ct);
+#else
+    if (uii % 15) {
+      unroll_incr_4_8(acc4, acc8, acc4_word_ct);
+    }
+    unroll_incr_8_32(acc8, (uintptr_t*)cur_case_missing_cts, acc8_word_ct);
+#endif
+  }
+
+  total_ct = sibling_ct - cur_genotype_cts[1];
+  total_ctd = (double)((int32_t)total_ct);
+  total_ct_recip = 1.0 / total_ctd;
+  if (!is_unrelated_calc) {
+    xxm1_recip = total_ct_recip / ((double)((int32_t)(total_ct - 1)));
+    hom_a1_ctd = (double)((int32_t)cur_genotype_cts[0]);
+    het_ctd = (double)((int32_t)cur_genotype_cts[2]);
+    for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+      case_missing_ct = cur_case_missing_cts[perm_idx];
+      cur_case_ct = case_ct_base - case_missing_ct;
+      cur_ctrl_ct = total_ct - cur_case_ct;
+      if ((!cur_case_ct) || (!cur_ctrl_ct)) {
+	continue;
+      }
+      case_ctd = (double)((int32_t)cur_case_ct);
+      ctrl_ctd = (double)((int32_t)cur_ctrl_ct);
+      case_a1_ct = cur_case_a1_cts[perm_idx];
+      case_proportion = case_ctd * total_ct_recip;
+      case_expected_hom_a1 = case_proportion * hom_a1_ctd;
+      case_expected_het = case_proportion * het_ctd;
+      case_ctrl_div_xxxm1 = case_proportion * ctrl_ctd * xxm1_recip;
+      case_var_hom_a1 = case_ctrl_div_xxxm1 * hom_a1_ctd * (total_ctd - hom_a1_ctd);
+      case_var_het = case_ctrl_div_xxxm1 * het_ctd * (total_ctd - het_ctd);
+      case_neg_covar = case_ctrl_div_xxxm1 * het_ctd;
+      case_expected_a1_ct = 2 * case_expected_hom_a1 + case_expected_het;
+      case_var_a1_ct = 4 * (case_var_hom_a1 + case_neg_covar) + case_var_het;
+      total_counts[perm_idx] += case_a1_ct;
+      numers[perm_idx] += (double)((int32_t)case_a1_ct) - case_expected_a1_ct;
+      denoms[perm_idx] += case_var_a1_ct;
+    }
+  } else {
+    // actually 1/(x(2x-1)), not 1/(x(x-1))
+    xxm1_recip = total_ct_recip / ((double)((int32_t)(2 * total_ct - 1)));
+    dbl_total_ctd = 2 * total_ctd;
+
+    for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+      case_missing_ct = cur_case_missing_cts[perm_idx];
+      cur_case_ct = case_ct_base - case_missing_ct;
+      cur_ctrl_ct = total_ct - cur_case_ct;
+      if ((!cur_case_ct) || (!cur_ctrl_ct)) {
+	continue;
+      }
+      case_proportion = ((double)((int32_t)cur_case_ct)) * total_ct_recip;
+      case_a1_ct = cur_case_a1_cts[perm_idx];
+      case_a1_ctd = (double)((int32_t)case_a1_ct);
+      case_expected_a1_ct = case_proportion * case_a1_ctd;
+      case_var_a1_ct = case_expected_a1_ct * (dbl_total_ctd - case_a1_ctd) * ((double)((int32_t)cur_ctrl_ct)) * xxm1_recip;
+      total_counts[perm_idx] += case_a1_ct;
+      numers[perm_idx] += case_a1_ctd - case_expected_a1_ct;
+      denoms[perm_idx] += case_var_a1_ct;
+    }
+  }
+}
+
+// Worker thread for the DFAM permutation test.
+//
+// arg carries the thread index (tidx).  On every pass of the outer loop the
+// thread takes its share of the current marker block (g_block_size markers
+// divided among g_xfam_thread_ct threads) and, for each of those markers,
+// evaluates one chi-square statistic per permutation:
+//   chisq = dxx * dxx / dyy
+//   dxx   = numers[perm] + twice_numers[perm] / 2
+//   dyy   = denoms[perm] + quad_denom / 4
+// The records in g_dfam_iteration_order are consumed in four consecutive
+// groups: family_all_case_children_ct parent/child records, family_mixed_ct
+// parent/child records, sibship_mixed_ct sibship records, and
+// unrelated_cluster_ct cluster records.  The last two groups (and families
+// whose parental genotypes are uninformative) are delegated to
+// dfam_sibship_or_unrelated_perm_calc().
+//
+// When g_test_type is nonzero the adaptive stopping rule is applied
+// (perm_adapt_stop / perm_attempt_ct are updated); otherwise the per-thread
+// max(T) array is updated and, if g_mperm_save_all is set, every statistic is
+// stored.  In both modes perm_2success_ct[marker_idx] receives 2 per
+// permutation above orig_chisq + EPSILON and 1 per permutation within
+// EPSILON of it.
+//
+// Thread 0 returns after each block; the other threads wait in
+// THREAD_BLOCK_FINISH until g_is_last_thread_block is set.
+THREAD_RET_TYPE dfam_perm_thread(void* arg) {
+  uintptr_t tidx = (uintptr_t)arg;
+  uintptr_t perm_vec_ct = g_perm_vec_ct;
+  uintptr_t perm_vec_ct128 = (perm_vec_ct + 127) / 128;
+  uintptr_t perm_vec_cta128 = perm_vec_ct128 * 128;
+  uintptr_t perm_vec_ctcl8m = CACHEALIGN32_DBL(perm_vec_ct);
+  uint32_t dfam_thread_ct = g_xfam_thread_ct;
+  uint32_t pidx_offset = g_perms_done;
+  uint32_t first_adapt_check = g_first_adapt_check;
+  uint32_t family_all_case_children_ct = g_dfam_family_all_case_children_ct;
+  uint32_t family_mixed_ct = g_dfam_family_mixed_ct;
+  uint32_t sibship_mixed_ct = g_dfam_sibship_mixed_ct;
+  uint32_t unrelated_cluster_ct = g_dfam_unrelated_cluster_ct;
+  uint32_t dfam_sample_ct = g_dfam_sample_ct;
+  uint32_t dfam_sample_ctl2 = (dfam_sample_ct + (BITCT2 - 1)) / BITCT2;
+  const uintptr_t perm_vec_wcta = perm_vec_ct128 * (128 / BITCT);
+  const uintptr_t* flipa = g_dfam_flipa;
+  const uintptr_t* perm_vecst = g_dfam_perm_vecst;
+  const uintptr_t* orig_pheno_c = g_pheno_c;
+  int32_t* __restrict__ twice_numers = &(g_dfam_twice_numers[tidx * perm_vec_cta128]);
+  uint32_t* __restrict__ total_counts = &(g_dfam_total_counts[tidx * perm_vec_cta128]);
+  uint32_t* __restrict__ perm_attempt_ct = g_perm_attempt_ct;
+  uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
+  double* __restrict__ mperm_save_all = g_mperm_save_all;
+  double* msa_ptr = NULL;
+  double* numers = &(g_dfam_numers[tidx * perm_vec_cta128]);
+  double* denoms = &(g_dfam_denoms[tidx * perm_vec_cta128]);
+  const uint32_t* dfam_iteration_order = g_dfam_iteration_order;
+  unsigned char* perm_adapt_stop = NULL;
+  double adaptive_intercept = 0.0;
+  double adaptive_slope = 0.0;
+  double adaptive_ci_zt = 0.0;
+  double aperm_alpha = 0.0;
+  double* maxt_results = NULL;
+  uint32_t perm_adapt = g_test_type;
+  uint32_t next_adapt_check = 0;
+  uint32_t cur_case_a1_ct_flip[2];
+#ifdef __LP64__
+  const __m128i m1x8 = {0x0101010101010101LLU, 0x0101010101010101LLU};
+  const __m128i m1x4 = {0x1111111111111111LLU, 0x1111111111111111LLU};
+  const __m128i m1x4ls1 = {0x2222222222222222LLU, 0x2222222222222222LLU};
+  __m128i diff_vec;
+  __m128i incr8;
+  __m128i loader;
+  // acc8 (8-bit accumulator) requires (perm_vec_ct + 7) / 8 words; this is
+  //   16-byte aligned when perm_vec_ct is divisible by 16
+  // acc4 requires (perm_vec_ct + 15) / 16 words
+  // per-thread total is perm_vec_ct128 * 184 words: one acc4 (8), three acc8
+  //   slots (3 * 16; the third is currently unused), and two acc32s (2 * 64)
+  const uintptr_t acc_thread_offset = perm_vec_ct128 * 184;
+  const uintptr_t acc4_word_ct = perm_vec_ct128 * 8;
+  const uintptr_t acc8_word_ct = perm_vec_ct128 * 16;
+  const uintptr_t acc4_vec_ct = perm_vec_ct128 * 4;
+  const uintptr_t acc8_vec_ct = acc4_word_ct;
+  __m128i* acc4 = (__m128i*)(&(g_dfam_acc[tidx * acc_thread_offset]));
+  __m128i* acc8 = (__m128i*)(&(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct]));
+  __m128i* case_a1_ct_acc8 = (__m128i*)(&(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct + acc8_word_ct]));
+  // __m128i* cur_case_ct_acc8 = (__m128i*)(&(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct + 2 * acc8_word_ct]));
+
+  uint32_t* cur_case_a1_cts = (uint32_t*)(&(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct + 3 * acc8_word_ct]));
+  uint32_t* cur_case_missing_cts = (uint32_t*)(&(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct + 7 * acc8_word_ct]));
+
+  const uintptr_t* flipa_shuffled = g_dfam_flipa_shuffled;
+  const __m128i* pheno_perm_ptr;
+  const __m128i* flipa_perm_ptr;
+  __m128i* acc4_ptr;
+  __m128i* acc8_ptr;
+  uintptr_t vidx;
+#else
+  const uintptr_t perm_vec_wct = (perm_vec_ct + (BITCT - 1)) / BITCT;
+  // acc8 requires (perm_vec_ct + 3) / 4 words
+  // acc4 requires (perm_vec_ct + 7) / 8 words
+  // sum reduces to perm_vec_ct128 * 304 since we also have 2 acc32s
+  const uintptr_t acc_thread_offset = perm_vec_ct128 * 304;
+  const uintptr_t acc4_word_ct = perm_vec_ct128 * 16;
+  const uintptr_t acc8_word_ct = perm_vec_ct128 * 32;
+  uintptr_t* acc4 = &(g_dfam_acc[tidx * acc_thread_offset]);
+  uintptr_t* acc8 = &(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct]);
+  uint32_t* cur_case_a1_cts = (uint32_t*)(&(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct + acc8_word_ct]));
+  uint32_t* cur_case_missing_cts = (uint32_t*)(&(g_dfam_acc[tidx * acc_thread_offset + acc4_word_ct + 5 * acc8_word_ct]));
+  const uintptr_t* pheno_perm_ptr;
+  uintptr_t* acc4_ptr;
+  uintptr_t loader;
+  uintptr_t widx;
+#endif
+  uintptr_t perm_idx;
+  const uintptr_t* cur_flipa;
+  double* orig_chisq;
+  const uint32_t* cur_dfam_ptr;
+  uintptr_t* loadbuf_ptr;
+  double chisq_high;
+  double chisq_low;
+  double chisq;
+  double pval;
+  double dxx;
+  double dyy;
+  double dzz;
+  uint32_t marker_bidx;
+  uint32_t marker_bceil;
+  uint32_t marker_idx;
+  uint32_t sample_idx;
+  uint32_t fs_idx;
+  uint32_t unrelated_cluster_idx;
+  uint32_t quad_denom;
+  uint32_t twice_numer_subtract;
+  uint32_t paternal_id;
+  uint32_t maternal_id;
+  uint32_t sibling_ct;
+  uint32_t paternal_geno;
+  uint32_t maternal_geno;
+  uint32_t parental_a1_ct;
+  uint32_t sib_idx;
+  uint32_t nonmissing_sib_ct;
+  uint32_t cur_geno;
+  uint32_t is_flipped;
+  uint32_t max_incr4;
+  uint32_t max_incr8;
+  uint32_t cur_max_incr;
+  uint32_t orig_case_ct;
+  uint32_t success_2start;
+  uint32_t success_2incr;
+  uint32_t uii;
+  uint32_t ujj;
+  if (perm_adapt) {
+    perm_adapt_stop = g_perm_adapt_stop;
+    adaptive_intercept = g_adaptive_intercept;
+    adaptive_slope = g_adaptive_slope;
+    adaptive_ci_zt = g_adaptive_ci_zt;
+    aperm_alpha = g_aperm_alpha;
+  } else {
+    maxt_results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
+  }
+  while (1) {
+    // Split the block among threads; with fewer markers than threads, each
+    // of the first g_block_size threads handles exactly one marker.
+    if (g_block_size <= dfam_thread_ct) {
+      if (g_block_size <= tidx) {
+	goto dfam_perm_thread_skip_all;
+      }
+      marker_bidx = tidx;
+      marker_bceil = tidx + 1;
+    } else {
+      marker_bidx = (((uint64_t)tidx) * g_block_size) / dfam_thread_ct;
+      marker_bceil = (((uint64_t)tidx + 1) * g_block_size) / dfam_thread_ct;
+    }
+    orig_chisq = g_orig_stat;
+    for (; marker_bidx < marker_bceil; marker_bidx++) {
+      marker_idx = g_adapt_m_table[marker_bidx];
+      loadbuf_ptr = &(g_loadbuf[marker_bidx * dfam_sample_ctl2]);
+      if (perm_adapt) {
+	next_adapt_check = first_adapt_check;
+      } else if (mperm_save_all) {
+	msa_ptr = &(mperm_save_all[marker_idx * perm_vec_ct]);
+      }
+      quad_denom = 0;
+      twice_numer_subtract = 0;
+      fill_uint_zero(total_counts, perm_vec_ct);
+      fill_double_zero(numers, perm_vec_ct);
+      fill_double_zero(denoms, perm_vec_ct);
+
+      cur_dfam_ptr = dfam_iteration_order;
+      success_2start = perm_2success_ct[marker_idx];
+      success_2incr = 0;
+      chisq_high = orig_chisq[marker_idx] + EPSILON;
+      chisq_low = orig_chisq[marker_idx] - EPSILON;
+#ifdef __LP64__
+      // In this first loop, max_incr4 holds the sum of the unflipped counts
+      // staged so far (a baseline shared by every permutation), while
+      // max_incr8 bounds the largest value any 8-bit lane can hold once that
+      // baseline is added.
+      fill_v128_zero(case_a1_ct_acc8, acc8_vec_ct);
+      max_incr4 = 0;
+      max_incr8 = 0;
+#endif
+      // Record layout: [paternal id, maternal id, sibling ct, siblings...].
+      // sibling_ct is always read before any 'continue', so the loop
+      // increment skips the sibling list correctly.
+      for (fs_idx = 0; fs_idx < family_all_case_children_ct; fs_idx++, cur_dfam_ptr = &(cur_dfam_ptr[sibling_ct])) {
+	paternal_id = *cur_dfam_ptr++;
+	maternal_id = *cur_dfam_ptr++;
+	sibling_ct = *cur_dfam_ptr++;
+	paternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, paternal_id);
+	maternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, maternal_id);
+	parental_a1_ct = dfam_allele_ct_table[paternal_geno * 4 + maternal_geno];
+	// skip if parent has missing genotype, or neither parent is het
+	if (!parental_a1_ct) {
+	  continue;
+	}
+
+	for (sib_idx = 0, nonmissing_sib_ct = 0; sib_idx < sibling_ct; sib_idx++) {
+	  sample_idx = cur_dfam_ptr[sib_idx];
+	  cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+	  nonmissing_sib_ct += (cur_geno != 1);
+	}
+	// skip if all children have missing genotypes
+	if (!nonmissing_sib_ct) {
+	  continue;
+	}
+
+	// [0]: A1 count over nonmissing children; [1]: the count that applies
+	// when this family's flip bit is set.
+	cur_case_a1_ct_flip[0] = 0;
+	cur_case_a1_ct_flip[1] = 0;
+	for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+	  sample_idx = cur_dfam_ptr[sib_idx];
+	  cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+	  if (cur_geno == 1) {
+	    continue;
+	  }
+	  cur_case_a1_ct_flip[0] += (4 - cur_geno) / 2;
+	}
+	cur_case_a1_ct_flip[1] = nonmissing_sib_ct * parental_a1_ct - cur_case_a1_ct_flip[0];
+	quad_denom += (2 - (parental_a1_ct & 1)) * nonmissing_sib_ct;
+	twice_numer_subtract += nonmissing_sib_ct * parental_a1_ct;
+
+#ifdef __LP64__
+	cur_max_incr = MAXV(cur_case_a1_ct_flip[0], cur_case_a1_ct_flip[1]);
+	max_incr8 += cur_max_incr;
+	// also tried 16-bit accumulators, but that has ~50% greater runtime on
+	// typical datasets
+	if (max_incr8 >= 256) {
+	  // An 8-bit lane could overflow: add the shared baseline to every
+	  // lane, then drain the lanes into total_counts.
+	  if (max_incr4) {
+	    loader = _mm_set1_epi8(max_incr4);
+	    acc8_ptr = case_a1_ct_acc8;
+	    for (vidx = 0; vidx < acc8_vec_ct; vidx++) {
+	      *acc8_ptr = _mm_add_epi8(*acc8_ptr, loader);
+	      acc8_ptr++;
+	    }
+	  }
+	  unroll_zero_incr_8_32(case_a1_ct_acc8, (__m128i*)total_counts, acc8_vec_ct);
+	  max_incr8 = cur_max_incr;
+	  max_incr4 = 0;
+	}
+	if (cur_max_incr < 256) {
+	  max_incr4 += cur_case_a1_ct_flip[0];
+	  diff_vec = _mm_set1_epi8((uint8_t)(cur_case_a1_ct_flip[1] - cur_case_a1_ct_flip[0]));
+	  acc8_ptr = case_a1_ct_acc8;
+	  flipa_perm_ptr = (__m128i*)(&(flipa_shuffled[fs_idx * perm_vec_wcta]));
+	  for (vidx = 0; vidx < perm_vec_ct128; vidx++) {
+	    loader = *flipa_perm_ptr++;
+	    for (uii = 0; uii < 8; uii++) {
+	      // set incr8 to (cur_case_a1_ct_flip[1] - cur_case_a1_ct_flip[0])
+	      // where (specially permuted) flipA is set, zero when it is not
+	      incr8 = _mm_and_si128(_mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(loader, m1x8)), diff_vec);
+	      *acc8_ptr = _mm_add_epi8(*acc8_ptr, incr8);
+	      acc8_ptr++;
+	      loader = _mm_srli_epi64(loader, 1);
+	    }
+	  }
+	} else {
+	  // Single family too large for 8-bit lanes: update total_counts
+	  // directly.  The accumulator was drained just above.
+	  cur_flipa = &(flipa[fs_idx * perm_vec_wcta]);
+	  for (uii = 0; uii < perm_vec_ct; uii++) {
+	    is_flipped = IS_SET(cur_flipa, uii);
+	    total_counts[uii] += cur_case_a1_ct_flip[is_flipped];
+	  }
+	  max_incr8 = 0;
+	}
+#else
+	cur_flipa = &(flipa[fs_idx * perm_vec_wcta]);
+	for (uii = 0; uii < perm_vec_ct; uii++) {
+	  is_flipped = IS_SET(cur_flipa, uii);
+	  total_counts[uii] += cur_case_a1_ct_flip[is_flipped];
+	}
+#endif
+      }
+#ifdef __LP64__
+      // Drain whatever is still staged in the 8-bit lanes.  Without this,
+      // counts accumulated since the last overflow-triggered flush would
+      // never reach total_counts, and twice_numers below would be computed
+      // from incomplete totals (the 32-bit path has no such staging).
+      if (max_incr8) {
+	if (max_incr4) {
+	  loader = _mm_set1_epi8(max_incr4);
+	  acc8_ptr = case_a1_ct_acc8;
+	  for (vidx = 0; vidx < acc8_vec_ct; vidx++) {
+	    *acc8_ptr = _mm_add_epi8(*acc8_ptr, loader);
+	    acc8_ptr++;
+	  }
+	}
+	unroll_zero_incr_8_32(case_a1_ct_acc8, (__m128i*)total_counts, acc8_vec_ct);
+      }
+#endif
+      for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+	twice_numers[perm_idx] = 2 * total_counts[perm_idx] - twice_numer_subtract;
+      }
+      for (fs_idx = 0; fs_idx < family_mixed_ct; fs_idx++, cur_dfam_ptr = &(cur_dfam_ptr[sibling_ct])) {
+	paternal_id = *cur_dfam_ptr++;
+	maternal_id = *cur_dfam_ptr++;
+	sibling_ct = *cur_dfam_ptr++;
+	paternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, paternal_id);
+	maternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, maternal_id);
+	parental_a1_ct = dfam_allele_ct_table[paternal_geno * 4 + maternal_geno];
+	if (!parental_a1_ct) {
+	  // parents uninformative: treat the children as a plain sibship
+	  dfam_sibship_or_unrelated_perm_calc(loadbuf_ptr, cur_dfam_ptr, perm_vecst, orig_pheno_c, sibling_ct, 0, perm_vec_ct, acc4, acc8, cur_case_a1_cts, cur_case_missing_cts, twice_numers, numers, denoms, total_counts);
+	} else {
+	  for (sib_idx = 0, nonmissing_sib_ct = 0; sib_idx < sibling_ct; sib_idx++) {
+	    sample_idx = cur_dfam_ptr[sib_idx];
+	    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+	    nonmissing_sib_ct += (cur_geno != 1);
+	  }
+	  // skip if all children have missing genotypes
+	  if (!nonmissing_sib_ct) {
+	    continue;
+	  }
+
+	  quad_denom += (2 - (parental_a1_ct & 1)) * nonmissing_sib_ct;
+	  cur_flipa = &(flipa[fs_idx * perm_vec_wcta]);
+	  fill_uint_zero(cur_case_a1_cts, perm_vec_ct);
+#ifdef __LP64__
+	  fill_v128_zero(acc4, acc4_vec_ct);
+	  fill_v128_zero(acc8, acc8_vec_ct);
+#else
+	  fill_ulong_zero(acc4, acc4_word_ct);
+	  fill_ulong_zero(acc8, acc8_word_ct);
+#endif
+	  // compute (unflipped) case_a1_ct for each permutation
+	  max_incr4 = 0; // maximum possible value in acc4
+	  max_incr8 = 0; // maximum possible value in acc8
+	  orig_case_ct = 0;
+	  for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+	    sample_idx = cur_dfam_ptr[sib_idx];
+	    orig_case_ct += IS_SET(orig_pheno_c, sample_idx);
+	    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+	    // nothing to do here when cur_geno == 3, since a1_ct is zero
+	    if (cur_geno & 1) {
+	      continue;
+	    }
+	    cur_max_incr = (4 - cur_geno) / 2;
+#ifdef __LP64__
+	    if (max_incr4 + cur_max_incr > 15) {
+	      unroll_zero_incr_4_8(acc4, acc8, acc4_vec_ct);
+	      max_incr8 += max_incr4;
+	      if (max_incr8 > 240) {
+		unroll_zero_incr_8_32(acc8, (__m128i*)cur_case_a1_cts, acc8_vec_ct);
+		max_incr8 = 0;
+	      }
+	      max_incr4 = 0;
+	    }
+	    max_incr4 += cur_max_incr;
+
+	    pheno_perm_ptr = (const __m128i*)(&(perm_vecst[sample_idx * perm_vec_wcta]));
+	    if (cur_max_incr == 1) {
+	      unroll_incr_1_4(pheno_perm_ptr, acc4, perm_vec_ct128);
+	    } else {
+	      // add 2 whenever this sample is a case
+	      // Each input vector (128 permutations) fans out into four acc4
+	      // vectors, so the trip count is the number of input vectors,
+	      // matching the unroll_incr_1_4() call above.  Looping
+	      // acc4_vec_ct times would run 4x past both buffers.
+	      acc4_ptr = acc4;
+	      for (vidx = 0; vidx < perm_vec_ct128; vidx++) {
+		loader = *pheno_perm_ptr++;
+		acc4_ptr[0] = _mm_add_epi64(acc4_ptr[0], _mm_slli_epi64(_mm_and_si128(loader, m1x4), 1));
+		acc4_ptr[1] = _mm_add_epi64(acc4_ptr[1], _mm_and_si128(loader, m1x4ls1));
+		loader = _mm_srli_epi64(loader, 1);
+		acc4_ptr[2] = _mm_add_epi64(acc4_ptr[2], _mm_and_si128(loader, m1x4ls1));
+		loader = _mm_srli_epi64(loader, 1);
+		acc4_ptr[3] = _mm_add_epi64(acc4_ptr[3], _mm_and_si128(loader, m1x4ls1));
+		acc4_ptr = &(acc4_ptr[4]);
+	      }
+	    }
+#else
+	    if (max_incr4 + cur_max_incr > 15) {
+	      unroll_zero_incr_4_8(acc4, acc8, acc4_word_ct);
+	      max_incr8 += max_incr4;
+	      if (max_incr8 > 240) {
+		unroll_zero_incr_8_32(acc8, (uintptr_t*)cur_case_a1_cts, acc8_word_ct);
+		max_incr8 = 0;
+	      }
+	      max_incr4 = 0;
+	    }
+	    max_incr4 += cur_max_incr;
+
+	    pheno_perm_ptr = &(perm_vecst[sample_idx * perm_vec_wcta]);
+	    if (cur_max_incr == 1) {
+	      unroll_incr_1_4(pheno_perm_ptr, acc4, perm_vec_wct);
+	    } else {
+	      // add 2 whenever this sample is a case
+	      acc4_ptr = acc4;
+	      for (widx = 0; widx < perm_vec_wct; widx++) {
+		loader = *pheno_perm_ptr++;
+		acc4_ptr[0] += (loader & 0x11111111U) << 1;
+		acc4_ptr[1] += loader & 0x22222222U;
+		acc4_ptr[2] += (loader >> 1) & 0x22222222U;
+		acc4_ptr[3] += (loader >> 2) & 0x22222222U;
+		acc4_ptr = &(acc4_ptr[4]);
+	      }
+	    }
+#endif
+	  }
+#ifdef __LP64__
+	  // max_incr4 guaranteed to be nonzero unless no child had any A1
+	  // alleles
+	  if (max_incr4) {
+	    unroll_incr_4_8(acc4, acc8, acc4_vec_ct);
+	    unroll_incr_8_32(acc8, (__m128i*)cur_case_a1_cts, acc8_vec_ct);
+	  }
+#else
+	  if (max_incr4) {
+	    unroll_incr_4_8(acc4, acc8, acc4_word_ct);
+	    unroll_incr_8_32(acc8, (uintptr_t*)cur_case_a1_cts, acc8_word_ct);
+	  }
+#endif
+	  if (nonmissing_sib_ct == sibling_ct) {
+	    // No missing children: the case count is the same for every
+	    // permutation, so a flip maps the count to (case_ct *
+	    // parental_a1_ct) - count.
+	    cur_flipa = &(flipa[fs_idx * perm_vec_wcta]);
+	    cur_max_incr = orig_case_ct * parental_a1_ct;
+	    for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+	      uii = cur_case_a1_cts[perm_idx];
+	      if (IS_SET(cur_flipa, perm_idx)) {
+		uii = cur_max_incr - uii;
+	      }
+	      total_counts[perm_idx] += uii;
+	      twice_numers[perm_idx] += (int32_t)(2 * uii) - (int32_t)cur_max_incr;
+	    }
+	  } else {
+	    // cur_case_ct also varies; need to compute case_missing_ct for
+	    // each permutation, and twice_numers/total_counts updates are more
+	    // complex.
+	    // (technically could separate out >50% missingness as a special
+	    // case, but we focus our attention on the far more common sparse
+	    // missingness scenario.)
+	    cur_max_incr = 0;
+	    fill_uint_zero(cur_case_missing_cts, perm_vec_ct);
+#ifdef __LP64__
+	    fill_v128_zero(acc4, acc4_vec_ct);
+	    fill_v128_zero(acc8, acc8_vec_ct);
+#else
+	    fill_ulong_zero(acc4, acc4_word_ct);
+	    fill_ulong_zero(acc8, acc8_word_ct);
+#endif
+	    // cur_max_incr counts the missing-genotype children seen so far;
+	    // the accumulators are drained before every 15th / 255th addition.
+	    for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+	      sample_idx = cur_dfam_ptr[sib_idx];
+	      cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
+	      if (cur_geno != 1) {
+		continue;
+	      }
+#ifdef __LP64__
+	      if (!(cur_max_incr % 15)) {
+		unroll_zero_incr_4_8(acc4, acc8, acc4_vec_ct);
+		if (!(cur_max_incr % 255)) {
+		  unroll_zero_incr_8_32(acc8, (__m128i*)cur_case_missing_cts, acc8_vec_ct);
+		}
+	      }
+	      unroll_incr_1_4((__m128i*)(&(perm_vecst[sample_idx * perm_vec_wcta])), acc4, perm_vec_ct128);
+#else
+	      if (!(cur_max_incr % 15)) {
+		unroll_zero_incr_4_8(acc4, acc8, acc4_word_ct);
+		if (!(cur_max_incr % 255)) {
+		  unroll_zero_incr_8_32(acc8, (uintptr_t*)cur_case_missing_cts, acc8_word_ct);
+		}
+	      }
+	      unroll_incr_1_4(&(perm_vecst[sample_idx * perm_vec_wcta]), acc4, perm_vec_wct);
+#endif
+	      cur_max_incr++;
+	    }
+#ifdef __LP64__
+	    unroll_incr_4_8(acc4, acc8, acc4_vec_ct);
+	    unroll_incr_8_32(acc8, (__m128i*)cur_case_missing_cts, acc8_vec_ct);
+#else
+	    unroll_incr_4_8(acc4, acc8, acc4_word_ct);
+	    unroll_incr_8_32(acc8, (uintptr_t*)cur_case_missing_cts, acc8_word_ct);
+#endif
+	    for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+	      uii = cur_case_a1_cts[perm_idx];
+	      ujj = (orig_case_ct - cur_case_missing_cts[perm_idx]) * parental_a1_ct;
+	      twice_numers[perm_idx] += (int32_t)(2 * uii) - ((int32_t)ujj);
+	      total_counts[perm_idx] += uii;
+	    }
+	  }
+	}
+      }
+      // Record layout for the next two groups: [member ct, members...].
+      for (fs_idx = 0; fs_idx < sibship_mixed_ct; fs_idx++, cur_dfam_ptr = &(cur_dfam_ptr[sibling_ct])) {
+	sibling_ct = *cur_dfam_ptr++;
+	dfam_sibship_or_unrelated_perm_calc(loadbuf_ptr, cur_dfam_ptr, perm_vecst, orig_pheno_c, sibling_ct, 0, perm_vec_ct, acc4, acc8, cur_case_a1_cts, cur_case_missing_cts, twice_numers, numers, denoms, total_counts);
+      }
+      for (unrelated_cluster_idx = 0; unrelated_cluster_idx < unrelated_cluster_ct; unrelated_cluster_idx++, cur_dfam_ptr = &(cur_dfam_ptr[sibling_ct])) {
+	sibling_ct = *cur_dfam_ptr++;
+	// call sibling permutation routine with unrelated bool set (most of
+	// the code should be identical so this should be one function)
+	dfam_sibship_or_unrelated_perm_calc(loadbuf_ptr, cur_dfam_ptr, perm_vecst, orig_pheno_c, sibling_ct, 1, perm_vec_ct, acc4, acc8, cur_case_a1_cts, cur_case_missing_cts, twice_numers, numers, denoms, total_counts);
+      }
+      if (perm_adapt) {
+	// perm_idx is advanced inside the body (see the ++perm_idx below)
+	for (perm_idx = 0; perm_idx < perm_vec_ct;) {
+	  // now harvest the chi-square values, check adaptive termination
+	  // condition, etc.
+	  dxx = numers[perm_idx] + ((double)((int32_t)twice_numers[perm_idx])) * 0.5;
+	  dyy = denoms[perm_idx] + ((double)((int32_t)quad_denom)) * 0.25;
+	  chisq = dxx * dxx / dyy;
+	  if (chisq > chisq_high) {
+	    success_2incr += 2;
+	  } else if (chisq > chisq_low) {
+	    success_2incr++;
+	  }
+	  if (++perm_idx == next_adapt_check - pidx_offset) {
+	    uii = success_2start + success_2incr;
+	    if (uii) {
+	      // stop once the interval around the empirical p-value no longer
+	      // contains aperm_alpha
+	      pval = ((double)((int32_t)uii + 2)) / ((double)(2 * ((int32_t)next_adapt_check + 1)));
+	      dxx = adaptive_ci_zt * sqrt(pval * (1 - pval) / ((int32_t)next_adapt_check));
+	      dyy = pval - dxx; // lower bound
+	      dzz = pval + dxx; // upper bound
+	      if ((dyy > aperm_alpha) || (dzz < aperm_alpha)) {
+		perm_adapt_stop[marker_idx] = 1;
+		perm_attempt_ct[marker_idx] = next_adapt_check;
+		break;
+	      }
+	    }
+	    next_adapt_check += (int32_t)(adaptive_intercept + ((int32_t)next_adapt_check) * adaptive_slope);
+	  }
+	}
+      } else {
+	// perm_idx must advance here: nothing in this body increments it, so
+	// an increment-free loop header would spin forever and push msa_ptr
+	// past the end of its row.
+	for (perm_idx = 0; perm_idx < perm_vec_ct; perm_idx++) {
+	  dxx = numers[perm_idx] + ((double)((int32_t)twice_numers[perm_idx])) * 0.5;
+	  dyy = denoms[perm_idx] + ((double)((int32_t)quad_denom)) * 0.25;
+	  chisq = dxx * dxx / dyy;
+	  if (chisq > chisq_high) {
+	    success_2incr += 2;
+	  } else if (chisq > chisq_low) {
+	    success_2incr++;
+	  }
+	  if (maxt_results[perm_idx] < chisq) {
+	    maxt_results[perm_idx] = chisq;
+	  }
+	  if (msa_ptr) {
+	    *msa_ptr++ = chisq;
+	  }
+	}
+      }
+      perm_2success_ct[marker_idx] += success_2incr;
+    }
+  dfam_perm_thread_skip_all:
+    if ((!tidx) || g_is_last_thread_block) {
+      THREAD_RETURN;
+    }
+    THREAD_BLOCK_FINISH(tidx);
+  }
+}
+
 void dfam_sibship_calc(uint32_t cur_case_ct, uint32_t case_hom_a1_ct, uint32_t case_het_ct, uint32_t cur_ctrl_ct, uint32_t ctrl_hom_a1_ct, uint32_t ctrl_het_ct, uint32_t* total_a1_count_ptr, double* numer_ptr, double* denom_ptr, double* total_expected_ptr) {
   if (!cur_ctrl_ct) {
     return;
@@ -2954,6 +3796,35 @@ void dfam_sibship_calc(uint32_t cur_case_ct, uint32_t case_hom_a1_ct, uint32_t c
   *total_expected_ptr += case_expected_a1_ct;
 }
 
+#ifdef __LP64__
+void dfam_flipa_shuffle(uintptr_t* perms, uintptr_t* shuffled_perms, uint32_t perm_ct) {
+  // Reorders flip bits within each 128-bit group (two 64-bit words in, two
+  // out); ceil(perm_ct / 128) groups are processed.  With r in [0, 8) and
+  // c in [0, 4), source bits 16r + 4c + {0, 1, 2, 3} of a group are written
+  // to destination bit (r + 8c) of, respectively: word 0 low half, word 0
+  // high half (+32), word 1 low half, word 1 high half (+32).
+  // So word 0 reads, from bit 0 upward:
+  //   0 16 32 ... 112 4 20 ... 116 8 24 ... 120 12 28 ... 124 1 17 ...
+  // NOTE(review): this appears to be the layout read through
+  // g_dfam_flipa_shuffled by the 8-bit SSE2 accumulation loop in
+  // dfam_perm_thread() -- confirm against the caller.
+  const uint32_t group_ct = (perm_ct + 127) / 128;
+  uint32_t group_idx;
+  uint32_t row;
+  uint32_t col;
+  uint32_t src_bit;
+  uint32_t dst_bit;
+  for (group_idx = 0; group_idx != group_ct; group_idx++, perms += 2, shuffled_perms += 2) {
+    // output words are built up with |=, so they must start out clear
+    shuffled_perms[0] = 0;
+    shuffled_perms[1] = 0;
+    for (row = 0; row != 8; row++) {
+      for (col = 0; col != 4; col++) {
+	src_bit = 16 * row + 4 * col;
+	dst_bit = 8 * col + row;
+	shuffled_perms[0] |= IS_SET(perms, src_bit) << dst_bit;
+	shuffled_perms[0] |= IS_SET(perms, src_bit + 1) << (dst_bit + 32);
+	shuffled_perms[1] |= IS_SET(perms, src_bit + 2) << dst_bit;
+	shuffled_perms[1] |= IS_SET(perms, src_bit + 3) << (dst_bit + 32);
+      }
+    }
+  }
+}
+#endif
+
 int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double pfilter, double output_min_p, uint32_t mtest_adjust, double adjust_lambda, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, uintptr_t marker_ct_orig, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude,  [...]
   unsigned char* wkspace_mark = wkspace_base;
   FILE* outfile = NULL;
@@ -2966,9 +3837,24 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   uintptr_t unfiltered_sample_ctl2 = (unfiltered_sample_ct + (BITCT2 - 1)) / BITCT2;
   uintptr_t unfiltered_sample_ctp1l2 = 1 + (unfiltered_sample_ct / BITCT2);
   uintptr_t final_mask = get_final_mask(unfiltered_sample_ct);
+  uintptr_t perm_vec_ct128 = 0;
+  uintptr_t perm_vec_cta128 = 0;
+  uintptr_t perm_vec_wct = 0;
+  uintptr_t perm_vec_wcta = 0;
+  uintptr_t perm_vec_ctcl8m = 0;
   uintptr_t* marker_exclude_orig_autosomal = marker_exclude_orig;
   uintptr_t* founder_pnm = NULL;
+  uintptr_t* perm_preimage = NULL;
   double* orig_chisq = NULL;
+  double* maxt_extreme_stat = NULL;
+  uint32_t* dfam_cluster_map = NULL;
+  uint32_t* dfam_cluster_starts = NULL;
+  uint32_t* dfam_cluster_case_cts = NULL;
+  uint32_t* dfam_tot_quotients = NULL;
+  uint64_t* dfam_totq_magics = NULL;
+  uint32_t* dfam_totq_preshifts = NULL;
+  uint32_t* dfam_totq_postshifts = NULL;
+  uint32_t* dfam_totq_incrs = NULL;
   uint32_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
   uint32_t multigen = (fam_ip->mendel_modifier / MENDEL_MULTIGEN) & 1;
   uint32_t is_set_test = fam_ip->dfam_modifier & DFAM_SET_TEST;
@@ -2976,7 +3862,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   uint32_t perm_maxt_nst = (fam_ip->dfam_modifier & DFAM_MPERM) && (!is_set_test);
   uint32_t do_perms = fam_ip->dfam_modifier & (DFAM_PERM | DFAM_MPERM);
   uint32_t do_perms_nst = do_perms && (!is_set_test);
-  // uint32_t perm_count = fam_ip->dfam_modifier & DFAM_PERM_COUNT;
+  uint32_t perm_count = fam_ip->dfam_modifier & DFAM_PERM_COUNT;
   uint32_t fill_orig_chisq = do_perms || mtest_adjust;
   uint32_t no_unrelateds = (fam_ip->dfam_modifier & DFAM_NO_UNRELATEDS) || (within_cmdflag && (!cluster_ct));
   uint32_t family_all_case_children_ct = 0;
@@ -2984,10 +3870,10 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   uint32_t sibship_mixed_ct = 0;
   uint32_t unrelated_cluster_ct = 0;
   uint32_t pct = 0;
-  // uint32_t max_thread_ct = g_thread_ct;
+  uint32_t max_thread_ct = MINV(g_thread_ct, MODEL_BLOCKSIZE);
   uint32_t perm_pass_idx = 0;
   uint32_t perms_total = 0;
-  uint32_t perms_done = 0;
+  uint32_t dfam_cluster_map_size = 0;
   int32_t retval = 0;
   uintptr_t* pheno_nm;
   uintptr_t* dfam_pheno_c;
@@ -2997,10 +3883,10 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   uintptr_t* marker_exclude;
   uintptr_t* dfam_sample_exclude;
   uintptr_t* size_one_sibships;
-  double* maxt_extreme_stat = NULL;
   uint32_t mu_table[MODEL_BLOCKSIZE];
-  // char* outname_end2;
+  char* outname_end2;
   char* wptr;
+  char* wptr_start;
   uint64_t* family_list;
   uint64_t* trio_list;
   uint32_t* trio_error_lookup;
@@ -3009,11 +3895,12 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   uint32_t* sample_to_fss_idx;
   uint32_t* dfam_iteration_order;
   uint32_t* idx_to_uidx;
-  uint32_t* uidx_to_idx;
+  uint32_t* sample_uidx_to_idx;
   uint32_t* sample_to_cluster;
   uint32_t* cluster_ctrl_case_cts;
   uint32_t* cluster_write_idxs;
   uint32_t* cur_dfam_ptr;
+  uint32_t* dfam_mixed_start;
   uintptr_t marker_ct;
   uintptr_t marker_uidx; // loading
   uintptr_t marker_uidx2; // writing
@@ -3029,6 +3916,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   double chisq;
   double pval;
   double dxx;
+  double dyy;
   uint32_t family_ct;
   uint32_t fs_ct;
   uint32_t sample_uidx;
@@ -3042,8 +3930,10 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   uint32_t cur_ctrl_ct;
   uint32_t cur_case_ct;
   uint32_t dfam_sample_ct;
+  uint32_t dfam_sample_ctl;
   uint32_t dfam_sample_ctl2;
   uint32_t chrom_fo_idx;
+  uint32_t chrom_end;
   uint32_t chrom_idx;
   uint32_t block_size;
   uint32_t block_end;
@@ -3070,6 +3960,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   uint32_t ctrl_het_ct;
   uint32_t hom_a1_ct;
   uint32_t het_ct;
+  uint32_t dfam_cluster_ct;
   uint32_t uii;
   uint32_t ujj;
   int32_t twice_numer;
@@ -3191,6 +4082,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
       }
     }
   }
+  dfam_mixed_start = cur_dfam_ptr;
   for (fs_idx = 0; fs_idx < family_ct; fs_idx++) {
     // Scan for families with at least one case and one control child.
     fssc_start = fs_starts[fs_idx] + 2;
@@ -3199,7 +4091,8 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
     for (fssc_idx = fssc_start; fssc_idx < fssc_end; fssc_idx++) {
       cur_case_ct += is_set(pheno_c, idx_to_uidx[fss_contents[fssc_idx]]);
     }
-    if (cur_case_ct && (cur_case_ct != fssc_end - fssc_start)) {
+    sibling_ct = fssc_end - fssc_start;
+    if (cur_case_ct && (cur_case_ct != sibling_ct)) {
       family_mixed_ct++;
       sample_uidx = idx_to_uidx[fss_contents[fssc_start - 2]];
       clear_bit(dfam_sample_exclude, sample_uidx);
@@ -3209,7 +4102,8 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
       clear_bit(dfam_sample_exclude, sample_uidx);
       *cur_dfam_ptr++ = sample_uidx;
 
-      *cur_dfam_ptr++ = fssc_end - fssc_start;
+      dfam_cluster_map_size += sibling_ct;
+      *cur_dfam_ptr++ = sibling_ct;
       for (fssc_idx = fssc_start; fssc_idx < fssc_end; fssc_idx++) {
 	sample_uidx = idx_to_uidx[fss_contents[fssc_idx]];
 	clear_bit(dfam_sample_exclude, sample_uidx);
@@ -3225,11 +4119,13 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
     for (fssc_idx = fssc_start; fssc_idx < fssc_end; fssc_idx++) {
       cur_case_ct += is_set(pheno_c, idx_to_uidx[fss_contents[fssc_idx]]);
     }
-    if (cur_case_ct && (cur_case_ct != fssc_end - fssc_start)) {
+    sibling_ct = fssc_end - fssc_start;
+    if (cur_case_ct && (cur_case_ct != sibling_ct)) {
       sibship_mixed_ct++;
       // [0]: sibling ct
       // [1...]: member uidxs
-      *cur_dfam_ptr++ = fssc_end - fssc_start;
+      dfam_cluster_map_size += sibling_ct;
+      *cur_dfam_ptr++ = sibling_ct;
       for (fssc_idx = fssc_start; fssc_idx < fssc_end; fssc_idx++) {
 	sample_uidx = idx_to_uidx[fss_contents[fssc_idx]];
 	clear_bit(dfam_sample_exclude, sample_uidx);
@@ -3237,6 +4133,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
       }
     }
   }
+  dfam_cluster_map_size = ((uintptr_t)(cur_dfam_ptr - dfam_mixed_start)) - 3 * family_mixed_ct - sibship_mixed_ct;
   if (!no_unrelateds) {
     if (wkspace_alloc_ui_checked(&sample_to_cluster, sample_ct * sizeof(int32_t))) {
       goto dfam_ret_NOMEM;
@@ -3278,11 +4175,13 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
       cur_case_ct = cluster_ctrl_case_cts[2 * unrelated_cluster_idx + 1];
       if (cur_ctrl_ct && cur_case_ct) {
 	unrelated_cluster_ct++;
-	cur_dfam_ptr[write_idx++] = cur_ctrl_ct + cur_case_ct;
+	uii = cur_ctrl_ct + cur_case_ct;
+	cur_dfam_ptr[write_idx++] = uii;
 	cluster_write_idxs[unrelated_cluster_idx] = write_idx;
-	write_idx += cur_ctrl_ct + cur_case_ct;
+	write_idx += uii;
       }
     }
+    dfam_cluster_map_size += write_idx - unrelated_cluster_ct;
     for (sample_uidx = 0, sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
       next_unset_unsafe_ck(sample_exclude, &sample_uidx);
       unrelated_cluster_idx = sample_to_cluster[sample_idx];
@@ -3301,27 +4200,23 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   }
   wkspace_reset((unsigned char*)idx_to_uidx);
   wkspace_shrink_top(dfam_iteration_order, (cur_dfam_ptr - dfam_iteration_order) * sizeof(int32_t));
-  if (do_perms) {
-    logerrprint("Error: --dfam permutation tests are currently under development.\n");
-    retval = RET_CALC_NOT_YET_SUPPORTED;
-    goto dfam_ret_1;
-  }
   dfam_sample_ct = unfiltered_sample_ct - popcount_longs(dfam_sample_exclude, unfiltered_sample_ctl);
+  dfam_sample_ctl = (dfam_sample_ct + (BITCT - 1)) / BITCT;
   dfam_sample_ctl2 = (dfam_sample_ct + (BITCT2 - 1)) / BITCT2;
-  if (wkspace_alloc_ui_checked(&uidx_to_idx, unfiltered_sample_ct * sizeof(int32_t))) {
+  if (wkspace_alloc_ui_checked(&sample_uidx_to_idx, unfiltered_sample_ct * sizeof(int32_t))) {
     goto dfam_ret_NOMEM;
   }
-  fill_uidx_to_idx(dfam_sample_exclude, unfiltered_sample_ct, dfam_sample_ct, uidx_to_idx);
+  fill_uidx_to_idx(dfam_sample_exclude, unfiltered_sample_ct, dfam_sample_ct, sample_uidx_to_idx);
   cur_dfam_ptr = dfam_iteration_order;
   uii = family_all_case_children_ct + family_mixed_ct;
   for (fs_idx = 0; fs_idx < uii; fs_idx++) {
-    *cur_dfam_ptr = uidx_to_idx[*cur_dfam_ptr];
+    *cur_dfam_ptr = sample_uidx_to_idx[*cur_dfam_ptr];
     cur_dfam_ptr++;
-    *cur_dfam_ptr = uidx_to_idx[*cur_dfam_ptr];
+    *cur_dfam_ptr = sample_uidx_to_idx[*cur_dfam_ptr];
     cur_dfam_ptr++;
     sibling_ct = *cur_dfam_ptr++;
     for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
-      *cur_dfam_ptr = uidx_to_idx[*cur_dfam_ptr];
+      *cur_dfam_ptr = sample_uidx_to_idx[*cur_dfam_ptr];
       cur_dfam_ptr++;
     }
   }
@@ -3329,13 +4224,11 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   for (fs_idx = 0; fs_idx < uii; fs_idx++) {
     sibling_ct = *cur_dfam_ptr++;
     for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
-      *cur_dfam_ptr = uidx_to_idx[*cur_dfam_ptr];
+      *cur_dfam_ptr = sample_uidx_to_idx[*cur_dfam_ptr];
       cur_dfam_ptr++;
     }
   }
-  // DEBUG
-  // printf("*** %u %u %u %u\n", family_all_case_children_ct, family_mixed_ct, sibship_mixed_ct, unrelated_cluster_ct);
-  wkspace_reset((unsigned char*)uidx_to_idx);
+  wkspace_reset((unsigned char*)sample_uidx_to_idx);
   if (wkspace_alloc_ul_checked(&dfam_pheno_c, dfam_sample_ctl2 * sizeof(intptr_t)) ||
       wkspace_alloc_ul_checked(&loadbuf_raw, unfiltered_sample_ctl2 * sizeof(intptr_t)) ||
       wkspace_alloc_ul_checked(&workbuf, unfiltered_sample_ctp1l2 * sizeof(intptr_t)) ||
@@ -3344,6 +4237,13 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   }
   collapse_copy_bitarr(sample_ct, pheno_c, dfam_sample_exclude, dfam_sample_ct, dfam_pheno_c);
   g_pheno_c = dfam_pheno_c;
+  g_dfam_iteration_order = dfam_iteration_order;
+  g_dfam_family_all_case_children_ct = family_all_case_children_ct;
+  g_dfam_family_mixed_ct = family_mixed_ct;
+  g_dfam_sample_ct = dfam_sample_ct;
+  g_dfam_sibship_mixed_ct = sibship_mixed_ct;
+  g_dfam_unrelated_cluster_ct = unrelated_cluster_ct;
+  g_test_type = perm_adapt_nst;
   loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
   workbuf[unfiltered_sample_ctp1l2 - 1] = 0;
   for (ulii = 1; ulii <= MODEL_BLOCKSIZE; ulii++) {
@@ -3359,6 +4259,67 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
     g_orig_stat = orig_chisq;
   }
 
+  dfam_cluster_ct = family_mixed_ct + sibship_mixed_ct + unrelated_cluster_ct;
+  if (do_perms_nst) {
+    logerrprint("Error: --dfam permutation tests are currently under development.\n");
+    retval = RET_CALC_NOT_YET_SUPPORTED;
+    goto dfam_ret_1;
+    if (wkspace_alloc_ui_checked(&dfam_cluster_map, dfam_cluster_map_size * sizeof(int32_t)) ||
+        wkspace_alloc_ui_checked(&dfam_cluster_starts, (dfam_cluster_ct + 1) * sizeof(int32_t)) ||
+        wkspace_alloc_ui_checked(&dfam_cluster_case_cts, dfam_cluster_ct * sizeof(int32_t)) ||
+        wkspace_alloc_ul_checked(&perm_preimage, dfam_sample_ctl * sizeof(intptr_t))) {
+      goto dfam_ret_NOMEM;
+    }
+    fill_ulong_zero(perm_preimage, dfam_sample_ctl);
+    cur_dfam_ptr = dfam_mixed_start;
+    write_idx = 0;
+    for (uii = 0; uii < family_mixed_ct; uii++) {
+      dfam_cluster_starts[uii] = write_idx;
+      cur_dfam_ptr = &(cur_dfam_ptr[2]);
+      sibling_ct = *cur_dfam_ptr++;
+      cur_case_ct = 0;
+      for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+	sample_idx = cur_dfam_ptr[sib_idx];
+	dfam_cluster_map[write_idx++] = sample_idx;
+	cur_case_ct += IS_SET(dfam_pheno_c, sample_idx);
+      }
+      if (cur_case_ct * 2 >= sibling_ct) {
+	for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+	  SET_BIT(perm_preimage, cur_dfam_ptr[sib_idx]);
+	}
+      }
+      cur_dfam_ptr = &(cur_dfam_ptr[sibling_ct]);
+      dfam_cluster_case_cts[uii] = cur_case_ct;
+    }
+    for (; uii < dfam_cluster_ct; uii++) {
+      dfam_cluster_starts[uii] = write_idx;
+      sibling_ct = *cur_dfam_ptr++;
+      cur_case_ct = 0;
+      for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+	sample_idx = cur_dfam_ptr[sib_idx];
+	dfam_cluster_map[write_idx++] = sample_idx;
+	cur_case_ct += IS_SET(dfam_pheno_c, sample_idx);
+      }
+      if (cur_case_ct * 2 >= sibling_ct) {
+	for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
+	  SET_BIT(perm_preimage, cur_dfam_ptr[sib_idx]);
+	}
+      }
+      cur_dfam_ptr = &(cur_dfam_ptr[sibling_ct]);
+      dfam_cluster_case_cts[uii] = cur_case_ct;
+    }
+    if (write_idx != dfam_cluster_map_size) {
+      logerrprint("assert failure: write_idx != dfam_cluster_map_size\n");
+      exit(1);
+    }
+    dfam_cluster_starts[dfam_cluster_ct] = write_idx;
+
+    retval = cluster_alloc_and_populate_magic_nums(dfam_cluster_ct, dfam_cluster_map, dfam_cluster_starts, &dfam_tot_quotients, &dfam_totq_magics, &dfam_totq_preshifts, &dfam_totq_postshifts, &dfam_totq_incrs);
+    if (retval) {
+      goto dfam_ret_1;
+    }
+  }
+
   ulii = 2 * max_marker_allele_len + plink_maxsnp + MAX_ID_LEN + 256;
   if (ulii > MAXLINELEN) {
     if (wkspace_alloc_c_checked(&textbuf, ulii)) {
@@ -3370,6 +4331,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   // since it's also restricted to autosomes
   g_perms_done = 0;
   g_mperm_save_all = NULL;
+  g_dfam_perm_vecs = NULL;
   if (perm_maxt_nst) {
     perms_total = fam_ip->dfam_mperm_val;
     if (wkspace_alloc_d_checked(&maxt_extreme_stat, perms_total * sizeof(double))) {
@@ -3412,8 +4374,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
     }
   }
 
-  memcpy(outname_end, ".dfam", 6);
-  // outname_end2 = memcpyb(outname_end, ".dfam", 6);
+  outname_end2 = memcpyb(outname_end, ".dfam", 6);
   if (fopen_checked(&outfile, outname, "w")) {
     goto dfam_ret_OPEN_FAIL;
   }
@@ -3439,28 +4400,64 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   fputs("0%", stdout);
   fflush(stdout);
   // ----- begin main loop -----
-  // dfam_more_perms:
+ dfam_more_perms:
   if (do_perms_nst) {
-    logerrprint("Error: --dfam permutation tests are currently under development.\n");
-    retval = RET_CALC_NOT_YET_SUPPORTED;
-    goto dfam_ret_1;
-    /*
     if (perm_adapt_nst && perm_pass_idx) {
       while (g_first_adapt_check <= g_perms_done) {
 	// APERM_MAX prevents infinite loop here
 	g_first_adapt_check += (int32_t)(apip->init_interval + ((int32_t)g_first_adapt_check) * apip->interval_slope);
       }
     }
-    // g_perm_vec_ct memory allocation dependencies:
-    //   ;;;
+    // todo: check whether larger batches make sense
     g_perm_vec_ct = perm_batch_size;
     if (g_perm_vec_ct > perms_total - g_perms_done) {
       g_perm_vec_ct = perms_total - g_perms_done;
     }
-    if (wkspace_alloc_ul_checked(&g_dfam_perm_vecs, g_perm_vec_ct * sample_ctv2 * sizeof(intptr_t))) {
+    perm_vec_ct128 = (g_perm_vec_ct + 127) / 128;
+    perm_vec_cta128 = perm_vec_ct128 * 128;
+    perm_vec_wct = (g_perm_vec_ct + (BITCT - 1)) / BITCT;
+    perm_vec_wcta = perm_vec_ct128 * (128 / BITCT);
+    perm_vec_ctcl8m = CACHEALIGN32_DBL(g_perm_vec_ct);
+
+    if (wkspace_alloc_ul_checked(&g_dfam_perm_vecs, g_perm_vec_ct * dfam_sample_ctl * sizeof(intptr_t)) ||
+	wkspace_alloc_ul_checked(&g_dfam_perm_vecst, dfam_sample_ct * perm_vec_wcta * sizeof(intptr_t)) ||
+	wkspace_alloc_ul_checked(&g_dfam_flipa, family_ct * perm_vec_wct * sizeof(intptr_t)) ||
+#ifdef __LP64__
+        wkspace_alloc_ul_checked(&g_dfam_flipa_shuffled, family_all_case_children_ct * perm_vec_wcta * sizeof(intptr_t)) ||
+#endif
+	wkspace_alloc_i_checked(&g_dfam_twice_numers, max_thread_ct * perm_vec_cta128 * sizeof(int32_t)) ||
+	wkspace_alloc_ui_checked(&g_dfam_total_counts, max_thread_ct * perm_vec_cta128 * sizeof(int32_t)) ||
+	wkspace_alloc_d_checked(&g_dfam_numers, max_thread_ct * perm_vec_cta128 * sizeof(double)) ||
+	wkspace_alloc_d_checked(&g_dfam_denoms, max_thread_ct * perm_vec_cta128 * sizeof(double))
+	) {
       goto dfam_ret_NOMEM;
     }
+    // initialize phenotype and flipa permutations.
+    // don't bother multithreading for now
+    for (ulii = 0; ulii < g_perm_vec_ct; ulii++) {
+      generate_cc_cluster_perm1(dfam_sample_ct, perm_preimage, dfam_cluster_ct, dfam_cluster_map, dfam_cluster_starts, dfam_cluster_case_cts, dfam_tot_quotients, dfam_totq_magics, dfam_totq_preshifts, dfam_totq_postshifts, dfam_totq_incrs, &(g_dfam_perm_vecs[ulii * dfam_sample_ctl]), &sfmt);
+    }
+    transpose_perm1s(g_dfam_perm_vecs, g_perm_vec_ct, sample_ct, (uint32_t*)g_dfam_perm_vecst);
+    /*
+    for () {
+    }
     */
+
+#ifdef __LP64__
+    for (fs_idx = 0; fs_idx < family_all_case_children_ct; fs_idx++) {
+      dfam_flipa_shuffle(&(g_dfam_flipa[fs_idx * perm_vec_wcta]), &(g_dfam_flipa_shuffled[fs_idx * perm_vec_wcta]), g_perm_vec_ct);
+    }
+#endif
+    if (perm_maxt_nst) {
+      if (wkspace_alloc_d_checked(&g_maxt_thread_results, max_thread_ct * perm_vec_ctcl8m * sizeof(double))) {
+	goto dfam_ret_NOMEM;
+      }
+      if (mperm_save & MPERM_DUMP_ALL) {
+	if (wkspace_alloc_d_checked(&g_mperm_save_all, marker_ct * g_perm_vec_ct * sizeof(double))) {
+	  goto dfam_ret_NOMEM;
+	}
+      }
+    }
   }
   chrom_fo_idx = 0xffffffffU;
   marker_uidx = next_unset_unsafe(marker_exclude, 0);
@@ -3496,7 +4493,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
       }
       erase_mendel_errors(unfiltered_sample_ct, loadbuf_raw, workbuf, sex_male, trio_error_lookup, trio_ct, 0, multigen);
       collapse_copy_2bitarr(loadbuf_raw, &(g_loadbuf[block_size * dfam_sample_ctl2]), unfiltered_sample_ct, dfam_sample_ct, dfam_sample_exclude);
-      if (perm_adapt_nst) {
+      if (do_perms_nst) {
 	g_adapt_m_table[block_size] = marker_idx2++;
       }
       mu_table[block_size++] = marker_uidx;
@@ -3586,8 +4583,8 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
           paternal_id = *cur_dfam_ptr++;
 	  maternal_id = *cur_dfam_ptr++;
 	  sibling_ct = *cur_dfam_ptr++;
-	  paternal_geno = (loadbuf_ptr[paternal_id / BITCT2] >> (2 * (paternal_id % BITCT2))) & 3;
-	  maternal_geno = (loadbuf_ptr[maternal_id / BITCT2] >> (2 * (maternal_id % BITCT2))) & 3;
+	  paternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, paternal_id);
+	  maternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, maternal_id);
 	  parental_a1_ct = dfam_allele_ct_table[paternal_geno * 4 + maternal_geno];
 	  if (!parental_a1_ct) {
 	    cur_dfam_ptr = &(cur_dfam_ptr[sibling_ct]);
@@ -3597,7 +4594,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
 	  case_a1_ct = 0;
           for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
             sample_idx = *cur_dfam_ptr++;
-	    cur_geno = (loadbuf_ptr[sample_idx / BITCT2] >> (2 * (sample_idx % BITCT2))) & 3;
+	    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
 	    if (cur_geno == 1) {
 	      continue;
 	    }
@@ -3615,8 +4612,8 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
           paternal_id = *cur_dfam_ptr++;
 	  maternal_id = *cur_dfam_ptr++;
 	  sibling_ct = *cur_dfam_ptr++;
-	  paternal_geno = (loadbuf_ptr[paternal_id / BITCT2] >> (2 * (paternal_id % BITCT2))) & 3;
-	  maternal_geno = (loadbuf_ptr[maternal_id / BITCT2] >> (2 * (maternal_id % BITCT2))) & 3;
+	  paternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, paternal_id);
+	  maternal_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, maternal_id);
 	  parental_a1_ct = dfam_allele_ct_table[paternal_geno * 4 + maternal_geno];
 	  cur_case_ct = 0;
 	  cur_ctrl_ct = 0;
@@ -3626,7 +4623,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
 	  ctrl_het_ct = 0;
           for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
             sample_idx = *cur_dfam_ptr++;
-	    cur_geno = (loadbuf_ptr[sample_idx / BITCT2] >> (2 * (sample_idx % BITCT2))) & 3;
+	    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
 	    if (cur_geno == 1) {
 	      continue;
 	    }
@@ -3676,7 +4673,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
 	  ctrl_het_ct = 0;
           for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
             sample_idx = *cur_dfam_ptr++;
-	    cur_geno = (loadbuf_ptr[sample_idx / BITCT2] >> (2 * (sample_idx % BITCT2))) & 3;
+	    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
 	    if (cur_geno == 1) {
 	      continue;
 	    }
@@ -3715,7 +4712,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
 	  ctrl_het_ct = 0;
           for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
             sample_idx = *cur_dfam_ptr++;
-	    cur_geno = (loadbuf_ptr[sample_idx / BITCT2] >> (2 * (sample_idx % BITCT2))) & 3;
+	    cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
 	    if (cur_geno == 1) {
 	      continue;
 	    }
@@ -3789,6 +4786,11 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
 	}
       }
     }
+    if (do_perms_nst) {
+      // g_xfam_thread_ct = ;;; // f(block size)
+      // ...
+      g_perms_done += g_perm_vec_ct;
+    }
     marker_idx += block_size;
     if ((!perm_pass_idx) && (marker_idx >= loop_end)) {
       if (marker_idx < marker_unstopped_ct) {
@@ -3815,6 +4817,9 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
       goto dfam_ret_WRITE_FAIL;
     }
     if (!is_set_test) {
+      if (do_perms_nst) {
+	wkspace_reset(g_dfam_perm_vecs);
+      }
       if (mtest_adjust) {
 	if (wkspace_alloc_ui_checked(&idx_to_uidx, marker_ct * sizeof(int32_t))) {
 	  goto dfam_ret_NOMEM;
@@ -3836,9 +4841,117 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   }
   if (do_perms_nst) {
     // if (mperm_save & MPERM_DUMP_ALL) { ...
-    // wkspace_reset();
-    if (perms_done < perms_total) {
+    wkspace_reset(g_dfam_perm_vecs);
+    if (g_perms_done < perms_total) {
+      if (perm_adapt_nst) {
+	marker_unstopped_ct = marker_ct - popcount_longs((uintptr_t*)g_perm_adapt_stop, (marker_ct + sizeof(intptr_t) - 1) / sizeof(intptr_t));
+	if (!marker_unstopped_ct) {
+	  goto dfam_adapt_perm_count;
+	}
+      }
+      printf("\r%u permutation%s complete.", g_perms_done, (g_perms_done != 1)? "s" : "");
+      fflush(stdout);
+      perm_pass_idx++;
+      goto dfam_more_perms;
     }
+    if (perm_adapt_nst) {
+    dfam_adapt_perm_count:
+      g_perms_done = 0;
+      for (uii = 0; uii < marker_ct; uii++) {
+	if (g_perm_attempt_ct[uii] > g_perms_done) {
+	  g_perms_done = g_perm_attempt_ct[uii];
+	  if (g_perms_done == perms_total) {
+	    break;
+	  }
+	}
+      }
+    }
+    putchar('\r');
+    LOGPRINTF("%u %s permutation%s complete.\n", g_perms_done, perm_maxt_nst? "max(T)" : "adaptive", (g_perms_done != 1)? "s" : "");
+    if (perm_adapt_nst) {
+      memcpy(outname_end2, ".perm", 6);
+    } else {
+      if (mperm_save & MPERM_DUMP_BEST) {
+	memcpy(outname_end, ".mperm.dump.best", 17);
+	// ...
+	memcpy(outname_end, ".qassoc", 7);
+      }
+      memcpy(outname_end2, ".mperm", 7);
+    }
+    if (fopen_checked(&outfile, outname, "w")) {
+      goto dfam_ret_OPEN_FAIL;
+    }
+    if (perm_adapt_nst) {
+      sprintf(tbuf, " CHR %%%us    CHISQ_TDT         EMP1           NP \n", plink_maxsnp);
+    } else {
+      sprintf(tbuf, " CHR %%%us    CHISQ_TDT         EMP1         EMP2 \n", plink_maxsnp);
+#ifdef __cplusplus
+      std::sort(g_maxt_extreme_stat, &(g_maxt_extreme_stat[perms_total]));
+#else
+      qsort(g_maxt_extreme_stat, perms_total, sizeof(double), double_cmp);
+#endif
+    }
+    fprintf(outfile, tbuf, "SNP");
+    chrom_fo_idx = 0xffffffffU;
+    marker_uidx = next_unset_unsafe(marker_exclude, 0);
+    marker_idx = 0;
+    dyy = 1.0 / ((double)((int32_t)perms_total + 1));
+    dxx = 0.5 * dyy;
+    while (1) {
+      do {
+	chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+      } while (marker_uidx >= chrom_end);
+      uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+      wptr_start = width_force(4, tbuf, chrom_name_write(tbuf, chrom_info_ptr, uii));
+      *wptr_start++ = ' ';
+      wptr_start[plink_maxsnp] = ' ';
+      for (; marker_uidx < chrom_end;) {
+	if (perm_adapt_nst) {
+	  pval = ((double)(g_perm_2success_ct[marker_idx] + 2)) / ((double)(2 * (g_perm_attempt_ct[marker_idx + 1])));
+	} else {
+	  pval = ((double)(g_perm_2success_ct[marker_idx + 2])) * dxx;
+	}
+	if (pval <= pfilter) {
+	  fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
+	  wptr = &(wptr_start[1 + plink_maxsnp]);
+	  if (perm_adapt_nst && (!g_perm_attempt_ct[marker_idx])) {
+	    // invalid
+	    wptr = memcpya(wptr, "          NA           NA           NA", 38);
+	  } else {
+	    wptr = double_g_writewx4x(wptr, orig_chisq[marker_idx], 12, ' ');
+	    if (!perm_count) {
+	      wptr = double_g_writewx4(wptr, pval, 12);
+	    } else {
+	      wptr = double_g_writewx4(wptr, ((double)g_perm_2success_ct[marker_idx]) * 0.5, 12);
+	    }
+	    *wptr++ = ' ';
+	    if (perm_adapt_nst) {
+	      wptr = memseta(wptr, 32, 2);
+	      wptr = uint32_writew10(wptr, g_perm_attempt_ct[marker_idx]);
+	    } else {
+	      // ...
+	      if (!perm_count) {
+	      } else {
+	      }
+	    }
+	    *wptr++ = '\n';
+	    if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
+	      goto dfam_ret_WRITE_FAIL;
+	    }
+	  }
+	  if (++marker_idx == marker_ct) {
+	    goto dfam_loop_end;
+	  }
+	  marker_uidx++;
+	  next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
+	}
+      }
+    }
+  dfam_loop_end:
+    if (fclose_null(&outfile)) {
+      goto dfam_ret_WRITE_FAIL;
+    }
+    LOGPRINTFWW("Permutation test report written to %s .\n", outname);
   }
   // ...
   
@@ -3858,6 +4971,11 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   dfam_ret_INVALID_CMDLINE:
     retval = RET_INVALID_CMDLINE;
     break;
+    /*
+  dfam_ret_THREAD_CREATE_FAIL:
+    retval = RET_THREAD_CREATE_FAIL;
+    break;
+    */
   }
  dfam_ret_1:
   wkspace_reset(wkspace_mark);
@@ -3912,8 +5030,8 @@ void qfam_compute_bw(uintptr_t* loadbuf, uintptr_t sample_ct, uint32_t* fs_start
     cur_end = *fs_starts_ptr++;
     sample_uidx = fss_contents[cur_start];
     uii = fss_contents[cur_start + 1];
-    ulii = (loadbuf[sample_uidx / BITCT2] >> (2 * (sample_uidx % BITCT2))) & 3;
-    uljj = (loadbuf[uii / BITCT2] >> (2 * (uii % BITCT2))) & 3;
+    ulii = EXTRACT_2BIT_GENO(loadbuf, sample_uidx);
+    uljj = EXTRACT_2BIT_GENO(loadbuf, uii);
     if ((ulii != 1) && (uljj != 1)) {
       // both parents nonmissing
       qfam_b[cur_idx] = 0.5 * (double)(4 - ((intptr_t)((ulii + (ulii == 0)) + (uljj + (uljj == 0)))));
@@ -3925,7 +5043,7 @@ void qfam_compute_bw(uintptr_t* loadbuf, uintptr_t sample_ct, uint32_t* fs_start
       uljj = 0;
       do {
         sample_uidx = *fss_ptr++;
-        ulii = (loadbuf[sample_uidx / BITCT2] >> (2 * (sample_uidx % BITCT2))) & 3;
+        ulii = EXTRACT_2BIT_GENO(loadbuf, sample_uidx);
         if (ulii != 1) {
           uljj += ulii + (ulii == 0);
 	} else {
@@ -3949,7 +5067,7 @@ void qfam_compute_bw(uintptr_t* loadbuf, uintptr_t sample_ct, uint32_t* fs_start
     uljj = 0;
     do {
       sample_uidx = *fss_ptr++;
-      ulii = (loadbuf[sample_uidx / BITCT2] >> (2 * (sample_uidx % BITCT2))) & 3;
+      ulii = EXTRACT_2BIT_GENO(loadbuf, sample_uidx);
       if (ulii != 1) {
         uljj += ulii + (ulii == 0);
       } else {
@@ -3966,7 +5084,7 @@ void qfam_compute_bw(uintptr_t* loadbuf, uintptr_t sample_ct, uint32_t* fs_start
   for (; cur_idx < fss_ct; cur_idx++) {
     // singletons
     sample_uidx = *fss_ptr++;
-    ulii = (loadbuf[sample_uidx / BITCT2] >> (2 * (sample_uidx % BITCT2))) & 3;
+    ulii = EXTRACT_2BIT_GENO(loadbuf, sample_uidx);
     if (ulii != 1) {
       qfam_b[cur_idx] = (double)(2 - (intptr_t)(ulii + (ulii == 0)));
     } else {
@@ -3976,7 +5094,7 @@ void qfam_compute_bw(uintptr_t* loadbuf, uintptr_t sample_ct, uint32_t* fs_start
   fill_all_bits(nm_lm, lm_ct);
   for (sample_uidx = 0, sample_idx = 0; sample_idx < lm_ct; sample_uidx++, sample_idx++) {
     next_set_unsafe_ck(lm_eligible, &sample_uidx);
-    ulii = (loadbuf[sample_uidx / BITCT2] >> (2 * (sample_uidx % BITCT2))) & 3;
+    ulii = EXTRACT_2BIT_GENO(loadbuf, sample_uidx);
     if (ulii != 1) {
       fss_idx = sample_lm_to_fss_idx[sample_idx];
       if (!is_set(nm_fss, fss_idx)) {
@@ -3990,7 +5108,7 @@ void qfam_compute_bw(uintptr_t* loadbuf, uintptr_t sample_ct, uint32_t* fs_start
 	  // assert: fss_contents[uii + 1] == sample_uidx
           uii = fss_contents[uii];
 	}
-        if (((loadbuf[uii / BITCT2] >> (2 * (uii % BITCT2))) & 3) == 1) {
+        if (EXTRACT_2BIT_GENO(loadbuf, uii) == 1) {
 	  goto qfam_compute_bw_skip;
 	}
       }
@@ -4115,7 +5233,7 @@ static inline uint32_t qfam_regress(uint32_t test_type, uint32_t nind, uint32_t
 
 THREAD_RET_TYPE qfam_thread(void* arg) {
   uintptr_t tidx = (uintptr_t)arg;
-  uint32_t qfam_thread_ct = g_qfam_thread_ct;
+  uint32_t qfam_thread_ct = g_xfam_thread_ct;
   uint32_t fs_ct = g_fs_ct;
   uint32_t lm_ct = g_lm_ct;
   uint32_t singleton_ct = g_singleton_ct;
@@ -4460,7 +5578,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
   g_fs_ct = fs_ct;
   g_singleton_ct = singleton_ct;
   g_lm_ct = lm_ct;
-  g_qfam_thread_ct = qfam_thread_ct;
+  g_xfam_thread_ct = qfam_thread_ct;
   fss_ctl = (fss_ct + BITCT - 1) / BITCT;
   lm_ctl = (lm_ct + BITCT - 1) / BITCT;
   flip_ctl = only_within? lm_ctl : fss_ctl;
diff --git a/plink_family.h b/plink_family.h
index 5c6826d..4223c6b 100644
--- a/plink_family.h
+++ b/plink_family.h
@@ -53,7 +53,7 @@ int32_t get_trios_and_families(uintptr_t unfiltered_sample_ct, uintptr_t* sample
 
 uint32_t erase_mendel_errors(uintptr_t unfiltered_sample_ct, uintptr_t* loadbuf, uintptr_t* workbuf, uintptr_t* sex_male, uint32_t* trio_lookup, uint32_t trio_ct, uint32_t is_x, uint32_t multigen);
 
-int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t plink_maxfid, uint32_t plink_maxiid, uint32_t plink_maxsnp, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_e [...]
+int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t plink_maxfid, uint32_t plink_maxiid, uint32_t plink_maxsnp, uint32_t allow_no_variants, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t* sample_ [...]
 
 typedef struct {
   char* family_ids;
diff --git a/plink_filter.c b/plink_filter.c
index 8b66155..baf4eb0 100644
--- a/plink_filter.c
+++ b/plink_filter.c
@@ -45,7 +45,7 @@ const char* keep_or_remove_flag_str(uint32_t flags) {
   return NULL;
 }
 
-int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr, uint32_t flags) {
+int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr, uint32_t flags, uint32_t allow_no_samples) {
   FILE* infile = NULL;
   unsigned char* wkspace_mark = wkspace_base;
   uintptr_t* exclude_arr_new = NULL;
@@ -143,7 +143,7 @@ int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, u
   }
   memcpy(exclude_arr, exclude_arr_new, unfiltered_ctl * sizeof(intptr_t));
   *exclude_ct_ptr = popcount_longs(exclude_arr, unfiltered_ctl);
-  if (*exclude_ct_ptr == unfiltered_ct) {
+  if ((*exclude_ct_ptr == unfiltered_ct) && (!allow_no_samples)) {
     LOGERRPRINTF("Error: No %s remaining after --%s.\n", g_species_plural, keep_or_remove_flag_str(flags));
     goto keep_or_remove_ret_ALL_SAMPLES_EXCLUDED;
   }
@@ -235,7 +235,7 @@ void extract_exclude_process_token(const char* tok_start, const uint32_t* marker
   }
 }
 
-int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t do_exclude, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr) {
+int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t do_exclude, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t allow_no_variants) {
   unsigned char* wkspace_mark = wkspace_base;
   FILE* infile = NULL;
   uintptr_t unfiltered_marker_ctl = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
@@ -313,7 +313,7 @@ int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, ui
     zero_trailing_bits(marker_exclude, unfiltered_marker_ct);
   }
   *marker_exclude_ct_ptr = popcount_longs(marker_exclude, unfiltered_marker_ctl);
-  if (*marker_exclude_ct_ptr == unfiltered_marker_ct) {
+  if ((*marker_exclude_ct_ptr == unfiltered_marker_ct) && (!allow_no_variants)) {
     LOGERRPRINTF("Error: No variants remaining after --%s.\n", do_exclude? "exclude" : "extract");
     goto extract_exclude_flag_norange_ret_ALL_MARKERS_EXCLUDED;
   }
@@ -347,7 +347,7 @@ int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, ui
   return retval;
 }
 
-int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uint32_t id_htable_size, char* item_ids, uintptr_t max_id_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr) {
+int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uint32_t id_htable_size, uint32_t allow_no_variants, char* item_ids, uintptr_t max_id_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr) {
   gzFile gz_infile = NULL;
   unsigned char* wkspace_mark = wkspace_base;
   uintptr_t include_ct = 0;
@@ -548,7 +548,7 @@ int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uin
     clear_bit(exclude_arr_new, item_uidx);
     include_ct++;
   }
-  if (!include_ct) {
+  if ((!include_ct) && (!allow_no_variants)) {
     logerrprint("Error: No variants remaining after --attrib.\n");
     retval = RET_ALL_MARKERS_EXCLUDED;
     goto filter_attrib_ret_1;
@@ -582,7 +582,7 @@ int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uin
   return retval;
 }
 
-int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr) {
+int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uint32_t allow_no_samples, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr) {
   // re-merge this with filter_attrib() after making sample ID lookup
   // hash-based
   gzFile gz_infile = NULL;
@@ -612,7 +612,7 @@ int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids,
   uint32_t unfiltered_idx;
   uint32_t pos_match_needed;
   int32_t sorted_idx;
-  
+
   if (wkspace_alloc_ul_checked(&exclude_arr_new, unfiltered_ctl * sizeof(intptr_t)) ||
       wkspace_alloc_ul_checked(&already_seen, unfiltered_ctl * sizeof(intptr_t)) ||
       wkspace_alloc_c_checked(&id_buf, max_id_len)) { 
@@ -761,6 +761,10 @@ int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids,
     }
     set_bit(already_seen, sorted_idx);
     unfiltered_idx = id_map[(uint32_t)sorted_idx];
+    if (is_set(exclude_arr, unfiltered_idx)) {
+      // bugfix: don't proceed here
+      continue;
+    }
     pos_match_needed = pos_match_ct;
     while (!is_eoln_kns(*cond_ptr)) {
       bufptr2 = cond_ptr;
@@ -784,7 +788,7 @@ int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids,
     clear_bit(exclude_arr_new, unfiltered_idx);
     include_ct++;
   }
-  if (!include_ct) {
+  if ((!include_ct) && (!allow_no_samples)) {
     LOGERRPRINTF("Error: No %s remaining after --attrib-indiv.\n", g_species_plural);
     retval = RET_ALL_SAMPLES_EXCLUDED;
     goto filter_attrib_sample_ret_1;
@@ -792,6 +796,7 @@ int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids,
   LOGPRINTF("--attrib-indiv: %" PRIuPTR " %s remaining.\n", include_ct, species_str(include_ct));
   memcpy(exclude_arr, exclude_arr_new, unfiltered_ctl * sizeof(intptr_t));
   *exclude_ct_ptr = unfiltered_ct - include_ct;
+
   while (0) {
   filter_attrib_sample_ret_NOMEM:
     retval = RET_NOMEM;
@@ -818,7 +823,7 @@ int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids,
   return retval;
 }
 
-int32_t filter_qual_scores(Two_col_params* qual_filter, double qual_min_thresh, double qual_max_thresh, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr) {
+int32_t filter_qual_scores(Two_col_params* qual_filter, double qual_min_thresh, double qual_max_thresh, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t allow_no_variants, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr) {
   unsigned char* wkspace_mark = wkspace_base;
   FILE* infile = NULL;
   uintptr_t unfiltered_marker_ctl = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
@@ -916,6 +921,11 @@ int32_t filter_qual_scores(Two_col_params* qual_filter, double qual_min_thresh,
   }
   *marker_exclude_ct_ptr = popcount_longs(marker_exclude, unfiltered_marker_ctl);
   marker_ct = unfiltered_marker_ct - *marker_exclude_ct_ptr;
+  if ((!marker_ct) && (!allow_no_variants)) {
+    logerrprint("Error: No variants remaining after --qual-scores.\n");
+    retval = RET_ALL_MARKERS_EXCLUDED;
+    goto filter_qual_scores_ret_1;
+  }
   if (miss_ct) {
     sprintf(logbuf, "--qual-scores: %" PRIuPTR " variant%s remaining, %" PRIuPTR " ID%s missing.\n", marker_ct, (marker_ct == 1)? "" : "s", miss_ct, (miss_ct == 1)? "" : "s");
   } else {
@@ -942,7 +952,7 @@ int32_t filter_qual_scores(Two_col_params* qual_filter, double qual_min_thresh,
   return retval;
 }
 
-uint32_t random_thin_markers(double thin_keep_prob, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr) {
+uint32_t random_thin_markers(double thin_keep_prob, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t allow_no_variants) {
   uint32_t marker_ct = unfiltered_marker_ct - *marker_exclude_ct_ptr;
   uint32_t marker_uidx = 0;
   uint32_t markers_done = 0;
@@ -960,7 +970,7 @@ uint32_t random_thin_markers(double thin_keep_prob, uintptr_t unfiltered_marker_
       }
     } while (++marker_uidx < marker_uidx_stop);
   }
-  if (marker_ct == removed_ct) {
+  if ((marker_ct == removed_ct) && (!allow_no_variants)) {
     logerrprint("Error: All variants removed by --thin.  Try a higher probability.\n");
     return 1;
   }
@@ -981,17 +991,23 @@ int32_t random_thin_markers_ct(uint32_t thin_keep_ct, uintptr_t unfiltered_marke
     LOGERRPRINTF("Error: --thin-count parameter exceeds number of remaining variants.\n");
     goto random_thin_markers_ct_ret_INVALID_CMDLINE;
   }
-  if (wkspace_alloc_ul_checked(&perm_buf, marker_ctl * sizeof(intptr_t))) {
-    goto random_thin_markers_ct_ret_NOMEM;
-  }
-  // no actual interleaving here, but may as well use this function
-  generate_perm1_interleaved(marker_ct, marker_ct - thin_keep_ct, 0, 1, perm_buf);
-  marker_uidx = 0;
-  for (marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
-    next_unset_unsafe_ck(marker_exclude, &marker_uidx);
-    if (is_set(perm_buf, marker_idx)) {
-      set_bit(marker_exclude, marker_uidx);
+  if (marker_ct > 1) {
+    if (wkspace_alloc_ul_checked(&perm_buf, marker_ctl * sizeof(intptr_t))) {
+      goto random_thin_markers_ct_ret_NOMEM;
+    }
+    // no actual interleaving here, but may as well use this function
+    // note that this requires marker_ct >= 2
+    generate_perm1_interleaved(marker_ct, marker_ct - thin_keep_ct, 0, 1, perm_buf);
+    marker_uidx = 0;
+    for (marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
+      next_unset_unsafe_ck(marker_exclude, &marker_uidx);
+      if (is_set(perm_buf, marker_idx)) {
+	set_bit(marker_exclude, marker_uidx);
+      }
     }
+  } else if ((!thin_keep_ct) && marker_ct) {
+    marker_uidx = next_unset_unsafe(marker_exclude, 0);
+    set_bit(marker_exclude, marker_uidx);
   }
   LOGPRINTF("--thin-count: %u variant%s removed (%u remaining).\n", marker_ct - thin_keep_ct, (marker_ct - thin_keep_ct == 1)? "" : "s", thin_keep_ct);
   *marker_exclude_ct_ptr = unfiltered_marker_ct - thin_keep_ct;
@@ -1007,7 +1023,7 @@ int32_t random_thin_markers_ct(uint32_t thin_keep_ct, uintptr_t unfiltered_marke
   return retval;
 }
 
-uint32_t random_thin_samples(double thin_keep_prob, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr) {
+uint32_t random_thin_samples(double thin_keep_prob, uintptr_t unfiltered_sample_ct, uint32_t allow_no_samples, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr) {
   uint32_t sample_ct = unfiltered_sample_ct - *sample_exclude_ct_ptr;
   uint32_t sample_uidx = 0;
   uint32_t samples_done = 0;
@@ -1025,7 +1041,7 @@ uint32_t random_thin_samples(double thin_keep_prob, uintptr_t unfiltered_sample_
       }
     } while (++sample_uidx < sample_uidx_stop);
   }
-  if (sample_ct == removed_ct) {
+  if ((sample_ct == removed_ct) && (!allow_no_samples)) {
     LOGERRPRINTF("Error: All %s removed by --thin-indiv. Try a higher probability.\n", g_species_plural);
     return 1;
   }
@@ -1073,7 +1089,7 @@ int32_t random_thin_samples_ct(uint32_t thin_keep_ct, uintptr_t unfiltered_sampl
 }
 
 
-int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char* marker_ids, uintptr_t max_marker_id_len, char* sorted_sample_ids, uintptr_t sorted_sample_ct, uintptr_t max_sample_id_len, uint32_t* sample_id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip) {
+int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char* marker_ids, uintptr_t max_marker_id_len, char* sorted_sample_ids, uintptr_t sorted_sample_ct, uintptr_t max_sample_id_len, uint32_t* sample_id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip) {
   // 1. load and validate cluster file
   // 2. load marker file, sort by uidx
   // 3. check for early exit (no clusters and/or no .zero entries)
@@ -1378,7 +1394,7 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
   return retval;
 }
 
-int32_t filter_samples_file(char* filtername, char* sorted_sample_ids, uintptr_t sorted_ids_len, uintptr_t max_sample_id_len, uint32_t* id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* filtervals_flattened, uint32_t mfilter_col) {
+int32_t filter_samples_file(char* filtername, char* sorted_sample_ids, uintptr_t sorted_ids_len, uintptr_t max_sample_id_len, uint32_t* id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* filtervals_flattened, uint32_t mfilter_col, uint32_t allow_no_samples) {
   FILE* infile = NULL;
   unsigned char* wkspace_mark = wkspace_base;
   uintptr_t unfiltered_sample_ctl = (unfiltered_sample_ct + (BITCT - 1)) / BITCT;
@@ -1446,7 +1462,7 @@ int32_t filter_samples_file(char* filtername, char* sorted_sample_ids, uintptr_t
   if (!feof(infile)) {
     goto filter_samples_file_ret_READ_FAIL;
   }
-  if (!include_ct) {
+  if ((!include_ct) && (!allow_no_samples)) {
     LOGERRPRINTF("Error: All %s excluded by --filter.\n", g_species_plural);
     goto filter_samples_file_ret_ALL_SAMPLES_EXCLUDED;
   }
@@ -1510,7 +1526,12 @@ void filter_samples_bitfields(uintptr_t unfiltered_sample_ct, uintptr_t* sample_
   *sample_exclude_ct_ptr = popcount_longs(sample_exclude, unfiltered_sample_ctl);
 }
 
-int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double mind_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip) {
+int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double mind_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip, uint32_t allow_no_samples) {
+  uint32_t sample_exclude_ct = *sample_exclude_ct_ptr;
+  uint32_t sample_ct = unfiltered_sample_ct - sample_exclude_ct;
+  if (!sample_ct) {
+    return 0;
+  }
   unsigned char* wkspace_mark = wkspace_base;
   FILE* outfile = NULL;
   uint32_t marker_ct = unfiltered_marker_ct - marker_exclude_ct;
@@ -1523,8 +1544,6 @@ int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
   uintptr_t y_end = 0;
   uintptr_t* sample_male_include2 = NULL;
   uint32_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
-  uint32_t sample_exclude_ct = *sample_exclude_ct_ptr;
-  uint32_t sample_ct = unfiltered_sample_ct - sample_exclude_ct;
   uint32_t sample_uidx = 0;
   uint32_t sample_idx = 0;
   uint32_t removed_ct = 0;
@@ -1664,7 +1683,7 @@ int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
     }
   }
   *sample_exclude_ct_ptr += removed_ct;
-  if (*sample_exclude_ct_ptr == unfiltered_sample_ct) {
+  if ((*sample_exclude_ct_ptr == unfiltered_sample_ct) && (!allow_no_samples)) {
     LOGERRPRINTF("Error: All %s removed due to missing genotype data (--mind).\n", g_species_plural);
     LOGPRINTFWW("IDs written to %s .\n", outname);
     goto mind_filter_ret_ALL_SAMPLES_EXCLUDED;
@@ -2846,23 +2865,23 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
     chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
     chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
     marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
-    is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
-    is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
-    is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
-    if (!is_y) {
-      cur_nm = sample_include2;
-      cur_tot = sample_ct;
-      cur_cluster_sizes = cluster_sizes;
-      om_ycorr = 0;
-    } else {
-      cur_nm = sample_male_include2;
-      cur_tot = sample_male_ct;
-      cur_cluster_sizes = cluster_sizes_y;
-      om_ycorr = om_cluster_ct;
-    }
-    cptr = width_force(4, tbuf, chrom_name_write(tbuf, chrom_info_ptr, chrom_idx));
-    *cptr++ = ' ';
     if (marker_uidx < chrom_end) {
+      is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
+      is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+      is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
+      if (!is_y) {
+	cur_nm = sample_include2;
+	cur_tot = sample_ct;
+	cur_cluster_sizes = cluster_sizes;
+	om_ycorr = 0;
+      } else {
+	cur_nm = sample_male_include2;
+	cur_tot = sample_male_ct;
+	cur_cluster_sizes = cluster_sizes_y;
+	om_ycorr = om_cluster_ct;
+      }
+      cptr = width_force(4, tbuf, chrom_name_write(tbuf, chrom_info_ptr, chrom_idx));
+      *cptr++ = ' ';
       if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
 	goto write_missingness_reports_ret_READ_FAIL;
       }
@@ -3049,7 +3068,7 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
   return retval;
 }
 
-int32_t hardy_report_write_line(Pigz_state* ps_ptr, char** pzwritep_ptr, char* prefix_buf, uint32_t prefix_len, uint32_t reverse, uint32_t ll_ct, uint32_t lh_ct, uint32_t hh_ct, char* midbuf_ptr, double pval, double output_min_p) {
+int32_t hardy_report_write_line(Pigz_state* ps_ptr, char** pzwritep_ptr, char* prefix_buf, uint32_t prefix_len, uint32_t reverse, uint32_t ll_ct, uint32_t lh_ct, uint32_t hh_ct, uint32_t hwe_midp, uint32_t is_mt, char* midbuf_ptr, double pval, double output_min_p) {
   char* pzwritep = *pzwritep_ptr;
   char wbuf[48];
   char* cptr;
@@ -3065,12 +3084,13 @@ int32_t hardy_report_write_line(Pigz_state* ps_ptr, char** pzwritep_ptr, char* p
   pzwritep = fw_strcpyn(20, cptr - wbuf, wbuf, pzwritep);
   *pzwritep++ = ' ';
   denom = (ll_ct + lh_ct + hh_ct) * 2;
-  if (denom) {
+  if (denom && (!is_mt)) {
     drecip = 1.0 / ((double)denom);
     minor_freq = (2 * ll_ct + lh_ct) * drecip;
     pzwritep = double_g_writewx4(double_g_writewx4x(double_g_writewx4x(pzwritep, (lh_ct * 2) * drecip, 8, ' '), minor_freq * (2 * hh_ct + lh_ct) * drecip * 2, 8, ' '), MAXV(pval, output_min_p), 12);
   } else {
-    pzwritep = memcpya(pzwritep, "     nan      nan           NA", 30);
+    pzwritep = memcpya(pzwritep, "     nan      nan          ", 27);
+    pzwritep = memcpyl3a(pzwritep, hwe_midp? "0.5" : "  1");
   }
   append_binary_eoln(&pzwritep);
   if (flex_pzwrite(ps_ptr, &pzwritep)) {
@@ -3089,7 +3109,6 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
   uint32_t hwe_midp = hwe_modifier & HWE_MIDP;
   uint32_t output_gz = (hwe_modifier / HWE_GZ) & 1;
   int32_t retval = 0;
-  uint32_t skip_chrom = 0;
   uint32_t pct = 0;
   Pigz_state ps;
   uint32_t prefix_len;
@@ -3156,7 +3175,6 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
 
   chrom_fo_idx = 0;
   refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
-  skip_chrom = (is_haploid && (!is_x)) || is_mt;
   cptr0 = width_force(4, writebuf, chrom_name_write(writebuf, chrom_info_ptr, chrom_info_ptr->chrom_file_order[chrom_fo_idx]));
   *cptr0++ = ' ';
   cptr = &(cptr0[10 + plink_maxsnp]);
@@ -3175,7 +3193,6 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
 	if (marker_uidx >= chrom_end) {
 	  chrom_fo_idx++;
 	  refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
-	  skip_chrom = (is_haploid && (!is_x)) || is_mt;
 	  cptr0 = width_force(4, writebuf, chrom_name_write(writebuf, chrom_info_ptr, chrom_info_ptr->chrom_file_order[chrom_fo_idx]));
 	  *cptr0++ = ' ';
 	  cptr = &(cptr0[10 + plink_maxsnp]);
@@ -3187,9 +3204,6 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
 	  }
 	  cptr2 = &(cptr[18 + 2 * max_marker_allele_len]);
 	}
-        if (skip_chrom) {
-	  continue;
-	}
 	fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), cptr0);
 	reverse = IS_SET(marker_reverse, marker_uidx);
 	cptr3 = marker_allele_ptrs[2 * marker_uidx];
@@ -3199,7 +3213,7 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
 	cptr5 = fw_strcpy(4, cptr4, &(cptr5[1]));
 	*cptr5 = ' ';
 	prefix_len = 1 + (cptr5 - writebuf);
-	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_ll_allfs[marker_uidx], hwe_lh_allfs[marker_uidx], hwe_hh_allfs[marker_uidx], cptr2, p_values[marker_idx], output_min_p)) {
+	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_ll_allfs[marker_uidx], hwe_lh_allfs[marker_uidx], hwe_hh_allfs[marker_uidx], hwe_midp, is_mt, cptr2, p_values[marker_idx], output_min_p)) {
 	  goto hardy_report_ret_WRITE_FAIL;
 	}
       }
@@ -3221,7 +3235,6 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
 	if (marker_uidx >= chrom_end) {
 	  chrom_fo_idx++;
 	  refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
-	  skip_chrom = (is_haploid && (!is_x)) || is_mt;
 	  cptr0 = width_force(4, writebuf, chrom_name_write(writebuf, chrom_info_ptr, chrom_info_ptr->chrom_file_order[chrom_fo_idx]));
 	  *cptr0++ = ' ';
           memset(&(cptr0[plink_maxsnp]), 32, 20);
@@ -3229,9 +3242,6 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
 	  cptr2 = &(cptr[18 + 2 * max_marker_allele_len]);
 	  prefix_len = 10 + ((uintptr_t)(cptr - writebuf));
 	}
-	if (skip_chrom) {
-	  continue;
-	}
 	fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), cptr0);
 	memcpy(&(cptr0[4 + plink_maxsnp]), "  ALL", 5);
 	reverse = IS_SET(marker_reverse, marker_uidx);
@@ -3242,17 +3252,17 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
 	cptr5 = fw_strcpy(4, cptr4, &(cptr5[1]));
 	*cptr5 = ' ';
 	prefix_len = 1 + (cptr5 - writebuf);
-	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_ll_allfs[marker_uidx], hwe_lh_allfs[marker_uidx], hwe_hh_allfs[marker_uidx], cptr2, p_values[3 * marker_idx], output_min_p)) {
+	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_ll_allfs[marker_uidx], hwe_lh_allfs[marker_uidx], hwe_hh_allfs[marker_uidx], hwe_midp, is_mt, cptr2, p_values[3 * marker_idx], output_min_p)) {
 	  goto hardy_report_ret_WRITE_FAIL;
 	}
 
 	memcpy(&(cptr0[7 + plink_maxsnp]), "FF", 2);
-	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_ll_cases[marker_uidx], hwe_lh_cases[marker_uidx], hwe_hh_cases[marker_uidx], cptr2, p_values[3 * marker_idx + 1], output_min_p)) {
+	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_ll_cases[marker_uidx], hwe_lh_cases[marker_uidx], hwe_hh_cases[marker_uidx], hwe_midp, is_mt, cptr2, p_values[3 * marker_idx + 1], output_min_p)) {
 	  goto hardy_report_ret_WRITE_FAIL;
 	}
 
 	memcpy(&(cptr0[4 + plink_maxsnp]), "UN", 2);
-	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_lls[marker_uidx], hwe_lhs[marker_uidx], hwe_hhs[marker_uidx], cptr2, p_values[3 * marker_idx + 2], output_min_p)) {
+	if (hardy_report_write_line(&ps, &pzwritep, writebuf, prefix_len, reverse, hwe_lls[marker_uidx], hwe_lhs[marker_uidx], hwe_hhs[marker_uidx], hwe_midp, is_mt, cptr2, p_values[3 * marker_idx + 2], output_min_p)) {
 	  goto hardy_report_ret_WRITE_FAIL;
 	}
       }
@@ -3284,70 +3294,75 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
   return retval;
 }
 
-uint32_t enforce_hwe_threshold(double hwe_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, int32_t* hwe_lls, int32_t* hwe_lhs, int32_t* hwe_hhs, uint32_t hwe_modifier, int32_t* hwe_ll_allfs, int32_t* hwe_lh_allfs, int32_t* hwe_hh_allfs, Chrom_info* chrom_info_ptr) {
+uint32_t enforce_hwe_threshold(double hwe_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, int32_t* hwe_lls, int32_t* hwe_lhs, int32_t* hwe_hhs, uint32_t hwe_modifier, uint32_t allow_no_variants, int32_t* hwe_ll_allfs, int32_t* hwe_lh_allfs, int32_t* hwe_hh_allfs, Chrom_info* chrom_info_ptr) {
   uint32_t marker_ct = unfiltered_marker_ct - *marker_exclude_ct_ptr;
   uint32_t marker_uidx = 0;
   uint32_t removed_ct = 0;
   uint32_t hwe_all = hwe_modifier & HWE_THRESH_ALL;
   uint32_t hwe_thresh_midp = hwe_modifier & HWE_THRESH_MIDP;
-  uint32_t min_obs = 0xffffffffU;
+  uint32_t min_obs_nonx = 0xffffffffU;
+  uint32_t min_obs_x = 0xffffffffU;
   uint32_t max_obs = 0;
-  int32_t mt_code = chrom_info_ptr->mt_code;
-  uint32_t mt_start = 0;
-  uint32_t mt_end = 0;
-  uint32_t markers_done;
+  uint32_t chrom_fo_idx;
+  uint32_t chrom_idx;
+  uint32_t chrom_end;
   uint32_t cur_obs;
+  uint32_t cur_min_obs;
+  int32_t is_x;
+  int32_t test_failed;
+  if (chrom_info_ptr->haploid_mask[0] & 1) {
+    logerrprint("Warning: --hwe has no effect since entire genome is haploid.\n");
+    return 0;
+  }
   hwe_thresh *= 1 + SMALL_EPSILON;
   if (hwe_all) {
     hwe_lhs = hwe_lh_allfs;
     hwe_lls = hwe_ll_allfs;
     hwe_hhs = hwe_hh_allfs;
   }
-  if ((mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, mt_code)) {
-    mt_start = chrom_info_ptr->chrom_start[(uint32_t)mt_code];
-    mt_end = chrom_info_ptr->chrom_end[(uint32_t)mt_code];
-  }
-  if (hwe_thresh_midp) {
-    for (markers_done = 0; markers_done < marker_ct; marker_uidx++, markers_done++) {
-      next_unset_unsafe_ck(marker_exclude, &marker_uidx);
-      if ((marker_uidx < mt_end) && (marker_uidx >= mt_start)) {
-        continue;
-      }
-      if (SNPHWE_midp_t(hwe_lhs[marker_uidx], hwe_lls[marker_uidx], hwe_hhs[marker_uidx], hwe_thresh)) {
-	SET_BIT(marker_exclude, marker_uidx);
-	removed_ct++;
-      }
-      cur_obs = hwe_lhs[marker_uidx] + hwe_lls[marker_uidx] + hwe_hhs[marker_uidx];
-      if (cur_obs < min_obs) {
-	min_obs = cur_obs;
-      }
-      if (cur_obs > max_obs) {
-	max_obs = cur_obs;
-      }
-    }
-  } else {
-    for (markers_done = 0; markers_done < marker_ct; marker_uidx++, markers_done++) {
-      next_unset_unsafe_ck(marker_exclude, &marker_uidx);
-      if ((marker_uidx < mt_end) && (marker_uidx >= mt_start)) {
-        continue;
-      }
-      if (SNPHWE_t(hwe_lhs[marker_uidx], hwe_lls[marker_uidx], hwe_hhs[marker_uidx], hwe_thresh)) {
-	SET_BIT(marker_exclude, marker_uidx);
-	removed_ct++;
-      }
-      cur_obs = hwe_lhs[marker_uidx] + hwe_lls[marker_uidx] + hwe_hhs[marker_uidx];
-      if (cur_obs < min_obs) {
-	min_obs = cur_obs;
+  for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
+    chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+    chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+    marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+    if (marker_uidx < chrom_end) {
+      is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
+      if ((((int32_t)chrom_idx) == chrom_info_ptr->mt_code) || (is_set(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x))) {
+	continue;
       }
-      if (cur_obs > max_obs) {
-	max_obs = cur_obs;
+      // okay if min_obs_x is an underestimate
+      cur_min_obs = min_obs_nonx;
+      do {
+	if (hwe_thresh_midp) {
+	  test_failed = SNPHWE_midp_t(hwe_lhs[marker_uidx], hwe_lls[marker_uidx], hwe_hhs[marker_uidx], hwe_thresh);
+	} else {
+	  test_failed = SNPHWE_t(hwe_lhs[marker_uidx], hwe_lls[marker_uidx], hwe_hhs[marker_uidx], hwe_thresh);
+	}
+	if (test_failed) {
+	  SET_BIT(marker_exclude, marker_uidx);
+	  removed_ct++;
+	}
+	cur_obs = hwe_lhs[marker_uidx] + hwe_lls[marker_uidx] + hwe_hhs[marker_uidx];
+	if (cur_obs < cur_min_obs) {
+	  cur_min_obs = cur_obs;
+	}
+	if (cur_obs > max_obs) {
+	  max_obs = cur_obs;
+	}
+	marker_uidx = next_unset(marker_exclude, marker_uidx + 1, chrom_end);
+      } while (marker_uidx < chrom_end);
+      if (is_x) {
+	min_obs_x = cur_min_obs;
+      } else {
+	min_obs_nonx = cur_min_obs;
       }
     }
   }
-  if (((uint64_t)max_obs) * 9 > ((uint64_t)min_obs) * 10) {
+  if (((uint64_t)max_obs) * 9 > ((uint64_t)min_obs_nonx) * 10) {
     logerrprint("Warning: --hwe observation counts vary by more than 10%.  Consider using\n--geno, and/or applying different p-value thresholds to distinct subsets of\nyour data.\n");
+  } else if (((uint64_t)max_obs) * 9 > ((uint64_t)min_obs_x) * 10) {
+    logerrprint("Warning: --hwe observation counts vary by more than 10%, due to the X\nchromosome.  You may want to use a less stringent --hwe p-value threshold for X\nchromosome variants.\n");
   }
-  if (marker_ct == removed_ct) {
+  if ((marker_ct == removed_ct) && (!allow_no_variants)) {
     logerrprint("Error: All variants removed due to Hardy-Weinberg exact test (--hwe).\n");
     return 1;
   }
@@ -3356,7 +3371,7 @@ uint32_t enforce_hwe_threshold(double hwe_thresh, uintptr_t unfiltered_marker_ct
   return 0;
 }
 
-uint32_t enforce_minor_allele_thresholds(double min_maf, double max_maf, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* ac_excl_bitfield, uintptr_t* marker_exclude_ct_ptr, double* set_allele_freqs) {
+uint32_t enforce_minor_allele_thresholds(double min_maf, double max_maf, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* ac_excl_bitfield, uintptr_t* marker_exclude_ct_ptr, double* set_allele_freqs, uint32_t allow_no_variants) {
   uint32_t unfiltered_marker_ctl = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
   uint32_t marker_ct = unfiltered_marker_ct - *marker_exclude_ct_ptr;
   uint32_t marker_uidx = 0;
@@ -3383,7 +3398,7 @@ uint32_t enforce_minor_allele_thresholds(double min_maf, double max_maf, uintptr
     bitfield_or(marker_exclude, ac_excl_bitfield, unfiltered_marker_ctl);
   }
   removed_ct = popcount_longs(marker_exclude, unfiltered_marker_ctl) - (*marker_exclude_ct_ptr);
-  if (marker_ct == removed_ct) {
+  if ((marker_ct == removed_ct) && (!allow_no_variants)) {
     logerrprint("Error: All variants removed due to minor allele threshold(s)\n(--maf/--max-maf/--mac/--max-mac).\n");
     return 1;
   }
diff --git a/plink_filter.h b/plink_filter.h
index 9c5194d..7ac2d96 100644
--- a/plink_filter.h
+++ b/plink_filter.h
@@ -17,31 +17,31 @@ void oblig_missing_init(Oblig_missing_info* om_ip);
 
 void oblig_missing_cleanup(Oblig_missing_info* om_ip);
 
-int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_len, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr, uint32_t flags);
+int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_len, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr, uint32_t flags, uint32_t allow_no_samples);
 
-int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t do_exclude, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr);
+int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t do_exclude, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t allow_no_variants);
 
-int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uint32_t id_htable_size, char* item_ids, uintptr_t max_id_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr);
+int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uint32_t id_htable_size, uint32_t allow_no_variants, char* item_ids, uintptr_t max_id_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr);
 
-int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr);
+int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uint32_t allow_no_samples, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr);
 
-int32_t filter_qual_scores(Two_col_params* qual_filter, double qual_min_thresh, double qual_max_thresh, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr);
+int32_t filter_qual_scores(Two_col_params* qual_filter, double qual_min_thresh, double qual_max_thresh, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t allow_no_variants, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr);
 
-uint32_t random_thin_markers(double thin_keep_prob, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr);
+uint32_t random_thin_markers(double thin_keep_prob, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t allow_no_variants);
 
 int32_t random_thin_markers_ct(uint32_t thin_keep_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr);
 
-uint32_t random_thin_samples(double thin_keep_prob, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr);
+uint32_t random_thin_samples(double thin_keep_prob, uintptr_t unfiltered_sample_ct, uint32_t allow_no_samples, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr);
 
 int32_t random_thin_samples_ct(uint32_t thin_keep_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr);
 
-int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char* marker_ids, uintptr_t max_marker_id_len, char* sorted_sample_ids, uintptr_t sorted_sample_ct, uintptr_t max_sample_id_len, uint32_t* sample_id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip);
+int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char* marker_ids, uintptr_t max_marker_id_len, char* sorted_sample_ids, uintptr_t sorted_sample_ct, uintptr_t max_sample_id_len, uint32_t* sample_id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip);
 
-int32_t filter_samples_file(char* filtername, char* sorted_sample_ids, uintptr_t sorted_ids_len, uintptr_t max_sample_id_len, uint32_t* id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* filtervals_flattened, uint32_t mfilter_col);
+int32_t filter_samples_file(char* filtername, char* sorted_sample_ids, uintptr_t sorted_ids_len, uintptr_t max_sample_id_len, uint32_t* id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* filtervals_flattened, uint32_t mfilter_col, uint32_t allow_no_samples);
 
 void filter_samples_bitfields(uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, uintptr_t* orfield, int32_t orfield_flip, uintptr_t* ornot);
 
-int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double mind_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip);
+int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double mind_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* sex_male, Chrom_info* chrom_info_ptr, Oblig_missing_info* om_ip, uint32_t allow_no_samples);
 
 int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_exclude_ct, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* founder_info, int32_t nonfounders, int32_t maf_succ, double* set_allele_freqs, uintptr_t bed_offset, uint32_t hwe_needed, uint32_t hwe_all, uin [...]
 
@@ -49,9 +49,9 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
 
 int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, int32_t* hwe_lls, int32_t* hwe_lhs, int32_t* hwe_hhs, uint32_t hwe_modifier, uint32_t nonfounders, int32_t* hwe_ll_cases, int32_t* hwe_lh_cases, int32_t* hwe_hh_cases, int [...]
 
-uint32_t enforce_hwe_threshold(double hwe_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, int32_t* hwe_lls, int32_t* hwe_lhs, int32_t* hwe_hhs, uint32_t hwe_modifier, int32_t* hwe_ll_allfs, int32_t* hwe_lh_allfs, int32_t* hwe_hh_allfs, Chrom_info* chrom_info_ptr);
+uint32_t enforce_hwe_threshold(double hwe_thresh, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, int32_t* hwe_lls, int32_t* hwe_lhs, int32_t* hwe_hhs, uint32_t hwe_modifier, uint32_t allow_no_variants, int32_t* hwe_ll_allfs, int32_t* hwe_lh_allfs, int32_t* hwe_hh_allfs, Chrom_info* chrom_info_ptr);
 
-uint32_t enforce_minor_allele_thresholds(double min_maf, double max_maf, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* ac_excl_bitfield, uintptr_t* marker_exclude_ct_ptr, double* set_allele_freqs);
+uint32_t enforce_minor_allele_thresholds(double min_maf, double max_maf, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* ac_excl_bitfield, uintptr_t* marker_exclude_ct_ptr, double* set_allele_freqs, uint32_t allow_no_variants);
 
 void enforce_min_bp_space(int32_t min_bp_space, uint32_t unfiltered_marker_ct, uintptr_t* marker_exclude, uint32_t* marker_pos, uintptr_t* marker_exclude_ct_ptr, Chrom_info* chrom_info_ptr);
 
diff --git a/plink_glm.c b/plink_glm.c
index ebf3e32..d71c59a 100644
--- a/plink_glm.c
+++ b/plink_glm.c
@@ -4,6 +4,7 @@
 #include "plink_cluster.h"
 #include "plink_ld.h"
 #include "plink_matrix.h"
+#include "plink_perm.h"
 #include "plink_set.h"
 #include "plink_stats.h"
 
@@ -1839,7 +1840,7 @@ uint32_t logistic_regression(uint32_t sample_ct, uint32_t param_ct, float* vv, f
   }
 }
 
-uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sample_valid_ct, uint32_t missing_ct, uintptr_t* loadbuf, float* covars_cov_major, uintptr_t* perm_vecs, float* coef, float* pp, float* sample_1d_buf, float* pheno_buf, float* param_1d_buf, float* param_1d_buf2, float* param_2d_buf, float* param_2d_buf2, float* logistic_results, uintptr_t constraint_ct, double* constraints_con_major, double* param_1d_dbuf, double* param_2d_dbuf, double* param_2d_dbuf2, double*  [...]
+uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sample_valid_ct, uint32_t missing_ct, uint32_t skip_intercept, uintptr_t* loadbuf, float* covars_cov_major, uintptr_t* perm_vecs, float* coef, float* pp, float* sample_1d_buf, float* pheno_buf, float* param_1d_buf, float* param_1d_buf2, float* param_2d_buf, float* param_2d_buf2, float* logistic_results, uintptr_t constraint_ct, double* constraints_con_major, double* param_1d_dbuf, double* param_2d_dbuf, double* [...]
   // Similar to logistic.cpp fitLM(), but incorporates changes from the
   // postprocessed TopCoder contest code.
   // * coef is now assumed to be initialized with a good starting point for
@@ -1851,10 +1852,10 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
   // Returns number of regression failures.
   uintptr_t param_cta4 = (param_ct + 3) & (~3);
   uintptr_t param_ct_p1 = param_ct + 1;
-  uintptr_t param_ct_m1 = param_ct - 1;
+  uintptr_t param_ct_msi = param_ct - skip_intercept;
   uintptr_t joint_test_requested = (constraints_con_major? 1 : 0);
   uintptr_t param_ctx = param_ct + joint_test_requested;
-  uintptr_t param_ctx_m1 = param_ctx - 1;
+  uintptr_t param_ctx_msi = param_ctx - skip_intercept;
   uintptr_t sample_validx_ctv2 = 2 * ((sample_valid_ct + missing_ct + (BITCT - 1)) / BITCT);
   uintptr_t perm_fail_ct = 0;
   uintptr_t cur_word = 0;
@@ -1927,8 +1928,8 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
 	}
       }
     }
-    fptr = &(logistic_results[perm_idx * param_ctx_m1]);
-    for (param_idx = 1; param_idx < param_ct; param_idx++) {
+    fptr = &(logistic_results[perm_idx * param_ctx_msi]);
+    for (param_idx = skip_intercept; param_idx < param_ct; param_idx++) {
       *fptr++ = param_2d_buf[param_idx * param_ct_p1];
     }
     if (joint_test_requested) {
@@ -1954,11 +1955,11 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
     }
     if (0) {
     glm_logistic_fail:
-      fill_float_zero(&(logistic_results[perm_idx * param_ctx_m1]), param_ct_m1);
+      fill_float_zero(&(logistic_results[perm_idx * param_ctx_msi]), param_ct_msi);
       SET_BIT(perm_fails, perm_idx);
       perm_fail_ct++;
       if (joint_test_requested) {
-        logistic_results[perm_idx * param_ctx_m1 + param_ct_m1] = -9;
+        logistic_results[perm_idx * param_ctx_msi + param_ct_msi] = -9;
       }
     }
     coef = &(coef[param_cta4]);
@@ -3093,6 +3094,7 @@ THREAD_RET_TYPE glm_logistic_adapt_thread(void* arg) {
   double adaptive_ci_zt = g_adaptive_ci_zt;
   double aperm_alpha = g_aperm_alpha;
   uintptr_t cur_param_ct = g_cur_param_ct;
+  uintptr_t cur_param_cta4 = (cur_param_ct + 3) & (~3);
   uintptr_t cur_constraint_ct = g_cur_constraint_ct;
   uint32_t coding_flags = g_coding_flags;
   uint32_t glm_xchr_model = g_glm_xchr_model;
@@ -3167,12 +3169,12 @@ THREAD_RET_TYPE glm_logistic_adapt_thread(void* arg) {
     success_2incr = 0;
     cur_fail_ct = 0;
     // todo: try better starting position
-    fill_float_zero(coef, ((cur_param_ct + 3) & (~3)) * perm_vec_ct);
-    glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, cur_constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
+    fill_float_zero(coef, cur_param_cta4 * perm_vec_ct);
+    glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, 1, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, cur_constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
     for (pidx = 0; pidx < perm_vec_ct;) {
       if (!IS_SET(perm_fails, pidx)) {
 	if (!joint_test_params) {
-	  dxx = (double)coef[pidx * cur_param_ct + 1];
+	  dxx = (double)coef[pidx * cur_param_cta4 + 1]; // bugfix, forgot a4
 	  dxx *= dxx;
           dxx /= (double)regression_results[pidx * param_ctx_m1];
 	  if (dxx > stat_high) {
@@ -3491,7 +3493,7 @@ THREAD_RET_TYPE glm_logistic_maxt_thread(void* arg) {
     success_2incr = 0;
     // todo: try better starting position
     fill_float_zero(coef, cur_param_cta4 * perm_vec_ct);
-    perm_fail_ct = glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, cur_constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
+    perm_fail_ct = glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, 1, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, cur_constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
     for (pidx = 0; pidx < perm_vec_ct; pidx++) {
       if (!IS_SET(perm_fails, pidx)) {
 	if (!joint_test_params) {
@@ -3693,7 +3695,7 @@ THREAD_RET_TYPE glm_logistic_set_thread(void* arg) {
     cur_sample_valid_ct = sample_valid_ct - cur_missing_ct;
     // todo: try better starting position
     fill_float_zero(coef, cur_param_cta4 * perm_vec_ct);
-    glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, perm_fails);
+    glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, 1, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, perm_fails);
     for (pidx = 0; pidx < perm_vec_ct; pidx++) {
       if (!IS_SET(perm_fails, pidx)) {
 	dxx = (double)coef[pidx * cur_param_cta4 + 1];
@@ -4560,8 +4562,8 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
   // changed.
   uint32_t max_thread_ct = g_thread_ct;
   uint32_t hh_or_mt_exists = hh_exists;
+  uint32_t report_intercept = glm_modifier & GLM_INTERCEPT;
   int32_t retval = 0;
-  uint32_t linear_intercept = glm_modifier & GLM_INTERCEPT;
   char dgels_trans = 'N';
   __CLPK_integer dgels_m = 0;
   __CLPK_integer dgels_n = 0;
@@ -5416,7 +5418,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
 	      }
 	    }
 	  }
-	  if (linear_intercept) {
+	  if (report_intercept) {
 	    dxx = g_linear_mt[0].dgels_b[0];
 	    wptr = memcpya(wptr_start2, " INTERCEPT ", 11);
 	    wptr = uint32_writew8x(wptr, (uint32_t)cur_sample_valid_ct, ' ');
@@ -5424,7 +5426,6 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
 	    if (display_ci) {
 	      // okay, this should be made more maintainable...
 	      se = sqrt(g_linear_mt[0].param_2d_buf2[0]);
-	      zval = dxx / se;
 	      dyy = ci_zt * se;
 	      wptr = double_g_writewx4x(wptr, se, 8, ' ');
 	      wptr = double_g_writewx4x(wptr, dxx - dyy, 8, ' ');
@@ -6103,6 +6104,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
   uint32_t pct = 0;
   uint32_t max_thread_ct = g_thread_ct;
   uint32_t hh_or_mt_exists = hh_exists;
+  uint32_t skip_intercept = !(glm_modifier & GLM_INTERCEPT);
   int32_t retval = 0;
   double* constraints_con_major = NULL;
   double* orig_pvals = NULL;
@@ -6152,7 +6154,6 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
   uintptr_t sample_idx;
   uintptr_t param_ctx_max;
   uintptr_t param_ctl_max;
-  uintptr_t param_ctx_max_m1;
   uintptr_t condition_list_start_idx;
   uintptr_t covar_start_idx;
   uintptr_t interaction_start_idx;
@@ -6240,7 +6241,6 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
   sample_valid_cta4 = (sample_valid_ct + 3) & (~3);
   sample_valid_ctv2 = 2 * ((sample_valid_ct + BITCT - 1) / BITCT);
   final_mask = get_final_mask(sample_valid_ct);
-  param_ctx_max_m1 = param_ctx_max - 1;
   param_ct_maxa4 = (param_ct_max + 3) & (~3);
   if (wkspace_alloc_d_checked(&g_orig_stats, marker_initial_ct * sizeof(double)) ||
       wkspace_alloc_c_checked(&param_names, param_ctx_max * max_param_name_len) ||
@@ -6549,6 +6549,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
     // (no need to worry about 1D 16-byte alignment requirements since
     // wkspace_alloc actually forces 64-byte alignment, and allocation sizes
     // are automatically rounded up)
+    uii = (tidx || (orig_perm_batch_size > 1) || skip_intercept)? 1 : 0;
     if (wkspace_alloc_f_checked(&(g_logistic_mt[tidx].cur_covars_cov_major), param_ct_max * sample_valid_cta4 * sizeof(float)) ||
 	wkspace_alloc_f_checked(&(g_logistic_mt[tidx].coef), param_ct_maxa4 * orig_perm_batch_size * sizeof(float)) ||
 	wkspace_alloc_f_checked(&(g_logistic_mt[tidx].pp), sample_valid_cta4 * sizeof(float)) ||
@@ -6558,7 +6559,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
         wkspace_alloc_f_checked(&(g_logistic_mt[tidx].param_1d_buf2), param_ct_max * sizeof(float)) ||
         wkspace_alloc_f_checked(&(g_logistic_mt[tidx].param_2d_buf), param_ct_max * param_ct_maxa4 * sizeof(float)) ||
         wkspace_alloc_f_checked(&(g_logistic_mt[tidx].param_2d_buf2), param_ct_max * param_ct_maxa4 * sizeof(float)) ||
-        wkspace_alloc_f_checked(&(g_logistic_mt[tidx].regression_results), orig_perm_batch_size * param_ctx_max_m1 * sizeof(float)) ||
+        wkspace_alloc_f_checked(&(g_logistic_mt[tidx].regression_results), orig_perm_batch_size * (param_ctx_max - uii) * sizeof(float)) ||
         wkspace_alloc_ul_checked(&(g_logistic_mt[tidx].perm_fails), ulii * sizeof(intptr_t))) {
       goto glm_logistic_assoc_ret_NOMEM;
     }
@@ -6811,7 +6812,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
 	if (cur_sample_valid_ct > cur_param_ct) {
 	  // todo: try better starting position
 	  fill_float_zero(g_logistic_mt[0].coef, (cur_param_ct + 3) & (~3));
-	  regression_fail = glm_logistic(1, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, loadbuf_ptr, g_logistic_mt[0].cur_covars_cov_major, pheno_c_collapsed, g_logistic_mt[0].coef, g_logistic_mt[0].pp, g_logistic_mt[0].sample_1d_buf, g_logistic_mt[0].pheno_buf, g_logistic_mt[0].param_1d_buf, g_logistic_mt[0].param_1d_buf2, g_logistic_mt[0].param_2d_buf, g_logistic_mt[0].param_2d_buf2, g_logistic_mt[0].regression_results, cur_constraint_ct, constraints_con_major, g_logistic_mt[0].param_1 [...]
+	  regression_fail = glm_logistic(1, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, skip_intercept, loadbuf_ptr, g_logistic_mt[0].cur_covars_cov_major, pheno_c_collapsed, g_logistic_mt[0].coef, g_logistic_mt[0].pp, g_logistic_mt[0].sample_1d_buf, g_logistic_mt[0].pheno_buf, g_logistic_mt[0].param_1d_buf, g_logistic_mt[0].param_1d_buf2, g_logistic_mt[0].param_2d_buf, g_logistic_mt[0].param_2d_buf2, g_logistic_mt[0].regression_results, cur_constraint_ct, constraints_con_major, g_logist [...]
 	} else {
 	  regression_fail = 1;
 	}
@@ -6825,7 +6826,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
 	if (!regression_fail) {
 	  for (param_idx = 1; param_idx < cur_param_ct; param_idx++) {
 	    dxx = (double)g_logistic_mt[0].coef[param_idx];
-	    se = sqrt((double)g_logistic_mt[0].regression_results[param_idx - 1]);
+	    se = sqrt((double)g_logistic_mt[0].regression_results[param_idx - skip_intercept]);
 	    zval = dxx / se;
 	    pval = chiprob_p(zval * zval, 1);
 	    if (param_idx == 1) {
@@ -6863,8 +6864,30 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
 	      }
 	    }
 	  }
+	  if (!skip_intercept) {
+	    dxx = (double)g_logistic_mt[0].coef[0];
+	    wptr = memcpya(wptr_start2, " INTERCEPT ", 11);
+	    wptr = uint32_writew8x(wptr, (uint32_t)cur_sample_valid_ct, ' ');
+	    wptr = double_g_writewx4x(wptr, dxx, 10, ' ');
+	    if (display_ci) {
+	      se = sqrt((double)g_logistic_mt[0].regression_results[0]);
+	      dyy = ci_zt * se;
+	      wptr = double_g_writewx4x(wptr, se, 8, ' ');
+	      if (report_odds) {
+		wptr = double_g_writewx4x(wptr, exp(dxx - dyy), 8, ' ');
+		wptr = double_g_writewx4x(wptr, exp(dxx + dyy), 8, ' ');
+	      } else {
+		wptr = double_g_writewx4x(wptr, dxx - dyy, 8, ' ');
+		wptr = double_g_writewx4x(wptr, dxx + dyy, 8, ' ');
+	      }
+	    }
+	    wptr = memcpya(wptr, "          NA           NA\n", 26);
+	    if (fwrite_checked(writebuf, wptr - writebuf, outfile)) {
+	      goto glm_logistic_assoc_ret_WRITE_FAIL;
+	    }
+	  }
 	  if (cur_constraint_ct) {
-	    dxx = (double)g_logistic_mt[0].regression_results[cur_param_ct - 1];
+	    dxx = (double)g_logistic_mt[0].regression_results[cur_param_ct - skip_intercept];
 	    *orig_stats_ptr = dxx;
 	    pval = chiprob_p(dxx, cur_constraint_ct);
 	    if (orig_pvals) {
@@ -7238,7 +7261,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
   uint32_t perm_fail_ct = 0;
   uint32_t max_thread_ct = g_thread_ct;
   int32_t retval = 0;
-  uint32_t linear_intercept = glm_modifier & GLM_INTERCEPT;
+  uint32_t report_intercept = glm_modifier & GLM_INTERCEPT;
   char dgels_trans = 'N';
   __CLPK_integer dgels_m = 0;
   __CLPK_integer dgels_n = 0;
@@ -7831,13 +7854,12 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
       }
     }      
   }
-  if (linear_intercept) {
+  if (report_intercept) {
     wptr = memcpya(tbuf, " INTERCEPT ", 11);
     wptr = uint32_writew8x(wptr, (uint32_t)sample_valid_ct, ' ');
     wptr = double_g_writewx4x(wptr, dgels_b[0], 10, ' ');
     if (display_ci) {
       se = sqrt(param_2d_buf2[0]);
-      zval = dgels_b[0] / se;
       dyy = ci_zt * se;
       wptr = double_g_writewx4x(wptr, se, 8, ' ');
       wptr = double_g_writewx4x(wptr, dgels_b[0] - dyy, 8, ' ');
@@ -8049,7 +8071,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
 	    wptr = tbuf;
 	  }
 	}
-	if (fwrite_checkedz(tbuf, wptr - tbuf, outfile)) {
+	if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
 	  goto glm_linear_nosnp_ret_WRITE_FAIL;
 	}
 	if (fclose_null(&outfile)) {
@@ -8121,6 +8143,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
   uint32_t perm_fail_total = 0;
   uint32_t joint_perm_fail_extra = 0;
   uint32_t max_thread_ct = g_thread_ct;
+  uint32_t skip_intercept = !(glm_modifier & GLM_INTERCEPT);
   int32_t retval = 0;
   uintptr_t* loadbuf_raw = NULL;
   uintptr_t* loadbuf_collapsed = NULL;
@@ -8503,6 +8526,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
     mperm_save = 0;
   }
   ulii = (perm_batch_size + (BITCT - 1)) / BITCT;
+  uii = ((perm_batch_size > 1) || skip_intercept)? 1 : 0;
   if (wkspace_alloc_f_checked(&coef, param_cta4 * perm_batch_size * sizeof(float)) ||
       wkspace_alloc_f_checked(&pp, sample_valid_cta4 * sizeof(float)) ||
       wkspace_alloc_f_checked(&sample_1d_buf, sample_valid_ct * sizeof(float)) ||
@@ -8511,7 +8535,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
       wkspace_alloc_f_checked(&param_1d_buf2, param_ct * sizeof(float)) ||
       wkspace_alloc_f_checked(&param_2d_buf, param_ct * param_cta4 * sizeof(float)) ||
       wkspace_alloc_f_checked(&param_2d_buf2, param_ct * param_cta4 * sizeof(float)) ||
-      wkspace_alloc_f_checked(&regression_results, perm_batch_size * (param_ctx - 1) * sizeof(float)) ||
+      wkspace_alloc_f_checked(&regression_results, perm_batch_size * (param_ctx - uii) * sizeof(float)) ||
       wkspace_alloc_ul_checked(&perm_fails, ulii * sizeof(intptr_t)) ||
       wkspace_alloc_ul_checked(&g_perm_vecs, perm_batch_size * sample_valid_ctv2 * sizeof(intptr_t))) {
     goto glm_logistic_nosnp_ret_NOMEM;
@@ -8567,11 +8591,11 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
   }
 
   fill_float_zero(coef, param_cta4);
-  if (glm_logistic(1, param_ct, sample_valid_ct, 0, NULL, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails)) {
+  if (glm_logistic(1, param_ct, sample_valid_ct, 0, skip_intercept, NULL, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails)) {
     logerrprint("Warning: Skipping --logistic no-snp due to multicollinearity.\n");
     goto glm_logistic_nosnp_ret_1;
   }
-  if (constraint_ct && (regression_results[param_ct - 1] == -9)) {
+  if (constraint_ct && (regression_results[param_ct - skip_intercept] == -9)) {
     logerrprint("Warning: Ignoring --tests due to regression failure.\n");
     constraint_ct = 0;
   }
@@ -8607,7 +8631,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
 
   for (param_idx = 1; param_idx < param_ct; param_idx++) {
     dxx = (double)coef[param_idx];
-    se = sqrt((double)regression_results[param_idx - 1]);
+    se = sqrt((double)regression_results[param_idx - skip_intercept]);
     zval = dxx / se;
     orig_stats[param_idx - 1] = zval * zval;
     pval = chiprob_p(zval * zval, 1);
@@ -8634,8 +8658,29 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
       }
     }
   }
+  if (!skip_intercept) {
+    dxx = (double)coef[0];
+    wptr = memcpya(tbuf, " INTERCEPT ", 11);
+    wptr = uint32_writew8x(wptr, (uint32_t)sample_valid_ct, ' ');
+    wptr = double_g_writewx4x(wptr, report_odds? exp(dxx) : dxx, 10, ' ');
+    if (display_ci) {
+      se = sqrt((double)regression_results[0]);
+      dyy = ci_zt * se;
+      wptr = double_g_writewx4x(wptr, se, 8, ' ');
+      if (report_odds) {
+	wptr = double_g_writewx4x(wptr, exp(dxx - dyy), 8, ' ');
+	wptr = double_g_writewx4x(wptr, exp(dxx + dyy), 8, ' ');
+      } else {
+	wptr = double_g_writewx4x(wptr, dxx - dyy, 8, ' ');
+	wptr = double_g_writewx4x(wptr, dxx + dyy, 8, ' ');
+      }
+    }
+    if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
+      goto glm_logistic_nosnp_ret_WRITE_FAIL;
+    }
+  }
   if (constraint_ct) {
-    dxx = (double)regression_results[param_ct - 1];
+    dxx = (double)regression_results[param_ct - skip_intercept];
     orig_stats[param_ct - 1] = dxx;
     pval = chiprob_p(dxx, constraint_ct);
     if ((pfilter == 2.0) || ((pval <= pfilter) && (pval >= 0.0))) {
@@ -8685,7 +8730,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
     }
     join_threads(threads, g_assoc_thread_ct);
     fill_float_zero(coef, cur_batch_size * param_cta4);
-    perm_fail_total += glm_logistic(cur_batch_size, param_ct, sample_valid_ct, 0, NULL, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
+    perm_fail_total += glm_logistic(cur_batch_size, param_ct, sample_valid_ct, 0, 1, NULL, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
     ulii = param_ct - 1;
     uljj = param_ctx - 1;
     for (perm_idx = 0; perm_idx < cur_batch_size; perm_idx++) {
@@ -8823,7 +8868,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
 	    wptr = tbuf;
 	  }
 	}
-	if (fwrite_checkedz(tbuf, wptr - tbuf, outfile)) {
+	if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
 	  goto glm_logistic_nosnp_ret_WRITE_FAIL;
 	}
 	if (fclose_null(&outfile)) {
@@ -9026,7 +9071,7 @@ uint32_t glm_logistic_dosage(uintptr_t sample_ct, uintptr_t* cur_samples, uintpt
     covar_f++;
   }
   fill_float_zero(coef, param_cta4);
-  if (glm_logistic(1, param_ct, sample_valid_ct, 0, NULL, covars_cov_major, perm_vec, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, perm_fails) || perm_fails[0]) {
+  if (glm_logistic(1, param_ct, sample_valid_ct, 0, 1, NULL, covars_cov_major, perm_vec, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, perm_fails) || perm_fails[0]) {
     return 0;
   }
   dxx = (double)coef[1];
diff --git a/plink_help.c b/plink_help.c
index ec8daa0..a172b8c 100644
--- a/plink_help.c
+++ b/plink_help.c
@@ -303,9 +303,10 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 	       );
     help_print("lfile\treference\tallele-count", &help_ctrl, 1,
 "  --lfile {prefix} : Specify .lgen + .map + .fam (long-format fileset) prefix.\n"
-"  --reference [fn] : Specify default allele file accompanying --lfile input.\n"
-"  --allele-count   : When used with --lfile + --reference, specifies that the\n"
-"                     .lgen file contains reference allele counts.\n\n"
+"  --lgen [fname]   : Specify full name of .lgen file.\n"
+"  --reference [fn] : Specify default allele file accompanying .lgen input.\n"
+"  --allele-count   : When used with --lfile/--lgen + --reference, specifies\n"
+"                     that the .lgen file contains reference allele counts.\n\n"
 	       );
     help_print("vcf\tbcf", &help_ctrl, 1,
 "  --vcf [filename] : Specify full name of .vcf or .vcf.gz file.\n"
@@ -962,7 +963,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 	       */
 "  --logistic <perm | mperm=[value]> <perm-count> <set-test>\n"
 "             <genotypic | hethom | dominant | recessive | no-snp> <hide-covar>\n"
-"             <sex | no-x-sex> <interaction> <beta>\n"
+"             <sex | no-x-sex> <interaction> <beta> <intercept>\n"
 "    Multi-covariate association analysis on a quantitative (--linear) or\n"
 "    case/control (--logistic) phenotype.  Normally used with --covar.\n"
 "    * 'perm' normally causes an adaptive permutation test to be performed on\n"
@@ -991,11 +992,11 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 "    * 'interaction' adds genotype x covariate interactions to the model.  This\n"
 "      cannot be used with the usual permutation tests; use --tests to define\n"
 "      the permutation test statistic instead.\n"
+"    * 'intercept' causes intercepts to be included in the main report.\n"
 "    * For logistic regressions, the 'beta' modifier causes regression\n"
 "      coefficients instead of odds ratios to be reported.\n"
 "    * With --linear, the 'standard-beta' modifier standardizes the phenotype\n"
-"      and all predictors to zero mean and unit variance before regression, and\n"
-"      the 'intercept' modifier adds intercepts to the main report.\n\n"
+"      and all predictors to zero mean and unit variance before regression.\n\n"
 	       );
     help_print("dosage\twrite-dosage", &help_ctrl, 1,
 "  --dosage [allele dosage file] <noheader> <skip0=[i]> <skip1=[j]> <skip2=[k]>\n"
@@ -1236,7 +1237,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 	       );
     help_print("score\tscore-no-mean-imputation", &help_ctrl, 1,
 "  --score [filename] {i} {j} {k} <header> <sum | no-sum>\n"
-"          <no-mean-imputation | center> <include-cnt>\n"
+"          <no-mean-imputation | center> <include-cnt> <double-dosage>\n"
 "    Apply a linear scoring system to each sample.\n"
 "    The input file should have one line per scored variant.  Variant IDs are\n"
 "    read from column #i, allele codes are read from column #j, and scores are\n"
@@ -1257,7 +1258,8 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 "      mean zero.\n"
 "    * This command can be used with dosage data.  By default, the 'CNT' column\n"
 "      is omitted from the output file in this case; use 'include-cnt' to keep\n"
-"      it.\n\n"
+"      it.  Also, note that scores are multiplied by 0..1 dosages, not 0..2\n"
+"      diploid allele counts, unless the 'double-dosage' modifier is present.\n\n"
 	       );
 #if defined __cplusplus && !defined _WIN32 && !defined STABLE_BUILD
     help_print("R\tR-debug", &help_ctrl, 1,
@@ -1360,10 +1362,11 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
     help_print("vcf\tbcf\tid-delim\tvcf-idspace-to", &help_ctrl, 0,
 "  --vcf-idspace-to [c] : Convert spaces in sample IDs to the given character.\n"
 	       );
-    help_print("vcf\tbcf\tbiallelic-only\tvcf-min-qual\tvcf-filter\tvcf-half-call\tvcf-min-gq\tvcf-min-gp", &help_ctrl, 0,
+    help_print("vcf\tbcf\tbiallelic-only\tvcf-min-qual\tvcf-filter\tvcf-half-call\tvcf-min-gq\tvcf-min-gp\tvcf-require-gt", &help_ctrl, 0,
 "  --biallelic-only <strict> <list> : Skip VCF variants with 2+ alt. alleles.\n"
 "  --vcf-min-qual [val]             : Skip VCF variants with low/missing QUAL.\n"
 "  --vcf-filter {exception(s)...}   : Skip variants which have FILTER failures.\n"
+"  --vcf-require-gt                 : Skip variants with no GT field.\n"
 "  --vcf-min-gq [val]               : No-call a genotype when GQ is below the\n"
 "                                     given threshold.\n"
 "  --vcf-min-gp [val]               : No-call a genotype when 0-1 scaled GP is\n"
@@ -1423,8 +1426,14 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 "                                   one chromosome, include a '@' in the first\n"
 "                                   parameter where the chrom. number belongs,\n"
 "                                   e.g. 'genetic_map_chr@_combined_b37.txt'.\n"
-"  --zero-cms       : Zero out centimorgan positions.\n"
+"  --zero-cms         : Zero out centimorgan positions.\n"
 	       );
+#ifndef STABLE_BUILD
+    help_print("allow-no-samples\tallow-no-vars", &help_ctrl, 0,
+"  --allow-no-samples : Allow the input fileset to contain no samples.\n"
+"  --allow-no-vars    : Allow the input fileset to contain no variants.\n"
+	       );
+#endif
     help_print("pheno\tall-pheno\tmpheno\tpheno-name\tpheno-merge", &help_ctrl, 0,
 "  --pheno [fname]  : Load phenotype data from the specified file, instead of\n"
 "                     using the values in the main input fileset.\n"
@@ -1462,12 +1471,16 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 "                            unspecified, it is equal to Lt; otherwise,\n"
 "                            in-between phenotype values are set to missing.\n"
 	       );
-    help_print("covar\tcovar-name\tcovar-number", &help_ctrl, 0,
+    help_print("covar\tcovar-name\tcovar-number\tno-const-covar\tallow-no-covars", &help_ctrl, 0,
 "  --covar [filename] <keep-pheno-on-missing-cov> : Specify covariate file.\n"
 "  --covar-name [...]      : Specify covariate(s) in --covar file by name.\n"
 "                            Separate multiple names with spaces or commas, and\n"
 "                            use dashes to designate ranges.\n"
 "  --covar-number [...]    : Specify covariate(s) in --covar file by index.\n"
+"  --no-const-covar        : Exclude constant covariates.\n"
+#ifndef STABLE_BUILD
+"  --allow-no-covars       : Allow no covariates to be loaded from --covar file.\n"
+#endif
 	       );
     help_print("within\tmwithin\tfamily", &help_ctrl, 0,
 "  --within [f] <keep-NA>  : Specify initial cluster assignments.\n"
@@ -2049,6 +2062,10 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
 "                                     size should be\n"
 "                                       4 / (1/[# cases] + 1/[# controls]).\n"
 	       );
+    help_print("meta-analysis-report-dups\tmeta-analysis", &help_ctrl, 0,
+"  --meta-analysis-report-dups      : When a variant appears multiple times in\n"
+"                                     in the same file, report that.\n"
+	       );
     help_print("gene-list-border\tgene-report\tgene-subset\tgene-list\tgene-report-snp-field", &help_ctrl, 0,
 "  --gene-list-border [kbs]   : Extend --gene-report regions by given # of kbs.\n"
 "  --gene-subset [filename]   : Specify gene name subset for --gene-report.\n"
diff --git a/plink_lasso.c b/plink_lasso.c
index 3908a86..98ed6f0 100644
--- a/plink_lasso.c
+++ b/plink_lasso.c
@@ -947,6 +947,7 @@ int32_t lasso(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* out
     }
   }
   ullii += CACHEALIGN(((uint64_t)uii) * sample_valid_ct * sizeof(double));
+  // if (0) {
   if (ullii <= wkspace_left) {
     retval = lasso_bigmem(bedfile, bed_offset, marker_exclude, marker_ct, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, pheno_nm2, lasso_h2, lasso_minlambda, select_covars, select_covars_bitfield, pheno_d_collapsed, covar_ct, covar_names, max_covar_name_len, covar_nm, covar_d, hh_or_mt_exists, sample_valid_ct, sample_include2, sample_male_include2, loadbuf_raw, loadbuf_collapsed, rand_matrix, misc_arr, residuals, polymorphic_markers, &polymorphic_marker_ct, &iter_tot, &xhat);
   } else {
diff --git a/plink_ld.c b/plink_ld.c
index a382a7d..3d78800 100644
--- a/plink_ld.c
+++ b/plink_ld.c
@@ -638,21 +638,27 @@ void ld_prune_start_chrom(uint32_t ld_window_kb, uint32_t* cur_chrom_ptr, uint32
   uint32_t uii = 0;
   uint32_t window_size;
   live_indices[0] = window_unfiltered_start;
+  next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
   if (ld_window_kb) {
-    window_size = 0;
-    while ((window_unfiltered_start + window_size < chrom_end) && (marker_pos[window_unfiltered_start + window_size] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
+    window_size = 1;
+    uii = window_unfiltered_end;
+    while ((uii < chrom_end) && (marker_pos[uii] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
       window_size++;
+      uii++;
+      next_unset_ck(marker_exclude, &uii, chrom_end);
     }
+    uii = 0;
   } else {
     window_size = ld_window_size;
   }
-  for (uii = 1; uii < window_size; window_unfiltered_end++, uii++) {
-    next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
+  for (uii = 1; uii < window_size; uii++) {
     if (window_unfiltered_end == chrom_end) {
       break;
     }
     start_arr[uii - 1] = window_unfiltered_end;
     live_indices[uii] = window_unfiltered_end;
+    window_unfiltered_end++;
+    next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
   }
   *cur_window_size_ptr = uii;
   start_arr[uii - 1] = window_unfiltered_end;
@@ -817,6 +823,7 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
   uint32_t bsearch_max;
   uint32_t bsearch_cur;
   double prune_ld_thresh;
+
   if (founder_ct < 2) {
     LOGERRPRINTF("Warning: Skipping --indep%s since there are less than two founders.\n(--make-founders may come in handy here.)\n", pairwise? "-pairwise" : "");
     goto ld_prune_ret_1;
@@ -1029,6 +1036,9 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
 		at_least_one_prune = 1;
 		cur_exclude_ct++;
 		// remove marker with lower MAF
+		// could cache MAFs of all current-window variants, but
+		// get_maf() is too cheap for this to make a noticeable
+		// difference
 		if (get_maf(set_allele_freqs[live_indices[uii]]) < get_maf(set_allele_freqs[live_indices[ujj]])) {
 		  SET_BIT(pruned_arr, live_indices[uii]);
 		} else {
@@ -1173,16 +1183,11 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
 	}
       }
       for (uii = 0; uii < ld_window_incr; uii++) {
-	while (IS_SET(marker_exclude, window_unfiltered_start)) {
-	  if (window_unfiltered_start == chrom_end) {
-	    break;
-	  }
-	  window_unfiltered_start++;
-	}
 	if (window_unfiltered_start == chrom_end) {
 	  break;
 	}
 	window_unfiltered_start++;
+	next_unset_ck(marker_exclude, &window_unfiltered_start, chrom_end);
       }
       if (window_unfiltered_start == chrom_end) {
 	break;
@@ -1194,6 +1199,11 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
 	pct_thresh = chrom_info_ptr->chrom_start[cur_chrom] + (((uint64_t)pct * (chrom_end - chrom_info_ptr->chrom_start[cur_chrom])) / 100);
       }
       ujj = 0;
+
+      if (window_unfiltered_end < window_unfiltered_start) {
+	window_unfiltered_end = window_unfiltered_start;
+      }
+
       // copy back previously loaded/computed results
       while (live_indices[ujj] < window_unfiltered_start) {
 	ujj++;
@@ -1230,15 +1240,17 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
       cur_window_size = uii;
       if (window_is_kb) {
 	ujj = 0;
-	while ((window_unfiltered_end + ujj < chrom_end) && (marker_pos[window_unfiltered_end + ujj] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
+	ukk = window_unfiltered_end;
+	while ((ukk < chrom_end) && (marker_pos[ukk] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
 	  ujj++;
+	  ukk++;
+	  next_unset_ck(marker_exclude, &ukk, chrom_end);
 	}
       } else {
 	ujj = ld_window_incr;
       }
       old_window_size = cur_window_size;
-      for (uii = 0; uii < ujj; window_unfiltered_end++, uii++) {
-	next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
+      for (uii = 0; uii < ujj; uii++) {
 	if (window_unfiltered_end == chrom_end) {
 	  break;
 	}
@@ -1260,6 +1272,8 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
 	  cur_exclude_ct++;
 	}
 	cur_window_size++;
+	window_unfiltered_end++;
+	next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
       }
       if (cur_window_size > prev_end) {
 	start_arr[cur_window_size] = window_unfiltered_end;
@@ -2091,17 +2105,17 @@ uint32_t ld_matrix_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
 	goto ld_matrix_emitn_ret;
       }
     }
-    if (is_square0) {
-      while (marker_idx < marker_ct) {
-        ulii = (((uintptr_t)(readbuf_end - sptr_cur)) + 1) / 2;
-        if (ulii <= marker_ct - marker_idx) {
-	  sptr_cur = memcpya(sptr_cur, tbuf, ulii * 2);
-	  marker_idx += ulii;
-	  goto ld_matrix_emitn_ret;
-	} else {
-          sptr_cur = memcpya(sptr_cur, tbuf, (marker_ct - marker_idx) * 2);
-          marker_idx = marker_ct;
-	}
+    if (is_square0 && (marker_idx < marker_ct)) {
+      ulii = (((uintptr_t)(readbuf_end - sptr_cur)) + 1) / 2;
+      // bugfix: can't be <= since tab delimiter wouldn't be handled correctly
+      // on subsequent pass
+      if (ulii < marker_ct - marker_idx) {
+	sptr_cur = memcpya(sptr_cur, tbuf, ulii * 2);
+	marker_idx += ulii;
+	goto ld_matrix_emitn_ret;
+      } else {
+	sptr_cur = memcpya(sptr_cur, tbuf, (marker_ct - marker_idx) * 2);
+	marker_idx = marker_ct;
       }
     }
     if (delimiter == '\t') {
@@ -3824,6 +3838,8 @@ THREAD_RET_TYPE fast_epi_thread(void* arg) {
       chisq2_ptr = &(best_chisq2[block_idx2]);
       for (; block_idx2 < cur_idx2_block_size; block_idx2++, chisq2_ptr++, cur_geno2 = &(cur_geno2[tot_ctsplit])) {
 	cur_tot2 = &(tot2[block_idx2 * tot_stride]);
+	// this operation isn't extracting a 2-bit genotype, so don't use the
+	// macro
 	cur_zmiss2 = (zmiss2[block_idx2 / BITCT2] >> (2 * (block_idx2 % BITCT2))) & 3;
 	cur_zmiss2_tmp = cur_zmiss2 & 1;
 	if (nm_case_fixed) {
@@ -9973,11 +9989,14 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
 	*wptr_start++ = ' ';
 	marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
 	for (chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
-          chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
 	  chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+	  if (marker_uidx2 >= chrom_end2) {
+	    continue;
+	  }
+          chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
           wptr_start2 = width_force(4, wptr_start, chrom_name_write(wptr_start, chrom_info_ptr, chrom_idx2));
 	  *wptr_start2++ = ' ';
-	  for (; marker_uidx2 < chrom_end2; next_unset_ul_ck(marker_exclude2, &marker_uidx2, chrom_end2), marker_idx2++, dptr++) {
+	  for (; marker_uidx2 < chrom_end2; next_unset_ul_ck(marker_exclude2, &marker_uidx2, unfiltered_marker_ct), marker_idx2++, dptr++) {
 	    if (marker_idx2 == ujj) {
 	      marker_idx2 = g_epi_geno1_offsets[2 * block_idx1 + 1];
 	      if (marker_idx2 == marker_ct2) {
@@ -10217,7 +10236,7 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
   uintptr_t founder_ct = popcount_longs(founder_info, unfiltered_sample_ctl2 / 2);
   uintptr_t founder_ctl = (founder_ct + BITCT - 1) / BITCT;
   uintptr_t founder_ctv3 = 2 * ((founder_ct + (2 * BITCT - 1)) / (2 * BITCT));
-  // no actual case/control split here, but keep the variable name the same to
+  // no actual case/control split here, but keep the variables the same to
   // minimize divergence from ld_report_dprime()
   uintptr_t founder_ctsplit = 3 * founder_ctv3;
   uintptr_t final_mask = get_final_mask(founder_ct);
@@ -10334,7 +10353,8 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
   loadbuf[founder_ctl * 2 - 2] = 0;
   loadbuf[founder_ctl * 2 - 1] = 0;
   fill_all_bits(dummy_nm, founder_ct);
-  for (ulii = 1; ulii <= window_max; ulii++) {
+  // bugfix: this loop must start at 0, not 1
+  for (ulii = 0; ulii < window_max; ulii++) {
     geno[ulii * founder_ctsplit + founder_ctv3 - 1] = 0;
     geno[ulii * founder_ctsplit + 2 * founder_ctv3 - 1] = 0;
     geno[ulii * founder_ctsplit + founder_ctsplit - 1] = 0;
@@ -10471,16 +10491,11 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
 	} while (at_least_one_prune);
       }
       for (uii = 0; uii < ld_window_incr; uii++) {
-	while (IS_SET(marker_exclude, window_unfiltered_start)) {
-	  if (window_unfiltered_start == chrom_end) {
-	    break;
-	  }
-	  window_unfiltered_start++;
-	}
 	if (window_unfiltered_start == chrom_end) {
 	  break;
 	}
 	window_unfiltered_start++;
+	next_unset_ck(marker_exclude, &window_unfiltered_start, chrom_end);
       }
       if (window_unfiltered_start == chrom_end) {
 	break;
@@ -10492,6 +10507,9 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
 	pct_thresh = chrom_info_ptr->chrom_start[cur_chrom] + (((uint64_t)pct * (chrom_end - chrom_info_ptr->chrom_start[cur_chrom])) / 100);
       }
       uljj = 0;
+      if (window_unfiltered_end < window_unfiltered_start) {
+	window_unfiltered_end = window_unfiltered_start;
+      }
       // copy back previously loaded/computed results
       while (live_indices[uljj] < window_unfiltered_start) {
 	uljj++;
@@ -10507,21 +10525,31 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
 	live_indices[ulii] = live_indices[uljj];
 	start_arr[ulii] = start_arr[uljj];
 	memcpy(&(cur_tots[ulii * 3]), &(cur_tots[uljj * 3]), 3 * sizeof(int32_t));
+	// bugfix: forgot to update zmiss
+	if (IS_SET(zmiss, uljj)) {
+	  SET_BIT(zmiss, ulii);
+	} else {
+	  CLEAR_BIT(zmiss, ulii);
+	}
 	ulii++;
       }
+      clear_bits(zmiss, ulii, window_max);
 
       prev_end = ulii;
       cur_window_size = ulii;
       if (window_is_kb) {
 	uljj = 0;
-	while ((window_unfiltered_end + uljj < chrom_end) && (marker_pos[window_unfiltered_end + uljj] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
+	ulkk = window_unfiltered_end;
+	while ((window_unfiltered_end < chrom_end) && (marker_pos[window_unfiltered_end] <= marker_pos[window_unfiltered_start] + (1000 * ld_window_size))) {
 	  uljj++;
+	  window_unfiltered_end++;
+	  next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
 	}
+	window_unfiltered_end = ulkk;
       } else {
 	uljj = ld_window_incr;
       }
-      for (ulii = 0; ulii < uljj; window_unfiltered_end++, ulii++) {
-	next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
+      for (ulii = 0; ulii < uljj; ulii++) {
 	if (window_unfiltered_end == chrom_end) {
 	  break;
 	}
@@ -10550,6 +10578,8 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
 	  SET_BIT(zmiss, cur_window_size);
 	}
 	cur_window_size++;
+	window_unfiltered_end++;
+	next_unset_ck(marker_exclude, &window_unfiltered_end, chrom_end);
       }
       if (cur_window_size > prev_end) {
 	start_arr[cur_window_size] = window_unfiltered_end;
diff --git a/plink_misc.c b/plink_misc.c
index 20d9791..3955967 100644
--- a/plink_misc.c
+++ b/plink_misc.c
@@ -44,7 +44,7 @@ int32_t make_founders(uintptr_t unfiltered_sample_ct, uintptr_t sample_ct, char*
   }
   bitfield_exclude_to_include(sample_exclude, nf_bitarr, unfiltered_sample_ct);
   bitfield_andnot(nf_bitarr, founder_info, unfiltered_sample_ctl);
-  sample_uidx = next_set(nf_bitarr, 0, unfiltered_sample_ct);
+  sample_uidx = unfiltered_sample_ct? next_set(nf_bitarr, 0, unfiltered_sample_ct) : 0;
   if (sample_uidx == unfiltered_sample_ct) {
     logprint("Note: Skipping --make-founders since there are no nonfounders.\n");
     goto make_founders_ret_1;
@@ -1660,7 +1660,7 @@ void calc_plink_maxfid(uint32_t unfiltered_sample_ct, uintptr_t* sample_exclude,
   // imitate PLINK 1.07 behavior (see Plink::prettyPrintLengths() in
   // helper.cpp), to simplify testing and avoid randomly breaking existing
   // scripts
-  do {
+  while (samples_done < sample_ct) {
     sample_uidx = next_unset_unsafe(sample_exclude, sample_uidx);
     sample_uidx_stop = next_set(sample_exclude, sample_uidx, unfiltered_sample_ct);
     samples_done += sample_uidx_stop - sample_uidx;
@@ -1679,7 +1679,7 @@ void calc_plink_maxfid(uint32_t unfiltered_sample_ct, uintptr_t* sample_exclude,
       }
       cptr = &(cptr[max_sample_id_len]);
     } while (cptr < cptr_end);
-  } while (samples_done < sample_ct);
+  }
   *plink_maxfid_ptr = plink_maxfid;
   *plink_maxiid_ptr = plink_maxiid;
 }
@@ -1991,8 +1991,7 @@ int32_t read_external_freqs(char* freqname, uintptr_t unfiltered_marker_ct, uint
       logprint("--read-freq: .frq file loaded.\n");
     }
   } else if (uii == 3) {
-    // changed from strcmp to avoid eoln problems
-    // known --freqx format, WDIST v0.15.3 or later
+    // --freqx format
     while (fgets(loadbuf, loadbuf_size, freqfile) != NULL) {
       line_idx++;
       if (!loadbuf[loadbuf_size - 1]) {
@@ -2332,6 +2331,7 @@ int32_t load_ax_alleles(Two_col_params* axalleles, uintptr_t unfiltered_marker_c
 }
 
 int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t output_gz, uint32_t plink_maxsnp, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, Chrom_info* chrom_info_ptr, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t sample_ct, uint32_t sample_f_ct, uintptr_t* founder_info, uint32_t nonfounders, uintptr_t* sex_male, uint32_t s [...]
+  // unfiltered_sample_ct == 0 ok
   unsigned char* wkspace_mark = wkspace_base;
   char* writebuf = tbuf;
   char* pzwritep = NULL;
@@ -2620,6 +2620,7 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
 }
 
 int32_t write_cc_freqs(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t output_gz, uint32_t plink_maxsnp, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, Chrom_info* chrom_info_ptr, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t nonfounders, uintptr_t* sex_male, uintptr_t* marker_reverse, uintptr_t* pheno_nm, uintptr_t* ph [...]
+  // unfiltered_sample_ct must be positive
   unsigned char* wkspace_mark = wkspace_base;
   char* pzwritep = NULL;
   uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
@@ -2803,6 +2804,7 @@ int32_t write_cc_freqs(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
 }
 
 int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, double* set_allele_freqs, Chrom_info* chrom_info_ptr, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, int32_t* ll_cts, int32_t* lh_cts, int32_t* hh_cts, int32_t* hapl_cts, int32_t* haph_cts, uint32_t sample_f_ct, uint32_t sample_f_male_ct, uint32_t nonfounders, uint64_t misc_flags, uintptr_t* mar [...]
+  // unfiltered_sample_ct == 0 ok
   unsigned char* wkspace_mark = wkspace_base;
   char* pzwritep = NULL;
   uint32_t reverse = 0;
@@ -3282,7 +3284,7 @@ int32_t write_snplist(char* outname, char* outname_end, uintptr_t unfiltered_mar
     goto write_snplist_ret_OPEN_FAIL;
   }
   if (!list_23_indels) {
-    do {
+    while (markers_done < marker_ct) {
       marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
       marker_uidx_stop = next_set_ul(marker_exclude, marker_uidx, unfiltered_marker_ct);
       markers_done += marker_uidx_stop - marker_uidx;
@@ -3296,7 +3298,7 @@ int32_t write_snplist(char* outname, char* outname_end, uintptr_t unfiltered_mar
 	}
         cptr = &(cptr[max_marker_id_len]);
       } while (cptr < cptr_end);
-    } while (markers_done < marker_ct);
+    }
   } else {
     for (; markers_done < marker_ct; marker_uidx++, markers_done++) {
       next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
@@ -4335,10 +4337,10 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
     }
   }
   first_col_m1--;
+  memcpy(outname_end, ".nopred", 8); // bugfix, this was after the goto before
   if (modifier & SCORE_HEADER) {
     goto score_report_load_next;
   }
-  memcpy(outname_end, ".nopred", 8);
   while (1) {
     bufptr_arr[0] = next_token_multz(bufptr, first_col_m1);
     bufptr_arr[1] = next_token_mult(bufptr_arr[0], col_01_delta);
@@ -5121,17 +5123,21 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
   gzFile gz_infile = NULL;
   FILE* infile = NULL;
   FILE* outfile = NULL;
-  char* sorted_extract_ids = NULL;
   char* loadbuf_end = (char*)(&(wkspace_base[wkspace_left]));
   char* cur_window_marker_ids = NULL;
+  char* sorted_extract_ids = NULL;
+  uintptr_t* duplicate_id_bitfield = NULL;
+  Ll_str** duplicate_id_htable = NULL;
   uintptr_t header_dict_ct = 2; // 'SE', BETA/OR
   uintptr_t max_header_len = 3;
   uintptr_t extract_ct = 0;
   uintptr_t max_extract_id_len = 0;
+  uintptr_t extract_ctl = 0;
   uintptr_t final_variant_ct = 0;
   uintptr_t last_var_idx = 0;
-  uintptr_t rejected_ct = 0;
   uintptr_t window_entry_base_cost = 2;
+  uintptr_t duplicate_id_htable_max_alloc = 0;
+  uint64_t rejected_ct = 0;
   double cur_p = 0.0;
   double cur_ess = 0.0;
   uint32_t max_var_id_len_p1 = 0;
@@ -5142,6 +5148,7 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
   uint32_t report_all = flags & METAANAL_REPORT_ALL;
   uint32_t output_beta = flags & METAANAL_QT;
   uint32_t report_study_specific = flags & METAANAL_STUDY;
+  uint32_t report_dups = flags & METAANAL_REPORT_DUPS;
 
   uint32_t weighted_z = (flags / METAANAL_WEIGHTED_Z) & 1;
   uint32_t parse_max = 3;
@@ -5152,12 +5159,11 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
   uint32_t a2lenp1 = 0;
   uint32_t cur_chrom = 0;
   uint32_t cur_bp = 0;
-  uint32_t cur_file_ct_m1 = 0;
   uint32_t cur_combined_allele_len = 0;
   uint32_t pass_idx = 0;
   int32_t retval = 0;
   char missing_geno = *g_missing_geno_ptr;
-  const char problem_strings[][16] = {"BAD_CHR", "BAD_BP", "MISSING_A1", "MISSING_A2", "BAD_ES", "BAD_SE", "ALLELE_MISMATCH", "BAD_P", "BAD_ESS"};
+  const char problem_strings[][16] = {"BAD_CHR", "BAD_BP", "MISSING_A1", "MISSING_A2", "BAD_ES", "BAD_SE", "ALLELE_MISMATCH", "BAD_P", "BAD_ESS", "DUPLICATE"};
 
   // [0] = SNP
   // [1] = BETA/OR
@@ -5185,11 +5191,13 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
   uintptr_t variants_remaining;
   uintptr_t cur_var_idx;
   uintptr_t first_var_idx;
+  uintptr_t htable_write_limit;
   uintptr_t ulii;
   Ll_str** htable;
   Ll_str** ll_pptr;
   Ll_str* ll_ptr;
   Ll_str* htable_write;
+  Ll_str* duplicate_id_htable_write;
   unsigned char* wkspace_mark2;
   char* sorted_header_dict;
   char* master_var_list;
@@ -5231,6 +5239,7 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
   uint32_t file_ct64;
   uint32_t file_idx;
   uint32_t cur_file_ct;
+  uint32_t cur_file_ct_m1;
   uint32_t fname_len;
   uint32_t token_ct;
   uint32_t seq_idx;
@@ -5292,7 +5301,6 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
       wkspace_alloc_ui_checked(&header_id_map, header_dict_ct * sizeof(int32_t))) {
     goto meta_analysis_ret_NOMEM;
   }
-  wkspace_mark2 = wkspace_base;
   ulii = 0; // write position
   if (snpfield_search_order) {
     bufptr = snpfield_search_order;
@@ -5386,23 +5394,7 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
     goto meta_analysis_ret_INVALID_CMDLINE;
   }
 
-  // 2. Allocate space for initial hash table.
-  // Saving memory is pretty important here, so we use the following packing in
-  // the ss field (W = byte width required to save numbers up to file_ct, and
-  // M = 1 iff 'no-map' was not specified):
-  // [W]: number of files this variant appears in, little-endian
-  // [W+1]..[W+5], if M==1: chromosome byte followed by bp coordinate int; may
-  //                        need to widen chromosome byte later
-  // [W+5M+1]: null-terminated variant ID.  Followed by null-terminated A1/A2
-  //           if 'no-allele' not specified
-  htable = (Ll_str**)wkspace_alloc(HASHMEM);
-  if (!htable) {
-    goto meta_analysis_ret_NOMEM;
-  }
-  for (uii = 0; uii < HASHSIZE; uii++) {
-    htable[uii] = NULL;
-  }
-  // 3. If --extract specified, load and sort permitted variant list.
+  // 2. If --extract specified, load and sort permitted variant list.
   if (extractname) {
     if (fopen_checked(&infile, extractname, "rb")) {
       goto meta_analysis_ret_OPEN_FAIL;
@@ -5423,7 +5415,12 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
       goto meta_analysis_ret_NOMEM;
     }
     rewind(infile);
-    // todo: switch to hash table to avoid sort
+    // Considered switching to a hash table, but decided against it for now
+    // since it's less memory-efficient (in the usual case of similar-length
+    // IDs), especially when lots of duplicate IDs are present.  Might be worth
+    // revisiting this decision in the future, though, since there are
+    // reasonable use cases involving 40-80 million line --extract files, and
+    // skipping the sort step there is a big win.
     retval = read_tokens(infile, tbuf, MAXLINELEN, extract_ct, max_extract_id_len, sorted_extract_ids);
     if (retval) {
       goto meta_analysis_ret_1;
@@ -5437,7 +5434,32 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
       extract_ct = ulii;
       wkspace_shrink_top(sorted_extract_ids, extract_ct * max_extract_id_len);
     }
+    extract_ctl = (extract_ct + BITCT - 1) / BITCT;
+    if (wkspace_alloc_ul_checked(&duplicate_id_bitfield, extract_ctl * sizeof(intptr_t))) {
+      goto meta_analysis_ret_NOMEM;
+    }
+  } else {
+    duplicate_id_htable = (Ll_str**)wkspace_alloc(HASHMEM);
   }
+
+  // 3. Allocate space for initial hash table.
+  // Saving memory is pretty important here, so we use the following packing in
+  // the ss field (W = byte width required to save numbers up to file_ct, and
+  // M = 1 iff 'no-map' was not specified):
+  // [W]: number of files this variant appears in minus 1, little-endian
+  // [W+1]..[W+5], if M==1: chromosome byte followed by bp coordinate int; may
+  //                        need to widen chromosome byte later
+  // [W+5M+1]: null-terminated variant ID.  Followed by null-terminated A1/A2
+  //           if 'no-allele' not specified
+  wkspace_mark2 = wkspace_base;
+  htable = (Ll_str**)wkspace_alloc(HASHMEM);
+  if (!htable) {
+    goto meta_analysis_ret_NOMEM;
+  }
+  for (uii = 0; uii < HASHSIZE; uii++) {
+    htable[uii] = NULL;
+  }
+
   // 4. Initial scan: save all potentially valid variant IDs (and accompanying
   //    allele codes/chr/pos, if present) in the hash table, and produce .prob
   //    file.  Also determine maximum line length, for use in later passes.
@@ -5461,17 +5483,24 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
   htable_write = (Ll_str*)wkspace_base;
   loadbuf_end[-1] = ' ';
   for (file_idx = 0; file_idx < file_ct; file_idx++) {
+    if (sorted_extract_ids) {
+      fill_ulong_zero(duplicate_id_bitfield, extract_ctl);
+    } else {
+      for (uii = 0; uii < HASHSIZE; uii++) {
+	duplicate_id_htable[uii] = NULL;
+      }
+    }
     fname_len = strlen(fname_ptr);
-    // divide by two and subtract 16 to prevent overlap between loadbuf and new
-    // hash table entry.
-    loadbuf_size = (((uintptr_t)(loadbuf_end - ((char*)htable_write))) / 2);
-    if (loadbuf_size > MAXLINEBUFLEN + 16) {
-      loadbuf_size = MAXLINEBUFLEN + 16;
-    } else if (loadbuf_size <= MAXLINELEN + 16) {
+    // prevent overlap between loadbuf and new hash table entries.
+    loadbuf_size = (((uintptr_t)(loadbuf_end - ((char*)htable_write))) / 4);
+    if (loadbuf_size > MAXLINEBUFLEN) {
+      loadbuf_size = MAXLINEBUFLEN;
+    } else if (loadbuf_size <= MAXLINELEN) {
       goto meta_analysis_ret_NOMEM;
     }
-    loadbuf_size -= 16;
     loadbuf = &(loadbuf_end[-((intptr_t)loadbuf_size)]);
+    duplicate_id_htable_write = (Ll_str*)loadbuf;
+    htable_write_limit = ((uintptr_t)loadbuf) - loadbuf_size - 16;
     token_ct = parse_max;
     retval = meta_analysis_open_and_read_header(fname_ptr, loadbuf, loadbuf_size, sorted_header_dict, header_id_map, header_dict_ct, max_header_len, weighted_z, &token_ct, &gz_infile, col_skips, col_sequence, &line_idx, &line_max);
     if (retval) {
@@ -5494,6 +5523,10 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
       }
       bufptr = skip_initial_spaces(loadbuf);
       if (is_eoln_kns(*bufptr)) {
+	slen = strlen(bufptr) + ((uintptr_t)(bufptr - loadbuf));
+	if (slen >= line_max) {
+	  line_max = slen + 1;
+	}
         continue;
       }
       bufptr = next_token_multz(bufptr, col_skips[0]);
@@ -5517,20 +5550,55 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
       }
       bufptr = token_ptrs[0];
       var_id_len = strlen_se(bufptr);
-      if (sorted_extract_ids && (bsearch_str(bufptr, var_id_len, sorted_extract_ids, max_extract_id_len, extract_ct) == -1)) {
-	continue;
-      }
       if (var_id_len > MAX_ID_LEN) {
 	sprintf(logbuf, "Error: Line %" PRIuPTR " of %s has an excessively long variant ID.\n", line_idx, fname_ptr);
 	goto meta_analysis_ret_INVALID_FORMAT_WW;
       }
+      bufptr[var_id_len] = '\0';
+      uii = hashval2(bufptr, var_id_len++);
+      // var_id_len now includes null-terminator
+      if (sorted_extract_ids) {
+	ii = bsearch_str(bufptr, var_id_len - 1, sorted_extract_ids, max_extract_id_len, extract_ct);
+	if (ii == -1) {
+	  continue;
+	}
+	if (is_set(duplicate_id_bitfield, ii)) {
+	  problem_mask = 0x200;
+	  goto meta_analysis_report_error;
+	}
+	set_bit(duplicate_id_bitfield, ii);
+      } else {
+	ll_pptr = &(duplicate_id_htable[uii]);
+	while (1) {
+	  ll_ptr = *ll_pptr;
+	  if ((!ll_ptr) || (!strcmp(bufptr, ll_ptr->ss))) {
+	    break;
+	  }
+	  ll_pptr = &(ll_ptr->next);
+	}
+	if (ll_ptr) {
+	  problem_mask = 0x200;
+	  goto meta_analysis_report_error;
+	}
+	// word-align for now
+	// note that it is NOT safe to use uii here.
+	ulii = sizeof(intptr_t) + ((var_id_len + BYTECT - 1) & (~(BYTECT - 1)));
+	if (((uintptr_t)htable_write) + ulii > ((uintptr_t)duplicate_id_htable_write)) {
+	  goto meta_analysis_ret_NOMEM;
+	}
+	duplicate_id_htable_write = (Ll_str*)(((uintptr_t)duplicate_id_htable_write) - ulii);
+	*ll_pptr = duplicate_id_htable_write;
+	duplicate_id_htable_write->next = NULL;
+	memcpy(duplicate_id_htable_write->ss, bufptr, var_id_len);
+      }
+      ll_pptr = &(htable[uii]);
 
       // validate
       problem_mask = 0;
       if (use_map) {
 	ii = get_chrom_code(chrom_info_ptr, token_ptrs[5]);
 	if (ii < 0) {
-	  problem_mask |= 1;
+	  problem_mask = 1;
 	} else {
 	  cur_chrom = (uint32_t)ii;
 	  if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
@@ -5574,12 +5642,8 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
 	  problem_mask |= 0x100;
 	}
       }
-      // check hash table
+      // check main hash table
       bufptr = token_ptrs[0];
-      bufptr[var_id_len] = '\0';
-      uii = hashval2(bufptr, var_id_len++);
-      // var_id_len now includes null-terminator
-      ll_pptr = &(htable[uii]);
       while (1) {
 	ll_ptr = *ll_pptr;
 	if ((!ll_ptr) || (!strcmp(bufptr, &(ll_ptr->ss[slen_base])))) {
@@ -5622,15 +5686,9 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
 	  final_variant_ct++;
 	}
 	htable_write = (Ll_str*)((((uintptr_t)wptr) + sizeof(uintptr_t) - 1) & (~(sizeof(uintptr_t) - ONELU)));
-	// now shrink loadbuf if necessary
-	loadbuf_size = (((uintptr_t)(loadbuf_end - ((char*)htable_write))) / 2);
-	if (loadbuf_size > MAXLINEBUFLEN + 16) {
-	  loadbuf_size = MAXLINEBUFLEN + 16;
-	} else if (loadbuf_size <= MAXLINELEN + 16) {
+	if ((((uintptr_t)htable_write) > ((uintptr_t)duplicate_id_htable_write)) || (((uintptr_t)htable_write) > htable_write_limit)) {
 	  goto meta_analysis_ret_NOMEM;
 	}
-	loadbuf_size -= 16;
-	loadbuf = &(loadbuf_end[-((intptr_t)loadbuf_size)]);
       } else {
 	if ((token_ct - 2 * weighted_z < 6) || meta_analysis_allelic_match(&(ll_ptr->ss[slen_base + var_id_len]), token_ptrs, token_ct, a1lenp1, a2lenp1)) {
 	  if (problem_mask) {
@@ -5647,6 +5705,9 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
 	} else {
 	  problem_mask |= 0x40;
 	meta_analysis_report_error:
+	  if ((problem_mask == 0x200) && (!report_dups)) {
+	    continue;
+	  }
 	  if (!outfile) {
 	    memcpy(outname_end, ".prob", 6);
 	    if (fopen_checked(&outfile, outname, "w")) {
@@ -5671,13 +5732,19 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
       goto meta_analysis_ret_READ_FAIL;
     }
     gz_infile = NULL;
+    if (!sorted_extract_ids) {
+      ulii = ((uintptr_t)loadbuf) - ((uintptr_t)duplicate_id_htable_write);
+      if (ulii > duplicate_id_htable_max_alloc) {
+	duplicate_id_htable_max_alloc = ulii;
+      }
+    }
     fname_ptr = &(fname_ptr[fname_len + 1]);
   }
   if (outfile) {
     if (fclose_null(&outfile)) {
       goto meta_analysis_ret_WRITE_FAIL;
     }
-    LOGPRINTFWW("--meta-analysis: %" PRIuPTR " problematic line%s; see %s .\n", rejected_ct, (rejected_ct == 1)? "" : "s", outname);
+    LOGPRINTFWW("--meta-analysis: %" PRIu64 " problematic line%s; see %s .\n", rejected_ct, (rejected_ct == 1)? "" : "s", outname);
   }
 
   // 5. Determine final set of variants, and sort them (by chromosome, then
@@ -5709,6 +5776,7 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
   // sequentially
   ll_ptr = (Ll_str*)wkspace_base;
   for (master_var_idx = 0; master_var_idx < final_variant_ct;) {
+    cur_file_ct_m1 = 0; // clear high bits
     memcpy(&cur_file_ct_m1, ll_ptr->ss, file_ct_byte_width);
     if (report_all || cur_file_ct_m1) {
       wptr = &(master_var_list[master_var_idx * master_var_entry_len]);
@@ -5744,8 +5812,11 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
     ll_ptr = (Ll_str*)((((uintptr_t)bufptr) + sizeof(uintptr_t) - 1) & (~(sizeof(uintptr_t) - ONELU)));
   }
   qsort(master_var_list, final_variant_ct, master_var_entry_len, strcmp_natural);
-  // don't need sorted_extract_ids anymore
+  // don't need htable anymore
   wkspace_reset(wkspace_mark2);
+  if (!sorted_extract_ids) {
+    wkspace_alloc(duplicate_id_htable_max_alloc);
+  }
   total_data_slots = (wkspace_left - topsize) / sizeof(uintptr_t);
 
   // 6. Remaining load passes: determine how many remaining variants' worth of
@@ -5815,6 +5886,7 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
       bufptr2 = &(bufptr[cur_variant_ct * master_var_entry_len]);
       bufptr2 = (char*)memchr(bufptr2, 0, master_var_entry_len);
       bufptr2++;
+      cur_file_ct_m1 = 0;
       memcpy(&cur_file_ct_m1, bufptr2, file_ct_byte_width);
       cur_data_slots = 0;
       if (report_study_specific) {
@@ -5825,6 +5897,7 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
 #endif
       }
       if (!no_allele) {
+	cur_combined_allele_len = 0;
 	memcpy(&cur_combined_allele_len, &(bufptr2[file_ct_byte_width]), combined_allele_len_byte_width);
 	cur_data_slots += (8 / BYTECT) * ((cur_combined_allele_len + 7) / 8);
       }
@@ -5871,6 +5944,14 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
     }
     fname_ptr = input_fnames;
     for (file_idx = 0; file_idx < file_ct; file_idx++) {
+      if (sorted_extract_ids) {
+	fill_ulong_zero(duplicate_id_bitfield, extract_ctl);
+      } else {
+	for (uii = 0; uii < HASHSIZE; uii++) {
+	  duplicate_id_htable[uii] = NULL;
+	}
+      }
+      duplicate_id_htable_write = (Ll_str*)wkspace_mark2;
       fname_len = strlen(fname_ptr);
       token_ct = parse_max;
       retval = meta_analysis_open_and_read_header(fname_ptr, loadbuf, loadbuf_size, sorted_header_dict, header_id_map, header_dict_ct, max_header_len, weighted_z, &token_ct, &gz_infile, col_skips, col_sequence, NULL, NULL);
@@ -5898,6 +5979,40 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
         if (!bufptr) {
           continue;
 	}
+        bufptr = token_ptrs[0];
+	var_id_len = strlen_se(bufptr);
+	if (var_id_len >= max_var_id_len_p1) {
+	  continue;
+	}
+	bufptr[var_id_len] = '\0';
+        if (sorted_extract_ids) {
+	  ii = bsearch_str(bufptr, var_id_len, sorted_extract_ids, max_extract_id_len, extract_ct);
+	  if (ii == -1) {
+	    continue;
+	  }
+	  if (is_set(duplicate_id_bitfield, ii)) {
+	    continue;
+	  }
+	  set_bit(duplicate_id_bitfield, ii);
+	} else {
+	  uii = hashval2(bufptr, var_id_len);
+	  ll_pptr = &(duplicate_id_htable[uii]);
+	  while (1) {
+	    ll_ptr = *ll_pptr;
+	    if ((!ll_ptr) || (!strcmp(bufptr, ll_ptr->ss))) {
+	      break;
+	    }
+	    ll_pptr = &(ll_ptr->next);
+	  }
+	  if (ll_ptr) {
+	    continue;
+	  }
+	  *ll_pptr = duplicate_id_htable_write;
+	  duplicate_id_htable_write->next = NULL;
+	  memcpy(duplicate_id_htable_write->ss, bufptr, var_id_len + 1);
+	  ulii = sizeof(intptr_t) + ((var_id_len + BYTECT) & (~(BYTECT - 1)));
+	  duplicate_id_htable_write = (Ll_str*)(((uintptr_t)duplicate_id_htable_write) + ulii);
+	}
 	if (use_map) {
 	  ii = get_chrom_code(chrom_info_ptr, token_ptrs[5]);
 	  if (ii < 0) {
@@ -5947,17 +6062,15 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
 	    continue;
 	  }
 	}
-        bufptr = token_ptrs[0];
-	var_id_len = strlen_se(bufptr);
-	if (var_id_len >= max_var_id_len_p1) {
-	  continue;
-	}
+	bufptr = token_ptrs[0];
 	if (use_map) {
 	  ii = bsearch_str(bufptr, var_id_len, cur_window_marker_ids, max_var_id_len_p5, cur_variant_ct);
 	  if (ii == -1) {
 	    continue;
 	  }
-          cur_var_idx = 0; // clear high bits
+#ifdef __LP64__
+	  cur_var_idx = 0; // clear high 32 bits
+#endif
           memcpy(&cur_var_idx, &(cur_window_marker_ids[(((uint32_t)ii) * max_var_id_len_p5) + max_var_id_len_p1]), 4);
 	} else {
 	  bufptr[var_id_len] = '\0';
@@ -6177,7 +6290,12 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
 	for (file_idx = 0; file_idx < file_ct; file_idx++) {
 	  if (is_set(ulptr, file_idx)) {
 	    uii++;
-	    double_f_writew74x(&(tbuf[1]), exp(cur_data_ptr[((int32_t)(uii + weighted_z)) * (-2)]), '\0');
+	    dxx = cur_data_ptr[((int32_t)(uii + weighted_z)) * (-2)];
+	    if (!output_beta) {
+	      // finish fixing PLINK 1.07 bug
+	      dxx = exp(dxx);
+	    }
+	    double_f_writew74x(&(tbuf[1]), dxx, '\0');
 	    fputs(tbuf, outfile);
 	  } else {
 	    fputs("      NA", outfile);
diff --git a/plink_misc.h b/plink_misc.h
index 799ee73..41a49de 100644
--- a/plink_misc.h
+++ b/plink_misc.h
@@ -14,6 +14,7 @@
 #define METAANAL_LOGSCALE 0x10
 #define METAANAL_QT 0x20
 #define METAANAL_WEIGHTED_Z 0x40
+#define METAANAL_REPORT_DUPS 0x80
 
 typedef struct {
   char* fname;
diff --git a/plink_perm.c b/plink_perm.c
new file mode 100644
index 0000000..20320f4
--- /dev/null
+++ b/plink_perm.c
@@ -0,0 +1,333 @@
+#include "plink_common.h"
+
+void generate_cc_perm_vec(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp) {
+  // Randomly marks set_ct of tot_ct positions in perm_vec, two bits apart
+  // (bit 2k of a word).  Assumes tot_quotient is 2^32 / tot_ct, and totq_magic/
+  // totq_preshift/totq_postshift/totq_incr were precomputed from magic_num().
+  uint32_t num_set = 0;
+  uint32_t upper_bound = tot_ct * tot_quotient - 1; // random draws above this are rejected
+  uintptr_t widx;
+  uintptr_t wcomp;
+  uintptr_t pv_val;
+  uint32_t urand;
+  uint32_t uii;
+  if (set_ct * 2 < tot_ct) { // fewer set than unset: start empty and set bits
+    fill_ulong_zero(perm_vec, 2 * ((tot_ct + (BITCT - 1)) / BITCT));
+    for (; num_set < set_ct; num_set++) {
+      do {
+	do {
+	  urand = sfmt_genrand_uint32(sfmtp);
+	} while (urand > upper_bound);
+	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift; // magic-number division; presumably an index in [0, tot_ct)
+        widx = uii / BITCT2;
+	wcomp = ONELU << (2 * (uii % BITCT2));
+	pv_val = perm_vec[widx];
+      } while (pv_val & wcomp); // redraw until a still-unset position is hit
+      perm_vec[widx] = pv_val | wcomp;
+    }
+  } else {
+    fill_vec_55(perm_vec, tot_ct); // presumably sets the low bit of every 2-bit entry; verify against fill_vec_55()
+    set_ct = tot_ct - set_ct; // from here on, the number of positions to clear
+    for (; num_set < set_ct; num_set++) {
+      do {
+	do {
+	  urand = sfmt_genrand_uint32(sfmtp);
+	} while (urand > upper_bound);
+	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift;
+        widx = uii / BITCT2;
+	wcomp = ONELU << (2 * (uii % BITCT2));
+	pv_val = perm_vec[widx];
+      } while (!(pv_val & wcomp)); // redraw until a still-set position is hit
+      perm_vec[widx] = pv_val - wcomp; // bit is known to be set, so subtracting clears it
+    }
+  }
+}
+
+void generate_cc_perm1(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp) {
+  // generate_cc_perm_vec() variant which uses 1-bit packing instead of 2.
+  uint32_t num_set = 0;
+  uint32_t upper_bound = tot_ct * tot_quotient - 1; // random draws above this are rejected
+  uintptr_t widx;
+  uintptr_t wcomp;
+  uintptr_t pv_val;
+  uint32_t urand;
+  uint32_t uii;
+  if (set_ct * 2 < tot_ct) { // fewer set than unset: start empty and set bits
+    fill_ulong_zero(perm_vec, (tot_ct + (BITCT - 1)) / BITCT);
+    for (; num_set < set_ct; num_set++) {
+      do {
+	do {
+	  urand = sfmt_genrand_uint32(sfmtp);
+	} while (urand > upper_bound);
+	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift; // magic-number division; presumably an index in [0, tot_ct)
+        widx = uii / BITCT;
+	wcomp = ONELU << (uii % BITCT);
+	pv_val = perm_vec[widx];
+      } while (pv_val & wcomp); // redraw until a still-unset bit is hit
+      perm_vec[widx] = pv_val | wcomp;
+    }
+  } else {
+    fill_all_bits(perm_vec, tot_ct); // presumably sets the first tot_ct bits; verify against fill_all_bits()
+    set_ct = tot_ct - set_ct; // from here on, the number of bits to clear
+    for (; num_set < set_ct; num_set++) {
+      do {
+	do {
+	  urand = sfmt_genrand_uint32(sfmtp);
+	} while (urand > upper_bound);
+	uii = (totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift;
+        widx = uii / BITCT;
+	wcomp = ONELU << (uii % BITCT);
+	pv_val = perm_vec[widx];
+      } while (!(pv_val & wcomp)); // redraw until a still-set bit is hit
+      perm_vec[widx] = pv_val - wcomp; // bit is known to be set, so subtracting clears it
+    }
+  }
+}
+
+void generate_cc_cluster_perm_vec(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp) { // per-cluster counterpart of generate_cc_perm_vec(): copies preimage, then randomizes bits within each cluster
+  uint32_t tot_ctl2 = 2 * ((tot_ct + (BITCT - 1)) / BITCT); // word count of one 2-bit-per-sample vector
+  uint32_t cluster_idx;
+  uint32_t target_ct;
+  uint32_t cluster_end;
+  uint32_t* map_ptr;
+  uint32_t num_swapped;
+  uint32_t cluster_size;
+  uint32_t upper_bound;
+  uint64_t totq_magic;
+  uint32_t totq_preshift;
+  uint32_t totq_postshift;
+  uint32_t totq_incr;
+  uintptr_t widx;
+  uintptr_t wcomp;
+  uintptr_t pv_val;
+  uint32_t urand;
+  uint32_t uii;
+  memcpy(perm_vec, preimage, tot_ctl2 * sizeof(intptr_t)); // perm_vec starts as a copy of preimage
+  for (cluster_idx = 0; cluster_idx < cluster_ct; cluster_idx++) {
+    target_ct = cluster_case_cts[cluster_idx];
+    cluster_end = cluster_starts[cluster_idx + 1]; // cluster_starts needs cluster_ct + 1 entries
+    cluster_size = cluster_end - cluster_starts[cluster_idx];
+    if (target_ct && (target_ct != cluster_size)) { // all-or-nothing clusters stay exactly as in preimage
+      upper_bound = cluster_size * tot_quotients[cluster_idx] - 1; // random draws above this are rejected
+      totq_magic = totq_magics[cluster_idx];
+      totq_preshift = totq_preshifts[cluster_idx];
+      totq_postshift = totq_postshifts[cluster_idx];
+      totq_incr = totq_incrs[cluster_idx];
+      map_ptr = &(cluster_map[cluster_starts[cluster_idx]]);
+      if (target_ct * 2 < cluster_size) { // set target_ct bits; presumably preimage has this cluster's bits clear (verify against caller)
+	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
+	  do {
+	    do {
+	      urand = sfmt_genrand_uint32(sfmtp);
+	    } while (urand > upper_bound);
+	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)]; // draw a slot within the cluster, then map it through cluster_map
+	    widx = uii / BITCT2;
+	    wcomp = ONELU << (2 * (uii % BITCT2));
+	    pv_val = perm_vec[widx];
+	  } while (pv_val & wcomp); // redraw until a still-unset position is hit
+	  perm_vec[widx] = pv_val | wcomp;
+	}
+      } else {
+	target_ct = cluster_size - target_ct; // clear the complement; presumably preimage has this cluster's bits set (verify against caller)
+	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
+	  do {
+	    do {
+	      urand = sfmt_genrand_uint32(sfmtp);
+	    } while (urand > upper_bound);
+	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)];
+	    widx = uii / BITCT2;
+	    wcomp = ONELU << (2 * (uii % BITCT2));
+	    pv_val = perm_vec[widx];
+	  } while (!(pv_val & wcomp)); // redraw until a still-set position is hit
+	  perm_vec[widx] = pv_val - wcomp; // bit is known to be set, so subtracting clears it
+	}
+      }
+    }
+  }
+}
+
+void generate_cc_cluster_perm1(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp) { // same procedure as generate_cc_cluster_perm_vec(), with 1 bit per sample instead of 2
+  uint32_t tot_ctl = (tot_ct + (BITCT - 1)) / BITCT; // word count of one 1-bit-per-sample vector
+  uint32_t cluster_idx;
+  uint32_t target_ct;
+  uint32_t cluster_end;
+  uint32_t cluster_size;
+  uint32_t* map_ptr;
+  uint32_t num_swapped;
+  uint32_t upper_bound;
+  uint64_t totq_magic;
+  uint32_t totq_preshift;
+  uint32_t totq_postshift;
+  uint32_t totq_incr;
+  uintptr_t widx;
+  uintptr_t wcomp;
+  uintptr_t pv_val;
+  uint32_t urand;
+  uint32_t uii;
+  memcpy(perm_vec, preimage, tot_ctl * sizeof(intptr_t)); // perm_vec starts as a copy of preimage
+  for (cluster_idx = 0; cluster_idx < cluster_ct; cluster_idx++) {
+    target_ct = cluster_case_cts[cluster_idx];
+    cluster_end = cluster_starts[cluster_idx + 1]; // cluster_starts needs cluster_ct + 1 entries
+    cluster_size = cluster_end - cluster_starts[cluster_idx];
+    if (target_ct && (target_ct != cluster_size)) { // all-or-nothing clusters stay exactly as in preimage
+      upper_bound = cluster_size * tot_quotients[cluster_idx] - 1; // random draws above this are rejected
+      totq_magic = totq_magics[cluster_idx];
+      totq_preshift = totq_preshifts[cluster_idx];
+      totq_postshift = totq_postshifts[cluster_idx];
+      totq_incr = totq_incrs[cluster_idx];
+      map_ptr = &(cluster_map[cluster_starts[cluster_idx]]);
+      if (target_ct * 2 < cluster_size) { // set target_ct bits; presumably preimage has this cluster's bits clear (verify against caller)
+	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
+	  do {
+	    do {
+	      urand = sfmt_genrand_uint32(sfmtp);
+	    } while (urand > upper_bound);
+	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)]; // draw a slot within the cluster, then map it through cluster_map
+	    widx = uii / BITCT;
+	    wcomp = ONELU << (uii % BITCT);
+	    pv_val = perm_vec[widx];
+	  } while (pv_val & wcomp); // redraw until a still-unset bit is hit
+	  perm_vec[widx] = pv_val | wcomp;
+	}
+      } else {
+	target_ct = cluster_size - target_ct; // clear the complement; presumably preimage has this cluster's bits set (verify against caller)
+	for (num_swapped = 0; num_swapped < target_ct; num_swapped++) {
+	  do {
+	    do {
+	      urand = sfmt_genrand_uint32(sfmtp);
+	    } while (urand > upper_bound);
+	    uii = map_ptr[(uint32_t)((totq_magic * ((urand >> totq_preshift) + totq_incr)) >> totq_postshift)];
+	    widx = uii / BITCT;
+	    wcomp = ONELU << (uii % BITCT);
+	    pv_val = perm_vec[widx];
+	  } while (!(pv_val & wcomp)); // redraw until a still-set bit is hit
+	  perm_vec[widx] = pv_val - wcomp; // bit is known to be set, so subtracting clears it
+	}
+      }
+    }
+  }
+}
+
+void transpose_perms(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_nm_ct, uint32_t* perm_vecst) {
+  // Transpose permutations so PRESTO/PERMORY-style genotype indexing can work.
+  //
+  // We use a 32-ply interleaved format, to allow counts up to the uint32_t
+  // limit without giving up highly parallel adds in the calc_git() inner loop
+  // (performed with a combination of unroll_incr_1_4, unroll_incr_4_8, and
+  // unroll_incr_8_32).  The index order is:
+  // 64-bit build:
+  //   first 16 bytes: 0 32 64 96 16 48 80 112 4 36 68 100 20 52 84 116
+  //     8 40 72 104 24 56 88 120 12 44 76 108 28 60 92 124 1...
+  //   next 16 bytes: 128 160 192...
+  //
+  // 32-bit build:
+  //   first 4 bytes: 0 8 16 24 4 12 20 28 1 9 17 25 5 13 21 29 2 10 18...
+  //   next 4 bytes: 32 40 48...
+  uintptr_t sample_idx = 0;
+  uintptr_t pheno_nm_ctl2 = 2 * ((pheno_nm_ct + (BITCT - 1)) / BITCT); // word stride between consecutive permutations in perm_vecs
+#ifdef __LP64__
+  uint32_t wbuf[4]; // 16-byte staging buffer holding up to 128 permutation bits for one sample
+  uint32_t* wbptr;
+#else
+  uint32_t wval; // staging word holding up to 32 permutation bits for one sample
+#endif
+  uint32_t rshift;
+  uint32_t wshift;
+  uintptr_t* pvptr;
+  uintptr_t perm_idx;
+  for (; sample_idx < pheno_nm_ct; sample_idx++) {
+    perm_idx = 0;
+    pvptr = &(perm_vecs[sample_idx / BITCT2]);
+    rshift = 2 * (sample_idx % BITCT2); // position of this sample's low bit within its word
+    goto transpose_perms_loop_start; // enter the loop at the buffer reset, skipping the flush
+#ifdef __LP64__
+    do {
+      if (!(perm_idx % 4)) {
+	if (perm_idx % 128) { // inside a 128-permutation group: only the target bit position changes
+	  wshift = ((perm_idx & 96) >> 5) | ((perm_idx & 16) >> 2) | ((perm_idx & 12) << 1);
+	} else {
+	  memcpy(perm_vecst, wbuf, 16); // group of 128 complete: emit it
+	  perm_vecst = &(perm_vecst[4]);
+	transpose_perms_loop_start:
+	  fill_uint_zero(wbuf, 4);
+	  wshift = 0;
+	}
+	wbptr = wbuf;
+      }
+      *wbptr |= ((pvptr[perm_idx * pheno_nm_ctl2] >> rshift) & 1) << wshift; // lands in word perm_idx % 4 at bit wshift
+      wbptr++;
+    } while (++perm_idx < perm_vec_ct);
+    memcpy(perm_vecst, wbuf, 16); // emit the final (possibly partial) group for this sample
+    perm_vecst = &(perm_vecst[4]);
+#else
+    do {
+      if (perm_idx % 32) {
+	wshift = ((perm_idx & 24) >> 3) | (perm_idx & 4) | ((perm_idx & 3) << 3);
+      } else {
+	*perm_vecst++ = wval; // group of 32 complete: emit it
+      transpose_perms_loop_start:
+	wval = 0;
+	wshift = 0;
+      }
+      wval |= ((pvptr[perm_idx * pheno_nm_ctl2] >> rshift) & 1) << wshift;
+    } while (++perm_idx < perm_vec_ct);
+    *perm_vecst++ = wval; // emit the final (possibly partial) group for this sample
+#endif
+  }
+}
+
+void transpose_perm1s(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_nm_ct, uint32_t* perm_vecst) { // transpose_perms() variant reading 1-bit-per-sample permutation vectors
+  uintptr_t sample_idx = 0;
+  uintptr_t pheno_nm_ctl = (pheno_nm_ct + (BITCT - 1)) / BITCT; // word stride between consecutive permutations in perm_vecs
+#ifdef __LP64__
+  uint32_t wbuf[4]; // 16-byte staging buffer holding up to 128 permutation bits for one sample
+  uint32_t* wbptr;
+#else
+  uint32_t wval; // staging word holding up to 32 permutation bits for one sample
+#endif
+  uint32_t rshift;
+  uint32_t wshift;
+  uintptr_t* pvptr;
+  uintptr_t perm_idx;
+  for (; sample_idx < pheno_nm_ct; sample_idx++) {
+    perm_idx = 0;
+    pvptr = &(perm_vecs[sample_idx / BITCT]);
+    rshift = sample_idx % BITCT; // position of this sample's bit within its word
+    goto transpose_perm1s_loop_start; // enter the loop at the buffer reset, skipping the flush
+#ifdef __LP64__
+    do {
+      if (!(perm_idx % 4)) {
+	if (perm_idx % 128) { // inside a 128-permutation group: only the target bit position changes
+	  wshift = ((perm_idx & 96) >> 5) | ((perm_idx & 16) >> 2) | ((perm_idx & 12) << 1);
+	} else {
+	  memcpy(perm_vecst, wbuf, 16); // group of 128 complete: emit it
+	  perm_vecst = &(perm_vecst[4]);
+	transpose_perm1s_loop_start:
+	  fill_uint_zero(wbuf, 4); // all four words are ORed into below, so all four must start at zero
+	  wshift = 0;
+	}
+	wbptr = wbuf;
+      }
+      *wbptr |= ((pvptr[perm_idx * pheno_nm_ctl] >> rshift) & 1) << wshift; // lands in word perm_idx % 4 at bit wshift
+      wbptr++;
+    } while (++perm_idx < perm_vec_ct);
+    memcpy(perm_vecst, wbuf, 16); // emit the final (possibly partial) group for this sample
+    perm_vecst = &(perm_vecst[4]);
+#else
+    do {
+      if (perm_idx % 32) {
+	wshift = ((perm_idx & 24) >> 3) | (perm_idx & 4) | ((perm_idx & 3) << 3);
+      } else {
+	*perm_vecst++ = wval; // group of 32 complete: emit it
+      transpose_perm1s_loop_start:
+	wval = 0;
+	wshift = 0;
+      }
+      wval |= ((pvptr[perm_idx * pheno_nm_ctl] >> rshift) & 1) << wshift;
+    } while (++perm_idx < perm_vec_ct);
+    *perm_vecst++ = wval; // emit the final (possibly partial) group for this sample
+#endif
+  }
+}
+
+// todo: add multithread globals with extern linkage
diff --git a/plink_perm.h b/plink_perm.h
new file mode 100644
index 0000000..8de84e5
--- /dev/null
+++ b/plink_perm.h
@@ -0,0 +1,196 @@
+#ifndef __PLINK_PERM_H__
+#define __PLINK_PERM_H__ // completes the include guard so a second inclusion is skipped
+// Permutation generation and interpretation code common to many association
+// tests.
+
+void generate_cc_perm_vec(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp);
+
+void generate_cc_perm1(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient, uint64_t totq_magic, uint32_t totq_preshift, uint32_t totq_postshift, uint32_t totq_incr, uintptr_t* perm_vec, sfmt_t* sfmtp);
+
+void generate_cc_cluster_perm_vec(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp);
+
+void generate_cc_cluster_perm1(uint32_t tot_ct, uintptr_t* preimage, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* cluster_case_cts, uint32_t* tot_quotients, uint64_t* totq_magics, uint32_t* totq_preshifts, uint32_t* totq_postshifts, uint32_t* totq_incrs, uintptr_t* perm_vec, sfmt_t* sfmtp);
+
+// Efficient "vertical popcount" support.
+void transpose_perms(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_nm_ct, uint32_t* perm_vecst);
+
+void transpose_perm1s(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_nm_ct, uint32_t* perm_vecst);
+
+#ifdef __LP64__
+static inline void unroll_incr_1_4(const __m128i* acc1, __m128i* acc4, uint32_t acc1_vec_ct) { // adds the bits of acc1 into 4-bit counters; each input vector feeds 4 consecutive acc4 vectors
+  const __m128i m1x4 = {0x1111111111111111LLU, 0x1111111111111111LLU}; // keeps the low bit of every nibble
+  __m128i loader;
+  uint32_t vidx;
+  for (vidx = 0; vidx < acc1_vec_ct; vidx++) {
+    loader = *acc1++;
+    *acc4 = _mm_add_epi64(*acc4, _mm_and_si128(loader, m1x4)); // bits 0, 4, 8, ...
+    acc4++;
+    loader = _mm_srli_epi64(loader, 1);
+    *acc4 = _mm_add_epi64(*acc4, _mm_and_si128(loader, m1x4)); // bits 1, 5, 9, ...
+    acc4++;
+    loader = _mm_srli_epi64(loader, 1);
+    *acc4 = _mm_add_epi64(*acc4, _mm_and_si128(loader, m1x4)); // bits 2, 6, 10, ...
+    acc4++;
+    loader = _mm_srli_epi64(loader, 1);
+    *acc4 = _mm_add_epi64(*acc4, _mm_and_si128(loader, m1x4)); // bits 3, 7, 11, ...
+    acc4++;
+  }
+}
+
+static inline void unroll_incr_4_8(const __m128i* acc4, __m128i* acc8, uint32_t acc4_vec_ct) { // adds 4-bit counters into 8-bit counters; each input vector feeds 2 consecutive acc8 vectors
+  const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU}; // keeps the low nibble of every byte
+  __m128i loader;
+  uint32_t vidx;
+  for (vidx = 0; vidx < acc4_vec_ct; vidx++) {
+    loader = *acc4++;
+    *acc8 = _mm_add_epi64(*acc8, _mm_and_si128(loader, m4)); // low nibbles
+    acc8++;
+    loader = _mm_srli_epi64(loader, 4);
+    *acc8 = _mm_add_epi64(*acc8, _mm_and_si128(loader, m4)); // high nibbles
+    acc8++;
+  }
+}
+
+static inline void unroll_zero_incr_4_8(__m128i* acc4, __m128i* acc8, uint32_t acc4_vec_ct) { // like unroll_incr_4_8(), but also resets each acc4 vector to zero after reading it
+  const __m128i m4 = {0x0f0f0f0f0f0f0f0fLLU, 0x0f0f0f0f0f0f0f0fLLU}; // keeps the low nibble of every byte
+  __m128i loader;
+  uint32_t vidx;
+  for (vidx = 0; vidx < acc4_vec_ct; vidx++) {
+    loader = *acc4;
+    *acc4++ = _mm_setzero_si128(); // source counters restart from zero
+    *acc8 = _mm_add_epi64(*acc8, _mm_and_si128(loader, m4)); // low nibbles
+    acc8++;
+    loader = _mm_srli_epi64(loader, 4);
+    *acc8 = _mm_add_epi64(*acc8, _mm_and_si128(loader, m4)); // high nibbles
+    acc8++;
+  }
+}
+
+static inline void unroll_incr_8_32(const __m128i* acc8, __m128i* acc32, uint32_t acc8_vec_ct) { // adds 8-bit counters into 32-bit counters; each input vector feeds 4 consecutive acc32 vectors
+  const __m128i m8x32 = {0x000000ff000000ffLLU, 0x000000ff000000ffLLU}; // keeps the low byte of every 32-bit lane
+  __m128i loader;
+  uint32_t vidx;
+  for (vidx = 0; vidx < acc8_vec_ct; vidx++) {
+    loader = *acc8++;
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 0 of each lane
+    acc32++;
+    loader = _mm_srli_epi64(loader, 8);
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 1 of each lane
+    acc32++;
+    loader = _mm_srli_epi64(loader, 8);
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 2 of each lane
+    acc32++;
+    loader = _mm_srli_epi64(loader, 8);
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 3 of each lane
+    acc32++;
+  }
+}
+
+static inline void unroll_zero_incr_8_32(__m128i* acc8, __m128i* acc32, uint32_t acc8_vec_ct) { // like unroll_incr_8_32(), but also resets each acc8 vector to zero after reading it
+  const __m128i m8x32 = {0x000000ff000000ffLLU, 0x000000ff000000ffLLU}; // keeps the low byte of every 32-bit lane
+  __m128i loader;
+  uint32_t vidx;
+  for (vidx = 0; vidx < acc8_vec_ct; vidx++) {
+    loader = *acc8;
+    *acc8++ = _mm_setzero_si128(); // source counters restart from zero
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 0 of each lane
+    acc32++;
+    loader = _mm_srli_epi64(loader, 8);
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 1 of each lane
+    acc32++;
+    loader = _mm_srli_epi64(loader, 8);
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 2 of each lane
+    acc32++;
+    loader = _mm_srli_epi64(loader, 8);
+    *acc32 = _mm_add_epi64(*acc32, _mm_and_si128(loader, m8x32)); // byte 3 of each lane
+    acc32++;
+  }
+}
+#else
+static inline void unroll_incr_1_4(const uintptr_t* acc1, uintptr_t* acc4, uint32_t acc1_word_ct) { // non-LP64 version: adds the bits of acc1 into 4-bit counters, 4 acc4 words per input word
+  uint32_t widx;
+  uintptr_t loader;
+  for (widx = 0; widx < acc1_word_ct; widx++) {
+    loader = *acc1++;
+    *acc4 += loader & 0x11111111U; // bits 0, 4, 8, ...
+    acc4++;
+    loader >>= 1;
+    *acc4 += loader & 0x11111111U; // bits 1, 5, 9, ...
+    acc4++;
+    loader >>= 1;
+    *acc4 += loader & 0x11111111U; // bits 2, 6, 10, ...
+    acc4++;
+    loader >>= 1;
+    *acc4 += loader & 0x11111111U; // bits 3, 7, 11, ...
+    acc4++;
+  }
+}
+
+static inline void unroll_incr_4_8(const uintptr_t* acc4, uintptr_t* acc8, uint32_t acc4_word_ct) { // non-LP64 version: adds 4-bit counters into 8-bit counters, 2 acc8 words per input word
+  uint32_t widx;
+  uintptr_t loader;
+  for (widx = 0; widx < acc4_word_ct; widx++) {
+    loader = *acc4++;
+    *acc8 += loader & 0x0f0f0f0fU; // low nibbles
+    acc8++;
+    loader >>= 4;
+    *acc8 += loader & 0x0f0f0f0fU; // high nibbles
+    acc8++;
+  }
+}
+
+static inline void unroll_zero_incr_4_8(uintptr_t* acc4, uintptr_t* acc8, uint32_t acc4_word_ct) { // like the non-LP64 unroll_incr_4_8(), but also resets each acc4 word to zero after reading it
+  uint32_t widx;
+  uintptr_t loader;
+  for (widx = 0; widx < acc4_word_ct; widx++) {
+    loader = *acc4;
+    *acc4++ = 0; // source counters restart from zero
+    *acc8 += loader & 0x0f0f0f0fU; // low nibbles
+    acc8++;
+    loader >>= 4;
+    *acc8 += loader & 0x0f0f0f0fU; // high nibbles
+    acc8++;
+  }
+}
+
+static inline void unroll_incr_8_32(const uintptr_t* acc8, uintptr_t* acc32, uint32_t acc8_word_ct) { // non-LP64 version: adds each byte of an acc8 word into its own acc32 word
+  uint32_t widx;
+  uintptr_t loader;
+  for (widx = 0; widx < acc8_word_ct; widx++) {
+    loader = *acc8++;
+    *acc32 += (uint8_t)loader; // byte 0
+    acc32++;
+    loader >>= 8;
+    *acc32 += (uint8_t)loader; // byte 1
+    acc32++;
+    loader >>= 8;
+    *acc32 += (uint8_t)loader; // byte 2
+    acc32++;
+    loader >>= 8;
+    *acc32 += loader; // no cast: only the top byte remains, assuming uintptr_t is 32 bits in this branch
+    acc32++;
+  }
+}
+
+static inline void unroll_zero_incr_8_32(uintptr_t* acc8, uintptr_t* acc32, uint32_t acc8_word_ct) { // like the non-LP64 unroll_incr_8_32(), but also resets each acc8 word to zero after reading it
+  uint32_t widx;
+  uintptr_t loader;
+  for (widx = 0; widx < acc8_word_ct; widx++) {
+    loader = *acc8;
+    *acc8++ = 0; // source counters restart from zero
+    *acc32 += (uint8_t)loader; // byte 0
+    acc32++;
+    loader >>= 8;
+    *acc32 += (uint8_t)loader; // byte 1
+    acc32++;
+    loader >>= 8;
+    *acc32 += (uint8_t)loader; // byte 2
+    acc32++;
+    loader >>= 8;
+    *acc32 += loader; // no cast: only the top byte remains, assuming uintptr_t is 32 bits in this branch
+    acc32++;
+  }
+}
+#endif
+
+#endif // __PLINK_PERM_H__
diff --git a/plink_set.c b/plink_set.c
index 59e7ba2..014de18 100644
--- a/plink_set.c
+++ b/plink_set.c
@@ -250,7 +250,7 @@ uint32_t alloc_and_populate_nonempty_set_incl(Set_info* sip, uint32_t* nonempty_
   return 0;
 }
 
-int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_extend, uint32_t collapse_group, uint32_t fail_on_no_sets, uint32_t c_prefix, uintptr_t subset_ct, char* sorted_subset_ids, uintptr_t max_subset_id_len, uint32_t* marker_pos, Chrom_info* chrom_info_ptr, uintptr_t* topsize_ptr, uintptr_t* set_ct_ptr, char** set_names_ptr, uintptr_t* max_set_id_len_ptr, Make_set_range*** make_set_range_arr_ptr, uint64_t** range_sort_buf_ptr, const char* file_descrip) {
+int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_extend, uint32_t collapse_group, uint32_t fail_on_no_sets, uint32_t c_prefix, uint32_t allow_no_variants, uintptr_t subset_ct, char* sorted_subset_ids, uintptr_t max_subset_id_len, uint32_t* marker_pos, Chrom_info* chrom_info_ptr, uintptr_t* topsize_ptr, uintptr_t* set_ct_ptr, char** set_names_ptr, uintptr_t* max_set_id_len_ptr, Make_set_range*** make_set_range_arr_ptr, uint64_t** range_sort_buf_ptr, const ch [...]
   // Called directly by extract_exclude_range(), define_sets(), and indirectly
   // by annotate(), gene_report(), and clump_reports().
   // Assumes topsize has not been subtracted off wkspace_left.  (This remains
@@ -349,9 +349,12 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
     if (!set_ct) {
       if (fail_on_no_sets) {
 	if (marker_pos) {
-	  // okay, this is a kludge
-	  logerrprint("Error: All variants excluded by --gene{-all}, since no sets were defined from\n--make-set file.\n");
-	  retval = RET_ALL_MARKERS_EXCLUDED;
+	  if (!allow_no_variants) {
+	    // okay, this is a kludge
+	    logerrprint("Error: All variants excluded by --gene{-all}, since no sets were defined from\n--make-set file.\n");
+	    retval = RET_ALL_MARKERS_EXCLUDED;
+	    goto load_range_list_ret_1;
+	  }
 	} else {
 	  if (subset_ct) {
 	    logerrprint("Error: No --gene-subset genes present in --gene-report file.\n");
@@ -359,8 +362,8 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
 	    logerrprint("Error: Empty --gene-report file.\n");
 	  }
 	  retval = RET_INVALID_FORMAT;
+	  goto load_range_list_ret_1;
 	}
-	goto load_range_list_ret_1;
       }
       LOGERRPRINTF("Warning: No valid ranges in %s file.\n", file_descrip);
       goto load_range_list_ret_1;
@@ -539,7 +542,10 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
   return retval;
 }
 
-int32_t extract_exclude_range(char* fname, uint32_t* marker_pos, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t is_exclude, Chrom_info* chrom_info_ptr) {
+int32_t extract_exclude_range(char* fname, uint32_t* marker_pos, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t is_exclude, uint32_t allow_no_variants, Chrom_info* chrom_info_ptr) {
+  if (unfiltered_marker_ct == *marker_exclude_ct_ptr) {
+    return 0;
+  }
   unsigned char* wkspace_mark = wkspace_base;
   uintptr_t unfiltered_marker_ctl = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
   FILE* infile = NULL;
@@ -552,7 +558,7 @@ int32_t extract_exclude_range(char* fname, uint32_t* marker_pos, uintptr_t unfil
   if (fopen_checked(&infile, fname, "r")) {
     goto extract_exclude_range_ret_OPEN_FAIL;
   }
-  retval = load_range_list(infile, 0, 0, 0, 0, 0, 0, NULL, 0, marker_pos, chrom_info_ptr, &topsize, NULL, NULL, NULL, &range_arr, NULL, is_exclude? "--exclude range" : "--extract range");
+  retval = load_range_list(infile, 0, 0, 0, 0, 0, allow_no_variants, 0, NULL, 0, marker_pos, chrom_info_ptr, &topsize, NULL, NULL, NULL, &range_arr, NULL, is_exclude? "--exclude range" : "--extract range");
   if (retval) {
     goto extract_exclude_range_ret_1;
   }
@@ -580,7 +586,7 @@ int32_t extract_exclude_range(char* fname, uint32_t* marker_pos, uintptr_t unfil
     bitfield_or(marker_exclude, marker_exclude_new, unfiltered_marker_ctl);
   }
   *marker_exclude_ct_ptr = popcount_longs(marker_exclude, unfiltered_marker_ctl);
-  if (*marker_exclude_ct_ptr == unfiltered_marker_ct) {
+  if ((*marker_exclude_ct_ptr == unfiltered_marker_ct) && (!allow_no_variants)) {
     LOGERRPRINTF("Error: All variants excluded by '--%s range'.\n", is_exclude? "exclude" : "extract");
     retval = RET_ALL_MARKERS_EXCLUDED;
   } else if (*marker_exclude_ct_ptr == orig_marker_exclude_ct) {
@@ -745,6 +751,10 @@ uint32_t save_set_bitfield(uintptr_t* marker_bitfield_tmp, uint32_t marker_ct, u
   save_set_bitfield_standard:
     bound_bottom_d128 *= 128;
     bound_top_d128 *= 128;
+    // bugfix
+    if (bound_top_d128 > marker_ct) {
+      bound_top_d128 = marker_ct;
+    }
     (*set_range_pp)[0] = 0xffffffffU;
     (*set_range_pp)[1] = bound_bottom_d128;
     (*set_range_pp)[2] = bound_top_d128 - bound_bottom_d128;
@@ -940,7 +950,7 @@ uint32_t save_set_range(uint64_t* range_sort_buf, uint32_t marker_ct, uint32_t r
   return 0;
 }
 
-int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uint32_t* marker_pos, uintptr_t* marker_exclude_ct_ptr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr) {
+int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uint32_t* marker_pos, uintptr_t* marker_exclude_ct_ptr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, uint32_t allow_no_variants) {
   FILE* infile = NULL;
   uintptr_t topsize = 0;
   char* sorted_marker_ids = NULL;
@@ -1004,7 +1014,11 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
 	}
 	bufptr = &(bufptr[slen + 1]);
 	if (!(*bufptr)) {
-	  goto define_sets_ret_ALL_MARKERS_EXCLUDED;
+	  if (!allow_no_variants) {
+	    goto define_sets_ret_ALL_MARKERS_EXCLUDED;
+	  } else {
+	    goto define_sets_ret_EXCLUDE_ALL_MARKERS_ALLOWED;
+	  }
 	}
       }
       free(sip->genekeep_flattened);
@@ -1022,8 +1036,12 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
 	bufptr = &(bufptr[slen]);
       } while (*bufptr);
       if (!genekeep_ct) {
-	logerrprint("Error: All variants excluded by --gene.\n");
-	goto define_sets_ret_ALL_MARKERS_EXCLUDED_2;
+	if (!allow_no_variants) {
+	  logerrprint("Error: All variants excluded by --gene.\n");
+	  goto define_sets_ret_ALL_MARKERS_EXCLUDED_2;
+	} else {
+	  goto define_sets_ret_EXCLUDE_ALL_MARKERS_ALLOWED;
+	}
       }
       sorted_genekeep_ids = (char*)top_alloc(&topsize, genekeep_ct * max_genekeep_len);
       if (!sorted_genekeep_ids) {
@@ -1062,12 +1080,16 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
     }
     if (!subset_ct) {
       if ((gene_all || sip->genekeep_flattened) && ((!sip->merged_set_name) || (!complement_sets))) {
-	if (sip->subset_fname) {
-	  logerrprint("Error: All variants excluded, since --subset file is empty.\n");
+	if (!allow_no_variants) {
+	  if (sip->subset_fname) {
+	    logerrprint("Error: All variants excluded, since --subset file is empty.\n");
+	  } else {
+	    logerrprint("Error: All variants excluded, since --set-names was given no parameters.\n");
+	  }
+	  goto define_sets_ret_ALL_MARKERS_EXCLUDED_2;
 	} else {
-	  logerrprint("Error: All variants excluded, since --set-names was given no parameters.\n");
+	  goto define_sets_ret_EXCLUDE_ALL_MARKERS_ALLOWED;
 	}
-	goto define_sets_ret_ALL_MARKERS_EXCLUDED_2;
       }
       if (sip->merged_set_name) {
 	goto define_sets_merge_nothing;
@@ -1117,7 +1139,7 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
   }
   // 3. load --make-set range list
   if (make_set) {
-    retval = load_range_list(infile, !sip->merged_set_name, sip->make_set_border, sip->modifier & SET_MAKE_COLLAPSE_GROUP, gene_all || sip->genekeep_flattened, c_prefix, subset_ct, sorted_subset_ids, max_subset_id_len, marker_pos, chrom_info_ptr, &topsize, &set_ct, &set_names, &max_set_id_len, &make_set_range_arr, &range_sort_buf, "--make-set");
+    retval = load_range_list(infile, !sip->merged_set_name, sip->make_set_border, sip->modifier & SET_MAKE_COLLAPSE_GROUP, gene_all || sip->genekeep_flattened, c_prefix, allow_no_variants, subset_ct, sorted_subset_ids, max_subset_id_len, marker_pos, chrom_info_ptr, &topsize, &set_ct, &set_names, &max_set_id_len, &make_set_range_arr, &range_sort_buf, "--make-set");
     if (retval) {
       goto define_sets_ret_1;
     }
@@ -1251,11 +1273,16 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
       }
       if (!set_ct) {
 	if (!complement_sets) {
-	  logerrprint("Error: All variants excluded by --gene{-all}, since no sets were defined from\n--set file.\n");
-	  goto define_sets_ret_ALL_MARKERS_EXCLUDED_2;
+	  if (!allow_no_variants) {
+	    logerrprint("Error: All variants excluded by --gene{-all}, since no sets were defined from\n--set file.\n");
+	    goto define_sets_ret_ALL_MARKERS_EXCLUDED_2;
+	  } else {
+	    goto define_sets_ret_EXCLUDE_ALL_MARKERS_ALLOWED;
+	  }
+	} else {
+	  logerrprint("Warning: No sets defined from --set file.\n");
+	  goto define_sets_ret_1;
 	}
-	logerrprint("Warning: No sets defined from --set file.\n");
-	goto define_sets_ret_1;
       }
     }
     if (!complement_sets) {
@@ -1263,10 +1290,14 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
     }
     bitfield_or(marker_exclude, marker_exclude_new, unfiltered_marker_ctl);
     marker_exclude_ct = popcount_longs(marker_exclude, unfiltered_marker_ctl);
+    *marker_exclude_ct_ptr = marker_exclude_ct;
     if (marker_exclude_ct == unfiltered_marker_ct) {
-      goto define_sets_ret_ALL_MARKERS_EXCLUDED;
+      if (!allow_no_variants) {
+        goto define_sets_ret_ALL_MARKERS_EXCLUDED;
+      } else {
+	goto define_sets_ret_1;
+      }
     }
-    *marker_exclude_ct_ptr = marker_exclude_ct;
     marker_ct = unfiltered_marker_ct - marker_exclude_ct;
     rewind(infile);
     topsize = topsize_bak;
@@ -1624,6 +1655,10 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
   define_sets_ret_ALL_MARKERS_EXCLUDED_2:
     retval = RET_ALL_MARKERS_EXCLUDED;
     break;
+  define_sets_ret_EXCLUDE_ALL_MARKERS_ALLOWED:
+    fill_all_bits(marker_exclude, unfiltered_marker_ct);
+    *marker_exclude_ct_ptr = unfiltered_marker_ct;
+    break;
   define_sets_ret_INVALID_FORMAT_EXTRA_END:
     logerrprint("Error: Extra 'END' token in --set file.\n");
     retval = RET_INVALID_FORMAT;
@@ -2142,7 +2177,7 @@ uint32_t setdefs_compress(Set_info* sip, uintptr_t* set_incl, uintptr_t set_ct,
         fill_bits(cur_bitfield, marker_midx_to_idx[range_offset + range_stop], marker_ct_orig - range_offset - range_stop);
         range_end = marker_ct;
       } else {
-        range_end = 1 + last_set_bit(cur_bitfield, (range_offset + range_stop + (BITCT - 1)) / BITCT);
+        range_end = 1 + last_set_bit(cur_bitfield, (marker_ct + (BITCT - 1)) / BITCT);
       }
       if (range_start) {
         range_start = marker_midx_to_idx[next_set_unsafe(read_bitfield, 0) + range_offset];
@@ -2188,7 +2223,7 @@ int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t s
   if (fopen_checked(&infile, fname, "r")) {
     goto load_range_list_sortpos_ret_OPEN_FAIL;
   }
-  retval = load_range_list(infile, 1, border_extend, 0, 0, 0, subset_ct, sorted_subset_ids, 0, NULL, chrom_info_ptr, &topsize, &gene_ct, gene_names_ptr, &max_gene_id_len, &gene_arr, &range_sort_buf, file_descrip);
+  retval = load_range_list(infile, 1, border_extend, 0, 0, 0, 0, subset_ct, sorted_subset_ids, 0, NULL, chrom_info_ptr, &topsize, &gene_ct, gene_names_ptr, &max_gene_id_len, &gene_arr, &range_sort_buf, file_descrip);
   if (retval) {
     goto load_range_list_sortpos_ret_1;
   }
diff --git a/plink_set.h b/plink_set.h
index 14d1b08..e56f69b 100644
--- a/plink_set.h
+++ b/plink_set.h
@@ -94,13 +94,13 @@ uint32_t setdef_iter(uint32_t* setdef, uint32_t* cur_idx_ptr, uint32_t* aux_ptr)
 
 uint32_t alloc_and_populate_nonempty_set_incl(Set_info* sip, uint32_t* nonempty_set_ct_ptr, uintptr_t** nonempty_set_incl_ptr);
 
-int32_t extract_exclude_range(char* fname, uint32_t* marker_pos, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t is_exclude, Chrom_info* chrom_info_ptr);
+int32_t extract_exclude_range(char* fname, uint32_t* marker_pos, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t is_exclude, uint32_t allow_no_variants, Chrom_info* chrom_info_ptr);
 
 uint32_t save_set_bitfield(uintptr_t* marker_bitfield_tmp, uint32_t marker_ct, uint32_t range_start, uint32_t range_end, uint32_t complement_sets, uint32_t** set_range_pp);
 
 uint32_t save_set_range(uint64_t* range_sort_buf, uint32_t marker_ct, uint32_t rsb_last_idx, uint32_t complement_sets, uint32_t** set_range_pp);
 
-int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uint32_t* marker_pos, uintptr_t* marker_exclude_ct_ptr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr);
+int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uint32_t* marker_pos, uintptr_t* marker_exclude_ct_ptr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, uint32_t allow_no_variants);
 
 int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, Chrom_info* chrom_info_ptr);
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/plink1.9.git



More information about the debian-med-commit mailing list