[med-svn] [plink1.9] 01/03: New upstream version 1.90~b4.9-171013

Dylan Aïssi bob.dybian-guest at moszumanska.debian.org
Thu Nov 2 23:02:22 UTC 2017


This is an automated email from the git hooks/post-receive script.

bob.dybian-guest pushed a commit to branch master
in repository plink1.9.

commit 2e936be1d49715f2f8a42ceecab6731b037637b6
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date:   Thu Nov 2 23:53:58 2017 +0100

    New upstream version 1.90~b4.9-171013
---
 plink.c        | 167 +++++++++++++++--------------
 plink_data.c   | 331 +++++++++------------------------------------------------
 plink_data.h   |   2 +-
 plink_dosage.c |  32 +++---
 plink_filter.c |  63 +++++++++++
 plink_filter.h |   2 +
 plink_glm.c    |  11 +-
 plink_ld.c     |  25 ++++-
 plink_misc.c   |   8 +-
 plink_set.c    | 108 ++++++++++++++++---
 plink_set.h    |   4 +-
 11 files changed, 357 insertions(+), 396 deletions(-)

diff --git a/plink.c b/plink.c
index 3f707ed..792e8d9 100644
--- a/plink.c
+++ b/plink.c
@@ -93,7 +93,7 @@
 
 static const char ver_str[] =
 #ifdef STABLE_BUILD
-  "PLINK v1.90b4.7"
+  "PLINK v1.90b4.9"
 #else
   "PLINK v1.90p"
 #endif
@@ -105,10 +105,10 @@ static const char ver_str[] =
 #else
   " 32-bit"
 #endif
-  " (6 Sep 2017)";
+  " (13 Oct 2017)";
 static const char ver_str2[] =
   // include leading space if day < 10, so character length stays the same
-  " "
+  ""
 #ifdef STABLE_BUILD
   "" // (don't want this when version number has a trailing letter)
 #else
@@ -326,7 +326,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   uintptr_t* marker_exclude = nullptr;
   uintptr_t marker_exclude_ct = 0;
   uintptr_t marker_ct = 0;
-  uintptr_t max_marker_id_len = 0;
+  uintptr_t max_marker_id_blen = 0;
   // set_allele_freqs = .bed set bit frequency in middle of loading process, A2
   //   allele frequency later.
   double* set_allele_freqs = nullptr;
@@ -480,7 +480,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
     memcpy(outname_end, ".bed", 5);
     if (realpath_identical(outname, g_textbuf, &(g_textbuf[FNAMESIZE + 64]))) {
-      logprint("Note: --make-bed input and output filenames match.  Appending '~' to input\nfilenames.\n");
+      logerrprint("Warning: --make-bed input and output filenames match.  Appending '~' to input\nfilenames.\n");
       uii = strlen(bedname);
       memcpy(g_textbuf, bedname, uii + 1);
       memcpy(&(bedname[uii]), "~", 2);
@@ -554,7 +554,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   if (bimname[0]) {
     if (update_name) {
       ulii = 0;
-      retval = scan_max_strlen(update_name->fname, update_name->colid, update_name->colx, update_name->skip, update_name->skipchar, &max_marker_id_len, &ulii);
+      retval = scan_max_strlen(update_name->fname, update_name->colid, update_name->colx, update_name->skip, update_name->skipchar, &max_marker_id_blen, &ulii);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -564,14 +564,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	// fixing the problem, so we shouldn't spam them.
 	logerrprint("Warning: Unusually long new variant ID(s) in --update-name file.  Double-check\nyour file and command-line parameters, and consider changing your naming\nscheme if you encounter memory problems.\n");
       }
-      if (ulii > max_marker_id_len) {
-	max_marker_id_len = ulii;
+      if (ulii > max_marker_id_blen) {
+	max_marker_id_blen = ulii;
       }
     }
     if (!marker_alleles_needed) {
       allelexxxx = 0;
     }
-    retval = load_bim(bimname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, &set_allele_freqs, nchrobs_needed? (&nchrobs) : nullptr, &marker_allele_ptrs, &max_marker_allele_blen, &marker_ids, missing_mid_template, new_id_max_allele_len, missing_marker_id_match, chrom_info_ptr, &marker_cms, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, &map_is [...]
+    retval = load_bim(bimname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_blen, &marker_exclude, &set_allele_freqs, nchrobs_needed? (&nchrobs) : nullptr, &marker_allele_ptrs, &max_marker_allele_blen, &marker_ids, missing_mid_template, new_id_max_allele_len, missing_marker_id_match, chrom_info_ptr, &marker_cms, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, &map_is_unsorted, marker_po [...]
     if (retval) {
       goto plink_ret_1;
     }
@@ -785,33 +785,44 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   }
 
   if (unfiltered_marker_ct != marker_exclude_ct) {
-    uii = update_cm || update_map || update_name || (marker_alleles_needed && (update_alleles_fname || (flip_fname && (!flip_subset_fname)))) || filter_attrib_fname || qual_filter;
+    // bugfix (12 Oct 2017): Previous plink 1.9 implementation of
+    // --{exclude-}snps could affect unwanted variants at the same position as
+    // named variant(s).  Backport plink 2.0's safe implementation.  This has
+    // the drawback of sometimes increasing the overhead associated with
+    // --snps, but we can live with that.
+    uii = update_cm || update_map || update_name || (marker_alleles_needed && (update_alleles_fname || (flip_fname && (!flip_subset_fname)))) || filter_attrib_fname || qual_filter || snps_range_list_ptr->names;
     if (uii || extractname || excludename) {
       // only permit duplicate marker IDs for --extract/--exclude
       bigstack_mark = g_bigstack_base;
-      retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, !uii, &marker_id_htable_size, &marker_id_htable);
+      retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, !uii, &marker_id_htable_size, &marker_id_htable);
       if (retval) {
 	goto plink_ret_1;
       }
+      if (snps_range_list_ptr->names) {
+        retval = snps_flag(marker_ids, marker_id_htable, snps_range_list_ptr, unfiltered_marker_ct, max_marker_id_blen, marker_id_htable_size, (filter_flags / FILTER_EXCLUDE_MARKERNAME_SNP) & 1, marker_exclude, &marker_exclude_ct);
+        if (retval) {
+          goto plink_ret_1;
+        }
+      }
       if (update_cm) {
-	retval = update_marker_cms(update_cm, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_cms);
+	retval = update_marker_cms(update_cm, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_cms);
 	if (retval) {
 	  goto plink_ret_1;
 	}
       }
       if (update_map) {
-	retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
+	retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
 	if (retval) {
 	  goto plink_ret_1;
 	}
       } else if (update_name) {
-	retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct);
+	retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct);
 	if (retval) {
 	  goto plink_ret_1;
 	}
 	if (update_alleles_fname || (marker_alleles_needed && flip_fname && (!flip_subset_fname)) || extractname || excludename) {
 	  bigstack_reset(bigstack_mark);
-	  retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
+	  retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, 0, &marker_id_htable_size, &marker_id_htable);
 	  if (retval) {
 	    goto plink_ret_1;
 	  }
@@ -819,13 +830,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       }
       if (marker_alleles_needed) {
 	if (update_alleles_fname) {
-	  retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_blen, outname, outname_end);
+	  retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_blen, outname, outname_end);
 	  if (retval) {
 	    goto plink_ret_1;
 	  }
 	}
 	if (flip_fname && (!flip_subset_fname)) {
-	  retval = flip_strand(flip_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs);
+	  retval = flip_strand(flip_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs);
 	  if (retval) {
 	    goto plink_ret_1;
 	  }
@@ -833,7 +844,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       }
       if (extractname) {
 	if (!(misc_flags & MISC_EXTRACT_RANGE)) {
-	  retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
+	  retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
 	  if (retval) {
 	    goto plink_ret_1;
 	  }
@@ -852,7 +863,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       }
       if (excludename) {
 	if (!(misc_flags & MISC_EXCLUDE_RANGE)) {
-	  retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
+	  retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
 	  if (retval) {
 	    goto plink_ret_1;
 	  }
@@ -870,13 +881,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	}
       }
       if (filter_attrib_fname) {
-	retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink_ret_1;
 	}
       }
       if (qual_filter) {
-	retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink_ret_1;
 	}
@@ -1035,7 +1046,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	om_ip->cluster_ct = 0;
 	om_ip->entry_ct = 0;
       } else {
-	retval = load_oblig_missing(bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sex_male, chrom_info_ptr, om_ip);
+	retval = load_oblig_missing(bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_blen, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sex_male, chrom_info_ptr, om_ip);
 	if (retval) {
 	  goto plink_ret_1;
 	}
@@ -1278,13 +1289,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   }
 
   if (bimname[0] && (unfiltered_marker_ct != marker_exclude_ct)) {
-    plink_maxsnp = calc_plink_maxsnp(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len);
+    plink_maxsnp = calc_plink_maxsnp(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen);
     uii = BITCT_TO_WORDCT(unfiltered_marker_ct);
     if (bigstack_calloc_ul(uii, &marker_reverse)) {
       goto plink_ret_NOMEM;
     }
     if (bedfile && sample_ct) {
-      retval = calc_freqs_and_hwe(bedfile, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, founder_info, nonfounders, (misc_flags / MISC_MAF_SUCC) & 1, set_allele_freqs, bed_offset, (hwe_thresh > 0.0) || (calculation_type & CALC_HARDY), hwe_modifier & HWE_THRESH_ALL, (pheno_nm_ct && pheno_c)? ((calculation_type / CALC [...]
+      retval = calc_freqs_and_hwe(bedfile, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, founder_info, nonfounders, (misc_flags / MISC_MAF_SUCC) & 1, set_allele_freqs, bed_offset, (hwe_thresh > 0.0) || (calculation_type & CALC_HARDY), hwe_modifier & HWE_THRESH_ALL, (pheno_nm_ct && pheno_c)? ((calculation_type / CAL [...]
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1295,7 +1306,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
 
     if (freqname) {
-      retval = read_external_freqs(freqname, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr, marker_allele_ptrs, set_allele_freqs, nchrobs, (misc_flags / MISC_MAF_SUCC) & 1);
+      retval = read_external_freqs(freqname, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_blen, chrom_info_ptr, marker_allele_ptrs, set_allele_freqs, nchrobs, (misc_flags / MISC_MAF_SUCC) & 1);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1307,7 +1318,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     }
 
     if (a1alleles || a2alleles) {
-      retval = load_ax_alleles(a1alleles? a1alleles : a2alleles, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_allele_ptrs, &max_marker_allele_blen, marker_reverse, marker_ids, max_marker_id_len, set_allele_freqs, a2alleles? 1 : 0);
+      retval = load_ax_alleles(a1alleles? a1alleles : a2alleles, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_allele_ptrs, &max_marker_allele_blen, marker_reverse, marker_ids, max_marker_id_blen, set_allele_freqs, a2alleles? 1 : 0);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1324,18 +1335,18 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	  if (misc_flags & MISC_FREQ_COUNTS) {
 	    logprint("Note: --freq 'counts' modifier has no effect on cluster-stratified report.\n");
 	  }
-	  retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
+	  retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
 	} else if (misc_flags & MISC_FREQ_CC) {
-	  retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
+	  retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
 	} else {
-	  retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
+	  retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
 	}
 	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ))))) {
 	  goto plink_ret_1;
 	}
       }
       if (calculation_type & CALC_MISSING_REPORT) {
-	retval = write_missingness_reports(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_MISSING_GZ) & 1, plink_maxfid, plink_maxiid, plink_maxsnp, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, chrom_info_ptr, om_ip, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, sex_male, sample_male_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, hh_exists);
+	retval = write_missingness_reports(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_MISSING_GZ) & 1, plink_maxfid, plink_maxiid, plink_maxsnp, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, chrom_info_ptr, om_ip, marker_ids, max_marker_id_blen, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, sex_male, sample_male_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, hh_exists);
 	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT))))) {
 	  goto plink_ret_1;
 	}
@@ -1357,7 +1368,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
     oblig_missing_cleanup(om_ip);
     if (sample_ct) {
       if (calculation_type & CALC_HARDY) {
-	retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
+	retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
 	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_HARDY))))) {
 	  goto plink_ret_1;
 	}
@@ -1383,7 +1394,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 
     if (bedfile && sample_ct && (unfiltered_marker_ct > marker_exclude_ct)) {
       if ((calculation_type & CALC_MENDEL) || (fam_ip->mendel_modifier & MENDEL_FILTER)) {
-	retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, allow_no_variants, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists, c [...]
+	retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, allow_no_variants, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists,  [...]
 	if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT | CALC_MENDEL))))) {
 	  goto plink_ret_1;
 	}
@@ -1410,7 +1421,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	logerrprint("Error: --set/--make-set requires a sorted .bim file.  Retry this command after\nusing --make-bed to sort your data.\n");
 	goto plink_ret_INVALID_FORMAT;
       }
-      retval = define_sets(sip, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr, allow_no_variants);
+      retval = define_sets(sip, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, marker_ids, max_marker_id_blen, chrom_info_ptr, allow_no_variants);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1472,7 +1483,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	    ulii = unfiltered_sample_ct - pca_sample_ct;
 	  }
 	}
-	retval = calc_rel(threads, parallel_idx, parallel_tot, calculation_type, relip, bedfile, bed_offset, outname, outname_end, distance_wts_fname, (dist_calc_type & DISTANCE_WTS_NOHEADER), unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? (&ulii) : (&sample_exclude_ct), sample_ids, max_sample_id_len, set_allele_freqs, &rel_ibc, chrom_info_ptr);
+	retval = calc_rel(threads, parallel_idx, parallel_tot, calculation_type, relip, bedfile, bed_offset, outname, outname_end, distance_wts_fname, (dist_calc_type & DISTANCE_WTS_NOHEADER), unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_blen, unfiltered_sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? (&ulii) : (&sample_exclude_ct), sample_ids, max_sample_id_len, set_allele_freqs, &rel_ibc, chrom_info_ptr);
 	if (retval) {
 	  goto plink_ret_1;
 	}
@@ -1507,7 +1518,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	}
 #ifndef NOLAPACK
 	if (calculation_type & CALC_PCA) {
-	  retval = calc_pca(bedfile, bed_offset, outname, outname_end, calculation_type, relip, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? pca_sample_ct : sample_ct, sample_ids, max_sample_id_len, set_allele_freqs, chrom_info_ptr, rel_ibc);
+	  retval = calc_pca(bedfile, bed_offset, outname, outname_end, calculation_type, relip, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? pca_sample_ct : sample_ct, sample_ids, max_sample_id_len, set_allele_freqs, chrom_info_ptr, rel_ibc);
 	} else if (calculation_type & CALC_UNRELATED_HERITABILITY) {
 	  if (sample_ct != pheno_nm_ct) {
 	    logerrprint("Error: --unrelated-heritability requires phenotype data for all samples.\n(--prune should help.)\n");
@@ -1532,7 +1543,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       }
 
       if (calculation_type & CALC_TUCC) {
-	retval = make_pseudocontrols(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, fam_ip);
+	retval = make_pseudocontrols(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_cms, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, fam_ip);
 	if (retval) {
 	  goto plink_ret_1;
 	}
@@ -1558,28 +1569,28 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 
   if (marker_ct) {
     if (calculation_type & CALC_WRITE_SET) {
-      retval = write_set(sip, outname, outname_end, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr);
+      retval = write_set(sip, outname, outname_end, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr);
       if (retval) {
 	goto plink_ret_1;
       }
     }
 
     if (calculation_type & CALC_WRITE_SNPLIST) {
-      retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, nullptr, 0);
+      retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, nullptr, 0);
       if (retval) {
 	goto plink_ret_1;
       }
     }
 
     if (calculation_type & CALC_WRITE_VAR_RANGES) {
-      retval = write_var_ranges(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, write_var_range_ct);
+      retval = write_var_ranges(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, write_var_range_ct);
       if (retval) {
 	goto plink_ret_1;
       }
     }
 
     if (calculation_type & CALC_LIST_23_INDELS) {
-      retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, 1);
+      retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_allele_ptrs, 1);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1590,7 +1601,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	logerrprint("Error: --list-duplicate-vars requires a sorted .bim file.  Retry this command\nafter using --make-bed to sort your data.\n");
 	goto plink_ret_INVALID_FORMAT;
       }
-      retval = list_duplicate_vars(outname, outname_end, dupvar_modifier, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, marker_allele_ptrs);
+      retval = list_duplicate_vars(outname, outname_end, dupvar_modifier, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr, marker_allele_ptrs);
       if (retval) {
 	goto plink_ret_1;
       }
@@ -1612,13 +1623,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       }
     }
     if (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_MAKE_FAM)) {
-      retval = make_bed(bedfile, bed_offset, bimname, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, output_miss [...]
+      retval = make_bed(bedfile, bed_offset, bimname, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, output_mis [...]
       if (retval) {
         goto plink_ret_1;
       }
     }
     if (calculation_type & CALC_RECODE) {
-      retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_len, marker_cms, marker_allele_ptrs, max_marker_allele_blen, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, ph [...]
+      retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_blen, marker_cms, marker_allele_ptrs, max_marker_allele_blen, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, p [...]
       if (retval) {
         goto plink_ret_1;
       }
@@ -1631,7 +1642,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   }
 
   if ((calculation_type & CALC_EPI) && epi_ip->twolocus_mkr1) {
-    retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, sex_male, outname, outname_end, hh_exists);
+    retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, sex_male, outname, outname_end, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1642,7 +1653,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       logerrprint("Error: --show-tags requires a sorted .bim file.  Retry this command after using\n--make-bed to sort your data.\n");
       goto plink_ret_INVALID_CMDLINE;
     }
-    retval = show_tags(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, chrom_info_ptr, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
+    retval = show_tags(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, chrom_info_ptr, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1653,7 +1664,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       logerrprint("Error: --blocks requires a sorted .bim file.  Retry this command after using\n--make-bed to sort your data.\n");
       goto plink_ret_INVALID_CMDLINE;
     }
-    retval = haploview_blocks(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, founder_info, pheno_nm, sex_male, outname, outname_end, hh_exists);
+    retval = haploview_blocks(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, founder_info, pheno_nm, sex_male, outname, outname_end, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1664,7 +1675,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       logerrprint("Error: Run-of-homozygosity scanning requires a sorted .bim.  Retry this command\nafter using --make-bed to sort your data.\n");
       goto plink_ret_INVALID_CMDLINE;
     }
-    retval = calc_homozyg(homozyg_ptr, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, outname, outname_end, pheno_nm, pheno_c, pheno_d, output_missing_pheno, sex_male);
+    retval = calc_homozyg(homozyg_ptr, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, outname, outname_end, pheno_nm, pheno_c, pheno_d, output_missing_pheno, sex_male);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1676,9 +1687,9 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       goto plink_ret_INVALID_CMDLINE;
     }
     if (!(ldip->modifier & LD_PRUNE_PAIRPHASE)) {
-      retval = ld_prune(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
+      retval = ld_prune(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
     } else {
-      retval = indep_pairphase(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
+      retval = indep_pairphase(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
     }
     if (retval) {
       goto plink_ret_1;
@@ -1690,14 +1701,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       logerrprint("Error: LD-based strand flip scanning requires a sorted .bim.  Retry this\ncommand after using --make-bed to sort your data.\n");
       goto plink_ret_INVALID_CMDLINE;
     }
-    retval = flipscan(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, pheno_nm, pheno_c, founder_info, sex_male, outname, outname_end, hh_exists);
+    retval = flipscan(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, pheno_nm, pheno_c, founder_info, sex_male, outname, outname_end, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
   }
 
   if ((calculation_type & CALC_EPI) && epi_ip->ld_mkr1) {
-    retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, founder_info, 0, nullptr, 0, 0, nullptr, sex_male, nullptr, nullptr, hh_exists);
+    retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, founder_info, 0, nullptr, 0, 0, nullptr, sex_male, nullptr, nullptr, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1714,7 +1725,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	goto plink_ret_INVALID_CMDLINE;
       }
     }
-    retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, set_allele_freqs, chrom_info_ptr, marker_pos, marker_cms, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
+    retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, set_allele_freqs, chrom_info_ptr, marker_pos, marker_cms, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1724,7 +1735,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       logerrprint("Error: --test-mishap requires a sorted .bim.  Retry this command after using\n--make-bed to sort your data.\n");
       goto plink_ret_INVALID_CMDLINE;
     }
-    retval = test_mishap(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, min_maf, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct);
+    retval = test_mishap(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, min_maf, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1732,7 +1743,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 
   /*
   if (calculation_type & CALC_REGRESS_PCS) {
-    retval = calc_regress_pcs(evecname, regress_pcs_modifier, max_pcs, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, max_sample_id_len, sex_nm, sex_male, pheno_d, missing_phenod, outname, outname_end, hh_exists);
+    retval = calc_regress_pcs(evecname, regress_pcs_modifier, max_pcs, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, max_sample_id_len, sex_nm, sex_male, pheno_d, missing_phenod, outname, outname_end, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1808,7 +1819,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   }
   */
   if (distance_req(read_dists_fname, calculation_type)) {
-    retval = calc_distance(threads, parallel_idx, parallel_tot, bedfile, bed_offset, outname, outname_end, read_dists_fname, distance_wts_fname, distance_exp, calculation_type, dist_calc_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, set_allele_freqs, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, chrom_info_ptr);
+    retval = calc_distance(threads, parallel_idx, parallel_tot, bedfile, bed_offset, outname, outname_end, read_dists_fname, distance_wts_fname, distance_exp, calculation_type, dist_calc_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, set_allele_freqs, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, chrom_info_ptr);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1877,7 +1888,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
   }
 
   if (calculation_type & CALC_FST) {
-    retval = fst_report(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, pheno_nm, (misc_flags & MISC_FST_CC)? pheno_c : nullptr, cluster_ct, cluster_map, cluster_starts);
+    retval = fst_report(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, pheno_nm, (misc_flags & MISC_FST_CC)? pheno_c : nullptr, cluster_ct, cluster_map, cluster_starts);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1895,14 +1906,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       logerrprint("Error: --fast-epistasis case-only requires a sorted .bim.  Retry this command\nafter using --make-bed to sort your data.\n");
       goto plink_ret_INVALID_CMDLINE;
     }
-    retval = epistasis_report(threads, epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, glm_vif_thresh, sip);
+    retval = epistasis_report(threads, epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, glm_vif_thresh, sip);
     if (retval) {
       goto plink_ret_1;
     }
   }
 
   if (calculation_type & CALC_SCORE) {
-    retval = score_report(sc_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, set_allele_freqs, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, sex_male, pheno_nm, pheno_c, pheno_d, output_missing_pheno, hh_exists, chrom_info_ptr, outname, outname_end);
+    retval = score_report(sc_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, set_allele_freqs, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, sex_male, pheno_nm, pheno_c, pheno_d, output_missing_pheno, hh_exists, chrom_info_ptr, outname, outname_end);
     if (retval) {
       goto plink_ret_1;
     }
@@ -1910,7 +1921,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 
 #if defined __cplusplus && !defined _WIN32
   if (calculation_type & CALC_RPLUGIN) {
-    retval = rserve_call(rplugin_fname, rplugin_host_or_socket, rplugin_port, (misc_flags / MISC_RPLUGIN_DEBUG) & 1, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_c, pheno_d, cluster_ct, cluster_map, cluster_starts, covar_ct, covar_d, outname, outname_end);
+    retval = rserve_call(rplugin_fname, rplugin_host_or_socket, rplugin_port, (misc_flags / MISC_RPLUGIN_DEBUG) & 1, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_c, pheno_d, cluster_ct, cluster_map, cluster_starts, covar_ct, covar_d, outname, outname_end);
     if (retval) {
       goto plink_ret_1;
     }
@@ -2033,10 +2044,10 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       if (calculation_type & CALC_MODEL) {
 	if (pheno_d) {
 	  if (model_modifier & MODEL_ASSOC) {
-	    retval = qassoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_mperm_val, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_d, founder_info, sex_male, hh_exists, ldip->modifier & LD_IGNORE_ [...]
+	    retval = qassoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_mperm_val, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_d, founder_info, sex_male, hh_exists, ldip->modifier & LD_IGNORE [...]
 	  }
 	} else {
-	  retval = model_assoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_cell_ct, model_mperm_val, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm [...]
+	  retval = model_assoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_cell_ct, model_mperm_val, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_n [...]
 	}
 	if (retval) {
 	  goto plink_ret_1;
@@ -2046,23 +2057,23 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	if (!(glm_modifier & GLM_NO_SNP)) {
 	  if (pheno_d) {
 #ifndef NOLAPACK
-	    retval = glm_linear_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sam [...]
+	    retval = glm_linear_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sa [...]
 #else
             logerrprint("Warning: Skipping --logistic on --all-pheno QT since this is a no-LAPACK " PROG_NAME_CAPS"\nbuild.\n");
 #endif
 	  } else {
-	    retval = glm_logistic_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_s [...]
+	    retval = glm_logistic_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_ [...]
 	  }
 	} else {
 	  if (pheno_d) {
 #ifndef NOLAPACK
-	    retval = glm_linear_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm_ct [...]
+	    retval = glm_linear_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm_c [...]
 #else
             logerrprint("Warning: Skipping --logistic on --all-pheno QT since this is a no-LAPACK " PROG_NAME_CAPS"\nbuild.\n");
 #endif
 
 	  } else {
-	    retval = glm_logistic_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm_ [...]
+	    retval = glm_logistic_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm [...]
 	  }
 	}
 	if (retval) {
@@ -2071,47 +2082,47 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       }
       // if case/control phenotype loaded with --all-pheno, skip --gxe
       if ((calculation_type & CALC_GXE) && pheno_d) {
-	retval = gxe_assoc(bedfile, bed_offset, outname, outname_end2, output_min_p, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, pheno_d, gxe_covar_nm, gxe_covar_c, sex_male, hh_exists);
+	retval = gxe_assoc(bedfile, bed_offset, outname, outname_end2, output_min_p, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, pheno_d, gxe_covar_nm, gxe_covar_c, sex_male, hh_exists);
 	if (retval) {
 	  goto plink_ret_1;
 	}
       }
       if (calculation_type & CALC_LASSO) {
-	retval = lasso(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, pheno_nm_ct, lasso_h2, lasso_minlambda, lasso_select_covars_range_list_ptr, misc_flags, sample_exclude, pheno_nm, pheno_c, pheno_d, covar_ct, covar_names, max_covar_name_len, covar_nm, covar_d, sex_male, hh_exists);
+	retval = lasso(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, pheno_nm_ct, lasso_h2, lasso_minlambda, lasso_select_covars_range_list_ptr, misc_flags, sample_exclude, pheno_nm, pheno_c, pheno_d, covar_ct, covar_names, max_covar_name_len, covar_nm, covar_d, sex_male, hh_exists);
 	if (retval) {
 	  goto plink_ret_1;
 	}
       }
       if ((calculation_type & CALC_CMH) && pheno_c) {
 	if (!(cluster_ptr->modifier & CLUSTER_CMH2)) {
-          retval = cmh_assoc(threads, bedfile, bed_offset, outname, outname_end2, cluster_ptr->cmh_mperm_val, cluster_ptr->modifier, ci_size, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, [...]
+          retval = cmh_assoc(threads, bedfile, bed_offset, outname, outname_end2, cluster_ptr->cmh_mperm_val, cluster_ptr->modifier, ci_size, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male [...]
 	} else {
-          retval = cmh2_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+          retval = cmh2_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
 	}
         if (retval) {
           goto plink_ret_1;
 	}
       }
       if ((calculation_type & CALC_HOMOG) && pheno_c) {
-	retval = homog_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+	retval = homog_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
         if (retval) {
           goto plink_ret_1;
 	}
       }
       if ((calculation_type & CALC_TESTMISS) && pheno_c) {
-        retval = testmiss(threads, bedfile, bed_offset, outname, outname_end2, testmiss_mperm_val, testmiss_modifier, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+        retval = testmiss(threads, bedfile, bed_offset, outname, outname_end2, testmiss_mperm_val, testmiss_modifier, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
         if (retval) {
 	  goto plink_ret_1;
 	}
       }
       if ((calculation_type & CALC_TDT) && pheno_c) {
-	retval = tdt(threads, bedfile, bed_offset, outname, outname_end2, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, mperm_save, pheno_nm, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, materna [...]
+	retval = tdt(threads, bedfile, bed_offset, outname, outname_end2, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, mperm_save, pheno_nm, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, matern [...]
 	if (retval) {
 	  goto plink_ret_1;
 	}
       }
       if ((calculation_type & CALC_DFAM) && pheno_c) {
-	retval = dfam(threads, bedfile, bed_offset, outname, outname_end2, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_i [...]
+	retval = dfam(threads, bedfile, bed_offset, outname, outname_end2, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ [...]
 	if (retval) {
 	  goto plink_ret_1;
 	}
@@ -2127,7 +2138,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
 	if (mtest_adjust && (fam_ip->qfam_modifier & QFAM_PERM)) {
 	  logerrprint("Warning: The QFAM test does not support --adjust.  Use max(T) permutation to\nobtain multiple-testing corrected p-values.\n");
 	}
-        retval = qfam(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, pheno_nm, pheno_d, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, hh_exists, perm_batch_size, fam_ip);
+        retval = qfam(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, pheno_nm, pheno_d, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, hh_exists, perm_batch_size, fam_ip);
         if (retval) {
 	  goto plink_ret_1;
 	}
@@ -2140,7 +2151,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
       logerrprint("Error: --clump requires a sorted .bim.  Retry this command after using\n--make-bed to sort your data.\n");
       goto plink_ret_INVALID_CMDLINE;
     }
-    retval = clump_reports(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, founder_info, clump_ip, sex_male, hh_exists);
+    retval = clump_reports(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, founder_info, clump_ip, sex_male, hh_exists);
     if (retval) {
       goto plink_ret_1;
     }
@@ -13461,15 +13472,17 @@ int32_t main(int32_t argc, char** argv) {
     }
   }
   if (annot_info.fname) {
-    retval = annotate(&annot_info, outname, outname_end, pfilter, &chrom_info);
+    retval = annotate(&annot_info, (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1, outname, outname_end, pfilter, &chrom_info);
   }
   if (gene_report_fname) {
-    retval = gene_report(gene_report_fname, gene_report_glist, gene_report_subset, gene_report_border, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, gene_report_snp_field, outname, outname_end, pfilter, &chrom_info);
+    retval = gene_report(gene_report_fname, gene_report_glist, gene_report_subset, gene_report_border, (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, gene_report_snp_field, outname, outname_end, pfilter, &chrom_info);
     if (retval) {
       goto main_ret_1;
     }
   }
   if (metaanal_fnames) {
+    // possible todo: make this support --aec (takes a bit of work since
+    // chromosome byte in data structure must be widened)
     retval = meta_analysis(metaanal_fnames, metaanal_snpfield_search_order, metaanal_a1field_search_order, metaanal_a2field_search_order, metaanal_pfield_search_order, metaanal_essfield_search_order, metaanal_flags, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, outname, outname_end, output_min_p, &chrom_info);
     if (retval) {
       goto main_ret_1;
diff --git a/plink_data.c b/plink_data.c
index 50baef0..1046d32 100644
--- a/plink_data.c
+++ b/plink_data.c
@@ -415,106 +415,8 @@ static inline uint32_t is_acgtm(unsigned char ucc) {
   return (uint32_t)(acgtm_bool_table[ucc]);
 }
 
-void load_bim_sf_insert(uint32_t chrom_idx, uint32_t pos_start, uint32_t pos_end, uint32_t* start_idxs, uint32_t* llbuf, uint32_t* lltop_ptr, uint32_t* entry_ct_ptr) {
-  uint32_t lltop = *lltop_ptr;
-  uint32_t entry_ct = *entry_ct_ptr;
-  uint32_t llidx;
-  uint32_t new_start;
-  uint32_t new_end;
-  uint32_t new_llidx;
-  uint32_t old_llidx;
-  if (start_idxs[chrom_idx] == 1) {
-    start_idxs[chrom_idx] = lltop;
-    llbuf[lltop++] = pos_start;
-    llbuf[lltop++] = pos_end;
-    llbuf[lltop++] = 1;
-    entry_ct++;
-  } else {
-    llidx = start_idxs[chrom_idx];
-    while (1) {
-      if (llbuf[llidx] > pos_end) {
-	if (llbuf[llidx] == pos_end + 1) {
-	  llbuf[llidx] = pos_start;
-	} else {
-	  new_llidx = llidx;
-	  do {
-	    llidx = new_llidx;
-	    new_start = llbuf[llidx];
-	    llbuf[llidx] = pos_start;
-	    pos_start = new_start;
-	    new_end = llbuf[llidx + 1];
-	    llbuf[llidx + 1] = pos_end;
-	    pos_end = new_end;
-	    new_llidx = llbuf[llidx + 2];
-	  } while (new_llidx != 1);
-	  llbuf[llidx + 2] = lltop;
-	  llbuf[lltop++] = pos_start;
-	  llbuf[lltop++] = pos_end;
-	  llbuf[lltop++] = 1;
-	  entry_ct++;
-	}
-	break;
-      } else if (llbuf[llidx + 1] + 1 >= pos_start) {
-	// mergeable
-	if (llbuf[llidx] > pos_start) {
-	  llbuf[llidx] = pos_start;
-	}
-	if (llbuf[llidx + 1] < pos_end) {
-	  // scan forward, attempt to collapse entries
-
-	  // bugfix: if no forward entries can be collapsed, current entry must
-	  // be updated
-	  llbuf[llidx + 1] = pos_end;
-	  
-	  old_llidx = llidx;
-          new_llidx = llbuf[llidx + 2];
-	  while (new_llidx != 1) {
-	    llidx = new_llidx;
-	    if (llbuf[llidx] > pos_end + 1) {
-	      break;
-	    }
-	    entry_ct--;
-	    new_llidx = llbuf[llidx + 2];
-	    llbuf[old_llidx + 2] = new_llidx;
-	    if (llbuf[llidx + 1] >= pos_end) {
-	      llbuf[old_llidx + 1] = llbuf[llidx + 1];
-	      break;
-	    }
-	  }
-	}
-	break;
-      }
-      new_llidx = llbuf[llidx + 2];
-      if (new_llidx == 1) {
-	llbuf[llidx + 2] = lltop;
-	llbuf[lltop++] = pos_start;
-	llbuf[lltop++] = pos_end;
-	llbuf[lltop++] = 1;
-	entry_ct++;
-	break;
-      }
-      llidx = new_llidx;
-    }
-  }
-  *lltop_ptr = lltop;
-  *entry_ct_ptr = entry_ct;
-}
-
-static inline uint32_t sf_out_of_range(uint32_t cur_pos, uint32_t chrom_idx, uint32_t* sf_start_idxs, uint32_t* sf_pos) {
-  uint32_t cur_idx = sf_start_idxs[chrom_idx];
-  uint32_t end_idx = sf_start_idxs[chrom_idx + 1];
-  while (cur_idx < end_idx) {
-    if ((cur_pos >= sf_pos[cur_idx]) && (cur_pos <= sf_pos[cur_idx + 1])) {
-      return 0;
-    }
-    cur_idx += 2;
-  }
-  return 1;
-}
-
-int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
+int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
   // supports .map now too, to make e.g. --snps + --dosage work
-  unsigned char* bigstack_mark = g_bigstack_base;
   FILE* bimfile = nullptr;
   uintptr_t unfiltered_marker_ct = 0;
   uintptr_t marker_exclude_ct = *marker_exclude_ct_ptr;
@@ -533,12 +435,7 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
   uint32_t from_slen = markername_from? strlen(markername_from) : 0;
   uint32_t to_slen = markername_to? strlen(markername_to) : 0;
   uint32_t snp_slen = markername_snp? strlen(markername_snp) : 0;
-  // "sf" = "snp filter" (could rename to "vf"...)
-  uint32_t sf_ct = sf_range_list_ptr->name_ct;
-  // assume for now that sf_ct * sf_max_len < 2^32, since these are based on
-  // command-line parameters
-  uint32_t sf_max_len = sf_range_list_ptr->name_max_len;
-  uint32_t slen_check = from_slen || to_slen || snp_slen || sf_ct;
+  uint32_t slen_check = from_slen || to_slen || snp_slen;
   uint32_t from_chrom = MAX_POSSIBLE_CHROM;
   uint32_t to_chrom = MAX_POSSIBLE_CHROM;
   uint32_t snp_chrom = MAX_POSSIBLE_CHROM;
@@ -558,12 +455,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
   int32_t exclude_window_end = -1;
   int32_t retval = 0;
   char* missing_geno_ptr = (char*)g_missing_geno_ptr;
-  uint32_t* sf_start_idxs = nullptr;
-  uint32_t* sf_pos = nullptr;
-  uint32_t* sf_str_chroms = nullptr;
-  uint32_t* sf_str_pos = nullptr;
-  uint32_t* sf_str_lens = nullptr;
-  uint32_t* sf_llbuf = nullptr;
   char* loadbuf2 = nullptr; // on heap, second pass
   char* prev_new_id = nullptr;
   char* bufptr2 = nullptr;
@@ -571,7 +462,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
   char* bufptr5 = nullptr;
   char** marker_allele_ptrs = nullptr;
   uintptr_t loaded_chrom_mask[CHROM_MASK_WORDS];
-  uintptr_t sf_mask[CHROM_MASK_WORDS];
   uint32_t missing_template_seg_len[5];
   uint32_t missing_template_seg_order[4]; // '@', '#', '$1', '$2'
   uint32_t insert_buf_len[4];
@@ -583,15 +473,11 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
   uintptr_t loadbuf_size;
   uintptr_t unfiltered_marker_ctl;
   uintptr_t marker_uidx;
-  uint32_t sf_entry_ct;
-  uint32_t sf_lltop;
   char* bufptr;
   char* col2_ptr;
   uintptr_t ulii;
   uint32_t ukk;
   uint32_t umm;
-  uint32_t unn;
-  uint32_t uoo;
   int32_t jj;
   uint32_t cur_pos;
   double cur_cm;
@@ -602,22 +488,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
     insert_buf[1] = nullptr;
     insert_buf[2] = nullptr;
     insert_buf[3] = nullptr;
-    if (sf_ct) {
-      sf_start_idxs = (uint32_t*)malloc((MAX_POSSIBLE_CHROM + 1) * sizeof(int32_t));
-      if (!sf_start_idxs) {
-	goto load_bim_ret_NOMEM;
-      }
-      if (bigstack_alloc_ui(sf_ct, &sf_str_chroms) ||
-	  bigstack_alloc_ui(sf_ct, &sf_str_pos) ||
-	  bigstack_alloc_ui(sf_ct, &sf_str_lens) ||
-	  bigstack_alloc_ui(3 * (MAX_POSSIBLE_CHROM + sf_ct), &sf_llbuf)) {
-	goto load_bim_ret_NOMEM;
-      }
-      for (uii = 0; uii < sf_ct; uii++) {
-	sf_str_chroms[uii] = MAX_POSSIBLE_CHROM;
-	sf_str_lens[uii] = strlen(&(sf_range_list_ptr->names[uii * sf_max_len]));
-      }
-    }
     fill_uint_zero(5, missing_template_seg_len);
     missing_template_seg[0] = nullptr;
     missing_template_seg[1] = nullptr;
@@ -855,157 +725,54 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
 	  }
 	  bufptr2[strlen_se(bufptr2)] = '\0';
 	}
-	if (sf_ct) {
-	  uii = 0;
-	  do {
-	    if ((ulii == sf_str_lens[uii]) && (!memcmp(col2_ptr, &(sf_range_list_ptr->names[uii * sf_max_len]), ulii))) {
-	      if (sf_str_chroms[uii] != MAX_POSSIBLE_CHROM) {
-		goto load_bim_ret_DUPLICATE_ID;
-	      }
-	      sf_str_chroms[uii] = cur_chrom_code;
-	      if (scan_uint_defcap(bufptr2, &(sf_str_pos[uii]))) {
-		goto load_bim_ret_INVALID_BP_COORDINATE;
-	      }
-	      break;
-	    }
-	  } while (++uii < sf_ct);
-	} else {
-	  if ((ulii == from_slen) && (!memcmp(col2_ptr, markername_from, ulii))) {
-	    if (from_chrom != MAX_POSSIBLE_CHROM) {
-	      goto load_bim_ret_DUPLICATE_ID;
-	    }
-	    from_chrom = cur_chrom_code;
-	    if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_start)) {
-	      goto load_bim_ret_INVALID_BP_COORDINATE;
-	    }
-	    if (to_chrom != MAX_POSSIBLE_CHROM) {
-	      if (from_chrom != to_chrom) {
-		goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
-	      }
-	    }
-	    fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
-	    SET_BIT(from_chrom, chrom_info_ptr->chrom_mask);
-	  }
-	  if ((ulii == to_slen) && (!memcmp(col2_ptr, markername_to, ulii))) {
-	    if (to_chrom != MAX_POSSIBLE_CHROM) {
-	      goto load_bim_ret_DUPLICATE_ID;
-	    }
-	    to_chrom = cur_chrom_code;
-	    if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_end)) {
-	      goto load_bim_ret_INVALID_BP_COORDINATE;
-	    }
-	    if (from_chrom != MAX_POSSIBLE_CHROM) {
-	      if (to_chrom != from_chrom) {
-		goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
-	      }
-	    }
-	    fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
-	    SET_BIT(to_chrom, chrom_info_ptr->chrom_mask);
-	  }
-	  if ((ulii == snp_slen) && (!memcmp(col2_ptr, markername_snp, ulii))) {
-	    if (snp_chrom != MAX_POSSIBLE_CHROM) {
-	      goto load_bim_ret_DUPLICATE_ID;
-	    }
-	    snp_chrom = cur_chrom_code;
-	    if (scan_uint_defcap(bufptr2, &snp_pos)) {
-	      goto load_bim_ret_INVALID_BP_COORDINATE;
-	    }
-	    if (!exclude_snp) {
-	      fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
-	      SET_BIT(snp_chrom, chrom_info_ptr->chrom_mask);
-	    }
-	  }
-	}
+        if ((ulii == from_slen) && (!memcmp(col2_ptr, markername_from, ulii))) {
+          if (from_chrom != MAX_POSSIBLE_CHROM) {
+            goto load_bim_ret_DUPLICATE_ID;
+          }
+          from_chrom = cur_chrom_code;
+          if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_start)) {
+            goto load_bim_ret_INVALID_BP_COORDINATE;
+          }
+          if (to_chrom != MAX_POSSIBLE_CHROM) {
+            if (from_chrom != to_chrom) {
+              goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
+            }
+          }
+          fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+          SET_BIT(from_chrom, chrom_info_ptr->chrom_mask);
+        }
+        if ((ulii == to_slen) && (!memcmp(col2_ptr, markername_to, ulii))) {
+          if (to_chrom != MAX_POSSIBLE_CHROM) {
+            goto load_bim_ret_DUPLICATE_ID;
+          }
+          to_chrom = cur_chrom_code;
+          if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_end)) {
+            goto load_bim_ret_INVALID_BP_COORDINATE;
+          }
+          if (from_chrom != MAX_POSSIBLE_CHROM) {
+            if (to_chrom != from_chrom) {
+              goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
+            }
+          }
+          fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+          SET_BIT(to_chrom, chrom_info_ptr->chrom_mask);
+        }
+        if ((ulii == snp_slen) && (!memcmp(col2_ptr, markername_snp, ulii))) {
+          if (snp_chrom != MAX_POSSIBLE_CHROM) {
+            goto load_bim_ret_DUPLICATE_ID;
+          }
+          snp_chrom = cur_chrom_code;
+          if (scan_uint_defcap(bufptr2, &snp_pos)) {
+            goto load_bim_ret_INVALID_BP_COORDINATE;
+          }
+          if (!exclude_snp) {
+            fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+            SET_BIT(snp_chrom, chrom_info_ptr->chrom_mask);
+          }
+        }
       }
       unfiltered_marker_ct++;
     }
-    if (sf_ct) {
-      for (uii = 0; uii < sf_ct; uii++) {
-	if (sf_str_chroms[uii] == MAX_POSSIBLE_CHROM) {
-	  LOGPREPRINTFWW("Error: Variant '%s' not found in %s.\n", &(sf_range_list_ptr->names[uii * sf_max_len]), ftype_str);
-	  goto load_bim_ret_INVALID_FORMAT_2;
-	}
-      }
-      // effectively build out one linked list per chromosome
-      memcpy(sf_mask, chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
-      sf_entry_ct = 0;
-      sf_lltop = 0;
-      ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
-      for (uii = 0; uii <= ujj; uii++) {
-	sf_start_idxs[uii] = 1; // impossible (multiples of 3)
-      }
-      uii = 0;
-      do {
-	ujj = sf_str_chroms[uii];
-	ukk = sf_str_pos[uii];
-	if (sf_range_list_ptr->starts_range[uii]) {
-	  umm = sf_str_chroms[uii + 1];
-	  unn = sf_str_pos[uii + 1];
-	  if (ujj != umm) {
-	    if (ujj > umm) {
-	      uoo = ujj;
-	      ujj = umm;
-	      umm = uoo;
-	      uoo = ukk;
-	      ukk = unn;
-	      unn = uoo;
-	    }
-	    if (IS_SET(sf_mask, ujj)) {
-	      load_bim_sf_insert(ujj, ukk, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
-	    }
-	    for (uoo = ujj + 1; uoo < umm; uoo++) {
-	      if (IS_SET(sf_mask, uoo)) {
-		load_bim_sf_insert(uoo, 0, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
-	      }
-	    }
-	    if (IS_SET(sf_mask, umm)) {
-	      load_bim_sf_insert(umm, 0, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
-	    }
-	  } else {
-	    if (ukk > unn) {
-	      umm = ukk;
-	      ukk = unn;
-	      unn = umm;
-	    }
-	    if (IS_SET(sf_mask, ujj)) {
-	      load_bim_sf_insert(ujj, ukk, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
-	    }
-	  }
-	  uii += 2;
-	} else {
-	  if (IS_SET(sf_mask, ujj)) {
-	    load_bim_sf_insert(ujj, ukk, ukk, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
-	  }
-	  uii++;
-	}
-      } while (uii < sf_ct);
-      // now compactify
-      sf_pos = (uint32_t*)malloc(sf_entry_ct * 2 * sizeof(int32_t));
-      if (!sf_pos) {
-	goto load_bim_ret_NOMEM;
-      }
-      ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
-      ukk = 0;
-      for (uii = 0; uii <= ujj; uii++) {
-	if (sf_start_idxs[uii] == 1) {
-	  CLEAR_BIT(uii, sf_mask);
-	  sf_start_idxs[uii] = ukk;
-	  continue;
-	}
-	umm = sf_start_idxs[uii];
-	sf_start_idxs[uii] = ukk;
-	do {
-	  sf_pos[ukk++] = sf_llbuf[umm];
-	  sf_pos[ukk++] = sf_llbuf[umm + 1];
-	  umm = sf_llbuf[umm + 2];
-	} while (umm != 1);
-      }
-      sf_start_idxs[ujj + 1] = ukk;
-      if (!exclude_snp) {
-	memcpy(chrom_info_ptr->chrom_mask, sf_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
-      }
-      bigstack_reset(bigstack_mark);
-    }
     if (!feof(bimfile)) {
       goto load_bim_ret_READ_FAIL;
     }
@@ -1249,7 +1016,7 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
 	} else {
 	  last_pos = cur_pos;
 	}
-	if ((sf_ct && (exclude_snp ^ sf_out_of_range(cur_pos, (uint32_t)cur_chrom_code, sf_start_idxs, sf_pos))) || ((marker_pos_start != -1) && ((((int32_t)cur_pos) < marker_pos_start) || (((int32_t)cur_pos) > marker_pos_end)))) {
+	if ((marker_pos_start != -1) && ((((int32_t)cur_pos) < marker_pos_start) || (((int32_t)cur_pos) > marker_pos_end))) {
 	  goto load_bim_skip_marker;
 	}
 	if (snp_slen) {
@@ -1419,8 +1186,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
   }
  load_bim_ret_1:
   fclose_cond(bimfile);
-  free_cond(sf_start_idxs);
-  free_cond(sf_pos);
   free_cond(loadbuf2);
   free_cond(prev_new_id);
   free_cond(insert_buf[2]);
diff --git a/plink_data.h b/plink_data.h
index 4a76e01..502bbe3 100644
--- a/plink_data.h
+++ b/plink_data.h
@@ -20,7 +20,7 @@
 
 int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t unfiltered_sample_ct, uint64_t fsize);
 
-int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
+int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
 
 int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* sex_nm, uintptr_t* sex_male, char* sample_ids, uintptr_t max_sample_id_len, double missing_phenod, uint32_t covar_modifier, Range_list* covar_range_list_ptr, uint32_t gxe_mcovar, uintptr_t* covar_ctx_ptr, char** covar_names_ptr, uintptr_t* max_covar_name_len_ptr, uintptr_t* pheno_nm, uintptr_t** covar_nm_ptr, double** covar_d_ptr, uintptr_t** gxe_covar_nm_ptr, [...]
 
diff --git a/plink_dosage.c b/plink_dosage.c
index 952e50c..6c7042e 100644
--- a/plink_dosage.c
+++ b/plink_dosage.c
@@ -562,7 +562,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
   uint32_t* uiptr3 = nullptr;
   uintptr_t unfiltered_marker_ct = 0;
   uintptr_t marker_exclude_ct = 0;
-  uintptr_t max_marker_id_len = 0;
+  uintptr_t max_marker_id_blen = 0;
   uintptr_t unfiltered_sample_ct = 0;
   uintptr_t sample_exclude_ct = 0;
   uintptr_t max_sample_id_len = 4;
@@ -691,7 +691,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
   int32_t ii;
   pzwrite_init_null(&ps);
   if (load_map) {
-    retval = load_bim(mapname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, nullptr, nullptr, nullptr, &ulii, &marker_ids, nullptr, 0, nullptr, chrom_info_ptr, nullptr, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, &map_is_unsorted, do_glm || min_bp_space || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)), 0, 0, nullptr, ".map file", [...]
+    retval = load_bim(mapname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_blen, &marker_exclude, nullptr, nullptr, nullptr, &ulii, &marker_ids, nullptr, 0, nullptr, chrom_info_ptr, nullptr, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, &map_is_unsorted, do_glm || min_bp_space || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)), 0, 0, nullptr, ".map file", nullptr);
     if (retval) {
       goto plink1_dosage_ret_1;
     }
@@ -779,24 +779,30 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
     bigstack_reset(bigstack_mark);
   }
   if (load_map) {
-    uii = update_map || update_name || filter_attrib_fname || qual_filter;
+    uii = update_map || update_name || filter_attrib_fname || qual_filter || snps_range_list_ptr->names;
     if (uii || extractname || excludename) {
       bigstack_mark = g_bigstack_base;
-      retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, !uii, &marker_id_htable_size, &marker_id_htable);
+      retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, !uii, &marker_id_htable_size, &marker_id_htable);
       if (retval) {
 	goto plink1_dosage_ret_1;
       }
 
+      if (snps_range_list_ptr->names) {
+        retval = snps_flag(marker_ids, marker_id_htable, snps_range_list_ptr, unfiltered_marker_ct, max_marker_id_blen, marker_id_htable_size, (filter_flags / FILTER_EXCLUDE_MARKERNAME_SNP) & 1, marker_exclude, &marker_exclude_ct);
+        if (retval) {
+          goto plink1_dosage_ret_1;
+        }
+      }
       if (update_map) {
-	retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
+	retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
       } else if (update_name) {
-	retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct);
+	retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct);
 	if (retval) {
 	  goto plink1_dosage_ret_1;
 	}
 	if (extractname || excludename) {
 	  bigstack_reset(bigstack_mark);
-	  retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
+	  retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, 0, &marker_id_htable_size, &marker_id_htable);
 	  if (retval) {
 	    goto plink1_dosage_ret_1;
 	  }
@@ -804,7 +810,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       }
       if (extractname) {
 	if (!(misc_flags & MISC_EXTRACT_RANGE)) {
-	  retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
+	  retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
 	  if (retval) {
 	    goto plink1_dosage_ret_1;
 	  }
@@ -823,7 +829,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       }
       if (excludename) {
 	if (!(misc_flags & MISC_EXCLUDE_RANGE)) {
-	  retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
+	  retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
 	  if (retval) {
 	    goto plink1_dosage_ret_1;
 	  }
@@ -841,13 +847,13 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
 	}
       }
       if (filter_attrib_fname) {
-	retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink1_dosage_ret_1;
 	}
       }
       if (qual_filter) {
-	retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+	retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
 	if (retval) {
 	  goto plink1_dosage_ret_1;
 	}
@@ -1087,7 +1093,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
       enforce_min_bp_space(min_bp_space, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, chrom_info_ptr);
     }
     marker_ct = unfiltered_marker_ct - marker_exclude_ct;
-    retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
+    retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, 0, &marker_id_htable_size, &marker_id_htable);
     if (retval) {
       goto plink1_dosage_ret_1;
     }
@@ -1774,7 +1780,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
 	    goto plink1_dosage_ret_NOMEM;
 	  }
 	  if (load_map) {
-	    marker_idx = id_htable_find(bufptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
+	    marker_idx = id_htable_find(bufptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen);
 	    if (marker_idx == 0xffffffffU) {
 #ifdef __LP64__
 	      marker_idx = ~ZEROLU;
diff --git a/plink_filter.c b/plink_filter.c
index c6f40c1..aa63b5a 100644
--- a/plink_filter.c
+++ b/plink_filter.c
@@ -193,6 +193,69 @@ int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, u
   return retval;
 }
 
+// backported from plink 2.0.  Applies the --snps / --exclude-snps ID list in snps_range_list_ptr to variant_exclude and stores the new excluded-variant count in *exclude_ct_ptr.  Returns 0 on success, RET_NOMEM if the scratch bitvector can't be allocated, or RET_INVALID_FORMAT if a listed ID is not found in the hash table.
+int32_t snps_flag(const char* variant_ids, const uint32_t* variant_id_htable, const Range_list* snps_range_list_ptr, uint32_t raw_variant_ct, uintptr_t max_variant_id_blen, uintptr_t variant_id_htable_size, uint32_t do_exclude, uintptr_t* variant_exclude, uintptr_t* exclude_ct_ptr) {
+  unsigned char* bigstack_mark = g_bigstack_base; // restored on every exit path below, releasing seen_uidxs
+  int32_t reterr = 0;
+  {
+    const char* varid_strbox = snps_range_list_ptr->names; // fixed-stride string array; entry i starts at i * name_max_len
+    const unsigned char* starts_range = snps_range_list_ptr->starts_range; // nonzero => entry i is the first endpoint of a range
+    const uint32_t varid_ct = snps_range_list_ptr->name_ct;
+    const uintptr_t varid_max_blen = snps_range_list_ptr->name_max_len;
+    const uint32_t raw_variant_ctl = BITCT_TO_WORDCT(raw_variant_ct); // length of the bitvectors below, in words
+    uintptr_t* seen_uidxs; // one bit per variant index; set for every variant the list selects
+    if (bigstack_calloc_ul(raw_variant_ctl, &seen_uidxs)) { // presumably zero-initialized (calloc-style helper) -- confirm
+      goto snps_flag_ret_NOMEM;
+    }
+    uint32_t range_start_vidx = 0xffffffffU; // sentinel: no range is currently open
+    for (uint32_t varid_idx = 0; varid_idx < varid_ct; ++varid_idx) {
+      const char* cur_varid = &(varid_strbox[varid_idx * varid_max_blen]);
+      uint32_t variant_uidx = id_htable_find(cur_varid, strlen(cur_varid), variant_id_htable, variant_id_htable_size, variant_ids, max_variant_id_blen);
+      if (variant_uidx == 0xffffffffU) { // lookup miss: every listed ID must exist
+	sprintf(g_logbuf, "Error: --%ssnps variant '%s' not found.\n", do_exclude? "exclude-" : "", cur_varid);
+	goto snps_flag_ret_INVALID_FORMAT_WW;
+      }
+      if (starts_range[varid_idx]) {
+	range_start_vidx = variant_uidx; // remember the first endpoint; the next entry closes the range
+      } else {
+	if (range_start_vidx != 0xffffffffU) { // previous entry opened a range and this entry closes it
+	  if (variant_uidx < range_start_vidx) { // endpoints may come in either order; swap so start <= end
+	    const uint32_t uii = variant_uidx;
+	    variant_uidx = range_start_vidx;
+	    range_start_vidx = uii;
+	  }
+	  fill_bits(range_start_vidx, variant_uidx + 1 - range_start_vidx, seen_uidxs); // inclusive range; assumes fill_bits(start, len, ...) sets len bits -- verify
+	} else {
+          set_bit(variant_uidx, seen_uidxs); // standalone ID
+	}
+	range_start_vidx = 0xffffffffU; // close the range (no-op for a standalone ID)
+      }
+    }
+    if (do_exclude) {
+      bitvec_or(seen_uidxs, raw_variant_ctl, variant_exclude); // presumably variant_exclude |= seen_uidxs -- confirm helper semantics
+    } else {
+      bitvec_ornot(seen_uidxs, raw_variant_ctl, variant_exclude); // presumably variant_exclude |= ~seen_uidxs, i.e. keep only listed variants -- confirm
+      zero_trailing_bits(raw_variant_ct, variant_exclude); // presumably clears bits past raw_variant_ct in the last word so the popcount below is not inflated -- confirm
+    }
+    const uint32_t new_exclude_ct = popcount_longs(variant_exclude, raw_variant_ctl);
+    const uint32_t new_variant_ct = raw_variant_ct - new_exclude_ct;
+    LOGPRINTF("--%ssnps: %u variant%s remaining.\n", do_exclude? "exclude-" : "", new_variant_ct, (new_variant_ct == 1)? "" : "s");
+    *exclude_ct_ptr = new_exclude_ct; // only written on success; error paths jump past this line
+  }
+  while (0) { // never entered by fallthrough; the labels inside are reached only via goto
+  snps_flag_ret_NOMEM:
+    reterr = RET_NOMEM;
+    break;
+  snps_flag_ret_INVALID_FORMAT_WW:
+    wordwrapb(0);
+    logerrprintb();
+    reterr = RET_INVALID_FORMAT;
+    break;
+  }
+  bigstack_reset(bigstack_mark);
+  return reterr;
+}
+
 void extract_exclude_process_token(const char* tok_start, const uint32_t* marker_id_htable, uint32_t marker_id_htable_size, const uint32_t* extra_alloc_base, const char* marker_ids, uintptr_t max_marker_id_len, uintptr_t* marker_exclude, uintptr_t* already_seen, uintptr_t* duplicate_ct_ptr, uint32_t do_exclude, uint32_t curtoklen) {
   if (curtoklen >= max_marker_id_len) {
     return;
diff --git a/plink_filter.h b/plink_filter.h
index 514ce20..df9c673 100644
--- a/plink_filter.h
+++ b/plink_filter.h
@@ -36,6 +36,8 @@ void oblig_missing_cleanup(Oblig_missing_info* om_ip);
 
 int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_len, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr, uint32_t flags, uint32_t allow_no_samples);
 
+int32_t snps_flag(const char* variant_ids, const uint32_t* variant_id_htable, const Range_list* snps_range_list_ptr, uint32_t raw_variant_ct, uintptr_t max_variant_id_blen, uintptr_t variant_id_htable_size, uint32_t do_exclude, uintptr_t* variant_exclude, uintptr_t* exclude_ct_ptr);
+
 int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t do_exclude, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t allow_no_variants);
 
 int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uint32_t id_htable_size, uint32_t allow_no_variants, char* item_ids, uintptr_t max_id_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr);
diff --git a/plink_glm.c b/plink_glm.c
index 45faaa1..2397cfe 100644
--- a/plink_glm.c
+++ b/plink_glm.c
@@ -1055,7 +1055,7 @@ static inline void mult_tmatrix_nxd_vect_d(const float* tm, const float* vect, f
   uint32_t row_ctm3;
   uint32_t col_idx;
   if (row_ct < 4) {
-    memset(dest, 0, col_ct * sizeof(float));
+    memset(dest, 0, col_cta4 * sizeof(float));
   } else {
     w1 = _mm_load1_ps(vect);
     w2 = _mm_load1_ps(&(vect[1]));
@@ -1140,6 +1140,8 @@ static inline void mult_tmatrix_nxd_vect_d(const float* tm, const float* vect, f
   }
 }
 
+// N.B. This requires all mm[] rows to be zero-padded at the end, and there
+// can't be nan values at the end of vect[].  (The other way around works too.)
 static inline void mult_matrix_dxn_vect_n(const float* mm, const float* vect, float* dest, uint32_t col_ct, uint32_t row_ct) {
   uintptr_t col_cta4 = round_up_pow2(col_ct, 4);
   uint32_t row_idx = 0;
@@ -1599,7 +1601,7 @@ uint32_t logistic_regression(uint32_t sample_ct, uint32_t param_ct, float* vv, f
   // Inputs:
   // xx    = covariate (and usually genotype) matrix, covariate-major, rows are
   //         16-byte aligned, trailing row elements must be zeroed out
-  // yy    = case/control phenotype
+  // yy    = case/control phenotype; trailing elements must be zeroed out
   //
   // Input/output:
   // coef  = starting point, overwritten with logistic regression result.  Must
@@ -1719,6 +1721,11 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
         *fptr++ = (float)((int32_t)is_set_ul(perm_vecs, sample_uidx));
       }
     }
+    // bugfix (13 Oct 2017): must guarantee trailing phenotype values are valid
+    // (exact contents don't matter since they are multiplied by zero, but they
+    // can't be nan)
+    const uint32_t trail_ct = (-sample_valid_ct) & 3;
+    fill_float_zero(trail_ct, fptr);
     if (logistic_regression(sample_valid_ct, param_ct, sample_1d_buf, param_2d_buf, param_1d_buf, param_2d_buf2, param_1d_buf2, covars_cov_major, pheno_buf, coef, pp)) {
       goto glm_logistic_fail;
     }
diff --git a/plink_ld.c b/plink_ld.c
index 9ff8a24..0db5e15 100644
--- a/plink_ld.c
+++ b/plink_ld.c
@@ -4941,9 +4941,12 @@ uint32_t em_phase_hethet(double known11, double known12, double known21, double
       }
     } else {
       solutions[0] = 0;
-      if ((freq22 + SMALLISH_EPSILON < half_hethet_share + freq21) && (freq21 + SMALLISH_EPSILON < half_hethet_share + freq22)) {
+      // bugfix (6 Oct 2017): need to use all nonzero values here
+      const double nonzero_freq_xx = freq11 + freq22;
+      const double nonzero_freq_xy = freq12 + freq21;
+      if ((nonzero_freq_xx + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xy) && (nonzero_freq_xy + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xx)) {
 	sol_end_idx = 3;
-	solutions[1] = (half_hethet_share + freq21 - freq22) * 0.5;
+	solutions[1] = (half_hethet_share + nonzero_freq_xy - nonzero_freq_xx) * 0.5;
 	solutions[2] = half_hethet_share;
       } else {
 	sol_end_idx = 2;
@@ -8151,10 +8154,24 @@ int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_
 	  }
 	}
       } else {
+        // bugfix (6 Oct 2017):
+        // At least one of {f11, f22} is zero, and one of {f12, f21} is zero.
+        // Initially suppose that the zero-values are f11 and f12.  Then the
+        // equality becomes
+        //   x(f22 + x)(K - x) = x(K - x)(f21 + K - x)
+        //   x=0 and x=K are always solutions; the rest becomes
+        //     f22 + x = f21 + K - x
+        //     2x = K + f21 - f22
+        //     x = (K + f21 - f22)/2; in-range iff (f21 - f22) in (-K, K).
+        // So far so good.  However, this code used to *always* check
+        // (f21 - f22), when it's necessary to use all the nonzero values.
+        // (this still works if three or all four values are zero)
 	solutions[0] = 0;
-	if ((freq22 + SMALLISH_EPSILON < half_hethet_share + freq21) && (freq21 + SMALLISH_EPSILON < half_hethet_share + freq22)) {
+        const double nonzero_freq_xx = freq11 + freq22;
+        const double nonzero_freq_xy = freq12 + freq21;
+	if ((nonzero_freq_xx + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xy) && (nonzero_freq_xy + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xx)) {
 	  uljj = 3;
-	  solutions[1] = (half_hethet_share + freq21 - freq22) * 0.5;
+	  solutions[1] = (half_hethet_share + nonzero_freq_xy - nonzero_freq_xx) * 0.5;
 	  solutions[2] = half_hethet_share;
 	} else {
 	  uljj = 2;
diff --git a/plink_misc.c b/plink_misc.c
index 9f425a0..09f356b 100644
--- a/plink_misc.c
+++ b/plink_misc.c
@@ -2947,8 +2947,11 @@ int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uin
 	  *pzwritep++ = ' ';
 	  pzwritep = fw_strcpy(4, major_ptr, pzwritep);
 	  *pzwritep++ = ' ';
-          pzwritep = uint32toa_w6x(2 * ll_cts[marker_uidx] + lh_cts[marker_uidx] + hapl_cts[marker_uidx], ' ', pzwritep);
-	  pzwritep = uint32toa_w6x(2 * hh_cts[marker_uidx] + lh_cts[marker_uidx] + haph_cts[marker_uidx], ' ', pzwritep);
+          // bugfix (13 Oct 2017): did not take reverse into account here.
+          const uint32_t l_ct = 2 * ll_cts[marker_uidx] + lh_cts[marker_uidx] + hapl_cts[marker_uidx];
+          const uint32_t h_ct = 2 * hh_cts[marker_uidx] + lh_cts[marker_uidx] + haph_cts[marker_uidx];
+          pzwritep = uint32toa_w6x(reverse? h_ct : l_ct, ' ', pzwritep);
+	  pzwritep = uint32toa_w6x(reverse? l_ct : h_ct, ' ', pzwritep);
 	  pzwritep = uint32toa_w6(missing_ct, pzwritep);
 	}
       } else {
@@ -2961,6 +2964,7 @@ int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uin
 	pzwritep = fw_strcpy(4, major_ptr, pzwritep);
 	*pzwritep++ = ' ';
 	uii = 2 * (ll_cts[marker_uidx] + lh_cts[marker_uidx] + hh_cts[marker_uidx]) + hapl_cts[marker_uidx] + haph_cts[marker_uidx];
+        // set_allele_freqs[] already takes reverse into account.
 	if (maf_succ || uii || (set_allele_freqs[marker_uidx] != 0.5)) {
 	  pzwritep = dtoa_g_wxp4(1.0 - set_allele_freqs[marker_uidx], 12, pzwritep);
 	} else {
diff --git a/plink_set.c b/plink_set.c
index 4eb6b9b..366ff97 100644
--- a/plink_set.c
+++ b/plink_set.c
@@ -357,9 +357,14 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
 	if (marker_pos) {
 	  memcpy(ll_tmp->ss, bufptr3, uii);
 	} else {
+	  // quasi-bugfix (7 Oct 2017): forgot to check for this
+	  if (cur_chrom_code > 9999) {
+	    logerrprint("Error: This command does not support 10000+ contigs.\n");
+	    goto load_range_list_ret_INVALID_FORMAT;
+	  }
 	  uitoa_z4((uint32_t)cur_chrom_code, ll_tmp->ss);
-	  // if first character of gene name is a digit, natural sort has strange
-	  // effects unless we force [3] to be nonnumeric...
+	  // if first character of gene name is a digit, natural sort has
+	  // strange effects unless we force [3] to be nonnumeric...
 	  ll_tmp->ss[3] -= 15;
 	  memcpy(&(ll_tmp->ss[4]), bufptr3, uii - 4);
 	}
@@ -381,8 +386,7 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
 	    } else {
 	      logerrprint("Error: Empty --gene-report file.\n");
 	    }
-	    retval = RET_INVALID_FORMAT;
-	    goto load_range_list_ret_1;
+	    goto load_range_list_ret_INVALID_FORMAT;
 	  }
 	}
 	LOGERRPRINTF("Warning: No valid ranges in %s file.\n", file_descrip);
@@ -2323,7 +2327,58 @@ int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t s
   return retval;
 }
 
-int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
+int32_t scrape_extra_chroms(const char* fname, const char* file_descrip, Chrom_info* chrom_info_ptr) {
+  // scan first column of file, add these to chromosome names.
+  // may want to add an option for this to search for "CHR"/"#CHROM" column
+  uintptr_t line_idx = 0;
+  FILE* infile = nullptr;
+  int32_t retval = 0;
+  {
+    if (fopen_checked(fname, "r", &infile)) {
+      goto scrape_extra_chroms_ret_OPEN_FAIL;
+    }
+    g_textbuf[MAXLINELEN - 1] = ' ';
+    while (fgets(g_textbuf, MAXLINELEN, infile)) {
+      ++line_idx;
+      if (!g_textbuf[MAXLINELEN - 1]) {
+	sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, file_descrip);
+	goto scrape_extra_chroms_ret_INVALID_FORMAT_2;
+      }
+      char* first_token = skip_initial_spaces(g_textbuf);
+      if (is_eoln_kns(*first_token)) {
+	continue;
+      }
+      char* first_token_end = token_endnn(first_token);
+      const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - first_token);
+      *first_token_end = '\0';
+      int32_t dummy;
+      retval = get_or_add_chrom_code(first_token, file_descrip, line_idx, chrom_name_slen, 1, chrom_info_ptr, &dummy);
+      if (retval) {
+	goto scrape_extra_chroms_ret_1;
+      }
+    }
+    if (fclose_null(&infile)) {
+      goto scrape_extra_chroms_ret_READ_FAIL;
+    }
+  }
+  while (0) {
+  scrape_extra_chroms_ret_OPEN_FAIL:
+    retval = RET_OPEN_FAIL;
+    break;
+  scrape_extra_chroms_ret_READ_FAIL:
+    retval = RET_READ_FAIL;
+    break;
+  scrape_extra_chroms_ret_INVALID_FORMAT_2:
+    logerrprintb();
+    retval = RET_INVALID_FORMAT;
+    break;
+  }
+ scrape_extra_chroms_ret_1:
+  fclose_cond(infile);
+  return retval;
+}
+
+int32_t annotate(const Annot_info* aip, uint32_t allow_extra_chroms, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
   unsigned char* bigstack_mark = g_bigstack_base;
   unsigned char* bigstack_end_mark = g_bigstack_end;
   gzFile gz_attribfile = nullptr;
@@ -2621,6 +2676,12 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
   }
   if (need_pos) {
     if (aip->ranges_fname) {
+      if (allow_extra_chroms) {
+	retval = scrape_extra_chroms(aip->ranges_fname, "--annotate ranges file", chrom_info_ptr);
+	if (retval) {
+	  goto annotate_ret_1;
+	}
+      }
       if (aip->subset_fname) {
 	if (fopen_checked(aip->subset_fname, FOPEN_RB, &infile)) {
 	  goto annotate_ret_OPEN_FAIL;
@@ -2673,6 +2734,12 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
     }
     bigstack_end_reset(bigstack_end_mark);
     if (aip->filter_fname) {
+      if (allow_extra_chroms) {
+	retval = scrape_extra_chroms(aip->filter_fname, "--annotate filter file", chrom_info_ptr);
+	if (retval) {
+	  goto annotate_ret_1;
+	}
+      }
       retval = load_range_list_sortpos(aip->filter_fname, border, 0, nullptr, 0, chrom_info_ptr, &filter_range_ct, &filter_range_names, &max_filter_range_name_len, &chrom_filter_bounds, &filter_rangedefs, &chrom_max_filter_range_ct, "--annotate filter");
       if (retval) {
 	goto annotate_ret_1;
@@ -3149,7 +3216,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
   return retval;
 }
 
-int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
+int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, uint32_t allow_extra_chroms, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
   // similar to define_sets() and --clump
   unsigned char* bigstack_mark = g_bigstack_base;
   unsigned char* bigstack_end_mark = g_bigstack_end;
@@ -3277,6 +3344,14 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
       bigstack_shrink_top(sorted_extract_ids, extract_ct * max_extract_id_len);
     }
   }
+  if (allow_extra_chroms) {
+    // ugh, silly for this to require 'file' at the end while load_range_list()
+    // does not
+    retval = scrape_extra_chroms(glist, "--gene-report file", chrom_info_ptr);
+    if (retval) {
+      goto gene_report_ret_1;
+    }
+  }
   retval = load_range_list_sortpos(glist, 0, subset_ct, sorted_subset_ids, max_subset_id_len, chrom_info_ptr, &gene_ct, &gene_names, &max_gene_name_len, &chrom_bounds, &genedefs, &chrom_max_gene_ct, "--gene-report");
   if (retval) {
     goto gene_report_ret_1;
@@ -3321,7 +3396,7 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
   if (linebuf_left < MAXLINELEN + 64) {
     goto gene_report_ret_NOMEM;
   }
-  // mirror g_bigstack_base/g_bigstack_base since we'll be doing
+  // mirror g_bigstack_base/g_bigstack_end since we'll be doing
   // nonstandard-size allocations
   linebuf_top = (char*)g_bigstack_base;
   gene_match_list_end = (uint64_t*)g_bigstack_end;
@@ -3536,12 +3611,21 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
       gene_idx = gene_nameidx_to_chridx[ulii];
       bufptr = &(gene_names[gene_idx * max_gene_name_len]);
       fputs(&(bufptr[4]), outfile);
-      fputs(" -- chr", outfile);
-      if (bufptr[2] != '0') {
-	putc_unlocked(bufptr[2], outfile);
+      // 53313 = ('0' * (1000 + 100 + 10)) + ('0' - 15)
+      chrom_idx = ((unsigned char)bufptr[0]) * 1000 + ((unsigned char)bufptr[1]) * 100 + ((unsigned char)bufptr[2]) * 10 + ((unsigned char)bufptr[3]) - 53313;
+      strcpy(g_textbuf, " -- ");
+      // plink 1.07 explicitly precedes chromosome codes with "chr" here.
+      // obviously "chrchr1" doesn't look right, and neither does
+      // chr[contig name], so make the chr prefix conditional.
+      bufptr = &(g_textbuf[4]);
+      if ((!(chrom_info_ptr->output_encoding & CHR_OUTPUT_PREFIX)) && ((chrom_idx <= ((int32_t)chrom_info_ptr->max_code)) || chrom_info_ptr->zero_extra_chroms)) {
+	bufptr = memcpyl3a(bufptr, "chr");
+      }
+      bufptr = chrom_name_write(chrom_info_ptr, chrom_idx, bufptr);
+      *bufptr++ = ':';
+      if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
+	goto gene_report_ret_WRITE_FAIL;
       }
-      putc_unlocked(bufptr[3] + 15, outfile);
-      putc_unlocked(':', outfile);
       uiptr = genedefs[gene_idx];
       range_ct = *uiptr++;
       ujj = 0; // gene length
diff --git a/plink_set.h b/plink_set.h
index 498b505..784fcb6 100644
--- a/plink_set.h
+++ b/plink_set.h
@@ -133,8 +133,8 @@ uint32_t setdefs_compress(Set_info* sip, uintptr_t* set_incl, uintptr_t set_ct,
 
 int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t subset_ct, char* sorted_subset_ids, uintptr_t max_subset_id_len, Chrom_info* chrom_info_ptr, uintptr_t* gene_ct_ptr, char** gene_names_ptr, uintptr_t* max_gene_id_len_ptr, uintptr_t** chrom_bounds_ptr, uint32_t*** genedefs_ptr, uintptr_t* chrom_max_gene_ct_ptr, const char* file_descrip);
 
-int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
+int32_t annotate(const Annot_info* aip, uint32_t allow_extra_chroms, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
 
-int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
+int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, uint32_t allow_extra_chroms, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
 
 #endif // __PLINK_SET_H__

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/plink1.9.git



More information about the debian-med-commit mailing list