[med-svn] [plink1.9] 01/03: New upstream version 1.90~b4.9-171013
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Thu Nov 2 23:02:22 UTC 2017
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository plink1.9.
commit 2e936be1d49715f2f8a42ceecab6731b037637b6
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Thu Nov 2 23:53:58 2017 +0100
New upstream version 1.90~b4.9-171013
---
plink.c | 167 +++++++++++++++--------------
plink_data.c | 331 +++++++++------------------------------------------------
plink_data.h | 2 +-
plink_dosage.c | 32 +++---
plink_filter.c | 63 +++++++++++
plink_filter.h | 2 +
plink_glm.c | 11 +-
plink_ld.c | 25 ++++-
plink_misc.c | 8 +-
plink_set.c | 108 ++++++++++++++++---
plink_set.h | 4 +-
11 files changed, 357 insertions(+), 396 deletions(-)
diff --git a/plink.c b/plink.c
index 3f707ed..792e8d9 100644
--- a/plink.c
+++ b/plink.c
@@ -93,7 +93,7 @@
static const char ver_str[] =
#ifdef STABLE_BUILD
- "PLINK v1.90b4.7"
+ "PLINK v1.90b4.9"
#else
"PLINK v1.90p"
#endif
@@ -105,10 +105,10 @@ static const char ver_str[] =
#else
" 32-bit"
#endif
- " (6 Sep 2017)";
+ " (13 Oct 2017)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
- " "
+ ""
#ifdef STABLE_BUILD
"" // (don't want this when version number has a trailing letter)
#else
@@ -326,7 +326,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
uintptr_t* marker_exclude = nullptr;
uintptr_t marker_exclude_ct = 0;
uintptr_t marker_ct = 0;
- uintptr_t max_marker_id_len = 0;
+ uintptr_t max_marker_id_blen = 0;
// set_allele_freqs = .bed set bit frequency in middle of loading process, A2
// allele frequency later.
double* set_allele_freqs = nullptr;
@@ -480,7 +480,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
memcpy(outname_end, ".bed", 5);
if (realpath_identical(outname, g_textbuf, &(g_textbuf[FNAMESIZE + 64]))) {
- logprint("Note: --make-bed input and output filenames match. Appending '~' to input\nfilenames.\n");
+ logerrprint("Warning: --make-bed input and output filenames match. Appending '~' to input\nfilenames.\n");
uii = strlen(bedname);
memcpy(g_textbuf, bedname, uii + 1);
memcpy(&(bedname[uii]), "~", 2);
@@ -554,7 +554,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (bimname[0]) {
if (update_name) {
ulii = 0;
- retval = scan_max_strlen(update_name->fname, update_name->colid, update_name->colx, update_name->skip, update_name->skipchar, &max_marker_id_len, &ulii);
+ retval = scan_max_strlen(update_name->fname, update_name->colid, update_name->colx, update_name->skip, update_name->skipchar, &max_marker_id_blen, &ulii);
if (retval) {
goto plink_ret_1;
}
@@ -564,14 +564,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
// fixing the problem, so we shouldn't spam them.
logerrprint("Warning: Unusually long new variant ID(s) in --update-name file. Double-check\nyour file and command-line parameters, and consider changing your naming\nscheme if you encounter memory problems.\n");
}
- if (ulii > max_marker_id_len) {
- max_marker_id_len = ulii;
+ if (ulii > max_marker_id_blen) {
+ max_marker_id_blen = ulii;
}
}
if (!marker_alleles_needed) {
allelexxxx = 0;
}
- retval = load_bim(bimname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, &set_allele_freqs, nchrobs_needed? (&nchrobs) : nullptr, &marker_allele_ptrs, &max_marker_allele_blen, &marker_ids, missing_mid_template, new_id_max_allele_len, missing_marker_id_match, chrom_info_ptr, &marker_cms, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, &map_is [...]
+ retval = load_bim(bimname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_blen, &marker_exclude, &set_allele_freqs, nchrobs_needed? (&nchrobs) : nullptr, &marker_allele_ptrs, &max_marker_allele_blen, &marker_ids, missing_mid_template, new_id_max_allele_len, missing_marker_id_match, chrom_info_ptr, &marker_cms, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, &map_is_unsorted, marker_po [...]
if (retval) {
goto plink_ret_1;
}
@@ -785,33 +785,44 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (unfiltered_marker_ct != marker_exclude_ct) {
- uii = update_cm || update_map || update_name || (marker_alleles_needed && (update_alleles_fname || (flip_fname && (!flip_subset_fname)))) || filter_attrib_fname || qual_filter;
+ // bugfix (12 Oct 2017): Previous plink 1.9 implementation of
+ // --{exclude-}snps could affect unwanted variants at the same position as
+ // named variant(s). Backport plink 2.0's safe implementation. This has
+ // the drawback of sometimes increasing the overhead associated with
+ // --snps, but we can live with that.
+ uii = update_cm || update_map || update_name || (marker_alleles_needed && (update_alleles_fname || (flip_fname && (!flip_subset_fname)))) || filter_attrib_fname || qual_filter || snps_range_list_ptr->names;
if (uii || extractname || excludename) {
// only permit duplicate marker IDs for --extract/--exclude
bigstack_mark = g_bigstack_base;
- retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, !uii, &marker_id_htable_size, &marker_id_htable);
+ retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, !uii, &marker_id_htable_size, &marker_id_htable);
if (retval) {
goto plink_ret_1;
}
+ if (snps_range_list_ptr->names) {
+ retval = snps_flag(marker_ids, marker_id_htable, snps_range_list_ptr, unfiltered_marker_ct, max_marker_id_blen, marker_id_htable_size, (filter_flags / FILTER_EXCLUDE_MARKERNAME_SNP) & 1, marker_exclude, &marker_exclude_ct);
+ if (retval) {
+ goto plink_ret_1;
+ }
+ }
if (update_cm) {
- retval = update_marker_cms(update_cm, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_cms);
+ retval = update_marker_cms(update_cm, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_cms);
if (retval) {
goto plink_ret_1;
}
}
if (update_map) {
- retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
+ retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
if (retval) {
goto plink_ret_1;
}
} else if (update_name) {
- retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct);
+ retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct);
if (retval) {
goto plink_ret_1;
}
if (update_alleles_fname || (marker_alleles_needed && flip_fname && (!flip_subset_fname)) || extractname || excludename) {
bigstack_reset(bigstack_mark);
- retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
+ retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, 0, &marker_id_htable_size, &marker_id_htable);
if (retval) {
goto plink_ret_1;
}
@@ -819,13 +830,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (marker_alleles_needed) {
if (update_alleles_fname) {
- retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_blen, outname, outname_end);
+ retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_blen, outname, outname_end);
if (retval) {
goto plink_ret_1;
}
}
if (flip_fname && (!flip_subset_fname)) {
- retval = flip_strand(flip_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs);
+ retval = flip_strand(flip_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs);
if (retval) {
goto plink_ret_1;
}
@@ -833,7 +844,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (extractname) {
if (!(misc_flags & MISC_EXTRACT_RANGE)) {
- retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
+ retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
if (retval) {
goto plink_ret_1;
}
@@ -852,7 +863,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (excludename) {
if (!(misc_flags & MISC_EXCLUDE_RANGE)) {
- retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
+ retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, allow_no_variants);
if (retval) {
goto plink_ret_1;
}
@@ -870,13 +881,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
}
if (filter_attrib_fname) {
- retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+ retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
if (retval) {
goto plink_ret_1;
}
}
if (qual_filter) {
- retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+ retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, allow_no_variants, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
if (retval) {
goto plink_ret_1;
}
@@ -1035,7 +1046,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
om_ip->cluster_ct = 0;
om_ip->entry_ct = 0;
} else {
- retval = load_oblig_missing(bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sex_male, chrom_info_ptr, om_ip);
+ retval = load_oblig_missing(bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_blen, cptr, unfiltered_sample_ct, max_sample_id_len, uiptr, unfiltered_sample_ct, sex_male, chrom_info_ptr, om_ip);
if (retval) {
goto plink_ret_1;
}
@@ -1278,13 +1289,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (bimname[0] && (unfiltered_marker_ct != marker_exclude_ct)) {
- plink_maxsnp = calc_plink_maxsnp(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len);
+ plink_maxsnp = calc_plink_maxsnp(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen);
uii = BITCT_TO_WORDCT(unfiltered_marker_ct);
if (bigstack_calloc_ul(uii, &marker_reverse)) {
goto plink_ret_NOMEM;
}
if (bedfile && sample_ct) {
- retval = calc_freqs_and_hwe(bedfile, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, founder_info, nonfounders, (misc_flags / MISC_MAF_SUCC) & 1, set_allele_freqs, bed_offset, (hwe_thresh > 0.0) || (calculation_type & CALC_HARDY), hwe_modifier & HWE_THRESH_ALL, (pheno_nm_ct && pheno_c)? ((calculation_type / CALC [...]
+ retval = calc_freqs_and_hwe(bedfile, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, founder_info, nonfounders, (misc_flags / MISC_MAF_SUCC) & 1, set_allele_freqs, bed_offset, (hwe_thresh > 0.0) || (calculation_type & CALC_HARDY), hwe_modifier & HWE_THRESH_ALL, (pheno_nm_ct && pheno_c)? ((calculation_type / CAL [...]
if (retval) {
goto plink_ret_1;
}
@@ -1295,7 +1306,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (freqname) {
- retval = read_external_freqs(freqname, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr, marker_allele_ptrs, set_allele_freqs, nchrobs, (misc_flags / MISC_MAF_SUCC) & 1);
+ retval = read_external_freqs(freqname, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_blen, chrom_info_ptr, marker_allele_ptrs, set_allele_freqs, nchrobs, (misc_flags / MISC_MAF_SUCC) & 1);
if (retval) {
goto plink_ret_1;
}
@@ -1307,7 +1318,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (a1alleles || a2alleles) {
- retval = load_ax_alleles(a1alleles? a1alleles : a2alleles, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_allele_ptrs, &max_marker_allele_blen, marker_reverse, marker_ids, max_marker_id_len, set_allele_freqs, a2alleles? 1 : 0);
+ retval = load_ax_alleles(a1alleles? a1alleles : a2alleles, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_allele_ptrs, &max_marker_allele_blen, marker_reverse, marker_ids, max_marker_id_blen, set_allele_freqs, a2alleles? 1 : 0);
if (retval) {
goto plink_ret_1;
}
@@ -1324,18 +1335,18 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (misc_flags & MISC_FREQ_COUNTS) {
logprint("Note: --freq 'counts' modifier has no effect on cluster-stratified report.\n");
}
- retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
+ retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
} else if (misc_flags & MISC_FREQ_CC) {
- retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
+ retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
} else {
- retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
+ retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
}
if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ))))) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_MISSING_REPORT) {
- retval = write_missingness_reports(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_MISSING_GZ) & 1, plink_maxfid, plink_maxiid, plink_maxsnp, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, chrom_info_ptr, om_ip, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, sex_male, sample_male_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, hh_exists);
+ retval = write_missingness_reports(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_MISSING_GZ) & 1, plink_maxfid, plink_maxiid, plink_maxsnp, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, chrom_info_ptr, om_ip, marker_ids, max_marker_id_blen, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, sex_male, sample_male_ct, sample_ids, max_sample_id_len, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, hh_exists);
if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT))))) {
goto plink_ret_1;
}
@@ -1357,7 +1368,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
oblig_missing_cleanup(om_ip);
if (sample_ct) {
if (calculation_type & CALC_HARDY) {
- retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
+ retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_HARDY))))) {
goto plink_ret_1;
}
@@ -1383,7 +1394,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (bedfile && sample_ct && (unfiltered_marker_ct > marker_exclude_ct)) {
if ((calculation_type & CALC_MENDEL) || (fam_ip->mendel_modifier & MENDEL_FILTER)) {
- retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, allow_no_variants, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists, c [...]
+ retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, allow_no_variants, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists, [...]
if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT | CALC_MENDEL))))) {
goto plink_ret_1;
}
@@ -1410,7 +1421,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: --set/--make-set requires a sorted .bim file. Retry this command after\nusing --make-bed to sort your data.\n");
goto plink_ret_INVALID_FORMAT;
}
- retval = define_sets(sip, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, marker_ids, max_marker_id_len, chrom_info_ptr, allow_no_variants);
+ retval = define_sets(sip, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, marker_ids, max_marker_id_blen, chrom_info_ptr, allow_no_variants);
if (retval) {
goto plink_ret_1;
}
@@ -1472,7 +1483,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
ulii = unfiltered_sample_ct - pca_sample_ct;
}
}
- retval = calc_rel(threads, parallel_idx, parallel_tot, calculation_type, relip, bedfile, bed_offset, outname, outname_end, distance_wts_fname, (dist_calc_type & DISTANCE_WTS_NOHEADER), unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? (&ulii) : (&sample_exclude_ct), sample_ids, max_sample_id_len, set_allele_freqs, &rel_ibc, chrom_info_ptr);
+ retval = calc_rel(threads, parallel_idx, parallel_tot, calculation_type, relip, bedfile, bed_offset, outname, outname_end, distance_wts_fname, (dist_calc_type & DISTANCE_WTS_NOHEADER), unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_blen, unfiltered_sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? (&ulii) : (&sample_exclude_ct), sample_ids, max_sample_id_len, set_allele_freqs, &rel_ibc, chrom_info_ptr);
if (retval) {
goto plink_ret_1;
}
@@ -1507,7 +1518,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
#ifndef NOLAPACK
if (calculation_type & CALC_PCA) {
- retval = calc_pca(bedfile, bed_offset, outname, outname_end, calculation_type, relip, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? pca_sample_ct : sample_ct, sample_ids, max_sample_id_len, set_allele_freqs, chrom_info_ptr, rel_ibc);
+ retval = calc_pca(bedfile, bed_offset, outname, outname_end, calculation_type, relip, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, pca_sample_exclude? pca_sample_exclude : sample_exclude, pca_sample_exclude? pca_sample_ct : sample_ct, sample_ids, max_sample_id_len, set_allele_freqs, chrom_info_ptr, rel_ibc);
} else if (calculation_type & CALC_UNRELATED_HERITABILITY) {
if (sample_ct != pheno_nm_ct) {
logerrprint("Error: --unrelated-heritability requires phenotype data for all samples.\n(--prune should help.)\n");
@@ -1532,7 +1543,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (calculation_type & CALC_TUCC) {
- retval = make_pseudocontrols(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, fam_ip);
+ retval = make_pseudocontrols(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_cms, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, fam_ip);
if (retval) {
goto plink_ret_1;
}
@@ -1558,28 +1569,28 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (marker_ct) {
if (calculation_type & CALC_WRITE_SET) {
- retval = write_set(sip, outname, outname_end, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr);
+ retval = write_set(sip, outname, outname_end, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr);
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_WRITE_SNPLIST) {
- retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, nullptr, 0);
+ retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, nullptr, 0);
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_WRITE_VAR_RANGES) {
- retval = write_var_ranges(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, write_var_range_ct);
+ retval = write_var_ranges(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, write_var_range_ct);
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_LIST_23_INDELS) {
- retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, 1);
+ retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_allele_ptrs, 1);
if (retval) {
goto plink_ret_1;
}
@@ -1590,7 +1601,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: --list-duplicate-vars requires a sorted .bim file. Retry this command\nafter using --make-bed to sort your data.\n");
goto plink_ret_INVALID_FORMAT;
}
- retval = list_duplicate_vars(outname, outname_end, dupvar_modifier, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, marker_allele_ptrs);
+ retval = list_duplicate_vars(outname, outname_end, dupvar_modifier, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr, marker_allele_ptrs);
if (retval) {
goto plink_ret_1;
}
@@ -1612,13 +1623,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
}
if (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_MAKE_FAM)) {
- retval = make_bed(bedfile, bed_offset, bimname, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, output_miss [...]
+ retval = make_bed(bedfile, bed_offset, bimname, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, output_mis [...]
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_RECODE) {
- retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_len, marker_cms, marker_allele_ptrs, max_marker_allele_blen, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, ph [...]
+ retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_blen, marker_cms, marker_allele_ptrs, max_marker_allele_blen, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, p [...]
if (retval) {
goto plink_ret_1;
}
@@ -1631,7 +1642,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if ((calculation_type & CALC_EPI) && epi_ip->twolocus_mkr1) {
- retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, sex_male, outname, outname_end, hh_exists);
+ retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1642,7 +1653,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: --show-tags requires a sorted .bim file. Retry this command after using\n--make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = show_tags(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, chrom_info_ptr, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
+ retval = show_tags(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, chrom_info_ptr, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1653,7 +1664,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: --blocks requires a sorted .bim file. Retry this command after using\n--make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = haploview_blocks(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, founder_info, pheno_nm, sex_male, outname, outname_end, hh_exists);
+ retval = haploview_blocks(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, founder_info, pheno_nm, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1664,7 +1675,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: Run-of-homozygosity scanning requires a sorted .bim. Retry this command\nafter using --make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = calc_homozyg(homozyg_ptr, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, outname, outname_end, pheno_nm, pheno_c, pheno_d, output_missing_pheno, sex_male);
+ retval = calc_homozyg(homozyg_ptr, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, outname, outname_end, pheno_nm, pheno_c, pheno_d, output_missing_pheno, sex_male);
if (retval) {
goto plink_ret_1;
}
@@ -1676,9 +1687,9 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
goto plink_ret_INVALID_CMDLINE;
}
if (!(ldip->modifier & LD_PRUNE_PAIRPHASE)) {
- retval = ld_prune(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
+ retval = ld_prune(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
} else {
- retval = indep_pairphase(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
+ retval = indep_pairphase(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, founder_info, sex_male, outname, outname_end, hh_exists);
}
if (retval) {
goto plink_ret_1;
@@ -1690,14 +1701,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: LD-based strand flip scanning requires a sorted .bim. Retry this\ncommand after using --make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = flipscan(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, pheno_nm, pheno_c, founder_info, sex_male, outname, outname_end, hh_exists);
+ retval = flipscan(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, pheno_nm, pheno_c, founder_info, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_EPI) && epi_ip->ld_mkr1) {
- retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, founder_info, 0, nullptr, 0, 0, nullptr, sex_male, nullptr, nullptr, hh_exists);
+ retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, founder_info, 0, nullptr, 0, 0, nullptr, sex_male, nullptr, nullptr, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1714,7 +1725,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
goto plink_ret_INVALID_CMDLINE;
}
}
- retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, set_allele_freqs, chrom_info_ptr, marker_pos, marker_cms, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
+ retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, set_allele_freqs, chrom_info_ptr, marker_pos, marker_cms, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1724,7 +1735,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: --test-mishap requires a sorted .bim. Retry this command after using\n--make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = test_mishap(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, min_maf, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct);
+ retval = test_mishap(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, min_maf, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, sample_ct);
if (retval) {
goto plink_ret_1;
}
@@ -1732,7 +1743,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
/*
if (calculation_type & CALC_REGRESS_PCS) {
- retval = calc_regress_pcs(evecname, regress_pcs_modifier, max_pcs, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, max_sample_id_len, sex_nm, sex_male, pheno_d, missing_phenod, outname, outname_end, hh_exists);
+ retval = calc_regress_pcs(evecname, regress_pcs_modifier, max_pcs, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, max_sample_id_len, sex_nm, sex_male, pheno_d, missing_phenod, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1808,7 +1819,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
*/
if (distance_req(read_dists_fname, calculation_type)) {
- retval = calc_distance(threads, parallel_idx, parallel_tot, bedfile, bed_offset, outname, outname_end, read_dists_fname, distance_wts_fname, distance_exp, calculation_type, dist_calc_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, set_allele_freqs, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, chrom_info_ptr);
+ retval = calc_distance(threads, parallel_idx, parallel_tot, bedfile, bed_offset, outname, outname_end, read_dists_fname, distance_wts_fname, distance_exp, calculation_type, dist_calc_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, set_allele_freqs, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, chrom_info_ptr);
if (retval) {
goto plink_ret_1;
}
@@ -1877,7 +1888,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (calculation_type & CALC_FST) {
- retval = fst_report(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, pheno_nm, (misc_flags & MISC_FST_CC)? pheno_c : nullptr, cluster_ct, cluster_map, cluster_starts);
+ retval = fst_report(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_pos, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, pheno_nm, (misc_flags & MISC_FST_CC)? pheno_c : nullptr, cluster_ct, cluster_map, cluster_starts);
if (retval) {
goto plink_ret_1;
}
@@ -1895,14 +1906,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: --fast-epistasis case-only requires a sorted .bim. Retry this command\nafter using --make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = epistasis_report(threads, epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, glm_vif_thresh, sip);
+ retval = epistasis_report(threads, epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, glm_vif_thresh, sip);
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_SCORE) {
- retval = score_report(sc_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, set_allele_freqs, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, sex_male, pheno_nm, pheno_c, pheno_d, output_missing_pheno, hh_exists, chrom_info_ptr, outname, outname_end);
+ retval = score_report(sc_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, set_allele_freqs, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, sex_male, pheno_nm, pheno_c, pheno_d, output_missing_pheno, hh_exists, chrom_info_ptr, outname, outname_end);
if (retval) {
goto plink_ret_1;
}
@@ -1910,7 +1921,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
#if defined __cplusplus && !defined _WIN32
if (calculation_type & CALC_RPLUGIN) {
- retval = rserve_call(rplugin_fname, rplugin_host_or_socket, rplugin_port, (misc_flags / MISC_RPLUGIN_DEBUG) & 1, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_c, pheno_d, cluster_ct, cluster_map, cluster_starts, covar_ct, covar_d, outname, outname_end);
+ retval = rserve_call(rplugin_fname, rplugin_host_or_socket, rplugin_port, (misc_flags / MISC_RPLUGIN_DEBUG) & 1, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_blen, marker_allele_ptrs, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_c, pheno_d, cluster_ct, cluster_map, cluster_starts, covar_ct, covar_d, outname, outname_end);
if (retval) {
goto plink_ret_1;
}
@@ -2033,10 +2044,10 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (calculation_type & CALC_MODEL) {
if (pheno_d) {
if (model_modifier & MODEL_ASSOC) {
- retval = qassoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_mperm_val, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_d, founder_info, sex_male, hh_exists, ldip->modifier & LD_IGNORE_ [...]
+ retval = qassoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_mperm_val, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_d, founder_info, sex_male, hh_exists, ldip->modifier & LD_IGNORE [...]
}
} else {
- retval = model_assoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_cell_ct, model_mperm_val, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm [...]
+ retval = model_assoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_cell_ct, model_mperm_val, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_n [...]
}
if (retval) {
goto plink_ret_1;
@@ -2046,23 +2057,23 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (!(glm_modifier & GLM_NO_SNP)) {
if (pheno_d) {
#ifndef NOLAPACK
- retval = glm_linear_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sam [...]
+ retval = glm_linear_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sa [...]
#else
logerrprint("Warning: Skipping --logistic on --all-pheno QT since this is a no-LAPACK " PROG_NAME_CAPS"\nbuild.\n");
#endif
} else {
- retval = glm_logistic_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_s [...]
+ retval = glm_logistic_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_ [...]
}
} else {
if (pheno_d) {
#ifndef NOLAPACK
- retval = glm_linear_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm_ct [...]
+ retval = glm_linear_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm_c [...]
#else
logerrprint("Warning: Skipping --logistic on --all-pheno QT since this is a no-LAPACK " PROG_NAME_CAPS"\nbuild.\n");
#endif
} else {
- retval = glm_logistic_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm_ [...]
+ retval = glm_logistic_nosnp(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, cluster_ct, cluster_map, cluster_starts, mperm_save, pheno_nm [...]
}
}
if (retval) {
@@ -2071,47 +2082,47 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
// if case/control phenotype loaded with --all-pheno, skip --gxe
if ((calculation_type & CALC_GXE) && pheno_d) {
- retval = gxe_assoc(bedfile, bed_offset, outname, outname_end2, output_min_p, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, pheno_d, gxe_covar_nm, gxe_covar_c, sex_male, hh_exists);
+ retval = gxe_assoc(bedfile, bed_offset, outname, outname_end2, output_min_p, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, sample_exclude, pheno_nm, pheno_d, gxe_covar_nm, gxe_covar_c, sex_male, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_LASSO) {
- retval = lasso(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, pheno_nm_ct, lasso_h2, lasso_minlambda, lasso_select_covars_range_list_ptr, misc_flags, sample_exclude, pheno_nm, pheno_c, pheno_d, covar_ct, covar_names, max_covar_name_len, covar_nm, covar_d, sex_male, hh_exists);
+ retval = lasso(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, sample_ct, pheno_nm_ct, lasso_h2, lasso_minlambda, lasso_select_covars_range_list_ptr, misc_flags, sample_exclude, pheno_nm, pheno_c, pheno_d, covar_ct, covar_names, max_covar_name_len, covar_nm, covar_d, sex_male, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_CMH) && pheno_c) {
if (!(cluster_ptr->modifier & CLUSTER_CMH2)) {
- retval = cmh_assoc(threads, bedfile, bed_offset, outname, outname_end2, cluster_ptr->cmh_mperm_val, cluster_ptr->modifier, ci_size, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, [...]
+ retval = cmh_assoc(threads, bedfile, bed_offset, outname, outname_end2, cluster_ptr->cmh_mperm_val, cluster_ptr->modifier, ci_size, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male [...]
} else {
- retval = cmh2_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+ retval = cmh2_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
}
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_HOMOG) && pheno_c) {
- retval = homog_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+ retval = homog_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_TESTMISS) && pheno_c) {
- retval = testmiss(threads, bedfile, bed_offset, outname, outname_end2, testmiss_mperm_val, testmiss_modifier, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+ retval = testmiss(threads, bedfile, bed_offset, outname, outname_end2, testmiss_mperm_val, testmiss_modifier, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_TDT) && pheno_c) {
- retval = tdt(threads, bedfile, bed_offset, outname, outname_end2, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, mperm_save, pheno_nm, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, materna [...]
+ retval = tdt(threads, bedfile, bed_offset, outname, outname_end2, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, mperm_save, pheno_nm, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, matern [...]
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_DFAM) && pheno_c) {
- retval = dfam(threads, bedfile, bed_offset, outname, outname_end2, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_i [...]
+ retval = dfam(threads, bedfile, bed_offset, outname, outname_end2, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ [...]
if (retval) {
goto plink_ret_1;
}
@@ -2127,7 +2138,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (mtest_adjust && (fam_ip->qfam_modifier & QFAM_PERM)) {
logerrprint("Warning: The QFAM test does not support --adjust. Use max(T) permutation to\nobtain multiple-testing corrected p-values.\n");
}
- retval = qfam(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, pheno_nm, pheno_d, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, hh_exists, perm_batch_size, fam_ip);
+ retval = qfam(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, pheno_nm, pheno_d, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, hh_exists, perm_batch_size, fam_ip);
if (retval) {
goto plink_ret_1;
}
@@ -2140,7 +2151,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: --clump requires a sorted .bim. Retry this command after using\n--make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = clump_reports(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, founder_info, clump_ip, sex_male, hh_exists);
+ retval = clump_reports(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, founder_info, clump_ip, sex_male, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -13461,15 +13472,17 @@ int32_t main(int32_t argc, char** argv) {
}
}
if (annot_info.fname) {
- retval = annotate(&annot_info, outname, outname_end, pfilter, &chrom_info);
+ retval = annotate(&annot_info, (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1, outname, outname_end, pfilter, &chrom_info);
}
if (gene_report_fname) {
- retval = gene_report(gene_report_fname, gene_report_glist, gene_report_subset, gene_report_border, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, gene_report_snp_field, outname, outname_end, pfilter, &chrom_info);
+ retval = gene_report(gene_report_fname, gene_report_glist, gene_report_subset, gene_report_border, (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, gene_report_snp_field, outname, outname_end, pfilter, &chrom_info);
if (retval) {
goto main_ret_1;
}
}
if (metaanal_fnames) {
+ // possible todo: make this support --aec (takes a bit of work since
+ // chromosome byte in data structure must be widened)
retval = meta_analysis(metaanal_fnames, metaanal_snpfield_search_order, metaanal_a1field_search_order, metaanal_a2field_search_order, metaanal_pfield_search_order, metaanal_essfield_search_order, metaanal_flags, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, outname, outname_end, output_min_p, &chrom_info);
if (retval) {
goto main_ret_1;
diff --git a/plink_data.c b/plink_data.c
index 50baef0..1046d32 100644
--- a/plink_data.c
+++ b/plink_data.c
@@ -415,106 +415,8 @@ static inline uint32_t is_acgtm(unsigned char ucc) {
return (uint32_t)(acgtm_bool_table[ucc]);
}
-void load_bim_sf_insert(uint32_t chrom_idx, uint32_t pos_start, uint32_t pos_end, uint32_t* start_idxs, uint32_t* llbuf, uint32_t* lltop_ptr, uint32_t* entry_ct_ptr) {
- uint32_t lltop = *lltop_ptr;
- uint32_t entry_ct = *entry_ct_ptr;
- uint32_t llidx;
- uint32_t new_start;
- uint32_t new_end;
- uint32_t new_llidx;
- uint32_t old_llidx;
- if (start_idxs[chrom_idx] == 1) {
- start_idxs[chrom_idx] = lltop;
- llbuf[lltop++] = pos_start;
- llbuf[lltop++] = pos_end;
- llbuf[lltop++] = 1;
- entry_ct++;
- } else {
- llidx = start_idxs[chrom_idx];
- while (1) {
- if (llbuf[llidx] > pos_end) {
- if (llbuf[llidx] == pos_end + 1) {
- llbuf[llidx] = pos_start;
- } else {
- new_llidx = llidx;
- do {
- llidx = new_llidx;
- new_start = llbuf[llidx];
- llbuf[llidx] = pos_start;
- pos_start = new_start;
- new_end = llbuf[llidx + 1];
- llbuf[llidx + 1] = pos_end;
- pos_end = new_end;
- new_llidx = llbuf[llidx + 2];
- } while (new_llidx != 1);
- llbuf[llidx + 2] = lltop;
- llbuf[lltop++] = pos_start;
- llbuf[lltop++] = pos_end;
- llbuf[lltop++] = 1;
- entry_ct++;
- }
- break;
- } else if (llbuf[llidx + 1] + 1 >= pos_start) {
- // mergeable
- if (llbuf[llidx] > pos_start) {
- llbuf[llidx] = pos_start;
- }
- if (llbuf[llidx + 1] < pos_end) {
- // scan forward, attempt to collapse entries
-
- // bugfix: if no forward entries can be collapsed, current entry must
- // be updated
- llbuf[llidx + 1] = pos_end;
-
- old_llidx = llidx;
- new_llidx = llbuf[llidx + 2];
- while (new_llidx != 1) {
- llidx = new_llidx;
- if (llbuf[llidx] > pos_end + 1) {
- break;
- }
- entry_ct--;
- new_llidx = llbuf[llidx + 2];
- llbuf[old_llidx + 2] = new_llidx;
- if (llbuf[llidx + 1] >= pos_end) {
- llbuf[old_llidx + 1] = llbuf[llidx + 1];
- break;
- }
- }
- }
- break;
- }
- new_llidx = llbuf[llidx + 2];
- if (new_llidx == 1) {
- llbuf[llidx + 2] = lltop;
- llbuf[lltop++] = pos_start;
- llbuf[lltop++] = pos_end;
- llbuf[lltop++] = 1;
- entry_ct++;
- break;
- }
- llidx = new_llidx;
- }
- }
- *lltop_ptr = lltop;
- *entry_ct_ptr = entry_ct;
-}
-
-static inline uint32_t sf_out_of_range(uint32_t cur_pos, uint32_t chrom_idx, uint32_t* sf_start_idxs, uint32_t* sf_pos) {
- uint32_t cur_idx = sf_start_idxs[chrom_idx];
- uint32_t end_idx = sf_start_idxs[chrom_idx + 1];
- while (cur_idx < end_idx) {
- if ((cur_pos >= sf_pos[cur_idx]) && (cur_pos <= sf_pos[cur_idx + 1])) {
- return 0;
- }
- cur_idx += 2;
- }
- return 1;
-}
-
-int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
+int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
// supports .map now too, to make e.g. --snps + --dosage work
- unsigned char* bigstack_mark = g_bigstack_base;
FILE* bimfile = nullptr;
uintptr_t unfiltered_marker_ct = 0;
uintptr_t marker_exclude_ct = *marker_exclude_ct_ptr;
@@ -533,12 +435,7 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
uint32_t from_slen = markername_from? strlen(markername_from) : 0;
uint32_t to_slen = markername_to? strlen(markername_to) : 0;
uint32_t snp_slen = markername_snp? strlen(markername_snp) : 0;
- // "sf" = "snp filter" (could rename to "vf"...)
- uint32_t sf_ct = sf_range_list_ptr->name_ct;
- // assume for now that sf_ct * sf_max_len < 2^32, since these are based on
- // command-line parameters
- uint32_t sf_max_len = sf_range_list_ptr->name_max_len;
- uint32_t slen_check = from_slen || to_slen || snp_slen || sf_ct;
+ uint32_t slen_check = from_slen || to_slen || snp_slen;
uint32_t from_chrom = MAX_POSSIBLE_CHROM;
uint32_t to_chrom = MAX_POSSIBLE_CHROM;
uint32_t snp_chrom = MAX_POSSIBLE_CHROM;
@@ -558,12 +455,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
int32_t exclude_window_end = -1;
int32_t retval = 0;
char* missing_geno_ptr = (char*)g_missing_geno_ptr;
- uint32_t* sf_start_idxs = nullptr;
- uint32_t* sf_pos = nullptr;
- uint32_t* sf_str_chroms = nullptr;
- uint32_t* sf_str_pos = nullptr;
- uint32_t* sf_str_lens = nullptr;
- uint32_t* sf_llbuf = nullptr;
char* loadbuf2 = nullptr; // on heap, second pass
char* prev_new_id = nullptr;
char* bufptr2 = nullptr;
@@ -571,7 +462,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
char* bufptr5 = nullptr;
char** marker_allele_ptrs = nullptr;
uintptr_t loaded_chrom_mask[CHROM_MASK_WORDS];
- uintptr_t sf_mask[CHROM_MASK_WORDS];
uint32_t missing_template_seg_len[5];
uint32_t missing_template_seg_order[4]; // '@', '#', '$1', '$2'
uint32_t insert_buf_len[4];
@@ -583,15 +473,11 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
uintptr_t loadbuf_size;
uintptr_t unfiltered_marker_ctl;
uintptr_t marker_uidx;
- uint32_t sf_entry_ct;
- uint32_t sf_lltop;
char* bufptr;
char* col2_ptr;
uintptr_t ulii;
uint32_t ukk;
uint32_t umm;
- uint32_t unn;
- uint32_t uoo;
int32_t jj;
uint32_t cur_pos;
double cur_cm;
@@ -602,22 +488,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
insert_buf[1] = nullptr;
insert_buf[2] = nullptr;
insert_buf[3] = nullptr;
- if (sf_ct) {
- sf_start_idxs = (uint32_t*)malloc((MAX_POSSIBLE_CHROM + 1) * sizeof(int32_t));
- if (!sf_start_idxs) {
- goto load_bim_ret_NOMEM;
- }
- if (bigstack_alloc_ui(sf_ct, &sf_str_chroms) ||
- bigstack_alloc_ui(sf_ct, &sf_str_pos) ||
- bigstack_alloc_ui(sf_ct, &sf_str_lens) ||
- bigstack_alloc_ui(3 * (MAX_POSSIBLE_CHROM + sf_ct), &sf_llbuf)) {
- goto load_bim_ret_NOMEM;
- }
- for (uii = 0; uii < sf_ct; uii++) {
- sf_str_chroms[uii] = MAX_POSSIBLE_CHROM;
- sf_str_lens[uii] = strlen(&(sf_range_list_ptr->names[uii * sf_max_len]));
- }
- }
fill_uint_zero(5, missing_template_seg_len);
missing_template_seg[0] = nullptr;
missing_template_seg[1] = nullptr;
@@ -855,157 +725,54 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
}
bufptr2[strlen_se(bufptr2)] = '\0';
}
- if (sf_ct) {
- uii = 0;
- do {
- if ((ulii == sf_str_lens[uii]) && (!memcmp(col2_ptr, &(sf_range_list_ptr->names[uii * sf_max_len]), ulii))) {
- if (sf_str_chroms[uii] != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
- }
- sf_str_chroms[uii] = cur_chrom_code;
- if (scan_uint_defcap(bufptr2, &(sf_str_pos[uii]))) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
- }
- break;
- }
- } while (++uii < sf_ct);
- } else {
- if ((ulii == from_slen) && (!memcmp(col2_ptr, markername_from, ulii))) {
- if (from_chrom != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
- }
- from_chrom = cur_chrom_code;
- if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_start)) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
- }
- if (to_chrom != MAX_POSSIBLE_CHROM) {
- if (from_chrom != to_chrom) {
- goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
- }
- }
- fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
- SET_BIT(from_chrom, chrom_info_ptr->chrom_mask);
- }
- if ((ulii == to_slen) && (!memcmp(col2_ptr, markername_to, ulii))) {
- if (to_chrom != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
- }
- to_chrom = cur_chrom_code;
- if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_end)) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
- }
- if (from_chrom != MAX_POSSIBLE_CHROM) {
- if (to_chrom != from_chrom) {
- goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
- }
- }
- fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
- SET_BIT(to_chrom, chrom_info_ptr->chrom_mask);
- }
- if ((ulii == snp_slen) && (!memcmp(col2_ptr, markername_snp, ulii))) {
- if (snp_chrom != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
- }
- snp_chrom = cur_chrom_code;
- if (scan_uint_defcap(bufptr2, &snp_pos)) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
- }
- if (!exclude_snp) {
- fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
- SET_BIT(snp_chrom, chrom_info_ptr->chrom_mask);
- }
- }
- }
+ if ((ulii == from_slen) && (!memcmp(col2_ptr, markername_from, ulii))) {
+ if (from_chrom != MAX_POSSIBLE_CHROM) {
+ goto load_bim_ret_DUPLICATE_ID;
+ }
+ from_chrom = cur_chrom_code;
+ if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_start)) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ if (to_chrom != MAX_POSSIBLE_CHROM) {
+ if (from_chrom != to_chrom) {
+ goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
+ }
+ }
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+ SET_BIT(from_chrom, chrom_info_ptr->chrom_mask);
+ }
+ if ((ulii == to_slen) && (!memcmp(col2_ptr, markername_to, ulii))) {
+ if (to_chrom != MAX_POSSIBLE_CHROM) {
+ goto load_bim_ret_DUPLICATE_ID;
+ }
+ to_chrom = cur_chrom_code;
+ if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_end)) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ if (from_chrom != MAX_POSSIBLE_CHROM) {
+ if (to_chrom != from_chrom) {
+ goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
+ }
+ }
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+ SET_BIT(to_chrom, chrom_info_ptr->chrom_mask);
+ }
+ if ((ulii == snp_slen) && (!memcmp(col2_ptr, markername_snp, ulii))) {
+ if (snp_chrom != MAX_POSSIBLE_CHROM) {
+ goto load_bim_ret_DUPLICATE_ID;
+ }
+ snp_chrom = cur_chrom_code;
+ if (scan_uint_defcap(bufptr2, &snp_pos)) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ if (!exclude_snp) {
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+ SET_BIT(snp_chrom, chrom_info_ptr->chrom_mask);
+ }
+ }
}
unfiltered_marker_ct++;
}
- if (sf_ct) {
- for (uii = 0; uii < sf_ct; uii++) {
- if (sf_str_chroms[uii] == MAX_POSSIBLE_CHROM) {
- LOGPREPRINTFWW("Error: Variant '%s' not found in %s.\n", &(sf_range_list_ptr->names[uii * sf_max_len]), ftype_str);
- goto load_bim_ret_INVALID_FORMAT_2;
- }
- }
- // effectively build out one linked list per chromosome
- memcpy(sf_mask, chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
- sf_entry_ct = 0;
- sf_lltop = 0;
- ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
- for (uii = 0; uii <= ujj; uii++) {
- sf_start_idxs[uii] = 1; // impossible (multiples of 3)
- }
- uii = 0;
- do {
- ujj = sf_str_chroms[uii];
- ukk = sf_str_pos[uii];
- if (sf_range_list_ptr->starts_range[uii]) {
- umm = sf_str_chroms[uii + 1];
- unn = sf_str_pos[uii + 1];
- if (ujj != umm) {
- if (ujj > umm) {
- uoo = ujj;
- ujj = umm;
- umm = uoo;
- uoo = ukk;
- ukk = unn;
- unn = uoo;
- }
- if (IS_SET(sf_mask, ujj)) {
- load_bim_sf_insert(ujj, ukk, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
- for (uoo = ujj + 1; uoo < umm; uoo++) {
- if (IS_SET(sf_mask, uoo)) {
- load_bim_sf_insert(uoo, 0, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
- }
- if (IS_SET(sf_mask, umm)) {
- load_bim_sf_insert(umm, 0, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
- } else {
- if (ukk > unn) {
- umm = ukk;
- ukk = unn;
- unn = umm;
- }
- if (IS_SET(sf_mask, ujj)) {
- load_bim_sf_insert(ujj, ukk, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
- }
- uii += 2;
- } else {
- if (IS_SET(sf_mask, ujj)) {
- load_bim_sf_insert(ujj, ukk, ukk, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
- uii++;
- }
- } while (uii < sf_ct);
- // now compactify
- sf_pos = (uint32_t*)malloc(sf_entry_ct * 2 * sizeof(int32_t));
- if (!sf_pos) {
- goto load_bim_ret_NOMEM;
- }
- ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
- ukk = 0;
- for (uii = 0; uii <= ujj; uii++) {
- if (sf_start_idxs[uii] == 1) {
- CLEAR_BIT(uii, sf_mask);
- sf_start_idxs[uii] = ukk;
- continue;
- }
- umm = sf_start_idxs[uii];
- sf_start_idxs[uii] = ukk;
- do {
- sf_pos[ukk++] = sf_llbuf[umm];
- sf_pos[ukk++] = sf_llbuf[umm + 1];
- umm = sf_llbuf[umm + 2];
- } while (umm != 1);
- }
- sf_start_idxs[ujj + 1] = ukk;
- if (!exclude_snp) {
- memcpy(chrom_info_ptr->chrom_mask, sf_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
- }
- bigstack_reset(bigstack_mark);
- }
if (!feof(bimfile)) {
goto load_bim_ret_READ_FAIL;
}
@@ -1249,7 +1016,7 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
} else {
last_pos = cur_pos;
}
- if ((sf_ct && (exclude_snp ^ sf_out_of_range(cur_pos, (uint32_t)cur_chrom_code, sf_start_idxs, sf_pos))) || ((marker_pos_start != -1) && ((((int32_t)cur_pos) < marker_pos_start) || (((int32_t)cur_pos) > marker_pos_end)))) {
+ if ((marker_pos_start != -1) && ((((int32_t)cur_pos) < marker_pos_start) || (((int32_t)cur_pos) > marker_pos_end))) {
goto load_bim_skip_marker;
}
if (snp_slen) {
@@ -1419,8 +1186,6 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
}
load_bim_ret_1:
fclose_cond(bimfile);
- free_cond(sf_start_idxs);
- free_cond(sf_pos);
free_cond(loadbuf2);
free_cond(prev_new_id);
free_cond(insert_buf[2]);
diff --git a/plink_data.h b/plink_data.h
index 4a76e01..502bbe3 100644
--- a/plink_data.h
+++ b/plink_data.h
@@ -20,7 +20,7 @@
int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t unfiltered_sample_ct, uint64_t fsize);
-int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
+int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* sex_nm, uintptr_t* sex_male, char* sample_ids, uintptr_t max_sample_id_len, double missing_phenod, uint32_t covar_modifier, Range_list* covar_range_list_ptr, uint32_t gxe_mcovar, uintptr_t* covar_ctx_ptr, char** covar_names_ptr, uintptr_t* max_covar_name_len_ptr, uintptr_t* pheno_nm, uintptr_t** covar_nm_ptr, double** covar_d_ptr, uintptr_t** gxe_covar_nm_ptr, [...]
diff --git a/plink_dosage.c b/plink_dosage.c
index 952e50c..6c7042e 100644
--- a/plink_dosage.c
+++ b/plink_dosage.c
@@ -562,7 +562,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
uint32_t* uiptr3 = nullptr;
uintptr_t unfiltered_marker_ct = 0;
uintptr_t marker_exclude_ct = 0;
- uintptr_t max_marker_id_len = 0;
+ uintptr_t max_marker_id_blen = 0;
uintptr_t unfiltered_sample_ct = 0;
uintptr_t sample_exclude_ct = 0;
uintptr_t max_sample_id_len = 4;
@@ -691,7 +691,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
int32_t ii;
pzwrite_init_null(&ps);
if (load_map) {
- retval = load_bim(mapname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, nullptr, nullptr, nullptr, &ulii, &marker_ids, nullptr, 0, nullptr, chrom_info_ptr, nullptr, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, &map_is_unsorted, do_glm || min_bp_space || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)), 0, 0, nullptr, ".map file", [...]
+ retval = load_bim(mapname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_blen, &marker_exclude, nullptr, nullptr, nullptr, &ulii, &marker_ids, nullptr, 0, nullptr, chrom_info_ptr, nullptr, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, &map_is_unsorted, do_glm || min_bp_space || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)), 0, 0, nullptr, ".map file", nullptr);
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -779,24 +779,30 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
bigstack_reset(bigstack_mark);
}
if (load_map) {
- uii = update_map || update_name || filter_attrib_fname || qual_filter;
+ uii = update_map || update_name || filter_attrib_fname || qual_filter || snps_range_list_ptr->names;
if (uii || extractname || excludename) {
bigstack_mark = g_bigstack_base;
- retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, !uii, &marker_id_htable_size, &marker_id_htable);
+ retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, !uii, &marker_id_htable_size, &marker_id_htable);
if (retval) {
goto plink1_dosage_ret_1;
}
+ if (snps_range_list_ptr->names) {
+ retval = snps_flag(marker_ids, marker_id_htable, snps_range_list_ptr, unfiltered_marker_ct, max_marker_id_blen, marker_id_htable_size, (filter_flags / FILTER_EXCLUDE_MARKERNAME_SNP) & 1, marker_exclude, &marker_exclude_ct);
+ if (retval) {
+ goto plink1_dosage_ret_1;
+ }
+ }
if (update_map) {
- retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
+ retval = update_marker_pos(update_map, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_pos, &map_is_unsorted, chrom_info_ptr);
} else if (update_name) {
- retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct);
+ retval = update_marker_names(update_name, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen, unfiltered_marker_ct);
if (retval) {
goto plink1_dosage_ret_1;
}
if (extractname || excludename) {
bigstack_reset(bigstack_mark);
- retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
+ retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_blen, 0, &marker_id_htable_size, &marker_id_htable);
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -804,7 +810,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
if (extractname) {
if (!(misc_flags & MISC_EXTRACT_RANGE)) {
- retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
+ retval = extract_exclude_flag_norange(extractname, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -823,7 +829,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
if (excludename) {
if (!(misc_flags & MISC_EXCLUDE_RANGE)) {
- retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
+ retval = extract_exclude_flag_norange(excludename, marker_id_htable, marker_id_htable_size, 1, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, 0);
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -841,13 +847,13 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
}
if (filter_attrib_fname) {
- retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+ retval = filter_attrib(filter_attrib_fname, filter_attrib_liststr, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
if (retval) {
goto plink1_dosage_ret_1;
}
}
if (qual_filter) {
- retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
+ retval = filter_qual_scores(qual_filter, qual_min_thresh, qual_max_thresh, marker_id_htable, marker_id_htable_size, 0, marker_ids, max_marker_id_blen, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct);
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -1087,7 +1093,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
enforce_min_bp_space(min_bp_space, unfiltered_marker_ct, marker_exclude, marker_pos, &marker_exclude_ct, chrom_info_ptr);
}
marker_ct = unfiltered_marker_ct - marker_exclude_ct;
- retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
+ retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_blen, 0, &marker_id_htable_size, &marker_id_htable);
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -1774,7 +1780,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
goto plink1_dosage_ret_NOMEM;
}
if (load_map) {
- marker_idx = id_htable_find(bufptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
+ marker_idx = id_htable_find(bufptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_blen);
if (marker_idx == 0xffffffffU) {
#ifdef __LP64__
marker_idx = ~ZEROLU;
diff --git a/plink_filter.c b/plink_filter.c
index c6f40c1..aa63b5a 100644
--- a/plink_filter.c
+++ b/plink_filter.c
@@ -193,6 +193,69 @@ int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, u
return retval;
}
+// backported from plink 2.0
+int32_t snps_flag(const char* variant_ids, const uint32_t* variant_id_htable, const Range_list* snps_range_list_ptr, uint32_t raw_variant_ct, uintptr_t max_variant_id_blen, uintptr_t variant_id_htable_size, uint32_t do_exclude, uintptr_t* variant_exclude, uintptr_t* exclude_ct_ptr) {
+ unsigned char* bigstack_mark = g_bigstack_base;
+ int32_t reterr = 0;
+ {
+ const char* varid_strbox = snps_range_list_ptr->names;
+ const unsigned char* starts_range = snps_range_list_ptr->starts_range;
+ const uint32_t varid_ct = snps_range_list_ptr->name_ct;
+ const uintptr_t varid_max_blen = snps_range_list_ptr->name_max_len;
+ const uint32_t raw_variant_ctl = BITCT_TO_WORDCT(raw_variant_ct);
+ uintptr_t* seen_uidxs;
+ if (bigstack_calloc_ul(raw_variant_ctl, &seen_uidxs)) {
+ goto snps_flag_ret_NOMEM;
+ }
+ uint32_t range_start_vidx = 0xffffffffU;
+ for (uint32_t varid_idx = 0; varid_idx < varid_ct; ++varid_idx) {
+ const char* cur_varid = &(varid_strbox[varid_idx * varid_max_blen]);
+ uint32_t variant_uidx = id_htable_find(cur_varid, strlen(cur_varid), variant_id_htable, variant_id_htable_size, variant_ids, max_variant_id_blen);
+ if (variant_uidx == 0xffffffffU) {
+ sprintf(g_logbuf, "Error: --%ssnps variant '%s' not found.\n", do_exclude? "exclude-" : "", cur_varid);
+ goto snps_flag_ret_INVALID_FORMAT_WW;
+ }
+ if (starts_range[varid_idx]) {
+ range_start_vidx = variant_uidx;
+ } else {
+ if (range_start_vidx != 0xffffffffU) {
+ if (variant_uidx < range_start_vidx) {
+ const uint32_t uii = variant_uidx;
+ variant_uidx = range_start_vidx;
+ range_start_vidx = uii;
+ }
+ fill_bits(range_start_vidx, variant_uidx + 1 - range_start_vidx, seen_uidxs);
+ } else {
+ set_bit(variant_uidx, seen_uidxs);
+ }
+ range_start_vidx = 0xffffffffU;
+ }
+ }
+ if (do_exclude) {
+ bitvec_or(seen_uidxs, raw_variant_ctl, variant_exclude);
+ } else {
+ bitvec_ornot(seen_uidxs, raw_variant_ctl, variant_exclude);
+ zero_trailing_bits(raw_variant_ct, variant_exclude);
+ }
+ const uint32_t new_exclude_ct = popcount_longs(variant_exclude, raw_variant_ctl);
+ const uint32_t new_variant_ct = raw_variant_ct - new_exclude_ct;
+ LOGPRINTF("--%ssnps: %u variant%s remaining.\n", do_exclude? "exclude-" : "", new_variant_ct, (new_variant_ct == 1)? "" : "s");
+ *exclude_ct_ptr = new_exclude_ct;
+ }
+ while (0) {
+ snps_flag_ret_NOMEM:
+ reterr = RET_NOMEM;
+ break;
+ snps_flag_ret_INVALID_FORMAT_WW:
+ wordwrapb(0);
+ logerrprintb();
+ reterr = RET_INVALID_FORMAT;
+ break;
+ }
+ bigstack_reset(bigstack_mark);
+ return reterr;
+}
+
void extract_exclude_process_token(const char* tok_start, const uint32_t* marker_id_htable, uint32_t marker_id_htable_size, const uint32_t* extra_alloc_base, const char* marker_ids, uintptr_t max_marker_id_len, uintptr_t* marker_exclude, uintptr_t* already_seen, uintptr_t* duplicate_ct_ptr, uint32_t do_exclude, uint32_t curtoklen) {
if (curtoklen >= max_marker_id_len) {
return;
diff --git a/plink_filter.h b/plink_filter.h
index 514ce20..df9c673 100644
--- a/plink_filter.h
+++ b/plink_filter.h
@@ -36,6 +36,8 @@ void oblig_missing_cleanup(Oblig_missing_info* om_ip);
int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_len, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr, uint32_t flags, uint32_t allow_no_samples);
+int32_t snps_flag(const char* variant_ids, const uint32_t* variant_id_htable, const Range_list* snps_range_list_ptr, uint32_t raw_variant_ct, uintptr_t max_variant_id_blen, uintptr_t variant_id_htable_size, uint32_t do_exclude, uintptr_t* variant_exclude, uintptr_t* exclude_ct_ptr);
+
int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t do_exclude, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t allow_no_variants);
int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uint32_t id_htable_size, uint32_t allow_no_variants, char* item_ids, uintptr_t max_id_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr);
diff --git a/plink_glm.c b/plink_glm.c
index 45faaa1..2397cfe 100644
--- a/plink_glm.c
+++ b/plink_glm.c
@@ -1055,7 +1055,7 @@ static inline void mult_tmatrix_nxd_vect_d(const float* tm, const float* vect, f
uint32_t row_ctm3;
uint32_t col_idx;
if (row_ct < 4) {
- memset(dest, 0, col_ct * sizeof(float));
+ memset(dest, 0, col_cta4 * sizeof(float));
} else {
w1 = _mm_load1_ps(vect);
w2 = _mm_load1_ps(&(vect[1]));
@@ -1140,6 +1140,8 @@ static inline void mult_tmatrix_nxd_vect_d(const float* tm, const float* vect, f
}
}
+// N.B. This requires all mm[] rows to be zero-padded at the end, and there
+// can't be nan values at the end of vect[]. (The other way around works too.)
static inline void mult_matrix_dxn_vect_n(const float* mm, const float* vect, float* dest, uint32_t col_ct, uint32_t row_ct) {
uintptr_t col_cta4 = round_up_pow2(col_ct, 4);
uint32_t row_idx = 0;
@@ -1599,7 +1601,7 @@ uint32_t logistic_regression(uint32_t sample_ct, uint32_t param_ct, float* vv, f
// Inputs:
// xx = covariate (and usually genotype) matrix, covariate-major, rows are
// 16-byte aligned, trailing row elements must be zeroed out
- // yy = case/control phenotype
+ // yy = case/control phenotype; trailing elements must be zeroed out
//
// Input/output:
// coef = starting point, overwritten with logistic regression result. Must
@@ -1719,6 +1721,11 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
*fptr++ = (float)((int32_t)is_set_ul(perm_vecs, sample_uidx));
}
}
+ // bugfix (13 Oct 2017): must guarantee trailing phenotype values are valid
+ // (exact contents don't matter since they are multiplied by zero, but they
+ // can't be nan)
+ const uint32_t trail_ct = (-sample_valid_ct) & 3;
+ fill_float_zero(trail_ct, fptr);
if (logistic_regression(sample_valid_ct, param_ct, sample_1d_buf, param_2d_buf, param_1d_buf, param_2d_buf2, param_1d_buf2, covars_cov_major, pheno_buf, coef, pp)) {
goto glm_logistic_fail;
}
diff --git a/plink_ld.c b/plink_ld.c
index 9ff8a24..0db5e15 100644
--- a/plink_ld.c
+++ b/plink_ld.c
@@ -4941,9 +4941,12 @@ uint32_t em_phase_hethet(double known11, double known12, double known21, double
}
} else {
solutions[0] = 0;
- if ((freq22 + SMALLISH_EPSILON < half_hethet_share + freq21) && (freq21 + SMALLISH_EPSILON < half_hethet_share + freq22)) {
+ // bugfix (6 Oct 2017): need to use all nonzero values here
+ const double nonzero_freq_xx = freq11 + freq22;
+ const double nonzero_freq_xy = freq12 + freq21;
+ if ((nonzero_freq_xx + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xy) && (nonzero_freq_xy + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xx)) {
sol_end_idx = 3;
- solutions[1] = (half_hethet_share + freq21 - freq22) * 0.5;
+ solutions[1] = (half_hethet_share + nonzero_freq_xy - nonzero_freq_xx) * 0.5;
solutions[2] = half_hethet_share;
} else {
sol_end_idx = 2;
@@ -8151,10 +8154,24 @@ int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_
}
}
} else {
+ // bugfix (6 Oct 2017):
+ // At least one of {f11, f22} is zero, and one of {f12, f21} is zero.
+ // Initially suppose that the zero-values are f11 and f12. Then the
+ // equality becomes
+ // x(f22 + x)(K - x) = x(K - x)(f21 + K - x)
+ // x=0 and x=K are always solutions; the rest becomes
+ // f22 + x = f21 + K - x
+ // 2x = K + f21 - f22
+ // x = (K + f21 - f22)/2; in-range iff (f21 - f22) in (-K, K).
+ // So far so good. However, this code used to *always* check
+ // (f21 - f22), when it's necessary to use all the nonzero values.
+ // (this still works if three or all four values are zero)
solutions[0] = 0;
- if ((freq22 + SMALLISH_EPSILON < half_hethet_share + freq21) && (freq21 + SMALLISH_EPSILON < half_hethet_share + freq22)) {
+ const double nonzero_freq_xx = freq11 + freq22;
+ const double nonzero_freq_xy = freq12 + freq21;
+ if ((nonzero_freq_xx + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xy) && (nonzero_freq_xy + SMALLISH_EPSILON < half_hethet_share + nonzero_freq_xx)) {
uljj = 3;
- solutions[1] = (half_hethet_share + freq21 - freq22) * 0.5;
+ solutions[1] = (half_hethet_share + nonzero_freq_xy - nonzero_freq_xx) * 0.5;
solutions[2] = half_hethet_share;
} else {
uljj = 2;
diff --git a/plink_misc.c b/plink_misc.c
index 9f425a0..09f356b 100644
--- a/plink_misc.c
+++ b/plink_misc.c
@@ -2947,8 +2947,11 @@ int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uin
*pzwritep++ = ' ';
pzwritep = fw_strcpy(4, major_ptr, pzwritep);
*pzwritep++ = ' ';
- pzwritep = uint32toa_w6x(2 * ll_cts[marker_uidx] + lh_cts[marker_uidx] + hapl_cts[marker_uidx], ' ', pzwritep);
- pzwritep = uint32toa_w6x(2 * hh_cts[marker_uidx] + lh_cts[marker_uidx] + haph_cts[marker_uidx], ' ', pzwritep);
+ // bugfix (13 Oct 2017): did not take reverse into account here.
+ const uint32_t l_ct = 2 * ll_cts[marker_uidx] + lh_cts[marker_uidx] + hapl_cts[marker_uidx];
+ const uint32_t h_ct = 2 * hh_cts[marker_uidx] + lh_cts[marker_uidx] + haph_cts[marker_uidx];
+ pzwritep = uint32toa_w6x(reverse? h_ct : l_ct, ' ', pzwritep);
+ pzwritep = uint32toa_w6x(reverse? l_ct : h_ct, ' ', pzwritep);
pzwritep = uint32toa_w6(missing_ct, pzwritep);
}
} else {
@@ -2961,6 +2964,7 @@ int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uin
pzwritep = fw_strcpy(4, major_ptr, pzwritep);
*pzwritep++ = ' ';
uii = 2 * (ll_cts[marker_uidx] + lh_cts[marker_uidx] + hh_cts[marker_uidx]) + hapl_cts[marker_uidx] + haph_cts[marker_uidx];
+ // set_allele_freqs[] already takes reverse into account.
if (maf_succ || uii || (set_allele_freqs[marker_uidx] != 0.5)) {
pzwritep = dtoa_g_wxp4(1.0 - set_allele_freqs[marker_uidx], 12, pzwritep);
} else {
diff --git a/plink_set.c b/plink_set.c
index 4eb6b9b..366ff97 100644
--- a/plink_set.c
+++ b/plink_set.c
@@ -357,9 +357,14 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
if (marker_pos) {
memcpy(ll_tmp->ss, bufptr3, uii);
} else {
+ // quasi-bugfix (7 Oct 2017): forgot to check for this
+ if (cur_chrom_code > 9999) {
+ logerrprint("Error: This command does not support 10000+ contigs.\n");
+ goto load_range_list_ret_INVALID_FORMAT;
+ }
uitoa_z4((uint32_t)cur_chrom_code, ll_tmp->ss);
- // if first character of gene name is a digit, natural sort has strange
- // effects unless we force [3] to be nonnumeric...
+ // if first character of gene name is a digit, natural sort has
+ // strange effects unless we force [3] to be nonnumeric...
ll_tmp->ss[3] -= 15;
memcpy(&(ll_tmp->ss[4]), bufptr3, uii - 4);
}
@@ -381,8 +386,7 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
} else {
logerrprint("Error: Empty --gene-report file.\n");
}
- retval = RET_INVALID_FORMAT;
- goto load_range_list_ret_1;
+ goto load_range_list_ret_INVALID_FORMAT;
}
}
LOGERRPRINTF("Warning: No valid ranges in %s file.\n", file_descrip);
@@ -2323,7 +2327,58 @@ int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t s
return retval;
}
-int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
+int32_t scrape_extra_chroms(const char* fname, const char* file_descrip, Chrom_info* chrom_info_ptr) {
+ // scan first column of file, add these to chromosome names.
+ // may want to add an option for this to search for "CHR"/"#CHROM" column
+ uintptr_t line_idx = 0;
+ FILE* infile = nullptr;
+ int32_t retval = 0;
+ {
+ if (fopen_checked(fname, "r", &infile)) {
+ goto scrape_extra_chroms_ret_OPEN_FAIL;
+ }
+ g_textbuf[MAXLINELEN - 1] = ' ';
+ while (fgets(g_textbuf, MAXLINELEN, infile)) {
+ ++line_idx;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, file_descrip);
+ goto scrape_extra_chroms_ret_INVALID_FORMAT_2;
+ }
+ char* first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*first_token)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(first_token);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - first_token);
+ *first_token_end = '\0';
+ int32_t dummy;
+ retval = get_or_add_chrom_code(first_token, file_descrip, line_idx, chrom_name_slen, 1, chrom_info_ptr, &dummy);
+ if (retval) {
+ goto scrape_extra_chroms_ret_1;
+ }
+ }
+ if (fclose_null(&infile)) {
+ goto scrape_extra_chroms_ret_READ_FAIL;
+ }
+ }
+ while (0) {
+ scrape_extra_chroms_ret_OPEN_FAIL:
+ retval = RET_OPEN_FAIL;
+ break;
+ scrape_extra_chroms_ret_READ_FAIL:
+ retval = RET_READ_FAIL;
+ break;
+ scrape_extra_chroms_ret_INVALID_FORMAT_2:
+ logerrprintb();
+ retval = RET_INVALID_FORMAT;
+ break;
+ }
+ scrape_extra_chroms_ret_1:
+ fclose_cond(infile);
+ return retval;
+}
+
+int32_t annotate(const Annot_info* aip, uint32_t allow_extra_chroms, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
gzFile gz_attribfile = nullptr;
@@ -2621,6 +2676,12 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
}
if (need_pos) {
if (aip->ranges_fname) {
+ if (allow_extra_chroms) {
+ retval = scrape_extra_chroms(aip->ranges_fname, "--annotate ranges file", chrom_info_ptr);
+ if (retval) {
+ goto annotate_ret_1;
+ }
+ }
if (aip->subset_fname) {
if (fopen_checked(aip->subset_fname, FOPEN_RB, &infile)) {
goto annotate_ret_OPEN_FAIL;
@@ -2673,6 +2734,12 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
}
bigstack_end_reset(bigstack_end_mark);
if (aip->filter_fname) {
+ if (allow_extra_chroms) {
+ retval = scrape_extra_chroms(aip->filter_fname, "--annotate filter file", chrom_info_ptr);
+ if (retval) {
+ goto annotate_ret_1;
+ }
+ }
retval = load_range_list_sortpos(aip->filter_fname, border, 0, nullptr, 0, chrom_info_ptr, &filter_range_ct, &filter_range_names, &max_filter_range_name_len, &chrom_filter_bounds, &filter_rangedefs, &chrom_max_filter_range_ct, "--annotate filter");
if (retval) {
goto annotate_ret_1;
@@ -3149,7 +3216,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
return retval;
}
-int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
+int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, uint32_t allow_extra_chroms, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
// similar to define_sets() and --clump
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
@@ -3277,6 +3344,14 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
bigstack_shrink_top(sorted_extract_ids, extract_ct * max_extract_id_len);
}
}
+ if (allow_extra_chroms) {
+ // ugh, silly for this to require 'file' at the end while load_range_list()
+ // does not
+ retval = scrape_extra_chroms(glist, "--gene-report file", chrom_info_ptr);
+ if (retval) {
+ goto gene_report_ret_1;
+ }
+ }
retval = load_range_list_sortpos(glist, 0, subset_ct, sorted_subset_ids, max_subset_id_len, chrom_info_ptr, &gene_ct, &gene_names, &max_gene_name_len, &chrom_bounds, &genedefs, &chrom_max_gene_ct, "--gene-report");
if (retval) {
goto gene_report_ret_1;
@@ -3321,7 +3396,7 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
if (linebuf_left < MAXLINELEN + 64) {
goto gene_report_ret_NOMEM;
}
- // mirror g_bigstack_base/g_bigstack_base since we'll be doing
+ // mirror g_bigstack_base/g_bigstack_end since we'll be doing
// nonstandard-size allocations
linebuf_top = (char*)g_bigstack_base;
gene_match_list_end = (uint64_t*)g_bigstack_end;
@@ -3536,12 +3611,21 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
gene_idx = gene_nameidx_to_chridx[ulii];
bufptr = &(gene_names[gene_idx * max_gene_name_len]);
fputs(&(bufptr[4]), outfile);
- fputs(" -- chr", outfile);
- if (bufptr[2] != '0') {
- putc_unlocked(bufptr[2], outfile);
+ // 53313 = ('0' * (1000 + 100 + 10)) + ('0' - 15)
+ chrom_idx = ((unsigned char)bufptr[0]) * 1000 + ((unsigned char)bufptr[1]) * 100 + ((unsigned char)bufptr[2]) * 10 + ((unsigned char)bufptr[3]) - 53313;
+ strcpy(g_textbuf, " -- ");
+ // plink 1.07 explicitly precedes chromosome codes with "chr" here.
+ // obviously "chrchr1" doesn't look right, and neither does
+ // chr[contig name], so make the chr prefix conditional.
+ bufptr = &(g_textbuf[4]);
+ if ((!(chrom_info_ptr->output_encoding & CHR_OUTPUT_PREFIX)) && ((chrom_idx <= ((int32_t)chrom_info_ptr->max_code)) || chrom_info_ptr->zero_extra_chroms)) {
+ bufptr = memcpyl3a(bufptr, "chr");
+ }
+ bufptr = chrom_name_write(chrom_info_ptr, chrom_idx, bufptr);
+ *bufptr++ = ':';
+ if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
+ goto gene_report_ret_WRITE_FAIL;
}
- putc_unlocked(bufptr[3] + 15, outfile);
- putc_unlocked(':', outfile);
uiptr = genedefs[gene_idx];
range_ct = *uiptr++;
ujj = 0; // gene length
diff --git a/plink_set.h b/plink_set.h
index 498b505..784fcb6 100644
--- a/plink_set.h
+++ b/plink_set.h
@@ -133,8 +133,8 @@ uint32_t setdefs_compress(Set_info* sip, uintptr_t* set_incl, uintptr_t set_ct,
int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t subset_ct, char* sorted_subset_ids, uintptr_t max_subset_id_len, Chrom_info* chrom_info_ptr, uintptr_t* gene_ct_ptr, char** gene_names_ptr, uintptr_t* max_gene_id_len_ptr, uintptr_t** chrom_bounds_ptr, uint32_t*** genedefs_ptr, uintptr_t* chrom_max_gene_ct_ptr, const char* file_descrip);
-int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
+int32_t annotate(const Annot_info* aip, uint32_t allow_extra_chroms, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
-int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
+int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t border, uint32_t allow_extra_chroms, char* extractname, const char* snp_field, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr);
#endif // __PLINK_SET_H__
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/plink1.9.git
More information about the debian-med-commit
mailing list