[med-svn] [plink1.9] 01/02: Imported Upstream version 1.90~b4.4-170531
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Tue Jun 6 14:06:03 UTC 2017
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository plink1.9.
commit 66f851f2fb7ae47643a99bca9b34dcb9da6a4dff
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Tue Jun 6 16:05:25 2017 +0200
Imported Upstream version 1.90~b4.4-170531
---
plink.c | 41 ++++--
plink_calc.c | 2 +-
plink_common.c | 4 +-
plink_common.h | 6 +-
plink_data.c | 10 +-
plink_family.c | 400 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
plink_family.h | 3 +
plink_glm.c | 280 ++++++++++++++++++++--------------------
plink_help.c | 13 +-
plink_matrix.c | 26 ++--
plink_misc.c | 18 ++-
11 files changed, 629 insertions(+), 174 deletions(-)
diff --git a/plink.c b/plink.c
index b28fbb9..4e3d37c 100644
--- a/plink.c
+++ b/plink.c
@@ -93,7 +93,7 @@
static const char ver_str[] =
#ifdef STABLE_BUILD
- "PLINK v1.90b4.1"
+ "PLINK v1.90b4.4"
#else
"PLINK v1.90p"
#endif
@@ -105,7 +105,7 @@ static const char ver_str[] =
#else
" 32-bit"
#endif
- " (30 Mar 2017)";
+ " (31 May 2017)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
""
@@ -145,9 +145,9 @@ static const char notestr_null_calc2[] = "Commands include --make-bed, --recode,
#endif
#else
#ifndef NOLAPACK
-static const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --pca, --neighbour, --ibs-test, --regress-distance,\n--model, --bd, --gxe, --logistic, --dosage, --lasso, --test-missing,\n--make-pe [...]
+static const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --pca, --neighbour, --ibs-test, --regress-distance,\n--model, --bd, --gxe, --logistic, --dosage, --lasso, --test-missing,\n--make-pe [...]
#else
-static const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --neighbour, --ibs-test, --regress-distance, --model,\n--bd, --gxe, --logistic, --dosage, --lasso, --test-missing, --make-perm-pheno [...]
+static const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --neighbour, --ibs-test, --regress-distance, --model,\n--bd, --gxe, --logistic, --dosage, --lasso, --test-missing, --make-perm-pheno [...]
#endif
#endif
@@ -294,14 +294,14 @@ static inline int32_t bed_suffix_conflict(uint64_t calculation_type, uint32_t re
}
static inline uint32_t are_marker_pos_needed(uint64_t calculation_type, uint64_t misc_flags, char* cm_map_fname, char* set_fname, uint32_t min_bp_space, uint32_t genome_skip_write, uint32_t ld_modifier, uint32_t epi_modifier, uint32_t cluster_modifier) {
- return (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE | CALC_GENOME | CALC_HOMOZYG | CALC_LD_PRUNE | CALC_REGRESS_PCS | CALC_MODEL | CALC_GLM | CALC_CLUMP | CALC_BLOCKS | CALC_FLIPSCAN | CALC_TDT | CALC_QFAM | CALC_FST | CALC_SHOW_TAGS | CALC_DUPVAR | CALC_RPLUGIN)) || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)) || cm_map_fname || set_fname || min_bp_space || genome_skip_write || ((calculation_type & CALC_LD) && (!(ld_modifier & LD_MATRIX_SHAPEMASK))) || [...]
+ return (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE | CALC_GENOME | CALC_HOMOZYG | CALC_LD_PRUNE | CALC_REGRESS_PCS | CALC_MODEL | CALC_GLM | CALC_CLUMP | CALC_BLOCKS | CALC_FLIPSCAN | CALC_TDT | CALC_QFAM | CALC_FST | CALC_SHOW_TAGS | CALC_DUPVAR | CALC_RPLUGIN | CALC_TUCC)) || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)) || cm_map_fname || set_fname || min_bp_space || genome_skip_write || ((calculation_type & CALC_LD) && (!(ld_modifier & LD_MATRIX_SHAP [...]
}
static inline uint32_t are_marker_cms_needed(uint64_t calculation_type, char* cm_map_fname, Two_col_params* update_cm, Ld_info* ldip) {
if ((calculation_type & CALC_LD) && (!(ldip->modifier & LD_MATRIX_SHAPEMASK)) && (ldip->window_cm != -1)) {
return MARKER_CMS_FORCED;
}
- if (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE)) {
+ if (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE | CALC_TUCC)) {
if (cm_map_fname || update_cm) {
return MARKER_CMS_FORCED;
} else {
@@ -312,7 +312,7 @@ static inline uint32_t are_marker_cms_needed(uint64_t calculation_type, char* cm
}
static inline uint32_t are_marker_alleles_needed(uint64_t calculation_type, char* freqname, Homozyg_info* homozyg_ptr, Two_col_params* a1alleles, Two_col_params* a2alleles, uint32_t ld_modifier, uint32_t snps_only, uint32_t clump_modifier, uint32_t cluster_modifier, uint32_t rel_modifier) {
- return (freqname || (calculation_type & (CALC_FREQ | CALC_HARDY | CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE | CALC_REGRESS_PCS | CALC_MODEL | CALC_GLM | CALC_LASSO | CALC_LIST_23_INDELS | CALC_EPI | CALC_TESTMISHAP | CALC_SCORE | CALC_MENDEL | CALC_TDT | CALC_FLIPSCAN | CALC_QFAM | CALC_HOMOG | CALC_DUPVAR | CALC_RPLUGIN | CALC_DFAM)) || ((calculation_type & CALC_HOMOZYG) && (homozyg_ptr->modifier & HOMOZYG_GROUP_VERBOSE)) || ((calculation_type & CALC_LD) && (ld_modifier & LD_INPHASE [...]
+ return (freqname || (calculation_type & (CALC_FREQ | CALC_HARDY | CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE | CALC_REGRESS_PCS | CALC_MODEL | CALC_GLM | CALC_LASSO | CALC_LIST_23_INDELS | CALC_EPI | CALC_TESTMISHAP | CALC_SCORE | CALC_MENDEL | CALC_TDT | CALC_FLIPSCAN | CALC_QFAM | CALC_HOMOG | CALC_DUPVAR | CALC_RPLUGIN | CALC_DFAM | CALC_TUCC)) || ((calculation_type & CALC_HOMOZYG) && (homozyg_ptr->modifier & HOMOZYG_GROUP_VERBOSE)) || ((calculation_type & CALC_LD) && (ld_modifier [...]
}
static inline int32_t relationship_or_ibc_req(uint64_t calculation_type) {
@@ -1539,6 +1539,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
goto plink_ret_1;
}
}
+
+ if (calculation_type & CALC_TUCC) {
+ retval = make_pseudocontrols(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, chrom_info_ptr, fam_ip);
+ if (retval) {
+ goto plink_ret_1;
+ }
+ }
}
if (calculation_type & CALC_MAKE_PERM_PHENO) {
@@ -3793,6 +3800,7 @@ int32_t main(int32_t argc, char** argv) {
if (!memcmp(argptr2, "12", 2)) {
memcpy(flagptr, "recode 12", 10);
recode_modifier |= RECODE_12;
+ // technically, this should also affect --tucc, but whatever
ujj = 1;
} else if (match_upper(argptr2, "AD")) {
memcpy(flagptr, "recode AD", 10);
@@ -11279,7 +11287,9 @@ int32_t main(int32_t argc, char** argv) {
logerrprint("Error: --standard-beta must be used with --linear or --dosage.\n");
goto main_ret_INVALID_CMDLINE_A;
}
- logprint("Note: --standard-beta flag deprecated. Use e.g. '--linear standard-beta'.\n");
+ // '--linear standard-beta' is ALSO deprecated now for plink2's
+ // --{covar-}variance-normalize, so this note is pointless.
+ // logprint("Note: --standard-beta flag deprecated. Use e.g. '--linear standard-beta'.\n");
glm_modifier |= GLM_STANDARD_BETA;
goto main_param_zero;
} else if (!memcmp(argptr2, "et-table", 9)) {
@@ -12124,6 +12134,21 @@ int32_t main(int32_t argc, char** argv) {
}
ld_info.modifier |= LD_SHOW_TAGS_MODE2;
goto main_param_zero;
+ } else if (!memcmp(argptr2, "ucc", 4)) {
+ UNSTABLE("tucc");
+ if (enforce_param_ct_range(param_ct, argv[cur_arg], 0, 1)) {
+ goto main_ret_INVALID_CMDLINE_2A;
+ }
+ if (param_ct) {
+ if (strcmp(argv[cur_arg + 1], "write-bed")) {
+ sprintf(g_logbuf, "Error: Invalid --tucc parameter '%s'.\n", argv[cur_arg + 1]);
+ goto main_ret_INVALID_CMDLINE;
+ }
+ family_info.tucc_bed = 1;
+ } else {
+ logerrprint("Warning: --tucc without 'write-bed' is deprecated.\n");
+ }
+ calculation_type |= CALC_TUCC;
} else {
goto main_ret_INVALID_CMDLINE_UNRECOGNIZED;
}
diff --git a/plink_calc.c b/plink_calc.c
index 6586372..63f76b5 100644
--- a/plink_calc.c
+++ b/plink_calc.c
@@ -5531,7 +5531,7 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
return retval;
}
-inline void rel_cut_arr_dec(int32_t* rel_ct_arr_elem, uint32_t* exactly_one_rel_ct_ptr) {
+static inline void rel_cut_arr_dec(int32_t* rel_ct_arr_elem, uint32_t* exactly_one_rel_ct_ptr) {
int32_t rcae = *rel_ct_arr_elem - 1;
*rel_ct_arr_elem = rcae;
if (rcae < 2) {
diff --git a/plink_common.c b/plink_common.c
index 255e5d7..e6b8021 100644
--- a/plink_common.c
+++ b/plink_common.c
@@ -6426,7 +6426,9 @@ uint32_t window_back(const uint32_t* __restrict marker_pos, const double* __rest
cur_word &= cur_word - 1;
uii--;
}
- marker_uwidx_cur += CTZLU(cur_word);
+ // bugfix (7 May 2017): forgot to round marker_uwidx_cur down to word
+ // boundary, before adding CTZLU(cur_word) offset
+ marker_uwidx_cur = (marker_uwidx_cur & (~(BITCT - ONELU))) + CTZLU(cur_word);
if ((marker_pos[marker_uwidx_cur] < min_pos) || (marker_cms && (marker_cms[marker_uwidx_cur] < min_cm))) {
goto window_back_find_first_pos;
}
diff --git a/plink_common.h b/plink_common.h
index e96a04e..28e6728 100644
--- a/plink_common.h
+++ b/plink_common.h
@@ -29,7 +29,10 @@
#include <stdint.h>
#include <inttypes.h>
-#define NDEBUG
+// avoid compiler warning
+#ifndef NDEBUG
+ #define NDEBUG
+#endif
#include <assert.h>
// Uncomment this to build this without CBLAS/CLAPACK.
@@ -418,6 +421,7 @@
#define CALC_DUPVAR 0x100000000000000LLU
#define CALC_RPLUGIN 0x200000000000000LLU
#define CALC_DFAM 0x400000000000000LLU
+#define CALC_TUCC 0x800000000000000LLU
#define CALC_ONLY_BIM (CALC_WRITE_SET | CALC_WRITE_SNPLIST | CALC_WRITE_VAR_RANGES | CALC_LIST_23_INDELS | CALC_MAKE_BIM | CALC_DUPVAR)
#define CALC_ONLY_FAM (CALC_MAKE_PERM_PHENO | CALC_WRITE_COVAR | CALC_MAKE_FAM)
// only room for 6 more basic commands before we need to switch from a single
diff --git a/plink_data.c b/plink_data.c
index 23e789c..972aa21 100644
--- a/plink_data.c
+++ b/plink_data.c
@@ -10464,10 +10464,10 @@ int32_t generate_dummy(char* outname, char* outname_end, uint32_t flags, uintptr
double missing_phenod = (double)missing_pheno;
uint32_t dbl_sample_mod4 = 2 * (sample_ct % 4);
uint32_t four_alleles = 0;
- uint32_t geno_m_check = (geno_mrate > 0.0);
- uint32_t geno_m32 = (uint32_t)(geno_mrate * 4294967296.0);
- uint32_t pheno_m_check = (pheno_mrate > 0.0);
- uint32_t pheno_m32 = (uint32_t)(geno_mrate * 4294967296.0);
+ uint32_t geno_m_check = (geno_mrate >= RECIP_2_32 * 0.5);
+ uint32_t geno_m32 = (uint32_t)(geno_mrate * 4294967296.0 - 0.5);
+ uint32_t pheno_m_check = (pheno_mrate >= RECIP_2_32 * 0.5);
+ uint32_t pheno_m32 = (uint32_t)(pheno_mrate * 4294967296.0 - 0.5);
uint32_t saved_rnormal = 0;
int32_t retval = 0;
char wbuf[64];
@@ -10629,7 +10629,7 @@ int32_t generate_dummy(char* outname, char* outname_end, uint32_t flags, uintptr
}
ucc = 0;
for (ukk = 0; ukk < 8; ukk += 2) {
- if (geno_m_check && (sfmt_genrand_uint32(&g_sfmt) < geno_m32)) {
+ if (geno_m_check && (sfmt_genrand_uint32(&g_sfmt) <= geno_m32)) {
ucc2 = 1;
} else {
ucc2 = urand & 3;
diff --git a/plink_family.c b/plink_family.c
index 01c4adb..b7bbb44 100644
--- a/plink_family.c
+++ b/plink_family.c
@@ -34,6 +34,7 @@ void family_init(Family_info* fam_ip) {
fam_ip->dfam_mperm_val = 0;
fam_ip->qfam_modifier = 0;
fam_ip->qfam_mperm_val = 0;
+ fam_ip->tucc_bed = 0;
}
// bottom 2 bits of index = child genotype
@@ -77,7 +78,8 @@ int32_t get_trios_and_families(uintptr_t unfiltered_sample_ct, uintptr_t* sample
// family_list has paternal indices in low 32 bits, maternal indices in high
// 32, sorted in child ID order.
// trio_list has child IDs in low 32 bits, family_list indices in high 32
- // bits, in PLINK 1.07 reporting order.
+ // bits, in PLINK 1.07 reporting order. (Actually, reporting order may
+ // change, but this isn't a big deal.)
// If include_duos is set, missing parental IDs are coded as
// unfiltered_sample_ct. include_duos must be 0 or 1.
// If toposort is set, trio_lookup has child IDs in [4n], paternal IDs in
@@ -3702,7 +3704,7 @@ THREAD_RET_TYPE dfam_perm_thread(void* arg) {
}
}
} else {
- for (perm_idx = 0; perm_idx < perm_vec_ct;) {
+ for (perm_idx = 0; perm_idx < perm_vec_ct; ++perm_idx) {
dxx = numers[perm_idx] + ((double)((int32_t)twice_numers[perm_idx])) * 0.5;
dyy = denoms[perm_idx] + ((double)((int32_t)quad_denom)) * 0.25;
chisq = dxx * dxx / dyy;
@@ -5948,3 +5950,397 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
fclose_cond(outfile);
return retval;
}
+
+// bottom 2 bits of index = child genotype
+// middle 2 bits of index = paternal genotype
+// top 2 bits of index = maternal genotype
+
+// bottom 2 bits of result = child genotype
+// top 2 bits of result = untransmitted genotype
+const uintptr_t tucc_table[] =
+{0, 5, 5, 5,
+ 5, 5, 5, 5,
+ 8, 5, 2, 5,
+ 5, 5, 10, 5,
+ 5, 5, 5, 5,
+ 5, 5, 5, 5,
+ 5, 5, 5, 5,
+ 5, 5, 5, 5,
+ 8, 5, 2, 5,
+ 5, 5, 5, 5,
+ 12, 5, 10, 3,
+ 5, 5, 14, 11,
+ 5, 5, 10, 5,
+ 5, 5, 5, 5,
+ 5, 5, 14, 11,
+ 5, 5, 5, 15};
+
+int32_t make_pseudocontrols(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_blen, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t max_marker_allele_blen, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* founder_info, uintptr_t* sex_nm, uintptr_t* [...]
+ unsigned char* bigstack_mark = g_bigstack_base;
+ FILE* outfile = NULL;
+ int32_t retval = 0;
+ {
+ const uint32_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
+ const uint32_t sex_mt_variant_ct = count_non_autosomal_markers(chrom_info_ptr, marker_exclude, 1, 1);
+ if (sex_mt_variant_ct) {
+ LOGPRINTF("Excluding %u X/MT/haploid variant%s from --tucc dataset.\n", sex_mt_variant_ct, (sex_mt_variant_ct == 1)? "" : "s");
+ if (sex_mt_variant_ct == marker_ct) {
+ logerrprint("Error: No variants remaining for --tucc.\n");
+ goto make_pseudocontrols_ret_INVALID_CMDLINE;
+ }
+ marker_ct -= sex_mt_variant_ct;
+ uintptr_t* marker_exclude_new;
+ if (bigstack_alloc_ul(unfiltered_marker_ctl, &marker_exclude_new)) {
+ goto make_pseudocontrols_ret_NOMEM;
+ }
+ memcpy(marker_exclude_new, marker_exclude, unfiltered_marker_ctl * sizeof(intptr_t));
+ for (uint32_t chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; ++chrom_fo_idx) {
+ const uint32_t chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ if (is_set(chrom_info_ptr->haploid_mask, chrom_idx) || ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ const uint32_t variant_uidx_start = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ fill_bits(variant_uidx_start, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1] - variant_uidx_start, marker_exclude_new);
+ }
+ }
+ marker_exclude = marker_exclude_new;
+ } else if (is_set(chrom_info_ptr->haploid_mask, 0)) {
+ logerrprint("Error: --tucc test does not support haploid data.\n");
+ goto make_pseudocontrols_ret_INVALID_CMDLINE;
+ }
+ const uint32_t multigen = (fam_ip->mendel_modifier / MENDEL_MULTIGEN) & 1;
+ uint64_t* family_list;
+ uint64_t* trio_list;
+ uint32_t* trio_error_lookup;
+ uint32_t family_ct;
+ uintptr_t trio_ct;
+ retval = get_trios_and_families(unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_blen, paternal_ids, max_paternal_id_blen, maternal_ids, max_maternal_id_blen, nullptr, nullptr, nullptr, nullptr, &family_list, &family_ct, &trio_list, &trio_ct, &trio_error_lookup, 0, multigen);
+ if (retval) {
+ goto make_pseudocontrols_ret_1;
+ }
+ if (!trio_ct) {
+ LOGERRPRINTF("Warning: Skipping --tucc since there are no trios.\n");
+ goto make_pseudocontrols_ret_1;
+ } else if (trio_ct > 0x3fffffff) {
+ logerrprint("Error: Too many trios for --tucc.\n");
+ goto make_pseudocontrols_ret_INVALID_CMDLINE;
+ }
+ // main write buffer size requirements:
+ // write-bed:
+ // MAXLINELEN + max(max_sample_id_blen + 16,
+ // max_chrom_slen + max_marker_id_blen +
+ // 2 * max_marker_allele_blen + 48,
+ // trio_ct2 + sizeof(intptr_t) - 1)
+ // ped:
+ // MAXLINELEN + max(max_sample_id_blen + 16,
+ // 2 * max_marker_allele_blen)
+ const uint32_t write_bed = fam_ip->tucc_bed;
+ const uintptr_t trio_ct2 = (trio_ct + 1) / 2;
+ char* chrom_buf = nullptr;
+ uintptr_t writebuf_blen = max_sample_id_blen + 16;
+ if (write_bed) {
+ if (writebuf_blen < trio_ct2 + BYTECT - 1) {
+ writebuf_blen = trio_ct2 + BYTECT - 1;
+ }
+ const uint32_t max_chrom_slen = get_max_chrom_slen(chrom_info_ptr);
+ if (bigstack_alloc_c(max_chrom_slen + 1, &chrom_buf)) {
+ goto make_pseudocontrols_ret_NOMEM;
+ }
+ if (writebuf_blen < max_chrom_slen + max_marker_id_blen + 2 * max_marker_id_blen + 48) {
+ writebuf_blen = max_chrom_slen + max_marker_id_blen + 2 * max_marker_id_blen + 48;
+ }
+ } else {
+ if (writebuf_blen < 2 * max_marker_allele_blen) {
+ writebuf_blen = 2 * max_marker_allele_blen;
+ }
+ }
+ writebuf_blen += MAXLINELEN;
+ const uintptr_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
+ const uintptr_t unfiltered_sample_ctp1l2 = 1 + (unfiltered_sample_ct / BITCT2);
+ uintptr_t* loadbuf;
+ uintptr_t* workbuf;
+ char* writebuf;
+ if (bigstack_alloc_ul(unfiltered_sample_ctl2m1 + 1, &loadbuf) ||
+ bigstack_alloc_ul(unfiltered_sample_ctp1l2, &workbuf) ||
+ bigstack_alloc_c(writebuf_blen, &writebuf)) {
+ goto make_pseudocontrols_ret_NOMEM;
+ }
+ loadbuf[unfiltered_sample_ctl2m1] = 0;
+ workbuf[unfiltered_sample_ctp1l2 - 1] = 0;
+ char* writebuf_flush = &(writebuf[MAXLINELEN]);
+ const char* output_missing_geno_ptr = g_output_missing_geno_ptr;
+ unsigned char* new_bed_contents = nullptr;
+ unsigned char* uwrite_iter;
+ if (write_bed) {
+ strcpy(outname_end, ".tucc.fam");
+ if (fopen_checked(outname, "wb", &outfile)) {
+ goto make_pseudocontrols_ret_OPEN_FAIL;
+ }
+ char* write_iter = writebuf;
+ for (uintptr_t trio_idx = 0; trio_idx < trio_ct; ++trio_idx) {
+ const uintptr_t child_uidx = (uint32_t)trio_list[trio_idx];
+ const char* child_sample_id = (const char*)(&(sample_ids[child_uidx * max_sample_id_blen]));
+ const uint32_t child_sample_id_slen = strlen(child_sample_id);
+ const char child_sex_char = sexchar(sex_nm, sex_male, child_uidx);
+ for (uint32_t is_pseudocontrol = 0; is_pseudocontrol < 2; ++is_pseudocontrol) {
+ write_iter = memcpyax(write_iter, child_sample_id, child_sample_id_slen, '_');
+ *write_iter++ = 'T' + is_pseudocontrol;
+ write_iter = strcpya(write_iter, "\t0\t0\t");
+ *write_iter++ = child_sex_char;
+ *write_iter++ = '\t';
+ *write_iter++ = '2' - is_pseudocontrol;
+#ifdef _WIN32
+ *write_iter++ = '\r';
+#endif
+ *write_iter++ = '\n';
+ if (write_iter >= writebuf_flush) {
+ if (fwrite_checked(writebuf, write_iter - writebuf, outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ write_iter = writebuf;
+ }
+ }
+ }
+ if (write_iter != writebuf) {
+ if (fwrite_checked(writebuf, write_iter - writebuf, outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ }
+ if (fclose_null(&outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+
+ memcpy(&(outname_end[6]), "bi", 2);
+ if (fopen_checked(outname, "wb", &outfile)) {
+ goto make_pseudocontrols_ret_OPEN_FAIL;
+ }
+ write_iter = writebuf;
+ const char* missing_geno_ptr = g_missing_geno_ptr;
+ uint32_t variant_uidx = 0;
+ uint32_t chrom_fo_idx = 0xffffffffU;
+ uint32_t chrom_end = 0;
+ uint32_t chrom_name_blen = 0;
+ for (uint32_t variant_idx = 0; variant_idx < marker_ct; ++variant_idx, ++variant_uidx) {
+ next_unset_unsafe_ck(marker_exclude, &variant_uidx);
+ if (variant_uidx >= chrom_end) {
+ do {
+ ++chrom_fo_idx;
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ } while (variant_uidx >= chrom_end);
+ const uint32_t chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ char* chrom_name_end = chrom_name_write(chrom_info_ptr, chrom_idx, chrom_buf);
+ *chrom_name_end = '\t';
+ chrom_name_blen = 1 + (uintptr_t)(chrom_name_end - chrom_buf);
+ }
+ write_iter = memcpya(write_iter, chrom_buf, chrom_name_blen);
+ write_iter = strcpyax(write_iter, &(marker_ids[variant_uidx * max_marker_id_blen]), '\t');
+ if (!marker_cms) {
+ *write_iter++ = '0';
+ } else {
+ write_iter = dtoa_g_wxp8(marker_cms[variant_uidx], 1, write_iter);
+ }
+ *write_iter++ = '\t';
+ write_iter = uint32toa_x(marker_pos[variant_uidx], '\t', write_iter);
+ write_iter = strcpyax(write_iter, cond_replace(marker_allele_ptrs[2 * variant_uidx], missing_geno_ptr, output_missing_geno_ptr), '\t');
+ write_iter = strcpya(write_iter, cond_replace(marker_allele_ptrs[2 * variant_uidx + 1], missing_geno_ptr, output_missing_geno_ptr));
+#ifdef _WIN32
+ *write_iter++ = '\r';
+#endif
+ *write_iter++ = '\n';
+ if (write_iter >= writebuf_flush) {
+ if (fwrite_checked(writebuf, write_iter - writebuf, outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ write_iter = writebuf;
+ }
+ }
+ if (write_iter != writebuf) {
+ if (fwrite_checked(writebuf, write_iter - writebuf, outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ }
+ if (fclose_null(&outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+
+ memcpy(&(outname_end[7]), "ed", 2);
+ if (fopen_checked(outname, "wb", &outfile)) {
+ goto make_pseudocontrols_ret_OPEN_FAIL;
+ }
+ uwrite_iter = (unsigned char*)memcpyl3a(writebuf, "l\x1b\x01");
+ } else {
+ if (bigstack_alloc_uc(trio_ct2 * marker_ct + BYTECT, &new_bed_contents)) {
+ goto make_pseudocontrols_ret_NOMEM;
+ }
+ uwrite_iter = new_bed_contents;
+
+ // never flush
+ writebuf_flush = (char*)(&(new_bed_contents[trio_ct2 * marker_ct + BYTECT]));
+ }
+ if (fseeko(bedfile, bed_offset, SEEK_SET)) {
+ goto make_pseudocontrols_ret_READ_FAIL;
+ }
+ const uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
+ const uintptr_t final_mask = get_final_mask(unfiltered_sample_ct);
+ const uint32_t write_word_ct_m1 = (trio_ct - 1) / (BITCT / 4);
+ uint32_t variant_uidx = 0;
+ for (uint32_t variant_idx = 0; variant_idx < marker_ct; ++variant_idx, ++variant_uidx) {
+ if (!is_set(marker_exclude, variant_uidx)) {
+ variant_uidx = next_unset_unsafe(marker_exclude, variant_uidx);
+ if (fseeko(bedfile, bed_offset + ((uint64_t)variant_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
+ goto make_pseudocontrols_ret_READ_FAIL;
+ }
+ }
+ if (load_raw2(unfiltered_sample_ct4, unfiltered_sample_ctl2m1, final_mask, bedfile, loadbuf)) {
+ goto make_pseudocontrols_ret_READ_FAIL;
+ }
+ if (IS_SET(marker_reverse, variant_uidx)) {
+ reverse_loadbuf(unfiltered_sample_ct, (unsigned char*)loadbuf);
+ }
+ // 1. iterate through all trios, setting Mendel errors to missing
+ // 2. iterate through trio_list:
+ // a. if child or either parent isn't genotyped, save missing genos
+ // b. otherwise, save appropriate 4-bit genotype pair
+ erase_mendel_errors(unfiltered_sample_ct, loadbuf, workbuf, sex_male, trio_error_lookup, trio_ct, 0, multigen);
+ uint32_t loop_len = BITCT / 4;
+ const uint64_t* trio_list_iter = trio_list;
+ uintptr_t* uwrite_alias = (uintptr_t*)uwrite_iter;
+ uint32_t widx = 0;
+ while (1) {
+ if (widx >= write_word_ct_m1) {
+ if (widx > write_word_ct_m1) {
+ break;
+ }
+ loop_len = 1 + ((trio_ct - 1) % (BITCT / 4));
+ }
+ uintptr_t cur_write_word = 0;
+ for (uint32_t trio_idx_lowbits = 0; trio_idx_lowbits < loop_len; ++trio_idx_lowbits) {
+ const uint64_t trio_code = *trio_list_iter++;
+ const uint64_t family_code = family_list[trio_code >> 32];
+ const uint32_t table_index = EXTRACT_2BIT_GENO(loadbuf, ((uint32_t)trio_code)) + 4 * EXTRACT_2BIT_GENO(loadbuf, ((uint32_t)family_code)) + 16 * EXTRACT_2BIT_GENO(loadbuf, (family_code >> 32));
+ cur_write_word |= tucc_table[table_index] << (trio_idx_lowbits * 4);
+ }
+ uwrite_alias[widx++] = cur_write_word;
+ }
+ uwrite_iter = &(uwrite_iter[trio_ct2]);
+ if (uwrite_iter >= ((unsigned char*)writebuf_flush)) {
+ if (fwrite_checked(writebuf, uwrite_iter - ((unsigned char*)writebuf), outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ uwrite_iter = (unsigned char*)writebuf;
+ }
+ }
+ if (write_bed) {
+ if (uwrite_iter != (unsigned char*)writebuf) {
+ if (fwrite_checked(writebuf, uwrite_iter - ((unsigned char*)writebuf), outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ }
+ if (fclose_null(&outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ outname_end[6] = '\0';
+ LOGPRINTFWW("--tucc write-bed: Pseudo cases/controls written to %sbed + %sbim + %sfam .\n", outname, outname, outname);
+ } else {
+ strcpy(outname_end, ".tucc.ped");
+ if (fopen_checked(outname, "wb", &outfile)) {
+ goto make_pseudocontrols_ret_OPEN_FAIL;
+ }
+ writebuf_flush = &(writebuf[MAXLINELEN]);
+ char missing4[4];
+ missing4[0] = ' ';
+ missing4[1] = *output_missing_geno_ptr;
+ missing4[2] = ' ';
+ missing4[3] = *output_missing_geno_ptr;
+ char* write_iter = writebuf;
+ for (uintptr_t trio_idx = 0; trio_idx < trio_ct; ++trio_idx) {
+ const uintptr_t child_uidx = (uint32_t)trio_list[trio_idx];
+ const char* child_fid = (const char*)(&(sample_ids[child_uidx * max_sample_id_blen]));
+ const char* iid_start = (const char*)memchr(child_fid, '\t', max_sample_id_blen);
+ const uint32_t fid_slen = (uintptr_t)(iid_start - child_fid);
+ ++iid_start;
+ const uint32_t iid_slen = strlen(iid_start);
+ const char child_sex_char = sexchar(sex_nm, sex_male, child_uidx);
+ for (uint32_t is_pseudocontrol = 0; is_pseudocontrol < 2; ++is_pseudocontrol) {
+ write_iter = memcpyax(write_iter, child_fid, fid_slen, ' ');
+ write_iter = memcpyax(write_iter, iid_start, iid_slen, '_');
+ *write_iter++ = 'T' + is_pseudocontrol;
+ write_iter = strcpya(write_iter, " 0 0 ");
+ *write_iter++ = child_sex_char;
+ *write_iter++ = ' ';
+ *write_iter++ = '2' - is_pseudocontrol;
+ *write_iter++ = ' ';
+ if (write_iter >= writebuf_flush) {
+ if (fwrite_checked(writebuf, write_iter - writebuf, outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ write_iter = writebuf;
+ }
+ variant_uidx = 0;
+ const unsigned char* cur_geno_base = &(new_bed_contents[trio_idx / 2]);
+ const uint32_t cur_shift = ((trio_idx % 2) * 2 + is_pseudocontrol) * 2;
+ for (uint32_t variant_idx = 0; variant_idx < marker_ct; ++variant_idx, ++variant_uidx) {
+ next_unset_unsafe_ck(marker_exclude, &variant_uidx);
+ const uint32_t cur_geno = (cur_geno_base[variant_uidx * trio_ct2] >> cur_shift) & 3;
+ if (cur_geno == 1) {
+ write_iter = memcpya(write_iter, missing4, 4);
+ } else {
+ *write_iter++ = ' ';
+ const char* a1_allele = marker_allele_ptrs[2 * variant_uidx];
+ const char* a2_allele = marker_allele_ptrs[2 * variant_uidx + 1];
+ if (cur_geno == 3) {
+ write_iter = strcpya(write_iter, a2_allele);
+ } else {
+ write_iter = strcpya(write_iter, a1_allele);
+ }
+ *write_iter++ = ' ';
+ if (cur_geno == 0) {
+ write_iter = strcpya(write_iter, a1_allele);
+ } else {
+ write_iter = strcpya(write_iter, a2_allele);
+ }
+ }
+ if (write_iter >= writebuf_flush) {
+ if (fwrite_checked(writebuf, write_iter - writebuf, outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ write_iter = writebuf;
+ }
+ }
+#ifdef _WIN32
+ *write_iter++ = '\r';
+#endif
+ *write_iter++ = '\n';
+ }
+ }
+ if (write_iter != writebuf_flush) {
+ if (fwrite_checked(writebuf, write_iter - writebuf, outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ }
+ if (fclose_null(&outfile)) {
+ goto make_pseudocontrols_ret_WRITE_FAIL;
+ }
+ LOGPRINTFWW("--tucc: Pseudo cases/controls written to %s .\n", outname);
+ }
+ }
+ while (0) {
+ make_pseudocontrols_ret_NOMEM:
+ retval = RET_NOMEM;
+ break;
+ make_pseudocontrols_ret_OPEN_FAIL:
+ retval = RET_OPEN_FAIL;
+ break;
+ make_pseudocontrols_ret_READ_FAIL:
+ retval = RET_READ_FAIL;
+ break;
+ make_pseudocontrols_ret_WRITE_FAIL:
+ retval = RET_WRITE_FAIL;
+ break;
+ make_pseudocontrols_ret_INVALID_CMDLINE:
+ retval = RET_INVALID_CMDLINE;
+ break;
+ }
+ make_pseudocontrols_ret_1:
+ bigstack_reset(bigstack_mark);
+ fclose_cond(outfile);
+ return retval;
+}
diff --git a/plink_family.h b/plink_family.h
index f546f20..b1569e3 100644
--- a/plink_family.h
+++ b/plink_family.h
@@ -62,6 +62,7 @@ typedef struct {
uint32_t dfam_mperm_val;
uint32_t qfam_modifier;
uint32_t qfam_mperm_val;
+ uint32_t tucc_bed;
} Family_info;
void family_init(Family_info* fam_ip);
@@ -105,4 +106,6 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, Aperm_info* apip, uintptr_t* pheno_nm, double* pheno_d, uintptr_t* founder_info, u [...]
+int32_t make_pseudocontrols(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_blen, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t max_marker_allele_blen, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* founder_info, uintptr_t* sex_nm, uintptr_t* [...]
+
#endif // __PLINK_FAMILY_H__
diff --git a/plink_glm.c b/plink_glm.c
index 405f8a4..45faaa1 100644
--- a/plink_glm.c
+++ b/plink_glm.c
@@ -2078,54 +2078,36 @@ uint32_t glm_fill_design(uintptr_t* loadbuf_collapsed, double* fixed_covars_cov_
copy_when_nonmissing(loadbuf_collapsed, (char*)(&(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct])), sizeof(double), sample_valid_ct, missing_ct, (char*)dptr);
dptr = &(dptr[cur_sample_valid_ct]);
}
- if (interactions_present) {
- if (is_set(active_params, sex_start_idx + 1)) {
- ulptr = loadbuf_collapsed;
- ulptr_end = ulptr_end_init;
- sample_idx = 0;
- sample_idx_stop = BITCT2;
- dptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
- if (!(coding_flags & GLM_HETHOM)) {
- if (!male_x_01) {
- while (1) {
- while (ulptr < ulptr_end) {
- cur_word = *ulptr++;
- for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
- cur_genotype = cur_word & 3;
- if (cur_genotype != 1) {
- // 0/1/2
- *dptr++ = ((double)((intptr_t)(2 + (cur_genotype / 2) - cur_genotype))) * (*dptr2);
- }
- dptr2++;
+ // bugfix (21 May 2017): if no covariates, interactions_present is false
+ // even in "--linear sex interaction" case. so we always check
+ // active_params[] (allocating the necessary extra bits even when
+ // 'interaction' was not specified).
+ if (is_set(active_params, sex_start_idx + 1)) {
+ ulptr = loadbuf_collapsed;
+ ulptr_end = ulptr_end_init;
+ sample_idx = 0;
+ sample_idx_stop = BITCT2;
+ dptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
+ if (!(coding_flags & GLM_HETHOM)) {
+ if (!male_x_01) {
+ while (1) {
+ while (ulptr < ulptr_end) {
+ cur_word = *ulptr++;
+ for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
+ cur_genotype = cur_word & 3;
+ if (cur_genotype != 1) {
+ // 0/1/2
+ *dptr++ = ((double)((intptr_t)(2 + (cur_genotype / 2) - cur_genotype))) * (*dptr2);
}
- sample_idx_stop += BITCT2;
- }
- if (sample_idx == sample_valid_ct) {
- break;
+ dptr2++;
}
- ulptr_end++;
- sample_idx_stop = sample_valid_ct;
+ sample_idx_stop += BITCT2;
}
- } else {
- while (1) {
- while (ulptr < ulptr_end) {
- cur_word = *ulptr++;
- for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
- cur_genotype = cur_word & 3;
- if (cur_genotype != 1) {
- // 0/1/2
- *dptr++ = ((double)((intptr_t)((2 + (cur_genotype / 2) - cur_genotype) >> IS_SET(sex_male_collapsed, sample_idx)))) * (*dptr2);
- }
- dptr2++;
- }
- sample_idx_stop += BITCT2;
- }
- if (sample_idx == sample_valid_ct) {
- break;
- }
- ulptr_end++;
- sample_idx_stop = sample_valid_ct;
+ if (sample_idx == sample_valid_ct) {
+ break;
}
+ ulptr_end++;
+ sample_idx_stop = sample_valid_ct;
}
} else {
while (1) {
@@ -2134,8 +2116,8 @@ uint32_t glm_fill_design(uintptr_t* loadbuf_collapsed, double* fixed_covars_cov_
for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
cur_genotype = cur_word & 3;
if (cur_genotype != 1) {
- // 0/0/1
- *dptr++ = ((double)(cur_genotype == 0)) * (*dptr2);
+ // 0/1/2
+ *dptr++ = ((double)((intptr_t)((2 + (cur_genotype / 2) - cur_genotype) >> IS_SET(sex_male_collapsed, sample_idx)))) * (*dptr2);
}
dptr2++;
}
@@ -2148,21 +2130,15 @@ uint32_t glm_fill_design(uintptr_t* loadbuf_collapsed, double* fixed_covars_cov_
sample_idx_stop = sample_valid_ct;
}
}
- }
- if (genotypic_or_hethom && (!is_nonx_haploid) && is_set(active_params, sex_start_idx + 2)) {
- ulptr = loadbuf_collapsed;
- ulptr_end = ulptr_end_init;
- sample_idx = 0;
- sample_idx_stop = BITCT2;
- dptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
+ } else {
while (1) {
while (ulptr < ulptr_end) {
cur_word = *ulptr++;
for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
cur_genotype = cur_word & 3;
if (cur_genotype != 1) {
- // 0/1/0
- *dptr++ = ((double)(cur_genotype == 2)) * (*dptr2);
+ // 0/0/1
+ *dptr++ = ((double)(cur_genotype == 0)) * (*dptr2);
}
dptr2++;
}
@@ -2176,6 +2152,32 @@ uint32_t glm_fill_design(uintptr_t* loadbuf_collapsed, double* fixed_covars_cov_
}
}
}
+ if (genotypic_or_hethom && (!is_nonx_haploid) && is_set(active_params, sex_start_idx + 2)) {
+ ulptr = loadbuf_collapsed;
+ ulptr_end = ulptr_end_init;
+ sample_idx = 0;
+ sample_idx_stop = BITCT2;
+ dptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
+ while (1) {
+ while (ulptr < ulptr_end) {
+ cur_word = *ulptr++;
+ for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
+ cur_genotype = cur_word & 3;
+ if (cur_genotype != 1) {
+ // 0/1/0
+ *dptr++ = ((double)(cur_genotype == 2)) * (*dptr2);
+ }
+ dptr2++;
+ }
+ sample_idx_stop += BITCT2;
+ }
+ if (sample_idx == sample_valid_ct) {
+ break;
+ }
+ ulptr_end++;
+ sample_idx_stop = sample_valid_ct;
+ }
+ }
}
// if (dptr != &(cur_covars_cov_major[cur_param_ct * cur_sample_valid_ct])) {
// printf("assert failure:\n cur_param_ct = %u\n cur_sample_valid_ct = %" PRIuPTR "\n dptr - cur_covars_cov_major = %" PRIuPTR "\n", cur_param_ct, cur_sample_valid_ct, (uintptr_t)(dptr - cur_covars_cov_major));
@@ -2506,22 +2508,42 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
fill_float_zero(align_skip, &(fptr[cur_sample_valid_ct]));
fptr = &(fptr[cur_sample_valid_cta4]);
}
- if (interactions_present) {
- if (is_set(active_params, sex_start_idx + 1)) {
- ulptr = loadbuf_collapsed;
- ulptr_end = ulptr_end_init;
- sample_idx = 0;
- sample_idx_stop = BITCT2;
- fptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
- if (coding_flags & GLM_DOMINANT) {
+ if (is_set(active_params, sex_start_idx + 1)) {
+ ulptr = loadbuf_collapsed;
+ ulptr_end = ulptr_end_init;
+ sample_idx = 0;
+ sample_idx_stop = BITCT2;
+ fptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
+ if (coding_flags & GLM_DOMINANT) {
+ while (1) {
+ while (ulptr < ulptr_end) {
+ cur_word = *ulptr++;
+ for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
+ cur_genotype = cur_word & 3;
+ if (cur_genotype != 1) {
+ // 0/1/1
+ *fptr++ = ((float)(cur_genotype != 3)) * (*fptr2);
+ }
+ fptr2++;
+ }
+ sample_idx_stop += BITCT2;
+ }
+ if (sample_idx == sample_valid_ct) {
+ break;
+ }
+ ulptr_end++;
+ sample_idx_stop = sample_valid_ct;
+ }
+ } else if (!(coding_flags & (GLM_HETHOM | GLM_RECESSIVE))) {
+ if (!male_x_01) {
while (1) {
while (ulptr < ulptr_end) {
cur_word = *ulptr++;
for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
cur_genotype = cur_word & 3;
if (cur_genotype != 1) {
- // 0/1/1
- *fptr++ = ((float)(cur_genotype != 3)) * (*fptr2);
+ // 0/1/2
+ *fptr++ = ((float)((intptr_t)(2 + (cur_genotype / 2) - cur_genotype))) * (*fptr2);
}
fptr2++;
}
@@ -2533,48 +2555,6 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
ulptr_end++;
sample_idx_stop = sample_valid_ct;
}
- } else if (!(coding_flags & (GLM_HETHOM | GLM_RECESSIVE))) {
- if (!male_x_01) {
- while (1) {
- while (ulptr < ulptr_end) {
- cur_word = *ulptr++;
- for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
- cur_genotype = cur_word & 3;
- if (cur_genotype != 1) {
- // 0/1/2
- *fptr++ = ((float)((intptr_t)(2 + (cur_genotype / 2) - cur_genotype))) * (*fptr2);
- }
- fptr2++;
- }
- sample_idx_stop += BITCT2;
- }
- if (sample_idx == sample_valid_ct) {
- break;
- }
- ulptr_end++;
- sample_idx_stop = sample_valid_ct;
- }
- } else {
- while (1) {
- while (ulptr < ulptr_end) {
- cur_word = *ulptr++;
- for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
- cur_genotype = cur_word & 3;
- if (cur_genotype != 1) {
- // 0/1/2
- *fptr++ = ((float)((intptr_t)((2 + (cur_genotype / 2) - cur_genotype) >> IS_SET(sex_male_collapsed, sample_idx)))) * (*fptr2);
- }
- fptr2++;
- }
- sample_idx_stop += BITCT2;
- }
- if (sample_idx == sample_valid_ct) {
- break;
- }
- ulptr_end++;
- sample_idx_stop = sample_valid_ct;
- }
- }
} else {
while (1) {
while (ulptr < ulptr_end) {
@@ -2582,8 +2562,8 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
cur_genotype = cur_word & 3;
if (cur_genotype != 1) {
- // 0/0/1
- *fptr++ = ((float)(cur_genotype == 0)) * (*fptr2);
+ // 0/1/2
+ *fptr++ = ((float)((intptr_t)((2 + (cur_genotype / 2) - cur_genotype) >> IS_SET(sex_male_collapsed, sample_idx)))) * (*fptr2);
}
fptr2++;
}
@@ -2596,23 +2576,15 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
sample_idx_stop = sample_valid_ct;
}
}
- fill_float_zero(align_skip, fptr);
- fptr = &(fptr[align_skip]);
- }
- if (genotypic_or_hethom && (!is_nonx_haploid) && is_set(active_params, sex_start_idx + 2)) {
- ulptr = loadbuf_collapsed;
- ulptr_end = ulptr_end_init;
- sample_idx = 0;
- sample_idx_stop = BITCT2;
- fptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
+ } else {
while (1) {
while (ulptr < ulptr_end) {
cur_word = *ulptr++;
for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
cur_genotype = cur_word & 3;
if (cur_genotype != 1) {
- // 0/1/0
- *fptr++ = ((float)(cur_genotype == 2)) * (*fptr2);
+ // 0/0/1
+ *fptr++ = ((float)(cur_genotype == 0)) * (*fptr2);
}
fptr2++;
}
@@ -2624,9 +2596,37 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
ulptr_end++;
sample_idx_stop = sample_valid_ct;
}
- fill_float_zero(align_skip, fptr);
- fptr = &(fptr[align_skip]);
}
+ fill_float_zero(align_skip, fptr);
+ fptr = &(fptr[align_skip]);
+ }
+ if (genotypic_or_hethom && (!is_nonx_haploid) && is_set(active_params, sex_start_idx + 2)) {
+ ulptr = loadbuf_collapsed;
+ ulptr_end = ulptr_end_init;
+ sample_idx = 0;
+ sample_idx_stop = BITCT2;
+ fptr2 = &(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct]);
+ while (1) {
+ while (ulptr < ulptr_end) {
+ cur_word = *ulptr++;
+ for (; sample_idx < sample_idx_stop; sample_idx++, cur_word >>= 2) {
+ cur_genotype = cur_word & 3;
+ if (cur_genotype != 1) {
+ // 0/1/0
+ *fptr++ = ((float)(cur_genotype == 2)) * (*fptr2);
+ }
+ fptr2++;
+ }
+ sample_idx_stop += BITCT2;
+ }
+ if (sample_idx == sample_valid_ct) {
+ break;
+ }
+ ulptr_end++;
+ sample_idx_stop = sample_valid_ct;
+ }
+ fill_float_zero(align_skip, fptr);
+ fptr = &(fptr[align_skip]);
}
}
// if (fptr != &(cur_covars_cov_major[cur_param_ct * cur_sample_valid_cta4])) {
@@ -3565,7 +3565,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
uintptr_t sample_uidx_stop;
uintptr_t sample_idx;
uintptr_t param_raw_ct_max;
- uintptr_t param_raw_ctl;
+ uintptr_t param_raw_ctp2l; // two extra bits for '--linear sex interaction'
uintptr_t param_ctx_max;
uintptr_t param_ctl_max;
uintptr_t condition_list_start_idx;
@@ -3726,8 +3726,8 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
}
copy_bitarr_subset(sex_male, load_mask, unfiltered_sample_ct, sample_valid_ct, sex_male_collapsed);
param_raw_ct_max = np_base_raw + np_diploid_raw + np_sex_raw;
- param_raw_ctl = BITCT_TO_WORDCT(param_raw_ct_max);
- if (bigstack_alloc_ul(param_raw_ctl, &active_params)) {
+ param_raw_ctp2l = BITCT_TO_WORDCT(param_raw_ct_max + 2);
+ if (bigstack_alloc_ul(param_raw_ctp2l, &active_params)) {
goto glm_common_init_ret_NOMEM;
}
condition_list_start_idx = 2 + genotypic_or_hethom;
@@ -3749,7 +3749,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
g_sex_male_collapsed = sex_male_collapsed;
if (parameters_range_list_ptr->name_ct) {
- fill_ulong_zero(param_raw_ctl, active_params);
+ fill_ulong_zero(param_raw_ctp2l, active_params);
active_params[0] = 1;
numeric_range_list_to_bitarr(parameters_range_list_ptr, param_raw_ct_max, 0, 1, active_params);
if ((!(active_params[0] & 2)) && ((!np_diploid_raw) || (!(active_params[0] & 4))) && ((!covar_interactions) || ((!popcount_bit_idx(active_params, interaction_start_idx, sex_start_idx)) && ((!variation_in_sex) || (!popcount_bit_idx(active_params, sex_start_idx + 1, param_raw_ct_max)))))) {
@@ -3757,7 +3757,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
logerrprint("Error: --parameters must retain at least one dosage-dependent variable. To\nperform one-off regression(s), use the --linear 'no-snp' modifier instead.\n");
goto glm_common_init_ret_INVALID_CMDLINE;
}
- param_ct_max = popcount_longs(active_params, param_raw_ctl);
+ param_ct_max = popcount_longs(active_params, param_raw_ctp2l);
if (np_diploid_raw) {
np_diploid = IS_SET(active_params, 2);
if (covar_interactions) {
@@ -5053,7 +5053,9 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
g_include_sex = sex_covar_everywhere || (g_is_x && np_sex);
if (cur_constraint_ct) {
cur_param_ctx++;
- if (g_is_x || (!g_min_ploidy_1)) {
+ // bugfix (9 May 2017): constraints_con_major wasn't being initialized
+ // in some haploid cases
+ if (g_is_x || (!g_min_ploidy_1) || (!genotypic_or_hethom)) {
ulii = 0;
for (constraint_idx = 0; constraint_idx < cur_constraint_ct; ulii++, constraint_idx++) {
next_set_ul_unsafe_ck(g_joint_test_params, &ulii);
@@ -6524,7 +6526,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
g_include_sex = sex_covar_everywhere || (g_is_x && np_sex);
if (cur_constraint_ct) {
cur_param_ctx++;
- if (g_is_x || (!g_min_ploidy_1)) {
+ if (g_is_x || (!g_min_ploidy_1) || (!genotypic_or_hethom)) {
ulii = 0;
for (constraint_idx = 0; constraint_idx < cur_constraint_ct; ulii++, constraint_idx++) {
next_set_ul_unsafe_ck(g_joint_test_params, &ulii);
@@ -7092,7 +7094,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
uintptr_t sample_valid_ctv2;
uintptr_t sample_uidx_stop;
uintptr_t sample_idx;
- uintptr_t param_raw_ctl;
+ uintptr_t param_raw_ctp2l;
uintptr_t param_ct;
uintptr_t param_ctx; // param_ct + 1 if joint test needed, param_ct otherwise
uintptr_t param_idx;
@@ -7203,15 +7205,15 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
loadbuf_collapsed[sample_valid_ctv2 - 1] = 0;
copy_bitarr_subset(sex_male, load_mask, unfiltered_sample_ct, sample_valid_ct, sex_male_collapsed);
}
- param_raw_ctl = BITCT_TO_WORDCT(param_raw_ct);
- if (aligned_malloc(param_raw_ctl * sizeof(intptr_t), &active_params)) {
+ param_raw_ctp2l = BITCT_TO_WORDCT(param_raw_ct + 2);
+ if (aligned_malloc(param_raw_ctp2l * sizeof(intptr_t), &active_params)) {
goto glm_linear_nosnp_ret_NOMEM;
}
if (parameters_range_list_ptr->name_ct) {
- fill_ulong_zero(param_raw_ctl, active_params);
+ fill_ulong_zero(param_raw_ctp2l, active_params);
active_params[0] = 1;
numeric_range_list_to_bitarr(parameters_range_list_ptr, param_raw_ct, 0, 1, active_params);
- param_ct = popcount_longs(active_params, param_raw_ctl);
+ param_ct = popcount_longs(active_params, param_raw_ctp2l);
} else {
fill_all_bits(param_raw_ct, active_params);
param_ct = param_raw_ct;
@@ -7954,7 +7956,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
uintptr_t sample_valid_ctv2;
uintptr_t sample_uidx_stop;
uintptr_t sample_idx;
- uintptr_t param_raw_ctl;
+ uintptr_t param_raw_ctp2l;
uintptr_t param_ct;
uintptr_t param_cta4;
uintptr_t param_ctx; // param_ct + 1 if joint test needed, param_ct otherwise
@@ -8066,15 +8068,15 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
loadbuf_collapsed[sample_valid_ctv2 - 1] = 0;
copy_bitarr_subset(sex_male, load_mask, unfiltered_sample_ct, sample_valid_ct, sex_male_collapsed);
}
- param_raw_ctl = BITCT_TO_WORDCT(param_raw_ct);
- if (aligned_malloc(param_raw_ctl * sizeof(intptr_t), &active_params)) {
+ param_raw_ctp2l = BITCT_TO_WORDCT(param_raw_ct + 2);
+ if (aligned_malloc(param_raw_ctp2l * sizeof(intptr_t), &active_params)) {
goto glm_logistic_nosnp_ret_NOMEM;
}
if (parameters_range_list_ptr->name_ct) {
- fill_ulong_zero(param_raw_ctl, active_params);
+ fill_ulong_zero(param_raw_ctp2l, active_params);
active_params[0] = 1;
numeric_range_list_to_bitarr(parameters_range_list_ptr, param_raw_ct, 0, 1, active_params);
- param_ct = popcount_longs(active_params, param_raw_ctl);
+ param_ct = popcount_longs(active_params, param_raw_ctp2l);
} else {
fill_all_bits(param_raw_ct, active_params);
param_ct = param_raw_ct;
diff --git a/plink_help.c b/plink_help.c
index 921b90e..6192882 100644
--- a/plink_help.c
+++ b/plink_help.c
@@ -1048,7 +1048,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" normally zero, but can be changed with 'skip0', 'skip1', and 'skip2'\n"
" respectively.) If such a header line is not present,\n"
" * when all samples appear in the same order as they do in the .fam file,\n"
-" you can use the 'noheader' modiifer.\n"
+" you can use the 'noheader' modifier.\n"
" * Otherwise, use the 'sepheader' modifier, and append sample ID filenames\n"
" to your 'list' file entries.\n"
" * The 'format' modifier lets you specify the number of values used to\n"
@@ -1150,6 +1150,17 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" * The 'emp-se' modifier adds BETA and EMP_SE (empirical standard error for\n"
" beta) fields to the .perm output file.\n\n"
);
+#ifndef STABLE_BUILD
+ help_print("tucc", &help_ctrl, 1,
+" --tucc <write-bed>\n"
+" By default, this generates a .tucc.ped file where, for each child with two\n"
+" parents in the dataset, one case sample is created with all the child's\n"
+" alleles, and one pseudocontrol sample is created with all untransmitted\n"
+" alleles. (Both genotypes are set to missing when there's a Mendel error.)\n"
+" Add the 'write-bed' modifier to create a .tucc.bed + .tucc.bim + .tucc.fam\n"
+" fileset instead.\n\n"
+ );
+#endif
help_print("annotate", &help_ctrl, 1,
" --annotate [PLINK report] <attrib=[file]> <ranges=[file]> <filter=[file]>\n"
" <snps=[file]> <NA | prune> <block> <subset=[file]> <minimal>\n"
diff --git a/plink_matrix.c b/plink_matrix.c
index 914a983..094ec82 100644
--- a/plink_matrix.c
+++ b/plink_matrix.c
@@ -25,22 +25,10 @@
#endif
#endif
-inline double SQR(const double a) {
+static inline double SQR(const double a) {
return a * a;
}
-#ifdef __cplusplus
-inline double SIGN(const double &a, const double &b) {
- // PLINK helper.h SIGN() template specialized to doubles.
- return (b >= 0)? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);
-}
-#else
-inline double SIGN(const double a, const double b) {
- // PLINK helper.h SIGN() template specialized to doubles.
- return (b >= 0)? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);
-}
-#endif
-
double pythag(const double a, const double b) {
// PLINK stats.cpp pythag().
double absa,absb;
@@ -52,6 +40,18 @@ double pythag(const double a, const double b) {
}
#ifdef NOLAPACK
+ #ifdef __cplusplus
+static inline double SIGN(const double &a, const double &b) {
+ // PLINK helper.h SIGN() template specialized to doubles.
+ return (b >= 0)? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);
+}
+ #else
+static inline double SIGN(const double a, const double b) {
+ // PLINK helper.h SIGN() template specialized to doubles.
+ return (b >= 0)? (a >= 0 ? a : -a) : (a >= 0 ? -a : a);
+}
+ #endif
+
int32_t svdcmp_c(int32_t m, double* a, double* w, double* v) {
// C port of PLINK stats.cpp svdcmp().
// now thread-safe.
diff --git a/plink_misc.c b/plink_misc.c
index 464133f..9f425a0 100644
--- a/plink_misc.c
+++ b/plink_misc.c
@@ -2290,9 +2290,15 @@ int32_t load_ax_alleles(Two_col_params* axalleles, uintptr_t unfiltered_marker_c
if (colid_first) {
colid_ptr = next_token_multz(colid_ptr, colmin);
colx_ptr = next_token_mult(colid_ptr, coldiff);
+ if (!colx_ptr) {
+ goto load_ax_alleles_ret_MISSING_TOKENS;
+ }
} else {
colx_ptr = next_token_multz(colid_ptr, colmin);
colid_ptr = next_token_mult(colx_ptr, coldiff);
+ if (!colid_ptr) {
+ goto load_ax_alleles_ret_MISSING_TOKENS;
+ }
}
idlen = strlen_se(colid_ptr);
marker_uidx = id_htable_find(colid_ptr, idlen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
@@ -2354,6 +2360,9 @@ int32_t load_ax_alleles(Two_col_params* axalleles, uintptr_t unfiltered_marker_c
load_ax_alleles_ret_READ_FAIL:
retval = RET_READ_FAIL;
break;
+ load_ax_alleles_ret_MISSING_TOKENS:
+ sprintf(g_logbuf, "Error: Fewer tokens than expected on line %" PRIuPTR " of --a%c-allele file.\n", line_idx, is_a2? '2' : '1');
+ wordwrapb(0);
load_ax_alleles_ret_INVALID_FORMAT_2:
logerrprintb();
retval = RET_INVALID_FORMAT;
@@ -4217,6 +4226,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
uintptr_t* sample_male_include2 = nullptr;
double* qrange_keys = nullptr;
double* effect_sizes_cur = nullptr;
+ double abs_score_sum = 0.0;
double ploidy_d = 0.0;
double lbound = 0.0;
double ubound = 0.0;
@@ -4370,7 +4380,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
bufptr_arr[allele_idx][strlen_se(bufptr_arr[allele_idx])] = '\0';
uii = strcmp(bufptr_arr[allele_idx], marker_allele_ptrs[2 * marker_uidx]);
if ((!uii) || (!strcmp(bufptr_arr[allele_idx], marker_allele_ptrs[2 * marker_uidx + 1]))) {
- if (scan_double(bufptr_arr[effect_idx], &(dptr[marker_uidx]))) {
+ if (scan_double(bufptr_arr[effect_idx], &dxx) || (dxx != dxx)) {
if (!miss_ct) {
if (fopen_checked(outname, "w", &outfile)) {
goto score_report_ret_OPEN_FAIL;
@@ -4388,6 +4398,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
LOGPREPRINTFWW("Error: Duplicate variant '%s' in --score file.\n", bufptr_arr[varid_idx]);
goto score_report_ret_INVALID_FORMAT_2;
}
+ dptr[marker_uidx] = dxx;
CLEAR_BIT(marker_uidx, marker_exclude);
if (uii) {
SET_BIT(marker_uidx, a2_effect);
@@ -4503,7 +4514,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
marker_uidx = id_htable_find(bufptr_arr[varid_idx], strlen_se(bufptr_arr[varid_idx]), marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
if (marker_uidx != 0xffffffffU) {
if (!IS_SET(marker_exclude, marker_uidx)) {
- if (scan_double(bufptr_arr[1 - varid_idx], &dxx) || (dxx != dxx)) {
+ if (scan_double(bufptr_arr[1 - varid_idx], &dxx) || (dxx != dxx) || (dxx == INFINITY) || (dxx == -INFINITY)) {
miss_ct++;
} else {
dptr[marker_uidx] = dxx;
@@ -4696,6 +4707,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf);
}
cur_effect_size = (*dptr++) * ploidy_d;
+ abs_score_sum += fabs(cur_effect_size);
uii = IS_SET(a2_effect, marker_uidx);
if (!uii) {
delta1 = 1;
@@ -4855,7 +4867,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
}
bufptr = uint32toa_w6x(((int32_t)named_allele_ct_expected) - ujj * named_allele_ct_female_delta + named_allele_ct_deltas[sample_idx], ' ', bufptr);
dxx = (score_base + ((int32_t)ujj) * female_y_offset + score_deltas[sample_idx]);
- if (fabs(dxx) < SMALL_EPSILON) {
+ if (fabs(dxx) < abs_score_sum * RECIP_2_53) {
dxx = 0;
} else if (report_average) {
dxx /= ((double)((int32_t)uii));
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/plink1.9.git
More information about the debian-med-commit
mailing list