[med-svn] [plink2] 01/03: Imported Upstream version 1.90~b3-150112
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Tue Jan 13 06:02:01 UTC 2015
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository plink2.
commit 2feb514f4359c3177e7b1e13c9184f697bdae21a
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Tue Jan 13 06:58:51 2015 +0100
Imported Upstream version 1.90~b3-150112
---
SFMT.c | 5 +
plink.c | 77 +--
plink_common.c | 57 ++
plink_common.h | 7 +
plink_data.c | 173 +++--
plink_glm.c | 50 +-
plink_glm.h | 6 +
plink_help.c | 34 +-
plink_ld.c | 1912 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
plink_ld.h | 2 +-
plink_matrix.c | 2 +-
plink_misc.c | 110 +++-
12 files changed, 2204 insertions(+), 231 deletions(-)
diff --git a/SFMT.c b/SFMT.c
index 21ccb96..b79f2b2 100644
--- a/SFMT.c
+++ b/SFMT.c
@@ -47,8 +47,11 @@ extern "C" {
#include <string.h>
#include <assert.h>
#include "SFMT.h"
+
+#ifndef __LP64__
inline static void do_recursion(w128_t * r, w128_t * a, w128_t * b,
w128_t * c, w128_t * d);
+#endif
inline static void rshift128(w128_t *out, w128_t const *in, int shift);
inline static void lshift128(w128_t *out, w128_t const *in, int shift);
@@ -107,6 +110,7 @@ inline static void lshift128(w128_t *out, w128_t const *in, int shift)
* @param c a 128-bit part of the internal state array
* @param d a 128-bit part of the internal state array
*/
+#ifndef __LP64__
inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b,
w128_t *c, w128_t *d)
{
@@ -124,6 +128,7 @@ inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b,
r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SFMT_SR1) & SFMT_MSK4)
^ y.u[3] ^ (d->u[3] << SFMT_SL1);
}
+#endif
/**
* parameters used by sse2.
diff --git a/plink.c b/plink.c
index 6852cd9..ca3e837 100644
--- a/plink.c
+++ b/plink.c
@@ -1,5 +1,5 @@
// PLINK 1.90
-// Copyright (C) 2005-2014 Shaun Purcell, Christopher Chang
+// Copyright (C) 2005-2015 Shaun Purcell, Christopher Chang
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -86,9 +86,9 @@
const char ver_str[] =
#ifdef STABLE_BUILD
- "PLINK v1.90b2t"
+ "PLINK v1.90b3a"
#else
- "PLINK v1.90b3p"
+ "PLINK v1.90p"
#endif
#ifdef NOLAPACK
"NL"
@@ -99,28 +99,30 @@ const char ver_str[] =
" 32-bit"
#endif
// include trailing space if day < 10, so character length stays the same
- " (20 Dec 2014)";
+ " (12 Jan 2015)";
const char ver_str2[] =
#ifdef STABLE_BUILD
- // " " // (don't want this when version number has a trailing letter)
+ "" // (don't want this when version number has a trailing letter)
+#else
+ " " // (don't want this when version number has e.g. "b3" before "p")
#endif
#ifndef NOLAPACK
" "
#endif
" https://www.cog-genomics.org/plink2\n"
- "(C) 2005-2014 Shaun Purcell, Christopher Chang GNU General Public License v3\n";
+ "(C) 2005-2015 Shaun Purcell, Christopher Chang GNU General Public License v3\n";
const char errstr_append[] = "For more information, try '" PROG_NAME_STR " --help [flag name]' or '" PROG_NAME_STR " --help | more'.\n";
#ifdef STABLE_BUILD
#ifndef NOLAPACK
-const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --freqx, --missing, --test-mishap, --hardy, --mendel, --ibc,\n--impute-sex, --indep-pairphase, --r2, --blocks, --distance, --genome,\n--homozyg, --make-rel, --make-grm-gz, --rel-cutoff, --cluster, --pca,\n--neighbour, --ibs-test, --regress-distance, --model, --bd, --gxe, --logistic,\n--dosage, --lasso, --test-missing, --make-perm-pheno, --tdt, --qfam,\n--annotate, --clum [...]
+const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --pca, --neighbour, --ibs-test, --regress-distance,\n--model, --bd, --gxe, --logistic, --dosage, --lasso, --test-missing,\n--make-perm-phen [...]
#else
-const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --freqx, --missing, --test-mishap, --hardy, --mendel, --ibc,\n--impute-sex, --indep-pairphase, --r2, --blocks, --distance, --genome,\n--homozyg, --make-rel, --make-grm-gz, --rel-cutoff, --cluster, --neighbour,\n--ibs-test, --regress-distance, --model, --bd, --gxe, --logistic, --dosage,\n--lasso, --test-missing, --make-perm-pheno, --tdt, --qfam, --annotate, --clump,\n--ge [...]
+const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --neighbour, --ibs-test, --regress-distance, --model,\n--bd, --gxe, --logistic, --dosage, --lasso, --test-missing, --make-perm-pheno,\n--td [...]
#endif
#else
#ifndef NOLAPACK
-const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --pca, --neighbour, --ibs-test, --regress-distance,\n--model, --bd, --gxe, --logistic, --dosage, --lasso, --test-missing,\n--make-perm-phen [...]
+const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --pca, --neighbour, --ibs-test, --regress-distance,\n--model, --bd, --gxe, --logistic, --dosage, --lasso, --test-missing,\n--make-perm-phen [...]
#else
-const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --neighbour, --ibs-test, --regress-distance, --model,\n--bd, --gxe, --logistic, --dosage, --lasso, --test-missing, --make-perm-pheno,\n--td [...]
+const char notestr_null_calc2[] = "Commands include --make-bed, --recode, --flip-scan, --merge-list,\n--write-snplist, --list-duplicate-vars, --freqx, --missing, --test-mishap,\n--hardy, --mendel, --ibc, --impute-sex, --indep-pairphase, --r2, --show-tags,\n--blocks, --distance, --genome, --homozyg, --make-rel, --make-grm-gz,\n--rel-cutoff, --cluster, --neighbour, --ibs-test, --regress-distance, --model,\n--bd, --gxe, --logistic, --dosage, --lasso, --test-missing, --make-perm-pheno,\n--td [...]
#endif
#endif
@@ -1097,7 +1099,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
}
if (g_thread_ct > 1) {
- if ((calculation_type & (CALC_RELATIONSHIP | CALC_REL_CUTOFF | CALC_GDISTANCE_MASK | CALC_IBS_TEST | CALC_GROUPDIST | CALC_REGRESS_DISTANCE | CALC_GENOME | CALC_REGRESS_REL | CALC_UNRELATED_HERITABILITY | CALC_LD | CALC_PCA | CALC_MAKE_PERM_PHENO | CALC_QFAM)) || ((calculation_type & CALC_MODEL) && (model_modifier & (MODEL_PERM | MODEL_MPERM))) || ((calculation_type & CALC_GLM) && (glm_modifier & (GLM_PERM | GLM_MPERM))) || ((calculation_type & CALC_TESTMISS) && (testmiss_modifier & [...]
+ if ((calculation_type & (CALC_RELATIONSHIP | CALC_REL_CUTOFF | CALC_GDISTANCE_MASK | CALC_IBS_TEST | CALC_GROUPDIST | CALC_REGRESS_DISTANCE | CALC_GENOME | CALC_REGRESS_REL | CALC_UNRELATED_HERITABILITY | CALC_LD | CALC_PCA | CALC_MAKE_PERM_PHENO | CALC_QFAM)) || ((calculation_type & CALC_MODEL) && (model_modifier & (MODEL_PERM | MODEL_MPERM))) || ((calculation_type & CALC_GLM) && (glm_modifier & (GLM_PERM | GLM_MPERM))) || ((calculation_type & CALC_TESTMISS) && (testmiss_modifier & [...]
LOGPRINTF("Using up to %u threads (change this with --threads).\n", g_thread_ct);
} else {
logprint("Using 1 thread (no multithreaded calculations invoked).\n");
@@ -1823,7 +1825,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logprint("Error: --fast-epistasis case-only requires a sorted .bim. Retry this command\nafter using --make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = epistasis_report(threads, epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, sip);
+ retval = epistasis_report(threads, epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, marker_pos, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_ctrl_ct, pheno_c, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, glm_vif_thresh, sip);
if (retval) {
goto plink_ret_1;
}
@@ -7812,11 +7814,9 @@ int32_t main(int32_t argc, char** argv) {
}
calculation_type |= CALC_EPI;
} else if (!memcmp(argptr2, "ist-all", 8)) {
- UNSTABLE;
ld_info.modifier |= LD_SHOW_TAGS_LIST_ALL;
goto main_param_zero;
} else if (!memcmp(argptr2, "ist-duplicate-vars", 19)) {
- UNSTABLE;
if (enforce_param_ct_range(param_ct, argv[cur_arg], 0, 3)) {
goto main_ret_INVALID_CMDLINE_2A;
}
@@ -8884,7 +8884,6 @@ int32_t main(int32_t argc, char** argv) {
}
calculation_type |= CALC_MAKE_PERM_PHENO;
} else if (!memcmp(argptr2, "eta-analysis", 13)) {
- UNSTABLE;
if (enforce_param_ct_range(param_ct, argv[cur_arg], 2, 0x1fffffff)) {
goto main_ret_INVALID_CMDLINE_2A;
}
@@ -9239,7 +9238,6 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_NOMEM;
}
} else if (!memcmp(argptr2, "utput-min-p", 12)) {
- UNSTABLE;
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
}
@@ -10410,6 +10408,13 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_INVALID_CMDLINE;
}
calculation_type |= CALC_LD;
+ } else if (!memcmp(argptr2, "eal-ref-alleles", 16)) {
+ if (load_rare & (LOAD_RARE_CNV | LOAD_RARE_DOSAGE)) {
+ sprintf(logbuf, "Error: --real-ref-alleles has no effect with %s.\n", (load_rare == LOAD_RARE_CNV)? "a .cnv fileset" : "--dosage");
+ goto main_ret_INVALID_CMDLINE_2A;
+ }
+ misc_flags |= MISC_REAL_REF_ALLELES | MISC_KEEP_ALLELE_ORDER;
+ goto main_param_zero;
} else if (!memcmp(argptr2, "ange", 5)) {
if (extractname) {
misc_flags |= MISC_EXTRACT_RANGE;
@@ -10693,17 +10698,18 @@ int32_t main(int32_t argc, char** argv) {
goto main_param_zero;
} else if (!memcmp(argptr2, "ex", 3)) {
if (load_rare == LOAD_RARE_DOSAGE) {
- logprint("Error: --dosage + --sex did not work properly in PLINK 1.07 (--sex had no\neffect). If you have used that flag combination in the past, we recommend\nthat you rerun your analysis with PLINK 1.9 '--dosage ... sex'.\n");
- goto main_ret_INVALID_CMDLINE_A;
- } else if (!(calculation_type & CALC_GLM)) {
- logprint("Error: --sex must be used with --linear or --logistic.\n");
- goto main_ret_INVALID_CMDLINE_A;
- } else if (glm_modifier & GLM_NO_X_SEX) {
- sprintf(logbuf, "Error: --sex conflicts with a --%s modifier.\n", (glm_modifier & GLM_LOGISTIC)? "logistic" : "linear");
- goto main_ret_INVALID_CMDLINE_2A;
+ dosage_info.modifier |= DOSAGE_SEX;
+ } else {
+ if (!(calculation_type & CALC_GLM)) {
+ logprint("Error: --sex must be used with --linear/--logistic/--dosage.\n");
+ goto main_ret_INVALID_CMDLINE_A;
+ } else if (glm_modifier & GLM_NO_X_SEX) {
+ sprintf(logbuf, "Error: --sex conflicts with a --%s modifier.\n", (glm_modifier & GLM_LOGISTIC)? "logistic" : "linear");
+ goto main_ret_INVALID_CMDLINE_2A;
+ }
+ logprint("Note: --sex flag deprecated. Use e.g. '--linear sex'.\n");
+ glm_modifier |= GLM_SEX;
}
- logprint("Note: --sex flag deprecated. Use e.g. '--linear sex'.\n");
- glm_modifier |= GLM_SEX;
goto main_param_zero;
} else if (!memcmp(argptr2, "tandard-beta", 13)) {
if (((!(calculation_type & CALC_GLM)) || (glm_modifier & GLM_LOGISTIC)) && (!(dosage_info.modifier & DOSAGE_GLM))) {
@@ -11757,19 +11763,19 @@ int32_t main(int32_t argc, char** argv) {
case 'v':
if (!memcmp(argptr2, "if", 3)) {
- if ((!(calculation_type & CALC_GLM)) || (glm_modifier & GLM_LOGISTIC)) {
- logprint("Error: --vif must be used with --linear.\n");
+ if (((!(calculation_type & CALC_GLM)) || (glm_modifier & GLM_LOGISTIC)) && (!((calculation_type & CALC_EPI) || (!(epi_info.modifier & EPI_REG))))) {
+ logprint("Error: --vif must be used with --linear/--epistasis.\n");
goto main_ret_INVALID_CMDLINE_A;
}
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
}
if (scan_double(argv[cur_arg + 1], &glm_vif_thresh)) {
- sprintf(logbuf, "Error: Invalid --linear/--logistic VIF threshold '%s'.\n", argv[cur_arg + 1]);
+ sprintf(logbuf, "Error: Invalid --linear/--epistasis VIF threshold '%s'.\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
}
if (glm_vif_thresh < 1.0) {
- sprintf(logbuf, "Error: --linear/--logistic VIF threshold '%s' too small (must be >= 1).\n", argv[cur_arg + 1]);
+ sprintf(logbuf, "Error: --linear/--epistasis VIF threshold '%s' too small (must be >= 1).\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
}
} else if (!memcmp(argptr2, "egas", 5)) {
@@ -11821,7 +11827,6 @@ int32_t main(int32_t argc, char** argv) {
}
misc_flags |= MISC_VCF_FILTER;
} else if (!memcmp(argptr2, "cf-min-gp", 10)) {
- UNSTABLE;
if (!(load_rare & LOAD_RARE_VCF)) {
// er, probably want to support BCF too
logprint("Error: --vcf-min-gp must be used with --vcf.\n");
@@ -11836,7 +11841,6 @@ int32_t main(int32_t argc, char** argv) {
}
vcf_min_gp *= 1 - SMALL_EPSILON;
} else if (!memcmp(argptr2, "cf-min-gq", 10)) {
- UNSTABLE;
if (!(load_rare & LOAD_RARE_VCF)) {
// probably want to support BCF too
logprint("Error: --vcf-min-gq must be used with --vcf.\n");
@@ -11898,7 +11902,6 @@ int32_t main(int32_t argc, char** argv) {
calculation_type |= CALC_WRITE_SNPLIST;
goto main_param_zero;
} else if (!memcmp(argptr2, "rite-var-ranges", 16)) {
- UNSTABLE;
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
}
@@ -12167,6 +12170,10 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_INVALID_CMDLINE_A;
}
}
+ if (sample_sort && (calculation_type & (~(CALC_MERGE | CALC_MAKE_BED)))) {
+ logprint("Error: --indiv-sort only affects --make-bed and --merge/--bmerge/--merge-list.\n");
+ goto main_ret_INVALID_CMDLINE_A;
+ }
if ((cnv_intersect_filter_type & CNV_COUNT) && (!(cnv_calc_type & (CNV_SAMPLE_PERM | CNV_ENRICHMENT_TEST)))) {
logprint("Error: --cnv-count must be used with --cnv-indiv-perm or --cnv-enrichment-test.\n");
goto main_ret_INVALID_CMDLINE;
@@ -12219,8 +12226,8 @@ int32_t main(int32_t argc, char** argv) {
}
}
if ((parallel_tot > 1) && (!(calculation_type & (CALC_LD | CALC_DISTANCE | CALC_GENOME | CALC_RELATIONSHIP)))) {
- if ((!(calculation_type & CALC_EPI)) || (!(epi_info.modifier & EPI_FAST))) {
- logprint("Error: --parallel only affects --r/--r2, --distance, --genome, --make-rel,\n--make-grm-gz/--make-grm-bin, and --fast-epistasis.\n");
+ if ((!(calculation_type & CALC_EPI)) || (!(epi_info.modifier & (EPI_FAST | EPI_REG)))) {
+ logprint("Error: --parallel only affects --r/--r2, --distance, --genome, --make-rel,\n--make-grm-gz/--make-grm-bin, and --epistasis/--fast-epistasis.\n");
goto main_ret_INVALID_CMDLINE_A;
}
}
diff --git a/plink_common.c b/plink_common.c
index adc0a7f..0ee85cd 100644
--- a/plink_common.c
+++ b/plink_common.c
@@ -4325,6 +4325,11 @@ int32_t resolve_or_add_chrom_name(Chrom_info* chrom_info_ptr, char* bufptr, int3
*chrom_idx_ptr = (int32_t)(chrom_idx + max_code_p1);
return 0;
}
+ if (*bufptr == '#') {
+ // this breaks VCF and PLINK 2 binary
+ logprint("Error: Chromosome/contig names may not begin with '#'.\n");
+ return RET_INVALID_FORMAT;
+ }
if (slen > MAX_ID_LEN) {
if (line_idx) {
LOGPRINTFWW("Error: Line %" PRIuPTR " of %s has an excessively long chromosome/contig name. (The " PROG_NAME_CAPS " limit is " MAX_ID_LEN_STR " characters.)\n", line_idx, file_descrip);
@@ -5281,7 +5286,17 @@ void bitfield_xor(uintptr_t* bit_arr, uintptr_t* xor_arr, uintptr_t word_ct) {
*bit_arr++ ^= *xor_arr++;
} while (bit_arr < bit_arr_end);
#endif
+}
+uint32_t is_monomorphic_a2(uintptr_t* lptr, uint32_t sample_ct) {
+ uintptr_t* loop_end = &(lptr[sample_ct / BITCT2]);
+ uint32_t sample_rem = sample_ct % BITCT2;
+ for (; lptr < loop_end; lptr++) {
+ if ((~(*lptr)) & FIVEMASK) {
+ return 0;
+ }
+ }
+ return (sample_rem && ((~(*lptr)) & (FIVEMASK >> (BITCT - sample_rem * 2))))? 0 : 1;
}
uint32_t is_monomorphic(uintptr_t* lptr, uint32_t sample_ct) {
@@ -8337,6 +8352,48 @@ void vec_datamask(uintptr_t unfiltered_sample_ct, uint32_t matchval, uintptr_t*
}
}
+/*
+void vec_rotate_plink1_to_plink2(uintptr_t* lptr, uint32_t word_ct) {
+#ifdef __LP64__
+ const __m128i m1 = {FIVEMASK, FIVEMASK};
+ __m128i* vptr = (__m128i*)lptr;
+ __m128i* vend = (__m128i*)(&(lptr[word_ct]));
+ __m128i vii;
+ __m128i vjj;
+ do {
+ // new high bit set iff old low bit was set
+ // new low bit set iff old bits differed
+ vii = *vptr;
+ vjj = _mm_and_si128(vii, m1); // old low bit
+ vii = _mm_and_si128(_mm_srli_epi64(vii, 1), m1); // old high bit, shifted
+ *vptr = _mm_or_si128(_mm_slli_epi64(vjj, 1), _mm_xor_si128(vii, vjj));
+ } while (++vptr != vend);
+#else
+ uintptr_t* lend = &(lptr[word_ct]);
+ uintptr_t ulii;
+ uintptr_t uljj;
+ do {
+ ulii = *lptr;
+ uljj = ulii & FIVEMASK;
+ ulii = (ulii >> 1) & FIVEMASK;
+ *lptr = ulii ^ (uljj * 3);
+ } while (++lptr != lend);
+#endif
+}
+*/
+
+void rotate_plink1_to_plink2_and_copy(uintptr_t* loadbuf, uintptr_t* writebuf, uintptr_t word_ct) {
+ uintptr_t* loadbuf_end = &(loadbuf[word_ct]);
+ uintptr_t ulii;
+ uintptr_t uljj;
+ do {
+ ulii = *loadbuf++;
+ uljj = ulii & FIVEMASK;
+ ulii = (ulii >> 1) & FIVEMASK;
+ *writebuf++ = ulii ^ (uljj * 3);
+ } while (loadbuf < loadbuf_end);
+}
+
void extract_collapsed_missing_bitfield(uintptr_t* lptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_include2, uintptr_t sample_ct, uintptr_t* missing_bitfield) {
uint32_t word_ct = (unfiltered_sample_ct + (BITCT2 - 1)) / BITCT2;
uintptr_t sample_idx;
diff --git a/plink_common.h b/plink_common.h
index a4751cf..137e872 100644
--- a/plink_common.h
+++ b/plink_common.h
@@ -220,6 +220,7 @@
#define MISC_HET_SMALL_SAMPLE 0x100000000LLU
#define MISC_FST_CC 0x200000000LLU
#define MISC_SPLIT_MERGE_NOFAIL 0x400000000LLU
+#define MISC_REAL_REF_ALLELES 0x800000000LLU
// assume for now that .bed must always be accompanied by both .bim and .fam
#define FILTER_ALL_REQ 1LLU
@@ -1987,6 +1988,8 @@ static inline uint32_t popcount_long(uintptr_t val) {
return popcount2_long(val - ((val >> 1) & FIVEMASK));
}
+uint32_t is_monomorphic_a2(uintptr_t* lptr, uint32_t sample_ct);
+
uint32_t is_monomorphic(uintptr_t* lptr, uint32_t sample_ct);
// same as is_monomorphic, except it also flags the all-heterozygote case
@@ -2153,6 +2156,10 @@ void vec_invert(uintptr_t unfiltered_sample_ct, uintptr_t* vec2);
void vec_datamask(uintptr_t unfiltered_sample_ct, uint32_t matchval, uintptr_t* data_ptr, uintptr_t* mask_ptr, uintptr_t* result_ptr);
+// void vec_rotate_plink1_to_plink2(uintptr_t* lptr, uint32_t word_ct);
+
+void rotate_plink1_to_plink2_and_copy(uintptr_t* loadbuf, uintptr_t* writebuf, uintptr_t word_ct);
+
void extract_collapsed_missing_bitfield(uintptr_t* lptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_include2, uintptr_t sample_ct, uintptr_t* missing_bitfield);
void hh_reset(unsigned char* loadbuf, uintptr_t* sample_include2, uintptr_t unfiltered_sample_ct);
diff --git a/plink_data.c b/plink_data.c
index 0c72017..228a7d7 100644
--- a/plink_data.c
+++ b/plink_data.c
@@ -1523,7 +1523,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
// if a single covariate is missing for a person, that person's covar_nm
// bit is zero.
// * covar_d is in sample-major order (i.e. the value of covariate m for
- // sample n is covar_d[n * covar_ct + m], where m and n are zero-based).
+ // sample n is covar_d[n * covar_ctx + m], where m and n are zero-based).
// It does track when some covariates are missing and others aren't
// (missing covariates are represented as the --missing-phenotype value).
if (covar_range_list_ptr) {
@@ -1635,7 +1635,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
goto load_covars_ret_MISSING_TOKENS;
}
if (covar_range_list_ptr) {
- dptr = &(covar_d[sample_idx * covar_ct]);
+ dptr = &(covar_d[sample_idx * covar_ctx]);
}
covar_missing = 0;
for (uii = 0; uii < min_covar_col_ct; uii++) {
@@ -3325,11 +3325,18 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
uintptr_t* writebuf_ptr;
uint32_t* map_reverse;
const char* errptr;
+ uintptr_t pass_size;
uint32_t is_haploid;
uint32_t is_x;
uint32_t is_y;
uint32_t is_mt;
uint32_t loop_end;
+ uint32_t pass_ct;
+ uint32_t pass_idx;
+ uint32_t pass_start;
+ uint32_t pass_end;
+ uint32_t seek_needed;
+ uint32_t markers_done;
if (flip_subset_fname) {
if (wkspace_alloc_ul_checked(&flip_subset_markers, unfiltered_marker_ctl * sizeof(intptr_t)) ||
wkspace_alloc_ul_checked(&flip_subset_vec2, sample_ctv2 * sizeof(intptr_t))) {
@@ -3367,9 +3374,6 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
goto make_bed_ret_WRITE_FAIL;
}
fflush(stdout);
- if (fseeko(bedfile, bed_offset, SEEK_SET)) {
- goto make_bed_ret_READ_FAIL;
- }
if (resort_map) {
if (set_hh_missing || set_me_missing || fill_missing_a2) {
// could remove this restriction if we added a chromosome check to the
@@ -3431,25 +3435,43 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
}
wkspace_reset(ll_buf);
- if (wkspace_alloc_ul_checked(&writebuf, marker_ct * sample_ctv2)) {
- // todo: implement multipass. should now be a minimal change
- logprint("\nError: Insufficient memory for current --make-bed implementation. Try raising\nthe --memory value for now.\n");
- retval = RET_CALC_NOT_YET_SUPPORTED;
- goto make_bed_ret_1;
+ // oops, forgot to multiply by sizeof(intptr_t)! fortunately, this
+ // segfaulted instead of corrupting any data.
+ // anyway, it's now time to implement multipass.
+ if (wkspace_left < sample_ctv2 * sizeof(intptr_t)) {
+ goto make_bed_ret_NOMEM;
}
+ writebuf = (uintptr_t*)wkspace_base;
+ pass_ct = 1 + ((sample_ctv2 * marker_ct * sizeof(intptr_t) - 1) / wkspace_left);
+ pass_size = 1 + ((marker_ct - 1) / pass_ct);
*outname_end = '\0';
LOGPRINTFWW5("--make-bed to %s.bed + %s.bim + %s.fam ... ", outname, outname, outname);
fputs("0%", stdout);
- for (; pct <= 100; pct++) {
- loop_end = (pct * ((uint64_t)marker_ct)) / 100;
- for (; marker_idx < loop_end; marker_uidx++, marker_idx++) {
+ loop_end = marker_ct / 100;
+ markers_done = 0;
+ for (pass_idx = 0; pass_idx < pass_ct; pass_idx++) {
+ pass_start = pass_idx * pass_size;
+ pass_end = (pass_idx + 1) * pass_size;
+ if (pass_idx + 1 == pass_ct) {
+ pass_end = marker_ct;
+ }
+ seek_needed = 1;
+ for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
if (IS_SET(marker_exclude, marker_uidx)) {
marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx);
+ seek_needed = 1;
+ }
+ if ((map_reverse[marker_uidx] < pass_start) || (map_reverse[marker_uidx] >= pass_end)) {
+ seek_needed = 1;
+ continue;
+ }
+ writebuf_ptr = &(writebuf[sample_ctv2 * (map_reverse[marker_uidx] - pass_start)]);
+ if (seek_needed) {
if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
goto make_bed_ret_READ_FAIL;
}
+ seek_needed = 0;
}
- writebuf_ptr = &(writebuf[sample_ctv2 * map_reverse[marker_uidx]]);
retval = make_bed_one_marker(bedfile, loadbuf, unfiltered_sample_ct, unfiltered_sample_ct4, sample_exclude, sample_ct, sample_sort_map, final_mask, IS_SET(marker_reverse, marker_uidx), writebuf_ptr);
if (retval) {
goto make_bed_ret_1;
@@ -3460,20 +3482,25 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
if (flip_subset_markers && is_set(flip_subset_markers, marker_uidx)) {
reverse_subset(writebuf_ptr, flip_subset_vec2, sample_ctv2);
}
- }
- if (pct < 100) {
- if (pct > 10) {
- putchar('\b');
+ if (markers_done >= loop_end) {
+ if (pct > 10) {
+ putchar('\b');
+ }
+ pct = (markers_done * 100LLU) / marker_ct;
+ printf("\b\b%u%%", pct);
+ fflush(stdout);
+ pct++;
+ loop_end = (pct * ((uint64_t)marker_ct)) / 100;
}
- printf("\b\b%u%%", pct);
- fflush(stdout);
+ markers_done++;
}
- }
- for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
- if (fwrite_checked(writebuf, sample_ct4, bedoutfile)) {
- goto make_bed_ret_WRITE_FAIL;
+ writebuf_ptr = writebuf;
+ for (marker_idx = pass_start; marker_idx < pass_end; marker_idx++) {
+ if (fwrite_checked(writebuf_ptr, sample_ct4, bedoutfile)) {
+ goto make_bed_ret_WRITE_FAIL;
+ }
+ writebuf_ptr = &(writebuf_ptr[sample_ctv2]);
}
- writebuf = &(writebuf[sample_ctv2]);
}
} else {
if (!hh_exists) {
@@ -3508,6 +3535,9 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
if (wkspace_alloc_ul_checked(&writebuf, sample_ctv2)) {
goto make_bed_ret_NOMEM;
}
+ if (fseeko(bedfile, bed_offset, SEEK_SET)) {
+ goto make_bed_ret_READ_FAIL;
+ }
*outname_end = '\0';
LOGPRINTFWW5("--make-bed to %s.bed + %s.bim + %s.fam ... ", outname, outname, outname);
fputs("0%", stdout);
@@ -7836,12 +7866,12 @@ uint32_t vcf_gp_invalid(char* bufptr, char* bufptr2, double vcf_min_gp, uint32_t
for (uii = 0; uii < gp_field_pos; uii++) {
bufptr = (char*)memchr(bufptr, ':', (uintptr_t)(bufptr2 - bufptr));
if (!bufptr) {
- *is_error_ptr = 1;
- return 1;
+ *is_error_ptr = 0;
+ return 0;
}
bufptr++;
}
- // hmm, defend against decimal without leading zero
+ // do not treat decimal without leading zero as missing value
if (((*bufptr == '.') || (*bufptr == '?')) && (bufptr[1] < '.')) {
*is_error_ptr = 0;
return 0;
@@ -8254,7 +8284,8 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
for (ujj = 0; ujj < gq_field_pos; ujj++) {
gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
if (!gq_scan_ptr) {
- goto vcf_to_bed_ret_MISSING_GQ;
+ // non-GT fields are allowed to be missing
+ goto vcf_to_bed_missing_gq_1;
}
gq_scan_ptr++;
}
@@ -8262,6 +8293,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
continue;
}
}
+ vcf_to_bed_missing_gq_1:
cc = bufptr[1];
if ((cc != '/') && (cc != '|')) {
// haploid
@@ -8269,7 +8301,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (gp_field_pos) {
if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
if (ukk) {
- goto vcf_to_bed_ret_MISSING_GP;
+ goto vcf_to_bed_ret_INVALID_GP;
}
continue;
}
@@ -8295,7 +8327,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (gp_field_pos) {
if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
if (ukk) {
- goto vcf_to_bed_ret_MISSING_GP;
+ goto vcf_to_bed_ret_INVALID_GP;
}
continue;
}
@@ -8347,7 +8379,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
for (ujj = 0; ujj < gq_field_pos; ujj++) {
gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
if (!gq_scan_ptr) {
- goto vcf_to_bed_ret_MISSING_GQ;
+ goto vcf_to_bed_missing_gq_2;
}
gq_scan_ptr++;
}
@@ -8355,13 +8387,14 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
continue;
}
}
+ vcf_to_bed_missing_gq_2:
cc = bufptr[1];
if ((cc != '/') && (cc != '|')) {
vcf_to_bed_haploid_2:
if (gp_field_pos) {
if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
if (ukk) {
- goto vcf_to_bed_ret_MISSING_GP;
+ goto vcf_to_bed_ret_INVALID_GP;
}
continue;
}
@@ -8389,7 +8422,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (gp_field_pos) {
if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
if (ukk) {
- goto vcf_to_bed_ret_MISSING_GP;
+ goto vcf_to_bed_ret_INVALID_GP;
}
continue;
}
@@ -8426,7 +8459,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
for (ujj = 0; ujj < gq_field_pos; ujj++) {
gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
if (!gq_scan_ptr) {
- goto vcf_to_bed_ret_MISSING_GQ;
+ goto vcf_to_bed_missing_gq_3;
}
gq_scan_ptr++;
}
@@ -8434,6 +8467,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
continue;
}
}
+ vcf_to_bed_missing_gq_3:
while (1) {
ujj = ((unsigned char)(*(++bufptr))) - 48;
if (ujj > 9) {
@@ -8448,7 +8482,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (gp_field_pos) {
if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
if (ukk) {
- goto vcf_to_bed_ret_MISSING_GP;
+ goto vcf_to_bed_ret_INVALID_GP;
}
continue;
}
@@ -8484,7 +8518,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (gp_field_pos) {
if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
if (ukk) {
- goto vcf_to_bed_ret_MISSING_GP;
+ goto vcf_to_bed_ret_INVALID_GP;
}
continue;
}
@@ -8679,13 +8713,9 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
retval = RET_INVALID_FORMAT;
break;
- vcf_to_bed_ret_MISSING_GQ:
- LOGPRINTF("\nError: Line %" PRIuPTR " of .vcf file has a missing GQ field.\n", line_idx);
- retval = RET_INVALID_FORMAT;
- break;
- vcf_to_bed_ret_MISSING_GP:
+ vcf_to_bed_ret_INVALID_GP:
logprint("\n");
- LOGPRINTFWW("Error: Line %" PRIuPTR " of .vcf file has a missing or improperly formatted GP field.\n", line_idx);
+ LOGPRINTF("Error: Line %" PRIuPTR " of .vcf file has an improperly formatted GP field.\n", line_idx);
retval = RET_INVALID_FORMAT;
break;
vcf_to_bed_ret_INVALID_GT:
@@ -11541,6 +11571,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
uint32_t vcf_two_ids = vcf_not_fid && vcf_not_iid;
uint32_t recode_012 = recode_modifier & (RECODE_01 | RECODE_12);
uint32_t set_hh_missing = (misc_flags / MISC_SET_HH_MISSING) & 1;
+ uint32_t real_ref_alleles = (misc_flags / MISC_REAL_REF_ALLELES) & 1;
uint32_t xmhh_exists_orig = hh_exists & XMHH_EXISTS;
uintptr_t header_len = 0;
uintptr_t max_chrom_size = 0;
@@ -12264,42 +12295,52 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_OPEN_FAIL;
}
if (fputs_checked(
-"##fileformat=VCFv4.1\n"
-"##filedate=", outfile)) {
+"##fileformat=VCFv4.2\n"
+"##fileDate=", outfile)) {
goto recode_ret_WRITE_FAIL;
}
time(&rawtime);
loctime = localtime(&rawtime);
strftime(tbuf, MAXLINELEN, "%Y%m%d", loctime);
fputs(tbuf, outfile);
- fputs(
-"\n##source=" PROG_NAME_CAPS "\n"
-"##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n"
-, outfile);
+ fputs("\n##source=PLINKv1.90\n", outfile);
uii = 0; // '0' written already?
memcpy(tbuf, "##contig=<ID=", 13);
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ if (!IS_SET(chrom_info_ptr->chrom_mask, chrom_idx)) {
+ continue;
+ }
cptr = chrom_name_write(&(tbuf[13]), chrom_info_ptr, chrom_idx);
if ((tbuf[13] == '0') && (cptr == &(tbuf[14]))) {
if (uii) {
continue;
}
uii = 1;
- cptr = memcpya(cptr, ",length=536870911", 17);
+ cptr = memcpya(cptr, ",length=2147483645", 18);
} else {
+ *cptr = '\0';
+ if (strchr(&(tbuf[13]), ':')) {
+ logprint("Error: VCF chromosome codes may not include the ':' character.\n");
+ goto recode_ret_INVALID_FORMAT;
+ }
cptr = memcpya(cptr, ",length=", 8);
if (!(map_is_unsorted & UNSORTED_BP)) {
cptr = uint32_write(cptr, marker_pos[chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1] - 1] + 1);
} else {
- cptr = memcpya(cptr, "536870911", 9); // unknown
+ cptr = memcpya(cptr, "2147483645", 10); // unknown
}
}
cptr = memcpya(cptr, ">\n", 2);
fwrite(tbuf, 1, cptr - tbuf, outfile);
}
+ if (!real_ref_alleles) {
+ fputs("##INFO=<ID=PR,Number=0,Type=Flag,Description=\"Provisional reference allele, may not be based on real reference genome\"\n", outfile);
+ }
+ fputs("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n", outfile);
+ // todo: include PEDIGREE in header, and make --vcf be able to read it?
+ // Can't find a specification for how this should be done...
fputs("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT", outfile);
-
chrom_fo_idx = 0;
refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_mt, &is_haploid);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
@@ -12371,23 +12412,35 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
fputs(cptr, outfile);
}
putc('\t', outfile);
+
+ if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
+ goto recode_ret_READ_FAIL;
+ }
+ if (is_haploid && set_hh_missing) {
+ haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+ }
+
cptr = mk_allele_ptrs[2 * marker_uidx];
if (cptr != missing_geno_ptr) {
if ((!invalid_allele_code_seen) && (!valid_vcf_allele_code(cptr))) {
invalid_allele_code_seen = 1;
}
- fputs(cptr, outfile);
+ // if ALT allele is not actually present in immediate dataset, VCF
+ // spec actually requires '.'
+ if (!is_monomorphic_a2(loadbuf_collapsed, sample_ct)) {
+ fputs(cptr, outfile);
+ } else {
+ putc('.', outfile);
+ }
} else {
putc('.', outfile);
}
- fputs("\t.\t.\t.\tGT", outfile);
-
- if (load_and_collapse(bedfile, (uintptr_t*)loadbuf, unfiltered_sample_ct, loadbuf_collapsed, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx))) {
- goto recode_ret_READ_FAIL;
- }
- if (is_haploid && set_hh_missing) {
- haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
+ if (!real_ref_alleles) {
+ fputs("\t.\t.\tPR\tGT", outfile);
+ } else {
+ fputs("\t.\t.\t.\tGT", outfile);
}
+
wbufptr = writebuf;
ulptr = loadbuf_collapsed;
ulptr_end = &(loadbuf_collapsed[sample_ct / BITCT2]);
diff --git a/plink_glm.c b/plink_glm.c
index 4591d91..7d93bdb 100644
--- a/plink_glm.c
+++ b/plink_glm.c
@@ -591,7 +591,6 @@ int32_t glm_check_vif(double vif_thresh, uintptr_t param_ct, uintptr_t sample_va
uintptr_t sample_idx;
double dxx;
double dyy;
- int32_t ii;
for (param_idx = 1; param_idx < param_ct; param_idx++) {
dyy = 0; // sum
dptr = &(covars_collapsed[param_idx * sample_valid_ct]);
@@ -603,7 +602,7 @@ int32_t glm_check_vif(double vif_thresh, uintptr_t param_ct, uintptr_t sample_va
}
for (param_idx = 1; param_idx < param_ct; param_idx++) {
dptr = &(param_2d_buf[(param_idx - 1) * param_ct]);
- dyy = param_2d_buf2[param_idx];
+ dyy = param_2d_buf2[param_idx] * sample_ct_d;
for (param_idx2 = param_idx; param_idx2 < param_ct; param_idx2++) {
dxx = 0;
dptr2 = &(covars_collapsed[param_idx * sample_valid_ct]);
@@ -611,7 +610,7 @@ int32_t glm_check_vif(double vif_thresh, uintptr_t param_ct, uintptr_t sample_va
for (sample_idx = 0; sample_idx < sample_valid_ct; sample_idx++) {
dxx += (*dptr2++) * (*dptr3++);
}
- dxx -= dyy * param_2d_buf2[param_idx2] * sample_ct_d;
+ dxx -= dyy * param_2d_buf2[param_idx2];
*dptr++ = dxx * sample_ct_m1_recip;
}
}
@@ -640,9 +639,8 @@ int32_t glm_check_vif(double vif_thresh, uintptr_t param_ct, uintptr_t sample_va
param_2d_buf[param_idx * param_ct] = 1;
}
- ii = invert_matrix(dim, param_2d_buf, mi_buf, param_2d_buf2);
- if (ii) {
- return ii;
+ if (invert_matrix(dim, param_2d_buf, mi_buf, param_2d_buf2)) {
+ return 1;
}
for (param_idx = 0; param_idx < param_ct_m1; param_idx++) {
if (param_2d_buf[param_idx * param_ct] > vif_thresh) {
@@ -737,7 +735,7 @@ uint32_t glm_linear(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t samp
dptr2 = covars_sample_major;
dxx = 0;
if (!missing_ct) {
- for (sample_idx = 0; sample_idx < sample_valid_ct + missing_ct; sample_idx++) {
+ for (sample_idx = 0; sample_idx < sample_valid_ct; sample_idx++) {
partial = 0;
dptr3 = dptr;
for (param_idx = 0; param_idx < param_ct; param_idx++) {
@@ -1161,9 +1159,14 @@ static inline __m128 fmath_exp_ps(__m128 xx) {
u4 = _mm_srli_epi32(u4, 10);
u4 = _mm_slli_epi32(u4, 23);
uint32_t v0 = _mm_cvtsi128_si32(v4);
- uint32_t v1 = ((int32_t)(uint16_t)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(v4), (int32_t)(2)));
- uint32_t v2 = ((int32_t)(uint16_t)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(v4), (int32_t)(4)));
- uint32_t v3 = ((int32_t)(uint16_t)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(v4), (int32_t)(6)));
+ // uint32_t v1 = ((int32_t)(uint16_t)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(v4), (int32_t)(2)));
+ // uint32_t v2 = ((int32_t)(uint16_t)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(v4), (int32_t)(4)));
+ // uint32_t v3 = ((int32_t)(uint16_t)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(v4), (int32_t)(6)));
+ // make this work with LLVM
+ uint32_t v1 = _mm_extract_epi16(((__m128i)(v4)), ((int32_t)(2)));
+ uint32_t v2 = _mm_extract_epi16(((__m128i)(v4)), ((int32_t)(4)));
+ uint32_t v3 = _mm_extract_epi16(((__m128i)(v4)), ((int32_t)(6)));
+
__m128 t0 = _mm_set_ss(float_exp_lookup[v0]);
__m128 t1 = _mm_set_ss(float_exp_lookup[v1]);
__m128 t2 = _mm_set_ss(float_exp_lookup[v2]);
@@ -1836,9 +1839,6 @@ uint32_t logistic_regression(uint32_t sample_ct, uint32_t param_ct, float* vv, f
}
}
-// debug
-static uint32_t g_marker_uidx2;
-
uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sample_valid_ct, uint32_t missing_ct, uintptr_t* loadbuf, float* covars_cov_major, uintptr_t* perm_vecs, float* coef, float* pp, float* sample_1d_buf, float* pheno_buf, float* param_1d_buf, float* param_1d_buf2, float* param_2d_buf, float* param_2d_buf2, float* logistic_results, uintptr_t constraint_ct, double* constraints_con_major, double* param_1d_dbuf, double* param_2d_dbuf, double* param_2d_dbuf2, double* [...]
// Similar to logistic.cpp fitLM(), but incorporates changes from the
// postprocessed TopCoder contest code.
@@ -5015,6 +5015,9 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
goto glm_linear_assoc_ret_NOMEM;
}
g_linear_mt = (Linear_multithread*)wkspace_alloc(max_thread_ct * sizeof(Linear_multithread));
+ if (!g_linear_mt) {
+ goto glm_linear_assoc_ret_NOMEM;
+ }
ulii = (orig_perm_batch_size + (BITCT - 1)) / BITCT;
for (tidx = 0; tidx < max_thread_ct; tidx++) {
if (wkspace_alloc_d_checked(&(g_linear_mt[tidx].cur_covars_cov_major), param_ct_max * sample_valid_ct * sizeof(double)) ||
@@ -5032,13 +5035,13 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
if (constraint_ct_max) {
if (wkspace_alloc_d_checked(&(g_linear_mt[tidx].df_df_buf), constraint_ct_max * constraint_ct_max * sizeof(double)) ||
- wkspace_alloc_d_checked(&(g_linear_mt[tidx].df_buf), constraint_ct_max * sizeof(double))) {
+ wkspace_alloc_d_checked(&(g_linear_mt[tidx].df_buf), constraint_ct_max * sizeof(double)) ||
+ wkspace_alloc_d_checked(&(g_linear_mt[tidx].param_df_buf), constraint_ct_max * param_ct_max * sizeof(double)) ||
+ wkspace_alloc_d_checked(&(g_linear_mt[tidx].param_df_buf2), constraint_ct_max * param_ct_max * sizeof(double))) {
goto glm_linear_assoc_ret_NOMEM;
}
}
- if (wkspace_alloc_d_checked(&(g_linear_mt[tidx].param_df_buf), constraint_ct_max * param_ct_max * sizeof(double)) ||
- wkspace_alloc_d_checked(&(g_linear_mt[tidx].param_df_buf2), constraint_ct_max * param_ct_max * sizeof(double)) ||
- wkspace_alloc_d_checked(&(g_linear_mt[tidx].dgels_a), param_ct_max * sample_valid_ct * sizeof(double)) ||
+ if (wkspace_alloc_d_checked(&(g_linear_mt[tidx].dgels_a), param_ct_max * sample_valid_ct * sizeof(double)) ||
wkspace_alloc_d_checked(&(g_linear_mt[tidx].dgels_b), orig_perm_batch_size * sample_valid_ct * sizeof(double))) {
goto glm_linear_assoc_ret_NOMEM;
}
@@ -6519,6 +6522,9 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
orig_perm_batch_size = 1;
}
g_logistic_mt = (Logistic_multithread*)wkspace_alloc(max_thread_ct * sizeof(Logistic_multithread));
+ if (!g_logistic_mt) {
+ goto glm_logistic_assoc_ret_NOMEM;
+ }
ulii = (orig_perm_batch_size + (BITCT - 1)) / BITCT;
for (tidx = 0; tidx < max_thread_ct; tidx++) {
// covars_cov_major, param_2d_buf, param_2d_buf2 matrices must have 16-byte
@@ -6788,7 +6794,6 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
if (cur_sample_valid_ct > cur_param_ct) {
// todo: try better starting position
fill_float_zero(g_logistic_mt[0].coef, (cur_param_ct + 3) & (~3));
- g_marker_uidx2 = marker_uidx2;
regression_fail = glm_logistic(1, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, loadbuf_ptr, g_logistic_mt[0].cur_covars_cov_major, pheno_c_collapsed, g_logistic_mt[0].coef, g_logistic_mt[0].pp, g_logistic_mt[0].sample_1d_buf, g_logistic_mt[0].pheno_buf, g_logistic_mt[0].param_1d_buf, g_logistic_mt[0].param_1d_buf2, g_logistic_mt[0].param_2d_buf, g_logistic_mt[0].param_2d_buf2, g_logistic_mt[0].regression_results, cur_constraint_ct, constraints_con_major, g_logistic_mt[0].param_1 [...]
} else {
regression_fail = 1;
@@ -7524,10 +7529,8 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
if (constraint_ct) {
if (wkspace_alloc_d_checked(&constraints_con_major, constraint_ct * param_ct * sizeof(double)) ||
wkspace_alloc_d_checked(&df_df_buf, constraint_ct * constraint_ct * sizeof(double)) ||
- wkspace_alloc_d_checked(&df_buf, constraint_ct * sizeof(double))) {
- goto glm_linear_nosnp_ret_NOMEM;
- }
- if (wkspace_alloc_d_checked(¶m_df_buf, constraint_ct * param_ct * sizeof(double)) ||
+ wkspace_alloc_d_checked(&df_buf, constraint_ct * sizeof(double)) ||
+ wkspace_alloc_d_checked(¶m_df_buf, constraint_ct * param_ct * sizeof(double)) ||
wkspace_alloc_d_checked(¶m_df_buf2, constraint_ct * param_ct * sizeof(double))) {
goto glm_linear_nosnp_ret_NOMEM;
}
@@ -9013,6 +9016,7 @@ uint32_t glm_logistic_dosage(uintptr_t sample_ct, uintptr_t* cur_samples, uintpt
dyy = sqrt((double)regression_results[0]);
*beta_ptr = dxx;
*se_ptr = dyy;
- *pval_ptr = calc_tprob(dxx / dyy, sample_valid_ct - param_ct);
+ dxx /= dyy;
+ *pval_ptr = chiprob_p(dxx * dxx, 1);
return 1;
}
diff --git a/plink_glm.h b/plink_glm.h
index da359ac..d312046 100644
--- a/plink_glm.h
+++ b/plink_glm.h
@@ -4,6 +4,12 @@
#include "plink_matrix.h"
#include "plink_set.h"
+// int32_t glm_check_vif(double vif_thresh, uintptr_t param_ct, uintptr_t sample_valid_ct, double* covars_collapsed, double* param_2d_buf, MATRIX_INVERT_BUF1_TYPE* mi_buf, double* param_2d_buf2);
+
+void solve_linear_system(const float* ll, const float* yy, float* xx, uint32_t dd);
+
+uint32_t logistic_regression(uint32_t sample_ct, uint32_t param_ct, float* vv, float* hh, float* grad, float* ll, float* dcoef, const float* xx, const float* yy, float* coef, float* pp);
+
#ifndef NOLAPACK
int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t glm_modifier, double glm_vif_thresh, uint32_t glm_xchr_model, uint32_t glm_mperm_val, Range_list* parameters_range_list_ptr, Range_list* tests_range_list_ptr, double ci_size, double ci_zt, double pfilter, double output_min_p, uint32_t mtest_adjust, double adjust_lambda, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, uintptr_t marker_ct, char* marke [...]
#endif
diff --git a/plink_help.c b/plink_help.c
index 8b6ff41..c384356 100644
--- a/plink_help.c
+++ b/plink_help.c
@@ -418,13 +418,14 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" * 'oxford' causes an Oxford-format .gen + .sample fileset to be generated.\n"
" * The 'structure' modifier causes a Structure-format file to be generated.\n"
" * 'transpose' creates a transposed text fileset (loadable with --tfile).\n"
-" * 'vcf', 'vcf-fid', and 'vcf-iid' result in production of a VCFv4.1 file.\n"
+" * 'vcf', 'vcf-fid', and 'vcf-iid' result in production of a VCFv4.2 file.\n"
" 'vcf-fid' and 'vcf-iid' cause family IDs or within-family IDs\n"
" respectively to be used for the sample IDs in the last header row, while\n"
-" 'vcf' merges both IDs and puts an underscore between them. The A2 allele\n"
-" is saved as the reference; when it is important for reference alleles to\n"
-" be correct, you'll usually also want to include --a2-allele in your\n"
-" command.\n"
+" 'vcf' merges both IDs and puts an underscore between them.\n"
+" The A2 allele is saved as the reference and normally flagged as not based\n"
+" on a real reference genome ('PR' INFO field value). When it is important\n"
+" for reference alleles to be correct, you'll also want to include\n"
+" --a2-allele and --real-ref-alleles in your command.\n"
" * The 'tab' modifier makes the output mostly tab-delimited instead of\n"
" mostly space-delimited. 'tabx' and 'spacex' force all tabs and all\n"
" spaces, respectively.\n\n"
@@ -483,7 +484,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --list-23-indels writes the subset with 23andMe-style indel calls (D/I\n"
" allele codes).\n\n"
);
-#ifndef STABLE_BUILD
help_print("list-duplicate-vars", &help_ctrl, 1,
" --list-duplicate-vars <require-same-ref> <ids-only> <suppress-first>\n"
" --list-duplicate-vars writes a .dupvar file describing all groups of\n"
@@ -497,7 +497,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" * 'suppress-first' causes the first variant ID in each group to be omitted\n"
" from the report.\n\n"
);
-#endif
help_print("freq\tfreqx\tfrqx\tcounts", &help_ctrl, 1,
" --freq <counts>\n"
" --freqx\n"
@@ -635,7 +634,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" maximum likelihood solution identified by --r/--r2), along with HWE exact\n"
" test statistics.\n\n"
);
-#ifndef STABLE_BUILD
help_print("show-tags", &help_ctrl, 1,
" --show-tags [filename]\n"
" --show-tags all\n"
@@ -645,7 +643,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" * If 'all' mode is specified, for each variant, each *other* variant which\n"
" tags it is reported.\n\n"
);
-#endif
help_print("blocks\thap\thap-all\thap-assoc\thap-freq\thap-impute\thap-impute-verbose\thap-linear\thap-logistic\thap-max-phase\thap-min-phase-prob\thap-miss\thap-omnibus\thap-only\thap-phase\thap-phase-wide\thap-pp\thap-snps\thap-tdt\thap-window\tchap\twhap", &help_ctrl, 1,
" --blocks <no-pheno-req> <no-small-max-span>\n"
" Estimate haplotype blocks, via Haploview's interpretation of the block\n"
@@ -1029,8 +1026,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" 0..1 hom A2.\n"
" * 'Zout' causes the output file to be gzipped.\n"
" * Normally, an association analysis is performed. 'standard-beta' and\n"
-" 'sex' behave as they are supposed to with --linear/--logistic. (Note\n"
-" that --dosage + --sex did NOT work properly in PLINK 1.07.)\n"
+" 'sex' behave as they are supposed to with --linear/--logistic.\n"
" * There are three alternate modes which cause the association analysis to\n"
" be skipped.\n"
" * 'occur' requests a simple variant occurrence report.\n"
@@ -1152,7 +1148,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" * When --extract (without 'range') is present, only variants named in the\n"
" --extract file are considered.\n\n"
);
-#ifndef STABLE_BUILD
help_print("meta-analysis", &help_ctrl, 1,
" --meta-analysis [PLINK report filenames...]\n"
" --meta-analysis [PLINK report filenames...] + <logscale | qt>\n"
@@ -1181,7 +1176,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --extract file are considered.\n"
" * Unless 'no-map' is specified, chromosome filters are also respected.\n\n"
);
-#endif
help_print("fast-epistasis\tepistasis\tset-test\tset-by-all\tcase-only\tnop\tepistasis-summary-merge", &help_ctrl, 1,
" --fast-epistasis <boost | joint-effects | no-ueki> <case-only>\n"
" <set-by-set | set-by-all> <nop>\n"
@@ -1339,12 +1333,10 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --biallelic-only <strict> <list> : Skip VCF variants with 2+ alt. alleles.\n"
" --vcf-min-qual [val] : Skip VCF variants with low/missing QUAL.\n"
" --vcf-filter {exception(s)...} : Skip variants which have FILTER failures.\n"
-#ifndef STABLE_BUILD
" --vcf-min-gq [val] : No-call a genotype when GQ is below the\n"
" given threshold.\n"
" --vcf-min-gp [val] : No-call a genotype when 0-1 scaled GP is\n"
" below the given threshold.\n"
-#endif
" --vcf-half-call [m] : Specify how '0/.' and similar VCF GT values should be\n"
" handled. The following three modes are supported:\n"
" * 'error'/'e' (default) errors out and reports line #.\n"
@@ -1729,9 +1721,11 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --flip-scan-window-kb [x] : Set --flip-scan max kb distance (default 1000).\n"
" --flip-scan-threshold [x] : Set --flip-scan min correlation (default 0.5).\n"
);
- help_print("keep-allele-order\tmake-bed\tmerge\tbmerge\tmerge-list", &help_ctrl, 0,
+ help_print("keep-allele-order\treal-ref-alleles\tmake-bed\tmerge\tbmerge\tmerge-list\trecode", &help_ctrl, 0,
" --keep-allele-order : Keep the allele order defined in the .bim file,\n"
-" instead of forcing A2 to be the major allele.\n"
+" --real-ref-alleles instead of forcing A2 to be the major allele.\n"
+" --real-ref-alleles also removes 'PR' from the INFO\n"
+" values emitted by --recode vcf{-fid/-iid}.\n"
);
help_print("a1-allele\treference-allele\tupdate-ref-allele\ta2-allele", &help_ctrl, 0,
" --a1-allele [f] {a1col} {IDcol} {skip} : Force alleles in the file to A1.\n"
@@ -1789,7 +1783,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --ld-snps [vID...] : Restrict first --r/--r2 variant to the given ranges.\n"
" --ld-snp-list [f] : Restrict first --r/--r2 var. to those named in the file.\n"
);
-#ifndef STABLE_BUILD
help_print("list-all\ttag-kb\ttag-r2\ttag-mode2\tshow-tags", &help_ctrl, 0,
" --list-all : Generate the 'all' mode report when using --show-tags in\n"
" file mode.\n"
@@ -1797,7 +1790,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --tag-r2 [val] : Set --show-tags min tag r-squared (default 0.8)\n"
" --tag-mode2 : Use two-column --show-tags (file mode) I/O format.\n"
);
-#endif
help_print("indep\tindep-pairwise\tr\tr2\tflip-scan\tflipscan\tshow-tags\tld-xchr", &help_ctrl, 0,
" --ld-xchr [code] : Set Xchr model for --indep{-pairwise}, --r/--r2,\n"
" --flip-scan, and --show-tags.\n"
@@ -1996,7 +1988,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
help_print("clump-best\tclump", &help_ctrl, 0,
" --clump-best : Report best proxy for each --clump index var.\n"
);
-#ifndef STABLE_BUILD
help_print("meta-analysis-snp-field\tmeta-analysis-a1-field\tmeta-analysis-a2-field\tmeta-analysis-p-field\tmeta-analysis-ess-field\tmeta-analysis", &help_ctrl, 0,
" --meta-analysis-snp-field [n...] : Set --meta-analysis variant ID, A1/A2\n"
" --meta-analysis-a1-field [n...] allele, p-value, and/or effective sample\n"
@@ -2010,7 +2001,6 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" size should be\n"
" 4 / (1/[# cases] + 1/[# controls]).\n"
);
-#endif
help_print("gene-list-border\tgene-report\tgene-subset\tgene-list\tgene-report-snp-field", &help_ctrl, 0,
" --gene-list-border [kbs] : Extend --gene-report regions by given # of kbs.\n"
" --gene-subset [filename] : Specify gene name subset for --gene-report.\n"
@@ -2070,11 +2060,9 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --perm-batch-size [val] : Set number of permutations per batch for some\n"
" permutation tests.\n"
);
-#ifndef STABLE_BUILD
help_print("output-min-p", &help_ctrl, 0,
" --output-min-p [p] : Specify minimum p-value to write to reports.\n"
);
-#endif
help_print("debug", &help_ctrl, 0,
" --debug : Use slower, more crash-resistant logging method.\n"
);
diff --git a/plink_ld.c b/plink_ld.c
index d16a0b4..2c22be4 100644
--- a/plink_ld.c
+++ b/plink_ld.c
@@ -2,6 +2,7 @@
#include <stddef.h>
#include "plink_assoc.h"
+#include "plink_glm.h"
#include "plink_ld.h"
#include "plink_stats.h"
#include "pigz.h"
@@ -3929,7 +3930,7 @@ THREAD_RET_TYPE fast_epi_thread(void* arg) {
fail_ct_fixed++;
fail_ct2[block_idx2] += 1;
if (alpha1sq == 0.0) {
- // special case: log NA
+ // special case: log NA when '--epi1 1' specified
all_chisq_write[block_idx2] = NAN;
}
}
@@ -3964,6 +3965,814 @@ THREAD_RET_TYPE fast_epi_thread(void* arg) {
}
}
+// epistasis linear/logistic regression multithread globals
+
+static double* g_epi_pheno_d2;
+static double* g_epi_phenogeno1;
+static double* g_epi_phenogeno2;
+static uint32_t* g_epi_genosums1;
+static uint32_t* g_epi_genosums2;
+static double g_epi_pheno_sum;
+static double g_epi_pheno_ssq;
+static double g_epi_vif_thresh;
+
+static uint32_t g_epi_pheno_nm_ct;
+
+typedef struct epi_logistic_multithread_struct {
+ float* cur_covars_cov_major;
+ float* coef;
+ float* pp;
+ float* sample_1d_buf;
+ float* pheno_buf;
+ float* param_1d_buf;
+ float* param_1d_buf2;
+ float* param_2d_buf;
+ float* param_2d_buf2;
+} Epi_logistic_multithread;
+
+static Epi_logistic_multithread* g_epi_logistic_mt;
+static uintptr_t* g_epi_pheno_c;
+static float* g_epi_all_chisq_f;
+static float* g_epi_best_chisq_f1;
+static float* g_epi_best_chisq_f2;
+
+uint32_t matrix_invert_4x4symm(double* dmatrix) {
+ double buf[16];
+ double determinant;
+ // initially, dww = A_{22}A_{34} - A_{23}A_{24}
+ // dxx = A_{23}A_{34} - A_{24}A_{33}
+ // dyy = A_{23}A_{44} - A_{24}A_{34}
+ // dzz = A_{33}A_{44} - A_{34}A_{34}
+ double dww = dmatrix[5] * dmatrix[11] - dmatrix[6] * dmatrix[7];
+ double dxx = dmatrix[6] * dmatrix[11] - dmatrix[7] * dmatrix[10];
+ double dyy = dmatrix[6] * dmatrix[15] - dmatrix[7] * dmatrix[11];
+ double dzz = dmatrix[10] * dmatrix[15] - dmatrix[11] * dmatrix[11];
+ double dvv;
+ double duu;
+ buf[0] = dmatrix[5] * dzz
+ - dmatrix[6] * dyy
+ + dmatrix[7] * dxx;
+ buf[1] = dmatrix[2] * dyy
+ - dmatrix[1] * dzz
+ - dmatrix[3] * dxx;
+ buf[2] = dmatrix[1] * dyy
+ + dmatrix[2] * (dmatrix[7] * dmatrix[7] - dmatrix[5] * dmatrix[15])
+ + dmatrix[3] * dww;
+ duu = dmatrix[5] * dmatrix[10] - dmatrix[6] * dmatrix[6];
+ buf[3] = dmatrix[2] * dww
+ - dmatrix[1] * dxx
+ - dmatrix[3] * duu;
+ determinant = dmatrix[0] * buf[0] + dmatrix[1] * buf[1] + dmatrix[2] * buf[2] + dmatrix[3] * buf[3];
+ if (fabs(determinant) < EPSILON) {
+ return 1;
+ }
+ buf[5] = dmatrix[0] * dzz
+ + dmatrix[2] * (dmatrix[3] * dmatrix[11] - dmatrix[2] * dmatrix[15])
+ + dmatrix[3] * (dmatrix[2] * dmatrix[11] - dmatrix[3] * dmatrix[10]);
+ dzz = dmatrix[1] * dmatrix[15] - dmatrix[3] * dmatrix[7];
+ buf[6] = dmatrix[2] * dzz
+ - dmatrix[0] * dyy
+ + dmatrix[3] * (dmatrix[3] * dmatrix[6] - dmatrix[1] * dmatrix[11]);
+ dyy = dmatrix[1] * dmatrix[11] - dmatrix[2] * dmatrix[7];
+ dvv = dmatrix[1] * dmatrix[10] - dmatrix[2] * dmatrix[6];
+ buf[7] = dmatrix[0] * dxx
+ - dmatrix[2] * dyy
+ + dmatrix[3] * dvv;
+ buf[10] = dmatrix[0] * (dmatrix[5] * dmatrix[15] - dmatrix[7] * dmatrix[7])
+ - dmatrix[1] * dzz
+ + dmatrix[3] * (dmatrix[1] * dmatrix[7] - dmatrix[3] * dmatrix[5]);
+ dxx = dmatrix[1] * dmatrix[6] - dmatrix[2] * dmatrix[5];
+ buf[11] = dmatrix[1] * dyy
+ - dmatrix[0] * dww
+ - dmatrix[3] * dxx;
+ buf[15] = dmatrix[0] * duu
+ - dmatrix[1] * dvv
+ + dmatrix[2] * dxx;
+ determinant = 1.0 / determinant; // now reciprocal
+ dmatrix[0] = buf[0] * determinant;
+ dmatrix[1] = buf[1] * determinant;
+ dmatrix[2] = buf[2] * determinant;
+ dmatrix[3] = buf[3] * determinant;
+ dmatrix[4] = dmatrix[1];
+ dmatrix[5] = buf[5] * determinant;
+ dmatrix[6] = buf[6] * determinant;
+ dmatrix[7] = buf[7] * determinant;
+ dmatrix[8] = dmatrix[2];
+ dmatrix[9] = dmatrix[6];
+ dmatrix[10] = buf[10] * determinant;
+ dmatrix[11] = buf[11] * determinant;
+ dmatrix[12] = dmatrix[3];
+ dmatrix[13] = dmatrix[7];
+ dmatrix[14] = dmatrix[11];
+ dmatrix[15] = buf[15] * determinant;
+ return 0;
+}
+
+THREAD_RET_TYPE epi_linear_thread(void* arg) {
+ uintptr_t tidx = (uintptr_t)arg;
+ uintptr_t block_idx1_start = g_epi_idx1_block_bounds[tidx];
+ uintptr_t block_idx1_end = g_epi_idx1_block_bounds[tidx + 1];
+ uintptr_t idx1_block_start16 = g_epi_idx1_block_bounds16[tidx];
+ uintptr_t marker_idx1 = g_epi_marker_idx1 + block_idx1_start;
+ uintptr_t marker_ct = g_epi_marker_ct;
+ double alpha1sq = g_epi_alpha1sq[0];
+ double alpha2sq = g_epi_alpha2sq[0];
+ double pheno_sum = g_epi_pheno_sum;
+ double pheno_ssq = g_epi_pheno_ssq;
+ double vif_thresh = g_epi_vif_thresh;
+ uint32_t pheno_nm_ct = g_epi_pheno_nm_ct;
+ uint32_t best_id_fixed = 0;
+ uint32_t is_first_half = 0;
+ uintptr_t pheno_nm_ctl2 = (pheno_nm_ct + (BITCT2 - 1)) / BITCT2;
+ uintptr_t* geno1 = g_epi_geno1;
+ double* pheno_d2 = g_epi_pheno_d2;
+ uint32_t* geno1_offsets = g_epi_geno1_offsets;
+ uint32_t* best_id1 = &(g_epi_best_id1[idx1_block_start16]);
+ const double dconst[] = {1.0, 2.0, 2.0, 4.0};
+ double dmatrix_buf[16];
+ double dmatrix_buf2[4];
+
+ // sum(aa), sum(ab), sum(bb), sum(aab), sum(abb), and sum(aabb) can all be
+ // derived from these four quantities.
+ uint32_t cur_minor_cts[4]; // 11, 12, 21, 22
+
+ uintptr_t* cur_geno1;
+ uintptr_t* geno2;
+ uintptr_t* cur_geno2;
+ double* phenogeno1;
+ double* phenogeno2;
+ double* all_chisq_write;
+ double* chisq2_ptr;
+ double* all_chisq;
+ double* best_chisq1;
+ double* best_chisq2;
+ double* dptr;
+ double* dptr2;
+ uint32_t* n_sig_ct1;
+ uint32_t* fail_ct1;
+ uint32_t* best_id2;
+ uint32_t* n_sig_ct2;
+ uint32_t* fail_ct2;
+ uint32_t* genosums1;
+ uint32_t* genosums2;
+ uintptr_t idx2_block_size;
+ uintptr_t cur_idx2_block_size;
+ uintptr_t idx2_block_start;
+ uintptr_t idx2_block_end;
+ uintptr_t idx2_block_sizem16;
+ uintptr_t block_idx1;
+ uintptr_t block_delta1;
+ uintptr_t block_idx2;
+ uintptr_t cur_word1;
+ uintptr_t cur_word2;
+ uintptr_t active_mask;
+ uintptr_t param_idx;
+ uintptr_t param_idx2;
+ uintptr_t cur_sum_aab;
+ uintptr_t cur_sum_abb;
+ uintptr_t cur_sum_aabb;
+ uintptr_t ulii;
+ uintptr_t uljj;
+ double best_chisq_fixed;
+ double sum_a_pheno_base;
+ double cur_pheno_sum;
+ double cur_pheno_ssq;
+ double cur_sum_a_pheno;
+ double cur_sum_b_pheno;
+ double cur_sum_ab_pheno;
+ double sample_ctd;
+ double sample_ct_recip;
+ double sample_ct_m1_recip;
+ double cur_sum_ad;
+ double cur_sum_bd;
+ double cur_sum_abd;
+ double determinant;
+ double min_sigma;
+ double sigma;
+ double dxx;
+ double dyy;
+ double dzz;
+ double dww;
+ double dvv;
+ double duu;
+ double zsq;
+ uint32_t n_sig_ct_fixed;
+ uint32_t fail_ct_fixed;
+
+ uint32_t sum_a_base;
+ uint32_t sum_aa_base;
+ uint32_t cur_sum_a;
+ uint32_t cur_sum_aa;
+ uint32_t cur_sum_b;
+ uint32_t cur_sum_bb;
+ uint32_t cur_sum_ab;
+ uint32_t widx;
+ uint32_t sample_idx;
+ uint32_t cur_sample_ct;
+ uint32_t woffset;
+ while (1) {
+ idx2_block_size = g_epi_idx2_block_size;
+ cur_idx2_block_size = idx2_block_size;
+ idx2_block_start = g_epi_idx2_block_start;
+ idx2_block_end = idx2_block_start + idx2_block_size;
+ idx2_block_sizem16 = (idx2_block_size + 15) & (~(15 * ONELU));
+ geno2 = g_epi_geno2;
+ phenogeno1 = g_epi_phenogeno1;
+ phenogeno2 = g_epi_phenogeno2;
+ genosums1 = g_epi_genosums1;
+ genosums2 = g_epi_genosums2;
+ all_chisq = &(g_epi_all_chisq[2 * idx2_block_start]);
+ best_chisq1 = &(g_epi_best_chisq1[idx1_block_start16]);
+ best_chisq2 = &(g_epi_best_chisq2[tidx * idx2_block_sizem16]);
+ n_sig_ct1 = &(g_epi_n_sig_ct1[idx1_block_start16]);
+ fail_ct1 = &(g_epi_fail_ct1[idx1_block_start16]);
+ best_id2 = &(g_epi_best_id2[tidx * idx2_block_sizem16]);
+ n_sig_ct2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizem16]);
+ fail_ct2 = &(g_epi_fail_ct2[tidx * idx2_block_sizem16]);
+ for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++, marker_idx1++) {
+ ulii = geno1_offsets[2 * block_idx1];
+ if (ulii > idx2_block_start) {
+ block_idx2 = 0;
+ cur_idx2_block_size = ulii - idx2_block_start;
+ if (cur_idx2_block_size >= idx2_block_size) {
+ cur_idx2_block_size = idx2_block_size;
+ } else {
+ is_first_half = 1;
+ }
+ } else {
+ ulii = geno1_offsets[2 * block_idx1 + 1];
+ if (ulii >= idx2_block_end) {
+ // may not be done in set1 x all or set1 x set2 cases
+ continue;
+ } else {
+ if (ulii <= idx2_block_start) {
+ block_idx2 = 0;
+ } else {
+ block_idx2 = ulii - idx2_block_start;
+ }
+ }
+ }
+ cur_geno1 = &(geno1[block_idx1 * pheno_nm_ctl2]);
+ n_sig_ct_fixed = 0;
+ fail_ct_fixed = 0;
+ block_delta1 = block_idx1 - block_idx1_start;
+ best_chisq_fixed = best_chisq1[block_delta1];
+ sum_a_pheno_base = phenogeno1[block_idx1];
+ sum_a_base = genosums1[2 * block_idx1];
+ sum_aa_base = genosums1[2 * block_idx1 + 1];
+
+ // [0] = chisq, [1] = beta
+ all_chisq_write = &(all_chisq[block_idx1 * marker_ct * 2]);
+
+ epi_linear_thread_second_half:
+ cur_geno2 = &(geno2[block_idx2 * pheno_nm_ctl2]);
+ chisq2_ptr = &(best_chisq2[block_idx2]);
+ for (; block_idx2 < cur_idx2_block_size; block_idx2++, chisq2_ptr++, cur_geno2 = &(cur_geno2[pheno_nm_ctl2])) {
+ // Our covariates are 1, genotype A (in {0, 1, 2}), genotype B, and
+ // [genotype A] * [genotype B].
+ // The ordinary least squares solution to this system is
+ // (X^T X)^{-1} X^T Y
+ // where X^T X is the following 4x4 matrix (where n = # of samples):
+ // [ n sum(A) sum(B) sum(AB) ]
+ // [ sum(A) sum(AA) sum(AB) sum(AAB) ]
+ // [ sum(B) sum(AB) sum(BB) sum(ABB) ]
+ // [ sum(AB) sum(AAB) sum(ABB) sum(AABB) ]
+ // (sum(.) denotes the sum of that (product of) genotypes, across all
+ // samples.)
+ // Meanwhile, X^T Y is the following 4x1 matrix:
+ // [ sum(pheno) ]
+ // [ sum(A * pheno) ]
+ // [ sum(B * pheno) ]
+ // [ sum(AB * pheno) ]
+ // Crucially, the VIF and valid parameters checks can also operate
+ // purely on the terms above and sum(pheno * pheno).
+
+ // these nine values can be mostly precomputed; just need to subtract
+ // from them sometimes when missing values are present.
+ cur_pheno_sum = pheno_sum;
+ cur_pheno_ssq = pheno_ssq;
+ cur_sum_a_pheno = sum_a_pheno_base;
+ cur_sum_b_pheno = phenogeno2[block_idx2];
+ cur_sum_a = sum_a_base;
+ cur_sum_aa = sum_aa_base;
+ cur_sum_b = genosums2[block_idx2 * 2];
+ cur_sum_bb = genosums2[block_idx2 * 2 + 1];
+ cur_sample_ct = pheno_nm_ct;
+
+ cur_sum_ab_pheno = 0.0;
+ fill_uint_zero(cur_minor_cts, 4);
+ for (widx = 0; widx < pheno_nm_ctl2; widx++) {
+ sample_idx = widx * BITCT2;
+ cur_word1 = cur_geno1[widx];
+ cur_word2 = cur_geno2[widx];
+ // we can entirely skip 5 common cases: 00/00, 00/01, 00/10, 01/00,
+ // 10/00.
+ active_mask = cur_word1 | cur_word2;
+ active_mask = (active_mask & (active_mask >> 1) & FIVEMASK) | (cur_word1 & cur_word2);
+ dptr = &(pheno_d2[sample_idx]);
+ while (active_mask) {
+ woffset = CTZLU(active_mask) / 2;
+ dxx = dptr[woffset];
+ woffset *= 2;
+ ulii = (cur_word1 >> woffset) & (3 * ONELU);
+ uljj = (cur_word2 >> woffset) & (3 * ONELU);
+ active_mask &= ~((3 * ONELU) << woffset);
+ if (ulii && uljj) {
+ if (ulii == 3) {
+ if (uljj == 1) {
+ cur_sum_b_pheno -= dxx;
+ cur_sum_b--;
+ cur_sum_bb--;
+ } else if (uljj == 2) {
+ cur_sum_b_pheno -= 2 * dxx;
+ cur_sum_b -= 2;
+ cur_sum_bb -= 4;
+ }
+ } else if (uljj == 3) {
+ // ulii must be 1 or 2
+ cur_sum_a_pheno -= dxx;
+ if (ulii == 2) {
+ cur_sum_a_pheno -= dxx;
+ }
+ cur_sum_a -= ulii;
+ cur_sum_aa -= ulii * ulii;
+ } else {
+ ulii = ulii * 2 + uljj - 3;
+ cur_sum_ab_pheno += dconst[ulii] * dxx;
+ cur_minor_cts[ulii] += 1;
+ continue;
+ }
+ }
+ cur_pheno_sum -= dxx;
+ cur_pheno_ssq -= dxx * dxx;
+ cur_sample_ct--;
+ }
+ }
+ if (cur_sample_ct <= 4) {
+ goto epi_linear_thread_regression_fail;
+ }
+ // VIF check. Mirrors glm_check_vif(), but param_ct is hardcoded to 4
+ // and we avoid additional iteration over the sample_idxs.
+ sample_ctd = (double)((int32_t)cur_sample_ct);
+ sample_ct_recip = 1.0 / sample_ctd;
+ sample_ct_m1_recip = 1.0 / ((double)((int32_t)(cur_sample_ct - 1)));
+ cur_sum_ab = cur_minor_cts[0] + 2 * (cur_minor_cts[1] + cur_minor_cts[2]) + 4 * cur_minor_cts[3];
+ cur_sum_aab = cur_minor_cts[0] + 2 * cur_minor_cts[1] + 4 * cur_minor_cts[2] + (8 * ONELU) * cur_minor_cts[3];
+ cur_sum_abb = cur_minor_cts[0] + 4 * cur_minor_cts[1] + 2 * cur_minor_cts[2] + (8 * ONELU) * cur_minor_cts[3];
+ cur_sum_aabb = cur_minor_cts[0] + 4 * (cur_minor_cts[1] + cur_minor_cts[2]) + (16 * ONELU) * cur_minor_cts[3];
+
+ cur_sum_ad = (double)((int32_t)cur_sum_a);
+ cur_sum_bd = (double)((int32_t)cur_sum_b);
+ cur_sum_abd = (double)((int32_t)cur_sum_ab);
+
+ // some genotype means
+ dxx = cur_sum_bd * sample_ct_recip;
+ dyy = cur_sum_abd * sample_ct_recip;
+
+ dww = ((double)((int32_t)cur_sum_aa)) - cur_sum_ad * cur_sum_ad * sample_ct_recip;
+ dvv = ((double)((int32_t)cur_sum_bb)) - cur_sum_bd * dxx;
+ duu = ((double)((intptr_t)cur_sum_aabb)) - cur_sum_abd * dyy;
+ if ((dww <= 0) || (dvv <= 0) || (duu <= 0)) {
+ goto epi_linear_thread_regression_fail;
+ }
+ dww = 1.0 / sqrt(dww * sample_ct_m1_recip);
+ dvv = 1.0 / sqrt(dvv * sample_ct_m1_recip);
+ duu = 1.0 / sqrt(duu * sample_ct_m1_recip);
+
+ dxx = (cur_sum_abd - cur_sum_ad * dxx) * sample_ct_m1_recip;
+ dzz = (((double)((intptr_t)cur_sum_abb)) - cur_sum_bd * dyy) * sample_ct_m1_recip;
+ dyy = (((double)((intptr_t)cur_sum_aab)) - cur_sum_ad * dyy) * sample_ct_m1_recip;
+ // now dxx = A_{12}, dyy = A_{13}, dzz = A_{23}
+
+ dxx *= dww * dvv;
+ dyy *= dww * duu;
+ dzz *= dvv * duu;
+ if ((dxx > 0.999) || (dyy > 0.999) || (dzz > 0.999)) {
+ goto epi_linear_thread_regression_fail;
+ }
+ // Use analytic formula for 3x3 symmetric matrix inverse.
+ // det A = A_{11}A_{22}A_{33} + 2 * A_{12}A_{13}A_{23}
+ // - A_{11}(A_{23}^2) - A_{22}(A_{13}^2) - A_{33}(A_{12}^2)
+ // upper left of inverse = (A_{22}A_{33} - (A_{23}^2))(det A)^{-1}
+ // middle = (A_{11}A_{33} - (A_{13}^2))(det A)^{-1}
+ // lower right = (A_{11}A_{22} - (A_{12}^2))(det A)^{-1}
+ dww = dxx * dxx;
+ dvv = dyy * dyy;
+ duu = dzz * dzz;
+ determinant = 1 + 2 * dxx * dyy * dzz - dww - dvv - duu;
+ if (fabs(determinant) < EPSILON) {
+ goto epi_linear_thread_regression_fail;
+ }
+ // (1 - x^2)/det > vif_thresh
+ // if det > 0:
+ // 1 - x^2 > vif_thresh * det
+ // 1 - vif_thresh * det > x^2
+ // otherwise:
+ // 1 - x^2 < vif_thresh * det
+ // 1 - vif_thresh * det < x^2
+ dxx = 1 - vif_thresh * determinant; // now a threshold
+ if (((determinant > 0) && ((dxx > dww) || (dxx > dvv) || (dxx > duu))) || ((determinant < 0) && ((dxx < dww) || (dxx < dvv) || (dxx < duu)))) {
+ goto epi_linear_thread_regression_fail;
+ }
+
+ // VIF check done, now perform linear regression
+ dmatrix_buf[0] = sample_ctd;
+ dmatrix_buf[1] = cur_sum_ad;
+ dmatrix_buf[2] = cur_sum_bd;
+ dmatrix_buf[3] = cur_sum_abd;
+ dmatrix_buf[5] = (double)((int32_t)cur_sum_aa);
+ dmatrix_buf[6] = cur_sum_abd;
+ dmatrix_buf[7] = (double)((intptr_t)cur_sum_aab);
+ dmatrix_buf[10] = (double)((int32_t)cur_sum_bb);
+ dmatrix_buf[11] = (double)((intptr_t)cur_sum_abb);
+ dmatrix_buf[15] = (double)((intptr_t)cur_sum_aabb);
+ if (matrix_invert_4x4symm(dmatrix_buf)) {
+ goto epi_linear_thread_regression_fail;
+ }
+
+ for (param_idx = 0; param_idx < 4; param_idx++) {
+ dmatrix_buf2[param_idx] = sqrt(dmatrix_buf[param_idx * 5]);
+ }
+ for (param_idx = 1; param_idx < 4; param_idx++) {
+ dxx = 0.99999 * dmatrix_buf2[param_idx];
+ dptr = &(dmatrix_buf[param_idx * 4]);
+ dptr2 = dmatrix_buf2;
+ for (param_idx2 = 0; param_idx2 < param_idx; param_idx2++) {
+ if ((*dptr++) > dxx * (*dptr2++)) {
+ goto epi_linear_thread_regression_fail;
+ }
+ }
+ }
+ min_sigma = MAXV(dmatrix_buf[5], dmatrix_buf[10]);
+ if (dmatrix_buf[15] > min_sigma) {
+ min_sigma = dmatrix_buf[15];
+ }
+ min_sigma = 1e-20 / min_sigma;
+
+ for (param_idx = 0; param_idx < 4; param_idx++) {
+ dptr = &(dmatrix_buf[param_idx * 4]);
+ dmatrix_buf2[param_idx] = cur_pheno_sum * dptr[0] + cur_sum_a_pheno * dptr[1] + cur_sum_b_pheno * dptr[2] + cur_sum_ab_pheno * dptr[3];
+ }
+ // dmatrix_buf2[0..3] now has linear regression result
+
+ // partial = coef[0] + A * coef[1] + B * coef[2] + AB * coef[3] - pheno
+ // sigma = \sum_{all samples} (partial * partial)
+ // = \sum (coef[0]^2
+ // + 2 * A * coef[0] * coef[1]
+ // + 2 * B * coef[0] * coef[2]
+ // + 2 * AB * coef[0] * coef[3]
+ // - 2 * coef[0] * pheno
+ // + AA * coef[1]^2
+ // + 2 * AB * coef[1] * coef[2]
+ // + 2 * AAB * coef[1] * coef[3]
+ // - 2 * A * coef[1] * pheno
+ // + BB * coef[2]^2
+ // + 2 * ABB * coef[2] * coef[3]
+ // - 2 * B * coef[2] * pheno
+ // + AABB * coef[3]^2
+ // - 2 * AB * coef[3] * pheno
+ // + pheno * pheno
+ sigma = dmatrix_buf2[0] * dmatrix_buf2[0] * sample_ctd
+ + dmatrix_buf2[1] * dmatrix_buf2[1] * ((double)((int32_t)cur_sum_aa))
+ + dmatrix_buf2[2] * dmatrix_buf2[2] * ((double)((int32_t)cur_sum_bb))
+ + dmatrix_buf2[3] * dmatrix_buf2[3] * ((double)((intptr_t)cur_sum_aabb))
+ + cur_pheno_ssq
+ + 2 * (dmatrix_buf2[0] * (dmatrix_buf2[1] * cur_sum_ad
+ + dmatrix_buf2[2] * cur_sum_bd
+ + dmatrix_buf2[3] * cur_sum_abd
+ - cur_pheno_sum)
+ + dmatrix_buf2[1] * (dmatrix_buf2[2] * cur_sum_abd
+ + dmatrix_buf2[3] * ((double)((intptr_t)cur_sum_aab))
+ - cur_sum_a_pheno)
+ + dmatrix_buf2[2] * (dmatrix_buf2[3] * ((double)((intptr_t)cur_sum_abb))
+ - cur_sum_b_pheno)
+ - dmatrix_buf2[3] * cur_sum_ab_pheno);
+ sigma /= (double)((int32_t)(cur_sample_ct - 4));
+ if (sigma < min_sigma) {
+ goto epi_linear_thread_regression_fail;
+ }
+
+ // dmatrix_buf2[3] = linear regression beta for AB term
+ // sqrt(dmatrix_buf[15] * sigma) = standard error for AB term
+ dxx = dmatrix_buf2[3];
+ zsq = (dxx * dxx) / (dmatrix_buf[15] * sigma);
+ if (zsq >= alpha1sq) {
+ all_chisq_write[2 * block_idx2] = zsq;
+ all_chisq_write[2 * block_idx2 + 1] = dxx;
+ }
+ if (zsq >= alpha2sq) {
+ n_sig_ct_fixed++;
+ n_sig_ct2[block_idx2] += 1;
+ }
+ if (zsq > best_chisq_fixed) {
+ best_chisq_fixed = zsq;
+ best_id_fixed = block_idx2 + idx2_block_start;
+ }
+ dxx = *chisq2_ptr;
+ if (zsq > dxx) {
+ *chisq2_ptr = zsq;
+ best_id2[block_idx2] = marker_idx1;
+ }
+ while (0) {
+ epi_linear_thread_regression_fail:
+ zsq = 0;
+ fail_ct_fixed++;
+ fail_ct2[block_idx2] += 1;
+ if (alpha1sq == 0.0) {
+ // special case: log NA when '--epi1 1' specified
+ all_chisq_write[block_idx2 * 2] = NAN;
+ all_chisq_write[block_idx2 * 2 + 1] = NAN;
+ }
+ }
+ }
+ if (is_first_half) {
+ is_first_half = 0;
+ ulii = geno1_offsets[2 * block_idx1 + 1];
+ cur_idx2_block_size = idx2_block_size;
+ if (ulii < idx2_block_end) {
+ // guaranteed to be larger than idx2_block_start, otherwise there
+ // would have been no first half
+ block_idx2 = ulii - idx2_block_start;
+ goto epi_linear_thread_second_half;
+ }
+ }
+ if (best_chisq_fixed > best_chisq1[block_delta1]) {
+ best_chisq1[block_delta1] = best_chisq_fixed;
+ best_id1[block_delta1] = best_id_fixed;
+ }
+ n_sig_ct1[block_delta1] = n_sig_ct_fixed;
+ if (fail_ct_fixed) {
+ fail_ct1[block_delta1] = fail_ct_fixed;
+ }
+ }
+ if ((!tidx) || g_is_last_thread_block) {
+ THREAD_RETURN;
+ }
+ THREAD_BLOCK_FINISH(tidx);
+ }
+}
+
+THREAD_RET_TYPE epi_logistic_thread(void* arg) {
+ uintptr_t tidx = (uintptr_t)arg;
+ uintptr_t block_idx1_start = g_epi_idx1_block_bounds[tidx];
+ uintptr_t block_idx1_end = g_epi_idx1_block_bounds[tidx + 1];
+ uintptr_t idx1_block_start16 = g_epi_idx1_block_bounds16[tidx];
+ uintptr_t marker_idx1 = g_epi_marker_idx1 + block_idx1_start;
+ uintptr_t marker_ct = g_epi_marker_ct;
+ float alpha1sq = (float)g_epi_alpha1sq[0];
+ float alpha2sq = (float)g_epi_alpha2sq[0];
+ uint32_t pheno_nm_ct = g_epi_pheno_nm_ct;
+ uint32_t best_id_fixed = 0;
+ uint32_t is_first_half = 0;
+ uintptr_t pheno_nm_ctl2 = (pheno_nm_ct + (BITCT2 - 1)) / BITCT2;
+ uintptr_t* geno1 = g_epi_geno1;
+ uintptr_t* pheno_c = g_epi_pheno_c;
+ float* covars_cov_major = g_epi_logistic_mt[tidx].cur_covars_cov_major;
+ float* coef = g_epi_logistic_mt[tidx].coef;
+ float* pp = g_epi_logistic_mt[tidx].pp;
+ float* sample_1d_buf = g_epi_logistic_mt[tidx].sample_1d_buf;
+ float* pheno_buf = g_epi_logistic_mt[tidx].pheno_buf;
+ float* param_1d_buf = g_epi_logistic_mt[tidx].param_1d_buf;
+ float* param_1d_buf2 = g_epi_logistic_mt[tidx].param_1d_buf2;
+ float* param_2d_buf = g_epi_logistic_mt[tidx].param_2d_buf;
+ float* param_2d_buf2 = g_epi_logistic_mt[tidx].param_2d_buf2;
+ uint32_t* geno1_offsets = g_epi_geno1_offsets;
+ uint32_t* best_id1 = &(g_epi_best_id1[idx1_block_start16]);
+ uintptr_t* cur_geno1;
+ uintptr_t* geno2;
+ uintptr_t* cur_geno2;
+ float* all_chisq_write;
+ float* chisq2_ptr;
+ float* all_chisq;
+ float* best_chisq1;
+ float* best_chisq2;
+ float* fptr;
+ float* fptr2;
+ uint32_t* n_sig_ct1;
+ uint32_t* fail_ct1;
+ uint32_t* best_id2;
+ uint32_t* n_sig_ct2;
+ uint32_t* fail_ct2;
+ uintptr_t idx2_block_size;
+ uintptr_t cur_idx2_block_size;
+ uintptr_t idx2_block_start;
+ uintptr_t idx2_block_end;
+ uintptr_t idx2_block_sizem16;
+ uintptr_t block_idx1;
+ uintptr_t block_delta1;
+ uintptr_t block_idx2;
+ uintptr_t cur_word1;
+ uintptr_t cur_word2;
+ uintptr_t param_idx;
+ uintptr_t param_idx2;
+ uintptr_t cur_sample_cta4;
+ uintptr_t ulii;
+ uintptr_t uljj;
+ float best_chisq_fixed;
+ // todo
+ float fxx;
+ float fyy;
+ float zsq;
+ uint32_t n_sig_ct_fixed;
+ uint32_t fail_ct_fixed;
+ uint32_t widx;
+ uint32_t loop_end;
+ uint32_t sample_idx;
+ uint32_t cur_sample_ct;
+ while (1) {
+ idx2_block_size = g_epi_idx2_block_size;
+ cur_idx2_block_size = idx2_block_size;
+ idx2_block_start = g_epi_idx2_block_start;
+ idx2_block_end = idx2_block_start + idx2_block_size;
+ idx2_block_sizem16 = (idx2_block_size + 15) & (~(15 * ONELU));
+ geno2 = g_epi_geno2;
+ all_chisq = &(g_epi_all_chisq_f[2 * idx2_block_start]);
+ best_chisq1 = &(g_epi_best_chisq_f1[idx1_block_start16]);
+ best_chisq2 = &(g_epi_best_chisq_f2[tidx * idx2_block_sizem16]);
+ n_sig_ct1 = &(g_epi_n_sig_ct1[idx1_block_start16]);
+ fail_ct1 = &(g_epi_fail_ct1[idx1_block_start16]);
+ best_id2 = &(g_epi_best_id2[tidx * idx2_block_sizem16]);
+ n_sig_ct2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizem16]);
+ fail_ct2 = &(g_epi_fail_ct2[tidx * idx2_block_sizem16]);
+ for (block_idx1 = block_idx1_start; block_idx1 < block_idx1_end; block_idx1++, marker_idx1++) {
+ ulii = geno1_offsets[2 * block_idx1];
+ if (ulii > idx2_block_start) {
+ block_idx2 = 0;
+ cur_idx2_block_size = ulii - idx2_block_start;
+ if (cur_idx2_block_size >= idx2_block_size) {
+ cur_idx2_block_size = idx2_block_size;
+ } else {
+ is_first_half = 1;
+ }
+ } else {
+ ulii = geno1_offsets[2 * block_idx1 + 1];
+ if (ulii >= idx2_block_end) {
+ // may not be done in set1 x all or set1 x set2 cases
+ continue;
+ } else {
+ if (ulii <= idx2_block_start) {
+ block_idx2 = 0;
+ } else {
+ block_idx2 = ulii - idx2_block_start;
+ }
+ }
+ }
+ cur_geno1 = &(geno1[block_idx1 * pheno_nm_ctl2]);
+ n_sig_ct_fixed = 0;
+ fail_ct_fixed = 0;
+ block_delta1 = block_idx1 - block_idx1_start;
+ best_chisq_fixed = best_chisq1[block_delta1];
+
+ // [0] = chisq, [1] = beta
+ all_chisq_write = &(all_chisq[block_idx1 * marker_ct * 2]);
+
+ epi_logistic_thread_second_half:
+ cur_geno2 = &(geno2[block_idx2 * pheno_nm_ctl2]);
+ chisq2_ptr = &(best_chisq2[block_idx2]);
+ for (; block_idx2 < cur_idx2_block_size; block_idx2++, chisq2_ptr++, cur_geno2 = &(cur_geno2[pheno_nm_ctl2])) {
+ fptr = covars_cov_major;
+ fptr2 = pheno_buf;
+ cur_sample_ct = pheno_nm_ct;
+ // this part is similar to glm_logistic().
+
+ // 1. determine number of samples with at least one missing genotype
+ for (widx = 0; widx < pheno_nm_ctl2; widx++) {
+ cur_word1 = cur_geno1[widx];
+ cur_word2 = cur_geno2[widx];
+ cur_word1 = cur_word1 & (cur_word1 >> 1);
+ cur_word2 = cur_word2 & (cur_word2 >> 1);
+ cur_sample_ct -= popcount2_long((cur_word1 | cur_word2) & FIVEMASK);
+ }
+ if (cur_sample_ct <= 4) {
+ goto epi_logistic_thread_regression_fail;
+ }
+ // 2. now populate covariate-major matrix with 16-byte-aligned,
+ // trailing-entries-zeroed rows
+ cur_sample_cta4 = (cur_sample_ct + 3) & (~3);
+ for (widx = 0; widx < pheno_nm_ctl2; widx++) {
+ sample_idx = widx * BITCT2;
+ cur_word1 = cur_geno1[widx];
+ cur_word2 = cur_geno2[widx];
+ loop_end = sample_idx + BITCT2;
+ if (loop_end > pheno_nm_ct) {
+ loop_end = pheno_nm_ct;
+ }
+ for (; sample_idx < loop_end; sample_idx++) {
+ ulii = cur_word1 & (3 * ONELU);
+ uljj = cur_word2 & (3 * ONELU);
+ if ((ulii != 3) && (uljj != 3)) {
+ *fptr = 1.0;
+ fxx = (float)((intptr_t)ulii);
+ fyy = (float)((intptr_t)uljj);
+ // maybe this is faster with continuous writes instead of
+ // continuous reads? can experiment later
+ fptr[cur_sample_cta4] = fxx;
+ fptr[2 * cur_sample_cta4] = fyy;
+ fptr[3 * cur_sample_cta4] = fxx * fyy;
+ fptr++;
+ *fptr2++ = (float)((int32_t)is_set(pheno_c, sample_idx));
+ }
+ cur_word1 >>= 2;
+ cur_word2 >>= 2;
+ }
+ }
+ if (cur_sample_ct < cur_sample_cta4) {
+ loop_end = cur_sample_cta4 - cur_sample_ct;
+ fill_float_zero(fptr, loop_end);
+ fill_float_zero(&(fptr[cur_sample_cta4]), loop_end);
+ fill_float_zero(&(fptr[2 * cur_sample_cta4]), loop_end);
+ fill_float_zero(&(fptr[3 * cur_sample_cta4]), loop_end);
+ fill_float_zero(fptr2, loop_end);
+ }
+
+ fill_float_zero(coef, 4);
+ if (logistic_regression(cur_sample_ct, 4, sample_1d_buf, param_2d_buf, param_1d_buf, param_2d_buf2, param_1d_buf2, covars_cov_major, pheno_buf, coef, pp)) {
+ goto epi_logistic_thread_regression_fail;
+ }
+
+ // compute S
+ for (param_idx = 0; param_idx < 4; param_idx++) {
+ fill_float_zero(param_1d_buf, 4);
+ param_1d_buf[param_idx] = 1.0;
+ solve_linear_system(param_2d_buf2, param_1d_buf, param_1d_buf2, 4);
+ memcpy(&(param_2d_buf[param_idx * 4]), param_1d_buf2, 4 * sizeof(float));
+ }
+ for (param_idx = 1; param_idx < 4; param_idx++) {
+ fxx = param_2d_buf[param_idx * 5];
+ if ((fxx < 1e-20) || (!realnum(fxx))) {
+ goto epi_logistic_thread_regression_fail;
+ }
+ param_2d_buf2[param_idx] = sqrtf(fxx);
+ }
+ param_2d_buf2[0] = sqrtf(param_2d_buf[0]);
+ for (param_idx = 1; param_idx < 4; param_idx++) {
+ fxx = 0.99999 * param_2d_buf2[param_idx];
+ fptr = &(param_2d_buf[param_idx * 4]);
+ fptr2 = param_2d_buf2;
+ for (param_idx2 = 0; param_idx2 < param_idx; param_idx2++) {
+ if ((*fptr++) > fxx * (*fptr2++)) {
+ goto epi_logistic_thread_regression_fail;
+ }
+ }
+ }
+
+ // coef[3] = logistic regression beta for AB term
+ // sqrt(param_2d_buf[15]) = standard error for AB term
+ zsq = coef[3] * coef[3] / param_2d_buf[15];
+ if (zsq >= alpha1sq) {
+ all_chisq_write[2 * block_idx2] = zsq;
+ all_chisq_write[2 * block_idx2 + 1] = coef[3];
+ }
+ if (zsq >= alpha2sq) {
+ n_sig_ct_fixed++;
+ n_sig_ct2[block_idx2] += 1;
+ }
+ if (zsq > best_chisq_fixed) {
+ best_chisq_fixed = zsq;
+ best_id_fixed = block_idx2 + idx2_block_start;
+ }
+ fxx = *chisq2_ptr;
+ if (zsq > fxx) {
+ *chisq2_ptr = zsq;
+ best_id2[block_idx2] = marker_idx1;
+ }
+ while (0) {
+ epi_logistic_thread_regression_fail:
+ zsq = 0;
+ fail_ct_fixed++;
+ fail_ct2[block_idx2] += 1;
+ if (alpha1sq == 0.0) {
+ // special case: log NA when '--epi1 1' specified
+ all_chisq_write[block_idx2 * 2] = NAN;
+ all_chisq_write[block_idx2 * 2 + 1] = NAN;
+ }
+ }
+ }
+ if (is_first_half) {
+ is_first_half = 0;
+ ulii = geno1_offsets[2 * block_idx1 + 1];
+ cur_idx2_block_size = idx2_block_size;
+ if (ulii < idx2_block_end) {
+ block_idx2 = ulii - idx2_block_start;
+ goto epi_logistic_thread_second_half;
+ }
+ }
+ if (best_chisq_fixed > best_chisq1[block_delta1]) {
+ best_chisq1[block_delta1] = best_chisq_fixed;
+ best_id1[block_delta1] = best_id_fixed;
+ }
+ n_sig_ct1[block_delta1] = n_sig_ct_fixed;
+ if (fail_ct_fixed) {
+ fail_ct1[block_delta1] = fail_ct_fixed;
+ }
+ }
+ if ((!tidx) || g_is_last_thread_block) {
+ THREAD_RETURN;
+ }
+ THREAD_BLOCK_FINISH(tidx);
+ }
+}
+
double calc_lnlike(double known11, double known12, double known21, double known22, double center_ct_d, double freq11, double freq12, double freq21, double freq22, double half_hethet_share, double freq11_incr) {
double lnlike;
freq11 += freq11_incr;
@@ -6246,7 +7055,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
marker_ct = unfiltered_marker_ct - popcount_longs(marker_exclude, unfiltered_marker_ctl);
}
if (marker_ct < 2) {
- logprint("Warning: Skipping --blocks since there are too few markers with MAF >= 0.05.\n");
+ logprint("Warning: Skipping --blocks since there are too few variants with MAF >= 0.05.\n");
goto haploview_blocks_ret_1;
}
pct_thresh = marker_ct / 100;
@@ -6289,7 +7098,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
}
#ifndef __LP64__
if (max_block_size > 65536) {
- logprint("\nError: 32-bit --blocks cannot analyze potential blocks with more than 65536\nmarkers. Use a 64-bit PLINK build or a smaller --blocks-window-kb value.\n");
+ logprint("\nError: 32-bit --blocks cannot analyze potential blocks with more than 65536\nvariants. Use a 64-bit PLINK build or a smaller --blocks-window-kb value.\n");
goto haploview_blocks_ret_INVALID_CMDLINE;
}
#endif
@@ -7334,49 +8143,1035 @@ int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_
return retval;
}
-int32_t epistasis_regression() {
- logprint("Error: --epistasis has not been implemented yet.\n");
- return RET_CALC_NOT_YET_SUPPORTED;
+void rotate_loadbuf_and_compute_phenogeno(uintptr_t* loadbuf, double* pheno_d2, uint32_t pheno_nm_ct, uintptr_t* loadbuf_write, double* phenogeno, uint32_t* genosums) {
+ double cur_phenogeno = 0;
+ uint32_t geno1_ct = 0;
+ uint32_t geno2_ct = 0;
+ uintptr_t cur_word;
+ uintptr_t ulii;
+ uintptr_t uljj;
+ double dxx;
+ uint32_t sample_idx;
+ uint32_t sample_idx_base;
+ uint32_t uii;
+ for (sample_idx = 0; sample_idx < pheno_nm_ct;) {
+ // we're interested in hom A1 (non-trailing 00) and het (10) bit
+ // values here.
+ cur_word = ~(*loadbuf++);
+ sample_idx_base = sample_idx;
+ sample_idx += BITCT2;
+ if (sample_idx > pheno_nm_ct) {
+ cur_word &= (ONELU << (2 * (pheno_nm_ct % BITCT2))) - ONELU;
+ }
+ // now hom A1 = 11 and het = 01. Temporarily erase the 10s.
+ uljj = cur_word & FIVEMASK;
+ ulii = uljj | (cur_word & (uljj << 1));
+ while (ulii) {
+ uii = CTZLU(ulii);
+ dxx = pheno_d2[sample_idx_base + uii / 2];
+ if (!((ulii >> (uii + 1)) & 1)) {
+ // het
+ cur_phenogeno += dxx;
+ geno1_ct++;
+ } else {
+ // hom A1
+ cur_phenogeno += 2 * dxx;
+ geno2_ct++;
+ }
+ ulii &= ~((3 * ONELU) << uii);
+ }
+ // currently hom A1 = 11, missing = 10, het = 01, hom A2 = 00
+ // rotate to hom A1 = 10, missing = 11, het = 01, hom A2 = 00
+ // to allow inner loop to use ordinary multiplication
+ *loadbuf_write++ = cur_word ^ ((cur_word >> 1) & FIVEMASK);
+ }
+ *phenogeno = cur_phenogeno;
+ genosums[0] = geno1_ct + 2 * geno2_ct;
+ genosums[1] = geno1_ct + 4 * geno2_ct;
}
-int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct2, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t ctrl_ct, uintptr_t* pheno_c, double* pheno_d, uint32_t parallel_idx, uint32_t pa [...]
- unsigned char* wkspace_mark = wkspace_base;
+int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t marker_uidx_base, uintptr_t marker_ct1, uintptr_t* marker_exclude1, uintptr_t marker_idx1_start, uintptr_t marker_idx1_end, uintptr_t marker_ct2, uintptr_t* marker_exclude2, uint32_t is_triangular, uintptr_t job_si [...]
+ // We use QT --assoc's strategy for speeding up linear regression, since we
+ // do not need to support arbitrary covariates. It's more complicated here
+ // because we have 3 covariates instead of one, but two of them are still
+ // restricted to the values {0, 1, 2} and the last is the product of the
+ // first two. So we're able to use variations of the QT --assoc bit hacks.
FILE* outfile = NULL;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
- uintptr_t unfiltered_sample_ctv2 = 2 * ((unfiltered_sample_ct + (BITCT - 1)) / BITCT);
+ uintptr_t pheno_nm_ctl2 = (pheno_nm_ct + (BITCT2 - 1)) / BITCT2;
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
- uintptr_t unfiltered_marker_ctl = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
- uintptr_t marker_uidx_base = next_unset_unsafe(marker_exclude, 0);
uintptr_t marker_uidx = marker_uidx_base;
- uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
- uint32_t modifier = epi_ip->modifier;
- uint32_t is_fast = modifier & EPI_FAST;
- uint32_t is_boost = (modifier / EPI_FAST_BOOST) & 1;
- uint32_t do_joint_effects = modifier & EPI_FAST_JOINT_EFFECTS;
- uint32_t no_ueki = modifier & EPI_FAST_NO_UEKI;
- uint32_t is_case_only = (modifier / EPI_FAST_CASE_ONLY) & 1;
- uint32_t is_triangular = 1;
- uint32_t is_custom_set1 = modifier & (EPI_SET_BY_SET | EPI_SET_BY_ALL)? 1 : 0;
- uint32_t is_set_by_set = modifier & EPI_SET_BY_SET;
- uint32_t tot_stride = 6 - 3 * is_case_only;
- uint32_t no_p_value = modifier & EPI_FAST_NO_P_VALUE;
- uint32_t case_only_gap = epi_ip->case_only_gap;
- uint32_t is_case_only_window = (is_case_only && case_only_gap);
- uint32_t case_ct = pheno_nm_ct - ctrl_ct;
- uint32_t cellminx3 = 0;
- uintptr_t case_ctl2 = (case_ct + (BITCT2 - 1)) / BITCT2;
- uintptr_t case_ctv2 = 2 * ((case_ct + (BITCT - 1)) / BITCT);
- uintptr_t ctrl_ctl2 = (ctrl_ct + (BITCT2 - 1)) / BITCT2;
- uintptr_t case_ctv3 = 2 * ((case_ct + (2 * BITCT - 1)) / (2 * BITCT));
- uintptr_t ctrl_ctv3 = 2 * ((ctrl_ct + (2 * BITCT - 1)) / (2 * BITCT));
- uintptr_t case_ctsplit = 3 * case_ctv3;
- uintptr_t ctrl_ctsplit = 3 * ctrl_ctv3;
uintptr_t pct = 1;
uintptr_t marker_uidx2 = 0;
- uintptr_t marker_uidx2_trail = 0;
+ uintptr_t marker_idx1 = marker_idx1_start;
uintptr_t marker_idx2 = 0;
- uintptr_t marker_idx2_trail = 0;
- uint64_t tests_thrown_out = 0;
+ uint64_t pct_thresh = tests_expected / 100;
+ uint64_t tests_complete = 0;
+ uint32_t max_thread_ct = g_epi_thread_ct;
+ uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
+ uint32_t chrom_end = 0;
+ uint32_t chrom_idx = 0;
+ uint32_t chrom_idx2 = 0;
+ int32_t retval = 0;
+ unsigned char* wkspace_mark2;
+ char* wptr_start;
+ char* wptr_start2;
+ char* wptr;
+ double* pheno_d2;
+ double* dptr;
+ double* dptr2;
+ uint32_t* uiptr;
+ uint32_t* uiptr2;
+ uint32_t* uiptr3;
+ uint32_t* uiptr4;
+ uint32_t* uiptr5;
+ uintptr_t cur_workload;
+ uintptr_t idx1_block_size;
+ uintptr_t idx2_block_size;
+ uintptr_t idx2_block_sizem16;
+ uintptr_t marker_uidx_tmp;
+ uintptr_t block_idx1;
+ uintptr_t block_idx2;
+ uintptr_t cur_idx2_block_size;
+ uintptr_t chrom_end2;
+ uintptr_t tidx;
+ uintptr_t ulii;
+ uintptr_t uljj;
+ uintptr_t ulkk;
+ double dxx;
+ uint32_t chrom_fo_idx;
+ uint32_t chrom_fo_idx2;
+ uint32_t is_last_block;
+ uint32_t sample_uidx;
+ uint32_t sample_idx;
+ uint32_t uii;
+ uint32_t ujj;
+ if (wkspace_alloc_d_checked(&pheno_d2, pheno_nm_ct * sizeof(double))) {
+ goto epistasis_linear_regression_ret_NOMEM;
+ }
+ g_epi_pheno_d2 = pheno_d2;
+ g_epi_pheno_nm_ct = pheno_nm_ct;
+ dptr = pheno_d2;
+ g_epi_pheno_sum = 0;
+ g_epi_pheno_ssq = 0;
+ for (sample_uidx = 0, sample_idx = 0; sample_idx < pheno_nm_ct; sample_uidx++, sample_idx++) {
+ next_set_unsafe_ck(pheno_nm, &sample_uidx);
+ dxx = pheno_d[sample_uidx];
+ g_epi_pheno_sum += dxx;
+ g_epi_pheno_ssq += dxx * dxx;
+ pheno_d2[sample_idx] = dxx;
+ }
+ // could add an epsilon here, but this is good enough to catch the most
+ // common case (all phenotypes are the same integer near zero).
+ if (g_epi_pheno_ssq * ((double)((int32_t)pheno_nm_ct)) == g_epi_pheno_sum * g_epi_pheno_sum) {
+ logprint("Error: Phenotype is constant.\n");
+ goto epistasis_linear_regression_ret_INVALID_CMDLINE;
+ }
+ g_epi_vif_thresh = glm_vif_thresh;
+
+ // claim up to half of memory with idx1 bufs; each marker currently costs:
+ // pheno_nm_ctl2 * sizeof(intptr_t) for geno buf
+ // sizeof(double) for precomputed sum(phenotype * genotype) values
+ // 2 * sizeof(int32_t) for precomputed sum(genotype) and sum(genotype^2)
+ // values
+ // 4 * sizeof(int32_t) + sizeof(double) + marker_ct2 * 2 * sizeof(double)
+ // for other stuff (see epistasis_report() comment, starting from
+ // "offset"; main result buffer must be double-size to store both beta
+ // and chi-square stat)
+ ulii = pheno_nm_ctl2 * sizeof(intptr_t) + 6 * sizeof(int32_t) + 2 * sizeof(double) + marker_ct2 * 2 * sizeof(double);
+ idx1_block_size = (wkspace_left - 6 * CACHELINE + 5 * sizeof(int32_t) + sizeof(double) - max_thread_ct * (5 * (CACHELINE - 4))) / (ulii * 2 + 1);
+ if (!idx1_block_size) {
+ goto epistasis_linear_regression_ret_NOMEM;
+ }
+ if (idx1_block_size > job_size) {
+ idx1_block_size = job_size;
+ }
+ // pad to avoid threads writing to same cacheline
+ ulii = (max_thread_ct - 1) * 15 + idx1_block_size;
+ g_epi_geno1_offsets = (uint32_t*)wkspace_alloc(idx1_block_size * 2 * sizeof(int32_t));
+ g_epi_geno1 = (uintptr_t*)wkspace_alloc(pheno_nm_ctl2 * idx1_block_size * sizeof(intptr_t));
+ g_epi_phenogeno1 = (double*)wkspace_alloc(idx1_block_size * sizeof(double));
+ // may be better to just recompute genosums values in inner loop? can test
+ // this later
+ g_epi_genosums1 = (uint32_t*)wkspace_alloc(idx1_block_size * 2 * sizeof(int32_t));
+ g_epi_all_chisq = (double*)wkspace_alloc(idx1_block_size * marker_ct2 * 2 * sizeof(double));
+ g_epi_best_chisq1 = (double*)wkspace_alloc(ulii * sizeof(double));
+ g_epi_best_id1 = (uint32_t*)wkspace_alloc(ulii * sizeof(int32_t));
+ g_epi_n_sig_ct1 = (uint32_t*)wkspace_alloc(ulii * sizeof(int32_t));
+ g_epi_fail_ct1 = (uint32_t*)wkspace_alloc(ulii * sizeof(int32_t));
+ for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
+ g_epi_geno1[block_idx1 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
+ }
+ if (is_triangular) {
+ fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ }
+
+ ulii = pheno_nm_ctl2 * sizeof(intptr_t) + 2 * sizeof(int32_t) + sizeof(double) + max_thread_ct * (3 * sizeof(int32_t) + sizeof(double));
+ idx2_block_size = (wkspace_left - (3 * CACHELINE - sizeof(intptr_t) - 2 * sizeof(int32_t) - sizeof(double)) - max_thread_ct * (3 * (CACHELINE - sizeof(int32_t)) + (CACHELINE - sizeof(double)))) / ulii;
+ if (idx2_block_size > marker_ct2) {
+ idx2_block_size = marker_ct2;
+ }
+ idx2_block_size = (idx2_block_size + 15) & (~(15 * ONELU));
+
+ memcpy(outname_end, ".epi.qt", 8);
+ if (parallel_tot > 1) {
+ outname_end[7] = '.';
+ uint32_writex(&(outname_end[8]), parallel_idx + 1, '\0');
+ }
+ if (fopen_checked(&outfile, outname, "w")) {
+ goto epistasis_linear_regression_ret_OPEN_FAIL;
+ }
+ if (!parallel_idx) {
+ wptr = memcpya(tbuf, "CHR1 ", 5);
+ wptr = fw_strcpyn(plink_maxsnp, 4, "SNP1", wptr);
+ wptr = memcpya(wptr, " CHR2 ", 6);
+ wptr = fw_strcpyn(plink_maxsnp, 4, "SNP2", wptr);
+ wptr = memcpya(wptr, " BETA_INT STAT P \n", 41);
+ if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
+ goto epistasis_linear_regression_ret_WRITE_FAIL;
+ }
+ }
+
+ wkspace_mark2 = wkspace_base;
+ while (1) {
+ if (!idx2_block_size) {
+ goto epistasis_linear_regression_ret_NOMEM;
+ }
+ if (!(wkspace_alloc_ul_checked(&g_epi_geno2, pheno_nm_ctl2 * idx2_block_size * sizeof(intptr_t)) ||
+ wkspace_alloc_d_checked(&g_epi_phenogeno2, idx2_block_size * sizeof(double)) ||
+ wkspace_alloc_ui_checked(&g_epi_genosums2, idx2_block_size * 2 * sizeof(int32_t)) ||
+ wkspace_alloc_d_checked(&g_epi_best_chisq2, max_thread_ct * idx2_block_size * sizeof(double)) ||
+ wkspace_alloc_ui_checked(&g_epi_best_id2, max_thread_ct * idx2_block_size * sizeof(int32_t)) ||
+ wkspace_alloc_ui_checked(&g_epi_n_sig_ct2, max_thread_ct * idx2_block_size * sizeof(int32_t)) ||
+ wkspace_alloc_ui_checked(&g_epi_fail_ct2, max_thread_ct * idx2_block_size * sizeof(int32_t)))) {
+ break;
+ }
+ wkspace_reset(wkspace_mark2);
+ idx2_block_size -= 16;
+ }
+ for (block_idx2 = 0; block_idx2 < idx2_block_size; block_idx2++) {
+ g_epi_geno2[block_idx2 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
+ }
+ marker_uidx = next_unset_ul_unsafe(marker_exclude1, marker_uidx_base);
+ if (marker_idx1) {
+ marker_uidx = jump_forward_unset_unsafe(marker_exclude1, marker_uidx + 1, marker_idx1);
+ }
+ wptr = memcpya(logbuf, "QT --epistasis to ", 18);
+ wptr = strcpya(wptr, outname);
+ memcpy(wptr, " ... ", 6);
+ wordwrap(logbuf, 16); // strlen("99% [processing]")
+ logprintb();
+ fputs("0%", stdout);
+ do {
+ fputs(" [processing]", stdout);
+ fflush(stdout);
+ if (idx1_block_size > marker_idx1_end - marker_idx1) {
+ idx1_block_size = marker_idx1_end - marker_idx1;
+ if (idx1_block_size < max_thread_ct) {
+ max_thread_ct = idx1_block_size;
+ g_epi_thread_ct = max_thread_ct;
+ }
+ }
+ g_epi_marker_idx1 = marker_idx1;
+ dptr = g_epi_all_chisq;
+ dptr2 = &(g_epi_all_chisq[idx1_block_size * marker_ct2 * 2]);
+ do {
+ *dptr = -1;
+ dptr = &(dptr[2]);
+ } while (dptr < dptr2);
+ marker_uidx_tmp = marker_uidx;
+ if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_linear_regression_ret_READ_FAIL;
+ }
+ cur_workload = idx1_block_size * marker_ct2;
+ if (is_triangular) {
+ for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
+ ulii = block_idx1 + marker_idx1 + 1;
+ cur_workload -= ulii;
+ g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
+ }
+ } else {
+ fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ marker_uidx2 = marker_uidx_base;
+ marker_idx2 = 0;
+ }
+ tests_complete += cur_workload;
+ ulii = 0; // total number of tests
+ g_epi_idx1_block_bounds[0] = 0;
+ g_epi_idx1_block_bounds16[0] = 0;
+ block_idx1 = 0;
+ for (tidx = 1; tidx < max_thread_ct; tidx++) {
+ uljj = (((uint64_t)cur_workload) * tidx) / max_thread_ct;
+ if (is_triangular) {
+ do {
+ ulii += marker_ct2 - g_epi_geno1_offsets[2 * block_idx1 + 1];
+ block_idx1++;
+ } while (ulii < uljj);
+ } else {
+ do {
+ ulii += marker_ct2;
+ block_idx1++;
+ } while (ulii < uljj);
+ }
+ uii = block_idx1 - g_epi_idx1_block_bounds[tidx - 1];
+ g_epi_idx1_block_bounds[tidx] = block_idx1;
+ g_epi_idx1_block_bounds16[tidx] = g_epi_idx1_block_bounds16[tidx - 1] + ((uii + 15) & (~15));
+ }
+ g_epi_idx1_block_bounds[max_thread_ct] = idx1_block_size;
+ chrom_end = 0;
+ for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx_tmp++, block_idx1++) {
+ if (IS_SET(marker_exclude1, marker_uidx_tmp)) {
+ marker_uidx_tmp = next_unset_ul_unsafe(marker_exclude1, marker_uidx_tmp);
+ if (fseeko(bedfile, bed_offset + (marker_uidx_tmp * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_linear_regression_ret_READ_FAIL;
+ }
+ }
+ if (load_and_collapse_incl(bedfile, loadbuf_raw, unfiltered_sample_ct, loadbuf, pheno_nm_ct, pheno_nm, final_mask, IS_SET(marker_reverse, marker_uidx_tmp))) {
+ goto epistasis_linear_regression_ret_READ_FAIL;
+ }
+ rotate_loadbuf_and_compute_phenogeno(loadbuf, pheno_d2, pheno_nm_ct, &(g_epi_geno1[block_idx1 * pheno_nm_ctl2]), &(g_epi_phenogeno1[block_idx1]), &(g_epi_genosums1[block_idx1 * 2]));
+ if (!is_triangular) {
+ if (!IS_SET(marker_exclude2, marker_uidx_tmp)) {
+ // do not compare against self
+ marker_idx2 += marker_uidx_tmp - marker_uidx2 - popcount_bit_idx(marker_exclude2, marker_uidx2, marker_uidx_tmp);
+ marker_uidx2 = marker_uidx_tmp;
+ g_epi_geno1_offsets[2 * block_idx1] = marker_idx2;
+ g_epi_geno1_offsets[2 * block_idx1 + 1] = marker_idx2 + 1;
+ gap_cts[block_idx1 + marker_idx1] = 1;
+ }
+ }
+ }
+ marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
+ if (is_triangular) {
+ marker_idx2 = marker_idx1 + 1;
+ marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2);
+ } else {
+ marker_idx2 = 0;
+ }
+ if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_linear_regression_ret_READ_FAIL;
+ }
+ cur_idx2_block_size = idx2_block_size;
+ do {
+ if (cur_idx2_block_size > marker_ct2 - marker_idx2) {
+ cur_idx2_block_size = marker_ct2 - marker_idx2;
+ }
+ for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
+ if (IS_SET(marker_exclude2, marker_uidx2)) {
+ marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx2);
+ if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_linear_regression_ret_READ_FAIL;
+ }
+ }
+ if (load_and_collapse_incl(bedfile, loadbuf_raw, unfiltered_sample_ct, loadbuf, pheno_nm_ct, pheno_nm, final_mask, IS_SET(marker_reverse, marker_uidx2))) {
+ goto epistasis_linear_regression_ret_READ_FAIL;
+ }
+ rotate_loadbuf_and_compute_phenogeno(loadbuf, pheno_d2, pheno_nm_ct, &(g_epi_geno2[block_idx2 * pheno_nm_ctl2]), &(g_epi_phenogeno2[block_idx2]), &(g_epi_genosums2[block_idx2 * 2]));
+ }
+ g_epi_idx2_block_size = cur_idx2_block_size;
+ g_epi_idx2_block_start = marker_idx2;
+ idx2_block_sizem16 = (cur_idx2_block_size + 15) & (~(15 * ONELU));
+ fill_uint_zero(g_epi_n_sig_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
+ fill_uint_zero(g_epi_fail_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
+ fill_uint_zero(g_epi_n_sig_ct2, idx2_block_sizem16 * max_thread_ct);
+ fill_uint_zero(g_epi_fail_ct2, idx2_block_sizem16 * max_thread_ct);
+ for (tidx = 0; tidx < max_thread_ct; tidx++) {
+ ulii = g_epi_idx1_block_bounds[tidx];
+ uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
+ dptr = &(g_epi_best_chisq1[g_epi_idx1_block_bounds[tidx]]);
+ dptr2 = &(g_epi_all_chisq[(marker_idx1 + ulii) * 2]);
+ for (ulkk = 0; ulkk < uljj; ulkk++) {
+ *dptr++ = dptr2[ulkk * 2];
+ }
+ ulii = g_epi_geno1_offsets[2 * ulii + 1];
+ if (ulii < marker_idx2 + cur_idx2_block_size) {
+ if (ulii <= marker_idx2) {
+ ulii = 0;
+ } else {
+ ulii -= marker_idx2;
+ }
+ uljj = cur_idx2_block_size - ulii;
+ dptr = &(g_epi_best_chisq2[tidx * idx2_block_sizem16 + ulii]);
+ dptr2 = &(g_epi_all_chisq[(marker_idx2 + ulii) * 2]);
+ for (ulkk = 0; ulkk < uljj; ulkk++) {
+ *dptr++ = dptr2[ulkk * 2];
+ }
+ }
+ }
+ is_last_block = (marker_idx2 + cur_idx2_block_size >= marker_ct2);
+ if (spawn_threads2(threads, &epi_linear_thread, max_thread_ct, is_last_block)) {
+ goto epistasis_linear_regression_ret_THREAD_CREATE_FAIL;
+ }
+ epi_linear_thread((void*)0);
+ join_threads2(threads, max_thread_ct, is_last_block);
+ // merge best_chisq, best_ids, fail_cts
+ for (tidx = 0; tidx < max_thread_ct; tidx++) {
+ ulii = g_epi_idx1_block_bounds[tidx];
+ uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
+ uii = g_epi_idx1_block_bounds16[tidx];
+ dptr = &(g_epi_best_chisq1[uii]);
+ uiptr = &(g_epi_best_id1[uii]);
+ uiptr2 = &(g_epi_n_sig_ct1[uii]);
+ uiptr3 = &(g_epi_fail_ct1[uii]);
+ ulii += marker_idx1;
+ dptr2 = &(best_chisq[ulii]);
+ uiptr4 = &(n_sig_cts[ulii]);
+ uiptr5 = &(fail_cts[ulii]);
+ for (block_idx1 = 0; block_idx1 < uljj; block_idx1++, dptr2++, uiptr4++, uiptr5++) {
+ dxx = *dptr++;
+ if (dxx > (*dptr2)) {
+ *dptr2 = dxx;
+ best_ids[block_idx1 + ulii] = uiptr[block_idx1];
+ }
+ *uiptr4 += *uiptr2++;
+ *uiptr5 += *uiptr3++;
+ }
+ }
+ if (is_triangular) {
+ for (tidx = 0; tidx < max_thread_ct; tidx++) {
+ block_idx2 = g_epi_geno1_offsets[2 * g_epi_idx1_block_bounds[tidx] + 1];
+ if (block_idx2 <= marker_idx2) {
+ block_idx2 = 0;
+ } else {
+ block_idx2 -= marker_idx2;
+ }
+ dptr = &(g_epi_best_chisq2[tidx * idx2_block_sizem16 + block_idx2]);
+ uiptr = &(g_epi_best_id2[tidx * idx2_block_sizem16]);
+ uiptr2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizem16 + block_idx2]);
+ uiptr3 = &(g_epi_fail_ct2[tidx * idx2_block_sizem16 + block_idx2]);
+ dptr2 = &(best_chisq[block_idx2 + marker_idx2]);
+ uiptr4 = &(n_sig_cts[block_idx2 + marker_idx2]);
+ uiptr5 = &(fail_cts[block_idx2 + marker_idx2]);
+ for (; block_idx2 < cur_idx2_block_size; block_idx2++, dptr2++, uiptr4++, uiptr5++) {
+ dxx = *dptr++;
+ if (dxx > (*dptr2)) {
+ *dptr2 = dxx;
+ best_ids[block_idx2 + marker_idx2] = uiptr[block_idx2];
+ }
+ *uiptr4 += *uiptr2++;
+ *uiptr5 += *uiptr3++;
+ }
+ }
+ }
+ marker_idx2 += cur_idx2_block_size;
+ } while (marker_idx2 < marker_ct2);
+ fputs("\b\b\b\b\b\b\b\b\b\b\bwriting] \b\b\b", stdout);
+ fflush(stdout);
+ chrom_end = 0;
+ block_idx1 = 0;
+ while (1) {
+ next_unset_ul_unsafe_ck(marker_exclude1, &marker_uidx);
+ ujj = g_epi_geno1_offsets[2 * block_idx1];
+ marker_idx2 = 0;
+ dptr = &(g_epi_all_chisq[block_idx1 * 2 * marker_ct2]);
+ if (marker_uidx >= chrom_end) {
+ chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ }
+ wptr_start = width_force(4, tbuf, chrom_name_write(tbuf, chrom_info_ptr, chrom_idx));
+ *wptr_start++ = ' ';
+ wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
+ *wptr_start++ = ' ';
+ marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
+ for (chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
+ chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
+ chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+ wptr_start2 = width_force(4, wptr_start, chrom_name_write(wptr_start, chrom_info_ptr, chrom_idx2));
+ *wptr_start2++ = ' ';
+ for (; marker_uidx2 < chrom_end2; next_unset_ul_ck(marker_exclude2, &marker_uidx2, chrom_end2), marker_idx2++, dptr = &(dptr[2])) {
+ if (marker_idx2 == ujj) {
+ marker_idx2 = g_epi_geno1_offsets[2 * block_idx1 + 1];
+ if (marker_idx2 == marker_ct2) {
+ goto epistasis_linear_regression_write_loop;
+ }
+ if (marker_idx2 > ujj) {
+ marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2 - ujj);
+ dptr = &(dptr[2 * (marker_idx2 - ujj)]);
+ if (marker_uidx2 >= chrom_end2) {
+ break;
+ }
+ }
+ } else if (marker_idx2 == marker_ct2) {
+ goto epistasis_linear_regression_write_loop;
+ }
+ dxx = *dptr;
+ if (dxx != -1) {
+ wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), wptr_start2);
+ *wptr++ = ' ';
+ // beta
+ wptr = width_force(12, wptr, double_g_write(wptr, dptr[1]));
+ *wptr++ = ' ';
+ wptr = width_force(12, wptr, double_g_write(wptr, dxx));
+ *wptr++ = ' ';
+ dxx = normdist(-sqrt(dxx)) * 2;
+ wptr = double_g_writewx4x(wptr, MAXV(dxx, output_min_p), 12, ' ');
+ *wptr++ = '\n';
+ if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
+ goto epistasis_linear_regression_ret_WRITE_FAIL;
+ }
+ // could remove this writeback in --epi1 1 case
+ *dptr = -1;
+ }
+ marker_uidx2++;
+ }
+ }
+ epistasis_linear_regression_write_loop:
+ block_idx1++;
+ marker_uidx++;
+ if (block_idx1 >= idx1_block_size) {
+ break;
+ }
+ }
+ marker_idx1 += idx1_block_size;
+ fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
+ if (tests_complete >= pct_thresh) {
+ if (pct > 10) {
+ putchar('\b');
+ }
+ pct = (tests_complete * 100LLU) / tests_expected;
+ if (pct < 100) {
+ printf("\b\b%" PRIuPTR "%%", pct);
+ fflush(stdout);
+ pct_thresh = ((++pct) * ((uint64_t)tests_expected)) / 100;
+ }
+ }
+ } while (marker_idx1 < marker_idx1_end);
+ if (fclose_null(&outfile)) {
+ goto epistasis_linear_regression_ret_WRITE_FAIL;
+ }
+ while (0) {
+ epistasis_linear_regression_ret_NOMEM:
+ retval = RET_NOMEM;
+ break;
+ epistasis_linear_regression_ret_OPEN_FAIL:
+ retval = RET_OPEN_FAIL;
+ break;
+ epistasis_linear_regression_ret_READ_FAIL:
+ retval = RET_READ_FAIL;
+ break;
+ epistasis_linear_regression_ret_WRITE_FAIL:
+ retval = RET_WRITE_FAIL;
+ break;
+ epistasis_linear_regression_ret_INVALID_CMDLINE:
+ retval = RET_INVALID_CMDLINE;
+ break;
+ epistasis_linear_regression_ret_THREAD_CREATE_FAIL:
+ retval = RET_THREAD_CREATE_FAIL;
+ break;
+ }
+ fclose_cond(outfile);
+ // caller will free memory
+ return retval;
+}
+
+int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t marker_uidx_base, uintptr_t marker_ct1, uintptr_t* marker_exclude1, uintptr_t marker_idx1_start, uintptr_t marker_idx1_end, uintptr_t marker_ct2, uintptr_t* marker_exclude2, uint32_t is_triangular, uintptr_t job_ [...]
+ FILE* outfile = NULL;
+ uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
+ uintptr_t pheno_nm_cta4 = (pheno_nm_ct + 3) & (~3);
+ uintptr_t pheno_nm_ctl2 = (pheno_nm_ct + (BITCT2 - 1)) / BITCT2;
+ uintptr_t final_mask = get_final_mask(pheno_nm_ct);
+ uintptr_t marker_uidx = marker_uidx_base;
+ uintptr_t pct = 1;
+ uintptr_t marker_uidx2 = 0;
+ uintptr_t marker_idx1 = marker_idx1_start;
+ uintptr_t marker_idx2 = 0;
+ uint64_t pct_thresh = tests_expected / 100;
+ uint64_t tests_complete = 0;
+ uint32_t max_thread_ct = g_epi_thread_ct;
+ uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
+ uint32_t chrom_end = 0;
+ uint32_t chrom_idx = 0;
+ uint32_t chrom_idx2 = 0;
+ int32_t retval = 0;
+ unsigned char* wkspace_mark2;
+ uintptr_t* ulptr;
+ char* wptr_start;
+ char* wptr_start2;
+ char* wptr;
+ float* fptr;
+ float* fptr2;
+ double* dptr;
+ uint32_t* uiptr;
+ uint32_t* uiptr2;
+ uint32_t* uiptr3;
+ uint32_t* uiptr4;
+ uint32_t* uiptr5;
+ uintptr_t cur_workload;
+ uintptr_t idx1_block_size;
+ uintptr_t idx2_block_size;
+ uintptr_t idx2_block_sizem16;
+ uintptr_t marker_uidx_tmp;
+ uintptr_t block_idx1;
+ uintptr_t block_idx2;
+ uintptr_t cur_idx2_block_size;
+ uintptr_t chrom_end2;
+ uintptr_t tidx;
+ uintptr_t ulii;
+ uintptr_t uljj;
+ uintptr_t ulkk;
+ double dxx;
+ float fxx;
+ uint32_t chrom_fo_idx;
+ uint32_t chrom_fo_idx2;
+ uint32_t is_last_block;
+ uint32_t uii;
+ uint32_t ujj;
+ if (wkspace_alloc_ul_checked(&g_epi_pheno_c, pheno_nm_ctl2 * sizeof(uintptr_t))) {
+ goto epistasis_logistic_regression_ret_NOMEM;
+ }
+ collapse_copy_bitarr_incl(unfiltered_sample_ct, pheno_c, pheno_nm, pheno_nm_ct, g_epi_pheno_c);
+ g_epi_pheno_nm_ct = pheno_nm_ct;
+ // per-thread buffers
+ g_epi_logistic_mt = (Epi_logistic_multithread*)wkspace_alloc(max_thread_ct * sizeof(Epi_logistic_multithread));
+ if (!g_epi_logistic_mt) {
+ goto epistasis_logistic_regression_ret_NOMEM;
+ }
+ // param_ct_max = 4 (intercept, A, B, AB)
+ for (tidx = 0; tidx < max_thread_ct; tidx++) {
+ if (wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].cur_covars_cov_major), pheno_nm_cta4 * 4 * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].coef), 4 * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].pp), pheno_nm_cta4 * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].sample_1d_buf), pheno_nm_ct * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].pheno_buf), pheno_nm_ct * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].param_1d_buf), pheno_nm_ct * 4 * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].param_1d_buf2), pheno_nm_ct * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].param_2d_buf), 4 * 4 * sizeof(float)) ||
+ wkspace_alloc_f_checked(&(g_epi_logistic_mt[tidx].param_2d_buf2), 4 * 4 * sizeof(float))) {
+ goto epistasis_logistic_regression_ret_NOMEM;
+ }
+ }
+
+ // claim up to half of memory with idx1 bufs; each marker currently costs:
+ // pheno_nm_ctl2 * sizeof(intptr_t) for geno buf
+ // 4 * sizeof(int32_t) + sizeof(float) + marker_ct2 * 2 * sizeof(float)
+ // for other stuff (see epistasis_report() comment, starting from
+ // "offset"; main result buffer must be double-size to store both beta
+ // and chi-square stat)
+ ulii = pheno_nm_ctl2 * sizeof(intptr_t) + 4 * sizeof(int32_t) + sizeof(float) + marker_ct2 * 2 * sizeof(float);
+ idx1_block_size = (wkspace_left - 4 * CACHELINE + 3 * sizeof(int32_t) - max_thread_ct * (5 * (CACHELINE - 4))) / (ulii * 2 + 1);
+ if (!idx1_block_size) {
+ goto epistasis_logistic_regression_ret_NOMEM;
+ }
+ if (idx1_block_size > job_size) {
+ idx1_block_size = job_size;
+ }
+ // pad to avoid threads writing to same cacheline
+ ulii = (max_thread_ct - 1) * 15 + idx1_block_size;
+ g_epi_geno1_offsets = (uint32_t*)wkspace_alloc(idx1_block_size * 2 * sizeof(int32_t));
+ g_epi_geno1 = (uintptr_t*)wkspace_alloc(pheno_nm_ctl2 * idx1_block_size * sizeof(intptr_t));
+ g_epi_all_chisq_f = (float*)wkspace_alloc(idx1_block_size * marker_ct2 * 2 * sizeof(float));
+ g_epi_best_chisq_f1 = (float*)wkspace_alloc(ulii * sizeof(float));
+ g_epi_best_id1 = (uint32_t*)wkspace_alloc(ulii * sizeof(int32_t));
+ g_epi_n_sig_ct1 = (uint32_t*)wkspace_alloc(ulii * sizeof(int32_t));
+ g_epi_fail_ct1 = (uint32_t*)wkspace_alloc(ulii * sizeof(int32_t));
+ for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
+ g_epi_geno1[block_idx1 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
+ }
+ if (is_triangular) {
+ fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ }
+
+ ulii = pheno_nm_ctl2 * sizeof(intptr_t) + max_thread_ct * (3 * sizeof(int32_t) + sizeof(double));
+ idx2_block_size = (wkspace_left - (CACHELINE - sizeof(intptr_t)) - max_thread_ct * (3 * (CACHELINE - sizeof(int32_t)) + (CACHELINE - sizeof(float)))) / ulii;
+ if (idx2_block_size > marker_ct2) {
+ idx2_block_size = marker_ct2;
+ }
+ idx2_block_size = (idx2_block_size + 15) & (~(15 * ONELU));
+
+ memcpy(outname_end, ".epi.cc", 8);
+ if (parallel_tot > 1) {
+ outname_end[7] = '.';
+ uint32_writex(&(outname_end[8]), parallel_idx + 1, '\0');
+ }
+ if (fopen_checked(&outfile, outname, "w")) {
+ goto epistasis_logistic_regression_ret_OPEN_FAIL;
+ }
+ if (!parallel_idx) {
+ wptr = memcpya(tbuf, "CHR1 ", 5);
+ wptr = fw_strcpyn(plink_maxsnp, 4, "SNP1", wptr);
+ wptr = memcpya(wptr, " CHR2 ", 6);
+ wptr = fw_strcpyn(plink_maxsnp, 4, "SNP2", wptr);
+ wptr = memcpya(wptr, " OR_INT STAT P \n", 41);
+ if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
+ goto epistasis_logistic_regression_ret_WRITE_FAIL;
+ }
+ }
+
+ wkspace_mark2 = wkspace_base;
+ while (1) {
+ if (!idx2_block_size) {
+ goto epistasis_logistic_regression_ret_NOMEM;
+ }
+ if (!(wkspace_alloc_ul_checked(&g_epi_geno2, pheno_nm_ctl2 * idx2_block_size * sizeof(intptr_t)) ||
+ wkspace_alloc_f_checked(&g_epi_best_chisq_f2, max_thread_ct * idx2_block_size * sizeof(float)) ||
+ wkspace_alloc_ui_checked(&g_epi_best_id2, max_thread_ct * idx2_block_size * sizeof(int32_t)) ||
+ wkspace_alloc_ui_checked(&g_epi_n_sig_ct2, max_thread_ct * idx2_block_size * sizeof(int32_t)) ||
+ wkspace_alloc_ui_checked(&g_epi_fail_ct2, max_thread_ct * idx2_block_size * sizeof(int32_t)))) {
+ break;
+ }
+ wkspace_reset(wkspace_mark2);
+ idx2_block_size -= 16;
+ }
+ for (block_idx2 = 0; block_idx2 < idx2_block_size; block_idx2++) {
+ g_epi_geno2[block_idx2 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
+ }
+ marker_uidx = next_unset_ul_unsafe(marker_exclude1, marker_uidx_base);
+ if (marker_idx1) {
+ marker_uidx = jump_forward_unset_unsafe(marker_exclude1, marker_uidx + 1, marker_idx1);
+ }
+ wptr = memcpya(logbuf, "C/C --epistasis to ", 19);
+ wptr = strcpya(wptr, outname);
+ memcpy(wptr, " ... ", 6);
+ wordwrap(logbuf, 16); // strlen("99% [processing]")
+ logprintb();
+ fputs("0%", stdout);
+ do {
+ fputs(" [processing]", stdout);
+ fflush(stdout);
+ if (idx1_block_size > marker_idx1_end - marker_idx1) {
+ idx1_block_size = marker_idx1_end - marker_idx1;
+ if (idx1_block_size < max_thread_ct) {
+ max_thread_ct = idx1_block_size;
+ g_epi_thread_ct = max_thread_ct;
+ }
+ }
+ g_epi_marker_idx1 = marker_idx1;
+ fptr = g_epi_all_chisq_f;
+ fptr2 = &(g_epi_all_chisq_f[idx1_block_size * marker_ct2 * 2]);
+ do {
+ *fptr = -1;
+ fptr = &(fptr[2]);
+ } while (fptr < fptr2);
+ marker_uidx_tmp = marker_uidx;
+ if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_logistic_regression_ret_READ_FAIL;
+ }
+ cur_workload = idx1_block_size * marker_ct2;
+ if (is_triangular) {
+ for (block_idx1 = 0; block_idx1 < idx1_block_size; block_idx1++) {
+ ulii = block_idx1 + marker_idx1 + 1;
+ cur_workload -= ulii;
+ g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
+ }
+ } else {
+ fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ marker_uidx2 = marker_uidx_base;
+ marker_idx2 = 0;
+ }
+ tests_complete += cur_workload;
+ ulii = 0; // total number of tests
+ g_epi_idx1_block_bounds[0] = 0;
+ g_epi_idx1_block_bounds16[0] = 0;
+ block_idx1 = 0;
+ for (tidx = 1; tidx < max_thread_ct; tidx++) {
+ uljj = (((uint64_t)cur_workload) * tidx) / max_thread_ct;
+ if (is_triangular) {
+ do {
+ ulii += marker_ct2 - g_epi_geno1_offsets[2 * block_idx1 + 1];
+ block_idx1++;
+ } while (ulii < uljj);
+ } else {
+ do {
+ ulii += marker_ct2;
+ block_idx1++;
+ } while (ulii < uljj);
+ }
+ uii = block_idx1 - g_epi_idx1_block_bounds[tidx - 1];
+ g_epi_idx1_block_bounds[tidx] = block_idx1;
+ g_epi_idx1_block_bounds16[tidx] = g_epi_idx1_block_bounds16[tidx - 1] + ((uii + 15) & (~15));
+ }
+ g_epi_idx1_block_bounds[max_thread_ct] = idx1_block_size;
+ chrom_end = 0;
+ for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx_tmp++, block_idx1++) {
+ if (IS_SET(marker_exclude1, marker_uidx_tmp)) {
+ marker_uidx_tmp = next_unset_ul_unsafe(marker_exclude1, marker_uidx_tmp);
+ if (fseeko(bedfile, bed_offset + (marker_uidx_tmp * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_logistic_regression_ret_READ_FAIL;
+ }
+ }
+ // marker_reverse deliberately flipped
+ if (load_and_collapse_incl(bedfile, loadbuf_raw, unfiltered_sample_ct, loadbuf, pheno_nm_ct, pheno_nm, final_mask, !IS_SET(marker_reverse, marker_uidx_tmp))) {
+ goto epistasis_logistic_regression_ret_READ_FAIL;
+ }
+ // rotate to hom A1 = 10, het = 01, hom A2 = 00, missing = 11, to allow
+ // inner loop to use ordinary multiplication
+ // this is a bit redundant with the forced reverse, but it's not a
+ // bottleneck
+ rotate_plink1_to_plink2_and_copy(loadbuf, &(g_epi_geno1[block_idx1 * pheno_nm_ctl2]), pheno_nm_ctl2);
+ if (!is_triangular) {
+ if (!IS_SET(marker_exclude2, marker_uidx_tmp)) {
+ // do not compare against self
+ marker_idx2 += marker_uidx_tmp - marker_uidx2 - popcount_bit_idx(marker_exclude2, marker_uidx2, marker_uidx_tmp);
+ marker_uidx2 = marker_uidx_tmp;
+ g_epi_geno1_offsets[2 * block_idx1] = marker_idx2;
+ g_epi_geno1_offsets[2 * block_idx1 + 1] = marker_idx2 + 1;
+ gap_cts[block_idx1 + marker_idx1] = 1;
+ }
+ }
+ }
+ marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
+ if (is_triangular) {
+ marker_idx2 = marker_idx1 + 1;
+ marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2);
+ } else {
+ marker_idx2 = 0;
+ }
+ if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_logistic_regression_ret_READ_FAIL;
+ }
+ cur_idx2_block_size = idx2_block_size;
+ do {
+ if (cur_idx2_block_size > marker_ct2 - marker_idx2) {
+ cur_idx2_block_size = marker_ct2 - marker_idx2;
+ }
+ for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
+ if (IS_SET(marker_exclude2, marker_uidx2)) {
+ marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx2);
+ if (fseeko(bedfile, bed_offset + (marker_uidx2 * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
+ goto epistasis_logistic_regression_ret_READ_FAIL;
+ }
+ }
+ ulptr = &(g_epi_geno2[block_idx2 * pheno_nm_ctl2]);
+ // marker_reverse deliberately flipped
+ if (load_and_collapse_incl(bedfile, loadbuf_raw, unfiltered_sample_ct, loadbuf, pheno_nm_ct, pheno_nm, final_mask, !IS_SET(marker_reverse, marker_uidx2))) {
+ goto epistasis_logistic_regression_ret_READ_FAIL;
+ }
+ rotate_plink1_to_plink2_and_copy(loadbuf, ulptr, pheno_nm_ctl2);
+ }
+ g_epi_idx2_block_size = cur_idx2_block_size;
+ g_epi_idx2_block_start = marker_idx2;
+ idx2_block_sizem16 = (cur_idx2_block_size + 15) & (~(15 * ONELU));
+ fill_uint_zero(g_epi_n_sig_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
+ fill_uint_zero(g_epi_fail_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
+ fill_uint_zero(g_epi_n_sig_ct2, idx2_block_sizem16 * max_thread_ct);
+ fill_uint_zero(g_epi_fail_ct2, idx2_block_sizem16 * max_thread_ct);
+ for (tidx = 0; tidx < max_thread_ct; tidx++) {
+ ulii = g_epi_idx1_block_bounds[tidx];
+ uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
+ fptr = &(g_epi_best_chisq_f1[g_epi_idx1_block_bounds[tidx]]);
+ fptr2 = &(g_epi_all_chisq_f[(marker_idx1 + ulii) * 2]);
+ for (ulkk = 0; ulkk < uljj; ulkk++) {
+ *fptr++ = fptr2[ulkk * 2];
+ }
+ ulii = g_epi_geno1_offsets[2 * ulii + 1];
+ if (ulii < marker_idx2 + cur_idx2_block_size) {
+ if (ulii <= marker_idx2) {
+ ulii = 0;
+ } else {
+ ulii -= marker_idx2;
+ }
+ uljj = cur_idx2_block_size - ulii;
+ fptr = &(g_epi_best_chisq_f2[tidx * idx2_block_sizem16 + ulii]);
+ fptr2 = &(g_epi_all_chisq_f[(marker_idx2 + ulii) * 2]);
+ for (ulkk = 0; ulkk < uljj; ulkk++) {
+ *fptr++ = fptr2[ulkk * 2];
+ }
+ }
+ }
+ is_last_block = (marker_idx2 + cur_idx2_block_size >= marker_ct2);
+ if (spawn_threads2(threads, &epi_logistic_thread, max_thread_ct, is_last_block)) {
+ goto epistasis_logistic_regression_ret_THREAD_CREATE_FAIL;
+ }
+ epi_logistic_thread((void*)0);
+ join_threads2(threads, max_thread_ct, is_last_block);
+ // merge best_chisq, best_ids, fail_cts
+ for (tidx = 0; tidx < max_thread_ct; tidx++) {
+ ulii = g_epi_idx1_block_bounds[tidx];
+ uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
+ uii = g_epi_idx1_block_bounds16[tidx];
+ fptr = &(g_epi_best_chisq_f1[uii]);
+ uiptr = &(g_epi_best_id1[uii]);
+ uiptr2 = &(g_epi_n_sig_ct1[uii]);
+ uiptr3 = &(g_epi_fail_ct1[uii]);
+ ulii += marker_idx1;
+ dptr = &(best_chisq[ulii]);
+ uiptr4 = &(n_sig_cts[ulii]);
+ uiptr5 = &(fail_cts[ulii]);
+ for (block_idx1 = 0; block_idx1 < uljj; block_idx1++, dptr++, uiptr4++, uiptr5++) {
+ dxx = (double)(*fptr++);
+ if (dxx > (*dptr)) {
+ *dptr = dxx;
+ best_ids[block_idx1 + ulii] = uiptr[block_idx1];
+ }
+ *uiptr4 += *uiptr2++;
+ *uiptr5 += *uiptr3++;
+ }
+ }
+ if (is_triangular) {
+ for (tidx = 0; tidx < max_thread_ct; tidx++) {
+ block_idx2 = g_epi_geno1_offsets[2 * g_epi_idx1_block_bounds[tidx] + 1];
+ if (block_idx2 <= marker_idx2) {
+ block_idx2 = 0;
+ } else {
+ block_idx2 -= marker_idx2;
+ }
+ fptr = &(g_epi_best_chisq_f2[tidx * idx2_block_sizem16 + block_idx2]);
+ uiptr = &(g_epi_best_id2[tidx * idx2_block_sizem16]);
+ uiptr2 = &(g_epi_n_sig_ct2[tidx * idx2_block_sizem16 + block_idx2]);
+ uiptr3 = &(g_epi_fail_ct2[tidx * idx2_block_sizem16 + block_idx2]);
+ dptr = &(best_chisq[block_idx2 + marker_idx2]);
+ uiptr4 = &(n_sig_cts[block_idx2 + marker_idx2]);
+ uiptr5 = &(fail_cts[block_idx2 + marker_idx2]);
+ for (; block_idx2 < cur_idx2_block_size; block_idx2++, dptr++, uiptr4++, uiptr5++) {
+ dxx = (double)(*fptr++);
+ if (dxx > (*dptr)) {
+ *dptr = dxx;
+ best_ids[block_idx2 + marker_idx2] = uiptr[block_idx2];
+ }
+ *uiptr4 += *uiptr2++;
+ *uiptr5 += *uiptr3++;
+ }
+ }
+ }
+ marker_idx2 += cur_idx2_block_size;
+ } while (marker_idx2 < marker_ct2);
+ fputs("\b\b\b\b\b\b\b\b\b\b\bwriting] \b\b\b", stdout);
+ fflush(stdout);
+ chrom_end = 0;
+ block_idx1 = 0;
+ while (1) {
+ next_unset_ul_unsafe_ck(marker_exclude1, &marker_uidx);
+ ujj = g_epi_geno1_offsets[2 * block_idx1];
+ marker_idx2 = 0;
+ fptr = &(g_epi_all_chisq_f[block_idx1 * 2 * marker_ct2]);
+ if (marker_uidx >= chrom_end) {
+ chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ }
+ wptr_start = width_force(4, tbuf, chrom_name_write(tbuf, chrom_info_ptr, chrom_idx));
+ *wptr_start++ = ' ';
+ wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
+ *wptr_start++ = ' ';
+ marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
+ for (chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
+ chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
+ chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+ wptr_start2 = width_force(4, wptr_start, chrom_name_write(wptr_start, chrom_info_ptr, chrom_idx2));
+ *wptr_start2++ = ' ';
+ for (; marker_uidx2 < chrom_end2; next_unset_ul_ck(marker_exclude2, &marker_uidx2, chrom_end2), marker_idx2++, fptr = &(fptr[2])) {
+ if (marker_idx2 == ujj) {
+ marker_idx2 = g_epi_geno1_offsets[2 * block_idx1 + 1];
+ if (marker_idx2 == marker_ct2) {
+ goto epistasis_logistic_regression_write_loop;
+ }
+ if (marker_idx2 > ujj) {
+ marker_uidx2 = jump_forward_unset_unsafe(marker_exclude2, marker_uidx2 + 1, marker_idx2 - ujj);
+ fptr = &(fptr[2 * (marker_idx2 - ujj)]);
+ if (marker_uidx2 >= chrom_end2) {
+ break;
+ }
+ }
+ } else if (marker_idx2 == marker_ct2) {
+ goto epistasis_logistic_regression_write_loop;
+ }
+ fxx = *fptr;
+ if (fxx != -1) {
+ dxx = (double)fxx;
+ wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), wptr_start2);
+ *wptr++ = ' ';
+ // odds ratio
+ wptr = width_force(12, wptr, double_g_write(wptr, exp((double)fptr[1])));
+ *wptr++ = ' ';
+ wptr = width_force(12, wptr, float_g_write(wptr, fxx));
+ *wptr++ = ' ';
+ dxx = normdist(-sqrt(dxx)) * 2;
+ wptr = double_g_writewx4x(wptr, MAXV(dxx, output_min_p), 12, ' ');
+ *wptr++ = '\n';
+ if (fwrite_checked(tbuf, wptr - tbuf, outfile)) {
+ goto epistasis_logistic_regression_ret_WRITE_FAIL;
+ }
+ // could remove this writeback in --epi1 1 case
+ *fptr = -1;
+ }
+ marker_uidx2++;
+ }
+ }
+ epistasis_logistic_regression_write_loop:
+ block_idx1++;
+ marker_uidx++;
+ if (block_idx1 >= idx1_block_size) {
+ break;
+ }
+ }
+ marker_idx1 += idx1_block_size;
+ fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
+ if (tests_complete >= pct_thresh) {
+ if (pct > 10) {
+ putchar('\b');
+ }
+ pct = (tests_complete * 100LLU) / tests_expected;
+ if (pct < 100) {
+ printf("\b\b%" PRIuPTR "%%", pct);
+ fflush(stdout);
+ pct_thresh = ((++pct) * ((uint64_t)tests_expected)) / 100;
+ }
+ }
+ } while (marker_idx1 < marker_idx1_end);
+ if (fclose_null(&outfile)) {
+ goto epistasis_logistic_regression_ret_WRITE_FAIL;
+ }
+ while (0) {
+ epistasis_logistic_regression_ret_NOMEM:
+ retval = RET_NOMEM;
+ break;
+ epistasis_logistic_regression_ret_OPEN_FAIL:
+ retval = RET_OPEN_FAIL;
+ break;
+ epistasis_logistic_regression_ret_READ_FAIL:
+ retval = RET_READ_FAIL;
+ break;
+ epistasis_logistic_regression_ret_WRITE_FAIL:
+ retval = RET_WRITE_FAIL;
+ break;
+ epistasis_logistic_regression_ret_THREAD_CREATE_FAIL:
+ retval = RET_THREAD_CREATE_FAIL;
+ break;
+ }
+ fclose_cond(outfile);
+ // caller will free memory
+ return retval;
+}
+
+int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct2, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t ctrl_ct, uintptr_t* pheno_c, double* pheno_d, uint32_t parallel_idx, uint32_t pa [...]
+ unsigned char* wkspace_mark = wkspace_base;
+ FILE* outfile = NULL;
+ uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
+ uintptr_t unfiltered_sample_ctv2 = 2 * ((unfiltered_sample_ct + (BITCT - 1)) / BITCT);
+ uintptr_t final_mask = get_final_mask(pheno_nm_ct);
+ uintptr_t unfiltered_marker_ctl = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
+ uintptr_t marker_uidx_base = next_unset_unsafe(marker_exclude, 0);
+ uintptr_t marker_uidx = marker_uidx_base;
+ uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
+ uint32_t modifier = epi_ip->modifier;
+ uint32_t is_fast = modifier & EPI_FAST;
+ uint32_t is_boost = (modifier / EPI_FAST_BOOST) & 1;
+ uint32_t do_joint_effects = modifier & EPI_FAST_JOINT_EFFECTS;
+ uint32_t no_ueki = modifier & EPI_FAST_NO_UEKI;
+ uint32_t is_case_only = (modifier / EPI_FAST_CASE_ONLY) & 1;
+ uint32_t is_triangular = 1;
+ uint32_t is_custom_set1 = modifier & (EPI_SET_BY_SET | EPI_SET_BY_ALL)? 1 : 0;
+ uint32_t is_set_by_set = modifier & EPI_SET_BY_SET;
+ uint32_t tot_stride = 6 - 3 * is_case_only;
+ uint32_t no_p_value = modifier & EPI_FAST_NO_P_VALUE;
+ uint32_t case_only_gap = epi_ip->case_only_gap;
+ uint32_t is_case_only_window = (is_case_only && case_only_gap);
+ uint32_t case_ct = pheno_nm_ct - ctrl_ct;
+ uint32_t cellminx3 = 0;
+ uintptr_t case_ctl2 = (case_ct + (BITCT2 - 1)) / BITCT2;
+ uintptr_t case_ctv2 = 2 * ((case_ct + (BITCT - 1)) / BITCT);
+ uintptr_t ctrl_ctl2 = (ctrl_ct + (BITCT2 - 1)) / BITCT2;
+ uintptr_t case_ctv3 = 2 * ((case_ct + (2 * BITCT - 1)) / (2 * BITCT));
+ uintptr_t ctrl_ctv3 = 2 * ((ctrl_ct + (2 * BITCT - 1)) / (2 * BITCT));
+ uintptr_t case_ctsplit = 3 * case_ctv3;
+ uintptr_t ctrl_ctsplit = 3 * ctrl_ctv3;
+ uintptr_t pct = 1;
+ uintptr_t marker_uidx2 = 0;
+ uintptr_t marker_uidx2_trail = 0;
+ uintptr_t marker_idx2 = 0;
+ uintptr_t marker_idx2_trail = 0;
+ uint64_t tests_thrown_out = 0;
uint64_t tests_complete = 0;
uint32_t max_thread_ct = g_thread_ct;
uint32_t chrom_idx = 0;
@@ -7438,6 +9233,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
uint32_t is_last_block;
uint32_t missing_ct;
uint32_t ujj;
+
// common initialization between --epistasis and --fast-epistasis: remove
// monomorphic and non-autosomal diploid sites
if (is_custom_set1) {
@@ -7719,14 +9515,17 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
dxx = ltqnorm(epi_ip->epi2 / 2);
g_epi_alpha2sq[0] = dxx * dxx;
}
- pct_thresh = tests_expected / 100;
if (!is_fast) {
- // hmm, this might not end up as a separate function
- retval = epistasis_regression();
+ if (pheno_d) {
+ retval = epistasis_linear_regression(threads, epi_ip, bedfile, bed_offset, unfiltered_marker_ct, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, marker_uidx_base, marker_ct1, marker_exclude1, marker_idx1_start, marker_idx1_end, marker_ct2, marker_exclude2, is_triangular, job_size, tests_expected, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_d, parallel_idx, parallel_tot, outname, outname_end, output_min_p, glm_vif_thresh, loadbuf, casebuf, best_ch [...]
+ } else {
+ retval = epistasis_logistic_regression(threads, epi_ip, bedfile, bed_offset, unfiltered_marker_ct, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, marker_uidx_base, marker_ct1, marker_exclude1, marker_idx1_start, marker_idx1_end, marker_ct2, marker_exclude2, is_triangular, job_size, tests_expected, unfiltered_sample_ct, pheno_nm, pheno_nm_ct, pheno_c, parallel_idx, parallel_tot, outname, outname_end, output_min_p, loadbuf, casebuf, best_chisq, best_ids, [...]
+ }
if (retval) {
goto epistasis_report_ret_1;
}
} else {
+ pct_thresh = tests_expected / 100;
if (is_case_only) {
g_epi_ctrl_ct = 0;
ctrl_ctv3 = 0;
@@ -7761,7 +9560,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
goto epistasis_report_ret_WRITE_FAIL;
}
}
- // claim up to half of memory with idx1 bufs; each marker currently costs
+ // claim up to half of memory with idx1 bufs; each marker currently costs:
// (case_ctsplit + ctrl_ctsplit) * sizeof(intptr_t) for loose geno buf
// 0.25 for missing tracker
// sizeof(int32_t) for offset (to skip bottom left triangle, and/or
@@ -7857,9 +9656,10 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
}
wptr = memcpya(wptr, " to ", 4);
wptr = strcpya(wptr, outname);
- memcpy(wptr, "...", 4);
+ memcpy(wptr, " ... ", 6);
+ wordwrap(logbuf, 16); // strlen("99% [processing]")
logprintb();
- fputs(" 0%", stdout);
+ fputs("0%", stdout);
do {
fputs(" [processing]", stdout);
fflush(stdout);
@@ -8147,15 +9947,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
fflush(stdout);
chrom_end = 0;
block_idx1 = 0;
- goto epistasis_report_write_start;
while (1) {
- epistasis_report_write_loop:
- block_idx1++;
- marker_uidx++;
- if (block_idx1 >= idx1_block_size) {
- break;
- }
- epistasis_report_write_start:
next_unset_ul_unsafe_ck(marker_exclude1, &marker_uidx);
ujj = g_epi_geno1_offsets[2 * block_idx1];
marker_idx2 = 0;
@@ -8238,6 +10030,12 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
marker_uidx2++;
}
}
+ epistasis_report_write_loop:
+ block_idx1++;
+ marker_uidx++;
+ if (block_idx1 >= idx1_block_size) {
+ break;
+ }
}
marker_idx1 += idx1_block_size;
fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
@@ -8253,9 +10051,9 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
}
}
} while (marker_idx1 < marker_idx1_end);
- }
- if (fclose_null(&outfile)) {
- goto epistasis_report_ret_WRITE_FAIL;
+ if (fclose_null(&outfile)) {
+ goto epistasis_report_ret_WRITE_FAIL;
+ }
}
memcpy(&(outname_end[7]), ".summary", 9);
if (parallel_tot > 1) {
@@ -8351,9 +10149,9 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
if (is_triangular) {
tests_thrown_out /= 2; // all fails double-counted in triangle case
}
- fputs("\b\b\b", stdout);
- LOGPRINTF(" done.\n");
- LOGPRINTFWW("%" PRIu64 " valid tests performed, summary written to %s .\n", tests_expected - tests_thrown_out, outname);
+ fputs("\b\b", stdout);
+ LOGPRINTF("done.\n");
+ LOGPRINTFWW("%" PRIu64 " valid test%s performed, summary written to %s .\n", tests_expected - tests_thrown_out, (tests_expected - tests_thrown_out == 1)? "" : "s", outname);
while (0) {
epistasis_report_ret_NOMEM:
diff --git a/plink_ld.h b/plink_ld.h
index e0a1e72..573a579 100644
--- a/plink_ld.h
+++ b/plink_ld.h
@@ -120,7 +120,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t pheno_ctrl_ct, uintptr_t* pheno_c, uintptr_t* sex_male, [...]
-int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t ctrl_ct, uintptr_t* pheno_c, double* pheno_d, uint32_t parallel_idx, uint32_t par [...]
+int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t ctrl_ct, uintptr_t* pheno_c, double* pheno_d, uint32_t parallel_idx, uint32_t par [...]
int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists);
diff --git a/plink_matrix.c b/plink_matrix.c
index 9e3e989..4abb262 100644
--- a/plink_matrix.c
+++ b/plink_matrix.c
@@ -283,7 +283,7 @@ int32_t invert_matrix(__CLPK_integer dim, double* matrix, MATRIX_INVERT_BUF1_TYP
dgetrf_(&dim, &dim, matrix, &dim, int_1d_buf, &info);
dgetri_(&dim, matrix, &dim, int_1d_buf, dbl_2d_buf, &lwork, &info);
if (info) {
- return -1;
+ return 1;
}
return 0;
}
diff --git a/plink_misc.c b/plink_misc.c
index cc05893..70a6fe4 100644
--- a/plink_misc.c
+++ b/plink_misc.c
@@ -1200,55 +1200,102 @@ uint32_t flip_str(char** allele_str_ptr) {
return 2;
}
+uint32_t flip_process_token(char* tok_start, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t* marker_exclude, uintptr_t* already_seen, char** marker_allele_ptrs, uintptr_t* hit_ct_ptr, uintptr_t* miss_ct_ptr, uint32_t* non_acgt_ct_ptr) {
+ uint32_t slen = strlen_se(tok_start);
+ uintptr_t marker_uidx;
+ uint32_t cur_non_acgt0;
+ marker_uidx = id_htable_find(tok_start, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
+ if ((marker_uidx == 0xffffffffU) || IS_SET(marker_exclude, marker_uidx)) {
+ *miss_ct_ptr += 1;
+ return 0;
+ }
+ if (IS_SET(already_seen, marker_uidx)) {
+ tok_start[slen] = '\0';
+ LOGPREPRINTFWW("Error: Duplicate variant ID '%s' in --flip file.\n", tok_start);
+ return 1;
+ }
+ SET_BIT(already_seen, marker_uidx);
+ cur_non_acgt0 = 0;
+ cur_non_acgt0 |= flip_str(&(marker_allele_ptrs[2 * marker_uidx]));
+ cur_non_acgt0 |= flip_str(&(marker_allele_ptrs[2 * marker_uidx + 1]));
+ *non_acgt_ct_ptr += (cur_non_acgt0 & 1);
+ *hit_ct_ptr += (cur_non_acgt0 >> 1);
+ return 0;
+}
+
int32_t flip_strand(char* flip_fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, char** marker_allele_ptrs) {
unsigned char* wkspace_mark = wkspace_base;
FILE* flipfile = NULL;
- uint32_t non_acgt_ct = 0;
- int32_t retval = 0;
uintptr_t unfiltered_marker_ctl = (unfiltered_marker_ct + (BITCT - 1)) / BITCT;
uintptr_t hit_ct = 0;
uintptr_t miss_ct = 0;
- uintptr_t line_idx = 0;
+ char* midbuf = &(tbuf[MAXLINELEN]);
+ uint32_t non_acgt_ct = 0;
+ uint32_t curtoklen = 0;
+ int32_t retval = 0;
+ uintptr_t bufsize;
uintptr_t* already_seen;
+ char* bufptr0;
char* bufptr;
- uint32_t slen;
- uint32_t cur_non_acgt0;
- uintptr_t marker_uidx;
+ char* bufptr2;
+ char* bufptr3;
if (wkspace_alloc_ul_checked(&already_seen, unfiltered_marker_ctl * sizeof(intptr_t))) {
goto flip_strand_ret_NOMEM;
}
fill_ulong_zero(already_seen, unfiltered_marker_ctl);
- if (fopen_checked(&flipfile, flip_fname, "r")) {
+ // Compatibility fix: PLINK 1.07 uses a token- rather than a line-based
+ // loader here.
+ if (fopen_checked(&flipfile, flip_fname, "rb")) {
goto flip_strand_ret_OPEN_FAIL;
}
- tbuf[MAXLINELEN - 1] = ' ';
- while (fgets(tbuf, MAXLINELEN, flipfile)) {
- line_idx++;
- if (!tbuf[MAXLINELEN - 1]) {
- sprintf(logbuf, "Error: Line %" PRIuPTR " of --flip file is pathologically long.\n", line_idx);
- goto flip_strand_ret_INVALID_FORMAT_2;
+ while (1) {
+ if (fread_checked(midbuf, MAXLINELEN, flipfile, &bufsize)) {
+ goto flip_strand_ret_READ_FAIL;
}
- bufptr = skip_initial_spaces(tbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
+ if (!bufsize) {
+ if (curtoklen) {
+ if (flip_process_token(&(tbuf[MAXLINELEN - curtoklen]), marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, marker_exclude, already_seen, marker_allele_ptrs, &hit_ct, &miss_ct, &non_acgt_ct)) {
+ goto flip_strand_ret_INVALID_FORMAT_2;
+ }
+ }
+ break;
}
- slen = strlen_se(bufptr);
- marker_uidx = id_htable_find(bufptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
- if ((marker_uidx == 0xffffffffU) || IS_SET(marker_exclude, marker_uidx)) {
- miss_ct++;
- continue;
+ bufptr0 = &(midbuf[bufsize]);
+ *bufptr0 = ' ';
+ bufptr0[1] = '0';
+ bufptr = &(tbuf[MAXLINELEN - curtoklen]);
+ bufptr2 = midbuf;
+ if (curtoklen) {
+ goto flip_strand_tok_start;
}
- if (IS_SET(already_seen, marker_uidx)) {
- bufptr[slen] = '\0';
- LOGPREPRINTFWW("Error: Duplicate variant ID '%s' in --flip file.\n", bufptr);
- goto flip_strand_ret_INVALID_FORMAT_2;
+ while (1) {
+ while (*bufptr <= ' ') {
+ bufptr++;
+ }
+ if (bufptr >= bufptr0) {
+ curtoklen = 0;
+ break;
+ }
+ bufptr2 = &(bufptr[1]);
+ flip_strand_tok_start:
+ while (*bufptr2 > ' ') {
+ bufptr2++;
+ }
+ curtoklen = (uintptr_t)(bufptr2 - bufptr);
+ if (bufptr2 == &(tbuf[MAXLINELEN * 2])) {
+ if (curtoklen > MAX_ID_LEN) {
+ logprint("Error: Excessively long ID in --flip file.\n");
+ goto flip_strand_ret_INVALID_FORMAT;
+ }
+ bufptr3 = &(tbuf[MAXLINELEN - curtoklen]);
+ memcpy(bufptr3, bufptr, curtoklen);
+ break;
+ }
+ if (flip_process_token(bufptr, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, marker_exclude, already_seen, marker_allele_ptrs, &hit_ct, &miss_ct, &non_acgt_ct)) {
+ goto flip_strand_ret_INVALID_FORMAT_2;
+ }
+ bufptr = &(bufptr2[1]);
}
- SET_BIT(already_seen, marker_uidx);
- cur_non_acgt0 = 0;
- cur_non_acgt0 |= flip_str(&(marker_allele_ptrs[2 * marker_uidx]));
- cur_non_acgt0 |= flip_str(&(marker_allele_ptrs[2 * marker_uidx + 1]));
- non_acgt_ct += (cur_non_acgt0 & 1);
- hit_ct += (cur_non_acgt0 >> 1);
}
if (!feof(flipfile)) {
goto flip_strand_ret_READ_FAIL;
@@ -1274,6 +1321,7 @@ int32_t flip_strand(char* flip_fname, uint32_t* marker_id_htable, uint32_t marke
break;
flip_strand_ret_INVALID_FORMAT_2:
logprintb();
+ flip_strand_ret_INVALID_FORMAT:
retval = RET_INVALID_FORMAT;
break;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/plink2.git
More information about the debian-med-commit
mailing list