[med-svn] [plink1.9] 01/02: Imported Upstream version 1.90~b3.40-160816
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Thu Aug 25 05:52:01 UTC 2016
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository plink1.9.
commit fb6c79d2ca87ca56cac4c8f8309fd74eee602408
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Thu Aug 25 07:10:49 2016 +0200
Imported Upstream version 1.90~b3.40-160816
---
Makefile | 2 +-
pigz.c | 12 ++-
plink.c | 248 ++++++++++++++++++++++++++++++++++++++++-----------------
plink_assoc.c | 2 +-
plink_calc.c | 41 ++++++----
plink_cnv.h | 2 -
plink_common.c | 107 +++++++++++++++++++------
plink_common.h | 77 ++++++++++--------
plink_data.c | 116 +++++++++++++++++++++------
plink_dosage.c | 2 +-
plink_glm.c | 12 +--
plink_help.c | 124 +++++++++++++++--------------
plink_lasso.c | 2 +-
plink_ld.c | 155 ++++++++++++++++++++++++++----------
plink_ld.h | 34 ++++----
plink_matrix.c | 14 +---
plink_matrix.h | 7 ++
plink_misc.c | 15 ++--
plink_set.c | 2 +-
19 files changed, 661 insertions(+), 313 deletions(-)
diff --git a/Makefile b/Makefile
index e204486..0235f0e 100644
--- a/Makefile
+++ b/Makefile
@@ -52,7 +52,7 @@ endif
SRC = plink.c plink_assoc.c plink_calc.c plink_cluster.c plink_cnv.c plink_common.c plink_data.c plink_dosage.c plink_family.c plink_filter.c plink_glm.c plink_help.c plink_homozyg.c plink_lasso.c plink_ld.c plink_matrix.c plink_misc.c plink_perm.c plink_rserve.c plink_set.c plink_stats.c SFMT.c dcdflib.c pigz.c yarn.c Rconnection.cc hfile.c bgzf.c
# In the event that you are still concurrently using PLINK 1.07, we suggest
-# renaming that binary to "plink107" and "plink1". (Previously,
+# renaming that binary to "plink107", and this one to "plink1". (Previously,
# "plink1"/"plink2" was suggested here; that also works for now, but it may
# lead to minor problems when PLINK 2.0 is released.)
diff --git a/pigz.c b/pigz.c
index 0273750..8953ae4 100644
--- a/pigz.c
+++ b/pigz.c
@@ -297,7 +297,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
-#include "zlib-1.2.8/zlib.h"
+#ifdef DYNAMIC_ZLIB
+ #include <zlib.h>
+#else
+ #include "zlib-1.2.8/zlib.h"
+#endif
#include "pigz.h"
@@ -519,12 +523,16 @@ int32_t flex_pzwrite_close_null(Pigz_state* ps_ptr, char* writep) {
# include <sys/pstat.h>
#endif
-#include "zlib-1.2.8/zlib.h" /* deflateInit2(), deflateReset(), deflate(), */
+#ifdef DYNAMIC_ZLIB
+ #include <zlib.h>
+#else
+ #include "zlib-1.2.8/zlib.h" /* deflateInit2(), deflateReset(), deflate(), */
/* deflateEnd(), deflateSetDictionary(), crc32(),
inflateBackInit(), inflateBack(), inflateBackEnd(),
Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY,
Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK,
Z_SYNC_FLUSH, z_stream */
+#endif
#if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230
# error Need zlib version 1.2.3 or later
#endif
diff --git a/plink.c b/plink.c
index fd5bd0c..09967d6 100644
--- a/plink.c
+++ b/plink.c
@@ -92,7 +92,7 @@
static const char ver_str[] =
#ifdef STABLE_BUILD
- "PLINK v1.90b3.36"
+ "PLINK v1.90b3.40"
#else
"PLINK v1.90p"
#endif
@@ -104,7 +104,7 @@ static const char ver_str[] =
#else
" 32-bit"
#endif
- " (16 Apr 2016)";
+ " (16 Aug 2016)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
""
@@ -117,7 +117,24 @@ static const char ver_str2[] =
" "
#endif
" https://www.cog-genomics.org/plink2\n"
- "(C) 2005-2016 Shaun Purcell, Christopher Chang GNU General Public License v3\n";
+ "(C) 2005-2016 Shaun Purcell, Christopher Chang GNU General Public License v3"
+#if SPECIES_DEFAULT > 0
+ "\nRecompiled with default species = "
+ #if SPECIES_DEFAULT == SPECIES_COW
+ "cow"
+ #elif SPECIES_DEFAULT == SPECIES_DOG
+ "dog"
+ #elif SPECIES_DEFAULT == SPECIES_HORSE
+ "horse"
+ #elif SPECIES_DEFAULT == SPECIES_MOUSE
+ "mouse"
+ #elif SPECIES_DEFAULT == SPECIES_RICE
+ "rice"
+ #elif SPECIES_DEFAULT == SPECIES_SHEEP
+ "sheep"
+ #endif
+#endif
+ "\n";
static const char errstr_append[] = "For more information, try '" PROG_NAME_STR " --help [flag name]' or '" PROG_NAME_STR " --help | more'.\n";
#ifdef STABLE_BUILD
#ifndef NOLAPACK
@@ -142,6 +159,9 @@ void disp_exit_msg(int32_t retval) {
case RET_NOMEM:
logprint("\n");
logerrprint(errstr_nomem);
+ if (g_failed_alloc_attempt_size) {
+ LOGERRPRINTF("Failed allocation size: %" PRIuPTR "\n", g_failed_alloc_attempt_size);
+ }
break;
case RET_WRITE_FAIL:
logprint("\n");
@@ -276,7 +296,10 @@ static inline uint32_t are_marker_pos_needed(uint64_t calculation_type, uint64_t
return (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE | CALC_GENOME | CALC_HOMOZYG | CALC_LD_PRUNE | CALC_REGRESS_PCS | CALC_MODEL | CALC_GLM | CALC_CLUMP | CALC_BLOCKS | CALC_FLIPSCAN | CALC_TDT | CALC_QFAM | CALC_FST | CALC_SHOW_TAGS | CALC_DUPVAR | CALC_RPLUGIN)) || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)) || cm_map_fname || set_fname || min_bp_space || genome_skip_write || ((calculation_type & CALC_LD) && (!(ld_modifier & LD_MATRIX_SHAPEMASK))) || [...]
}
-static inline uint32_t are_marker_cms_needed(uint64_t calculation_type, char* cm_map_fname, Two_col_params* update_cm) {
+static inline uint32_t are_marker_cms_needed(uint64_t calculation_type, char* cm_map_fname, Two_col_params* update_cm, Ld_info* ldip) {
+ if ((calculation_type & CALC_LD) && (!(ldip->modifier & LD_MATRIX_SHAPEMASK)) && (ldip->window_cm != -1)) {
+ return MARKER_CMS_FORCED;
+ }
if (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_RECODE)) {
if (cm_map_fname || update_cm) {
return MARKER_CMS_FORCED;
@@ -318,7 +341,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
uintptr_t* sex_male = nullptr;
uint32_t genome_skip_write = (cluster_ptr->ppc != 0.0) && (!(calculation_type & CALC_GENOME)) && (!read_genome_fname);
uint32_t marker_pos_needed = are_marker_pos_needed(calculation_type, misc_flags, cm_map_fname, sip->fname, min_bp_space, genome_skip_write, ldip->modifier, epi_ip->modifier, cluster_ptr->modifier);
- uint32_t marker_cms_needed = are_marker_cms_needed(calculation_type, cm_map_fname, update_cm);
+ uint32_t marker_cms_needed = are_marker_cms_needed(calculation_type, cm_map_fname, update_cm, ldip);
uint32_t marker_alleles_needed = are_marker_alleles_needed(calculation_type, freqname, homozyg_ptr, a1alleles, a2alleles, ldip->modifier, (filter_flags / FILTER_SNPS_ONLY) & 1, clump_ip->modifier, cluster_ptr->modifier);
// add other nchrobs subscribers later
@@ -497,23 +520,28 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
// Only append -merge to the filename stem if --make-bed or --recode lgen
// is specified.
- ulii = bed_suffix_conflict(calculation_type, recode_modifier);
+ // bugfix: also no need if --merge-mode 6 or 7.
+ ulii = ((merge_type & MERGE_MODE_MASK) < 6) && bed_suffix_conflict(calculation_type, recode_modifier);
if (ulii) {
- memcpy(outname_end, "-merge", 7);
+ memcpy(outname_end, "-merge", 6);
}
retval = merge_datasets(bedname, bimname, famname, outname, ulii? &(outname_end[6]) : outname_end, mergename1, mergename2, mergename3, sample_sort_fname, calculation_type, merge_type, sample_sort, misc_flags, chrom_info_ptr);
if (retval || (!(calculation_type & (~CALC_MERGE)))) {
goto plink_ret_1;
}
- uljj = (uintptr_t)(outname_end - outname) + (ulii? 6 : 0);
- memcpy(memcpya(bedname, outname, uljj), ".bed", 5);
- memcpy(memcpya(famname, bedname, uljj), ".fam", 5);
- memcpy(memcpya(bimname, bedname, uljj), ".bim", 5);
- if ((calculation_type & CALC_MAKE_BED) && ulii) {
- if (push_ll_str(bedname, file_delete_list_ptr) || push_ll_str(famname, file_delete_list_ptr) || push_ll_str(bimname, file_delete_list_ptr)) {
- goto plink_ret_NOMEM;
+ if ((merge_type & MERGE_MODE_MASK) < 6) {
+ uljj = (uintptr_t)(outname_end - outname) + (ulii? 6 : 0);
+ memcpy(memcpya(bedname, outname, uljj), ".bed", 5);
+ memcpy(memcpya(famname, bedname, uljj), ".fam", 5);
+ memcpy(memcpya(bimname, bedname, uljj), ".bim", 5);
+ if ((calculation_type & CALC_MAKE_BED) && ulii) {
+ if (push_ll_str(bedname, file_delete_list_ptr) || push_ll_str(famname, file_delete_list_ptr) || push_ll_str(bimname, file_delete_list_ptr)) {
+ goto plink_ret_NOMEM;
+ }
}
}
+ // do we want to permit other operations with --merge-mode 6/7 at all?
+ // seems like a recipe for confusion...
}
// don't use fopen_checked() here, since we want to customize the error
@@ -1585,13 +1613,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
}
if (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_MAKE_FAM)) {
- retval = make_bed(bedfile, bed_offset, bimname, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, output_miss [...]
+ retval = make_bed(bedfile, bed_offset, bimname, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, output_miss [...]
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_RECODE) {
- retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_len, marker_cms, marker_allele_ptrs, max_marker_allele_blen, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, ph [...]
+ retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_len, marker_cms, marker_allele_ptrs, max_marker_allele_blen, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, ph [...]
if (retval) {
goto plink_ret_1;
}
@@ -1677,11 +1705,17 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (calculation_type & CALC_LD) {
- if ((!(ldip->modifier & (LD_MATRIX_SHAPEMASK & LD_INTER_CHR))) && (map_is_unsorted & UNSORTED_BP)) {
- logerrprint("Error: Windowed --r/--r2 runs require a sorted .bim. Retry this command after\nusing --make-bed to sort your data.\n");
- goto plink_ret_INVALID_CMDLINE;
+ if (!(ldip->modifier & (LD_MATRIX_SHAPEMASK | LD_INTER_CHR))) {
+ if (map_is_unsorted & UNSORTED_BP) {
+ logerrprint("Error: Windowed --r/--r2 runs require a sorted .bim. Retry this command after\nusing --make-bed to sort your data.\n");
+ goto plink_ret_INVALID_CMDLINE;
+ }
+ if ((map_is_unsorted & UNSORTED_CM) && (ldip->window_cm != -1)) {
+ logerrprint("Error: --ld-window-cm requires nondecreasing CM values on each chromosome.\nRetry this command after regenerating your CM coordinates.\n");
+ goto plink_ret_INVALID_CMDLINE;
+ }
}
- retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, set_allele_freqs, chrom_info_ptr, marker_pos, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
+ retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, set_allele_freqs, chrom_info_ptr, marker_pos, marker_cms, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -2495,7 +2529,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
g_textbuf[MAXLINELEN - 1] = ' ';
if (!fgets(g_textbuf, MAXLINELEN, rerunfile)) {
print_ver();
- fflush(stdout);
fputs("Error: Empty log file for --rerun.\n", stderr);
goto rerun_ret_INVALID_FORMAT;
}
@@ -2504,7 +2537,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
}
if (!fgets(g_textbuf, MAXLINELEN, rerunfile)) {
print_ver();
- fflush(stdout);
fputs("Error: Only one line in --rerun log file.\n", stderr);
goto rerun_ret_INVALID_FORMAT;
}
@@ -2517,7 +2549,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
fclose_null(&rerunfile);
if (scan_posint_capped(g_textbuf, (MAXLINELEN / 2), &loaded_arg_ct)) {
print_ver();
- fflush(stdout);
fputs("Error: Invalid argument count on line 2 of --rerun log file.\n", stderr);
goto rerun_ret_INVALID_FORMAT;
}
@@ -2536,7 +2567,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
line_idx++;
if (!fgets(g_textbuf, MAXLINELEN, rerunfile)) {
print_ver();
- fflush(stdout);
fputs("Error: Invalid log file for --rerun.\n", stderr);
goto rerun_ret_INVALID_FORMAT;
}
@@ -2554,7 +2584,7 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
break;
}
line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
+ if (!load_ptr[MAXLINELEN - 1]) {
goto rerun_ret_LONG_LINE;
}
sptr = skip_initial_spaces(load_ptr);
@@ -2570,7 +2600,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
load_ptr = argptr;
if (load_ptr >= &(g_textbuf[MAXLINELEN])) {
print_ver();
- fflush(stdout);
fputs("Error: --rerun argument sequence too long.\n", stderr);
goto rerun_ret_INVALID_FORMAT;
}
@@ -2581,7 +2610,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
if (!rerun_buf) {
goto rerun_ret_NOMEM;
}
- rerun_buf = (char*)malloc(1 + ((uintptr_t)(load_ptr - g_textbuf)));
memcpy(rerun_buf, g_textbuf, line_byte_ct);
rerun_start_ptr = skip_initial_spaces(rerun_buf);
}
@@ -2594,7 +2622,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
do {
if (no_more_tokens_kns(sptr)) {
print_ver();
- fflush(stdout);
fputs("Error: Line 2 of --rerun log file has fewer tokens than expected.\n", stderr);
goto rerun_ret_INVALID_FORMAT;
}
@@ -2673,7 +2700,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
break;
rerun_ret_LONG_LINE:
print_ver();
- fflush(stdout);
fprintf(stderr, "Error: Line %" PRIuPTR " of --rerun log file is pathologically long.\n", line_idx);
rerun_ret_INVALID_FORMAT:
retval = RET_INVALID_FORMAT;
@@ -2894,7 +2920,7 @@ uint32_t valid_varid_template_string(char* varid_str, const char* flag_name) {
}
int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_map, int32_t argc, char** argv, char* range_delim_ptr, Chrom_info* chrom_info_ptr) {
- uint32_t species_code = SPECIES_HUMAN;
+ uint32_t species_code = SPECIES_DEFAULT;
uint32_t flag_idx = 0;
uint32_t retval = 0;
int32_t cur_arg;
@@ -3072,7 +3098,7 @@ int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_
int32_t recode_type_set(uint32_t* recode_modifier_ptr, uint32_t cur_code) {
if (*recode_modifier_ptr & (RECODE_TYPEMASK - cur_code)) {
- logerrprint("Error: Conflicting --recode modifiers.\n");
+ logerrprint("Error: Multiple --recode output formats.\n");
return -1;
}
*recode_modifier_ptr |= cur_code;
@@ -3216,6 +3242,7 @@ int32_t main(int32_t argc, char** argv) {
double genome_max_pi_hat = 1.0;
FILE* scriptfile = nullptr;
uint32_t recode_modifier = 0;
+ uint32_t recode_require_format = 0;
uint32_t allelexxxx = 0;
uint32_t merge_type = 0;
uint32_t sample_sort = 0;
@@ -3398,14 +3425,13 @@ int32_t main(int32_t argc, char** argv) {
ujj = param_count(argc, argv, uii);
if (enforce_param_ct_range(ujj, argv[uii], 1, 1)) {
print_ver();
- fputs(g_logbuf, stdout);
- fputs(errstr_append, stdout);
+ fputs(g_logbuf, stderr);
+ fputs(errstr_append, stderr);
goto main_ret_INVALID_CMDLINE;
}
for (ujj = uii + 2; ujj < (uint32_t)argc; ujj++) {
if ((!strcmp("-script", argv[ujj])) || (!strcmp("--script", argv[ujj]))) {
print_ver();
- fflush(stdout);
fputs("Error: Multiple --script flags. Merge the files into one.\n", stderr);
fputs(errstr_append, stderr);
goto main_ret_INVALID_CMDLINE;
@@ -3428,7 +3454,6 @@ int32_t main(int32_t argc, char** argv) {
// could actually happen if user enters parameters in the wrong order,
// so may as well catch it and print a somewhat informative error msg
print_ver();
- fflush(stdout);
fputs("Error: --script file too large.\n", stderr);
goto main_ret_INVALID_CMDLINE;
}
@@ -3454,6 +3479,9 @@ int32_t main(int32_t argc, char** argv) {
}
}
subst_argv = (char**)malloc((num_params + argc - 3) * sizeof(char*));
+ if (!subst_argv) {
+ goto main_ret_NOMEM_NOLOG;
+ }
num_params = 0;
in_param = 0;
for (ukk = 1; ukk < uii; ukk++) {
@@ -3484,14 +3512,13 @@ int32_t main(int32_t argc, char** argv) {
ujj = param_count(argc, argv, uii);
if (enforce_param_ct_range(ujj, argv[uii], 0, 1)) {
print_ver();
- fputs(g_logbuf, stdout);
- fputs(errstr_append, stdout);
+ fputs(g_logbuf, stderr);
+ fputs(errstr_append, stderr);
goto main_ret_INVALID_CMDLINE;
}
for (ukk = uii + ujj + 1; ukk < (uint32_t)argc; ukk++) {
if ((!strcmp("-rerun", argv[ukk])) || (!strcmp("--rerun", argv[ukk]))) {
print_ver();
- fflush(stdout);
fputs("Error: Duplicate --rerun flag.\n", stderr);
goto main_ret_INVALID_CMDLINE;
}
@@ -3505,7 +3532,6 @@ int32_t main(int32_t argc, char** argv) {
}
if ((cur_arg < (uint32_t)argc) && (!is_flag(argv[cur_arg]))) {
print_ver();
- fflush(stdout);
fputs("Error: First parameter must be a flag.\n", stderr);
fputs(errstr_append, stderr);
goto main_ret_INVALID_CMDLINE;
@@ -3523,7 +3549,23 @@ int32_t main(int32_t argc, char** argv) {
if ((cur_arg != 1) || (uii != 1) || subst_argv) {
fputs("--help present, ignoring other flags.\n", stdout);
}
- retval = disp_help(argc - uii - 1, &(argv[uii + 1]));
+ if ((uii == ((uint32_t)argc) - 1) && flag_ct) {
+ // make "plink [valid flags/parameters] --help" work, and skip the
+ // parameters
+ char** help_argv = (char**)malloc(flag_ct * sizeof(intptr_t));
+ if (!help_argv) {
+ goto main_ret_NOMEM_NOLOG2;
+ }
+ uint32_t arg_idx2 = 0;
+ for (uint32_t flag_idx = 0; flag_idx < flag_ct; ++flag_idx) {
+ while (!is_flag_start(argv[++arg_idx2]));
+ help_argv[flag_idx] = argv[arg_idx2];
+ }
+ retval = disp_help(flag_ct, help_argv);
+ free(help_argv);
+ } else {
+ retval = disp_help(argc - uii - 1, &(argv[uii + 1]));
+ }
goto main_ret_1;
}
if ((!strcmp("h", argptr)) || (!strcmp("?", argptr))) {
@@ -3567,7 +3609,14 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_1;
}
if (ukk) {
- if (!freopen("/dev/null", "w", stdout)) {
+ // bugfix: redirection to /dev/null doesn't work on Windows
+ if (!freopen(
+#ifdef _WIN32
+"nul"
+#else
+"/dev/null"
+#endif
+, "w", stdout)) {
fputs("Warning: --silent failed.\n", stderr);
}
}
@@ -3586,7 +3635,6 @@ int32_t main(int32_t argc, char** argv) {
switch (*argptr) {
case '\0':
// special case, since we reserve empty names for preprocessed flags
- fflush(stdout);
fputs("Error: Unrecognized flag ('--').\n", stderr);
goto main_ret_INVALID_CMDLINE;
case 'F':
@@ -3635,6 +3683,10 @@ int32_t main(int32_t argc, char** argv) {
} else if (!strcmp(argptr, "exponent")) {
memcpy(flagptr, "distance-exp", 13);
break;
+ } else if (!strcmp(argptr, "export")) {
+ memcpy(flagptr, "recode", 7);
+ recode_require_format = 1;
+ break;
}
goto main_flag_copy;
case 'f':
@@ -3663,12 +3715,10 @@ int32_t main(int32_t argc, char** argv) {
goto main_flag_copy;
case 'h':
if (!strcmp(argptr, "hwe2")) {
- fflush(stdout);
fputs("Warning: --hwe2 flag is obsolete, and now treated as an alias for '--hwe midp'.\n", stderr);
memcpy(flagptr, "hwe midp", 9);
break;
} else if (!strcmp(argptr, "hardy2")) {
- fflush(stdout);
fputs("Warning: --hardy2 flag is obsolete, and now treated as an alias for\n'--hardy midp'.\n", stderr);
memcpy(flagptr, "hardy midp", 11);
break;
@@ -3851,7 +3901,6 @@ int32_t main(int32_t argc, char** argv) {
ukk = strlen_se(&(flag_buf[cur_flag * MAX_FLAG_LEN]));
if ((ujj == ukk) && (!memcmp(&(flag_buf[(cur_flag - 1) * MAX_FLAG_LEN]), &(flag_buf[cur_flag * MAX_FLAG_LEN]), ukk))) {
flag_buf[cur_flag * MAX_FLAG_LEN + ukk] = '\0'; // just in case of aliases
- fflush(stdout);
fprintf(stderr, "Error: Duplicate --%s flag.\n", &(flag_buf[cur_flag * MAX_FLAG_LEN]));
goto main_ret_INVALID_CMDLINE;
}
@@ -3871,7 +3920,6 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_INVALID_CMDLINE;
}
if (strlen(argv[ujj + 1]) > (FNAMESIZE - MAX_POST_EXT)) {
- fflush(stdout);
fputs("Error: --out parameter too long.\n", stderr);
goto main_ret_OPEN_FAIL;
}
@@ -3886,7 +3934,6 @@ int32_t main(int32_t argc, char** argv) {
memcpy(&(outname[uii]), ".log", 5);
g_logfile = fopen(outname, "w");
if (!g_logfile) {
- fflush(stdout);
fprintf(stderr, "Error: Failed to open %s. Try ", outname);
if (!memcmp(outname, PROG_NAME_STR, 6)) {
fputs("using --out.\n", stderr);
@@ -4869,6 +4916,9 @@ int32_t main(int32_t argc, char** argv) {
} else {
annot_info.border = (int32_t)(dxx * 1000 * (1 + SMALL_EPSILON));
}
+ } else if (!memcmp(argptr2, "pfile", 6)) {
+ sprintf(g_logbuf, "Error: Unrecognized flag (%s). (This is PLINK 1.9, not 2.x.)\n", argv[cur_arg]);
+ goto main_ret_INVALID_CMDLINE_2;
} else {
goto main_ret_INVALID_CMDLINE_UNRECOGNIZED;
}
@@ -7589,7 +7639,7 @@ int32_t main(int32_t argc, char** argv) {
calculation_type |= CALC_PLINK1_IBS_MATRIX;
goto main_param_zero;
} else if (!memcmp(argptr2, "d-delim", 8)) {
- if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
+ if (enforce_param_ct_range(param_ct, argv[cur_arg], 0, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
}
if (param_ct) {
@@ -7608,7 +7658,7 @@ int32_t main(int32_t argc, char** argv) {
logprint("Note: --inter-chr flag deprecated. Use e.g. '--r2 inter-chr'.\n");
ld_info.modifier |= LD_INTER_CHR;
} else if (!memcmp(argptr2, "nd-major", 9)) {
- logerrprint("Error: --ind-major is retired, to discourage creation of .bed files that\nconstantly have to be transposed back. --recode exports sample-major files\nwhich are good enough for smaller jobs; we suggest transposing small data\nwindows on the fly when tackling large jobs.\n");
+ logerrprint("Error: --ind-major is retired, to discourage creation of .bed files that\nconstantly have to be transposed back. '--recode ped' exports sample-major\nfiles which are good enough for smaller jobs; we suggest transposing small data\nwindows on the fly when tackling large jobs.\n");
goto main_ret_INVALID_CMDLINE;
} else if (!memcmp(argptr2, "mpute-sex", 10)) {
if (calculation_type & CALC_SEXCHECK) {
@@ -8108,6 +8158,15 @@ int32_t main(int32_t argc, char** argv) {
} else {
ld_info.window_bp = ((int32_t)(dxx * 1000 * (1 + SMALL_EPSILON)));
}
+ } else if (!memcmp(argptr2, "d-window-cm", 12)) {
+ if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
+ goto main_ret_INVALID_CMDLINE_2A;
+ }
+ if (scan_double(argv[cur_arg + 1], &dxx) || (dxx < 0)) {
+ sprintf(g_logbuf, "Error: Invalid --ld-window-cm parameter '%s'.\n", argv[cur_arg + 1]);
+ goto main_ret_INVALID_CMDLINE_WWA;
+ }
+ ld_info.window_cm = dxx * (1 + SMALL_EPSILON);
} else if (!memcmp(argptr2, "d-window-r2", 12)) {
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
@@ -9182,7 +9241,6 @@ int32_t main(int32_t argc, char** argv) {
misc_flags |= MISC_SPLIT_MERGE_NOFAIL;
}
misc_flags |= MISC_MERGEX;
- goto main_param_zero;
} else if (!memcmp(argptr2, "issing-var-code", 16)) {
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
@@ -9394,6 +9452,7 @@ int32_t main(int32_t argc, char** argv) {
sprintf(g_logbuf, "Error: Invalid --mac parameter '%s'.\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
}
+ filter_flags |= FILTER_ALL_REQ | FILTER_NODOSAGE | FILTER_NOCNV;
} else if (!memcmp(argptr2, "ax-mac", 7)) {
UNSTABLE("max-mac");
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
@@ -9401,11 +9460,13 @@ int32_t main(int32_t argc, char** argv) {
}
if (scan_uint_defcap(argv[cur_arg + 1], &max_ac)) {
sprintf(g_logbuf, "Error: Invalid --max-mac parameter '%s'.\n", argv[cur_arg + 1]);
+ goto main_ret_INVALID_CMDLINE_WWA;
}
if (max_ac < min_ac) {
logerrprint("Error: --max-mac parameter cannot be smaller than --mac parameter.\n");
goto main_ret_INVALID_CMDLINE;
}
+ filter_flags |= FILTER_ALL_REQ | FILTER_NODOSAGE | FILTER_NOCNV;
} else if (!memcmp(argptr2, "lma", 4)) {
logerrprint("Error: --mlma is not implemented yet.\n");
goto main_ret_INVALID_CMDLINE;
@@ -9418,6 +9479,9 @@ int32_t main(int32_t argc, char** argv) {
} else if (!memcmp(argptr2, "ishap-window", 13)) {
logerrprint("Error: --mishap-window is provisionally retired. Contact the developers if you\nneed this function.\n");
goto main_ret_INVALID_CMDLINE;
+ } else if ((!memcmp(argptr2, "ake-bpgen", 10)) || (!memcmp(argptr2, "ake-pgen", 9))) {
+ sprintf(g_logbuf, "Error: Unrecognized flag (%s). (This is PLINK 1.9, not 2.x.)\n", argv[cur_arg]);
+ goto main_ret_INVALID_CMDLINE_2;
} else {
goto main_ret_INVALID_CMDLINE_UNRECOGNIZED;
}
@@ -9993,6 +10057,9 @@ int32_t main(int32_t argc, char** argv) {
// happening before 2016.)
logerrprint("Error: PLINK 1 proxy association and imputation commands have been retired due\nto poor accuracy. (See Nothnagel M et al. (2009) A comprehensive evaluation of\nSNP genotype imputation.) We suggest using another tool, such as BEAGLE 4 or\nIMPUTE2, for imputation instead, and performing association analysis on those\nresults. ('--recode vcf' and --vcf can be used to exchange data with BEAGLE 4,\nwhile '--recode oxford' and --data let you work with IMPUTE2.)\n");
goto main_ret_INVALID_CMDLINE;
+ } else if (!memcmp(argptr2, "file", 5)) {
+ sprintf(g_logbuf, "Error: Unrecognized flag (%s). (This is PLINK 1.9, not 2.x.)\n", argv[cur_arg]);
+ goto main_ret_INVALID_CMDLINE_2;
} else {
goto main_ret_INVALID_CMDLINE_UNRECOGNIZED;
}
@@ -10461,22 +10528,22 @@ int32_t main(int32_t argc, char** argv) {
for (uii = 1; uii <= param_ct; uii++) {
if ((!strcmp(argv[cur_arg + uii], "01")) || (!strcmp(argv[cur_arg + uii], "12"))) {
if (recode_modifier & (RECODE_A | RECODE_AD)) {
- logerrprint("Error: --recode '01' and '12' modifiers cannot be used with 'A' or 'AD'.\n");
- goto main_ret_INVALID_CMDLINE_A;
+ sprintf(g_logbuf, "Error: The '%s' modifier does not apply to --recode's A and AD output formats.\n", argv[cur_arg + uii]);
+ goto main_ret_INVALID_CMDLINE_2A;
} else if (recode_modifier & RECODE_VCF) {
main_recode_012_vcf_conflict:
- logerrprint("Error: --recode '01' and '12' modifiers cannot be used with VCF output.\n");
+ logerrprint("Error: '01'/'12' cannot be used with --recode's VCF output formats.\n");
goto main_ret_INVALID_CMDLINE_A;
}
if (argv[cur_arg + uii][0] == '0') {
if (recode_modifier & RECODE_12) {
- logerrprint("Error: --recode '01' and '12' modifiers cannot be used together.\n");
+ logerrprint("Error: --recode '01' and '12' cannot be used together.\n");
goto main_ret_INVALID_CMDLINE;
}
recode_modifier |= RECODE_01;
} else {
if (recode_modifier & RECODE_01) {
- logerrprint("Error: --recode '01' and '12' modifiers cannot be used together.\n");
+ logerrprint("Error: --recode '01' and '12' cannot be used together.\n");
goto main_ret_INVALID_CMDLINE;
}
recode_modifier |= RECODE_12;
@@ -10573,6 +10640,10 @@ int32_t main(int32_t argc, char** argv) {
if (recode_type_set(&recode_modifier, RECODE_OXFORD)) {
goto main_ret_INVALID_CMDLINE_A;
}
+ } else if (!strcmp(argv[cur_arg + uii], "ped")) {
+ if (recode_type_set(&recode_modifier, RECODE_PED)) {
+ goto main_ret_INVALID_CMDLINE_A;
+ }
} else if (!strcmp(argv[cur_arg + uii], "rlist")) {
if (recode_type_set(&recode_modifier, RECODE_RLIST)) {
goto main_ret_INVALID_CMDLINE_A;
@@ -10621,24 +10692,36 @@ int32_t main(int32_t argc, char** argv) {
} else if (!strcmp(argv[cur_arg + uii], "omit-nonmale-y")) {
recode_modifier |= RECODE_OMIT_NONMALE_Y;
} else {
- sprintf(g_logbuf, "Error: Invalid --recode parameter '%s'.%s\n", argv[cur_arg + uii], ((uii == param_ct) && (!outname_end))? " (Did you forget '--out'?)" : "");
+ sprintf(g_logbuf, "Error: Invalid --recode parameter '%s'.%s\n", argv[cur_arg + uii], ((uii == param_ct) && (!outname_end))? " (Did you forget '--out'?)" : "");
goto main_ret_INVALID_CMDLINE_WWA;
}
}
+ if (!(recode_modifier & RECODE_TYPEMASK)) {
+ // thought about including a warning here, and then outright
+ // prohibiting parameterless --recode in v2.0, but that usage is
+ // simply too common. new plan: --export requires an explicit output
+ // format, but --recode will continue to be translated without a
+ // warning even when defaulting to ped output.
+ if (recode_require_format) {
+ logerrprint("Error: --export requires an output format. (Did you forget 'ped' or 'vcf'?)\n");
+ goto main_ret_INVALID_CMDLINE_A;
+ }
+ recode_modifier |= RECODE_PED;
+ }
if ((recode_modifier & RECODE_INCLUDE_ALT) && (!(recode_modifier & (RECODE_A | RECODE_AD)))) {
- logerrprint("Error: --recode 'include-alt' modifier must be used with 'A' or 'AD'.\n");
+ logerrprint("Error: The 'include-alt' modifier only applies to --recode's A and AD output\nformats.\n");
goto main_ret_INVALID_CMDLINE_A;
}
if ((recode_modifier & RECODE_OMIT_NONMALE_Y) && (!(recode_modifier & (RECODE_LIST | RECODE_RLIST)))) {
- logerrprint("Error: --recode 'omit-nonmale-y' modifier must be used with 'list' or 'rlist'.\n");
+ logerrprint("Error: The 'omit-nonmale-y' modifier only applies to --recode's list and rlist\noutput formats.\n");
goto main_ret_INVALID_CMDLINE_A;
}
if ((recode_modifier & RECODE_BGZ) && (!(recode_modifier & RECODE_VCF))) {
- logerrprint("Error: --recode 'bgz' modifier must be used with VCF output.\n");
+ logerrprint("Error: The 'bgz' modifier only applies to --recode's VCF output formats.\n");
goto main_ret_INVALID_CMDLINE_A;
}
if ((recode_modifier & RECODE_GEN_GZ) && (!(recode_modifier & RECODE_OXFORD))) {
- logerrprint("Error: --recode 'gen-gz' modifier must be used with Oxford-format output.\n");
+ logerrprint("Error: The 'gen-gz' modifier only applies to --recode's oxford output format.\n");
goto main_ret_INVALID_CMDLINE_A;
}
calculation_type |= CALC_RECODE;
@@ -10762,9 +10845,9 @@ int32_t main(int32_t argc, char** argv) {
if (ld_info.modifier & LD_MATRIX_SHAPEMASK) {
logerrprint("Error: Multiple --r/--r2 shape modifiers.\n");
goto main_ret_INVALID_CMDLINE;
- } else if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DPRIME | LD_WITH_FREQS)) {
+ } else if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DX | LD_WITH_FREQS)) {
main_r2_matrix_conflict:
- sprintf(g_logbuf, "Error: --r/--r2 '%s' cannot be used with matrix output.\n", (ld_info.modifier & LD_INTER_CHR)? "inter-chr" : ((ld_info.modifier & LD_INPHASE)? "in-phase" : ((ld_info.modifier & LD_DPRIME)? "dprime" : "with-freqs")));
+ sprintf(g_logbuf, "Error: --r/--r2 '%s' cannot be used with matrix output.\n", (ld_info.modifier & LD_INTER_CHR)? "inter-chr" : ((ld_info.modifier & LD_INPHASE)? "in-phase" : ((ld_info.modifier & LD_DX)? "d'/'dprime'/'dprime-signed" : "with-freqs")));
goto main_ret_INVALID_CMDLINE_2A;
}
ld_info.modifier |= LD_MATRIX_SQ;
@@ -10772,7 +10855,7 @@ int32_t main(int32_t argc, char** argv) {
if (ld_info.modifier & LD_MATRIX_SHAPEMASK) {
logerrprint("Error: Multiple --r/--r2 shape modifiers.\n");
goto main_ret_INVALID_CMDLINE;
- } else if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DPRIME | LD_WITH_FREQS)) {
+ } else if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DX | LD_WITH_FREQS)) {
goto main_r2_matrix_conflict;
}
ld_info.modifier |= LD_MATRIX_SQ0;
@@ -10780,7 +10863,7 @@ int32_t main(int32_t argc, char** argv) {
if (ld_info.modifier & LD_MATRIX_SHAPEMASK) {
logerrprint("Error: Multiple --r/--r2 shape modifiers.\n");
goto main_ret_INVALID_CMDLINE;
- } else if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DPRIME | LD_WITH_FREQS)) {
+ } else if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DX | LD_WITH_FREQS)) {
goto main_r2_matrix_conflict;
}
ld_info.modifier |= LD_MATRIX_TRI;
@@ -10796,7 +10879,7 @@ int32_t main(int32_t argc, char** argv) {
}
ld_info.modifier |= LD_REPORT_GZ;
} else if (!strcmp(argv[cur_arg + uii], "bin")) {
- if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DPRIME | LD_WITH_FREQS)) {
+ if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DX | LD_WITH_FREQS)) {
goto main_r2_matrix_conflict;
} else if (ld_info.modifier & (LD_REPORT_GZ | LD_MATRIX_BIN4)) {
logerrprint("Error: Conflicting --r/--r2 modifiers.\n");
@@ -10807,7 +10890,7 @@ int32_t main(int32_t argc, char** argv) {
}
ld_info.modifier |= LD_MATRIX_BIN;
} else if (!strcmp(argv[cur_arg + uii], "bin4")) {
- if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DPRIME | LD_WITH_FREQS)) {
+ if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DX | LD_WITH_FREQS)) {
goto main_r2_matrix_conflict;
} else if (ld_info.modifier & (LD_REPORT_GZ | LD_MATRIX_BIN)) {
logerrprint("Error: Conflicting --r/--r2 modifiers.\n");
@@ -10821,7 +10904,7 @@ int32_t main(int32_t argc, char** argv) {
logerrprint("Error: --r/--r2 'single-prec' modifier has been retired. Use 'bin4'.\n");
goto main_ret_INVALID_CMDLINE;
} else if (!strcmp(argv[cur_arg + uii], "spaces")) {
- if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DPRIME | LD_WITH_FREQS)) {
+ if (ld_info.modifier & (LD_INTER_CHR | LD_INPHASE | LD_DX | LD_WITH_FREQS)) {
goto main_r2_matrix_conflict;
} else if (ld_info.modifier & (LD_MATRIX_BIN | LD_MATRIX_BIN4)) {
logerrprint("Error: --r/--r2 'bin{4}' and 'spaces' modifiers cannot be used together.\n");
@@ -10833,11 +10916,33 @@ int32_t main(int32_t argc, char** argv) {
if (ld_info.modifier & (LD_MATRIX_SHAPEMASK | LD_MATRIX_BIN | LD_MATRIX_BIN4 | LD_MATRIX_SPACES)) {
goto main_r2_matrix_conflict;
}
+ } else if (!strcmp(argv[cur_arg + uii], "d")) {
+ ld_info.modifier |= LD_D;
+ if (ld_info.modifier & (LD_MATRIX_SHAPEMASK | LD_MATRIX_BIN | LD_MATRIX_BIN4 | LD_MATRIX_SPACES)) {
+ goto main_r2_matrix_conflict;
+ }
+ if (ld_info.modifier & (LD_DPRIME | LD_DPRIME_SIGNED)) {
+ logerrprint("Error: --r/--r2 'd' cannot be used with 'dprime'/'dprime-signed'.\n");
+ goto main_ret_INVALID_CMDLINE;
+ }
} else if (!strcmp(argv[cur_arg + uii], "dprime")) {
ld_info.modifier |= LD_DPRIME;
if (ld_info.modifier & (LD_MATRIX_SHAPEMASK | LD_MATRIX_BIN | LD_MATRIX_BIN4 | LD_MATRIX_SPACES)) {
goto main_r2_matrix_conflict;
}
+ if (ld_info.modifier & (LD_D | LD_DPRIME_SIGNED)) {
+ logerrprint("Error: --r/--r2 'dprime' cannot be used with 'd'/'dprime-signed'.\n");
+ goto main_ret_INVALID_CMDLINE;
+ }
+ } else if (!strcmp(argv[cur_arg + uii], "dprime-signed")) {
+ ld_info.modifier |= LD_DPRIME_SIGNED;
+ if (ld_info.modifier & (LD_MATRIX_SHAPEMASK | LD_MATRIX_BIN | LD_MATRIX_BIN4 | LD_MATRIX_SPACES)) {
+ goto main_r2_matrix_conflict;
+ }
+ if (ld_info.modifier & (LD_D | LD_DPRIME)) {
+ logerrprint("Error: --r/--r2 'dprime-signed' cannot be used with 'd'/'dprime'.\n");
+ goto main_ret_INVALID_CMDLINE;
+ }
} else if (!strcmp(argv[cur_arg + uii], "with-freqs")) {
ld_info.modifier |= LD_WITH_FREQS;
if (ld_info.modifier & (LD_MATRIX_SHAPEMASK | LD_MATRIX_BIN | LD_MATRIX_BIN4 | LD_MATRIX_SPACES)) {
@@ -13343,13 +13448,7 @@ int32_t main(int32_t argc, char** argv) {
}
#endif
} else if (load_rare & LOAD_RARE_CNV) {
-#ifdef HIGH_MAX_CHROM
- logerrprint("Error: The CNV module is disabled in high-contig-limit PLINK builds.\n");
- goto main_ret_INVALID_CMDLINE;
- // no, I don't care about unused variable compiler warnings in this case
-#else
retval = plink_cnv(outname, outname_end, pedname, mapname, famname, phenoname, keepname, removename, filtername, misc_flags, update_chr, update_cm, update_map, update_name, update_ids_fname, update_parents_fname, update_sex_fname, filtervals_flattened, filter_flags, cnv_calc_type, cnv_min_seglen, cnv_max_seglen, cnv_min_score, cnv_max_score, cnv_min_sites, cnv_max_sites, cnv_intersect_filter_type, cnv_intersect_filter_fname, cnv_subset_fname, cnv_overlap_type, cnv_overlap_val, cnv_fr [...]
-#endif
} else if (load_rare & LOAD_RARE_GVAR) {
retval = plink_gvar(outname, outname_end, pedname, mapname, famname);
} else {
@@ -13482,6 +13581,9 @@ int32_t main(int32_t argc, char** argv) {
print_ver();
main_ret_NOMEM_NOLOG2:
fputs(errstr_nomem, stderr);
+ if (g_failed_alloc_attempt_size) {
+ fprintf(stderr, "Failed allocation size: %" PRIuPTR "\n", g_failed_alloc_attempt_size);
+ }
retval = RET_NOMEM;
break;
main_ret_READ_FAIL_NOLOG:
diff --git a/plink_assoc.c b/plink_assoc.c
index 536205b..c9c5ef9 100644
--- a/plink_assoc.c
+++ b/plink_assoc.c
@@ -11411,7 +11411,7 @@ int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
bigstack_alloc_d((cluster_ct1 - 1) * (cluster_ct1 - 1), &dbl_2d_buf)) {
goto cmh2_assoc_ret_NOMEM;
}
- mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc((cluster_ct1 - 1) * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc((cluster_ct1 - 1) * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!mi_buf) {
goto cmh2_assoc_ret_NOMEM;
}
diff --git a/plink_calc.c b/plink_calc.c
index efe3683..2382c6c 100644
--- a/plink_calc.c
+++ b/plink_calc.c
@@ -555,19 +555,6 @@ void collapse_copy_phenod(double* target, double* pheno_d, uintptr_t* sample_exc
} while (target < target_end);
}
-static inline void collapse_copy_phenod_incl(double* target, double* pheno_d, uintptr_t* sample_include, uintptr_t unfiltered_sample_ct, uintptr_t sample_ct) {
- uintptr_t sample_uidx = 0;
- double* target_end = &(target[sample_ct]);
- uintptr_t delta;
- do {
- sample_uidx = next_set_ul_unsafe(sample_include, sample_uidx);
- delta = next_unset_ul(sample_include, sample_uidx, unfiltered_sample_ct) - sample_uidx;
- memcpy(target, &(pheno_d[sample_uidx]), delta * sizeof(double));
- target = &(target[delta]);
- sample_uidx += delta;
- } while (target < target_end);
-}
-
#ifdef __LP64__
// XOR + mask variants of vectorized Lauradoux/Walisch popcount. (See
// popcount_vecs() in plink_common.c for basic documentation.)
@@ -2174,6 +2161,19 @@ void matrix_row_sum_ur(uintptr_t sample_ct, double* sums, double* matrix) {
}
#ifndef NOLAPACK
+static inline void collapse_copy_phenod_incl(double* target, double* pheno_d, uintptr_t* sample_include, uintptr_t unfiltered_sample_ct, uintptr_t sample_ct) {
+ uintptr_t sample_uidx = 0;
+ double* target_end = &(target[sample_ct]);
+ uintptr_t delta;
+ do {
+ sample_uidx = next_set_ul_unsafe(sample_include, sample_uidx);
+ delta = next_unset_ul(sample_include, sample_uidx, unfiltered_sample_ct) - sample_uidx;
+ memcpy(target, &(pheno_d[sample_uidx]), delta * sizeof(double));
+ target = &(target[delta]);
+ sample_uidx += delta;
+ } while (target < target_end);
+}
+
// one-trait REML via EM.
//
// wkbase is assumed to have space for three cache-aligned
@@ -3304,7 +3304,7 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
bigstack_alloc_d(pc_ct_p1 * pc_ct_p1, &dbl_2d_buf)) {
goto calc_regress_pcs_ret_NOMEM;
}
- inv_1d_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(pc_ct_p1 * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ inv_1d_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(pc_ct_p1 * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!inv_1d_buf) {
goto calc_regress_pcs_ret_NOMEM;
}
@@ -5041,6 +5041,15 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
logerrprint("Error: --genome cannot be used on haploid genomes.\n");
goto calc_genome_ret_INVALID_CMDLINE;
}
+#ifdef __LP64__
+ // 32-bit value in main loop can be up to
+ // sample_ct * (GENOME_MULTIPLEX2 / BITCT) - 1
+ if (sample_ct > 119304647) {
+ logerrprint("Error: --genome does not support > 119304647 samples.\n");
+ retval = RET_CALC_NOT_YET_SUPPORTED;
+ goto calc_genome_ret_1;
+ }
+#endif
g_sample_ct = sample_ct;
if (dist_thread_ct > sample_ct / 32) {
dist_thread_ct = sample_ct / 32;
@@ -5144,7 +5153,7 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
}
set_allele_freq_buf[ujj] = set_allele_freqs[marker_uidx];
if (nchrobs) {
- nchrobs_buf[ujj] = nchrobs[ujj];
+ nchrobs_buf[ujj] = nchrobs[marker_uidx];
}
// See comments in incr_genome(): the PPC test is time-critical and
// we do a bit of unusual precomputation here to speed it up.
@@ -5177,7 +5186,7 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
if (mp_lead_unfiltered_idx < unfiltered_marker_ct) {
ulii = 2 * (mp_lead_idx - marker_idx);
if (ulii < BITCT + (2 * (ujj & (~(BITCT2 - 1))))) {
- ulii = ~ZEROLU << (ulii & (BITCT - 1));
+ ulii = (~ZEROLU) << (ulii & (BITCT - 1));
}
} else {
ulii = 2 * (unfiltered_marker_ct + GENOME_MULTIPLEX);
diff --git a/plink_cnv.h b/plink_cnv.h
index e20bd33..fab05ee 100644
--- a/plink_cnv.h
+++ b/plink_cnv.h
@@ -1,9 +1,7 @@
#ifndef __PLINK_CNV_H__
#define __PLINK_CNV_H__
-#ifndef HIGH_MAX_CHROM
int32_t plink_cnv(char* outname, char* outname_end, char* cnvname, char* mapname, char* famname, char* phenoname, char* keepname, char* removename, char* filtername, uint64_t misc_flags, Two_col_params* update_chr, Two_col_params* update_cm, Two_col_params* update_map, Two_col_params* update_name, char* update_ids_fname, char* update_parents_fname, char* update_sex_fname, char* filtervals_flattened, uint64_t filter_flags, uint32_t cnv_calc_type, uint32_t min_seglen, uint32_t max_seglen, [...]
-#endif // HIGH_MAX_CHROM
int32_t plink_gvar(char* outname, char* outname_end, char* gvarname, char* mapname, char* famname);
diff --git a/plink_common.c b/plink_common.c
index a68efdd..06fdab5 100644
--- a/plink_common.c
+++ b/plink_common.c
@@ -16,6 +16,8 @@ const char g_one_char_strs[] = "\0\0\1\0\2\0\3\0\4\0\5\0\6\0\7\0\10\0\11\0\12\0\
const char* g_missing_geno_ptr = &(g_one_char_strs[96]);
const char* g_output_missing_geno_ptr = &(g_one_char_strs[96]);
+uintptr_t g_failed_alloc_attempt_size = 0;
+
sfmt_t g_sfmt;
FILE* g_logfile = nullptr;
@@ -33,6 +35,7 @@ uint32_t aligned_malloc(uintptr_t size, uintptr_t** aligned_pp) {
// does not even guarantee 8-byte alignment.)
uintptr_t* malloc_ptr = (uintptr_t*)malloc(size + VEC_BYTES);
if (!malloc_ptr) {
+ g_failed_alloc_attempt_size = size + VEC_BYTES;
return 1;
}
*aligned_pp = (uintptr_t*)((((uintptr_t)malloc_ptr) + VEC_BYTES) & (~(VEC_BYTES_M1 * ONELU)));
@@ -41,6 +44,7 @@ uint32_t aligned_malloc(uintptr_t size, uintptr_t** aligned_pp) {
// no SSE2 concerns here
*aligned_pp = (uintptr_t*)malloc(size);
if (!(*aligned_pp)) {
+ g_failed_alloc_attempt_size = size;
return 1;
}
#endif
@@ -59,6 +63,7 @@ uint32_t push_ll_str(const char* ss, Ll_str** ll_stack_ptr) {
uintptr_t str_bytes = strlen(ss) + 1;
Ll_str* new_ll_str = (Ll_str*)malloc(sizeof(Ll_str) + str_bytes);
if (!new_ll_str) {
+ g_failed_alloc_attempt_size = sizeof(Ll_str) + str_bytes;
return 1;
}
new_ll_str->next = *ll_stack_ptr;
@@ -188,7 +193,7 @@ void wordwrapb(uint32_t suffix_len) {
int32_t fopen_checked(const char* fname, const char* mode, FILE** target_ptr) {
*target_ptr = fopen(fname, mode);
if (!(*target_ptr)) {
- LOGPRINTFWW(g_errstr_fopen, fname);
+ LOGERRPRINTFWW(g_errstr_fopen, fname);
return -1;
}
return 0;
@@ -210,7 +215,7 @@ int32_t fwrite_checked(const void* buf, size_t len, FILE* outfile) {
int32_t gzopen_read_checked(const char* fname, gzFile* gzf_ptr) {
*gzf_ptr = gzopen(fname, FOPEN_RB);
if (!(*gzf_ptr)) {
- LOGPRINTFWW(g_errstr_fopen, fname);
+ LOGERRPRINTFWW(g_errstr_fopen, fname);
return RET_OPEN_FAIL;
}
if (gzbuffer(*gzf_ptr, 131072)) {
@@ -227,6 +232,7 @@ unsigned char* bigstack_alloc(uintptr_t size) {
unsigned char* alloc_ptr;
size = round_up_pow2(size, CACHELINE);
if (bigstack_left() < size) {
+ g_failed_alloc_attempt_size = size;
return nullptr;
}
alloc_ptr = g_bigstack_base;
@@ -243,6 +249,7 @@ unsigned char* bigstack_end_alloc_presized(uintptr_t size) {
assert(!(size & END_ALLOC_CHUNK_M1));
uintptr_t cur_bigstack_left = bigstack_left();
if (size > cur_bigstack_left) {
+ g_failed_alloc_attempt_size = size;
return nullptr;
} else {
g_bigstack_end -= size;
@@ -4388,6 +4395,7 @@ void init_species(uint32_t species_code, Chrom_info* chrom_info_ptr) {
break;
}
}
+ fill_uint_one(chrom_info_ptr->max_code + 1, chrom_info_ptr->chrom_idx_to_foidx);
}
void init_default_chrom_mask(Chrom_info* chrom_info_ptr) {
@@ -4804,6 +4812,9 @@ int32_t try_to_add_chrom_name(const char* chrom_name, const char* file_descrip,
}
if ((in_name_stack && chrom_info_ptr->is_include_stack) || ((!in_name_stack) && (!chrom_info_ptr->is_include_stack))) {
SET_BIT(chrom_code_end, chrom_info_ptr->chrom_mask);
+ if (chrom_info_ptr->haploid_mask[0] & 1) {
+ SET_BIT(chrom_code_end, chrom_info_ptr->haploid_mask);
+ }
}
memcpy(nonstd_names[chrom_code_end], chrom_name, name_slen + 1);
*chrom_idx_ptr = (int32_t)chrom_code_end;
@@ -6348,14 +6359,16 @@ uint32_t chrom_window_max(const uint32_t* marker_pos, const uintptr_t* marker_ex
return cur_window_max;
}
-uint32_t window_back(const uint32_t* __restrict marker_pos, const uintptr_t* marker_exclude, uint32_t marker_uidx_min, uint32_t marker_uidx_start, uint32_t count_max, uint32_t bp_max, uint32_t* __restrict window_trail_ct_ptr) {
- // finds the earliest location which is within count_max sites and bp_max bps
- // count_max must be positive
+uint32_t window_back(const uint32_t* __restrict marker_pos, const double* __restrict marker_cms, const uintptr_t* marker_exclude, uint32_t marker_uidx_min, uint32_t marker_uidx_start, uint32_t count_max, uint32_t bp_max, double cm_max, uint32_t* __restrict window_trail_ct_ptr) {
+ // Finds the earliest location which is within count_max sites, bp_max bps,
+ // and (if marker_cms != nullptr) cm_max centimorgans.
+ // count_max must be positive.
if (marker_uidx_min == marker_uidx_start) {
// special-case this since it happens frequently
*window_trail_ct_ptr = 0;
return marker_uidx_min;
}
+ double min_cm = marker_cms? (marker_cms[marker_uidx_start] - cm_max) : 0.0;
uint32_t min_pos = 0;
uint32_t marker_uwidx_cur = marker_uidx_start / BITCT;
uint32_t uii = marker_uidx_start % BITCT;
@@ -6363,6 +6376,8 @@ uint32_t window_back(const uint32_t* __restrict marker_pos, const uintptr_t* mar
uint32_t remaining_count = count_max;
const uintptr_t* marker_exclude_cur = &(marker_exclude[marker_uwidx_cur]);
uintptr_t cur_word;
+ uint32_t ujj;
+ uint32_t ukk;
marker_uwidx_cur *= BITCT;
if (bp_max <= marker_pos[marker_uidx_start]) {
min_pos = marker_pos[marker_uidx_start] - bp_max;
@@ -6378,7 +6393,7 @@ uint32_t window_back(const uint32_t* __restrict marker_pos, const uintptr_t* mar
uii = popcount_long(cur_word);
if (uii >= remaining_count) {
goto window_back_count;
- } else if (marker_pos[marker_uwidx_cur] < min_pos) {
+ } else if ((marker_pos[marker_uwidx_cur] < min_pos) || (marker_cms && (marker_cms[marker_uwidx_cur] < min_cm))) {
goto window_back_find_first_pos;
} else {
goto window_back_min;
@@ -6393,15 +6408,22 @@ uint32_t window_back(const uint32_t* __restrict marker_pos, const uintptr_t* mar
uii--;
}
marker_uwidx_cur += CTZLU(cur_word);
- if (marker_pos[marker_uwidx_cur] < min_pos) {
+ if ((marker_pos[marker_uwidx_cur] < min_pos) || (marker_cms && (marker_cms[marker_uwidx_cur] < min_cm))) {
goto window_back_find_first_pos;
}
*window_trail_ct_ptr = count_max;
return marker_uwidx_cur;
}
- if (marker_pos[marker_uwidx_cur] < min_pos) {
+ if ((marker_pos[marker_uwidx_cur] < min_pos) || (marker_cms && (marker_cms[marker_uwidx_cur] < min_cm))) {
window_back_find_first_pos:
- marker_uwidx_cur += uint32arr_greater_than(&(marker_pos[marker_uwidx_cur]), marker_uidx_last - marker_uwidx_cur, min_pos);
+ ujj = uint32arr_greater_than(&(marker_pos[marker_uwidx_cur]), marker_uidx_last - marker_uwidx_cur, min_pos);
+ if (marker_cms) {
+ ukk = doublearr_greater_than(&(marker_cms[marker_uwidx_cur]), marker_uidx_last - marker_uwidx_cur, min_cm);
+ if (ujj < ukk) {
+ ujj = ukk;
+ }
+ }
+ marker_uwidx_cur += ujj;
if (marker_uwidx_cur > marker_uidx_min) {
next_unset_unsafe_ck(marker_exclude, &marker_uwidx_cur);
}
@@ -6417,12 +6439,13 @@ uint32_t window_back(const uint32_t* __restrict marker_pos, const uintptr_t* mar
}
}
-uint32_t window_forward(const uint32_t* __restrict marker_pos, const uintptr_t* marker_exclude, uint32_t marker_uidx_start, uint32_t marker_uidx_last, uint32_t count_max, uint32_t bp_max, uint32_t* __restrict window_lead_ct_ptr) {
+uint32_t window_forward(const uint32_t* __restrict marker_pos, const double* __restrict marker_cms, const uintptr_t* marker_exclude, uint32_t marker_uidx_start, uint32_t marker_uidx_last, uint32_t count_max, uint32_t bp_max, double cm_max, uint32_t* __restrict window_lead_ct_ptr) {
// window_lead_ct_ptr currently cannot be nullptr
if (marker_uidx_start == marker_uidx_last) {
*window_lead_ct_ptr = 0;
return marker_uidx_start;
}
+ double max_cm = marker_cms? (cm_max + marker_cms[marker_uidx_start]) : 0.0;
uint32_t marker_uwidx_prev = marker_uidx_start;
uint32_t max_pos = bp_max + marker_pos[marker_uidx_start];
uint32_t marker_uwidx_cur = (marker_uidx_start + 1) / BITCT;
@@ -6430,6 +6453,8 @@ uint32_t window_forward(const uint32_t* __restrict marker_pos, const uintptr_t*
uint32_t remaining_count = count_max;
const uintptr_t* marker_exclude_cur = &(marker_exclude[marker_uwidx_cur]);
uintptr_t cur_word;
+ uint32_t ujj;
+ uint32_t ukk;
marker_uwidx_cur *= BITCT;
cur_word = ~((*marker_exclude_cur) | ((ONELU << uii) - ONELU));
while (1) {
@@ -6440,14 +6465,14 @@ uint32_t window_forward(const uint32_t* __restrict marker_pos, const uintptr_t*
}
marker_uwidx_cur += CTZLU(cur_word);
if (marker_uwidx_cur <= marker_uidx_last) {
- if (marker_pos[marker_uwidx_cur] > max_pos) {
+ if ((marker_pos[marker_uwidx_cur] > max_pos) || (marker_cms && (marker_cms[marker_uwidx_cur] > max_cm))) {
break;
}
*window_lead_ct_ptr = count_max;
return marker_uwidx_cur;
}
- if (marker_pos[marker_uidx_last] <= max_pos) {
- marker_uwidx_prev = marker_uidx_last;
+ if ((marker_pos[marker_uidx_last] <= max_pos) && ((!marker_cms) || (marker_cms[marker_uidx_last] <= max_cm))) {
+ marker_uwidx_prev = marker_uidx_last;
goto window_forward_return;
}
marker_uwidx_cur = marker_uidx_last;
@@ -6455,21 +6480,27 @@ uint32_t window_forward(const uint32_t* __restrict marker_pos, const uintptr_t*
}
marker_uwidx_cur += BITCT;
if (marker_uwidx_cur >= marker_uidx_last) {
- if (marker_pos[marker_uidx_last] <= max_pos) {
+ if ((marker_pos[marker_uidx_last] <= max_pos) && ((!marker_cms) || (marker_cms[marker_uidx_last] <= max_cm))) {
marker_uwidx_prev = marker_uidx_last;
goto window_forward_return;
- } else {
- marker_uwidx_cur = marker_uidx_last;
- break;
}
- } else if (marker_pos[marker_uwidx_cur] > max_pos) {
+ marker_uwidx_cur = marker_uidx_last;
+ break;
+ } else if ((marker_pos[marker_uwidx_cur] > max_pos) || (marker_cms && (marker_cms[marker_uwidx_cur] > max_cm))) {
break;
}
marker_uwidx_prev = marker_uwidx_cur;
remaining_count -= uii;
cur_word = ~(*(++marker_exclude_cur));
}
- marker_uwidx_prev += uint32arr_greater_than(&(marker_pos[marker_uwidx_prev]), marker_uwidx_cur - marker_uwidx_prev, max_pos + 1);
+ ujj = uint32arr_greater_than(&(marker_pos[marker_uwidx_prev]), marker_uwidx_cur - marker_uwidx_prev, max_pos + 1);
+ if (marker_cms) {
+ ukk = doublearr_greater_than(&(marker_cms[marker_uwidx_prev]), marker_uwidx_cur - marker_uwidx_prev, max_cm * (1 + SMALL_EPSILON));
+ if (ujj > ukk) {
+ ujj = ukk;
+ }
+ }
+ marker_uwidx_prev += ujj;
prev_unset_unsafe_ck(marker_exclude, &marker_uwidx_prev);
window_forward_return:
*window_lead_ct_ptr = marker_uwidx_prev - marker_uidx_start - popcount_bit_idx(marker_exclude, marker_uidx_start, marker_uwidx_prev);
@@ -10440,9 +10471,11 @@ void join_threads(pthread_t* threads, uint32_t ctp1) {
}
#ifdef _WIN32
WaitForMultipleObjects(ctp1, threads, 1, INFINITE);
+ for (uint32_t uii = 0; uii < ctp1; ++uii) {
+ CloseHandle(threads[uii]);
+ }
#else
- uint32_t uii;
- for (uii = 0; uii < ctp1; uii++) {
+ for (uint32_t uii = 0; uii < ctp1; uii++) {
pthread_join(threads[uii], nullptr);
}
#endif
@@ -10582,6 +10615,7 @@ void join_threads2(pthread_t* threads, uint32_t ctp1, uint32_t is_last_block) {
} else {
WaitForMultipleObjects(ctp1, threads, 1, INFINITE);
for (uii = 0; uii < ctp1; uii++) {
+ CloseHandle(threads[uii]);
CloseHandle(g_thread_start_next_event[uii]);
CloseHandle(g_thread_cur_block_done_events[uii]);
}
@@ -10637,7 +10671,21 @@ int32_t spawn_threads2(pthread_t* threads, void* (*start_routine)(void*), uintpt
for (ulii = 1; ulii < ct; ulii++) {
threads[ulii - 1] = (HANDLE)_beginthreadex(nullptr, 4096, start_routine, (void*)ulii, 0, nullptr);
if (!threads[ulii - 1]) {
- join_threads2(threads, ulii, is_last_block);
+ if (ulii > 1) {
+ join_threads2(threads, ulii, is_last_block);
+ if (!is_last_block) {
+ for (uintptr_t uljj = 0; uljj < ulii - 1; ++uljj) {
+ CloseHandle(threads[uljj]);
+ }
+ }
+ }
+ if ((!is_last_block) || (ulii == 1)) {
+ for (uint32_t uii = 0; uii < ct - 1; ++uii) {
+ CloseHandle(g_thread_start_next_event[uii]);
+ CloseHandle(g_thread_cur_block_done_events[uii]);
+ }
+ g_thread_mutex_initialized = 0;
+ }
return -1;
}
}
@@ -10664,7 +10712,20 @@ int32_t spawn_threads2(pthread_t* threads, void* (*start_routine)(void*), uintpt
}
for (ulii = 1; ulii < ct; ulii++) {
if (pthread_create(&(threads[ulii - 1]), nullptr, start_routine, (void*)ulii)) {
- join_threads2(threads, ulii, is_last_block);
+ if (ulii > 1) {
+ join_threads2(threads, ulii, is_last_block);
+ if (!is_last_block) {
+ for (uintptr_t uljj = 0; uljj < ulii - 1; ++uljj) {
+ pthread_cancel(threads[uljj]);
+ }
+ }
+ }
+ if ((!is_last_block) || (ulii == 1)) {
+ pthread_mutex_destroy(&g_thread_sync_mutex);
+ pthread_cond_destroy(&g_thread_cur_block_done_condvar);
+ pthread_cond_destroy(&g_thread_start_next_condvar);
+ g_thread_mutex_initialized = 0;
+ }
return -1;
}
}
diff --git a/plink_common.h b/plink_common.h
index bbd57ae..e8666df 100644
--- a/plink_common.h
+++ b/plink_common.h
@@ -20,6 +20,16 @@
// the command line.
// #define STABLE_BUILD
+#define SPECIES_HUMAN 0
+#define SPECIES_COW 1
+#define SPECIES_DOG 2
+#define SPECIES_HORSE 3
+#define SPECIES_MOUSE 4
+#define SPECIES_RICE 5
+#define SPECIES_SHEEP 6
+#define SPECIES_UNKNOWN 7
+#define SPECIES_DEFAULT SPECIES_HUMAN
+
#define PROG_NAME_STR "plink"
#define PROG_NAME_CAPS "PLINK"
@@ -32,6 +42,18 @@
#include <sys/stat.h>
#endif
+#ifndef HAVE_NULLPTR
+ #ifndef __cplusplus
+ #define nullptr NULL
+ #else
+ #if __cplusplus <= 199711L
+ #ifndef nullptr
+ #define nullptr NULL
+ #endif
+ #endif
+ #endif
+#endif
+
#ifdef _WIN32
#define PRId64 "I64d"
#define PRIu64 "I64u"
@@ -102,16 +124,6 @@
#define HEADER_INLINE static inline
#endif
-#ifndef HAVE_NULLPTR
- #ifndef __cplusplus
- #define nullptr NULL
- #else
- #if __cplusplus <= 199711L
- #define nullptr NULL
- #endif
- #endif
-#endif
-
// It would be useful to disable compilation on big-endian platforms, but I
// don't see a decent portable way to do this (see e.g. discussion at
// http://esr.ibiblio.org/?p=5095 ).
@@ -191,7 +203,14 @@
#define VEC_BITS (VEC_BYTES * 8)
#define VEC_BITS_M1 (VEC_BITS - 1)
-#include "zlib-1.2.8/zlib.h"
+#ifdef DYNAMIC_ZLIB
+ #include <zlib.h>
+ #if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1240
+ #error "zlib version 1.2.4 or later required."
+ #endif
+#else
+ #include "zlib-1.2.8/zlib.h"
+#endif
#include "SFMT.h"
// 64MB of non-workspace memory guaranteed for now.
@@ -407,8 +426,8 @@
#define UNSORTED_CHROM 1
#define UNSORTED_BP 2
-// probably insert unsorted centimorgans later
-#define UNSORTED_SPLIT_CHROM 4
+#define UNSORTED_CM 4
+#define UNSORTED_SPLIT_CHROM 8
#define ALLOW_NO_SEX 1
#define MUST_HAVE_SEX 2
@@ -470,14 +489,15 @@
#define RECODE_RLIST 0x200000
#define RECODE_STRUCTURE 0x400000
#define RECODE_TRANSPOSE 0x800000
-#define RECODE_VCF 0x1000000
-#define RECODE_TYPEMASK 0x1fffff0
-#define RECODE_FID 0x2000000
-#define RECODE_IID 0x4000000
-#define RECODE_INCLUDE_ALT 0x8000000
-#define RECODE_BGZ 0x10000000
-#define RECODE_GEN_GZ 0x20000000
-#define RECODE_OMIT_NONMALE_Y 0x40000000
+#define RECODE_PED 0x1000000
+#define RECODE_VCF 0x2000000
+#define RECODE_TYPEMASK 0x3fffff0
+#define RECODE_FID 0x4000000
+#define RECODE_IID 0x8000000
+#define RECODE_INCLUDE_ALT 0x10000000
+#define RECODE_BGZ 0x20000000
+#define RECODE_GEN_GZ 0x40000000
+#define RECODE_OMIT_NONMALE_Y 0x80000000U
#define GENOME_OUTPUT_GZ 1
#define GENOME_REL_CHECK 2
@@ -847,6 +867,8 @@ HEADER_INLINE void aligned_free_cond_null(uintptr_t** aligned_pp) {
}
}
+extern uintptr_t g_failed_alloc_attempt_size;
+
extern sfmt_t g_sfmt;
// file-scope string constants don't always have the g_ prefix, but multi-file
@@ -2102,15 +2124,6 @@ typedef struct {
uint32_t output_encoding;
} Chrom_info;
-#define SPECIES_HUMAN 0
-#define SPECIES_COW 1
-#define SPECIES_DOG 2
-#define SPECIES_HORSE 3
-#define SPECIES_MOUSE 4
-#define SPECIES_RICE 5
-#define SPECIES_SHEEP 6
-#define SPECIES_UNKNOWN 7
-
extern const char* g_species_singular;
extern const char* g_species_plural;
@@ -2410,9 +2423,9 @@ uintptr_t popcount_bit_idx(const uintptr_t* lptr, uintptr_t start_idx, uintptr_t
uint32_t chrom_window_max(const uint32_t* marker_pos, const uintptr_t* marker_exclude, const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, uint32_t ct_max, uint32_t bp_max, uint32_t cur_window_max);
-uint32_t window_back(const uint32_t* __restrict marker_pos, const uintptr_t* marker_exclude, uint32_t marker_uidx_min, uint32_t marker_uidx, uint32_t count_max, uint32_t bp_max, uint32_t* __restrict window_trail_ct_ptr);
+uint32_t window_back(const uint32_t* __restrict marker_pos, const double* __restrict marker_cms, const uintptr_t* marker_exclude, uint32_t marker_uidx_min, uint32_t marker_uidx_start, uint32_t count_max, uint32_t bp_max, double cm_max, uint32_t* __restrict window_trail_ct_ptr);
-uint32_t window_forward(const uint32_t* __restrict marker_pos, const uintptr_t* marker_exclude, uint32_t marker_uidx_start, uint32_t marker_uidx_last, uint32_t count_max, uint32_t bp_max, uint32_t* __restrict window_lead_ct_ptr);
+uint32_t window_forward(const uint32_t* __restrict marker_pos, const double* __restrict marker_cms, const uintptr_t* marker_exclude, uint32_t marker_uidx_start, uint32_t marker_uidx_last, uint32_t count_max, uint32_t bp_max, double cm_max, uint32_t* __restrict window_lead_ct_ptr);
uintptr_t jump_forward_unset_unsafe(const uintptr_t* bitarr, uintptr_t cur_pos, uintptr_t forward_ct);
diff --git a/plink_data.c b/plink_data.c
index 490bb28..c19237c 100644
--- a/plink_data.c
+++ b/plink_data.c
@@ -480,6 +480,7 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
uintptr_t line_idx = 0;
int32_t prev_chrom = -1;
uint32_t last_pos = 0;
+ double last_cm = -DBL_MAX;
const uint32_t is_bim = (ftype_str[1] == 'b'); // .map also supported
uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
@@ -550,6 +551,7 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
uint32_t uoo;
int32_t jj;
uint32_t cur_pos;
+ double cur_cm;
char cc;
{
fill_ulong_zero(CHROM_MASK_WORDS, loaded_chrom_mask);
@@ -1141,13 +1143,14 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
goto load_bim_ret_INVALID_FORMAT_2;
}
split_chrom = 1;
- *map_is_unsorted_ptr = UNSORTED_CHROM | UNSORTED_BP | UNSORTED_SPLIT_CHROM;
+ *map_is_unsorted_ptr |= UNSORTED_CHROM | UNSORTED_BP | UNSORTED_SPLIT_CHROM;
} else {
chrom_info_ptr->chrom_file_order[++chroms_encountered_m1] = cur_chrom_code;
chrom_info_ptr->chrom_fo_vidx_start[chroms_encountered_m1] = marker_uidx;
chrom_info_ptr->chrom_idx_to_foidx[(uint32_t)cur_chrom_code] = chroms_encountered_m1;
}
last_pos = 0;
+ last_cm = -DBL_MAX;
}
set_bit(cur_chrom_code, loaded_chrom_mask);
}
@@ -1172,10 +1175,16 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
goto load_bim_ret_NOMEM;
}
}
- if (scan_double(bufptr, &((*marker_cms_ptr)[marker_uidx]))) {
+ if (scan_double(bufptr, &cur_cm)) {
sprintf(g_logbuf, "Error: Invalid centimorgan position on line %" PRIuPTR " of %s.\n", line_idx, ftype_str);
goto load_bim_ret_INVALID_FORMAT_2;
}
+ if (cur_cm < last_cm) {
+ *map_is_unsorted_ptr |= UNSORTED_CM;
+ } else {
+ last_cm = cur_cm;
+ }
+ (*marker_cms_ptr)[marker_uidx] = cur_cm;
}
bufptr = next_token(bufptr);
} else {
@@ -1290,6 +1299,9 @@ int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t*
// support unfiltered marker_pos search
(*marker_pos_ptr)[marker_uidx] = last_pos;
}
+ if (marker_cms_needed & MARKER_CMS_FORCED) {
+ (*marker_cms_ptr)[marker_uidx] = last_cm;
+ }
}
}
if ((unfiltered_marker_ct == marker_exclude_ct) && (!allow_no_variants)) {
@@ -2531,6 +2543,8 @@ int32_t write_map_or_bim(char* outname, uintptr_t* marker_exclude, uintptr_t mar
uint32_t chrom_end = 0;
uint32_t chrom_fo_idx = 0xffffffffU;
uint32_t chrom_idx = 0;
+ const char* output_missing_geno_ptr = g_output_missing_geno_ptr;
+ const char* missing_geno_ptr = (g_missing_geno_ptr == g_output_missing_geno_ptr)? nullptr : g_missing_geno_ptr;
char* buf_start = nullptr;
uintptr_t marker_idx;
char* bufptr;
@@ -2557,10 +2571,19 @@ int32_t write_map_or_bim(char* outname, uintptr_t* marker_exclude, uintptr_t mar
goto write_map_or_bim_ret_WRITE_FAIL;
}
if (marker_allele_ptrs) {
+ // quasi-bugfix: --missing-genotype + --output-missing-genotype did not
+ // have the expected behavior with regular --make-bed (though it *did*
+ // already do the right thing when sorting the .bim...)
putc_unlocked(delim, outfile);
- fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
- putc_unlocked(delim, outfile);
- fputs(marker_allele_ptrs[2 * marker_uidx + 1], outfile);
+ if (!missing_geno_ptr) {
+ fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
+ putc_unlocked(delim, outfile);
+ fputs(marker_allele_ptrs[2 * marker_uidx + 1], outfile);
+ } else {
+ fputs(cond_replace(marker_allele_ptrs[2 * marker_uidx], missing_geno_ptr, output_missing_geno_ptr), outfile);
+ putc_unlocked(delim, outfile);
+ fputs(cond_replace(marker_allele_ptrs[2 * marker_uidx + 1], missing_geno_ptr, output_missing_geno_ptr), outfile);
+ }
}
putc_unlocked('\n', outfile);
}
@@ -3500,6 +3523,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, char* outna
// main loop, but no real need to bother.
errptr = errflags[set_hh_missing? 0 : (set_me_missing? 1 : 2)];
if (map_is_unsorted) {
+ // assumes UNSORTED_CM was masked out
LOGERRPRINTF("Error: --%s cannot be used on an unsorted .bim file. Use\n--make-bed without --%s to sort by position first; then run\n--make-bed + --%s on the new fileset.\n", errptr, errptr, errptr);
} else {
LOGERRPRINTF("Error: --%s cannot be used with --merge-x/--split-x/--update-chr.\nFinish updating chromosome codes first.\n", errptr);
@@ -3568,6 +3592,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, char* outna
*outname_end = '\0';
LOGPRINTFWW5("--make-bed to %s.bed + %s.bim + %s.fam ... ", outname, outname, outname);
fputs("0%", stdout);
+ fflush(stdout);
if (sample_ct) {
loop_end = marker_ct / 100;
markers_done = 0;
@@ -4089,7 +4114,7 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
return retval;
}
-#define D_EPSILON 0.000244140625
+#define D_EPSILON 0.001220703125
int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outname_end, char* single_chr, char* pheno_name, double hard_call_threshold, char* missing_code, int32_t missing_pheno, uint64_t misc_flags, uint32_t is_bgen, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
@@ -4663,17 +4688,18 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
goto oxford_to_bed_ret_INVALID_DOSAGE;
}
dxx = strtod(bufptr, &bufptr2) + dyy;
- if (drand < dyy) {
+ if (drand < dxx) {
ulii = 0;
} else if (dxx < 1 - D_EPSILON) {
ulii = 1;
} else {
- // fully called genotype probabilities may add up to less
- // than one due to rounding error. If this appears to
- // have happened, do NOT make a missing call; instead
- // rescale everything to add to one and reinterpret the
- // random number. (D_EPSILON is currently set to make 4
- // decimal place precision safe to use.)
+ // fully called genotype probabilities may add up to
+ // less than one due to rounding error. If this
+ // appears to have happened, do NOT make a missing
+ // call; instead rescale everything to add to one and
+ // reinterpret the random number. (D_EPSILON is
+ // currently set to make 3 decimal place precision safe
+ // to use.)
drand *= dxx;
if (drand < dzz) {
ulii = 3;
@@ -8153,9 +8179,20 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (vcf_half_call_explicit_error) {
vcf_half_call = 0;
}
- retval = gzopen_read_checked(vcfname, &gz_infile);
- if (retval) {
- goto vcf_to_bed_ret_1;
+ // don't use gzopen_read_checked() since we want to customize the error
+ // message
+ gz_infile = gzopen(vcfname, FOPEN_RB);
+ if (!gz_infile) {
+ uii = strlen(vcfname);
+ if ((uii > 4) && (!memcmp(&(vcfname[uii - 4]), ".vcf", 4))) {
+ LOGERRPRINTFWW("Error: Failed to open %s.\n", vcfname);
+ } else {
+ LOGERRPRINTFWW("Error: Failed to open %s. (--vcf expects a complete filename; did you forget '.vcf' at the end?)\n", vcfname);
+ }
+ goto vcf_to_bed_ret_OPEN_FAIL;
+ }
+ if (gzbuffer(gz_infile, 131072)) {
+ goto vcf_to_bed_ret_NOMEM;
}
if (misc_flags & MISC_VCF_FILTER) {
// automatically include "." and "PASS"
@@ -8196,6 +8233,16 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
goto vcf_to_bed_ret_READ_FAIL;
}
+ if ((line_idx == 1) && (!memcmp(loadbuf, "BCF", 3))) {
+ // this is more informative than "missing header line"...
+ if (loadbuf[3] == 2) {
+ LOGPREPRINTFWW("Error: %s appears to be a BCF2 file. Try --bcf instead of --vcf.\n", vcfname);
+ goto vcf_to_bed_ret_INVALID_FORMAT_2;
+ } else if (loadbuf[3] == 4) {
+ LOGPREPRINTFWW("Error: %s appears to be a BCF1 file. Use 'bcftools view' to convert it to a PLINK-readable VCF.\n", vcfname);
+ goto vcf_to_bed_ret_INVALID_FORMAT_2;
+ }
+ }
if (!loadbuf[loadbuf_size - 1]) {
if (loadbuf_size == MAXLINEBUFLEN) {
goto vcf_to_bed_ret_LONG_LINE;
@@ -8542,7 +8589,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
} else if (uii != (uint32_t)(((unsigned char)'.') - '0')) {
goto vcf_to_bed_ret_INVALID_GT;
- } else if (vcf_half_call > VCF_HALF_CALL_MISSING) {
+ } else if (vcf_half_call != VCF_HALF_CALL_MISSING) {
cc = bufptr[2];
if ((cc != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
uii = ((unsigned char)cc) - '0';
@@ -8551,6 +8598,8 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
goto vcf_to_bed_haploid_1;
+ } else if (!vcf_half_call) {
+ goto vcf_to_bed_ret_HALF_CALL_ERROR;
} else {
// VCF_HALF_CALL_REFERENCE
ujj = 0;
@@ -8584,7 +8633,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
uii = (unsigned char)(*bufptr) - '0';
if (uii && (uii != alt_allele_idx)) {
if (uii == (uint32_t)(((unsigned char)'.') - '0')) {
- if (vcf_half_call <= VCF_HALF_CALL_MISSING) {
+ if (vcf_half_call == VCF_HALF_CALL_MISSING) {
continue;
}
cc = bufptr[2];
@@ -8604,6 +8653,8 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
goto vcf_to_bed_haploid_2;
+ } else if (!vcf_half_call) {
+ goto vcf_to_bed_ret_HALF_CALL_ERROR;
} else {
// VCF_HALF_CALL_REFERENCE
ujj = 0;
@@ -8788,7 +8839,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
} else if (uii != (uint32_t)(((unsigned char)'.') - '0')) {
goto vcf_to_bed_ret_INVALID_GT;
- } else if ((vcf_half_call > VCF_HALF_CALL_MISSING) && (bufptr[2] != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
+ } else if ((vcf_half_call != VCF_HALF_CALL_MISSING) && (bufptr[2] != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
bufptr = &(bufptr[2]);
uii = ((unsigned char)(*bufptr)) - '0';
if (uii > 9) {
@@ -8803,6 +8854,8 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
goto vcf_to_bed_haploid_3;
+ } else if (!vcf_half_call) {
+ goto vcf_to_bed_ret_HALF_CALL_ERROR;
} else {
ujj = 0;
goto vcf_to_bed_reference_3;
@@ -8830,7 +8883,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
if (*bufptr == '.') {
// validated on first pass
- if ((vcf_half_call > VCF_HALF_CALL_MISSING) && (bufptr[2] != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
+ if ((vcf_half_call != VCF_HALF_CALL_MISSING) && (bufptr[2] != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
bufptr = &(bufptr[2]);
uii = ((unsigned char)(*bufptr)) - '0';
while (1) {
@@ -8842,6 +8895,8 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
goto vcf_to_bed_haploid_4;
+ } else if (!vcf_half_call) {
+ goto vcf_to_bed_ret_HALF_CALL_ERROR;
} else {
ujj = 0;
goto vcf_to_bed_reference_4;
@@ -8880,6 +8935,8 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
if (uii == alt_allele_idx) {
if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
goto vcf_to_bed_haploid_4;
+ } else if (!vcf_half_call) {
+ goto vcf_to_bed_ret_HALF_CALL_ERROR;
} else if (vcf_half_call == VCF_HALF_CALL_REFERENCE) {
ujj = 0;
goto vcf_to_bed_reference_4;
@@ -9048,6 +9105,7 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .vcf file is pathologically long.\n", line_idx);
vcf_to_bed_ret_INVALID_FORMAT_2N:
logprint("\n");
+ vcf_to_bed_ret_INVALID_FORMAT_2:
logerrprintb();
vcf_to_bed_ret_INVALID_FORMAT:
retval = RET_INVALID_FORMAT;
@@ -10131,7 +10189,11 @@ int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_
goto bed_from_23_ret_INVALID_FORMAT_2;
}
int32_t cur_chrom_code = get_chrom_code_raw(chrom_start);
- if (cur_chrom_code < 0) {
+ // X/Y/MT bugfix
+ if (cur_chrom_code >= MAX_POSSIBLE_CHROM) {
+ cur_chrom_code -= MAX_POSSIBLE_CHROM - 23;
+ }
+ if ((cur_chrom_code < 0) || (cur_chrom_code > 26)) {
sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of %s.\n", line_idx, infile_name);
goto bed_from_23_ret_INVALID_FORMAT_2;
}
@@ -12955,8 +13017,11 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
if ((!invalid_allele_code_seen) && (!valid_vcf_allele_code(cptr))) {
invalid_allele_code_seen = 1;
}
- // if ALT allele is not actually present in immediate dataset, VCF
- // spec actually requires '.'
+ // if ALT allele was not actually present in immediate dataset, VCF
+ // spec actually required '.'
+ // this restriction was contrary to practice (even by the 1000G team)
+ // and should be retroactively removed very soon: see
+ // github.com/samtools/hts-specs/pull/152
if (!is_monomorphic_a2(loadbuf_collapsed, sample_ct)) {
if (flexbputs_checked(cptr, output_bgz, outfile, bgz_outfile)) {
goto recode_ret_WRITE_FAIL;
@@ -14276,7 +14341,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_OPEN_FAIL;
}
*outname_end = '\0';
- LOGPRINTFWW5("--recode to %s.ped + %s.map ... ", outname, outname);
+ LOGPRINTFWW5("--recode ped to %s.ped + %s.map ... ", outname, outname);
if (recode_load_to(loadbuf, bedfile, bed_offset, unfiltered_marker_ct, 0, marker_ct, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
goto recode_ret_READ_FAIL;
}
@@ -14322,7 +14387,8 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
}
- if (!(recode_modifier & (RECODE_TRANSPOSE | RECODE_23 | RECODE_A | RECODE_A_TRANSPOSE | RECODE_AD | RECODE_BEAGLE | RECODE_BEAGLE_NOMAP | RECODE_BIMBAM | RECODE_BIMBAM_1CHR | RECODE_FASTPHASE | RECODE_FASTPHASE_1CHR | RECODE_HV | RECODE_HV_1CHR | RECODE_LIST | RECODE_STRUCTURE | RECODE_VCF))) {
+ // bugfix: stop generating a .map file when Oxford-format requested
+ if (recode_modifier & (RECODE_COMPOUND | RECODE_LGEN | RECODE_LGEN_REF | RECODE_PED)) {
strcpy(outname_end, ".map");
retval = write_map_or_bim(outname, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, nullptr, ((recode_modifier & (RECODE_TAB | RECODE_DELIMX)) == RECODE_DELIMX)? ' ' : '\t', chrom_info_ptr);
if (retval) {
diff --git a/plink_dosage.c b/plink_dosage.c
index 4865d2c..87cbfbc 100644
--- a/plink_dosage.c
+++ b/plink_dosage.c
@@ -1431,7 +1431,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
bigstack_alloc_d(sample_ct, &dgels_b)) {
goto plink1_dosage_ret_NOMEM;
}
- mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!mi_buf) {
goto plink1_dosage_ret_NOMEM;
}
diff --git a/plink_glm.c b/plink_glm.c
index 0310da9..3db0ae6 100644
--- a/plink_glm.c
+++ b/plink_glm.c
@@ -1482,8 +1482,8 @@ static inline void compute_hessian(const float* mm, const float* vv, float* dest
}
void solve_linear_system(const float* ll, const float* yy, float* xx, uint32_t dd) {
- // if we're ever able to produce 32-bit Linux builds with statically linked
- // LAPACK, we might want to use it in place of this hardcoded stuff
+ // might want to use this in NOLAPACK case only, since we can now produce
+ // 32-bit Linux builds with statically linked LAPACK
uintptr_t dim_cta4 = round_up_pow2(dd, 4);
const float* ll_ptr;
float* xx_ptr;
@@ -4824,7 +4824,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
goto glm_linear_assoc_ret_NOMEM;
}
- g_linear_mt[tidx].mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct_max * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ g_linear_mt[tidx].mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct_max * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!(g_linear_mt[tidx].mi_buf)) {
goto glm_linear_assoc_ret_NOMEM;
}
@@ -6342,7 +6342,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
goto glm_logistic_assoc_ret_NOMEM;
}
if (constraint_ct_max) {
- g_logistic_mt[tidx].mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct_max * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ g_logistic_mt[tidx].mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct_max * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!(g_logistic_mt[tidx].mi_buf)) {
goto glm_logistic_assoc_ret_NOMEM;
}
@@ -7353,7 +7353,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
bigstack_alloc_d(param_ct * param_ct, ¶m_2d_buf2)) {
goto glm_linear_nosnp_ret_NOMEM;
}
- mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!mi_buf) {
goto glm_linear_nosnp_ret_NOMEM;
}
@@ -8309,7 +8309,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
fill_unfiltered_sample_to_cluster(sample_valid_ct, g_perm_cluster_ct, g_perm_cluster_map, g_perm_cluster_starts, g_perm_sample_to_cluster);
}
if (constraint_ct) {
- mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ mi_buf = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(param_ct * MATRIX_INVERT_BUF1_ELEM_ALLOC);
if (!mi_buf) {
goto glm_logistic_nosnp_ret_NOMEM;
}
diff --git a/plink_help.c b/plink_help.c
index 232687b..93d992b 100644
--- a/plink_help.c
+++ b/plink_help.c
@@ -45,7 +45,7 @@ uint32_t edit1_match(uint32_t len1, char* s1, uint32_t len2, char* s2) {
return 1;
}
-#define MAX_EQUAL_HELP_PARAMS 22
+#define MAX_EQUAL_HELP_PARAMS 23
typedef struct {
uint32_t iters_left;
@@ -379,58 +379,62 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" improperly. If you have any doubt, stick with --make-bed.\n\n"
);
- help_print("recode\trecode12\ttab\ttranspose\trecode-lgen\trecodeAD\trecodead\trecodeA\trecodea\trecode-rlist\trecode-allele\tlist\twith-reference\trecode-vcf\tfid\tiid\trecode-beagle\trecode-bimbam\trecode-fastphase\trecodeHV\trecodehv\trecode-structure", &help_ctrl, 1,
-" --recode <01 | 12> <23 | A{-transpose} | AD | beagle{-nomap} | bimbam{-1chr}\n"
-" | compound-genotypes | fastphase{-1chr} | HV{-1chr} | lgen{-ref} |\n"
-" list | oxford | rlist | structure | transpose | vcf | vcf-fid |\n"
-" vcf-iid> <tab | tabx | spacex | bgz | gen-gz> <include-alt>\n"
-" <omit-nonmale-y>\n"
-" Create a new text fileset with all filters applied. By default, the\n"
-" fileset consists of a .ped and a .map file, readable with --file.\n"
-" * The '12' modifier causes A1 (usually minor) alleles to be coded as '1'\n"
-" and A2 alleles to be coded as '2', while '01' maps A1 -> 0 and A2 -> 1.\n"
-" * The '23' modifier causes a 23andMe-formatted file to be generated. This\n"
-" can only be used on a single sample's data (--keep may be handy).\n"
-" * The 'AD' modifier causes an sample-major additive (0/1/2) + dominant\n"
-" (het = 1, otherwise 0) component file, suitable for loading from R, to be\n"
-" generated. If you don't want the dominant component, use 'A' instead.\n"
+ help_print("recode\trecode12\ttab\ttranspose\trecode-lgen\trecodeAD\trecodead\trecodeA\trecodea\trecode-rlist\trecode-allele\tlist\twith-reference\trecode-vcf\tfid\tiid\trecode-beagle\trecode-bimbam\trecode-fastphase\trecodeHV\trecodehv\trecode-structure\texport", &help_ctrl, 1,
+" --recode [output format] <01 | 12> <tab | tabx | spacex | bgz | gen-gz>\n"
+" <include-alt> <omit-nonmale-y>\n"
+" Create a new text fileset with all filters applied. The following output\n"
+" formats are supported:\n"
+" * '23': 23andMe 4-column format. This can only be used on a single\n"
+" sample's data (--keep may be handy), and does not support multicharacter\n"
+" allele codes.\n"
+" * 'A': Sample-major additive (0/1/2) coding, suitable for loading from R.\n"
" If you need uncounted alleles to be named in the header line, add the\n"
" 'include-alt' modifier.\n"
-" * The 'A-transpose' modifier causes a variant-major additive component file\n"
-" to be generated.\n"
-" * The 'beagle' modifier causes unphased per-autosome .dat and .map files,\n"
-" readable by early BEAGLE versions, to be generated, while 'beagle-nomap'\n"
-" generates a single .beagle.dat file.\n"
-" * The 'bimbam' modifier causes a BIMBAM-formatted fileset to be generated.\n"
-" If your input data only contains one chromosome, you can use\n"
-" 'bimbam-1chr' instead to write a two-column .pos.txt file.\n"
-" * The 'compound-genotypes' modifier removes the space between pairs of\n"
-" allele codes for the same variant when generating a .ped + .map fileset.\n"
-" * The 'fastphase' modifier causes per-chromosome fastPHASE files to be\n"
-" generated. If your input data only contains one chromosome, you can use\n"
-" 'fastphase-1chr' instead to exclude the chromosome number from the file\n"
-" extension.\n"
-" * The 'HV' modifier causes a Haploview-format .ped + .info fileset to be\n"
-" generated per chromosome. 'HV-1chr' is analogous to 'fastphase-1chr'.\n"
-" * The 'lgen' modifier causes a long-format fileset (loadable with --lfile)\n"
-" to be generated, while 'lgen-ref' generates a (usually) smaller\n"
-" long-format fileset loadable with --lfile + --reference.\n"
-" * The 'list' modifier creates a genotype-based list, while 'rlist' creates\n"
-" a rare-genotype fileset. With these formats, the 'omit-nonmale-y'\n"
-" modifier causes nonmale genotypes to be omitted on the Y chromosome.\n"
-" * 'oxford' causes an Oxford-format .gen + .sample fileset to be generated.\n"
-" If you also include the 'gen-gz' modifier, the .gen file is gzipped.\n"
-" * The 'structure' modifier causes a Structure-format file to be generated.\n"
-" * 'transpose' creates a transposed text fileset (loadable with --tfile).\n"
-" * 'vcf', 'vcf-fid', and 'vcf-iid' result in production of a VCFv4.2 file.\n"
-" 'vcf-fid' and 'vcf-iid' cause family IDs or within-family IDs\n"
-" respectively to be used for the sample IDs in the last header row, while\n"
-" 'vcf' merges both IDs and puts an underscore between them.\n"
-" If the 'bgz' modifier is added, the VCF file is block-gzipped.\n"
+" * 'AD': Sample-major additive (0/1/2) + dominant (het=1/hom=0) coding.\n"
+" Also supports 'include-alt'.\n"
+" * 'A-transpose': Variant-major 0/1/2.\n"
+" * 'beagle': Unphased per-autosome .dat and .map files, readable by early\n"
+" BEAGLE versions.\n"
+" * 'beagle-nomap': Single .beagle.dat file.\n"
+" * 'bimbam': Regular BIMBAM format.\n"
+" * 'bimbam-1chr': BIMBAM format, with a two-column .pos.txt file. Does not\n"
+" support multiple chromosomes.\n"
+" * 'fastphase': Per-chromosome fastPHASE files, with\n"
+" .chr-[chr #].recode.phase.inp filename extensions.\n"
+" * 'fastphase-1chr': Single .recode.phase.inp file. Does not support\n"
+" multiple chromosomes.\n"
+" * 'HV': Per-chromosome Haploview files, with .chr-[chr #].{ped,info}\n"
+" filename extensions.\n"
+" * 'HV-1chr': Single Haploview .ped + .info file pair. Does not support\n"
+" multiple chromosomes.\n"
+" * 'lgen': PLINK 1 long-format (.lgen + .fam + .map), loadable with --lfile.\n"
+" * 'lgen-ref': .lgen + .fam + .map + .ref, loadable with --lfile +\n"
+" --reference.\n"
+" * 'list': Single genotype-based list, up to 4 lines per variant. To omit\n"
+" nonmale genotypes on the Y chromosome, add the 'omit-nonmale-y' modifier.\n"
+" * 'rlist': .rlist + .fam + .map fileset, where the .rlist file is a\n"
+" genotype-based list which omits the most common genotype for each\n"
+" variant. Also supports 'omit-nonmale-y'.\n"
+" * 'oxford': Oxford-format .gen + .sample. With the 'gen-gz' modifier, the\n"
+" .gen file is gzipped.\n"
+" * 'ped': PLINK 1 sample-major (.ped + .map), loadable with --file.\n"
+" * 'compound-genotypes': Same as 'ped', except that the space between each\n"
+" pair of same-variant allele codes is removed.\n"
+" * 'structure': Structure-format.\n"
+" * 'transpose': PLINK 1 variant-major (.tped + .tfam), loadable with\n"
+" --tfile.\n"
+" * 'vcf', 'vcf-fid', 'vcf-iid': VCFv4.2. 'vcf-fid' and 'vcf-iid' cause\n"
+" family IDs or within-family IDs respectively to be used for the sample\n"
+" IDs in the last header row, while 'vcf' merges both IDs and puts an\n"
+" underscore between them. If the 'bgz' modifier is added, the VCF file is\n"
+" block-gzipped.\n"
" The A2 allele is saved as the reference and normally flagged as not based\n"
-" on a real reference genome ('PR' INFO field value). When it is important\n"
-" for reference alleles to be correct, you'll also want to include\n"
-" --a2-allele and --real-ref-alleles in your command.\n"
+" on a real reference genome (INFO:PR). When it is important for reference\n"
+" alleles to be correct, you'll also want to include --a2-allele and\n"
+" --real-ref-alleles in your command.\n"
+" In addition,\n"
+" * The '12' modifier causes A1 (usually minor) alleles to be coded as '1'\n"
+" and A2 alleles to be coded as '2', while '01' maps A1 -> 0 and A2 -> 1.\n"
" * The 'tab' modifier makes the output mostly tab-delimited instead of\n"
" mostly space-delimited. 'tabx' and 'spacex' force all tabs and all\n"
" spaces, respectively.\n\n"
@@ -610,9 +614,9 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
);
help_print("r\tr2\tmatrix\tinter-chr\tD\tdprime\twith-freqs\tld", &help_ctrl, 1,
" --r <square | square0 | triangle | inter-chr> <gz | bin | bin4> <spaces>\n"
-" <in-phase> <dprime> <with-freqs> <yes-really>\n"
+" <in-phase> <d | dprime | dprime-signed> <with-freqs> <yes-really>\n"
" --r2 <square | square0 | triangle | inter-chr> <gz | bin | bin4> <spaces>\n"
-" <in-phase> <dprime> <with-freqs> <yes-really>\n"
+" <in-phase> <d | dprime | dprime-signed> <with-freqs> <yes-really>\n"
" LD statistic reports. --r yields raw inter-variant correlations, while\n"
" --r2 reports their squares. You can request results for all pairs in\n"
" matrix format (if you specify 'bin' or one of the shape modifiers), all\n"
@@ -625,11 +629,12 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" * By default, text matrices are tab-delimited; 'spaces' switches this.\n"
" * 'in-phase' adds a column with in-phase allele pairs to table-formatted\n"
" reports. (This cannot be used with very long allele codes.)\n"
-" * 'dprime' adds Lewontin's D-prime statistic to table-formatted reports,\n"
-" and forces both r/r^2 and D-prime to be based on the maximum likelihood\n"
-" solution to the cubic equation discussed in Gaunt T, Rodriguez S, Day I\n"
-" (2007) Cubic exact solutions for the estimation of pairwise haplotype\n"
-" frequencies.\n"
+" * 'dprime' adds the absolute value of Lewontin's D-prime statistic to\n"
+" table-formatted reports, and forces both r/r^2 and D-prime to be based on\n"
+" the maximum likelihood solution to the cubic equation discussed in Gaunt\n"
+" T, Rodriguez S, Day I (2007) Cubic exact solutions for the estimation of\n"
+" pairwise haplotype frequencies.\n"
+" 'dprime-signed' keeps the sign, while 'd' skips division by D_{max}.\n"
" * 'with-freqs' adds MAF columns to table-formatted reports.\n"
" * Since the resulting file can easily be huge, you're required to add the\n"
" 'yes-really' modifier when requesting an unfiltered, non-distributed all\n"
@@ -1679,7 +1684,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --make-founders <require-2-missing> <first> : Clear parental IDs for those\n"
" with 1+ missing parent(s).\n"
);
- help_print("recode\trecode-allele", &help_ctrl, 0,
+ help_print("recode\trecode-allele\texport", &help_ctrl, 0,
" --recode-allele [fn] : With --recode A/A-transpose/AD, count alleles named in\n"
" the file (otherwise A1 alleles are always counted).\n"
);
@@ -1832,9 +1837,10 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --mendel-multigen : Make Mendel error checks consider (great-)grandparental\n"
" genotypes when parental genotype data is missing.\n"
);
- help_print("r\tr2\tld-window-kb\tld-window-r2\tld-window\tld-snp\tld-snps\tld-snp-list", &help_ctrl, 0,
+ help_print("r\tr2\tld-window-kb\tld-window-cm\tld-window-r2\tld-window\tld-snp\tld-snps\tld-snp-list", &help_ctrl, 0,
" --ld-window [ct+1] : Set --r/--r2 max variant ct pairwise distance (usu. 10).\n"
" --ld-window-kb [x] : Set --r/--r2 max kb pairwise distance (usually 1000).\n"
+" --ld-window-cm [x] : Set --r/--r2 max centimorgan pairwise distance.\n"
" --ld-window-r2 [x] : Set threshold for --r2 report inclusion (usually 0.2).\n"
" --ld-snp [var ID] : Set first variant in all --r/--r2 pairs.\n"
" --ld-snps [vID...] : Restrict first --r/--r2 variant to the given ranges.\n"
diff --git a/plink_lasso.c b/plink_lasso.c
index 999b478..3f10d57 100644
--- a/plink_lasso.c
+++ b/plink_lasso.c
@@ -513,7 +513,7 @@ int32_t lasso_lambda(const uintptr_t* marker_exclude, const uintptr_t* marker_re
goto lasso_lambda_ret_READ_FAIL;
}
}
- uint32_t min_ploidy_1;
+ uint32_t min_ploidy_1 = 0;
uint32_t uii;
if (marker_uidx >= chrom_end) {
chrom_fo_idx++;
diff --git a/plink_ld.c b/plink_ld.c
index b3d97fb..0ecce45 100644
--- a/plink_ld.c
+++ b/plink_ld.c
@@ -17,6 +17,7 @@ void ld_epi_init(Ld_info* ldip, Epi_info* epi_ip, Clump_info* clump_ip) {
ldip->prune_last_param = 0.0;
ldip->window_size = 0xffffffffU;
ldip->window_bp = 0xffffffffU;
+ ldip->window_cm = -1;
ldip->window_r2 = 0.2;
ldip->blocks_max_bp = 0xffffffffU;
ldip->blocks_min_maf = 0.05;
@@ -930,7 +931,7 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
goto ld_prune_ret_NOMEM;
}
- irow = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(window_max * 2 * sizeof(MATRIX_INVERT_BUF1_TYPE));
+ irow = (MATRIX_INVERT_BUF1_TYPE*)bigstack_alloc(window_max * MATRIX_INVERT_BUF1_CHECKED_ALLOC);
if (!irow) {
goto ld_prune_ret_NOMEM;
}
@@ -1947,6 +1948,7 @@ static uint32_t g_ld_plink_maxsnp;
static char* g_ld_marker_ids;
static Chrom_info* g_ld_chrom_info_ptr;
static uint32_t* g_ld_marker_pos;
+static double* g_ld_marker_cms;
static uintptr_t* g_ld_marker_exclude_idx1;
static uintptr_t* g_ld_marker_exclude;
static char** g_ld_marker_allele_ptrs;
@@ -2592,7 +2594,10 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
double window_r2 = g_ld_window_r2;
uint32_t plink_maxsnp = g_ld_plink_maxsnp;
uint32_t is_inter_chr = g_ld_is_inter_chr;
- uint32_t is_dprime = (g_ld_modifier / LD_DPRIME) & 1;
+
+ // 0 = not d/dprime/dprime-signed
+ uint32_t dprime_type = g_ld_modifier & LD_DX;
+
uint32_t is_r2 = g_ld_is_r2;
uint32_t prefix_len = g_ld_prefix_len;
uint32_t chrom_fo_idx1 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
@@ -2631,8 +2636,8 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
}
sptr_cur = memseta(sptr_cur, 32, 11);
sptr_cur = memcpyl3a(sptr_cur, is_r2? "R2 " : " R ");
- if (is_dprime) {
- sptr_cur = memcpya(sptr_cur, " DP ", 13);
+ if (dprime_type) {
+ sptr_cur = memcpya(sptr_cur, (dprime_type == LD_D)? " D " : " DP ", 13);
}
*sptr_cur++ = '\n';
}
@@ -2675,7 +2680,7 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
}
chrom_end2 = 0;
block_end2 = ld_interval1[2 * block_idx1 + 1];
- dptr = &(results[(block_idx1 * marker_idx2_maxw + block_idx2 - block_idx2_start) * (1 + is_dprime)]);
+ dptr = &(results[(block_idx1 * marker_idx2_maxw + block_idx2 - block_idx2_start) * (1 + (dprime_type != 0))]);
while (block_idx2 < block_end2) {
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx2);
dxx = *dptr++;
@@ -2710,12 +2715,12 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
dxx = fabs(dxx);
}
sptr_cur = width_force(12, sptr_cur, dtoa_g(dxx, sptr_cur));
- if (is_dprime) {
+ if (dprime_type) {
*sptr_cur++ = ' ';
sptr_cur = width_force(12, sptr_cur, dtoa_g(*dptr++, sptr_cur));
}
sptr_cur = memcpya(sptr_cur, " \n", 2);
- } else if (is_dprime) {
+ } else if (dprime_type) {
dptr++;
}
block_idx2++;
@@ -5028,6 +5033,8 @@ THREAD_RET_TYPE ld_dprime_thread(void* arg) {
uintptr_t* sex_male = g_ld_sex_male;
uintptr_t* cur_geno1_male = nullptr;
uint32_t* ld_interval1 = g_ld_interval1;
+ uint32_t is_dprime = g_ld_modifier & (LD_DPRIME | LD_DPRIME_SIGNED);
+ uint32_t is_dprime_unsigned = g_ld_modifier & LD_DPRIME;
uint32_t is_r2 = g_ld_is_r2;
uint32_t xstart1 = g_ld_xstart1;
uint32_t xend1 = g_ld_xend1;
@@ -5158,11 +5165,17 @@ THREAD_RET_TYPE ld_dprime_thread(void* arg) {
*rptr = dxx / sqrt(freq11_expected * freq2x * freqx2);
}
rptr++;
- if (dxx >= 0) {
- *rptr = dxx / MINV(freqx1 * freq2x, freqx2 * freq1x);
- } else {
- *rptr = -dxx / MINV(freq11_expected, freqx2 * freq2x);
+ if (is_dprime) {
+ if (dxx >= 0) {
+ dxx /= MINV(freqx1 * freq2x, freqx2 * freq1x);
+ } else {
+ if (is_dprime_unsigned) {
+ dxx = -dxx;
+ }
+ dxx /= MINV(freq11_expected, freqx2 * freq2x);
+ }
}
+ *rptr = dxx;
}
rptr++;
}
@@ -5179,6 +5192,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
uintptr_t* marker_exclude_idx1 = g_ld_marker_exclude_idx1;
uintptr_t* marker_exclude = g_ld_marker_exclude;
uint32_t* marker_pos = g_ld_marker_pos;
+ double* marker_cms = g_ld_marker_cms;
uintptr_t marker_ct = g_ld_marker_ct;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t founder_ct = g_ld_founder_ct;
@@ -5198,6 +5212,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
uint32_t idx1_subset = (ldip->snpstr || ldip->snps_rl.name_ct);
uint32_t window_size_m1 = ldip->window_size - 1;
uint32_t window_bp = ldip->window_bp;
+ double window_cm = ldip->window_cm;
uint32_t thread_ct = g_ld_thread_ct;
uint32_t chrom_fo_idx = 0;
uint32_t is_haploid = 0;
@@ -5238,6 +5253,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
uint32_t chrom_idx;
uint32_t chrom_end;
uint32_t cur_marker_pos;
+ double cur_marker_cm;
uint32_t is_last_block;
uint32_t uii;
if (bigstack_alloc_uc(262144, &overflow_buf) ||
@@ -5320,7 +5336,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
if (marker_idx1) {
marker_uidx1 = jump_forward_unset_unsafe(marker_exclude_idx1, marker_uidx1 + 1, marker_idx1);
}
- LOGPRINTF("--r%s%s%s dprime%s...", g_ld_is_r2? "2" : "", is_inter_chr? " inter-chr" : "", g_ld_marker_allele_ptrs? " in-phase" : "", g_ld_set_allele_freqs? " with-freqs" : "");
+ LOGPRINTF("--r%s%s%s d%s%s...", g_ld_is_r2? "2" : "", is_inter_chr? " inter-chr" : "", g_ld_marker_allele_ptrs? " in-phase" : "", (g_ld_modifier & LD_D)? "" : ((g_ld_modifier & LD_DPRIME)? "prime" : "prime-signed"), g_ld_set_allele_freqs? " with-freqs" : "");
fputs(" 0%", stdout);
while (1) {
fputs(" [processing]", stdout);
@@ -5343,7 +5359,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
if (idx1_subset) {
if (!is_inter_chr) {
chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
- marker_uidx2_base = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, &uii);
+ marker_uidx2_base = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, window_cm, &uii);
marker_idx2_base = marker_uidx2_base - popcount_bit_idx(marker_exclude, 0, marker_uidx2_base);
marker_idx2 = marker_idx2_base + uii;
} else {
@@ -5388,11 +5404,13 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
uii = 1;
}
if (!is_inter_chr) {
+ // uii == 0 if we can perform an incremental update, 1 if we need
+ // fully-powered window_back()/window_forward()
if (uii) {
if (idx1_subset) {
- marker_uidx2_back = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, &window_trail_ct);
+ marker_uidx2_back = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, window_cm, &window_trail_ct);
}
- marker_uidx2_fwd = window_forward(marker_pos, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, &window_lead_ct);
+ marker_uidx2_fwd = window_forward(marker_pos, marker_cms, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, window_cm, &window_lead_ct);
marker_uidx2_fwd2 = marker_uidx2_fwd;
if (marker_uidx2_fwd < chrom_last) {
marker_uidx2_fwd2++;
@@ -5416,19 +5434,43 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
}
}
+ if (marker_cms) {
+ cur_marker_cm = marker_cms[marker_uidx1_tmp] - window_cm;
+ while (marker_cms[marker_uidx2_back] < cur_marker_cm) {
+ window_trail_ct--;
+ marker_uidx2_back++;
+ next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
+ }
+ }
}
if (marker_uidx2_fwd < chrom_last) {
cur_marker_pos = marker_pos[marker_uidx1_tmp] + window_bp;
- while (marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) {
- marker_uidx2_fwd = marker_uidx2_fwd2;
- window_lead_ct++;
- if (marker_uidx2_fwd == chrom_last) {
- break;
+ if (!marker_cms) {
+ while (marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) {
+ marker_uidx2_fwd = marker_uidx2_fwd2;
+ window_lead_ct++;
+ if (marker_uidx2_fwd == chrom_last) {
+ break;
+ }
+ marker_uidx2_fwd2++;
+ next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
+ if (window_lead_ct > window_size_m1) {
+ break;
+ }
}
- marker_uidx2_fwd2++;
- next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
- if (window_lead_ct > window_size_m1) {
- break;
+ } else {
+ cur_marker_cm = marker_cms[marker_uidx1_tmp] + window_cm;
+ while ((marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) && (marker_cms[marker_uidx2_fwd2] <= window_cm)) {
+ marker_uidx2_fwd = marker_uidx2_fwd2;
+ window_lead_ct++;
+ if (marker_uidx2_fwd == chrom_last) {
+ break;
+ }
+ marker_uidx2_fwd2++;
+ next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
+ if (window_lead_ct > window_size_m1) {
+ break;
+ }
}
}
}
@@ -5601,10 +5643,12 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
Chrom_info* chrom_info_ptr = g_ld_chrom_info_ptr;
uintptr_t* marker_exclude_idx1 = marker_exclude;
uint32_t* marker_pos = g_ld_marker_pos;
+ double* marker_cms = g_ld_marker_cms;
uint32_t founder_trail_ct = founder_ct_192_long - founder_ctl * 2;
uint32_t idx1_subset = (ldip->snpstr || ldip->snps_rl.name_ct);
uint32_t window_size_m1 = ldip->window_size - 1;
uint32_t window_bp = ldip->window_bp;
+ double window_cm = ldip->window_cm;
uint32_t thread_ct = g_ld_thread_ct;
uint32_t chrom_fo_idx = 0;
uint32_t chrom_fo_idx2 = 0;
@@ -5653,6 +5697,7 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
uint32_t chrom_idx2;
uint32_t chrom_end2;
uint32_t cur_marker_pos;
+ double cur_marker_cm;
uint32_t is_last_block;
uint32_t uii;
int32_t ii;
@@ -5766,7 +5811,7 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
if (g_ld_is_r2) {
g_ld_window_r2 = ldip->window_r2;
}
- if (ld_modifier & LD_DPRIME) {
+ if (ld_modifier & LD_DX) {
// this is more like --fast-epistasis under the hood, since it requires the
// entire 3x3 table
g_ld_marker_ctm8 = round_up_pow2(marker_idx2_maxw, 4);
@@ -5853,7 +5898,7 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
if (idx1_subset) {
if (!is_inter_chr) {
chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
- marker_uidx2_base = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, &uii);
+ marker_uidx2_base = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, window_cm, &uii);
marker_idx2_base = marker_uidx2_base - popcount_bit_idx(marker_exclude, 0, marker_uidx2_base);
marker_idx2 = marker_idx2_base + uii;
} else {
@@ -5897,11 +5942,13 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
uii = 1;
}
if (!is_inter_chr) {
+ // uii == 0 if we can perform an incremental update, 1 if we need
+ // fully-powered window_back()/window_forward()
if (uii) {
if (idx1_subset) {
- marker_uidx2_back = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, &window_trail_ct);
+ marker_uidx2_back = window_back(marker_pos, marker_cms, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, window_cm, &window_trail_ct);
}
- marker_uidx2_fwd = window_forward(marker_pos, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, &window_lead_ct);
+ marker_uidx2_fwd = window_forward(marker_pos, marker_cms, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, window_cm, &window_lead_ct);
marker_uidx2_fwd2 = marker_uidx2_fwd;
if (marker_uidx2_fwd < chrom_last) {
marker_uidx2_fwd2++;
@@ -5925,19 +5972,43 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
}
}
+ if (marker_cms) {
+ cur_marker_cm = marker_cms[marker_uidx1_tmp] - window_cm;
+ while (marker_cms[marker_uidx2_back] < cur_marker_cm) {
+ window_trail_ct--;
+ marker_uidx2_back++;
+ next_unset_unsafe_ck(marker_exclude, &marker_uidx2_back);
+ }
+ }
}
if (marker_uidx2_fwd < chrom_last) {
cur_marker_pos = marker_pos[marker_uidx1_tmp] + window_bp;
- while (marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) {
- marker_uidx2_fwd = marker_uidx2_fwd2;
- window_lead_ct++;
- if (marker_uidx2_fwd == chrom_last) {
- break;
+ if (!marker_cms) {
+ while (marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) {
+ marker_uidx2_fwd = marker_uidx2_fwd2;
+ window_lead_ct++;
+ if (marker_uidx2_fwd == chrom_last) {
+ break;
+ }
+ marker_uidx2_fwd2++;
+ next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
+ if (window_lead_ct > window_size_m1) {
+ break;
+ }
}
- marker_uidx2_fwd2++;
- next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
- if (window_lead_ct > window_size_m1) {
- break;
+ } else {
+ cur_marker_cm = marker_cms[marker_uidx1_tmp] + window_cm;
+ while ((marker_pos[marker_uidx2_fwd2] <= cur_marker_pos) && (marker_cms[marker_uidx2_fwd2] <= cur_marker_cm)) {
+ marker_uidx2_fwd = marker_uidx2_fwd2;
+ window_lead_ct++;
+ if (marker_uidx2_fwd == chrom_last) {
+ break;
+ }
+ marker_uidx2_fwd2++;
+ next_unset_unsafe_ck(marker_exclude, &marker_uidx2_fwd2);
+ if (window_lead_ct > window_size_m1) {
+ break;
+ }
}
}
}
@@ -6087,7 +6158,7 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
return retval;
}
-int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, double* set_allele_freqs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t [...]
+int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, double* set_allele_freqs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, double* marker_cms, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t par [...]
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
uintptr_t founder_ct = popcount_longs(founder_info, unfiltered_sample_ctv2 / 2);
@@ -6168,6 +6239,7 @@ int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t be
g_ld_plink_maxsnp = plink_maxsnp;
g_ld_marker_ids = marker_ids;
g_ld_marker_pos = marker_pos;
+ g_ld_marker_cms = (ldip->window_cm == -1)? nullptr : marker_cms;
g_ld_marker_exclude = marker_exclude;
g_ld_max_marker_id_len = max_marker_id_len;
retval = ld_report_regular(threads, ldip, bedfile, bed_offset, unfiltered_marker_ct, marker_reverse, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, founder_include2, founder_male_include2, loadbuf, outname, hh_exists);
@@ -12119,6 +12191,7 @@ void set_test_score(uintptr_t marker_ct, double chisq_threshold, uint32_t set_ma
}
if (!raw_sig_ct) {
// not possible for initial pass, so no need to set raw_sig_ct_ptr, etc.
+ // bugfix: actually, that comment was incorrect
*set_score_ptr = 0.0;
return;
}
@@ -12181,6 +12254,7 @@ int32_t set_test_common_init(pthread_t* threads, FILE* bedfile, uintptr_t bed_of
uint32_t range_stop;
uint32_t include_out_of_bounds;
uint32_t cur_set_size;
+ uint32_t cur_range_size;
uint32_t uii;
if (bigstack_calloc_ul(raw_set_ctl, set_incl_ptr) ||
bigstack_alloc_ui(marker_ct_orig, &marker_midx_to_idx)) {
@@ -12200,10 +12274,11 @@ int32_t set_test_common_init(pthread_t* threads, FILE* bedfile, uintptr_t bed_of
for (range_idx = 0; range_idx < range_ct; range_idx++) {
marker_midx = *(++cur_setdef);
range_stop = *(++cur_setdef);
- cur_set_size += range_stop - marker_midx;
+ cur_range_size = range_stop - marker_midx;
+ cur_set_size += cur_range_size;
if (!uii) {
chisq_ptr = &(orig_chisq[marker_midx_to_idx[marker_midx]]);
- chisq_end = &(chisq_ptr[cur_set_size]);
+ chisq_end = &(chisq_ptr[cur_range_size]);
for (; chisq_ptr < chisq_end; chisq_ptr++) {
if (*chisq_ptr >= chisq_threshold) {
uii = 1;
diff --git a/plink_ld.h b/plink_ld.h
index 573a579..a5c9cae 100644
--- a/plink_ld.h
+++ b/plink_ld.h
@@ -14,23 +14,27 @@
#define LD_INTER_CHR 0x40
#define LD_REPORT_GZ 0x80
#define LD_INPHASE 0x100
-#define LD_DPRIME 0x200
-#define LD_WITH_FREQS 0x400
-#define LD_YES_REALLY 0x800
-#define LD_PRUNE_PAIRWISE 0x1000
-#define LD_PRUNE_PAIRPHASE 0x2000
-#define LD_PRUNE_KB_WINDOW 0x4000
-#define LD_IGNORE_X 0x8000
-#define LD_WEIGHTED_X 0x10000
-#define LD_SNP_LIST_FILE 0x20000
-#define LD_BLOCKS_NO_PHENO_REQ 0x40000
-#define LD_BLOCKS_NO_SMALL_MAX_SPAN 0x80000
-#define LD_FLIPSCAN_VERBOSE 0x100000
-#define LD_SHOW_TAGS_LIST_ALL 0x200000
-#define LD_SHOW_TAGS_MODE2 0x400000
+#define LD_D 0x200
+#define LD_DPRIME 0x400
+#define LD_DPRIME_SIGNED 0x800
+#define LD_DX (LD_D | LD_DPRIME | LD_DPRIME_SIGNED)
+#define LD_WITH_FREQS 0x1000
+#define LD_YES_REALLY 0x2000
+#define LD_PRUNE_PAIRWISE 0x4000
+#define LD_PRUNE_PAIRPHASE 0x8000
+#define LD_PRUNE_KB_WINDOW 0x10000
+#define LD_IGNORE_X 0x20000
+#define LD_WEIGHTED_X 0x40000
+#define LD_SNP_LIST_FILE 0x80000
+#define LD_BLOCKS_NO_PHENO_REQ 0x100000
+#define LD_BLOCKS_NO_SMALL_MAX_SPAN 0x200000
+#define LD_FLIPSCAN_VERBOSE 0x400000
+#define LD_SHOW_TAGS_LIST_ALL 0x800000
+#define LD_SHOW_TAGS_MODE2 0x1000000
typedef struct {
double prune_last_param; // VIF or r^2 threshold
+ double window_cm;
double window_r2;
double blocks_min_maf;
double blocks_inform_frac;
@@ -112,7 +116,7 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uintptr_t* pheno_c, uintptr_t* founder_info, uintptr_t* s [...]
-int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, double* set_allele_freqs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t [...]
+int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, double* set_allele_freqs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, double* marker_cms, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t par [...]
int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uintptr_t* sex_male, char* outname, char* outname_end, uint32_t hh_exists);
diff --git a/plink_matrix.c b/plink_matrix.c
index 76507cc..aa1156f 100644
--- a/plink_matrix.c
+++ b/plink_matrix.c
@@ -37,20 +37,14 @@ double pythag(const double a, const double b) {
#ifdef NOLAPACK
int32_t svdcmp_c(int32_t m, double* a, double* w, double* v) {
// C port of PLINK stats.cpp svdcmp().
- // Note that this function is NOT thread-safe, due to the buffer allocated
- // from the workspace stack. Pass in a preallocated buffer if that's not
- // okay.
- unsigned char* bigstack_mark = g_bigstack_base;
+ // now thread-safe.
+ double* rv1 = &(w[(uint32_t)m]);
int32_t n = m;
int32_t flag;
int32_t l = 0; // suppress compile warning
int32_t i,its,j,jj,k,nm;
double anorm,c,f,g,h,s,scale,x,y,z;
- double volatile temp;
- double* rv1;
- if (bigstack_alloc_d(m, &rv1)) {
- return -1;
- }
+ double temp;
g=scale=anorm=0.0;
for (i=0;i<n;i++) {
@@ -225,12 +219,12 @@ int32_t svdcmp_c(int32_t m, double* a, double* w, double* v) {
w[k]=x;
}
}
- bigstack_reset(bigstack_mark);
return 1;
}
int32_t invert_matrix(int32_t dim, double* matrix, MATRIX_INVERT_BUF1_TYPE* dbl_1d_buf, double* dbl_2d_buf) {
// C port of PLINK stats.cpp's svd_inverse() function.
+ // Now thread-safe in NOLAPACK case.
// w -> dbl_1d_buf
// v -> dbl_2d_buf
diff --git a/plink_matrix.h b/plink_matrix.h
index f95bf69..16a1bb2 100644
--- a/plink_matrix.h
+++ b/plink_matrix.h
@@ -17,6 +17,8 @@
#ifdef NOLAPACK
#define MATRIX_INVERT_BUF1_TYPE double
+#define MATRIX_INVERT_BUF1_ELEM_ALLOC (2 * sizeof(double))
+#define MATRIX_INVERT_BUF1_CHECKED_ALLOC (2 * sizeof(double))
#define __CLPK_integer int
#else // not NOLAPACK
@@ -106,7 +108,12 @@ extern "C" {
}
#endif // __cplusplus
#endif // __APPLE__
+
#define MATRIX_INVERT_BUF1_TYPE __CLPK_integer
+#define MATRIX_INVERT_BUF1_ELEM_ALLOC sizeof(__CLPK_integer)
+// invert_matrix_checked() usually requires a larger buffer
+#define MATRIX_INVERT_BUF1_CHECKED_ALLOC (2 * sizeof(__CLPK_integer))
+
#endif // NOLAPACK
#define MATRIX_SINGULAR_RCOND 1e-14
diff --git a/plink_misc.c b/plink_misc.c
index 36ca272..d48abcc 100644
--- a/plink_misc.c
+++ b/plink_misc.c
@@ -339,14 +339,19 @@ int32_t load_pheno(FILE* phenofile, uintptr_t unfiltered_sample_ct, uintptr_t sa
return LOAD_PHENO_LAST_COL;
}
dxx = strtod(bufptr, &ss);
- if (affection) {
- // er, this was calling strtod() twice on the same string. time to
- // drop down a level and remove that redundancy...
+ // if conversion fails, strtod return value is 0.0. that has some
+ // unpleasant interactions with the following code, and my previous
+ // attempt to account for them had a bug.
+ if (ss == bufptr) {
+ dxx = missing_phenod;
+ }
+
+ if (affection) {
if (dxx == pheno_cased) {
set_bit(sample_idx, pheno_c);
set_bit(sample_idx, pheno_nm);
- } else if ((ss != bufptr) && (dxx == pheno_ctrld)) {
+ } else if (dxx == pheno_ctrld) {
clear_bit(sample_idx, pheno_c);
set_bit(sample_idx, pheno_nm);
} else if (affection_01 || (dxx == missing_phenod) || (dxx == 0.0)) {
@@ -379,7 +384,7 @@ int32_t load_pheno(FILE* phenofile, uintptr_t unfiltered_sample_ct, uintptr_t sa
}
}
if (!affection) {
- if ((ss != bufptr) && (dxx != missing_phenod)) {
+ if (dxx != missing_phenod) {
pheno_d[(uint32_t)sample_idx] = dxx;
set_bit(sample_idx, pheno_nm);
}
diff --git a/plink_set.c b/plink_set.c
index d7f9818..511d475 100644
--- a/plink_set.c
+++ b/plink_set.c
@@ -1483,7 +1483,7 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
}
retval = sort_item_ids_noalloc(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, 1, strcmp_deref, sorted_marker_ids, marker_id_map);
if (retval) {
- goto define_sets_ret_NOMEM;
+ goto define_sets_ret_1;
}
#ifdef __LP64__
fill_ulong_zero(round_up_pow2(marker_ctp2l, 2), marker_bitfield_tmp);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/plink1.9.git
More information about the debian-med-commit
mailing list