[med-svn] [plink1.9] 01/03: Imported Upstream version 1.90~b3.36-160416
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Tue Apr 26 20:35:12 UTC 2016
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository plink1.9.
commit a42e1a49a686292bc3835a700f5b2d0fb990f12e
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Tue Apr 26 22:22:05 2016 +0200
Imported Upstream version 1.90~b3.36-160416
---
pigz.c | 38 +-
plink.c | 804 +++--
plink_assoc.c | 546 ++--
plink_calc.c | 380 +--
plink_cluster.c | 134 +-
plink_cnv.c | 764 ++---
plink_common.c | 1196 ++++---
plink_common.h | 408 ++-
plink_data.c | 9688 ++++++++++++++++++++++++++++---------------------------
plink_data.h | 4 +-
plink_dosage.c | 193 +-
plink_family.c | 266 +-
plink_filter.c | 212 +-
plink_glm.c | 430 +--
plink_help.c | 41 +-
plink_homozyg.c | 118 +-
plink_lasso.c | 285 +-
plink_lasso.h | 2 +
plink_ld.c | 806 ++---
plink_matrix.c | 2 +-
plink_misc.c | 2727 ++++++++--------
plink_perm.c | 26 +-
plink_rserve.c | 36 +-
plink_set.c | 684 ++--
24 files changed, 10265 insertions(+), 9525 deletions(-)
diff --git a/pigz.c b/pigz.c
index 46b0525..0273750 100644
--- a/pigz.c
+++ b/pigz.c
@@ -301,6 +301,12 @@
#include "pigz.h"
+#ifdef _WIN64
+ #define putc_unlocked _fputc_nolock
+#else
+ #define putc_unlocked putc
+#endif
+
void pigz_init(uint32_t setprocs) {
return;
}
@@ -312,7 +318,7 @@ void parallel_compress(char* out_fname, unsigned char* overflow_buf, uint32_t do
unsigned char* write_ptr;
uint32_t last_size;
if (!gz_outfile) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fprintf(stderr, "Error: Failed to open %s.\n", out_fname);
exit(2);
@@ -327,7 +333,7 @@ void parallel_compress(char* out_fname, unsigned char* overflow_buf, uint32_t do
}
if (last_size) {
if (!gzwrite(gz_outfile, overflow_buf, last_size)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
gzclose(gz_outfile);
@@ -338,7 +344,7 @@ void parallel_compress(char* out_fname, unsigned char* overflow_buf, uint32_t do
write_ptr = &(overflow_buf[PIGZ_BLOCK_SIZE]);
while (overflow_ct > PIGZ_BLOCK_SIZE) {
if (!gzwrite(gz_outfile, write_ptr, PIGZ_BLOCK_SIZE)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
gzclose(gz_outfile);
@@ -351,7 +357,7 @@ void parallel_compress(char* out_fname, unsigned char* overflow_buf, uint32_t do
}
} while (last_size);
if (gzclose(gz_outfile) != Z_OK) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
exit(6);
@@ -362,7 +368,7 @@ int32_t pzwrite_init(char* out_fname, unsigned char* overflow_buf, uint32_t do_a
ps_ptr->outfile = fopen(out_fname, do_append? "ab" : "wb");
ps_ptr->gz_outfile = NULL;
if (!ps_ptr->outfile) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fprintf(stderr, "Error: Failed to open %s.\n", out_fname);
return 2; // RET_OPEN_FAIL
@@ -375,7 +381,7 @@ void compressed_pzwrite_init(char* out_fname, unsigned char* overflow_buf, uint3
ps_ptr->outfile = NULL;
ps_ptr->gz_outfile = gzopen(out_fname, do_append? "ab" : "wb");
if (!ps_ptr->gz_outfile) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fprintf(stderr, "Error: Failed to open %s.\n", out_fname);
exit(2);
@@ -407,7 +413,7 @@ void force_compressed_pzwrite(Pigz_state* ps_ptr, char** writep_ptr, uint32_t wr
unsigned char* writep = (unsigned char*)(*writep_ptr);
if (ps_ptr->overflow_buf != writep) {
if (!gzwrite(ps_ptr->gz_outfile, ps_ptr->overflow_buf, writep - ps_ptr->overflow_buf)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
gzclose(ps_ptr->gz_outfile);
@@ -429,7 +435,7 @@ int32_t flex_pzputs_std(Pigz_state* ps_ptr, char** writep_ptr, char* ss, uint32_
}
} else {
if (!gzwrite(ps_ptr->gz_outfile, ps_ptr->overflow_buf, 2 * PIGZ_BLOCK_SIZE)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
gzclose(ps_ptr->gz_outfile);
@@ -458,7 +464,7 @@ void compressed_pzwrite_close_null(Pigz_state* ps_ptr, char* writep) {
force_compressed_pzwrite(ps_ptr, &writep, 0);
ps_ptr->overflow_buf = NULL;
if (gzclose(ps_ptr->gz_outfile) != Z_OK) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
exit(6);
@@ -640,7 +646,7 @@ local int complain(const char *fmt, ...)
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
- putc('\n', stderr);
+ putc_unlocked('\n', stderr);
fflush(stderr);
g.warned = 1;
}
@@ -1433,7 +1439,7 @@ int32_t pzwrite_init(char* out_fname, unsigned char* overflow_buf, uint32_t do_a
// unbuffered, and doesn't need to support Windows
ps_ptr->outd = open(out_fname, O_WRONLY | (do_append? O_APPEND : (O_CREAT | O_TRUNC)), 0644);
if (ps_ptr->outd == -1) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fprintf(stderr, "Error: Failed to open %s.\n", out_fname);
return 2; // RET_OPEN_FAIL
@@ -1639,7 +1645,7 @@ local void cut_short(int sig)
(void)sig;
if (g.outd != -1 && g.outf != NULL)
unlink(g.outf);
- putchar('\n');
+ putc_unlocked('\n', stdout);
_exit(1);
}
@@ -1675,7 +1681,7 @@ int32_t write_uncompressed(char* out_fname, unsigned char* overflow_buf, uint32_
unsigned char* write_ptr;
uint32_t last_size;
if (!outfile) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fprintf(stderr, "Error: Failed to open %s.\n", out_fname);
return 2; // RET_OPEN_FAIL
@@ -1690,7 +1696,7 @@ int32_t write_uncompressed(char* out_fname, unsigned char* overflow_buf, uint32_
}
if (last_size) {
if (!fwrite(overflow_buf, last_size, 1, outfile)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
fclose(outfile);
@@ -1701,7 +1707,7 @@ int32_t write_uncompressed(char* out_fname, unsigned char* overflow_buf, uint32_
write_ptr = &(overflow_buf[PIGZ_BLOCK_SIZE]);
while (overflow_ct > PIGZ_BLOCK_SIZE) {
if (!fwrite(write_ptr, PIGZ_BLOCK_SIZE, 1, outfile)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
fclose(outfile);
@@ -1714,7 +1720,7 @@ int32_t write_uncompressed(char* out_fname, unsigned char* overflow_buf, uint32_
}
} while (last_size);
if (fclose(outfile)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fputs("Error: File write failure.\n", stderr);
return 6;
diff --git a/plink.c b/plink.c
index 1ec7bae..fd5bd0c 100644
--- a/plink.c
+++ b/plink.c
@@ -92,7 +92,7 @@
static const char ver_str[] =
#ifdef STABLE_BUILD
- "PLINK v1.90b3.31"
+ "PLINK v1.90b3.36"
#else
"PLINK v1.90p"
#endif
@@ -104,10 +104,10 @@ static const char ver_str[] =
#else
" 32-bit"
#endif
- " (3 Feb 2016)";
+ " (16 Apr 2016)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
- " "
+ ""
#ifdef STABLE_BUILD
"" // (don't want this when version number has a trailing letter)
#else
@@ -133,19 +133,23 @@ static const char notestr_null_calc2[] = "Commands include --make-bed, --recode,
#endif
#endif
+static const char errstr_nomem[] = "Error: Out of memory. The --memory flag may be helpful.\n";
+static const char errstr_write[] = "Error: File write failure.\n";
+static const char errstr_read[] = "Error: File read failure.\n";
+
void disp_exit_msg(int32_t retval) {
switch (retval) {
case RET_NOMEM:
logprint("\n");
- logerrprint("Error: Out of memory. The --memory flag may be helpful.\n");
+ logerrprint(errstr_nomem);
break;
case RET_WRITE_FAIL:
logprint("\n");
- logerrprint("Error: File write failure.\n");
+ logerrprint(errstr_write);
break;
case RET_READ_FAIL:
logprint("\n");
- logerrprint("Error: File read failure.\n");
+ logerrprint(errstr_read);
break;
case RET_THREAD_CREATE_FAIL:
logprint("\n");
@@ -291,27 +295,27 @@ static inline int32_t relationship_or_ibc_req(uint64_t calculation_type) {
return (relationship_req(calculation_type) || (calculation_type & CALC_IBC));
}
-int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, char* famname, char* cm_map_fname, char* cm_map_chrname, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* freqname, char* distance_wts_fname, char* read_dists_fname, char* read_dists_id_fname, char* evecname, char* mergename1, char* mergename2, char* mergename3, char* missing_mid_template, char* missing_marke [...]
- FILE* bedfile = NULL;
- FILE* phenofile = NULL;
+int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, char* famname, char* cm_map_fname, char* cm_map_chrname, char* phenoname, char* extractname, char* excludename, char* keepname, char* removename, char* keepfamname, char* removefamname, char* filtername, char* freqname, char* distance_wts_fname, char* read_dists_fname, char* read_dists_id_fname, char* evecname, char* mergename1, char* mergename2, char* mergename3, char* missing_mid_template, char* missing_marke [...]
+ FILE* bedfile = nullptr;
+ FILE* phenofile = nullptr;
uintptr_t unfiltered_marker_ct = 0;
- uintptr_t* marker_exclude = NULL;
+ uintptr_t* marker_exclude = nullptr;
uintptr_t marker_exclude_ct = 0;
uintptr_t marker_ct = 0;
uintptr_t max_marker_id_len = 0;
// set_allele_freqs = .bed set bit frequency in middle of loading process, A2
// allele frequency later.
- double* set_allele_freqs = NULL;
+ double* set_allele_freqs = nullptr;
uintptr_t unfiltered_sample_ct = 0;
uintptr_t unfiltered_sample_ct4 = 0;
uintptr_t unfiltered_sample_ctl = 0;
- uintptr_t* sample_exclude = NULL;
+ uintptr_t* sample_exclude = nullptr;
uintptr_t sample_exclude_ct = 0;
uintptr_t sample_ct = 0;
- uint32_t* sample_sort_map = NULL;
- uintptr_t* founder_info = NULL;
- uintptr_t* sex_nm = NULL;
- uintptr_t* sex_male = NULL;
+ uint32_t* sample_sort_map = nullptr;
+ uintptr_t* founder_info = nullptr;
+ uintptr_t* sex_nm = nullptr;
+ uintptr_t* sex_male = nullptr;
uint32_t genome_skip_write = (cluster_ptr->ppc != 0.0) && (!(calculation_type & CALC_GENOME)) && (!read_genome_fname);
uint32_t marker_pos_needed = are_marker_pos_needed(calculation_type, misc_flags, cm_map_fname, sip->fname, min_bp_space, genome_skip_write, ldip->modifier, epi_ip->modifier, cluster_ptr->modifier);
uint32_t marker_cms_needed = are_marker_cms_needed(calculation_type, cm_map_fname, update_cm);
@@ -325,63 +329,62 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
uint32_t pheno_all = pheno_modifier & PHENO_ALL;
- char* marker_ids = NULL;
- uint32_t* marker_id_htable = NULL;
+ char* marker_ids = nullptr;
+ uint32_t* marker_id_htable = nullptr;
uint32_t marker_id_htable_size = 0;
- double* marker_cms = NULL;
+ double* marker_cms = nullptr;
// marker_allele_ptrs[2 * i] is id of A1 (usually minor) allele at marker i
// marker_allele_ptrs[2 * i + 1] is id of A2 allele
// Single-character allele names point to g_one_char_strs[]; otherwise
// string allocation occurs on the heap.
- char** marker_allele_ptrs = NULL;
- uintptr_t max_marker_allele_len = 2; // includes trailing null
- uintptr_t* marker_reverse = NULL;
+ char** marker_allele_ptrs = nullptr;
+ uintptr_t max_marker_allele_blen = 2; // er, this probably should be slen
+ uintptr_t* marker_reverse = nullptr;
int32_t retval = 0;
uint32_t map_is_unsorted = 0;
- uint32_t map_cols = 3;
uint32_t affection = 0;
uint32_t gender_unk_ct = 0;
- uintptr_t* pheno_nm = NULL;
- uintptr_t* pheno_nm_datagen = NULL; // --make-bed/--recode/--write-covar only
- uintptr_t* orig_pheno_nm = NULL; // --all-pheno + --pheno-merge
- uintptr_t* pheno_c = NULL;
- uintptr_t* orig_pheno_c = NULL;
- uintptr_t* geno_excl_bitfield = NULL;
- uintptr_t* ac_excl_bitfield = NULL;
- double* pheno_d = NULL;
- double* orig_pheno_d = NULL;
- char* sample_ids = NULL;
+ uintptr_t* pheno_nm = nullptr;
+ uintptr_t* pheno_nm_datagen = nullptr; // --make-bed/--recode/--write-covar
+ uintptr_t* orig_pheno_nm = nullptr; // --all-pheno + --pheno-merge
+ uintptr_t* pheno_c = nullptr;
+ uintptr_t* orig_pheno_c = nullptr;
+ uintptr_t* geno_excl_bitfield = nullptr;
+ uintptr_t* ac_excl_bitfield = nullptr;
+ double* pheno_d = nullptr;
+ double* orig_pheno_d = nullptr;
+ char* sample_ids = nullptr;
uintptr_t max_sample_id_len = 4;
- char* paternal_ids = NULL;
+ char* paternal_ids = nullptr;
uintptr_t max_paternal_id_len = 2;
- char* maternal_ids = NULL;
+ char* maternal_ids = nullptr;
uintptr_t max_maternal_id_len = 2;
- unsigned char* bigstack_mark = NULL;
+ unsigned char* bigstack_mark = nullptr;
uintptr_t cluster_ct = 0;
- uint32_t* cluster_map = NULL; // unfiltered sample IDs
+ uint32_t* cluster_map = nullptr; // unfiltered sample IDs
// index for cluster_map, length (cluster_ct + 1)
// cluster_starts[n+1] - cluster_starts[n] = length of cluster n (0-based)
- uint32_t* cluster_starts = NULL;
- char* cluster_ids = NULL;
+ uint32_t* cluster_starts = nullptr;
+ char* cluster_ids = nullptr;
uintptr_t max_cluster_id_len = 2;
- double* mds_plot_dmatrix_copy = NULL;
- uintptr_t* cluster_merge_prevented = NULL;
- double* cluster_sorted_ibs = NULL;
- char* cptr = NULL;
+ double* mds_plot_dmatrix_copy = nullptr;
+ uintptr_t* cluster_merge_prevented = nullptr;
+ double* cluster_sorted_ibs = nullptr;
+ char* cptr = nullptr;
double missing_phenod = (double)missing_pheno;
double ci_zt = 0.0;
uintptr_t bed_offset = 3;
- uint32_t* marker_pos = NULL;
+ uint32_t* marker_pos = nullptr;
uint32_t hh_exists = 0;
uint32_t pheno_ctrl_ct = 0;
uintptr_t covar_ct = 0;
- char* covar_names = NULL;
+ char* covar_names = nullptr;
uintptr_t max_covar_name_len = 0;
- uintptr_t* covar_nm = NULL;
- double* covar_d = NULL;
- uintptr_t* gxe_covar_nm = NULL;
- uintptr_t* gxe_covar_c = NULL;
- uintptr_t* pca_sample_exclude = NULL;
+ uintptr_t* covar_nm = nullptr;
+ double* covar_d = nullptr;
+ uintptr_t* gxe_covar_nm = nullptr;
+ uintptr_t* gxe_covar_c = nullptr;
+ uintptr_t* pca_sample_exclude = nullptr;
uintptr_t pca_sample_ct = 0;
uintptr_t ulii = 0;
uint32_t pheno_nm_ct = 0;
@@ -392,21 +395,21 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
uint32_t sample_male_ct = 0;
uint32_t sample_f_ct = 0;
uint32_t sample_f_male_ct = 0;
- unsigned char* bigstack_mark2 = NULL;
- unsigned char* bigstack_mark_precluster = NULL;
- unsigned char* bigstack_mark_postcluster = NULL;
- uint32_t* nchrobs = NULL;
- int32_t* hwe_lls = NULL;
- int32_t* hwe_lhs = NULL;
- int32_t* hwe_hhs = NULL;
- int32_t* hwe_ll_cases = NULL;
- int32_t* hwe_lh_cases = NULL;
- int32_t* hwe_hh_cases = NULL;
- int32_t* hwe_ll_allfs = NULL;
- int32_t* hwe_lh_allfs = NULL;
- int32_t* hwe_hh_allfs = NULL;
- int32_t* hwe_hapl_allfs = NULL;
- int32_t* hwe_haph_allfs = NULL;
+ unsigned char* bigstack_mark2 = nullptr;
+ unsigned char* bigstack_mark_precluster = nullptr;
+ unsigned char* bigstack_mark_postcluster = nullptr;
+ uint32_t* nchrobs = nullptr;
+ int32_t* hwe_lls = nullptr;
+ int32_t* hwe_lhs = nullptr;
+ int32_t* hwe_hhs = nullptr;
+ int32_t* hwe_ll_cases = nullptr;
+ int32_t* hwe_lh_cases = nullptr;
+ int32_t* hwe_hh_cases = nullptr;
+ int32_t* hwe_ll_allfs = nullptr;
+ int32_t* hwe_lh_allfs = nullptr;
+ int32_t* hwe_hh_allfs = nullptr;
+ int32_t* hwe_hapl_allfs = nullptr;
+ int32_t* hwe_haph_allfs = nullptr;
pthread_t threads[MAX_THREADS];
uint32_t* uiptr;
double* rel_ibc;
@@ -437,7 +440,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
// famname[0] is nonzero iff we're not in the --merge-list special case
if ((calculation_type & CALC_MAKE_BED) && famname[0]) {
#ifdef _WIN32
- uii = GetFullPathName(bedname, FNAMESIZE, g_textbuf, NULL);
+ uii = GetFullPathName(bedname, FNAMESIZE, g_textbuf, nullptr);
if ((!uii) || (uii > FNAMESIZE))
#else
if (!realpath(bedname, g_textbuf))
@@ -452,10 +455,10 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
goto plink_ret_OPEN_FAIL;
}
memcpy(outname_end, ".bed", 5);
- // if file doesn't exist, realpath returns NULL on Linux instead of what
+ // if file doesn't exist, realpath returns nullptr on Linux instead of what
// the path would be.
#ifdef _WIN32
- uii = GetFullPathName(outname, FNAMESIZE, &(g_textbuf[FNAMESIZE + 64]), NULL);
+ uii = GetFullPathName(outname, FNAMESIZE, &(g_textbuf[FNAMESIZE + 64]), nullptr);
if (uii && (uii <= FNAMESIZE) && (!strcmp(g_textbuf, &(g_textbuf[FNAMESIZE + 64]))))
#else
cptr = realpath(outname, &(g_textbuf[FNAMESIZE + 64]));
@@ -548,10 +551,15 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (!marker_alleles_needed) {
allelexxxx = 0;
}
- retval = load_bim(bimname, &map_cols, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, &set_allele_freqs, nchrobs_needed? (&nchrobs) : NULL, &marker_allele_ptrs, &max_marker_allele_len, &marker_ids, missing_mid_template, new_id_max_allele_len, missing_marker_id_match, chrom_info_ptr, &marker_cms, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, [...]
+ retval = load_bim(bimname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, &set_allele_freqs, nchrobs_needed? (&nchrobs) : nullptr, &marker_allele_ptrs, &max_marker_allele_blen, &marker_ids, missing_mid_template, new_id_max_allele_len, missing_marker_id_match, chrom_info_ptr, &marker_cms, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, &map_is [...]
if (retval) {
goto plink_ret_1;
}
+ if (!update_chr) {
+ if (finalize_chrom_info(chrom_info_ptr)) {
+ goto plink_ret_NOMEM;
+ }
+ }
}
// load .fam, count samples
@@ -641,7 +649,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
goto plink_ret_1;
}
} else if (phenofile) {
- retval = load_pheno(phenofile, unfiltered_sample_ct, 0, cptr, max_sample_id_len, uiptr, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, NULL, 0);
+ retval = load_pheno(phenofile, unfiltered_sample_ct, 0, cptr, max_sample_id_len, uiptr, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, nullptr, 0);
if (retval) {
if (retval == LOAD_PHENO_LAST_COL) {
logerrprintb();
@@ -791,7 +799,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (marker_alleles_needed) {
if (update_alleles_fname) {
- retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_len, outname, outname_end);
+ retval = update_marker_alleles(update_alleles_fname, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len, unfiltered_marker_ct, marker_exclude, marker_allele_ptrs, &max_marker_allele_blen, outname, outname_end);
if (retval) {
goto plink_ret_1;
}
@@ -1084,7 +1092,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (filter_flags & (FILTER_BINARY_FOUNDERS | FILTER_BINARY_NONFOUNDERS)) {
ii = sample_exclude_ct;
- filter_samples_bitfields(unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, (filter_flags / FILTER_BINARY_FOUNDERS) & 1, NULL);
+ filter_samples_bitfields(unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, (filter_flags / FILTER_BINARY_FOUNDERS) & 1, nullptr);
if ((sample_exclude_ct == unfiltered_sample_ct) && (!allow_no_samples)) {
LOGERRPRINTF("Error: All %s removed due to founder status (--filter-%s).\n", g_species_plural, (filter_flags & FILTER_BINARY_FOUNDERS)? "founders" : "nonfounders");
goto plink_ret_ALL_SAMPLES_EXCLUDED;
@@ -1160,7 +1168,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logprint("Using 1 thread.\n");
} else {
#endif
- logprint("Using 1 thread (no multithreaded calculations invoked.\n");
+ logprint("Using 1 thread (no multithreaded calculations invoked).\n");
#ifndef NOLAPACK
}
#endif
@@ -1215,7 +1223,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
} else {
// if only --gxe, ignore --covar-name/--covar-number
uii = (calculation_type & (CALC_MAKE_BED | CALC_MAKE_FAM | CALC_RECODE | CALC_WRITE_COVAR | CALC_GLM | CALC_LASSO | CALC_RPLUGIN))? 1 : 0;
- retval = load_covars(covar_fname, unfiltered_sample_ct, sample_exclude, sample_ct, NULL, NULL, sample_ids, max_sample_id_len, missing_phenod, uii? covar_modifier : (covar_modifier & COVAR_KEEP_PHENO_ON_MISSING_COV), uii? covar_range_list_ptr : NULL, gxe_mcovar, &covar_ct, &covar_names, &max_covar_name_len, pheno_nm, &covar_nm, &covar_d, &gxe_covar_nm, &gxe_covar_c);
+ retval = load_covars(covar_fname, unfiltered_sample_ct, sample_exclude, sample_ct, nullptr, nullptr, sample_ids, max_sample_id_len, missing_phenod, uii? covar_modifier : (covar_modifier & COVAR_KEEP_PHENO_ON_MISSING_COV), uii? covar_range_list_ptr : nullptr, gxe_mcovar, &covar_ct, &covar_names, &max_covar_name_len, pheno_nm, &covar_nm, &covar_d, &gxe_covar_nm, &gxe_covar_c);
if (retval) {
goto plink_ret_1;
}
@@ -1256,7 +1264,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
goto plink_ret_NOMEM;
}
if (bedfile && sample_ct) {
- retval = calc_freqs_and_hwe(bedfile, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, founder_info, nonfounders, (misc_flags / MISC_MAF_SUCC) & 1, set_allele_freqs, bed_offset, (hwe_thresh > 0.0) || (calculation_type & CALC_HARDY), hwe_modifier & HWE_THRESH_ALL, (pheno_nm_ct && pheno_c)? ((calculation_type / CALC [...]
+ retval = calc_freqs_and_hwe(bedfile, outname, outname_end, unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_exclude_ct, marker_ids, max_marker_id_len, unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, founder_info, nonfounders, (misc_flags / MISC_MAF_SUCC) & 1, set_allele_freqs, bed_offset, (hwe_thresh > 0.0) || (calculation_type & CALC_HARDY), hwe_modifier & HWE_THRESH_ALL, (pheno_nm_ct && pheno_c)? ((calculation_type / CALC [...]
if (retval) {
goto plink_ret_1;
}
@@ -1279,7 +1287,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (a1alleles || a2alleles) {
- retval = load_ax_alleles(a1alleles? a1alleles : a2alleles, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_allele_ptrs, &max_marker_allele_len, marker_reverse, marker_ids, max_marker_id_len, set_allele_freqs, a2alleles? 1 : 0);
+ retval = load_ax_alleles(a1alleles? a1alleles : a2alleles, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_allele_ptrs, &max_marker_allele_blen, marker_reverse, marker_ids, max_marker_id_len, set_allele_freqs, a2alleles? 1 : 0);
if (retval) {
goto plink_ret_1;
}
@@ -1296,11 +1304,11 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (misc_flags & MISC_FREQ_COUNTS) {
logprint("Note: --freq 'counts' modifier has no effect on cluster-stratified report.\n");
}
- retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
+ retval = write_stratified_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_ct, sample_f_ct, founder_info, nonfounders, sex_male, sample_f_male_ct, marker_reverse, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len);
} else if (misc_flags & MISC_FREQ_CC) {
- retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
+ retval = write_cc_freqs(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_FREQ_GZ) & 1, plink_maxsnp, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, founder_info, nonfounders, sex_male, marker_reverse, pheno_nm, pheno_c);
} else {
- retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
+ retval = write_freqs(outname, outname_end, plink_maxsnp, unfiltered_marker_ct, marker_exclude, set_allele_freqs, chrom_info_ptr, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, hwe_hapl_allfs, hwe_haph_allfs, sample_f_ct, sample_f_male_ct, nonfounders, misc_flags, marker_reverse);
}
if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ))))) {
goto plink_ret_1;
@@ -1329,7 +1337,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
oblig_missing_cleanup(om_ip);
if (sample_ct) {
if (calculation_type & CALC_HARDY) {
- retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
+ retval = hardy_report(outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, hwe_lls, hwe_lhs, hwe_hhs, hwe_modifier, nonfounders, hwe_ll_cases, hwe_lh_cases, hwe_hh_cases, hwe_ll_allfs, hwe_lh_allfs, hwe_hh_allfs, pheno_nm_ct, pheno_c, chrom_info_ptr);
if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_HARDY))))) {
goto plink_ret_1;
}
@@ -1355,7 +1363,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (bedfile && sample_ct && (unfiltered_marker_ct > marker_exclude_ct)) {
if ((calculation_type & CALC_MENDEL) || (fam_ip->mendel_modifier & MENDEL_FILTER)) {
- retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, allow_no_variants, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_len, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists, ch [...]
+ retval = mendel_error_scan(fam_ip, bedfile, bed_offset, outname, outname_end, plink_maxfid, plink_maxiid, plink_maxsnp, allow_no_variants, unfiltered_marker_ct, marker_exclude, &marker_exclude_ct, marker_reverse, marker_ids, max_marker_id_len, marker_allele_ptrs, max_marker_allele_blen, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, hh_exists, c [...]
if (retval || (!(calculation_type & (~(CALC_MERGE | CALC_WRITE_CLUSTER | CALC_FREQ | CALC_MISSING_REPORT | CALC_MENDEL))))) {
goto plink_ret_1;
}
@@ -1438,7 +1446,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (pca_sample_ct == sample_ct) {
logerrprint("Warning: --pca-cluster-names/--pca-clusters has no effect since all samples are\nin the named clusters.\n");
- pca_sample_exclude = NULL;
+ pca_sample_exclude = nullptr;
} else {
LOGPRINTF("--pca-cluster-names/--pca-clusters: %" PRIuPTR " samples specified.\n", pca_sample_ct);
ulii = unfiltered_sample_ct - pca_sample_ct;
@@ -1492,8 +1500,8 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (retval) {
goto plink_ret_1;
}
- g_sample_missing_unwt = NULL;
- g_missing_dbl_excluded = NULL;
+ g_sample_missing_unwt = nullptr;
+ g_missing_dbl_excluded = nullptr;
}
if (calculation_type & CALC_SEXCHECK) {
@@ -1530,7 +1538,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (calculation_type & CALC_WRITE_SNPLIST) {
- retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, NULL, 0);
+ retval = write_snplist(outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, nullptr, 0);
if (retval) {
goto plink_ret_1;
}
@@ -1577,13 +1585,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
}
if (calculation_type & (CALC_MAKE_BED | CALC_MAKE_BIM | CALC_MAKE_FAM)) {
- retval = make_bed(bedfile, bed_offset, bimname, map_cols, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, o [...]
+ retval = make_bed(bedfile, bed_offset, bimname, outname, outname_end, calculation_type, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, pheno_d, output_miss [...]
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_RECODE) {
- retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_len, marker_cms, marker_allele_ptrs, max_marker_allele_len, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, phe [...]
+ retval = recode(recode_modifier, bedfile, bed_offset, outname, outname_end, recode_allele_name, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, marker_ids, max_marker_id_len, marker_cms, marker_allele_ptrs, max_marker_allele_blen, marker_pos, marker_reverse, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm_datagen? pheno_nm_datagen : pheno_nm, pheno_c, ph [...]
if (retval) {
goto plink_ret_1;
}
@@ -1629,7 +1637,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: Run-of-homozygosity scanning requires a sorted .bim. Retry this command\nafter using --make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = calc_homozyg(homozyg_ptr, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, marker_reverse, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, outname, outname_end, pheno_nm, pheno_c, pheno_d, output_missing_pheno, sex_male);
+ retval = calc_homozyg(homozyg_ptr, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, marker_pos, sample_ct, unfiltered_sample_ct, sample_exclude, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, outname, outname_end, pheno_nm, pheno_c, pheno_d, output_missing_pheno, sex_male);
if (retval) {
goto plink_ret_1;
}
@@ -1655,14 +1663,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: LD-based strand flip scanning requires a sorted .bim. Retry this\ncommand after using --make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = flipscan(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, pheno_nm, pheno_c, founder_info, sex_male, outname, outname_end, hh_exists);
+ retval = flipscan(ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, chrom_info_ptr, set_allele_freqs, marker_pos, unfiltered_sample_ct, pheno_nm, pheno_c, founder_info, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_EPI) && epi_ip->ld_mkr1) {
- retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, founder_info, 0, NULL, 0, 0, NULL, sex_male, NULL, NULL, hh_exists);
+ retval = twolocus(epi_ip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, chrom_info_ptr, unfiltered_sample_ct, founder_info, 0, nullptr, 0, 0, nullptr, sex_male, nullptr, nullptr, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1673,7 +1681,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
logerrprint("Error: Windowed --r/--r2 runs require a sorted .bim. Retry this command after\nusing --make-bed to sort your data.\n");
goto plink_ret_INVALID_CMDLINE;
}
- retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, set_allele_freqs, chrom_info_ptr, marker_pos, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
+ retval = ld_report(threads, ldip, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, marker_reverse, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, set_allele_freqs, chrom_info_ptr, marker_pos, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, outname, outname_end, hh_exists);
if (retval) {
goto plink_ret_1;
}
@@ -1747,7 +1755,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
goto plink_ret_NOMEM;
}
if (cluster_ptr->modifier & CLUSTER_GROUP_AVG) {
- fill_double_zero(cluster_sorted_ibs, ulii);
+ fill_double_zero(ulii, cluster_sorted_ibs);
} else {
for (uljj = 0; uljj < ulii; uljj++) {
cluster_sorted_ibs[uljj] = 1.0;
@@ -1780,7 +1788,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (bigstack_alloc_d((sample_ct * (sample_ct - 1)) / 2, &g_dists)) {
goto plink_ret_NOMEM;
}
- retval = read_dists(read_dists_fname, read_dists_id_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, 0, NULL, NULL, 0, 0, g_dists, 0, NULL, NULL);
+ retval = read_dists(read_dists_fname, read_dists_id_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, 0, nullptr, nullptr, 0, 0, g_dists, 0, nullptr, nullptr);
if (retval) {
goto plink_ret_1;
}
@@ -1816,12 +1824,12 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (read_dists_fname && (calculation_type & (CALC_IBS_TEST | CALC_GROUPDIST | CALC_REGRESS_DISTANCE))) {
bigstack_reset(g_dists);
- g_dists = NULL;
+ g_dists = nullptr;
}
if ((calculation_type & CALC_GENOME) || genome_skip_write) {
bigstack_reset(bigstack_mark2);
- g_dists = NULL;
+ g_dists = nullptr;
retval = calc_genome(threads, bedfile, bed_offset, marker_ct, unfiltered_marker_ct, marker_exclude, chrom_info_ptr, marker_pos, set_allele_freqs, nchrobs, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, founder_info, parallel_idx, parallel_tot, outname, outname_end, nonfounders, calculation_type, genome_modifier, ppc_gap, genome_min_pi_hat, genome_max_pi_h [...]
if (retval) {
goto plink_ret_1;
@@ -1829,14 +1837,14 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (calculation_type & CALC_HET) {
- retval = het_report(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_HET_GZ) & 1, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, (misc_flags & MISC_HET_SMALL_SAMPLE)? founder_info : NULL, chrom_info_ptr, set_allele_freqs);
+ retval = het_report(bedfile, bed_offset, outname, outname_end, (misc_flags / MISC_HET_GZ) & 1, unfiltered_marker_ct, marker_exclude, marker_ct, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, plink_maxfid, plink_maxiid, max_sample_id_len, (misc_flags & MISC_HET_SMALL_SAMPLE)? founder_info : nullptr, chrom_info_ptr, set_allele_freqs);
if (retval) {
goto plink_ret_1;
}
}
if (calculation_type & CALC_FST) {
- retval = fst_report(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, pheno_nm, (misc_flags & MISC_FST_CC)? pheno_c : NULL, cluster_ct, cluster_map, cluster_starts);
+ retval = fst_report(bedfile, bed_offset, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_pos, chrom_info_ptr, unfiltered_sample_ct, sample_exclude, pheno_nm, (misc_flags & MISC_FST_CC)? pheno_c : nullptr, cluster_ct, cluster_map, cluster_starts);
if (retval) {
goto plink_ret_1;
}
@@ -1876,6 +1884,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
#endif
+ if (calculation_type & CALC_LASSO_LAMBDA) {
+ retval = lasso_lambda(marker_exclude, marker_reverse, chrom_info_ptr, sex_male, pheno_nm, covar_nm, bed_offset, unfiltered_marker_ct, marker_ct, unfiltered_sample_ct, pheno_nm_ct, hh_exists, lasso_lambda_iters, lasso_h2, bedfile, outname, outname_end, &lasso_minlambda);
+ if (retval) {
+ goto plink_ret_1;
+ }
+ }
+
if (calculation_type & (CALC_MODEL | CALC_GXE | CALC_GLM | CALC_LASSO | CALC_CMH | CALC_HOMOG | CALC_TESTMISS | CALC_TDT | CALC_DFAM | CALC_QFAM)) {
// can't use pheno_ctrl_ct in here since new phenotypes may be loaded, and
// we don't bother updating it...
@@ -1886,13 +1901,13 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
uii = 0; // phenotype/cluster number
*outname_end = '.';
if (loop_assoc_fname) {
- retval = load_clusters(loop_assoc_fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, NULL, NULL, NULL, NULL, 0);
+ retval = load_clusters(loop_assoc_fname, unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, sample_ids, max_sample_id_len, mwithin_col, (misc_flags / MISC_LOAD_CLUSTER_KEEP_NA) & 1, &cluster_ct, &cluster_map, &cluster_starts, &cluster_ids, &max_cluster_id_len, nullptr, nullptr, nullptr, nullptr, 0);
if (retval) {
goto plink_ret_1;
}
if (pheno_d) {
free(pheno_d);
- pheno_d = NULL;
+ pheno_d = nullptr;
}
if (!pheno_c) {
if (aligned_malloc(unfiltered_sample_ctl * sizeof(intptr_t), &pheno_c)) {
@@ -1912,7 +1927,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
break;
}
outname_end2 = strcpya(&(outname_end[1]), &(cluster_ids[uii * max_cluster_id_len]));
- fill_ulong_zero(pheno_c, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, pheno_c);
ukk = cluster_starts[uii + 1];
for (ujj = cluster_starts[uii]; ujj < ukk; ujj++) {
SET_BIT(cluster_map[ujj], pheno_c);
@@ -1925,7 +1940,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (orig_pheno_c) {
if (!pheno_c) {
free(pheno_d);
- pheno_d = NULL;
+ pheno_d = nullptr;
if (aligned_malloc(unfiltered_sample_ctl * sizeof(intptr_t), &pheno_c)) {
goto plink_ret_NOMEM;
}
@@ -1935,18 +1950,18 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
memcpy(pheno_d, orig_pheno_d, unfiltered_sample_ct * sizeof(double));
}
} else {
- fill_ulong_zero(pheno_nm, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, pheno_nm);
aligned_free_cond_null(&pheno_c);
if (pheno_d) {
free(pheno_d);
- pheno_d = NULL;
+ pheno_d = nullptr;
}
}
uii++;
plink_skip_empty_pheno:
rewind(phenofile);
outname_end[1] = '\0';
- retval = load_pheno(phenofile, unfiltered_sample_ct, sample_exclude_ct, cptr, max_sample_id_len, uiptr, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, uii, NULL, pheno_nm, &pheno_c, &pheno_d, &(outname_end[1]), (uintptr_t)((&(outname[FNAMESIZE - 32])) - outname_end));
+ retval = load_pheno(phenofile, unfiltered_sample_ct, sample_exclude_ct, cptr, max_sample_id_len, uiptr, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, uii, nullptr, pheno_nm, &pheno_c, &pheno_d, &(outname_end[1]), (uintptr_t)((&(outname[FNAMESIZE - 32])) - outname_end));
if (retval == LOAD_PHENO_LAST_COL) {
bigstack_reset(bigstack_mark);
retval = 0; // exit code bugfix
@@ -1988,7 +2003,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
retval = qassoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_mperm_val, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_d, founder_info, sex_male, hh_exists, ldip->modifier & LD_IGNORE_ [...]
}
} else {
- retval = model_assoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_cell_ct, model_mperm_val, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_len, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? NULL : cluster_starts, apip, mperm_save, pheno_nm_ct, [...]
+ retval = model_assoc(threads, bedfile, bed_offset, outname, outname_end2, model_modifier, model_cell_ct, model_mperm_val, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm [...]
}
if (retval) {
goto plink_ret_1;
@@ -1998,12 +2013,12 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
if (!(glm_modifier & GLM_NO_SNP)) {
if (pheno_d) {
#ifndef NOLAPACK
- retval = glm_linear_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_len, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_samp [...]
+ retval = glm_linear_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sam [...]
#else
logerrprint("Warning: Skipping --logistic on --all-pheno QT since this is a no-LAPACK " PROG_NAME_CAPS"\nbuild.\n");
#endif
} else {
- retval = glm_logistic_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_len, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_sa [...]
+ retval = glm_logistic_assoc(threads, bedfile, bed_offset, outname, outname_end2, glm_modifier, glm_vif_thresh, glm_xchr_model, glm_mperm_val, parameters_range_list_ptr, tests_range_list_ptr, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, condition_mname, condition_fname, chrom_info_ptr, unfiltered_s [...]
}
} else {
if (pheno_d) {
@@ -2030,7 +2045,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
if (calculation_type & CALC_LASSO) {
retval = lasso(threads, bedfile, bed_offset, outname, outname_end2, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_allele_ptrs, marker_reverse, chrom_info_ptr, unfiltered_sample_ct, pheno_nm_ct, lasso_h2, lasso_minlambda, lasso_select_covars_range_list_ptr, misc_flags, pheno_nm, pheno_c, pheno_d, covar_ct, covar_names, max_covar_name_len, covar_nm, covar_d, sex_male, hh_exists);
- if (retval) {
+ if (retval) {
goto plink_ret_1;
}
}
@@ -2045,25 +2060,25 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
}
}
if ((calculation_type & CALC_HOMOG) && pheno_c) {
- retval = homog_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+ retval = homog_assoc(bedfile, bed_offset, outname, outname_end, output_min_p, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, chrom_info_ptr, set_allele_freqs, unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, cluster_ids, max_cluster_id_len, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_TESTMISS) && pheno_c) {
- retval = testmiss(threads, bedfile, bed_offset, outname, outname_end2, testmiss_mperm_val, testmiss_modifier, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? NULL : cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
+ retval = testmiss(threads, bedfile, bed_offset, outname, outname_end2, testmiss_mperm_val, testmiss_modifier, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, unfiltered_sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_nm_ct, pheno_nm, pheno_c, sex_male, hh_exists);
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_TDT) && pheno_c) {
- retval = tdt(threads, bedfile, bed_offset, outname, outname_end2, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_len, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, mperm_save, pheno_nm, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal [...]
+ retval = tdt(threads, bedfile, bed_offset, outname, outname_end2, ci_size, ci_zt, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_pos, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, apip, mperm_save, pheno_nm, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, materna [...]
if (retval) {
goto plink_ret_1;
}
}
if ((calculation_type & CALC_DFAM) && pheno_c) {
- retval = dfam(threads, bedfile, bed_offset, outname, outname_end2, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_len, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, loop_assoc_fname? NULL : cluster_starts, apip, mperm_save, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, [...]
+ retval = dfam(threads, bedfile, bed_offset, outname, outname_end2, pfilter, output_min_p, mtest_adjust, adjust_lambda, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, marker_allele_ptrs, max_marker_allele_blen, marker_reverse, unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, loop_assoc_fname? nullptr : cluster_starts, apip, mperm_save, pheno_c, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_i [...]
if (retval) {
goto plink_ret_1;
}
@@ -2133,15 +2148,7 @@ int32_t plink(char* outname, char* outname_end, char* bedname, char* bimname, ch
aligned_free_cond(pheno_c);
fclose_cond(phenofile);
fclose_cond(bedfile);
- if (marker_allele_ptrs && (max_marker_allele_len > 2)) {
- ulii = unfiltered_marker_ct * 2;
- for (marker_uidx = 0; marker_uidx < ulii; marker_uidx++) {
- cptr = marker_allele_ptrs[marker_uidx];
- if ((cptr < g_one_char_strs) || (cptr >= &(g_one_char_strs[512]))) {
- free(cptr);
- }
- }
- }
+ cleanup_allele_storage(max_marker_allele_blen - 1, 2 * unfiltered_marker_ct, marker_allele_ptrs);
return retval;
}
@@ -2158,7 +2165,7 @@ static inline char* is_flag_start(char* param) {
if ((*param == '-') && ((ucc > '9') || ((ucc < '0') && (ucc != '.') && (ucc != '\0')))) {
return (ucc == '-')? (&(param[2])) : (&(param[1]));
}
- return NULL;
+ return nullptr;
}
uint32_t param_count(int32_t argc, char** argv, int32_t flag_idx) {
@@ -2198,14 +2205,14 @@ int32_t enforce_param_ct_range(uint32_t param_ct, char* flag_name, uint32_t min_
int32_t parse_next_range(uint32_t param_ct, char range_delim, char** argv, uint32_t* cur_param_idx_ptr, char** cur_arg_pptr, char** range_start_ptr, uint32_t* rs_len_ptr, char** range_end_ptr, uint32_t* re_len_ptr) {
// Starts reading from argv[cur_param_idx][cur_pos]. If a valid range is
// next, range_start + rs_len + range_end + re_len are updated. If only a
- // single item is next, range_end is set to NULL and range_start + rs_len are
- // updated. If there are no items left, range_start is set to NULL. If
- // the input is not well-formed, -1 is returned instead of 0.
+ // single item is next, range_end is set to nullptr and range_start + rs_len
+ // are updated. If there are no items left, range_start is set to nullptr.
+ // If the input is not well-formed, -1 is returned instead of 0.
uint32_t cur_param_idx = *cur_param_idx_ptr;
char* cur_arg_ptr = *cur_arg_pptr;
char cc;
if (cur_param_idx > param_ct) {
- *cur_arg_pptr = NULL;
+ *cur_arg_pptr = nullptr;
return 0;
}
while (1) {
@@ -2213,7 +2220,7 @@ int32_t parse_next_range(uint32_t param_ct, char range_delim, char** argv, uint3
if (!cc) {
*cur_param_idx_ptr = ++cur_param_idx;
if (cur_param_idx > param_ct) {
- *range_start_ptr = NULL;
+ *range_start_ptr = nullptr;
return 0;
}
cur_arg_ptr = argv[cur_param_idx];
@@ -2233,7 +2240,7 @@ int32_t parse_next_range(uint32_t param_ct, char range_delim, char** argv, uint3
if ((!cc) || (cc == ',')) {
*rs_len_ptr = (uintptr_t)(cur_arg_ptr - (*range_start_ptr));
*cur_arg_pptr = cur_arg_ptr;
- *range_end_ptr = NULL;
+ *range_end_ptr = nullptr;
return 0;
}
} while (cc != range_delim);
@@ -2275,7 +2282,7 @@ int32_t parse_chrom_ranges(uint32_t param_ct, char range_delim, char** argv, uin
if (!range_start) {
break;
}
- chrom_code_start = get_chrom_code2(chrom_info_ptr, range_start, rs_len);
+ chrom_code_start = get_chrom_code_counted(chrom_info_ptr, rs_len, range_start);
if (chrom_code_start < 0) {
range_start[rs_len] = '\0';
if (!allow_extra_chroms) {
@@ -2288,7 +2295,7 @@ int32_t parse_chrom_ranges(uint32_t param_ct, char range_delim, char** argv, uin
goto parse_chrom_ranges_ret_NOMEM;
}
} else if (range_end) {
- chrom_code_end = get_chrom_code2(chrom_info_ptr, range_end, re_len);
+ chrom_code_end = get_chrom_code_counted(chrom_info_ptr, re_len, range_end);
if (chrom_code_end < 0) {
if (!allow_extra_chroms) {
range_end[re_len] = '\0';
@@ -2464,7 +2471,7 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
char** argv = *argv_ptr;
FILE* rerunfile = fopen(rerun_parameter_present? argv[rerun_argv_pos + 1] : (PROG_NAME_STR ".log"), "r");
uintptr_t line_idx = 1;
- char** subst_argv2 = NULL;
+ char** subst_argv2 = nullptr;
uint32_t argc = (uint32_t)(*argc_ptr);
uint32_t cur_arg = *cur_arg_ptr;
int32_t retval = 0;
@@ -2483,7 +2490,6 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
uint32_t slen;
uint32_t slen2;
if (!rerunfile) {
- print_ver();
goto rerun_ret_OPEN_FAIL;
}
g_textbuf[MAXLINELEN - 1] = ' ';
@@ -2509,7 +2515,7 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
if ((g_textbuf[0] >= '0') && (g_textbuf[0] <= '9')) {
// Old "xx arguments: --aa bb --cc --dd" format
fclose_null(&rerunfile);
- if (scan_posint_capped(g_textbuf, (MAXLINELEN / 2) / 10, (MAXLINELEN / 2) % 10, &loaded_arg_ct)) {
+ if (scan_posint_capped(g_textbuf, (MAXLINELEN / 2), &loaded_arg_ct)) {
print_ver();
fflush(stdout);
fputs("Error: Invalid argument count on line 2 of --rerun log file.\n", stderr);
@@ -2655,7 +2661,7 @@ int32_t rerun(uint32_t rerun_argv_pos, uint32_t rerun_parameter_present, int32_t
}
*subst_argv_ptr = subst_argv2;
*argv_ptr = subst_argv2;
- subst_argv2 = NULL;
+ subst_argv2 = nullptr;
while (0) {
rerun_ret_NOMEM:
print_ver();
@@ -2887,24 +2893,7 @@ uint32_t valid_varid_template_string(char* varid_str, const char* flag_name) {
return 1;
}
-// if these are defined within init_delim_and_species, they may not persist
-// after function exit
-static const char species_singular_constants[][7] = {"person", "cow", "dog", "horse", "mouse", "plant", "sheep", "sample"};
-static const char species_plural_constants[][8] = {"people", "cattle", "dogs", "horses", "mice", "plants", "sheep", "samples"};
-
int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_map, int32_t argc, char** argv, char* range_delim_ptr, Chrom_info* chrom_info_ptr) {
- // human: 22, X, Y, XY, MT
- // cow: 29, X, Y, MT
- // dog: 38, X, Y, XY, MT
- // horse: 31, X, Y
- // mouse: 19, X, Y
- // rice: 12
- // sheep: 26, X, Y
- const int32_t species_x_code[] = {23, 30, 39, 32, 20, -1, 27};
- const int32_t species_y_code[] = {24, 31, 40, 33, 21, -1, 28};
- const int32_t species_xy_code[] = {25, -1, 41, -1, -1, -1, -1};
- const int32_t species_mt_code[] = {26, 33, 42, -1, -1, -1, -1};
- const uint32_t species_max_code[] = {26, 33, 42, 33, 21, 12, 28};
uint32_t species_code = SPECIES_HUMAN;
uint32_t flag_idx = 0;
uint32_t retval = 0;
@@ -2912,10 +2901,6 @@ int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_
uint32_t param_ct;
int32_t ii;
uint32_t param_idx;
- fill_ulong_zero(chrom_info_ptr->haploid_mask, CHROM_MASK_WORDS);
- fill_ulong_zero(chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS);
- chrom_info_ptr->output_encoding = 0;
- chrom_info_ptr->zero_extra_chroms = 0;
if (flag_match("autosome-num", &flag_idx, flag_ct, flag_buf)) {
species_code = SPECIES_UNKNOWN;
cur_arg = flag_map[flag_idx - 1];
@@ -2923,14 +2908,14 @@ int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto init_delim_and_species_ret_INVALID_CMDLINE_2A;
}
- if (scan_posint_capped(argv[cur_arg + 1], MAX_CHROM_TEXTNUM / 10, MAX_CHROM_TEXTNUM % 10, (uint32_t*)(&ii))) {
+ if (scan_posint_capped(argv[cur_arg + 1], MAX_CHROM_TEXTNUM, (uint32_t*)(&ii))) {
sprintf(g_logbuf, "Error: Invalid --autosome-num parameter '%s'.\n", argv[cur_arg + 1]);
goto init_delim_and_species_ret_INVALID_CMDLINE_WWA;
}
- chrom_info_ptr->x_code = ii + 1;
- chrom_info_ptr->y_code = -1;
- chrom_info_ptr->xy_code = -1;
- chrom_info_ptr->mt_code = -1;
+ chrom_info_ptr->xymt_codes[X_OFFSET] = ii + 1;
+ chrom_info_ptr->xymt_codes[Y_OFFSET] = -1;
+ chrom_info_ptr->xymt_codes[XY_OFFSET] = -1;
+ chrom_info_ptr->xymt_codes[MT_OFFSET] = -1;
chrom_info_ptr->max_code = ii + 1;
chrom_info_ptr->autosome_ct = ii;
set_bit(ii + 1, chrom_info_ptr->haploid_mask);
@@ -2944,7 +2929,7 @@ int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 5)) {
goto init_delim_and_species_ret_INVALID_CMDLINE_2A;
}
- if (scan_int_abs_bounded(argv[cur_arg + 1], MAX_CHROM_TEXTNUM / 10, MAX_CHROM_TEXTNUM % 10, &ii) || (!ii)) {
+ if (scan_int_abs_bounded(argv[cur_arg + 1], MAX_CHROM_TEXTNUM, &ii) || (!ii)) {
sprintf(g_logbuf, "Error: Invalid --chr-set parameter '%s'.\n", argv[cur_arg + 1]);
goto init_delim_and_species_ret_INVALID_CMDLINE_WWA;
}
@@ -2955,43 +2940,43 @@ int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_
}
ii = -ii;
chrom_info_ptr->autosome_ct = ii;
- chrom_info_ptr->x_code = -1;
- chrom_info_ptr->y_code = -1;
- chrom_info_ptr->xy_code = -1;
- chrom_info_ptr->mt_code = -1;
+ chrom_info_ptr->xymt_codes[X_OFFSET] = -1;
+ chrom_info_ptr->xymt_codes[Y_OFFSET] = -1;
+ chrom_info_ptr->xymt_codes[XY_OFFSET] = -1;
+ chrom_info_ptr->xymt_codes[MT_OFFSET] = -1;
chrom_info_ptr->max_code = ii;
fill_all_bits(((uint32_t)ii) + 1, chrom_info_ptr->haploid_mask);
} else {
chrom_info_ptr->autosome_ct = ii;
- chrom_info_ptr->x_code = ii + 1;
- chrom_info_ptr->y_code = ii + 2;
- chrom_info_ptr->xy_code = ii + 3;
- chrom_info_ptr->mt_code = ii + 4;
+ chrom_info_ptr->xymt_codes[X_OFFSET] = ii + 1;
+ chrom_info_ptr->xymt_codes[Y_OFFSET] = ii + 2;
+ chrom_info_ptr->xymt_codes[XY_OFFSET] = ii + 3;
+ chrom_info_ptr->xymt_codes[MT_OFFSET] = ii + 4;
set_bit(ii + 1, chrom_info_ptr->haploid_mask);
set_bit(ii + 2, chrom_info_ptr->haploid_mask);
for (param_idx = 2; param_idx <= param_ct; param_idx++) {
if (!strcmp(argv[cur_arg + param_idx], "no-x")) {
- chrom_info_ptr->x_code = -1;
+ chrom_info_ptr->xymt_codes[X_OFFSET] = -1;
clear_bit(ii + 1, chrom_info_ptr->haploid_mask);
} else if (!strcmp(argv[cur_arg + param_idx], "no-y")) {
- chrom_info_ptr->y_code = -1;
+ chrom_info_ptr->xymt_codes[Y_OFFSET] = -1;
clear_bit(ii + 2, chrom_info_ptr->haploid_mask);
} else if (!strcmp(argv[cur_arg + param_idx], "no-xy")) {
- chrom_info_ptr->xy_code = -1;
+ chrom_info_ptr->xymt_codes[XY_OFFSET] = -1;
} else if (!strcmp(argv[cur_arg + param_idx], "no-mt")) {
- chrom_info_ptr->mt_code = -1;
+ chrom_info_ptr->xymt_codes[MT_OFFSET] = -1;
} else {
sprintf(g_logbuf, "Error: Invalid --chr-set parameter '%s'.\n", argv[cur_arg + param_idx]);
goto init_delim_and_species_ret_INVALID_CMDLINE_WWA;
}
}
- if (chrom_info_ptr->mt_code != -1) {
+ if (chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) {
chrom_info_ptr->max_code = ii + 4;
- } else if (chrom_info_ptr->xy_code != -1) {
+ } else if (chrom_info_ptr->xymt_codes[XY_OFFSET] != -1) {
chrom_info_ptr->max_code = ii + 3;
- } else if (chrom_info_ptr->y_code != -1) {
+ } else if (chrom_info_ptr->xymt_codes[Y_OFFSET] != -1) {
chrom_info_ptr->max_code = ii + 2;
- } else if (chrom_info_ptr->x_code != -1) {
+ } else if (chrom_info_ptr->xymt_codes[X_OFFSET] != -1) {
chrom_info_ptr->max_code = ii + 1;
} else {
chrom_info_ptr->max_code = ii;
@@ -3068,55 +3053,7 @@ int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_
goto init_delim_and_species_ret_INVALID_CMDLINE;
}
}
- chrom_info_ptr->species = species_code;
- chrom_info_ptr->is_include_stack = 0;
- if (species_code != SPECIES_UNKNOWN) {
- chrom_info_ptr->x_code = species_x_code[species_code];
- chrom_info_ptr->y_code = species_y_code[species_code];
- chrom_info_ptr->xy_code = species_xy_code[species_code];
- chrom_info_ptr->mt_code = species_mt_code[species_code];
- chrom_info_ptr->max_code = species_max_code[species_code];
- }
- g_species_singular = species_singular_constants[species_code];
- g_species_plural = species_plural_constants[species_code];
- switch (species_code) {
- case SPECIES_HUMAN:
- chrom_info_ptr->autosome_ct = 22;
- chrom_info_ptr->haploid_mask[0] = 0x1800000;
- break;
- case SPECIES_COW:
- chrom_info_ptr->autosome_ct = 29;
- chrom_info_ptr->haploid_mask[0] = 0xc0000000LU;
- break;
- case SPECIES_DOG:
- chrom_info_ptr->autosome_ct = 38;
-#ifdef __LP64__
- chrom_info_ptr->haploid_mask[0] = 0x18000000000LLU;
-#else
- chrom_info_ptr->haploid_mask[1] = 0x180;
-#endif
- break;
- case SPECIES_HORSE:
- chrom_info_ptr->autosome_ct = 31;
-#ifdef __LP64__
- chrom_info_ptr->haploid_mask[0] = 0x300000000LLU;
-#else
- chrom_info_ptr->haploid_mask[1] = 3;
-#endif
- break;
- case SPECIES_MOUSE:
- chrom_info_ptr->autosome_ct = 19;
- chrom_info_ptr->haploid_mask[0] = 0x300000;
- break;
- case SPECIES_RICE:
- chrom_info_ptr->autosome_ct = 12;
- chrom_info_ptr->haploid_mask[0] = 0x1fff;
- break;
- case SPECIES_SHEEP:
- chrom_info_ptr->autosome_ct = 26;
- chrom_info_ptr->haploid_mask[0] = 0x18000000;
- break;
- }
+ init_species(species_code, chrom_info_ptr);
while (0) {
init_delim_and_species_ret_INVALID_CMDLINE_WWA:
wordwrapb(0);
@@ -3133,27 +3070,6 @@ int32_t init_delim_and_species(uint32_t flag_ct, char* flag_buf, uint32_t* flag_
return retval;
}
-void fill_chrom_mask(Chrom_info* chrom_info_ptr) {
- if (chrom_info_ptr->species != SPECIES_UNKNOWN) {
- fill_all_bits(chrom_info_ptr->max_code + 1, chrom_info_ptr->chrom_mask);
- } else {
- fill_all_bits(chrom_info_ptr->autosome_ct + 1, chrom_info_ptr->chrom_mask);
- // --chr-set support
- if (chrom_info_ptr->x_code != -1) {
- set_bit(chrom_info_ptr->x_code, chrom_info_ptr->chrom_mask);
- }
- if (chrom_info_ptr->y_code != -1) {
- set_bit(chrom_info_ptr->y_code, chrom_info_ptr->chrom_mask);
- }
- if (chrom_info_ptr->xy_code != -1) {
- set_bit(chrom_info_ptr->xy_code, chrom_info_ptr->chrom_mask);
- }
- if (chrom_info_ptr->mt_code != -1) {
- set_bit(chrom_info_ptr->mt_code, chrom_info_ptr->chrom_mask);
- }
- }
-}
-
int32_t recode_type_set(uint32_t* recode_modifier_ptr, uint32_t cur_code) {
if (*recode_modifier_ptr & (RECODE_TYPEMASK - cur_code)) {
logerrprint("Error: Conflicting --recode modifiers.\n");
@@ -3164,71 +3080,71 @@ int32_t recode_type_set(uint32_t* recode_modifier_ptr, uint32_t cur_code) {
}
int32_t main(int32_t argc, char** argv) {
- char* outname_end = NULL;
- char** subst_argv = NULL;
- char* script_buf = NULL;
- char* rerun_buf = NULL;
- char* flag_buf = NULL;
- uint32_t* flag_map = NULL;
- char* makepheno_str = NULL;
- char* phenoname_str = NULL;
- Two_col_params* a1alleles = NULL;
- Two_col_params* a2alleles = NULL;
- char* sample_sort_fname = NULL;
- char* filtervals_flattened = NULL;
- char* evecname = NULL;
- char* filtername = NULL;
- char* distance_wts_fname = NULL;
- char* read_dists_fname = NULL;
- char* read_dists_id_fname = NULL;
- char* freqname = NULL;
- char* extractname = NULL;
- char* excludename = NULL;
- char* keepname = NULL;
- char* removename = NULL;
- char* keepfamname = NULL;
- char* removefamname = NULL;
- char* cm_map_fname = NULL;
- char* cm_map_chrname = NULL;
- char* phenoname = NULL;
- char* recode_allele_name = NULL;
- char* lgen_reference_fname = NULL;
- char* covar_fname = NULL;
- char* update_alleles_fname = NULL;
- Two_col_params* qual_filter = NULL;
- Two_col_params* update_chr = NULL;
- Two_col_params* update_cm = NULL;
- Two_col_params* update_map = NULL;
- Two_col_params* update_name = NULL;
- char* oxford_single_chr = NULL;
- char* oxford_pheno_name = NULL;
- char* update_ids_fname = NULL;
- char* update_parents_fname = NULL;
- char* update_sex_fname = NULL;
- char* loop_assoc_fname = NULL;
- char* flip_fname = NULL;
- char* flip_subset_fname = NULL;
- char* read_genome_fname = NULL;
- char* condition_mname = NULL;
- char* condition_fname = NULL;
- char* missing_marker_id_match = NULL;
- char* filter_attrib_fname = NULL;
- char* filter_attrib_liststr = NULL;
- char* filter_attrib_sample_fname = NULL;
- char* filter_attrib_sample_liststr = NULL;
- char* const_fid = NULL;
- char* vcf_filter_exceptions_flattened = NULL;
- char* gene_report_fname = NULL;
- char* gene_report_glist = NULL;
- char* gene_report_subset = NULL;
- char* gene_report_snp_field = NULL;
- char* metaanal_fnames = NULL;
- char* metaanal_snpfield_search_order = NULL;
- char* metaanal_a1field_search_order = NULL;
- char* metaanal_a2field_search_order = NULL;
- char* metaanal_pfield_search_order = NULL;
- char* metaanal_essfield_search_order = NULL;
- char* rplugin_fname = NULL;
+ char* outname_end = nullptr;
+ char** subst_argv = nullptr;
+ char* script_buf = nullptr;
+ char* rerun_buf = nullptr;
+ char* flag_buf = nullptr;
+ uint32_t* flag_map = nullptr;
+ char* makepheno_str = nullptr;
+ char* phenoname_str = nullptr;
+ Two_col_params* a1alleles = nullptr;
+ Two_col_params* a2alleles = nullptr;
+ char* sample_sort_fname = nullptr;
+ char* filtervals_flattened = nullptr;
+ char* evecname = nullptr;
+ char* filtername = nullptr;
+ char* distance_wts_fname = nullptr;
+ char* read_dists_fname = nullptr;
+ char* read_dists_id_fname = nullptr;
+ char* freqname = nullptr;
+ char* extractname = nullptr;
+ char* excludename = nullptr;
+ char* keepname = nullptr;
+ char* removename = nullptr;
+ char* keepfamname = nullptr;
+ char* removefamname = nullptr;
+ char* cm_map_fname = nullptr;
+ char* cm_map_chrname = nullptr;
+ char* phenoname = nullptr;
+ char* recode_allele_name = nullptr;
+ char* lgen_reference_fname = nullptr;
+ char* covar_fname = nullptr;
+ char* update_alleles_fname = nullptr;
+ Two_col_params* qual_filter = nullptr;
+ Two_col_params* update_chr = nullptr;
+ Two_col_params* update_cm = nullptr;
+ Two_col_params* update_map = nullptr;
+ Two_col_params* update_name = nullptr;
+ char* oxford_single_chr = nullptr;
+ char* oxford_pheno_name = nullptr;
+ char* update_ids_fname = nullptr;
+ char* update_parents_fname = nullptr;
+ char* update_sex_fname = nullptr;
+ char* loop_assoc_fname = nullptr;
+ char* flip_fname = nullptr;
+ char* flip_subset_fname = nullptr;
+ char* read_genome_fname = nullptr;
+ char* condition_mname = nullptr;
+ char* condition_fname = nullptr;
+ char* missing_marker_id_match = nullptr;
+ char* filter_attrib_fname = nullptr;
+ char* filter_attrib_liststr = nullptr;
+ char* filter_attrib_sample_fname = nullptr;
+ char* filter_attrib_sample_liststr = nullptr;
+ char* const_fid = nullptr;
+ char* vcf_filter_exceptions_flattened = nullptr;
+ char* gene_report_fname = nullptr;
+ char* gene_report_glist = nullptr;
+ char* gene_report_subset = nullptr;
+ char* gene_report_snp_field = nullptr;
+ char* metaanal_fnames = nullptr;
+ char* metaanal_snpfield_search_order = nullptr;
+ char* metaanal_a1field_search_order = nullptr;
+ char* metaanal_a2field_search_order = nullptr;
+ char* metaanal_pfield_search_order = nullptr;
+ char* metaanal_essfield_search_order = nullptr;
+ char* rplugin_fname = nullptr;
uint32_t gene_report_border = 0;
uint32_t metaanal_flags = 0;
uint32_t rplugin_port = 6311;
@@ -3293,12 +3209,12 @@ int32_t main(int32_t argc, char** argv) {
double glm_vif_thresh = 50.0;
uint32_t glm_xchr_model = 1;
uint32_t ppc_gap = DEFAULT_PPC_GAP;
- uint32_t* rseeds = NULL;
+ uint32_t* rseeds = nullptr;
uint32_t rseed_ct = 0;
uint32_t genome_modifier = 0;
double genome_min_pi_hat = -1.0;
double genome_max_pi_hat = 1.0;
- FILE* scriptfile = NULL;
+ FILE* scriptfile = nullptr;
uint32_t recode_modifier = 0;
uint32_t allelexxxx = 0;
uint32_t merge_type = 0;
@@ -3310,17 +3226,17 @@ int32_t main(int32_t argc, char** argv) {
uint32_t dummy_flags = 0;
double dummy_missing_geno = 0.0;
double dummy_missing_pheno = 0.0;
- char* simulate_fname = NULL;
+ char* simulate_fname = nullptr;
uint32_t simulate_flags = 0;
uint32_t simulate_cases = 1000;
uint32_t simulate_controls = 1000;
double simulate_prevalence = 0.01;
- char* simulate_label = NULL;
+ char* simulate_label = nullptr;
double simulate_missing = 0.0;
uint32_t simulate_qt_samples = 1000;
- char* markername_from = NULL;
- char* markername_to = NULL;
- char* markername_snp = NULL;
+ char* markername_from = nullptr;
+ char* markername_to = nullptr;
+ char* markername_snp = nullptr;
// minor bugfix: --snp and '--window 0 --snp' should actually behave
// differently when there are other variants at the same bp coordinate
int32_t snp_window_size = -1;
@@ -3355,8 +3271,8 @@ int32_t main(int32_t argc, char** argv) {
uint32_t cnv_min_sites = 0;
uint32_t cnv_max_sites = 0xffffffffU;
uint32_t cnv_intersect_filter_type = 0;
- char* cnv_intersect_filter_fname = NULL;
- char* cnv_subset_fname = NULL;
+ char* cnv_intersect_filter_fname = nullptr;
+ char* cnv_subset_fname = nullptr;
uint32_t cnv_overlap_type = 0;
double cnv_overlap_val = 0.0;
uint32_t cnv_freq_type = 0;
@@ -3371,6 +3287,7 @@ int32_t main(int32_t argc, char** argv) {
double tail_top = 0.0;
double lasso_h2 = 0.0;
double lasso_minlambda = -1;
+ uint32_t lasso_lambda_iters = 0;
uint32_t testmiss_modifier = 0;
uint32_t testmiss_mperm_val = 0;
@@ -3378,16 +3295,16 @@ int32_t main(int32_t argc, char** argv) {
uint32_t new_id_max_allele_len = 23;
uint32_t aperm_present = 0;
- char* segment_spanning_fname = NULL;
- char* missing_code = NULL;
+ char* segment_spanning_fname = nullptr;
+ char* missing_code = nullptr;
char range_delim = '-';
uint32_t modifier_23 = 0;
double pheno_23 = DBL_MAX;
- char* fid_23 = NULL;
- char* iid_23 = NULL;
- char* paternal_id_23 = NULL;
- char* maternal_id_23 = NULL;
- Ll_str* file_delete_list = NULL;
+ char* fid_23 = nullptr;
+ char* iid_23 = nullptr;
+ char* paternal_id_23 = nullptr;
+ char* maternal_id_23 = nullptr;
+ Ll_str* file_delete_list = nullptr;
uint32_t chrom_flag_present = 0;
uintptr_t chrom_exclude[CHROM_MASK_INITIAL_WORDS];
// er, except for first four, these should not be preallocated...
@@ -3404,8 +3321,8 @@ int32_t main(int32_t argc, char** argv) {
int32_t mib[2];
size_t sztmp;
#endif
- unsigned char* bigstack_ua = NULL; // ua = unaligned
- char* bubble = NULL;
+ unsigned char* bigstack_ua = nullptr; // ua = unaligned
+ char* bubble = nullptr;
unsigned char* bigstack_initial_base;
uint32_t param_ct;
time_t rawtime;
@@ -3444,7 +3361,6 @@ int32_t main(int32_t argc, char** argv) {
uint32_t uii;
uint32_t ujj;
uint32_t ukk;
- uint32_t umm;
intptr_t default_alloc_mb;
int64_t llxx;
Ll_str* ll_str_ptr;
@@ -3468,13 +3384,15 @@ int32_t main(int32_t argc, char** argv) {
range_list_init(&lasso_select_covars_range_list);
range_list_init(&parameters_range_list);
range_list_init(&tests_range_list);
- missing_mid_template = NULL;
+ missing_mid_template = nullptr;
// standardize strtod() behavior
setlocale(LC_NUMERIC, "C");
- chrom_info.name_ct = 0;
- chrom_info.incl_excl_name_stack = NULL;
+ if (init_chrom_info(&chrom_info)) {
+ goto main_ret_NOMEM_NOLOG;
+ }
+
for (uii = 1; uii < (uint32_t)argc; uii++) {
if ((!strcmp("-script", argv[uii])) || (!strcmp("--script", argv[uii]))) {
ujj = param_count(argc, argv, uii);
@@ -3497,36 +3415,32 @@ int32_t main(int32_t argc, char** argv) {
scriptfile = fopen(argv[uii + 1], FOPEN_RB);
if (!scriptfile) {
print_ver();
- printf(g_errstr_fopen, argv[uii + 1]);
+ fprintf(stderr, g_errstr_fopen, argv[uii + 1]);
goto main_ret_OPEN_FAIL;
}
if (fseeko(scriptfile, 0, SEEK_END)) {
- print_ver();
- goto main_ret_READ_FAIL;
+ goto main_ret_READ_FAIL_NOLOG;
}
llxx = ftello(scriptfile);
if (llxx == -1) {
- print_ver();
- goto main_ret_READ_FAIL;
+ goto main_ret_READ_FAIL_NOLOG;
} else if (llxx > 0x7fffffff) {
// could actually happen if user enters parameters in the wrong order,
// so may as well catch it and print a somewhat informative error msg
print_ver();
fflush(stdout);
fputs("Error: --script file too large.\n", stderr);
- goto main_ret_NOMEM;
+ goto main_ret_INVALID_CMDLINE;
}
rewind(scriptfile);
ujj = (uint32_t)((uint64_t)llxx);
script_buf = (char*)malloc(ujj);
if (!script_buf) {
- print_ver();
- goto main_ret_NOMEM;
+ goto main_ret_NOMEM_NOLOG;
}
ukk = fread(script_buf, 1, ujj, scriptfile);
if (ukk < ujj) {
- print_ver();
- goto main_ret_READ_FAIL;
+ goto main_ret_READ_FAIL_NOLOG;
}
fclose_null(&scriptfile);
num_params = 0;
@@ -3562,6 +3476,7 @@ int32_t main(int32_t argc, char** argv) {
argc = num_params;
cur_arg = 0;
argv = subst_argv;
+ break;
}
}
for (uii = cur_arg; uii < (uint32_t)argc; uii++) {
@@ -3583,8 +3498,9 @@ int32_t main(int32_t argc, char** argv) {
}
retval = rerun(uii, ujj, &argc, &cur_arg, &argv, &subst_argv, &rerun_buf);
if (retval) {
- goto main_ret_1;
+ goto main_ret_NOLOG;
}
+ break;
}
}
if ((cur_arg < (uint32_t)argc) && (!is_flag(argv[cur_arg]))) {
@@ -3628,9 +3544,11 @@ int32_t main(int32_t argc, char** argv) {
}
if (strlen(argptr) >= MAX_FLAG_LEN) {
print_ver();
- invalid_arg(argv[uii]);
- fputs(g_logbuf, stdout);
- fputs(errstr_append, stdout);
+ // shouldn't be possible for this to overflow the buffer...
+ sprintf(g_logbuf, "Error: Unrecognized flag ('%s').\n", argv[uii]);
+ wordwrapb(0);
+ fputs(g_logbuf, stderr);
+ fputs(errstr_append, stderr);
goto main_ret_INVALID_CMDLINE;
}
flag_ct++;
@@ -3645,20 +3563,21 @@ int32_t main(int32_t argc, char** argv) {
}
if (ujj) {
fputs(ver_str, stdout);
- putchar('\n');
+ putc_unlocked('\n', stdout);
goto main_ret_1;
}
if (ukk) {
- freopen("/dev/null", "w", stdout);
+ if (!freopen("/dev/null", "w", stdout)) {
+ fputs("Warning: --silent failed.\n", stderr);
+ }
}
print_ver();
flag_buf = (char*)malloc(flag_ct * MAX_FLAG_LEN * sizeof(char));
flag_map = (uint32_t*)malloc(flag_ct * sizeof(int32_t));
if ((!flag_buf) || (!flag_map)) {
- goto main_ret_NOMEM;
+ goto main_ret_NOMEM_NOLOG2;
}
flagptr = flag_buf;
- umm = 0; // parameter count increase due to aliases
for (uii = cur_arg; uii < (uint32_t)argc; uii++) {
argptr = is_flag_start(argv[uii]);
if (argptr) {
@@ -3877,7 +3796,6 @@ int32_t main(int32_t argc, char** argv) {
misc_flags |= MISC_SET_HH_MISSING;
fputs("Note: --recode-fastphase flag deprecated. Use e.g. '--recode 01 fastphase-1chr'.\n", stdout);
ujj = 2;
- umm++;
} else if (!memcmp(argptr2, "-structure", 10)) {
memcpy(flagptr, "recode structure", 17);
recode_modifier |= RECODE_STRUCTURE;
@@ -3890,7 +3808,6 @@ int32_t main(int32_t argc, char** argv) {
if (ujj == 1) {
printf("Note: --%s flag deprecated. Use '%s ...'.\n", argptr, flagptr);
}
- umm++;
break;
}
} else if (!strcmp(argptr, "reference-allele")) {
@@ -3925,7 +3842,7 @@ int32_t main(int32_t argc, char** argv) {
// requires MAX_FLAG_LEN to be at least sizeof(void*) + sizeof(int32_t)
sptr = (char*)malloc(flag_ct * MAX_FLAG_LEN);
if (!sptr) {
- goto main_ret_NOMEM;
+ goto main_ret_NOMEM_NOLOG2;
}
qsort_ext2(flag_buf, flag_ct, MAX_FLAG_LEN, strcmp_deref, (char*)flag_map, sizeof(int32_t), sptr, MAX_FLAG_LEN);
free(sptr);
@@ -3949,8 +3866,8 @@ int32_t main(int32_t argc, char** argv) {
ujj = flag_map[cur_flag];
ukk = param_count(argc, argv, ujj);
if (enforce_param_ct_range(ukk, argv[ujj], 1, 1)) {
- fputs(g_logbuf, stdout);
- fputs(errstr_append, stdout);
+ fputs(g_logbuf, stderr);
+ fputs(errstr_append, stderr);
goto main_ret_INVALID_CMDLINE;
}
if (strlen(argv[ujj + 1]) > (FNAMESIZE - MAX_POST_EXT)) {
@@ -3982,19 +3899,6 @@ int32_t main(int32_t argc, char** argv) {
outname[uii] = '\0';
logstr(ver_str);
- /*
- sprintf(g_logbuf, "\n%d argument%s:", argc + umm - cur_arg, (argc + umm - cur_arg == 1)? "" : "s");
- logstr(g_logbuf);
- for (cur_flag = 0; cur_flag < flag_ct; cur_flag++) {
- logstr(" --");
- logstr(&(flag_buf[cur_flag * MAX_FLAG_LEN]));
- ii = flag_map[cur_flag] + 1;
- while ((ii < argc) && (!is_flag(argv[ii]))) {
- logstr(" ");
- logstr(argv[ii++]);
- }
- }
- */
logstr("\n");
logprint("Options in effect:\n");
for (cur_flag = 0; cur_flag < flag_ct; cur_flag++) {
@@ -4020,7 +3924,9 @@ int32_t main(int32_t argc, char** argv) {
logstr(g_textbuf);
}
logstr("\nWorking directory: ");
- getcwd(g_textbuf, FNAMESIZE);
+ if (!getcwd(g_textbuf, FNAMESIZE)) {
+ goto main_ret_READ_FAIL;
+ }
logstr(g_textbuf);
logstr("\nStart time: ");
time(&rawtime);
@@ -4032,14 +3938,8 @@ int32_t main(int32_t argc, char** argv) {
g_thread_ct = sysinfo.dwNumberOfProcessors;
known_procs = g_thread_ct;
#else
- ii = sysconf(_SC_NPROCESSORS_ONLN);
- if (ii == -1) {
- g_thread_ct = 1;
- known_procs = -1;
- } else {
- g_thread_ct = ii;
- known_procs = ii;
- }
+ known_procs = sysconf(_SC_NPROCESSORS_ONLN);
+ g_thread_ct = (known_procs == -1)? 1 : known_procs;
#endif
if (g_thread_ct > 8) {
if (g_thread_ct > MAX_THREADS) {
@@ -4057,7 +3957,7 @@ int32_t main(int32_t argc, char** argv) {
if (retval) {
goto main_ret_1;
}
- fill_ulong_zero(chrom_exclude, CHROM_MASK_INITIAL_WORDS);
+ fill_ulong_zero(CHROM_MASK_INITIAL_WORDS, chrom_exclude);
cur_flag = 0;
do {
argptr = &(flag_buf[cur_flag * MAX_FLAG_LEN]);
@@ -4220,7 +4120,7 @@ int32_t main(int32_t argc, char** argv) {
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
}
- if (scan_posint_capped(argv[cur_arg + 1], 65535 / 10, 65535 % 10, &rplugin_port)) {
+ if (scan_posint_capped(argv[cur_arg + 1], 65535, &rplugin_port)) {
sprintf(g_logbuf, "Error: Invalid --R-port parameter '%s'.\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -4257,12 +4157,12 @@ int32_t main(int32_t argc, char** argv) {
logerrprint("Error: --autosome-xy cannot be used with --autosome.\n");
goto main_ret_INVALID_CMDLINE;
}
- if (chrom_info.xy_code == -1) {
+ if (chrom_info.xymt_codes[XY_OFFSET] == -1) {
logerrprint("Error: --autosome-xy used with a species lacking an XY region.\n");
goto main_ret_INVALID_CMDLINE_A;
}
fill_bits(1, chrom_info.autosome_ct, chrom_info.chrom_mask);
- set_bit(chrom_info.xy_code, chrom_info.chrom_mask);
+ set_bit(chrom_info.xymt_codes[XY_OFFSET], chrom_info.chrom_mask);
chrom_info.is_include_stack = 1;
goto main_param_zero;
} else if (!memcmp(argptr2, "llow-extra-chr", 15)) {
@@ -4285,15 +4185,12 @@ int32_t main(int32_t argc, char** argv) {
sex_missing_pheno |= ALLOW_NO_SEX;
goto main_param_zero;
} else if (!memcmp(argptr2, "llow-no-samples", 16)) {
- UNSTABLE("allow-no-samples");
misc_flags |= MISC_ALLOW_NO_SAMPLES;
goto main_param_zero;
} else if (!memcmp(argptr2, "llow-no-vars", 13)) {
- UNSTABLE("allow-no-vars");
misc_flags |= MISC_ALLOW_NO_VARS;
goto main_param_zero;
} else if (!memcmp(argptr2, "llow-no-covars", 15)) {
- UNSTABLE("allow-no-covars");
covar_modifier |= COVAR_ALLOW_NONE;
goto main_param_zero;
} else if (!memcmp(argptr2, "ll", 3)) {
@@ -4438,7 +4335,7 @@ int32_t main(int32_t argc, char** argv) {
}
aperm.min++;
if (param_ct > 1) {
- if (scan_posint_capped(argv[cur_arg + 2], APERM_MAX / 10, APERM_MAX % 10, &aperm.max)) {
+ if (scan_posint_capped(argv[cur_arg + 2], APERM_MAX, &aperm.max)) {
sprintf(g_logbuf, "Error: Invalid --aperm max permutation count '%s'.\n", argv[cur_arg + 2]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -6525,7 +6422,7 @@ int32_t main(int32_t argc, char** argv) {
if (retval) {
goto main_ret_1;
}
- if (scan_posint_capped(argv[cur_arg + 2], PARALLEL_MAX / 10, PARALLEL_MAX % 10, &epi_info.summary_merge_ct) || (epi_info.summary_merge_ct == 1)) {
+ if (scan_posint_capped(argv[cur_arg + 2], PARALLEL_MAX, &epi_info.summary_merge_ct) || (epi_info.summary_merge_ct == 1)) {
sprintf(g_logbuf, "Error: Invalid --epistasis-summary-merge job count '%s'.\n", argv[cur_arg + 2]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -7824,7 +7721,7 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_INVALID_CMDLINE_2A;
}
// may as well enforce 2^29 / 18 limit...
- if (scan_uint_capped(argv[cur_arg + 1], 29826161 / 10, 29826161 % 10, &epi_info.je_cellmin)) {
+ if (scan_uint_capped(argv[cur_arg + 1], 29826161, &epi_info.je_cellmin)) {
sprintf(g_logbuf, "Error: Invalid --je-cellmin parameter '%s'.\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -8161,6 +8058,35 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_1;
}
}
+ } else if (!memcmp(argptr2, "asso-lambda", 12)) {
+ UNSTABLE("lasso-lambda");
+ if (lasso_minlambda != -1) {
+ logerrprint("Error: --lasso-lambda cannot be used with a --lasso minimum lambda setting.\n");
+ goto main_ret_INVALID_CMDLINE;
+ }
+ if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 2)) {
+ goto main_ret_INVALID_CMDLINE_2A;
+ }
+ if (scan_posint_defcap(argv[cur_arg + 1], &lasso_lambda_iters)) {
+ sprintf(g_logbuf, "Error: Invalid --lasso-lambda parameter '%s'.\n", argv[cur_arg + 1]);
+ goto main_ret_INVALID_CMDLINE_WWA;
+ }
+ if (param_ct == 2) {
+ if (scan_double(argv[cur_arg + 2], &dxx) || (dxx > 1) || (dxx <= 0)) {
+ sprintf(g_logbuf, "Error: Invalid --lasso-lambda heritability estimate '%s'.\n", argv[cur_arg + 2]);
+ goto main_ret_INVALID_CMDLINE_WWA;
+ }
+ if ((calculation_type & CALC_LASSO) && (dxx != lasso_h2)) {
+ logerrprint("Error: --lasso and --lasso-lambda heritability estimates are different.\n");
+ goto main_ret_INVALID_CMDLINE;
+ } else {
+ lasso_h2 = dxx;
+ }
+ } else if (!(calculation_type & CALC_LASSO)) {
+ logerrprint("Error: --lasso-lambda requires a heritability estimate.\n");
+ goto main_ret_INVALID_CMDLINE;
+ }
+ calculation_type |= CALC_LASSO_LAMBDA;
} else if (!memcmp(argptr2, "d-window", 9)) {
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
@@ -9241,7 +9167,7 @@ int32_t main(int32_t argc, char** argv) {
set_info.modifier |= SET_COMPLEMENTS | SET_C_PREFIX | SET_MAKE_COLLAPSE_GROUP;
goto main_param_zero;
} else if (!memcmp(argptr2, "erge-x", 7)) {
- if ((chrom_info.x_code == -1) || (chrom_info.xy_code == -1)) {
+ if ((chrom_info.xymt_codes[X_OFFSET] == -1) || (chrom_info.xymt_codes[XY_OFFSET] == -1)) {
logerrprint("Error: --merge-x must be used with a chromosome set containing X and XY codes.\n");
goto main_ret_INVALID_CMDLINE_A;
}
@@ -9554,12 +9480,12 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_1;
}
if (chrom_info.is_include_stack || (!chrom_flag_present)) {
- fill_chrom_mask(&chrom_info);
+ init_default_chrom_mask(&chrom_info);
}
for (uii = 0; uii < CHROM_MASK_INITIAL_WORDS; uii++) {
chrom_info.chrom_mask[uii] &= ~chrom_exclude[uii];
}
- if (all_words_zero(chrom_info.chrom_mask, CHROM_MASK_INITIAL_WORDS) && ((!((misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1)) || (chrom_info.is_include_stack && (!chrom_info.incl_excl_name_stack)))) {
+ if (are_all_words_zero(chrom_info.chrom_mask, CHROM_MASK_INITIAL_WORDS) && ((!((misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1)) || (chrom_info.is_include_stack && (!chrom_info.incl_excl_name_stack)))) {
logerrprint("Error: All chromosomes excluded.\n");
goto main_ret_INVALID_CMDLINE_A;
}
@@ -9576,7 +9502,7 @@ int32_t main(int32_t argc, char** argv) {
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
}
- if (scan_posint_capped(argv[cur_arg + 1], (MAX_ID_LEN - 2) / 10, (MAX_ID_LEN - 2) % 10, &new_id_max_allele_len)) {
+ if (scan_posint_capped(argv[cur_arg + 1], MAX_ID_SLEN - 2, &new_id_max_allele_len)) {
sprintf(g_logbuf, "Error: Invalid --new-id-max-allele-len parameter '%s'.\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -9838,11 +9764,11 @@ int32_t main(int32_t argc, char** argv) {
if (enforce_param_ct_range(param_ct, argv[cur_arg], 2, 2)) {
goto main_ret_INVALID_CMDLINE_2A;
}
- if (scan_posint_capped(argv[cur_arg + 1], PARALLEL_MAX / 10, PARALLEL_MAX % 10, &parallel_idx)) {
+ if (scan_posint_capped(argv[cur_arg + 1], PARALLEL_MAX, &parallel_idx)) {
sprintf(g_logbuf, "Error: Invalid --parallel job index '%s'.\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
}
- if (scan_posint_capped(argv[cur_arg + 2], PARALLEL_MAX / 10, PARALLEL_MAX % 10, &parallel_tot) || (parallel_tot == 1) || (parallel_tot < parallel_idx)) {
+ if (scan_posint_capped(argv[cur_arg + 2], PARALLEL_MAX, &parallel_tot) || (parallel_tot == 1) || (parallel_tot < parallel_idx)) {
sprintf(g_logbuf, "Error: Invalid --parallel total job count '%s'.\n", argv[cur_arg + 2]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -10159,7 +10085,7 @@ int32_t main(int32_t argc, char** argv) {
logerrprint("Error: --q-score-range takes at most two numeric parameters.\n");
goto main_ret_INVALID_CMDLINE_A;
} else {
- if (scan_posint_capped(argv[cur_arg + uii], (MAXLINEBUFLEN / 2) / 10, (MAXLINEBUFLEN / 2) % 10, (uint32_t*)&ii)) {
+ if (scan_posint_capped(argv[cur_arg + uii], MAXLINEBUFLEN / 2, (uint32_t*)&ii)) {
sprintf(g_logbuf, "Error: Invalid --q-score-range parameter '%s'.\n", argv[cur_arg + uii]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -10575,7 +10501,7 @@ int32_t main(int32_t argc, char** argv) {
if (recode_type_set(&recode_modifier, RECODE_COMPOUND)) {
goto main_ret_INVALID_CMDLINE_A;
}
- } else if (match_upper_nt(argv[cur_arg + uii], "HV", 2)) {
+ } else if (match_upper_counted(argv[cur_arg + uii], "HV", 2)) {
if (!argv[cur_arg + uii][2]) {
if (recode_type_set(&recode_modifier, RECODE_HV)) {
goto main_ret_INVALID_CMDLINE_A;
@@ -10971,7 +10897,7 @@ int32_t main(int32_t argc, char** argv) {
rseed_ct = param_ct;
rseeds = (uint32_t*)malloc(param_ct * sizeof(int32_t));
for (uii = 1; uii <= param_ct; uii++) {
- if (scan_uint_capped(argv[cur_arg + uii], 0xffffffffU / 10, 0xffffffffU % 10, &(rseeds[uii - 1]))) {
+ if (scan_uint_capped(argv[cur_arg + uii], 0xffffffffU, &(rseeds[uii - 1]))) {
sprintf(g_logbuf, "Error: Invalid --seed parameter '%s'.\n", argv[cur_arg + uii]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -10999,7 +10925,7 @@ int32_t main(int32_t argc, char** argv) {
} else if (marker_pos_start != -1) {
logerrprint("Error: --snp cannot be used with --from-bp/-kb/-mb.\n");
goto main_ret_INVALID_CMDLINE_A;
- } else if ((!all_words_zero(chrom_info.chrom_mask, CHROM_MASK_INITIAL_WORDS)) || chrom_info.incl_excl_name_stack) {
+ } else if ((!are_all_words_zero(chrom_info.chrom_mask, CHROM_MASK_INITIAL_WORDS)) || chrom_info.incl_excl_name_stack) {
logerrprint("Error: --snp cannot be used with --autosome{-xy} or --{not-}chr.\n");
goto main_ret_INVALID_CMDLINE_A;
} else if (markername_snp) {
@@ -11392,7 +11318,7 @@ int32_t main(int32_t argc, char** argv) {
if (misc_flags & MISC_MERGEX) {
logerrprint("Error: --split-x cannot be used with --merge-x.\n");
goto main_ret_INVALID_CMDLINE_A;
- } else if ((chrom_info.x_code == -1) || (chrom_info.xy_code == -1)) {
+ } else if ((chrom_info.xymt_codes[X_OFFSET] == -1) || (chrom_info.xymt_codes[XY_OFFSET] == -1)) {
logerrprint("Error: --split-x must be used with a chromosome set containing X and XY codes.\n");
goto main_ret_INVALID_CMDLINE_A;
}
@@ -11422,7 +11348,7 @@ int32_t main(int32_t argc, char** argv) {
splitx_bound2 = 154931044;
} else if ((!strcmp(argv[cur_arg + uii], "b38")) || (!strcmp(argv[cur_arg + uii], "hg38"))) {
splitx_bound1 = 2781479;
- splitx_bound1 = 155701383;
+ splitx_bound2 = 155701383;
} else {
sprintf(g_logbuf, "Error: Unrecognized --split-x build code '%s'.\n", argv[cur_arg + uii]);
goto main_ret_INVALID_CMDLINE_WWA;
@@ -11527,7 +11453,7 @@ int32_t main(int32_t argc, char** argv) {
logerrprint("Error: --score takes at most three numeric parameters.\n");
goto main_ret_INVALID_CMDLINE_A;
} else {
- if (scan_posint_capped(argv[cur_arg + uii], (MAXLINEBUFLEN / 2) / 10, (MAXLINEBUFLEN / 2) % 10, (uint32_t*)&ii)) {
+ if (scan_posint_capped(argv[cur_arg + uii], MAXLINEBUFLEN / 2, (uint32_t*)&ii)) {
sprintf(g_logbuf, "Error: Invalid --score parameter '%s'.\n", argv[cur_arg + uii]);
goto main_ret_INVALID_CMDLINE_WWA;
}
@@ -11719,7 +11645,7 @@ int32_t main(int32_t argc, char** argv) {
memcpy(pedname, argv[cur_arg + 1], jj + 1);
load_rare |= LOAD_RARE_TPED;
} else if (!memcmp(argptr2, "o", 2)) {
- if ((!all_words_zero(chrom_info.chrom_mask, CHROM_MASK_INITIAL_WORDS)) || chrom_info.incl_excl_name_stack) {
+ if ((!are_all_words_zero(chrom_info.chrom_mask, CHROM_MASK_INITIAL_WORDS)) || chrom_info.incl_excl_name_stack) {
logerrprint("Error: --to cannot be used with --autosome{-xy} or --{not-}chr.\n");
goto main_ret_INVALID_CMDLINE_A;
} else if (markername_snp) {
@@ -12279,7 +12205,7 @@ int32_t main(int32_t argc, char** argv) {
}
logprint("Note: --update-map [filename] + parameter-free --update-name deprecated. Use\n--update-name [filename] instead.\n");
update_name = update_map;
- update_map = NULL;
+ update_map = nullptr;
} else {
if (update_map) {
// no point in explaining the deprecated exception to this in the
@@ -12423,6 +12349,11 @@ int32_t main(int32_t argc, char** argv) {
logerrprint("Error: --vcf-min-gq must be used with --vcf.\n");
goto main_ret_INVALID_CMDLINE;
}
+ if (vcf_half_call) {
+ logerrprint("Error: --vcf-min-gq cannot currently be used with --vcf-half-call.\n");
+ retval = RET_CALC_NOT_YET_SUPPORTED;
+ goto main_ret_1;
+ }
if (enforce_param_ct_range(param_ct, argv[cur_arg], 1, 1)) {
goto main_ret_INVALID_CMDLINE_2A;
}
@@ -12465,6 +12396,8 @@ int32_t main(int32_t argc, char** argv) {
vcf_half_call = VCF_HALF_CALL_MISSING;
} else if ((!strcmp(argv[cur_arg + 1], "e")) || (!strcmp(argv[cur_arg + 1], "error"))) {
vcf_half_call = VCF_HALF_CALL_ERROR;
+ } else if ((!strcmp(argv[cur_arg + 1], "r")) || (!strcmp(argv[cur_arg + 1], "reference"))) {
+ vcf_half_call = VCF_HALF_CALL_REFERENCE;
} else {
sprintf(g_logbuf, "Error: '%s' is not a valid mode for --vcf-half-call.\n", argv[cur_arg + 1]);
goto main_ret_INVALID_CMDLINE_WWA;
@@ -13122,7 +13055,7 @@ int32_t main(int32_t argc, char** argv) {
marker_pos_end = 0x7ffffffe;
}
if (!chrom_flag_present) {
- fill_chrom_mask(&chrom_info);
+ init_default_chrom_mask(&chrom_info);
}
if (((marker_pos_start != -1) && (!markername_to)) || ((marker_pos_end != -1) && (!markername_from))) {
// require exactly one chromosome to be defined given --from-bp/--to-bp
@@ -13251,7 +13184,7 @@ int32_t main(int32_t argc, char** argv) {
goto main_ret_INVALID_CMDLINE_A;
}
if (calculation_type && (!(calculation_type & (~(CALC_FREQ | CALC_MISSING_REPORT)))) && ((geno_thresh != 1.0) || (hwe_thresh != 0.0) || (min_maf != 0.0) || (max_maf != 0.5) || min_ac || (max_ac != 0x7fffffff))) {
- logerrprint("Warning: --freq and --missing complete BEFORE --geno, --hwe, and --maf in\nplink's norder of operations.\n");
+ logerrprint("Warning: --freq and --missing complete BEFORE --geno, --hwe, and --maf in\nplink's order of operations.\n");
}
// short batch job?
uii = 0;
@@ -13273,13 +13206,13 @@ int32_t main(int32_t argc, char** argv) {
free_cond(rerun_buf);
free_cond(flag_buf);
free_cond(flag_map);
- subst_argv = NULL;
- script_buf = NULL;
- rerun_buf = NULL;
- flag_buf = NULL;
- flag_map = NULL;
+ subst_argv = nullptr;
+ script_buf = nullptr;
+ rerun_buf = nullptr;
+ flag_buf = nullptr;
+ flag_map = nullptr;
if (!rseeds) {
- ujj = (uint32_t)time(NULL);
+ ujj = (uint32_t)time(nullptr);
sprintf(g_logbuf, "Random number seed: %u\n", ujj);
logstr(g_logbuf);
sfmt_init_gen_rand(&g_sfmt, ujj);
@@ -13290,7 +13223,7 @@ int32_t main(int32_t argc, char** argv) {
sfmt_init_by_array(&g_sfmt, rseeds, rseed_ct);
}
free(rseeds);
- rseeds = NULL;
+ rseeds = nullptr;
}
// guarantee contiguous malloc space outside of main workspace
bubble = (char*)malloc(NON_BIGSTACK_MIN * sizeof(char));
@@ -13304,7 +13237,7 @@ int32_t main(int32_t argc, char** argv) {
mib[1] = HW_MEMSIZE;
llxx = 0;
sztmp = sizeof(int64_t);
- sysctl(mib, 2, &llxx, &sztmp, NULL, 0);
+ sysctl(mib, 2, &llxx, &sztmp, nullptr, 0);
llxx /= 1048576;
#else
#ifdef _WIN32
@@ -13356,7 +13289,7 @@ int32_t main(int32_t argc, char** argv) {
g_bigstack_base = bigstack_initial_base;
g_bigstack_end = &(bigstack_initial_base[(malloc_size_mb * 1048576 - (uintptr_t)(bigstack_initial_base - bigstack_ua)) & (~(CACHELINE - ONELU))]);
free(bubble);
- bubble = NULL;
+ bubble = nullptr;
// standalone stuff
if (epi_info.summary_merge_prefix) {
@@ -13369,13 +13302,13 @@ int32_t main(int32_t argc, char** argv) {
retval = annotate(&annot_info, outname, outname_end, pfilter, &chrom_info);
}
if (gene_report_fname) {
- retval = gene_report(gene_report_fname, gene_report_glist, gene_report_subset, gene_report_border, (misc_flags & MISC_EXTRACT_RANGE)? NULL : extractname, gene_report_snp_field, outname, outname_end, pfilter, &chrom_info);
+ retval = gene_report(gene_report_fname, gene_report_glist, gene_report_subset, gene_report_border, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, gene_report_snp_field, outname, outname_end, pfilter, &chrom_info);
if (retval) {
goto main_ret_1;
}
}
if (metaanal_fnames) {
- retval = meta_analysis(metaanal_fnames, metaanal_snpfield_search_order, metaanal_a1field_search_order, metaanal_a2field_search_order, metaanal_pfield_search_order, metaanal_essfield_search_order, metaanal_flags, (misc_flags & MISC_EXTRACT_RANGE)? NULL : extractname, outname, outname_end, output_min_p, &chrom_info);
+ retval = meta_analysis(metaanal_fnames, metaanal_snpfield_search_order, metaanal_a1field_search_order, metaanal_a2field_search_order, metaanal_pfield_search_order, metaanal_essfield_search_order, metaanal_flags, (misc_flags & MISC_EXTRACT_RANGE)? nullptr : extractname, outname, outname_end, output_min_p, &chrom_info);
if (retval) {
goto main_ret_1;
}
@@ -13452,10 +13385,10 @@ int32_t main(int32_t argc, char** argv) {
// care of generating those test cases
retval = simulate_dataset(outname, sptr, simulate_flags, simulate_fname, simulate_cases, simulate_controls, simulate_prevalence, simulate_qt_samples, simulate_missing, simulate_label);
free(simulate_fname);
- simulate_fname = NULL;
+ simulate_fname = nullptr;
if (simulate_label) {
free(simulate_label);
- simulate_label = NULL;
+ simulate_label = nullptr;
}
} else if (load_params & LOAD_PARAMS_OX_ALL) {
retval = oxford_to_bed(pedname, mapname, outname, sptr, oxford_single_chr, oxford_pheno_name, hard_call_threshold, missing_code, missing_pheno, misc_flags, (load_params / LOAD_PARAMS_OXBGEN) & 1, &chrom_info);
@@ -13485,7 +13418,7 @@ int32_t main(int32_t argc, char** argv) {
} else if (!rel_info.ibc_type) {
rel_info.ibc_type = 1;
}
- retval = plink(outname, outname_end, pedname, mapname, famname, cm_map_fname, cm_map_chrname, phenoname, extractname, excludename, keepname, removename, keepfamname, removefamname, filtername, freqname, distance_wts_fname, read_dists_fname, read_dists_id_fname, evecname, mergename1, mergename2, mergename3, missing_mid_template, missing_marker_id_match, makepheno_str, phenoname_str, a1alleles, a2alleles, recode_allele_name, covar_fname, update_alleles_fname, read_genome_fname, qual_fi [...]
+ retval = plink(outname, outname_end, pedname, mapname, famname, cm_map_fname, cm_map_chrname, phenoname, extractname, excludename, keepname, removename, keepfamname, removefamname, filtername, freqname, distance_wts_fname, read_dists_fname, read_dists_id_fname, evecname, mergename1, mergename2, mergename3, missing_mid_template, missing_marker_id_match, makepheno_str, phenoname_str, a1alleles, a2alleles, recode_allele_name, covar_fname, update_alleles_fname, read_genome_fname, qual_fi [...]
}
while (0) {
main_ret_NOMEM:
@@ -13543,8 +13476,22 @@ int32_t main(int32_t argc, char** argv) {
#endif
}
main_ret_1:
- fclose_cond(scriptfile);
disp_exit_msg(retval);
+ while (0) {
+ main_ret_NOMEM_NOLOG:
+ print_ver();
+ main_ret_NOMEM_NOLOG2:
+ fputs(errstr_nomem, stderr);
+ retval = RET_NOMEM;
+ break;
+ main_ret_READ_FAIL_NOLOG:
+ print_ver();
+ fputs(errstr_read, stderr);
+ retval = RET_READ_FAIL;
+ break;
+ }
+ main_ret_NOLOG:
+ fclose_cond(scriptfile);
free_cond(bubble);
free_cond(bigstack_ua);
free_cond(subst_argv);
@@ -13646,14 +13593,7 @@ int32_t main(int32_t argc, char** argv) {
file_delete_list = ll_str_ptr;
} while (file_delete_list);
}
- forget_extra_chrom_names(&chrom_info);
- if (chrom_info.incl_excl_name_stack) {
- do {
- ll_str_ptr = chrom_info.incl_excl_name_stack->next;
- free(chrom_info.incl_excl_name_stack);
- chrom_info.incl_excl_name_stack = ll_str_ptr;
- } while (chrom_info.incl_excl_name_stack);
- }
+ cleanup_chrom_info(&chrom_info);
if (g_logfile) {
if (!g_log_failed) {
logstr("\nEnd time: ");
@@ -13666,14 +13606,14 @@ int32_t main(int32_t argc, char** argv) {
} else {
fclose(g_logfile);
}
- g_logfile = NULL;
+ g_logfile = nullptr;
}
if (misc_flags & MISC_GPLINK) {
memcpy(outname_end, ".gplink", 8);
g_logfile = fopen(outname, "w");
if (g_logfile) { // can't do much if an error occurs here...
- putc(retval? '1' : '0', g_logfile);
- putc('\n', g_logfile);
+ putc_unlocked(retval? '1' : '0', g_logfile);
+ putc_unlocked('\n', g_logfile);
fclose(g_logfile);
}
}
diff --git a/plink_assoc.c b/plink_assoc.c
index dc8b556..536205b 100644
--- a/plink_assoc.c
+++ b/plink_assoc.c
@@ -224,7 +224,7 @@ int32_t multcomp(char* outname, char* outname_end, uint32_t* marker_uidxs, uintp
unsigned char* bigstack_mark = g_bigstack_base;
uint32_t is_log10 = mtest_adjust & ADJUST_LOG10;
uint32_t qq_plot = mtest_adjust & ADJUST_QQ;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
double pv_holm = 0.0;
double pv_sidak_sd = 0;
int32_t retval = 0;
@@ -232,8 +232,8 @@ int32_t multcomp(char* outname, char* outname_end, uint32_t* marker_uidxs, uintp
uint32_t adjust_gc = (mtest_adjust & ADJUST_GC) && (!skip_gc);
uint32_t output_min_p_strlen = 11;
uint32_t uii = 0;
- uint32_t* new_tcnt = NULL;
- double* unadj = NULL;
+ uint32_t* new_tcnt = nullptr;
+ double* unadj = nullptr;
char output_min_p_str[16];
uint32_t pct;
double* sp;
@@ -457,7 +457,7 @@ int32_t multcomp(char* outname, char* outname_end, uint32_t* marker_uidxs, uintp
}
marker_uidx = new_order[cur_idx];
if (!is_set_test) {
- bufptr = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, get_marker_chrom(chrom_info_ptr, marker_uidx), g_textbuf));
+ bufptr = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, marker_uidx), g_textbuf));
} else {
bufptr = g_textbuf;
}
@@ -537,7 +537,7 @@ int32_t multcomp(char* outname, char* outname_end, uint32_t* marker_uidxs, uintp
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -2465,9 +2465,9 @@ THREAD_RET_TYPE assoc_maxt_thread(void* arg) {
// currently safe for this to be uint32_t since perm_vec_ct < 2^30
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uintptr_t perm_vec_ctcl8m = round_up_pow2(perm_vec_ct, CACHELINE_DBL);
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
@@ -2479,7 +2479,7 @@ THREAD_RET_TYPE assoc_maxt_thread(void* arg) {
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
double* __restrict__ mperm_save_all = g_mperm_save_all;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* __restrict__ loadbuf;
uint32_t* __restrict__ missing_cts;
uint32_t* __restrict__ set_cts;
@@ -2620,9 +2620,9 @@ THREAD_RET_TYPE assoc_maxt_thread(void* arg) {
ldrefs[marker_idx] = ldref;
}
if (ldref == marker_bidx) {
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, loadbuf_cur, perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
} else {
memcpy(git_homrar_cts, &(resultbuf[3 * ldref * perm_vec_ctcl4m]), 3 * perm_vec_ctcl4m * sizeof(int32_t));
calc_rem(pheno_nm_ct, perm_vec_ct, loadbuf_cur, &(loadbuf[ldref * pheno_nm_ctv2]), perm_vecst, git_homrar_cts, thread_git_wkspace);
@@ -2733,9 +2733,9 @@ THREAD_RET_TYPE assoc_set_thread(void* arg) {
uintptr_t pheno_nm_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uint32_t* resultbuf = g_resultbuf;
uint32_t case_ct = g_perm_case_ct;
@@ -2743,7 +2743,7 @@ THREAD_RET_TYPE assoc_set_thread(void* arg) {
uintptr_t* __restrict__ nonmale_vec = g_sample_nonmale_include2;
uintptr_t* __restrict__ perm_vecs = g_perm_vecs;
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* loadbuf;
uintptr_t* loadbuf_cur;
uint32_t* __restrict__ missing_cts;
@@ -2804,9 +2804,9 @@ THREAD_RET_TYPE assoc_set_thread(void* arg) {
git_homrar_cts = &(resultbuf[3 * marker_bidx * perm_vec_ctcl4m]);
git_missing_cts = &(git_homrar_cts[perm_vec_ctcl4m]);
git_het_cts = &(git_homrar_cts[2 * perm_vec_ctcl4m]);
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, loadbuf_cur, perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
}
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
if (!is_x_or_y) {
@@ -2994,16 +2994,16 @@ THREAD_RET_TYPE qassoc_adapt_thread(void* arg) {
if ((dyy > aperm_alpha) || (dzz < aperm_alpha)) {
perm_adapt_stop[marker_idx] = 1;
perm_attempt_ct[marker_idx] = next_adapt_check;
- fill_double_zero(git_qt_g_prod, next_cqg);
- fill_double_zero(git_qt_sum, next_cqg);
- fill_double_zero(git_qt_ssq, next_cqg);
+ fill_double_zero(next_cqg, git_qt_g_prod);
+ fill_double_zero(next_cqg, git_qt_sum);
+ fill_double_zero(next_cqg, git_qt_ssq);
goto qassoc_adapt_thread_lesszero;
}
}
next_adapt_check += (int32_t)(adaptive_intercept + ((int32_t)next_adapt_check) * adaptive_slope);
}
}
- fill_double_zero(git_qt_g_prod, perm_vec_ctcl8m * 3);
+ fill_double_zero(perm_vec_ctcl8m * 3, git_qt_g_prod);
qassoc_adapt_thread_lesszero:
perm_2success_ct[marker_idx] += success_2incr;
}
@@ -3168,19 +3168,19 @@ THREAD_RET_TYPE qassoc_adapt_lin_thread(void* arg) {
if ((dyy > aperm_alpha) || (dzz < aperm_alpha)) {
perm_adapt_stop[marker_idx] = 1;
perm_attempt_ct[marker_idx] = next_adapt_check;
- fill_double_zero(git_qt_het_sum, next_cqg);
- fill_double_zero(git_qt_het_ssq, next_cqg);
- fill_double_zero(git_qt_homrar_sum, next_cqg);
- fill_double_zero(git_qt_homrar_ssq, next_cqg);
- fill_double_zero(git_qt_missing_sum, next_cqg);
- fill_double_zero(git_qt_missing_ssq, next_cqg);
+ fill_double_zero(next_cqg, git_qt_het_sum);
+ fill_double_zero(next_cqg, git_qt_het_ssq);
+ fill_double_zero(next_cqg, git_qt_homrar_sum);
+ fill_double_zero(next_cqg, git_qt_homrar_ssq);
+ fill_double_zero(next_cqg, git_qt_missing_sum);
+ fill_double_zero(next_cqg, git_qt_missing_ssq);
goto qassoc_adapt_lin_thread_lesszero;
}
}
next_adapt_check += (int32_t)(adaptive_intercept + ((int32_t)next_adapt_check) * adaptive_slope);
}
}
- fill_double_zero(git_qt_het_sum, perm_vec_ctcl8m * 6);
+ fill_double_zero(perm_vec_ctcl8m * 6, git_qt_het_sum);
qassoc_adapt_lin_thread_lesszero:
perm_2success_ct[marker_idx] += success_2incr;
}
@@ -3202,7 +3202,7 @@ THREAD_RET_TYPE qassoc_maxt_thread(void* arg) {
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
double* __restrict__ perm_vecstd = g_perm_vecstd;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
double pheno_sum = g_pheno_sum;
double pheno_ssq = g_pheno_ssq;
double* git_qt_g_prod;
@@ -3361,7 +3361,7 @@ THREAD_RET_TYPE qassoc_maxt_thread(void* arg) {
ldrefs[marker_idx] = ldref;
}
if (ldref == marker_bidx) {
- fill_double_zero(git_qt_g_prod, perm_vec_ctcl8m * 3);
+ fill_double_zero(perm_vec_ctcl8m * 3, git_qt_g_prod);
calc_qgit(pheno_nm_ct, perm_vec_ctcl8m, perm_vec_ct, loadbuf_cur, perm_vecstd, git_qt_g_prod);
} else {
memcpy(git_qt_g_prod, &(qresultbuf[3 * ldref * perm_vec_ctcl8m]), 3 * perm_vec_ctcl8m * sizeof(double));
@@ -3410,7 +3410,7 @@ THREAD_RET_TYPE qassoc_maxt_lin_thread(void* arg) {
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
double* __restrict__ perm_vecstd = g_perm_vecstd;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
double pheno_sum = g_pheno_sum;
double pheno_ssq = g_pheno_ssq;
double* git_qt_het_sum;
@@ -3545,7 +3545,7 @@ THREAD_RET_TYPE qassoc_maxt_lin_thread(void* arg) {
ldrefs[marker_idx] = ldref;
}
if (ldref == marker_bidx) {
- fill_double_zero(git_qt_het_sum, perm_vec_ctcl8m * 6);
+ fill_double_zero(perm_vec_ctcl8m * 6, git_qt_het_sum);
calc_qgit_lin(pheno_nm_ct, perm_vec_ctcl8m, perm_vec_ct, loadbuf_cur, perm_vecstd, git_qt_het_sum);
} else {
memcpy(git_qt_het_sum, &(qresultbuf[6 * ldref * perm_vec_ctcl8m]), 6 * perm_vec_ctcl8m * sizeof(double));
@@ -3672,7 +3672,7 @@ THREAD_RET_TYPE qassoc_set_thread(void* arg) {
vbeta_sqrt = sqrt((qt_var * geno_var_recip - beta * beta) * nanal_m2_recip);
*msa_ptr++ = fabs(beta / vbeta_sqrt);
}
- fill_double_zero(git_qt_g_prod, perm_vec_ctcl8m * 3);
+ fill_double_zero(perm_vec_ctcl8m * 3, git_qt_g_prod);
}
qassoc_set_thread_skip_all:
if ((!tidx) || g_is_last_thread_block) {
@@ -3861,9 +3861,9 @@ THREAD_RET_TYPE model_maxt_domrec_thread(void* arg) {
uint32_t fisher_midp = g_fisher_midp;
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uintptr_t perm_vec_ctcl8m = round_up_pow2(perm_vec_ct, CACHELINE_DBL);
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
@@ -3873,7 +3873,7 @@ THREAD_RET_TYPE model_maxt_domrec_thread(void* arg) {
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
double* __restrict__ mperm_save_all = g_mperm_save_all;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* __restrict__ loadbuf;
uint32_t* __restrict__ missing_cts;
uint32_t* __restrict__ het_cts;
@@ -3995,9 +3995,9 @@ THREAD_RET_TYPE model_maxt_domrec_thread(void* arg) {
ldrefs[marker_idx] = ldref;
}
if (ldref == marker_bidx) {
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, &(loadbuf[marker_bidx * pheno_nm_ctv2]), perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
} else {
memcpy(git_homrar_cts, &(resultbuf[3 * ldref * perm_vec_ctcl4m]), 3 * perm_vec_ctcl4m * sizeof(int32_t));
calc_rem(pheno_nm_ct, perm_vec_ct, loadbuf_cur, &(loadbuf[ldref * pheno_nm_ctv2]), perm_vecst, git_homrar_cts, thread_git_wkspace);
@@ -4091,15 +4091,15 @@ THREAD_RET_TYPE model_set_domrec_thread(void* arg) {
uintptr_t pheno_nm_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uint32_t* resultbuf = g_resultbuf;
uint32_t case_ct = g_perm_case_ct;
int32_t is_model_prec = g_is_model_prec;
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* loadbuf;
uintptr_t* loadbuf_cur;
uint32_t* __restrict__ missing_cts;
@@ -4151,9 +4151,9 @@ THREAD_RET_TYPE model_set_domrec_thread(void* arg) {
git_homrar_cts = &(resultbuf[3 * marker_bidx * perm_vec_ctcl4m]);
git_missing_cts = &(git_homrar_cts[perm_vec_ctcl4m]);
git_het_cts = &(git_homrar_cts[2 * perm_vec_ctcl4m]);
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, loadbuf_cur, perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
case_missing_ct = git_missing_cts[pidx];
if (is_model_prec) {
@@ -4320,9 +4320,9 @@ THREAD_RET_TYPE model_maxt_trend_thread(void* arg) {
uint32_t pidx_offset = g_perms_done - perm_vec_ct;
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uintptr_t perm_vec_ctcl8m = round_up_pow2(perm_vec_ct, CACHELINE_DBL);
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
@@ -4331,7 +4331,7 @@ THREAD_RET_TYPE model_maxt_trend_thread(void* arg) {
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
double* __restrict__ mperm_save_all = g_mperm_save_all;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* __restrict__ loadbuf;
uint32_t* __restrict__ missing_cts;
uint32_t* __restrict__ het_cts;
@@ -4430,9 +4430,9 @@ THREAD_RET_TYPE model_maxt_trend_thread(void* arg) {
ldrefs[marker_idx] = ldref;
}
if (ldref == marker_bidx) {
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, loadbuf_cur, perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
} else {
memcpy(git_homrar_cts, &(resultbuf[3 * ldref * perm_vec_ctcl4m]), 3 * perm_vec_ctcl4m * sizeof(int32_t));
calc_rem(pheno_nm_ct, perm_vec_ct, loadbuf_cur, &(loadbuf[ldref * pheno_nm_ctv2]), perm_vecst, git_homrar_cts, thread_git_wkspace);
@@ -4502,14 +4502,14 @@ THREAD_RET_TYPE model_set_trend_thread(void* arg) {
uintptr_t pheno_nm_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uint32_t* resultbuf = g_resultbuf;
uint32_t case_ct = g_perm_case_ct;
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* loadbuf;
uintptr_t* loadbuf_cur;
uint32_t* __restrict__ missing_cts;
@@ -4555,9 +4555,9 @@ THREAD_RET_TYPE model_set_trend_thread(void* arg) {
git_homrar_cts = &(resultbuf[3 * marker_bidx * perm_vec_ctcl4m]);
git_missing_cts = &(git_homrar_cts[perm_vec_ctcl4m]);
git_het_cts = &(git_homrar_cts[2 * perm_vec_ctcl4m]);
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, loadbuf_cur, perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
case_missing_ct = git_missing_cts[pidx];
case_com_ct = 2 * (case_ct - case_missing_ct - git_homrar_cts[pidx]) - git_het_cts[pidx];
@@ -4732,9 +4732,9 @@ THREAD_RET_TYPE model_maxt_gen_thread(void* arg) {
uint32_t fisher_midp = g_fisher_midp;
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uintptr_t perm_vec_ctcl8m = round_up_pow2(perm_vec_ct, CACHELINE_DBL);
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
@@ -4742,7 +4742,7 @@ THREAD_RET_TYPE model_maxt_gen_thread(void* arg) {
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
double* __restrict__ mperm_save_all = g_mperm_save_all;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* __restrict__ loadbuf;
uint32_t* __restrict__ missing_cts;
uint32_t* __restrict__ het_cts;
@@ -4848,9 +4848,9 @@ THREAD_RET_TYPE model_maxt_gen_thread(void* arg) {
ldrefs[marker_idx] = ldref;
}
if (ldref == marker_bidx) {
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, loadbuf_cur, perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
} else {
memcpy(git_homrar_cts, &(resultbuf[3 * ldref * perm_vec_ctcl4m]), 3 * perm_vec_ctcl4m * sizeof(int32_t));
calc_rem(pheno_nm_ct, perm_vec_ct, loadbuf_cur, &(loadbuf[ldref * pheno_nm_ctv2]), perm_vecst, git_homrar_cts, thread_git_wkspace);
@@ -5148,9 +5148,9 @@ THREAD_RET_TYPE model_maxt_best_thread(void* arg) {
uint32_t fisher_midp = g_fisher_midp;
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uintptr_t perm_vec_ctcl8m = round_up_pow2(perm_vec_ct, CACHELINE_DBL);
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
@@ -5159,7 +5159,7 @@ THREAD_RET_TYPE model_maxt_best_thread(void* arg) {
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
double* __restrict__ mperm_save_all = g_mperm_save_all;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* __restrict__ loadbuf;
uintptr_t* is_invalid;
uint32_t* __restrict__ missing_cts;
@@ -5282,9 +5282,9 @@ THREAD_RET_TYPE model_maxt_best_thread(void* arg) {
ldrefs[marker_idx] = ldref;
}
if (ldref == marker_bidx) {
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, &(loadbuf[marker_bidx * pheno_nm_ctv2]), perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
} else {
memcpy(git_homrar_cts, &(resultbuf[3 * ldref * perm_vec_ctcl4m]), 3 * perm_vec_ctcl4m * sizeof(int32_t));
calc_rem(pheno_nm_ct, perm_vec_ct, loadbuf_cur, &(loadbuf[ldref * pheno_nm_ctv2]), perm_vecst, git_homrar_cts, thread_git_wkspace);
@@ -5471,14 +5471,14 @@ THREAD_RET_TYPE model_set_best_thread(void* arg) {
uintptr_t pheno_nm_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
uint32_t perm_ctvc = BITCT_TO_VECCT(perm_vec_ct);
uint32_t* thread_git_wkspace = &(g_thread_git_wkspace[tidx * perm_ctvc * 144 * BYTECT4]);
- uint32_t* git_homrar_cts = NULL;
- uint32_t* git_missing_cts = NULL;
- uint32_t* git_het_cts = NULL;
+ uint32_t* git_homrar_cts = nullptr;
+ uint32_t* git_missing_cts = nullptr;
+ uint32_t* git_het_cts = nullptr;
uintptr_t perm_vec_ctcl4m = round_up_pow2(perm_vec_ct, CACHELINE_INT32);
uint32_t* resultbuf = g_resultbuf;
uint32_t case_ct = g_perm_case_ct;
uint32_t* __restrict__ perm_vecst = g_perm_vecst;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
uintptr_t* loadbuf;
uintptr_t* loadbuf_cur;
uintptr_t* is_invalid;
@@ -5538,9 +5538,9 @@ THREAD_RET_TYPE model_set_best_thread(void* arg) {
git_homrar_cts = &(resultbuf[3 * marker_bidx * perm_vec_ctcl4m]);
git_missing_cts = &(git_homrar_cts[perm_vec_ctcl4m]);
git_het_cts = &(git_homrar_cts[2 * perm_vec_ctcl4m]);
- fill_uint_zero(git_homrar_cts, 3 * perm_vec_ctcl4m);
+ fill_uint_zero(3 * perm_vec_ctcl4m, git_homrar_cts);
calc_git(pheno_nm_ct, perm_vec_ct, loadbuf_cur, perm_vecst, git_homrar_cts, thread_git_wkspace);
- fill_uint_zero(thread_git_wkspace, perm_ctvc * 72 * BYTECT4);
+ fill_uint_zero(perm_ctvc * 72 * BYTECT4, thread_git_wkspace);
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
case_missing_ct = git_missing_cts[pidx];
case_het_ct = git_het_cts[pidx];
@@ -5592,18 +5592,18 @@ int32_t model_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t bed_of
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t* marker_exclude = marker_exclude_mid;
- uintptr_t* unstopped_markers = NULL;
+ uintptr_t* unstopped_markers = nullptr;
uintptr_t* loadbuf = g_loadbuf;
uintptr_t* sample_male_include2 = g_sample_male_include2;
- uintptr_t* perm_adapt_set_unstopped = NULL;
+ uintptr_t* perm_adapt_set_unstopped = nullptr;
char* tbuf2 = &(g_textbuf[MAXLINELEN]);
double* orig_chisq = g_orig_chisq;
- double* sorted_chisq_buf = NULL;
- uint32_t* marker_idx_to_uidx = NULL;
- uint32_t* sorted_marker_idx_buf = NULL;
- uint32_t* proxy_arr = NULL;
- uint32_t* perm_2success_ct = NULL;
- uint32_t* perm_attempt_ct = NULL;
+ double* sorted_chisq_buf = nullptr;
+ uint32_t* marker_idx_to_uidx = nullptr;
+ uint32_t* sorted_marker_idx_buf = nullptr;
+ uint32_t* proxy_arr = nullptr;
+ uint32_t* perm_2success_ct = nullptr;
+ uint32_t* perm_attempt_ct = nullptr;
uintptr_t pheno_nm_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
uintptr_t marker_ct = marker_ct_mid;
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
@@ -5614,7 +5614,7 @@ int32_t model_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t bed_of
uint32_t model_perm_best = !(model_modifier & MODEL_PMASK);
uint32_t max_thread_ct = g_thread_ct;
uint32_t perms_done = 0;
- int32_t x_code = chrom_info_ptr->x_code;
+ int32_t x_code = chrom_info_ptr->xymt_codes[X_OFFSET];
int32_t retval = 0;
uintptr_t* set_incl;
uintptr_t* loadbuf_ptr;
@@ -5790,7 +5790,7 @@ int32_t model_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t bed_of
// no need to skip MT/haploid here, since we error out on that case
// earlier
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_x = (uii == (uint32_t)x_code);
@@ -5897,7 +5897,7 @@ int32_t model_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t bed_of
goto model_assoc_set_test_more_perms;
}
model_assoc_set_test_perms_done:
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u permutation%s complete.\n", perms_done, (perms_done != 1)? "s" : "");
model_assoc_set_test_write:
if (model_modifier & MODEL_PERM) {
@@ -5972,8 +5972,8 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t pheno_nm_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
int32_t retval = 0;
- FILE* outfile = NULL;
- FILE* outfile_msa = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_msa = nullptr;
uintptr_t* marker_exclude = marker_exclude_orig;
uintptr_t* haploid_mask = chrom_info_ptr->haploid_mask;
uintptr_t marker_ct = marker_ct_orig;
@@ -6001,29 +6001,29 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
uint32_t load_case_ct = 0;
uint32_t precomp_width = 0;
uint32_t is_y = 0;
- int32_t x_code = chrom_info_ptr->x_code;
- int32_t y_code = chrom_info_ptr->y_code;
- int32_t mt_code = chrom_info_ptr->mt_code;
- uintptr_t* sample_nonmale_ctrl_include2 = NULL;
- uintptr_t* sample_nonmale_case_include2 = NULL;
- uintptr_t* sample_male_ctrl_include2 = NULL;
- uintptr_t* sample_male_case_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* cur_ctrl_include2 = NULL;
- uintptr_t* cur_case_include2 = NULL;
- uintptr_t* is_invalid_bitfield = NULL;
- uintptr_t* founder_pnm = NULL;
- uint32_t* perm_2success_ct = NULL;
- uint32_t* perm_attempt_ct = NULL;
- uint32_t* set_cts = NULL;
- uint32_t* het_cts = NULL;
- uint32_t* homcom_cts = NULL;
- uint32_t* precomp_ui = NULL;
- double* orig_chisq = NULL;
- double* maxt_extreme_stat = NULL;
- double* orig_odds = NULL;
- double* precomp_d = NULL;
- unsigned char* perm_adapt_stop = NULL;
+ int32_t x_code = chrom_info_ptr->xymt_codes[X_OFFSET];
+ int32_t y_code = chrom_info_ptr->xymt_codes[Y_OFFSET];
+ int32_t mt_code = chrom_info_ptr->xymt_codes[MT_OFFSET];
+ uintptr_t* sample_nonmale_ctrl_include2 = nullptr;
+ uintptr_t* sample_nonmale_case_include2 = nullptr;
+ uintptr_t* sample_male_ctrl_include2 = nullptr;
+ uintptr_t* sample_male_case_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* cur_ctrl_include2 = nullptr;
+ uintptr_t* cur_case_include2 = nullptr;
+ uintptr_t* is_invalid_bitfield = nullptr;
+ uintptr_t* founder_pnm = nullptr;
+ uint32_t* perm_2success_ct = nullptr;
+ uint32_t* perm_attempt_ct = nullptr;
+ uint32_t* set_cts = nullptr;
+ uint32_t* het_cts = nullptr;
+ uint32_t* homcom_cts = nullptr;
+ uint32_t* precomp_ui = nullptr;
+ double* orig_chisq = nullptr;
+ double* maxt_extreme_stat = nullptr;
+ double* orig_odds = nullptr;
+ double* precomp_d = nullptr;
+ unsigned char* perm_adapt_stop = nullptr;
double dxx = 0.0;
double dww = 0.0;
double dvv = 0.0;
@@ -6041,9 +6041,9 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
uint32_t model_fisherx = model_fisher && (!(model_modifier & MODEL_PTREND));
uint32_t fisher_midp = model_modifier & MODEL_FISHER_MIDP;
char* writebuf = g_textbuf;
- char* chrom_name_ptr = NULL;
+ char* chrom_name_ptr = nullptr;
uint32_t chrom_name_len = 0;
- char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_LEN];
+ char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_SLEN];
uint32_t mu_table[MODEL_BLOCKSIZE];
uint32_t uibuf[4];
char wbuf[48];
@@ -6125,15 +6125,15 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
perms_done = 0;
g_is_model_prec = model_modifier / MODEL_PREC;
g_perm_is_1bit = 0;
- g_mperm_save_all = NULL;
- g_sample_male_include2 = NULL;
+ g_mperm_save_all = nullptr;
+ g_sample_male_include2 = nullptr;
if (is_set_test) {
if (bigstack_alloc_ul(unfiltered_sample_ctl, &founder_pnm)) {
goto model_assoc_ret_NOMEM;
}
memcpy(founder_pnm, pheno_nm, unfiltered_sample_ctl * sizeof(intptr_t));
bitvec_and(founder_info, unfiltered_sample_ctl, founder_pnm);
- if (extract_set_union_unfiltered(sip, NULL, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
+ if (extract_set_union_unfiltered(sip, nullptr, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
goto model_assoc_ret_NOMEM;
}
}
@@ -6148,7 +6148,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
maxt_extreme_stat[uii] = 1;
}
} else {
- fill_double_zero(maxt_extreme_stat, perms_total);
+ fill_double_zero(perms_total, maxt_extreme_stat);
}
if (mperm_save & MPERM_DUMP_ALL) {
memcpy(outname_end, ".mperm.dump.all", 16);
@@ -6327,7 +6327,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
goto model_assoc_ret_1;
}
} else {
- g_perm_cluster_starts = NULL;
+ g_perm_cluster_starts = nullptr;
}
if (!is_set_test) {
if (max_thread_ct > perms_total) {
@@ -6350,9 +6350,9 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
goto model_assoc_ret_NOMEM;
}
#ifdef __LP64__
- fill_ulong_one((uintptr_t*)g_ldrefs, (marker_ct + 3) / 4);
+ fill_ulong_one((marker_ct + 3) / 4, (uintptr_t*)g_ldrefs);
#else
- fill_ulong_one((uintptr_t*)g_ldrefs, (marker_ct + 1) / 2);
+ fill_ulong_one((marker_ct + 1) / 2, (uintptr_t*)g_ldrefs);
#endif
if (!(mperm_save & MPERM_DUMP_ALL)) {
// 5.65686 = roughly 4 * sqrt(2), corresponding to 4 stdevs. this is
@@ -6599,7 +6599,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
} else {
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_x = (uii == (uint32_t)x_code);
@@ -7121,12 +7121,12 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
dxx = orig_pvals[urr];
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - ujj, uoo, upp, &(precomp_ui[umm * 4]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - ujj, uoo, upp, &(precomp_ui[umm * 4]), nullptr);
ujj += uqq;
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - ujj, uoo, upp, &(precomp_ui[umm * 6]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - ujj, uoo, upp, &(precomp_ui[umm * 6]), nullptr);
fisher22_precomp_pval_bounds(maxt_cur_extreme_stat, fisher_midp, unn - ujj, uoo, upp, uibuf, &(precomp_d[umm * 2]));
precomp_ui[umm * 6 + 4] = uibuf[2];
precomp_ui[umm * 6 + 5] = uibuf[3] - uibuf[2];
@@ -7137,12 +7137,12 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
dxx = orig_chisq[urr];
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, unn - ujj, uoo, upp, &(precomp_ui[umm * 4]), NULL);
+ chi22_precomp_val_bounds(dxx, unn - ujj, uoo, upp, &(precomp_ui[umm * 4]), nullptr);
ujj += uqq;
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, unn - ujj, uoo, upp, &(precomp_ui[umm * 6]), NULL);
+ chi22_precomp_val_bounds(dxx, unn - ujj, uoo, upp, &(precomp_ui[umm * 6]), nullptr);
chi22_precomp_val_bounds(maxt_cur_extreme_stat, unn - ujj, uoo, upp, uibuf, &(precomp_d[umm * 2]));
precomp_ui[umm * 6 + 4] = uibuf[2];
precomp_ui[umm * 6 + 5] = uibuf[3] - uibuf[2];
@@ -7170,12 +7170,12 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
dxx = orig_pvals[urr];
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - uss, uoo, uqq, &(precomp_ui[umm * 12]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - uss, uoo, uqq, &(precomp_ui[umm * 12]), nullptr);
uss += 2;
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - uss, uoo, uqq, &(precomp_ui[umm * 18]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, unn - uss, uoo, uqq, &(precomp_ui[umm * 18]), nullptr);
fisher22_precomp_pval_bounds(maxt_cur_extreme_stat, fisher_midp, 2 * case_ct - uss, uoo, uqq, uibuf, &(precomp_d[umm * 6]));
precomp_ui[umm * 18 + 4] = uibuf[2];
precomp_ui[umm * 18 + 5] = uibuf[3] - uibuf[2];
@@ -7189,17 +7189,17 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
ujj = case_ct - ujj;
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uoo, upp, &(precomp_ui[umm * 12 + 4]), NULL);
- fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uqq, upp, &(precomp_ui[umm * 12 + 8]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uoo, upp, &(precomp_ui[umm * 12 + 4]), nullptr);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uqq, upp, &(precomp_ui[umm * 12 + 8]), nullptr);
ujj--;
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uoo, upp, &(precomp_ui[umm * 18 + 6]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uoo, upp, &(precomp_ui[umm * 18 + 6]), nullptr);
fisher22_precomp_pval_bounds(maxt_cur_extreme_stat, fisher_midp, ujj, uoo, upp, uibuf, &(precomp_d[umm * 6 + 2]));
precomp_ui[umm * 18 + 10] = uibuf[2];
precomp_ui[umm * 18 + 11] = uibuf[3] - uibuf[2];
- fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uqq, upp, &(precomp_ui[umm * 18 + 12]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uqq, upp, &(precomp_ui[umm * 18 + 12]), nullptr);
fisher22_precomp_pval_bounds(maxt_cur_extreme_stat, fisher_midp, ujj, uqq, upp, uibuf, &(precomp_d[umm * 6 + 4]));
precomp_ui[umm * 18 + 16] = uibuf[2];
precomp_ui[umm * 18 + 17] = uibuf[3] - uibuf[2];
@@ -7211,12 +7211,12 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
dxx = orig_chisq[urr];
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, unn - uss, uoo, uqq, &(precomp_ui[umm * 12]), NULL);
+ chi22_precomp_val_bounds(dxx, unn - uss, uoo, uqq, &(precomp_ui[umm * 12]), nullptr);
uss += 2;
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, unn - uss, uoo, uqq, &(precomp_ui[umm * 18]), NULL);
+ chi22_precomp_val_bounds(dxx, unn - uss, uoo, uqq, &(precomp_ui[umm * 18]), nullptr);
chi22_precomp_val_bounds(maxt_cur_extreme_stat, unn - uss, uoo, uqq, uibuf, &(precomp_d[umm * 6]));
precomp_ui[umm * 18 + 4] = uibuf[2];
precomp_ui[umm * 18 + 5] = uibuf[3] - uibuf[2];
@@ -7230,17 +7230,17 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
ujj = case_ct - ujj;
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, ujj, uoo, upp, &(precomp_ui[umm * 12 + 4]), NULL);
- chi22_precomp_val_bounds(dxx, ujj, uqq, upp, &(precomp_ui[umm * 12 + 8]), NULL);
+ chi22_precomp_val_bounds(dxx, ujj, uoo, upp, &(precomp_ui[umm * 12 + 4]), nullptr);
+ chi22_precomp_val_bounds(dxx, ujj, uqq, upp, &(precomp_ui[umm * 12 + 8]), nullptr);
ujj--;
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, ujj, uoo, upp, &(precomp_ui[umm * 18 + 6]), NULL);
+ chi22_precomp_val_bounds(dxx, ujj, uoo, upp, &(precomp_ui[umm * 18 + 6]), nullptr);
chi22_precomp_val_bounds(maxt_cur_extreme_stat, ujj, uoo, upp, uibuf, &(precomp_d[umm * 6 + 2]));
precomp_ui[umm * 18 + 10] = uibuf[2];
precomp_ui[umm * 18 + 11] = uibuf[3] - uibuf[2];
- chi22_precomp_val_bounds(dxx, ujj, uqq, upp, &(precomp_ui[umm * 18 + 12]), NULL);
+ chi22_precomp_val_bounds(dxx, ujj, uqq, upp, &(precomp_ui[umm * 18 + 12]), nullptr);
chi22_precomp_val_bounds(maxt_cur_extreme_stat, ujj, uqq, upp, uibuf, &(precomp_d[umm * 6 + 4]));
precomp_ui[umm * 18 + 16] = uibuf[2];
precomp_ui[umm * 18 + 17] = uibuf[3] - uibuf[2];
@@ -7268,11 +7268,11 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
dxx = orig_chisq[urr];
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- ca_trend_precomp_val_bounds(dxx, ujj--, unn, uoo, upp, &(precomp_ui[umm * 4]), NULL);
+ ca_trend_precomp_val_bounds(dxx, ujj--, unn, uoo, upp, &(precomp_ui[umm * 4]), nullptr);
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- ca_trend_precomp_val_bounds(dxx, ujj, unn, uoo, upp, &(precomp_ui[umm * 6]), NULL);
+ ca_trend_precomp_val_bounds(dxx, ujj, unn, uoo, upp, &(precomp_ui[umm * 6]), nullptr);
ca_trend_precomp_val_bounds(maxt_cur_extreme_stat, ujj--, unn, uoo, upp, uibuf, &(precomp_d[umm * 2]));
precomp_ui[umm * 6 + 4] = uibuf[2];
precomp_ui[umm * 6 + 5] = uibuf[3] - uibuf[2];
@@ -7301,11 +7301,11 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
dxx = orig_pvals[urr];
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj--, uoo, upp, &(precomp_ui[umm * 4]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj--, uoo, upp, &(precomp_ui[umm * 4]), nullptr);
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uoo, upp, &(precomp_ui[umm * 6]), NULL);
+ fisher22_precomp_pval_bounds(dxx, fisher_midp, ujj, uoo, upp, &(precomp_ui[umm * 6]), nullptr);
fisher22_precomp_pval_bounds(maxt_cur_extreme_stat, fisher_midp, ujj--, uoo, upp, uibuf, &(precomp_d[umm * 2]));
precomp_ui[umm * 6 + 4] = uibuf[2];
precomp_ui[umm * 6 + 5] = uibuf[3] - uibuf[2];
@@ -7315,11 +7315,11 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
dxx = orig_chisq[urr];
if (model_adapt_nst) {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, ujj--, uoo, upp, &(precomp_ui[umm * 4]), NULL);
+ chi22_precomp_val_bounds(dxx, ujj--, uoo, upp, &(precomp_ui[umm * 4]), nullptr);
}
} else {
for (umm = uii * precomp_width; umm < ukk; umm++) {
- chi22_precomp_val_bounds(dxx, ujj, uoo, upp, &(precomp_ui[umm * 6]), NULL);
+ chi22_precomp_val_bounds(dxx, ujj, uoo, upp, &(precomp_ui[umm * 6]), nullptr);
chi22_precomp_val_bounds(maxt_cur_extreme_stat, ujj--, uoo, upp, uibuf, &(precomp_d[umm * 2]));
precomp_ui[umm * 6 + 4] = uibuf[2];
precomp_ui[umm * 6 + 5] = uibuf[3] - uibuf[2];
@@ -7415,7 +7415,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
if ((!perm_pass_idx) && (marker_idx >= loop_end)) {
if (marker_idx < marker_unstopped_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_unstopped_ct;
printf("\b\b%u%%", pct);
@@ -7426,7 +7426,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
} while (marker_idx < marker_unstopped_ct);
if (!perm_pass_idx) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -7442,7 +7442,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
goto model_assoc_ret_NOMEM;
}
fill_idx_to_uidx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_idx_to_uidx);
- retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, model_fisher? NULL : orig_chisq, pfilter, output_min_p, mtest_adjust, (!model_assoc) && (!(model_modifier & MODEL_PTREND)), adjust_lambda, NULL, model_fisher? orig_pvals : NULL);
+ retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, model_fisher? nullptr : orig_chisq, pfilter, output_min_p, mtest_adjust, (!model_assoc) && (!(model_modifier & MODEL_PTREND)), adjust_lambda, nullptr, model_fisher? orig_pvals : nullptr);
if (retval) {
goto model_assoc_ret_1;
}
@@ -7500,7 +7500,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
if (model_perms_nst) {
if (mperm_save & MPERM_DUMP_ALL) {
if (perm_pass_idx) {
- putchar(' ');
+ putc_unlocked(' ', stdout);
}
fputs("[dumping stats]", stdout);
fflush(stdout);
@@ -7558,7 +7558,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u %s permutation%s complete.\n", perms_done, model_maxt_nst? "max(T)" : "(adaptive)", (perms_done != 1)? "s" : "");
if (model_fisher && (model_modifier & MODEL_PTREND)) {
outname_end2 -= 7; // remove ".fisher"
@@ -7637,7 +7637,7 @@ int32_t model_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, cha
while (1) {
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_x = (uii == (uint32_t)x_code);
@@ -7738,17 +7738,17 @@ int32_t qassoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t* marker_exclude = marker_exclude_mid;
- uintptr_t* unstopped_markers = NULL;
+ uintptr_t* unstopped_markers = nullptr;
uintptr_t* loadbuf = g_loadbuf;
- uintptr_t* perm_adapt_set_unstopped = NULL;
- uintptr_t* regression_skip = NULL;
+ uintptr_t* perm_adapt_set_unstopped = nullptr;
+ uintptr_t* regression_skip = nullptr;
double* orig_stats = g_orig_chisq; // initially contains t-statistics
- double* sorted_chisq_buf = NULL;
- uint32_t* marker_idx_to_uidx = NULL;
- uint32_t* sorted_marker_idx_buf = NULL;
- uint32_t* proxy_arr = NULL;
- uint32_t* perm_2success_ct = NULL;
- uint32_t* perm_attempt_ct = NULL;
+ double* sorted_chisq_buf = nullptr;
+ uint32_t* marker_idx_to_uidx = nullptr;
+ uint32_t* sorted_marker_idx_buf = nullptr;
+ uint32_t* proxy_arr = nullptr;
+ uint32_t* perm_2success_ct = nullptr;
+ uint32_t* perm_attempt_ct = nullptr;
uintptr_t marker_ct = marker_ct_mid;
uintptr_t set_ct = 0;
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
@@ -8015,7 +8015,7 @@ int32_t qassoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
goto qassoc_set_test_more_perms;
}
qassoc_set_test_perms_done:
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u permutation%s complete.\n", perms_done, (perms_done != 1)? "s" : "");
qassoc_set_test_write:
if (model_modifier & MODEL_PERM) {
@@ -8049,9 +8049,9 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
uintptr_t pheno_nm_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(pheno_nm_ct);
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
uintptr_t perm_vec_ctcl8m = 0;
- FILE* outfile = NULL;
- FILE* outfile_qtm = NULL;
- FILE* outfile_msa = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_qtm = nullptr;
+ FILE* outfile_msa = nullptr;
uint32_t is_set_test = model_modifier & MODEL_SET_TEST;
uint32_t perm_adapt_nst = (model_modifier & MODEL_PERM) && (!is_set_test);
uint32_t perm_maxt_nst = (model_modifier & MODEL_MPERM) && (!is_set_test);
@@ -8066,17 +8066,17 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
uint32_t pct = 0;
uint32_t max_thread_ct = g_thread_ct;
uint32_t perm_pass_idx = 0;
- uint32_t mt_exists = (chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code);
+ uint32_t mt_exists = (chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET]);
uint32_t hh_or_mt_exists = hh_exists | (mt_exists * NXMHH_EXISTS);
int32_t retval = 0;
double x11 = 0;
double x12 = 0;
double x22 = 0;
uintptr_t* marker_exclude = marker_exclude_orig;
- uintptr_t* founder_pnm = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uint32_t* tcnt = NULL;
- char* chrom_name_ptr = NULL;
+ uintptr_t* founder_pnm = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uint32_t* tcnt = nullptr;
+ char* chrom_name_ptr = nullptr;
uint32_t chrom_name_len = 0;
char chrom_name_buf[5];
uint32_t mu_table[MODEL_BLOCKSIZE];
@@ -8157,14 +8157,14 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
}
memcpy(founder_pnm, pheno_nm, unfiltered_sample_ctl * sizeof(intptr_t));
bitvec_and(founder_info, unfiltered_sample_ctl, founder_pnm);
- if (extract_set_union_unfiltered(sip, NULL, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
+ if (extract_set_union_unfiltered(sip, nullptr, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
goto qassoc_ret_NOMEM;
}
}
memset(spacebuf, 32, 8);
g_perm_pheno_nm_ct = pheno_nm_ct;
g_perms_done = 0;
- g_mperm_save_all = NULL;
+ g_mperm_save_all = nullptr;
numbuf[0] = ' ';
if (perm_maxt_nst) {
perms_total = model_mperm_val;
@@ -8177,9 +8177,9 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
goto qassoc_ret_NOMEM;
}
#ifdef __LP64__
- fill_ulong_one((uintptr_t*)g_ldrefs, (marker_ct + 3) / 4);
+ fill_ulong_one((marker_ct + 3) / 4, (uintptr_t*)g_ldrefs);
#else
- fill_ulong_one((uintptr_t*)g_ldrefs, (marker_ct + 1) / 2);
+ fill_ulong_one((marker_ct + 1) / 2, (uintptr_t*)g_ldrefs);
#endif
if (mperm_save & MPERM_DUMP_ALL) {
memcpy(outname_end, ".mperm.dump.all", 16);
@@ -8270,7 +8270,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
max_thread_ct = MAXV(uii, 1);
}
if (cluster_starts) {
- retval = cluster_include_and_reindex(unfiltered_sample_ct, pheno_nm, 1, NULL, pheno_nm_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, NULL, NULL);
+ retval = cluster_include_and_reindex(unfiltered_sample_ct, pheno_nm, 1, nullptr, pheno_nm_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, nullptr, nullptr);
if (retval) {
goto qassoc_ret_1;
}
@@ -8648,23 +8648,23 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
goto qassoc_ret_WRITE_FAIL;
}
fputs(a1ptr, outfile_qtm);
- putc('/', outfile_qtm);
+ putc_unlocked('/', outfile_qtm);
fputs(a1ptr, outfile_qtm);
- putc(' ', outfile_qtm);
+ putc_unlocked(' ', outfile_qtm);
if (uii + ujj < 7) {
fwrite(spacebuf, 1, 7 - uii - ujj, outfile_qtm);
}
fputs(a1ptr, outfile_qtm);
- putc('/', outfile_qtm);
+ putc_unlocked('/', outfile_qtm);
fputs(a2ptr, outfile_qtm);
- putc(' ', outfile_qtm);
+ putc_unlocked(' ', outfile_qtm);
if (ujj < 4) {
fwrite(spacebuf, 1, 7 - 2 * ujj, outfile_qtm);
}
fputs(a2ptr, outfile_qtm);
- putc('/', outfile_qtm);
+ putc_unlocked('/', outfile_qtm);
fputs(a2ptr, outfile_qtm);
- putc('\n', outfile_qtm);
+ putc_unlocked('\n', outfile_qtm);
wptr = memcpya(wptr_restart, "COUNTS ", 7);
wptr = uint32toa_w8x(homrar_ct, ' ', wptr);
wptr = uint32toa_w8x(het_ct, ' ', wptr);
@@ -8793,7 +8793,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
if ((!perm_pass_idx) && (marker_idx >= loop_end)) {
if (marker_idx < marker_unstopped_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_unstopped_ct;
printf("\b\b%u%%", pct);
@@ -8804,7 +8804,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
} while (marker_idx < marker_unstopped_ct);
if (!perm_pass_idx) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -8827,7 +8827,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
g_orig_chisq[uii] = sqrt(g_orig_linsq[uii]);
}
}
- retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, g_orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, tcnt, NULL);
+ retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, g_orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, tcnt, nullptr);
if (retval) {
goto qassoc_ret_1;
}
@@ -8847,7 +8847,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
if (do_perms_nst) {
if (mperm_save & MPERM_DUMP_ALL) {
if (perm_pass_idx) {
- putchar(' ');
+ putc_unlocked(' ', stdout);
}
fputs("[dumping stats]", stdout);
fflush(stdout);
@@ -8905,7 +8905,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u %s permutation%s complete.\n", g_perms_done, perm_maxt_nst? "max(T)" : "(adaptive)", (g_perms_done != 1)? "s" : "");
if (perm_adapt_nst) {
@@ -8976,7 +8976,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
dxx = 0.5 * dyy;
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, uii, g_textbuf));
@@ -9069,7 +9069,7 @@ int32_t qassoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* ou
int32_t gxe_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double output_min_p, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t sample_ct, uintptr_t* sample_exclude, uintptr_t* pheno_nm, double* pheno_d, uintptr_t* gxe_covar_nm, uintptr_t* gxe_covar_c, uintptr_t* sex_male, uint32_t hh_or_mt_exists) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t sample_ctl = BITCT_TO_WORDCT(sample_ct);
@@ -9085,30 +9085,30 @@ int32_t gxe_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outn
uintptr_t group2_size_male = 0;
uintptr_t marker_uidx = 0;
uintptr_t final_mask = 0;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* sample_male_all_include2 = NULL;
- uintptr_t* group1_include2 = NULL;
- uintptr_t* group2_include2 = NULL;
- uintptr_t* group1_male_include2 = NULL;
- uintptr_t* group2_male_include2 = NULL;
- uintptr_t* covar_nm_raw = NULL;
- uintptr_t* covar_nm_male_raw = NULL;
- uintptr_t* cur_sample_i2 = NULL;
- uintptr_t* cur_sample_male_i2 = NULL;
- uintptr_t* cur_group1_i2 = NULL;
- uintptr_t* cur_group2_i2 = NULL;
- uintptr_t* cur_covar_nm_raw = NULL;
- double* pheno_d_collapsed = NULL;
- double* pheno_d_male_collapsed = NULL;
- double* cur_pheno_d = NULL;
- char* wptr_start = NULL;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* sample_male_all_include2 = nullptr;
+ uintptr_t* group1_include2 = nullptr;
+ uintptr_t* group2_include2 = nullptr;
+ uintptr_t* group1_male_include2 = nullptr;
+ uintptr_t* group2_male_include2 = nullptr;
+ uintptr_t* covar_nm_raw = nullptr;
+ uintptr_t* covar_nm_male_raw = nullptr;
+ uintptr_t* cur_sample_i2 = nullptr;
+ uintptr_t* cur_sample_male_i2 = nullptr;
+ uintptr_t* cur_group1_i2 = nullptr;
+ uintptr_t* cur_group2_i2 = nullptr;
+ uintptr_t* cur_covar_nm_raw = nullptr;
+ double* pheno_d_collapsed = nullptr;
+ double* pheno_d_male_collapsed = nullptr;
+ double* cur_pheno_d = nullptr;
+ char* wptr_start = nullptr;
uintptr_t cur_sample_ct = 0;
uintptr_t cur_sample_ctv2 = 0;
uintptr_t cur_group1_size = 0;
uintptr_t cur_group2_size = 0;
- uint32_t y_exists = (chrom_info_ptr->y_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->y_code);
- uint32_t mt_exists = (chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code);
+ uint32_t y_exists = (chrom_info_ptr->xymt_codes[Y_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[Y_OFFSET]);
+ uint32_t mt_exists = (chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET]);
uint32_t skip_y = 0;
double pheno_sum_g1 = 0;
double pheno_ssq_g1 = 0;
@@ -9515,7 +9515,7 @@ int32_t gxe_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outn
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -9523,7 +9523,7 @@ int32_t gxe_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outn
}
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -9665,7 +9665,7 @@ THREAD_RET_TYPE testmiss_adapt_thread(void* arg) {
double stat_low = 0;
uint32_t missing_sum = 0;
uint32_t nm_sum = 0;
- uint32_t* male_case_cts = NULL;
+ uint32_t* male_case_cts = nullptr;
uintptr_t* __restrict__ loadbuf;
uintptr_t* loadbuf_ptr;
uint32_t* __restrict__ precomp_ui;
@@ -9792,10 +9792,10 @@ THREAD_RET_TYPE testmiss_maxt_thread(void* arg) {
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
double* __restrict__ results = &(g_maxt_thread_results[perm_vec_ctcl8m * tidx]);
double* __restrict__ orig_pvals = g_orig_pvals;
- double* msa_ptr = NULL;
- uint32_t* male_case_cts = NULL;
- uint32_t* gpui = NULL;
- double* gpd = NULL;
+ double* msa_ptr = nullptr;
+ uint32_t* male_case_cts = nullptr;
+ uint32_t* gpui = nullptr;
+ double* gpd = nullptr;
double stat_high = 0;
double stat_low = 0;
uint32_t case_ct = g_perm_case_ct;
@@ -9839,7 +9839,7 @@ THREAD_RET_TYPE testmiss_maxt_thread(void* arg) {
if (valid_obs_ct != pheno_nm_ct) {
is_y = 1;
male_case_cts = g_male_case_cts;
- precomp_ui = NULL;
+ precomp_ui = nullptr;
}
} else {
valid_obs_ct = pheno_nm_ct;
@@ -9849,7 +9849,7 @@ THREAD_RET_TYPE testmiss_maxt_thread(void* arg) {
precomp_d = g_precomp_d;
if (g_mperm_save_all) {
msa_ptr = &(g_mperm_save_all[marker_idx * perm_vec_ct]);
- precomp_ui = NULL;
+ precomp_ui = nullptr;
} else {
precomp_ui = g_precomp_ui;
}
@@ -9865,7 +9865,7 @@ THREAD_RET_TYPE testmiss_maxt_thread(void* arg) {
}
loadbuf_ptr = &(loadbuf[marker_bidx * pheno_nm_ctv]);
success_2incr = 0;
- fill_uint_zero(thread_git_wkspace, perm_ct128 * 176);
+ fill_uint_zero(perm_ct128 * 176, thread_git_wkspace);
calc_git_missing(pheno_nm_ct, perm_vec_ct, loadbuf_ptr, perm_vecst, thread_git_wkspace);
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
missing_case_ct = thread_git_wkspace[pidx];
@@ -9933,14 +9933,14 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t marker_uidx = next_unset_unsafe(marker_exclude_orig, 0);
double maxt_cur_extreme_stat = 0;
- FILE* outfile = NULL;
- FILE* outfile_msa = NULL;
- uintptr_t* sample_hh_include2 = NULL;
- uintptr_t* sample_hh_male_include2 = NULL;
- uintptr_t* pheno_male_nm2 = NULL;
- uintptr_t* pheno_c_collapsed_male = NULL;
- uintptr_t* sex_male_collapsed = NULL;
- char* wptr_start = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_msa = nullptr;
+ uintptr_t* sample_hh_include2 = nullptr;
+ uintptr_t* sample_hh_male_include2 = nullptr;
+ uintptr_t* pheno_male_nm2 = nullptr;
+ uintptr_t* pheno_c_collapsed_male = nullptr;
+ uintptr_t* sex_male_collapsed = nullptr;
+ char* wptr_start = nullptr;
char* tbuf2 = &(g_textbuf[MAXLINELEN]);
uint32_t perm_adapt = testmiss_modifier & TESTMISS_PERM;
uint32_t perm_maxt = testmiss_modifier & TESTMISS_MPERM;
@@ -9965,7 +9965,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
uint32_t mperm_dump_all = 0;
uint32_t max_thread_ct = g_thread_ct;
uintptr_t pheno_male_nm_ctl = BITCT_TO_WORDCT(male_ct);
- int32_t y_code = chrom_info_ptr->y_code;
+ int32_t y_code = chrom_info_ptr->xymt_codes[Y_OFFSET];
int32_t retval = 0;
uint32_t uibuf[4];
uintptr_t* loadbuf_raw;
@@ -10171,7 +10171,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
goto testmiss_ret_NOMEM;
}
fill_idx_to_uidx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_idx_to_uidx);
- retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, NULL, pfilter, output_min_p, mtest_adjust, 1, 0.0, NULL, g_orig_pvals);
+ retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, nullptr, pfilter, output_min_p, mtest_adjust, 1, 0.0, nullptr, g_orig_pvals);
if (retval) {
goto testmiss_ret_1;
}
@@ -10208,8 +10208,8 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
if (!skip_y) {
// maybe all Y chromosome markers had no missing calls?
- uii = chrom_info_ptr->chrom_start[(uint32_t)y_code];
- ujj = chrom_info_ptr->chrom_end[(uint32_t)y_code];
+ uii = get_chrom_start_vidx(chrom_info_ptr, (uint32_t)y_code);
+ ujj = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)y_code);
if (popcount_bit_idx(marker_exclude, uii, ujj) == ujj - uii) {
skip_y = 1;
} else {
@@ -10234,7 +10234,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
goto testmiss_ret_1;
}
} else {
- g_perm_cluster_starts = NULL;
+ g_perm_cluster_starts = nullptr;
}
if (max_thread_ct > perms_total) {
max_thread_ct = perms_total;
@@ -10281,7 +10281,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
g_perm_case_ct = case_ct;
g_male_ct = male_ct;
g_fisher_midp = midp;
- g_mperm_save_all = NULL;
+ g_mperm_save_all = nullptr;
// ----- begin main loop -----
testmiss_more_perms:
if (perm_adapt) {
@@ -10372,7 +10372,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
chrom_end = 0;
// only forced to terminate block at Y chromosome boundaries
if (!skip_y) {
- marker_uidx_end = chrom_info_ptr->chrom_start[(uint32_t)y_code];
+ marker_uidx_end = get_chrom_start_vidx(chrom_info_ptr, (uint32_t)y_code);
pheno_male_nm_ctl = round_up_pow2(pheno_male_nm_ctl, 2);
} else {
marker_uidx_end = unfiltered_marker_ct;
@@ -10388,7 +10388,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
block_end = MODEL_BLOCKSIZE;
}
if (marker_uidx >= marker_uidx_end) {
- marker_uidx_end = chrom_info_ptr->chrom_end[(uint32_t)y_code];
+ marker_uidx_end = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)y_code);
if (marker_uidx >= marker_uidx_end) {
marker_uidx_end = unfiltered_marker_ct;
}
@@ -10474,9 +10474,9 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
pval = g_orig_pvals[marker_cidx];
missing_ct = g_missing_cts[marker_cidx];
if (perm_adapt) {
- fisher22_precomp_pval_bounds(pval, midp, case_ct, missing_ct, pheno_nm_ct, &(g_precomp_ui[uii * 4]), NULL);
+ fisher22_precomp_pval_bounds(pval, midp, case_ct, missing_ct, pheno_nm_ct, &(g_precomp_ui[uii * 4]), nullptr);
} else {
- fisher22_precomp_pval_bounds(pval, midp, case_ct, missing_ct, pheno_nm_ct, &(g_precomp_ui[uii * 6]), NULL);
+ fisher22_precomp_pval_bounds(pval, midp, case_ct, missing_ct, pheno_nm_ct, &(g_precomp_ui[uii * 6]), nullptr);
fisher22_precomp_pval_bounds(maxt_cur_extreme_stat, midp, case_ct, missing_ct, pheno_nm_ct, uibuf, &(g_precomp_d[uii * 2]));
g_precomp_ui[uii * 6 + 4] = uibuf[2];
g_precomp_ui[uii * 6 + 5] = uibuf[3] - uibuf[2];
@@ -10519,7 +10519,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
} while (marker_idx < marker_unstopped_ct);
if (mperm_dump_all) {
if (g_perms_done) {
- putchar(' ');
+ putc_unlocked(' ', stdout);
}
fputs("[dumping stats]", stdout);
fflush(stdout);
@@ -10578,7 +10578,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u %s permutation%s complete.\n", g_perms_done, perm_maxt? "max(T)" : "(adaptive)", (g_perms_done != 1)? "s" : "");
if (perm_adapt) {
memcpy(outname_end2, ".perm", 6);
@@ -10641,7 +10641,7 @@ int32_t testmiss(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
dxx = 0.5 * dyy;
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, uii, g_textbuf));
@@ -10726,8 +10726,8 @@ int32_t cluster_assoc_init(const char* flag_name, uintptr_t unfiltered_sample_ct
uint32_t cluster_end = 0;
uint32_t case_ct_total = 0;
uint32_t is_mh2 = (flag_name[4] == '2'); // yeah, this is a hack
- uintptr_t* pheno_nm_nonmale_11 = NULL;
- uintptr_t* pheno_nm_male_11 = NULL;
+ uintptr_t* pheno_nm_nonmale_11 = nullptr;
+ uintptr_t* pheno_nm_male_11 = nullptr;
uintptr_t* pheno_nm_11;
uint32_t* sample_to_cluster_pheno;
uint32_t* cluster_pheno_gtots;
@@ -10870,15 +10870,15 @@ int32_t cluster_assoc_load_one(FILE* bedfile, uintptr_t bed_offset, uintptr_t* m
if (marker_uidx >= (*chrom_end_ptr)) {
chrom_fo_idx = *chrom_fo_idx_ptr;
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
*chrom_end_ptr = chrom_end;
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- min_ploidy_1 = is_set(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->mt_code);
+ min_ploidy_1 = is_set(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET]);
*chrom_fo_idx_ptr = chrom_fo_idx;
*min_ploidy_1_ptr = min_ploidy_1;
- *is_x_ptr = (chrom_idx == (uint32_t)chrom_info_ptr->x_code);
- *is_y_ptr = (chrom_idx == (uint32_t)chrom_info_ptr->y_code);
+ *is_x_ptr = (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]);
+ *is_y_ptr = (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]);
if (!min_ploidy_1) {
for (cpidx = 0; cpidx < 2 * cluster_ct2; cpidx++) {
cur_cluster_pheno_gtots[cpidx] = 2 * cluster_pheno_gtots[cpidx * 2];
@@ -10914,7 +10914,7 @@ int32_t cluster_assoc_load_one(FILE* bedfile, uintptr_t bed_offset, uintptr_t* m
if (min_ploidy_1 && hh_or_mt_exists) {
haploid_fix(hh_or_mt_exists, sample_hh_include2, sample_hh_male_include2, unfiltered_sample_ct, *is_x_ptr, *is_y_ptr, (unsigned char*)loadbuf_raw);
}
- fill_uint_zero(cluster_geno_cts, 4 * cluster_ct2);
+ fill_uint_zero(4 * cluster_ct2, cluster_geno_cts);
ulptr = loadbuf_raw;
ulptr2 = pheno_nm_11;
if ((!min_ploidy_1) || (*is_x_ptr)) {
@@ -10960,12 +10960,12 @@ int32_t cluster_assoc_load_one(FILE* bedfile, uintptr_t bed_offset, uintptr_t* m
int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t cmh_mperm_val, uint32_t cmh_modifier, double ci_size, double pfilter, double output_min_p, uint32_t mtest_adjust, double adjust_lambda, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, Chrom_info* [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- FILE* outfile_msa = NULL;
- uintptr_t* sample_hh_include2 = NULL;
- uintptr_t* sample_hh_male_include2 = NULL;
- uint32_t* orig_df = NULL;
- char* chrom_name_ptr = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_msa = nullptr;
+ uintptr_t* sample_hh_include2 = nullptr;
+ uintptr_t* sample_hh_male_include2 = nullptr;
+ uint32_t* orig_df = nullptr;
+ char* chrom_name_ptr = nullptr;
uint32_t breslow_day = cmh_modifier & CLUSTER_CMH_BD;
uint32_t perm_bd = cmh_modifier & CLUSTER_CMH_PERM_BD;
uint32_t chrom_fo_idx = 0xffffffffU; // deliberate overflow
@@ -10976,7 +10976,7 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
uint32_t is_x = 0;
uint32_t is_y = 0;
int32_t retval = 0;
- char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_LEN];
+ char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_SLEN];
uintptr_t* pheno_nm_11;
uintptr_t* pheno_nm_nonmale_11;
uintptr_t* pheno_nm_male_11;
@@ -11044,7 +11044,7 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
// The best data structures for permutation testing are somewhat different
// from those for the single-pass computation, so we separate the logic.
- retval = cluster_assoc_init("--mh/--bd", unfiltered_sample_ct, pheno_nm, pheno_c, sex_male, cluster_ct, cluster_map, cluster_starts, NULL, &pheno_nm_11, &pheno_nm_nonmale_11, &pheno_nm_male_11, &sample_to_cluster_pheno, &cluster_pheno_gtots, &cur_cluster_pheno_gtots, &cluster_geno_cts, &loadbuf_raw, &cluster_ct2);
+ retval = cluster_assoc_init("--mh/--bd", unfiltered_sample_ct, pheno_nm, pheno_c, sex_male, cluster_ct, cluster_map, cluster_starts, nullptr, &pheno_nm_11, &pheno_nm_nonmale_11, &pheno_nm_male_11, &sample_to_cluster_pheno, &cluster_pheno_gtots, &cur_cluster_pheno_gtots, &cluster_geno_cts, &loadbuf_raw, &cluster_ct2);
if (retval) {
goto cmh_assoc_ret_1;
}
@@ -11077,7 +11077,7 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
if (putc_checked('\n', outfile)) {
goto cmh_assoc_ret_WRITE_FAIL;
}
- if ((chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code)) {
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_or_mt_exists, 1, pheno_nm, sex_male, &sample_hh_include2, &sample_hh_male_include2)) {
@@ -11244,7 +11244,7 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
if (marker_idx >= loop_end) {
if (marker_idx < marker_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_ct;
printf("\b\b%u%%", pct);
@@ -11257,7 +11257,7 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
goto cmh_assoc_ret_WRITE_FAIL;
}
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -11266,7 +11266,7 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
goto cmh_assoc_ret_NOMEM;
}
fill_idx_to_uidx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_idx_to_uidx);
- retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, NULL, NULL);
+ retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, nullptr, nullptr);
}
if (cmh_modifier & (CLUSTER_CMH_PERM | CLUSTER_CMH_MPERM)) {
@@ -11323,10 +11323,10 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double output_min_p, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t pheno_nm_ct, uintptr_t* pheno_nm, uintptr_t* pheno_c, uintptr_t* sex_male, uint32_t hh_o [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- uintptr_t* sample_hh_include2 = NULL;
- uintptr_t* sample_hh_male_include2 = NULL;
- char* wptr_start = NULL;
+ FILE* outfile = nullptr;
+ uintptr_t* sample_hh_include2 = nullptr;
+ uintptr_t* sample_hh_male_include2 = nullptr;
+ char* wptr_start = nullptr;
uint32_t chrom_fo_idx = 0xffffffffU;
uint32_t chrom_end = 0;
uint32_t pct = 0;
@@ -11384,7 +11384,7 @@ int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
uint32_t loop_end;
uint32_t uii;
// no reason to keep X/Y/MT/haploid restriction
- retval = cluster_assoc_init("--mh2", unfiltered_sample_ct, pheno_nm, pheno_c, sex_male, cluster_ct, cluster_map, cluster_starts, NULL, &pheno_nm_11, &pheno_nm_nonmale_11, &pheno_nm_male_11, &sample_to_cluster_pheno, &cluster_pheno_gtots, &cur_cluster_pheno_gtots, &cluster_geno_cts, &loadbuf_raw, &cluster_ct1);
+ retval = cluster_assoc_init("--mh2", unfiltered_sample_ct, pheno_nm, pheno_c, sex_male, cluster_ct, cluster_map, cluster_starts, nullptr, &pheno_nm_11, &pheno_nm_nonmale_11, &pheno_nm_male_11, &sample_to_cluster_pheno, &cluster_pheno_gtots, &cur_cluster_pheno_gtots, &cluster_geno_cts, &loadbuf_raw, &cluster_ct1);
for (cluster_idx = 0; cluster_idx < cluster_ct1; cluster_idx++) {
ctrl_ct += cluster_pheno_gtots[4 * cluster_idx];
case_ct += cluster_pheno_gtots[4 * cluster_idx + 2];
@@ -11415,7 +11415,7 @@ int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
if (!mi_buf) {
goto cmh2_assoc_ret_NOMEM;
}
- if ((chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code)) {
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_or_mt_exists, 1, pheno_nm, sex_male, &sample_hh_include2, &sample_hh_male_include2)) {
@@ -11436,7 +11436,7 @@ int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
}
loop_end = marker_ct / 100;
for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
- if (cluster_assoc_load_one(bedfile, bed_offset, marker_exclude, unfiltered_sample_ct, sample_hh_include2, sample_hh_male_include2, loadbuf_raw, pheno_nm_11, pheno_nm_nonmale_11, pheno_nm_male_11, marker_reverse, chrom_info_ptr, hh_or_mt_exists, g_textbuf, cluster_ct1, sample_to_cluster_pheno, cluster_pheno_gtots, cur_cluster_pheno_gtots, cluster_geno_cts, &marker_uidx, &chrom_end, &chrom_fo_idx, &min_ploidy_1, &is_x, &is_y, &wptr_start, NULL)) {
+ if (cluster_assoc_load_one(bedfile, bed_offset, marker_exclude, unfiltered_sample_ct, sample_hh_include2, sample_hh_male_include2, loadbuf_raw, pheno_nm_11, pheno_nm_nonmale_11, pheno_nm_male_11, marker_reverse, chrom_info_ptr, hh_or_mt_exists, g_textbuf, cluster_ct1, sample_to_cluster_pheno, cluster_pheno_gtots, cur_cluster_pheno_gtots, cluster_geno_cts, &marker_uidx, &chrom_end, &chrom_fo_idx, &min_ploidy_1, &is_x, &is_y, &wptr_start, nullptr)) {
goto cmh2_assoc_ret_READ_FAIL;
}
wptr = strcpyax(wptr_start, &(marker_ids[marker_uidx * max_marker_id_len]), '\t');
@@ -11534,7 +11534,7 @@ int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
if (marker_idx >= loop_end) {
if (marker_idx < marker_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_ct;
printf("\b\b%u%%", pct);
@@ -11547,7 +11547,7 @@ int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
goto cmh2_assoc_ret_WRITE_FAIL;
}
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -11576,11 +11576,11 @@ int32_t cmh2_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
int32_t homog_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double output_min_p, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uintptr_t unfiltered_sample_ct, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* clu [...]
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* outfile = NULL;
- uintptr_t* sample_hh_include2 = NULL;
- uintptr_t* sample_hh_male_include2 = NULL;
+ FILE* outfile = nullptr;
+ uintptr_t* sample_hh_include2 = nullptr;
+ uintptr_t* sample_hh_male_include2 = nullptr;
char* writebuf = g_textbuf;
- char* chrom_name_ptr = NULL;
+ char* chrom_name_ptr = nullptr;
uint32_t cluster_ct2 = 0;
uint32_t chrom_fo_idx = 0xffffffffU;
uint32_t chrom_end = 0;
@@ -11590,7 +11590,7 @@ int32_t homog_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
uint32_t is_x = 0;
uint32_t is_y = 0;
int32_t retval = 0;
- char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_LEN];
+ char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_SLEN];
uintptr_t* cluster_bitfield;
uintptr_t* pheno_nm_11;
uintptr_t* pheno_nm_nonmale_11;
@@ -11630,7 +11630,7 @@ int32_t homog_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
double dxx;
uint32_t cluster_idx;
uint32_t loop_end;
- ulii = 2 * max_marker_allele_len + MAX_ID_LEN + max_marker_id_len + max_cluster_id_len + 256;
+ ulii = 2 * max_marker_allele_len + MAX_ID_SLEN + max_marker_id_len + max_cluster_id_len + 256;
if (ulii > MAXLINELEN) {
if (bigstack_alloc_c(ulii, &writebuf)) {
goto homog_assoc_ret_NOMEM;
@@ -11677,7 +11677,7 @@ int32_t homog_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
// misaligned for backward compatibility
sprintf(g_textbuf, " CHR %%%us A1 A2 F_A F_U N_A N_U TEST CHISQ DF P OR\n", plink_maxsnp);
fprintf(outfile, g_textbuf, "SNP");
- if (chrom_info_ptr->mt_code != -1) {
+ if (chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_or_mt_exists, 1, pheno_nm, sex_male, &sample_hh_include2, &sample_hh_male_include2)) {
@@ -11792,7 +11792,7 @@ int32_t homog_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
if (marker_idx >= loop_end) {
if (marker_idx < marker_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_ct;
printf("\b\b%u%%", pct);
@@ -11805,7 +11805,7 @@ int32_t homog_assoc(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
goto homog_assoc_ret_WRITE_FAIL;
}
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
diff --git a/plink_calc.c b/plink_calc.c
index ee7bb02..efe3683 100644
--- a/plink_calc.c
+++ b/plink_calc.c
@@ -108,8 +108,8 @@ void rel_init(Rel_info* relip) {
relip->cutoff = 0.025;
relip->ibc_type = 0;
relip->pc_ct = 20;
- relip->pca_cluster_names_flattened = NULL;
- relip->pca_clusters_fname = NULL;
+ relip->pca_cluster_names_flattened = nullptr;
+ relip->pca_clusters_fname = nullptr;
}
void rel_cleanup(Rel_info* relip) {
@@ -141,7 +141,7 @@ void update_rel_ibc(double* rel_ibc, uintptr_t* geno, double* set_allele_freqs,
#endif
double wtarr[BITCT2 * 5];
double *wptr = weights;
- fill_double_zero(wtarr, BITCT2 * 5);
+ fill_double_zero(BITCT2 * 5, wtarr);
for (uii = 0; uii < window_size; uii++) {
if ((set_allele_freqs[uii] != 0.0) && (set_allele_freqs[uii] < (1.0 - EPSILON))) {
if (ibc_type) {
@@ -524,10 +524,6 @@ void fill_subset_weights_r(double* subset_weights, double* set_allele_freqs, dou
}
}
-int32_t get_chrom_end(Chrom_info* chrom_info_ptr, uintptr_t marker_idx) {
- return chrom_info_ptr->chrom_end[get_marker_chrom(chrom_info_ptr, marker_idx)];
-}
-
void exclude_multi(uintptr_t* exclude_arr, int32_t* new_excl, uint32_t unfiltered_sample_ct, uintptr_t* exclude_ct_ptr) {
uint32_t exclude_ct = *exclude_ct_ptr;
int32_t* new_excl_end = &(new_excl[unfiltered_sample_ct - exclude_ct]);
@@ -717,16 +713,16 @@ static inline uint32_t popcount_xor_2mask_multiword(uintptr_t** xor1p, uintptr_t
#endif
// ----- multithread globals -----
-double* g_rel_dists = NULL;
-uint32_t* g_sample_missing_unwt = NULL;
-uint32_t* g_missing_dbl_excluded = NULL;
-double* g_dists = NULL;
+double* g_rel_dists = nullptr;
+uint32_t* g_sample_missing_unwt = nullptr;
+uint32_t* g_missing_dbl_excluded = nullptr;
+double* g_dists = nullptr;
static uint32_t g_thread_start[MAX_THREADS_P1];
static int32_t* g_idists;
-static uintptr_t* g_pheno_nm = NULL;
-static uintptr_t* g_pheno_c = NULL;
-static unsigned char* g_geno = NULL;
+static uintptr_t* g_pheno_nm = nullptr;
+static uintptr_t* g_pheno_c = nullptr;
+static unsigned char* g_geno = nullptr;
static double* g_subset_weights;
static uint32_t* g_subset_weights_i;
static double g_reg_tot_xy;
@@ -744,8 +740,8 @@ static uintptr_t* g_masks;
static uintptr_t* g_mmasks;
static uint32_t* g_missing_tot_weights;
static uint32_t* g_sample_missing;
-static double* g_jackknife_precomp = NULL;
-static uint32_t* g_genome_main = NULL;
+static double* g_jackknife_precomp = nullptr;
+static uint32_t* g_genome_main = nullptr;
static uintptr_t g_marker_window[GENOME_MULTIPLEX * 2];
static double* g_pheno_packed;
@@ -946,7 +942,7 @@ void ibs_test_range(uint32_t tidx, uintptr_t* perm_col_buf, double* perm_results
ulii = row_idx * (row_idx + 1);
if (ulii >= pct_next) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = ulii / pct_div;
printf("\b\b%" PRIuPTR "%%", pct);
@@ -1852,7 +1848,7 @@ THREAD_RET_TYPE groupdist_jack_thread(void* arg) {
double returns[3];
double results[9];
uintptr_t ulii;
- fill_double_zero(results, 9);
+ fill_double_zero(9, results);
for (ulii = 0; ulii < jackknife_iters; ulii++) {
pick_d_small(cbuf, uibuf, case_ct + ctrl_ct, jackknife_d, sfmtp);
if (case_ct + ctrl_ct < sample_ct) {
@@ -2103,7 +2099,7 @@ int32_t regress_rel_main(uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclu
dyysq += g_calc_result[uii + 1][3];
}
ulii = g_jackknife_iters * g_thread_ct;
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("Jackknife s.e. (y = genomic relationship): %g\n", sqrt(((sample_ct - g_jackknife_d) / (double)g_jackknife_d) * (dxxsq - dxx * dxx / (double)ulii) / ((double)ulii - 1)));
LOGPRINTF(" (y = phenotype): %g\n", sqrt(((sample_ct - g_jackknife_d) / (double)g_jackknife_d) * (dyysq - dyy * dyy / (double)ulii) / ((double)ulii - 1)));
bigstack_reset(bigstack_mark);
@@ -2162,7 +2158,7 @@ void matrix_row_sum_ur(uintptr_t sample_ct, double* sums, double* matrix) {
double acc;
double* sptr_end;
double* sptr;
- fill_double_zero(sums, sample_ct);
+ fill_double_zero(sample_ct, sums);
for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
dptr = &(matrix[sample_idx * sample_ct]);
acc = 0.0;
@@ -2227,8 +2223,8 @@ void reml_em_one_trait(double* wkbase, double* pheno, double* covg_ref, double*
if (!lwork) {
lwork = CACHELINE_DBL;
}
- fill_double_zero(matrix_pvg, mat_offset);
- fill_double_zero(row2, sample_ct);
+ fill_double_zero(mat_offset, matrix_pvg);
+ fill_double_zero(sample_ct, row2);
printf(" ");
do {
memcpy(wkbase, rel_dists, mat_offset * sizeof(double));
@@ -2337,7 +2333,7 @@ void reml_em_one_trait(double* wkbase, double* pheno, double* covg_ref, double*
printf("\b\b\b\b\b\b \rcovg: %g covr: %g EM step log likelihood change: %g", *covg_ref, *covr_ref, ll_change);
fflush(stdout);
} while (ll_change > tol);
- putchar('\n');
+ putc_unlocked('\n', stdout);
sprintf(g_logbuf, "covg: %g covr: %g\n", *covg_ref, *covr_ref);
logstr(g_logbuf);
}
@@ -2380,7 +2376,7 @@ int32_t calc_unrelated_herit(uint64_t calculation_type, Rel_info* relip, uintptr
double* ibc_ptr;
double* rel_base;
g_sample_ct = sample_ct;
- g_missing_dbl_excluded = NULL;
+ g_missing_dbl_excluded = nullptr;
ulii = sample_ct;
ulii = round_up_pow2(ulii * ulii, CACHELINE_DBL);
rel_base = &(g_rel_dists[ulii]);
@@ -2412,9 +2408,9 @@ int32_t calc_unrelated_herit(uint64_t calculation_type, Rel_info* relip, uintptr
int32_t unrelated_herit_batch(uint32_t load_grm_bin, char* grmname, char* phenoname, uint32_t mpheno_col, char* phenoname_str, int32_t missing_pheno, Rel_info* relip) {
char* grmname_end = (char*)memchr(grmname, 0, FNAMESIZE);
- FILE* infile = NULL;
- FILE* grm_binfile = NULL;
- gzFile grm_gzfile = NULL;
+ FILE* infile = nullptr;
+ FILE* grm_binfile = nullptr;
+ gzFile grm_gzfile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
uintptr_t max_sample_id_len = 4;
@@ -2425,8 +2421,8 @@ int32_t unrelated_herit_batch(uint32_t load_grm_bin, char* grmname, char* phenon
double unrelated_herit_covg = relip->unrelated_herit_covg;
double unrelated_herit_covr = relip->unrelated_herit_covr;
uint32_t is_strict = (relip->modifier / REL_UNRELATED_HERITABILITY_STRICT) & 1;
- uintptr_t* pheno_c = NULL;
- double* pheno_d = NULL;
+ uintptr_t* pheno_c = nullptr;
+ double* pheno_d = nullptr;
uintptr_t cur_sample_id_len;
uintptr_t unfiltered_sample_ctl;
uintptr_t* pheno_nm;
@@ -2520,7 +2516,7 @@ int32_t unrelated_herit_batch(uint32_t load_grm_bin, char* grmname, char* phenon
if (fopen_checked(phenoname, "r", &infile)) {
goto unrelated_herit_batch_ret_OPEN_FAIL;
}
- retval = load_pheno(infile, unfiltered_sample_ct, 0, sorted_ids, max_sample_id_len, id_map, missing_pheno, 0, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, NULL, 0);
+ retval = load_pheno(infile, unfiltered_sample_ct, 0, sorted_ids, max_sample_id_len, id_map, missing_pheno, 0, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, nullptr, 0);
bigstack_end_reset(bigstack_end_mark);
fclose_null(&infile);
if (retval) {
@@ -2580,7 +2576,9 @@ int32_t unrelated_herit_batch(uint32_t load_grm_bin, char* grmname, char* phenon
goto unrelated_herit_batch_ret_READ_FAIL;
}
}
- fread(&fxx, 4, 1, grm_binfile);
+ if (fread(&fxx, sizeof(float), 1, grm_binfile) != 1) { // fread() returns the number of items read (0 or 1 here), not a byte count
+ goto unrelated_herit_batch_ret_READ_FAIL;
+ }
*row_ptr++ = (double)fxx;
sample_uidx2++;
}
@@ -2628,7 +2626,7 @@ int32_t unrelated_herit_batch(uint32_t load_grm_bin, char* grmname, char* phenon
}
}
gzclose(grm_gzfile);
- grm_gzfile = NULL;
+ grm_gzfile = nullptr;
}
// fill in upper right
for (ulii = 0; ulii < pheno_nm_ct; ulii++) {
@@ -2741,7 +2739,7 @@ int32_t ibs_test_calc(pthread_t* threads, char* read_dists_fname, uintptr_t unfi
ctrl_case_ct = pheno_ctrl_ct * case_ct;
case_case_ct = (case_ct * (case_ct - 1)) / 2;
g_perm_ct = perm_ct;
- // g_pheno_nm and g_pheno_c should be NULL
+ // g_pheno_nm and g_pheno_c should be nullptr
if (bigstack_alloc_ul(unfiltered_sample_ctl, &g_pheno_nm) ||
bigstack_alloc_ul(unfiltered_sample_ctl, &g_pheno_c)) {
goto ibs_test_calc_ret_NOMEM;
@@ -2882,8 +2880,8 @@ int32_t ibs_test_calc(pthread_t* threads, char* read_dists_fname, uintptr_t unfi
}
ibs_test_calc_ret_1:
bigstack_reset(bigstack_mark);
- g_pheno_nm = NULL;
- g_pheno_c = NULL;
+ g_pheno_nm = nullptr;
+ g_pheno_c = nullptr;
return retval;
}
@@ -2933,7 +2931,7 @@ int32_t groupdist_calc(pthread_t* threads, uint32_t unfiltered_sample_ct, uintpt
}
g_ctrl_ct = pheno_ctrl_ct;
g_sample_ct = sample_ct;
- // g_pheno_nm and g_pheno_c should be NULL
+ // g_pheno_nm and g_pheno_c should be nullptr
if (bigstack_alloc_ul(unfiltered_sample_ctl, &pheno_nm_local) ||
bigstack_alloc_ul(unfiltered_sample_ctl, &pheno_c_local)) {
goto groupdist_calc_ret_NOMEM;
@@ -3094,7 +3092,7 @@ int32_t groupdist_calc(pthread_t* threads, uint32_t unfiltered_sample_ct, uintpt
for (uii = 0; uii < 9; uii++) {
g_calc_result[0][uii] *= dxx;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
dxx = g_calc_result[0][0] - g_calc_result[0][1];
LOGPRINTF(" AA mean - AU mean avg difference (s.e.): %g (%g)\n", dxx, sqrt(((g_case_ct + g_ctrl_ct - g_jackknife_d) / ((double)g_jackknife_d)) * (g_calc_result[0][3] + g_calc_result[0][4] - 2 * g_calc_result[0][6] - dxx * dxx)));
dxx = g_calc_result[0][0] - g_calc_result[0][2];
@@ -3112,8 +3110,8 @@ int32_t groupdist_calc(pthread_t* threads, uint32_t unfiltered_sample_ct, uintpt
}
groupdist_calc_ret_1:
bigstack_reset(bigstack_mark);
- g_pheno_nm = NULL;
- g_pheno_c = NULL;
+ g_pheno_nm = nullptr;
+ g_pheno_c = nullptr;
return retval;
}
@@ -3180,11 +3178,11 @@ void normalize_phenos(double* new_phenos, uint32_t sample_ct, uintptr_t* sample_
/*
int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t max_pcs, FILE* bedfile, uintptr_t bed_offset, uint32_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uintptr_t sample_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, char* sample_ids, uintptr_t max_sample_id_len, uintptr_ [...]
- FILE* outfile = NULL;
- FILE* evecfile = NULL;
+ FILE* outfile = nullptr;
+ FILE* evecfile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
uintptr_t sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
@@ -3371,7 +3369,7 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
// precalculate (X'X)
// er, check if there's a faster way to do this...
- fill_double_zero(pc_orig_prod_sums, pc_ct_p1 * pc_ct_p1);
+ fill_double_zero(pc_ct_p1 * pc_ct_p1, pc_orig_prod_sums);
for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
for (ulii = 0; ulii < pc_ct_p1; ulii++) {
for (uljj = ulii; uljj < pc_ct_p1; uljj++) {
@@ -3380,7 +3378,7 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
}
}
- fill_uint_zero(missing_cts, sample_ct);
+ fill_uint_zero(sample_ct, missing_cts);
refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &is_haploid);
// .gen instead of .bgen because latter actually has lower precision(!) (15
// bits instead of the ~20 you get from printf("%g", dxx)), and there's no
@@ -3409,7 +3407,7 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
if (is_haploid && hh_exists) {
haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x, is_y, (unsigned char*)loadbuf);
}
- bufptr = chrom_name_write(chrom_info_ptr, get_marker_chrom(chrom_info_ptr, marker_uidx), g_textbuf);
+ bufptr = chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, marker_uidx), g_textbuf);
*bufptr++ = ' ';
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
@@ -3417,12 +3415,12 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
bufptr = uint32toa_x(marker_pos[marker_uidx], ' ', &(g_textbuf[1]));
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (fputs_checked(marker_allele_ptrs[2 * marker_uidx + 1], outfile)) {
goto calc_regress_pcs_ret_WRITE_FAIL;
}
memcpy(pc_prod_sums, pc_orig_prod_sums, pc_ct_p1 * pc_ct_p1 * sizeof(double));
- fill_double_zero(x_prime_y, pc_ct_p1);
+ fill_double_zero(pc_ct_p1, x_prime_y);
sample_idx = 0;
sample_uidx = BITCT2; // repurposed as end-of-word
ulptr = loadbuf;
@@ -3553,7 +3551,7 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
goto calc_regress_pcs_ret_WRITE_FAIL;
}
// regress phenotype
- fill_double_zero(x_prime_y, pc_ct_p1);
+ fill_double_zero(pc_ct_p1, x_prime_y);
sample_uidx = 0;
for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
@@ -3603,7 +3601,7 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
uii = strlen_se(sample_id_ptr);
// todo: adjust pheno_d, double-check missing gender behavior
fwrite(sample_id_ptr, 1, uii, outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(&(sample_id_ptr[uii + 1]), outfile);
g_textbuf[0] = ' ';
bufptr = dtoa_gx(((double)missing_cts[sample_uidx]) / (double)marker_ct, ' ', &(g_textbuf[1]));
@@ -3618,7 +3616,7 @@ int32_t calc_regress_pcs(char* evecname, uint32_t regress_pcs_modifier, uint32_t
goto calc_regress_pcs_ret_WRITE_FAIL;
}
*outname_end = '\0';
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("Principal component regression residuals and %sphenotype Z-scores %s%s.gen and %s.sample.\n", regress_pcs_sex_specific? "sex-specific " : "", regress_pcs_sex_specific? "\nwritten to " : "written to\n", outname, outname);
bigstack_reset(bigstack_mark);
while (0) {
@@ -3791,7 +3789,7 @@ int32_t distance_open(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile3_p
}
void distance_print_done(int32_t format_code, char* outname, char* outname_end) {
- putchar('\r');
+ putc_unlocked('\r', stdout);
if (!format_code) {
strcpy(outname_end, g_textbuf);
sprintf(g_logbuf, "Distances (allele counts) written to %s .\n", outname);
@@ -4309,7 +4307,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
}
} else if (!bin4) {
if (shape == DISTANCE_SQ0) {
- fill_double_zero((double*)membuf, sample_ct);
+ fill_double_zero(sample_ct, (double*)membuf);
}
if (write_alcts) {
dxx = 0.0;
@@ -4423,7 +4421,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
}
} else {
if (shape == DISTANCE_SQ0) {
- fill_float_zero((float*)membuf, sample_ct);
+ fill_float_zero(sample_ct, (float*)membuf);
}
if (write_alcts) {
fxx = 0.0;
@@ -4585,7 +4583,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
goto distance_d_write_ret_1;
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("Distances (allele counts) written to %s .\n", outname);
g_pct = 1;
}
@@ -4623,7 +4621,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
goto distance_d_write_ret_1;
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("Distances (proportions) written to %s .\n", outname);
g_pct = 1;
}
@@ -4663,7 +4661,7 @@ int32_t distance_d_write(FILE** outfile_ptr, FILE** outfile2_ptr, FILE** outfile
goto distance_d_write_ret_1;
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("IBS matrix written to %s .\n", outname);
}
}
@@ -4964,11 +4962,11 @@ uint32_t calc_genome_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
}
int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uint32_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, double* set_allele_freqs, uint32_t* nchrobs, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uint32_t plink_maxfid, uint32_t plink_maxiid, uintptr_t max_sample_id_len, char* paternal_ids, uintptr_t max_paternal_id_len, char* maternal_id [...]
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
int32_t retval = 0;
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
- unsigned char* loadbuf = NULL; // from file
+ unsigned char* loadbuf = nullptr; // from file
int32_t ibd_prect = 0;
int64_t cur_line = 0;
double e00 = 0;
@@ -5094,9 +5092,9 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
g_missing_dbl_excluded = missing_dbl_excluded;
g_sample_missing_unwt = sample_missing_unwt;
g_genome_main = genome_main;
- fill_uint_zero(missing_dbl_excluded, tot_cells);
- fill_uint_zero(sample_missing_unwt, sample_ct);
- fill_uint_zero(genome_main, tot_cells * 5);
+ fill_uint_zero(tot_cells, missing_dbl_excluded);
+ fill_uint_zero(sample_ct, sample_missing_unwt);
+ fill_uint_zero(tot_cells * 5, genome_main);
if (!IS_SET(marker_exclude, 0)) {
if (fseeko(bedfile, bed_offset, SEEK_SET)) {
retval = RET_READ_FAIL;
@@ -5191,8 +5189,8 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
}
if (ukk < GENOME_MULTIPLEX) {
memset(&(loadbuf[ukk * unfiltered_sample_ct4]), 0, (GENOME_MULTIPLEX - ukk) * unfiltered_sample_ct4);
- fill_ulong_zero(geno, sample_ct * (GENOME_MULTIPLEX / BITCT2));
- fill_ulong_zero(masks, sample_ct * (GENOME_MULTIPLEX / BITCT2));
+ fill_ulong_zero(sample_ct * (GENOME_MULTIPLEX / BITCT2), geno);
+ fill_ulong_zero(sample_ct * (GENOME_MULTIPLEX / BITCT2), masks);
for (umm = ukk * 2; umm < GENOME_MULTIPLEX2; umm++) {
*glptr2++ = GENOME_MULTIPLEX2;
}
@@ -5205,7 +5203,7 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
glptr3 = mmasks;
giptr = sample_missing_unwt;
sample_uidx = 0;
- fill_int_zero(missing_ct_buf, BITCT);
+ fill_int_zero(BITCT, missing_ct_buf);
missing_ct_all = 0;
for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
// er, switch this to new loop structure
@@ -5352,8 +5350,8 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
fwrite(wbuf, 1, cptr - wbuf, outfile);
uljj += sample_ct - ulii - 2;
}
- putc('1', outfile);
- putc(' ', outfile);
+ putc_unlocked('1', outfile);
+ putc_unlocked(' ', outfile);
giptr3++;
for (ujj = sample_idx + 1; ujj < sample_ct; ujj++) {
cptr = dtoa_gx(1.0 - ((double)((*giptr) + 2 * giptr[1])) / ((double)(2 * (uii - (*giptr3++) + (*giptr2++)))), ' ', wbuf);
@@ -5372,7 +5370,7 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
if (fclose_null(&outfile)) {
goto calc_genome_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
strcpy(outname_end, ".mibs.id");
retval = write_ids(outname, unfiltered_sample_ct, sample_exclude, sample_ids, max_sample_id_len);
if (retval) {
@@ -5399,8 +5397,8 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
fwrite(wbuf, 1, cptr - wbuf, outfile);
uljj += sample_ct - ulii - 2;
}
- putc('0', outfile);
- putc(' ', outfile);
+ putc_unlocked('0', outfile);
+ putc_unlocked(' ', outfile);
giptr3++;
for (ujj = sample_idx + 1; ujj < sample_ct; ujj++) {
cptr = dtoa_gx(((double)((*giptr) + 2 * giptr[1])) / ((double)(2 * (uii - (*giptr3++) + (*giptr2++)))), ' ', wbuf);
@@ -5419,7 +5417,7 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
if (fclose_null(&outfile)) {
goto calc_genome_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
strcpy(outname_end, ".mdist.id");
retval = write_ids(outname, unfiltered_sample_ct, sample_exclude, sample_ids, max_sample_id_len);
if (retval) {
@@ -5478,7 +5476,7 @@ int32_t calc_genome(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uin
goto calc_genome_ret_1;
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("Finished writing %s .\n", outname);
while (0) {
calc_genome_ret_NOMEM:
@@ -5755,7 +5753,7 @@ uint32_t rel_cutoff_batch_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
progress += row;
if (progress >= pct * hundredth) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = 1 + (progress / hundredth);
printf("\b\b%u%%", pct - 1);
@@ -5792,7 +5790,7 @@ uint32_t rel_cutoff_batch_rbin_emitn(uint32_t overflow_ct, unsigned char* readbu
uint32_t wbuf_ct;
uint32_t uii;
float fxx;
-
+ float fyy;
while (row < sample_ct) {
if (rel_ct_arr[row] == -1) {
fseeko(in_binfile, (row + 1) * sizeof(float), SEEK_CUR);
@@ -5816,10 +5814,15 @@ uint32_t rel_cutoff_batch_rbin_emitn(uint32_t overflow_ct, unsigned char* readbu
}
sptr_cur = memcpya(sptr_cur, wbuf, wbuf_ct);
sptr_cur = uint32toa_x(++new_col, '\t', sptr_cur);
- fread(&fxx, 4, 1, in_bin_nfile);
+ if ((fread(&fxx, sizeof(float), 1, in_bin_nfile) != 1) || (fread(&fyy, sizeof(float), 1, in_binfile) != 1)) {
+ // fread() returns the item count (1 on success); can't use return code here
+ putc_unlocked('\n', stdout);
+ fflush(stdout);
+ fputs("Error: File read failure.\n", stderr);
+ exit(RET_READ_FAIL);
+ }
sptr_cur = uint32toa_x((int32_t)fxx, '\t', sptr_cur);
- fread(&fxx, 4, 1, in_binfile);
- sptr_cur = ftoa_ex(fxx, '\n', sptr_cur);
+ sptr_cur = ftoa_ex(fyy, '\n', sptr_cur);
col++;
if (sptr_cur >= readbuf_end) {
goto rel_cutoff_batch_rbin_emitn_ret;
@@ -5832,7 +5835,7 @@ uint32_t rel_cutoff_batch_rbin_emitn(uint32_t overflow_ct, unsigned char* readbu
progress += row;
if (progress >= pct * hundredth) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = 1 + (progress / hundredth);
printf("\b\b%u%%", pct - 1);
@@ -5856,12 +5859,12 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
uintptr_t sample_ct = 0;
uintptr_t line_idx = 0;
double rel_cutoff = relip->cutoff;
- FILE* idfile = NULL;
- FILE* outfile = NULL;
- FILE* out_bin_nfile = NULL;
- FILE* in_binfile = NULL;
- FILE* in_bin_nfile = NULL;
- gzFile cur_gzfile = NULL;
+ FILE* idfile = nullptr;
+ FILE* outfile = nullptr;
+ FILE* out_bin_nfile = nullptr;
+ FILE* in_binfile = nullptr;
+ FILE* in_bin_nfile = nullptr;
+ gzFile cur_gzfile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
uint32_t samples_excluded = 0;
uint32_t exactly_one_rel_ct = 0;
@@ -5985,7 +5988,7 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -6041,7 +6044,7 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -6054,9 +6057,9 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
goto rel_cutoff_batch_ret_INVALID_FORMAT_GENERIC;
}
gzclose(cur_gzfile);
- cur_gzfile = NULL;
+ cur_gzfile = nullptr;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("%s read complete. Pruning.\n", grmname);
// would prefer to just call do_rel_cutoff(), but unfortunately that
@@ -6228,7 +6231,7 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
}
for (sample_idx = 0; sample_idx < sample_ct;) {
- if (fgets(g_textbuf, MAXLINELEN, idfile) == NULL) {
+ if (fgets(g_textbuf, MAXLINELEN, idfile) == nullptr) {
goto rel_cutoff_batch_ret_READ_FAIL;
}
if (is_eoln_kns(*(skip_initial_spaces(g_textbuf)))) {
@@ -6343,10 +6346,11 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
break;
}
}
- fread(&fxx, 4, 1, in_bin_nfile);
+ if ((fread(&fxx, sizeof(float), 1, in_bin_nfile) != 1) || (fread(&fyy, sizeof(float), 1, in_binfile) != 1)) {
+ goto rel_cutoff_batch_ret_READ_FAIL;
+ }
fwrite(&fxx, 4, 1, out_bin_nfile);
- fread(&fxx, 4, 1, in_binfile);
- fwrite(&fxx, 4, 1, outfile);
+ fwrite(&fyy, 4, 1, outfile);
col++;
}
} else {
@@ -6372,7 +6376,7 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
progress += row;
if (progress >= pct * hundredth) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = 1 + (progress / hundredth);
printf("\b\b%u%%", pct - 1);
@@ -6381,7 +6385,7 @@ int32_t rel_cutoff_batch(uint32_t load_grm_bin, char* grmname, char* outname, ch
col = 0;
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("Pruned relationship matrix written to %s .\n", outname);
}
retval = 0;
@@ -6677,7 +6681,7 @@ void copy_set_allele_freqs(uintptr_t marker_uidx, uintptr_t* marker_exclude, uin
int32_t load_distance_wts(char* distance_wts_fname, uintptr_t unfiltered_marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t noheader, uint32_t conditional_alloc_exclude, uintptr_t** marker_exclude_ptr, uint32_t* marker_ct_ptr, double** main_weights_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t line_idx = 0;
@@ -6812,8 +6816,8 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
uintptr_t sample_ct = unfiltered_sample_ct - (*sample_exclude_ct_ptr);
uintptr_t marker_uidx = 0;
uintptr_t marker_idx = 0;
- FILE* outfile = NULL;
- FILE* out_bin_nfile = NULL;
+ FILE* outfile = nullptr;
+ FILE* out_bin_nfile = nullptr;
uintptr_t* marker_exclude = marker_exclude_orig;
uint32_t rel_calc_type = relip->modifier & REL_CALC_MASK;
int32_t ibc_type = relip->ibc_type;
@@ -6823,12 +6827,12 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
uint32_t all_missing_warning = 0;
int64_t llxx = 0;
double rel_cutoff = relip->cutoff;
- double* dist_ptr = NULL;
- double* dptr3 = NULL;
- double* dptr4 = NULL;
- double* rel_dists = NULL;
- double* main_weights = NULL;
- double* main_weights_ptr = NULL;
+ double* dist_ptr = nullptr;
+ double* dptr3 = nullptr;
+ double* dptr4 = nullptr;
+ double* rel_dists = nullptr;
+ double* main_weights = nullptr;
+ double* main_weights_ptr = nullptr;
double* dptr2;
double set_allele_freq_buf[MULTIPLEX_DIST];
char wbuf[96];
@@ -6969,13 +6973,13 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
}
if (cur_markers_loaded < MULTIPLEX_REL) {
memset(&(gptr[cur_markers_loaded * unfiltered_sample_ct4]), 0, (MULTIPLEX_REL - cur_markers_loaded) * unfiltered_sample_ct4);
- fill_double_zero(&(set_allele_freq_buf[cur_markers_loaded]), MULTIPLEX_REL - cur_markers_loaded);
+ fill_double_zero(MULTIPLEX_REL - cur_markers_loaded, &(set_allele_freq_buf[cur_markers_loaded]));
}
- fill_ulong_zero(mmasks, sample_ct);
+ fill_ulong_zero(sample_ct, mmasks);
is_last_block = (marker_idx == marker_ct);
for (win_marker_idx = 0; win_marker_idx < cur_markers_loaded; win_marker_idx += MULTIPLEX_REL / 3) {
- fill_ulong_zero(masks, sample_ct);
+ fill_ulong_zero(sample_ct, masks);
sample_idx = 0;
glptr2 = geno;
for (sample_uidx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
@@ -7009,13 +7013,13 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
}
if (calculation_type & CALC_IBC) {
for (uii = 0; uii < 3; uii++) {
- update_rel_ibc(&(rel_ibc[uii * sample_ct]), geno, &(set_allele_freq_buf[win_marker_idx]), main_weights_ptr? (&(main_weights_ptr[win_marker_idx])) : NULL, uii, sample_ct, ukk);
+ update_rel_ibc(&(rel_ibc[uii * sample_ct]), geno, &(set_allele_freq_buf[win_marker_idx]), main_weights_ptr? (&(main_weights_ptr[win_marker_idx])) : nullptr, uii, sample_ct, ukk);
}
} else {
- update_rel_ibc(rel_ibc, geno, &(set_allele_freq_buf[win_marker_idx]), main_weights_ptr? (&(main_weights_ptr[win_marker_idx])) : NULL, ibc_type, sample_ct, ukk);
+ update_rel_ibc(rel_ibc, geno, &(set_allele_freq_buf[win_marker_idx]), main_weights_ptr? (&(main_weights_ptr[win_marker_idx])) : nullptr, ibc_type, sample_ct, ukk);
}
if (rel_req) {
- fill_subset_weights_r(subset_weights, &(set_allele_freq_buf[win_marker_idx]), main_weights_ptr? (&(main_weights_ptr[win_marker_idx])) : NULL, (ibc_type != -1));
+ fill_subset_weights_r(subset_weights, &(set_allele_freq_buf[win_marker_idx]), main_weights_ptr? (&(main_weights_ptr[win_marker_idx])) : nullptr, (ibc_type != -1));
ulii = 0;
if (!main_weights_ptr) {
if (spawn_threads2(threads, &calc_rel_thread, dist_thread_ct, ujj)) {
@@ -7035,11 +7039,11 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
fflush(stdout);
} while (!is_last_block);
if (rel_req) {
- putchar('\r');
+ putc_unlocked('\r', stdout);
logprint("Relationship matrix calculation complete.\n");
dist_ptr = rel_dists;
} else {
- putchar('\n');
+ putc_unlocked('\n', stdout);
}
dptr2 = rel_ibc;
if (calculation_type & CALC_IBC) {
@@ -7139,7 +7143,7 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
if (rel_calc_type & REL_CALC_BIN) {
pct = 1;
if (rel_shape == REL_CALC_SQ0) {
- fill_double_zero((double*)geno, sample_ct - 1);
+ fill_double_zero(sample_ct - 1, (double*)geno);
}
strcpy(outname_end, ".rel.bin");
if (parallel_tot > 1) {
@@ -7231,7 +7235,7 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
// need to downcode all doubles to floats
pct = 1;
if (rel_shape == REL_CALC_SQ0) {
- fill_float_zero((float*)geno, sample_ct - 1);
+ fill_float_zero(sample_ct - 1, (float*)geno);
}
// make this .rel.bin4?
strcpy(outname_end, ".rel.bin");
@@ -7368,7 +7372,7 @@ int32_t calc_rel(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_to
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
if (!parallel_idx) {
wptr = strcpya(g_logbuf, "Relationship matrix ");
if (parallel_tot > 1) {
@@ -7429,7 +7433,7 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
// todo: provide a randomized approximation algorithm as well. (This can
// wait, though; far more important to implement stuff that doesn't already
// exist. EIGENSOFT is not *that* hard to use.)
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
@@ -7439,17 +7443,17 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
uintptr_t final_mask = get_final_mask(pca_sample_ct);
double nz = 0.0;
double zz = -1.0;
- uintptr_t* loadbuf_proj = NULL;
- uintptr_t* sample_exclude_proj = NULL;
- double* proj_sample_loadings = NULL;
- double* proj_allhom_wts = NULL;
- uint32_t* proj_missing_cts = NULL;
+ uintptr_t* loadbuf_proj = nullptr;
+ uintptr_t* sample_exclude_proj = nullptr;
+ double* proj_sample_loadings = nullptr;
+ double* proj_allhom_wts = nullptr;
+ uint32_t* proj_missing_cts = nullptr;
uint32_t write_headers = relip->modifier & REL_PCA_HEADER;
uint32_t pc_ct = relip->pc_ct;
uint32_t var_wts = relip->modifier & REL_PCA_VAR_WTS;
uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
int32_t ibc_type = relip->ibc_type;
- int32_t mt_code = chrom_info_ptr->mt_code;
+ int32_t mt_code = chrom_info_ptr->xymt_codes[MT_OFFSET];
int32_t retval = 0;
__CLPK_integer info = 0;
__CLPK_integer lwork = -1;
@@ -7498,7 +7502,7 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
uint32_t ujj;
int32_t chrom_idx;
// calc_rel() already verified that diploid data is present
- g_missing_dbl_excluded = NULL;
+ g_missing_dbl_excluded = nullptr;
marker_ct -= count_non_autosomal_markers(chrom_info_ptr, marker_exclude, 1, 1);
if ((pc_ct > pca_sample_ct) || (pc_ct > marker_ct)) {
if (pca_sample_ct <= marker_ct) {
@@ -7548,7 +7552,7 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
if (!isuppz) {
goto calc_pca_ret_NOMEM;
}
- fill_int_zero((int32_t*)isuppz, 2 * pc_ct * (sizeof(__CLPK_integer) / sizeof(int32_t)));
+ fill_int_zero(2 * pc_ct * (sizeof(__CLPK_integer) / sizeof(int32_t)), (int32_t*)isuppz);
ldz = mdim;
dsyevr_(&jobz, &range, &uplo, &mdim, main_matrix, &mdim, &nz, &nz, &i1, &i2, &zz, &out_m, out_w, out_z, &ldz, isuppz, &optim_lwork, &lwork, &optim_liwork, &liwork, &info);
@@ -7561,7 +7565,7 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
if (!iwork) {
goto calc_pca_ret_NOMEM;
}
- fill_int_zero((int32_t*)iwork, liwork * (sizeof(__CLPK_integer) / sizeof(int32_t)));
+ fill_int_zero(liwork * (sizeof(__CLPK_integer) / sizeof(int32_t)), (int32_t*)iwork);
dsyevr_(&jobz, &range, &uplo, &mdim, main_matrix, &mdim, &nz, &nz, &i1, &i2, &zz, &out_m, out_w, out_z, &ldz, isuppz, work, &lwork, iwork, &liwork, &info);
// * out_w[0..(pc_ct-1)] contains eigenvalues
@@ -7635,8 +7639,8 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
if (is_set(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == mt_code)) {
continue;
}
- marker_uidx = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ marker_uidx = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
wptr_start = chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf);
*wptr_start++ = delimiter;
if (marker_uidx < chrom_end) {
@@ -7662,7 +7666,7 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
// Variant weight matrix = X^T * S * D^{-1/2}, where X^T is the
// variance-standardized genotype matrix, S is the sample weight
// matrix, and D is a diagonal eigenvalue matrix.
- fill_double_zero(cur_var_wts, pc_ct);
+ fill_double_zero(pc_ct, cur_var_wts);
dxx = set_allele_freqs[marker_uidx];
dyy = sqrt(1 / (2 * dxx * (1.0 - dxx)));
ulptr = loadbuf;
@@ -7844,7 +7848,7 @@ int32_t calc_pca(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
goto calc_pca_ret_WRITE_FAIL;
}
*outname_end = '\0';
- putchar('\r');
+ putc_unlocked('\r', stdout);
if (var_wts) {
LOGPRINTFWW("--pca: Results saved to %s.eigenval , %s.eigenvec , and %s.eigenvec.var .\n", outname, outname, outname);
} else {
@@ -7875,7 +7879,7 @@ int32_t calc_ibm(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uintpt
uint32_t dist_thread_ct = g_thread_ct;
int32_t retval = 0;
uintptr_t* marker_exclude = marker_exclude_orig;
- uint32_t* giptr = NULL;
+ uint32_t* giptr = nullptr;
unsigned char* bigstack_mark;
unsigned char* bedbuf;
unsigned char* gptr;
@@ -7968,7 +7972,7 @@ int32_t calc_ibm(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uintpt
printf("\r%" PRIuPTR " markers complete.", marker_idx);
fflush(stdout);
} while (!is_last_block);
- putchar('\r');
+ putc_unlocked('\r', stdout);
bigstack_reset(bigstack_mark);
while (0) {
calc_ibm_ret_NOMEM:
@@ -7986,9 +7990,9 @@ int32_t calc_ibm(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uintpt
int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parallel_tot, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, char* read_dists_fname, char* distance_wts_fname, double distance_exp, uint64_t calculation_type, uint32_t dist_calc_type, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, uint32_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* set_allele_freqs, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exc [...]
// if calculation_type == 0, this must perform the basic unweighted
// computation and not write to disk.
- FILE* outfile = NULL;
- FILE* outfile2 = NULL;
- FILE* outfile3 = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile2 = nullptr;
+ FILE* outfile3 = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uint64_t dists_alloc = 0;
uint32_t missing_wt_needed = ((calculation_type & CALC_DISTANCE) || ((!read_dists_fname) && (calculation_type & (CALC_IBS_TEST | CALC_GROUPDIST | CALC_REGRESS_DISTANCE)))) && (!(dist_calc_type & DISTANCE_FLAT_MISSING));
@@ -7996,14 +8000,14 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
uint32_t marker_weight_sum = 0;
int32_t retval = 0;
uintptr_t* marker_exclude = marker_exclude_orig;
- uint32_t* dist_missing_wts_i = NULL;
- uint32_t* sample_missing = NULL;
- uint32_t* sample_missing_unwt = NULL;
- uint32_t* giptr = NULL;
- uint32_t* giptr2 = NULL;
- char* writebuf = NULL;
- double* main_weights = NULL;
- double* subset_weights = NULL;
+ uint32_t* dist_missing_wts_i = nullptr;
+ uint32_t* sample_missing = nullptr;
+ uint32_t* sample_missing_unwt = nullptr;
+ uint32_t* giptr = nullptr;
+ uint32_t* giptr2 = nullptr;
+ char* writebuf = nullptr;
+ double* main_weights = nullptr;
+ double* subset_weights = nullptr;
uint32_t dist_thread_ct = g_thread_ct;
double set_allele_freq_buf[MULTIPLEX_DIST];
uint32_t wtbuf[MULTIPLEX_DIST];
@@ -8066,7 +8070,7 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
unwt_needed = 1;
} else {
// defensive
- g_missing_dbl_excluded = NULL;
+ g_missing_dbl_excluded = nullptr;
}
// Additional + CACHELINE is to fix aliasing bug that shows up with -O2 in
// some cases.
@@ -8082,15 +8086,15 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
}
g_sample_missing = sample_missing;
} else {
- g_missing_tot_weights = NULL;
+ g_missing_tot_weights = nullptr;
}
ujj = distance_wts_fname || (distance_exp != 0.0); // special weights?
if (!ujj) {
g_idists = (int32_t*)((char*)bigstack_mark - round_up_pow2(llxx * sizeof(int32_t), CACHELINE));
- fill_int_zero(g_idists, llxx);
+ fill_int_zero(llxx, g_idists);
} else {
- fill_double_zero(g_dists, llxx);
+ fill_double_zero(llxx, g_dists);
}
retval = conditional_allocate_non_autosomal_markers(chrom_info_ptr, unfiltered_marker_ct, marker_exclude_orig, marker_ct, 1, 1, "distance matrix calc", &marker_exclude, &uii);
@@ -8219,7 +8223,7 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
for (ujj = 0; ujj < multiplex; ujj++) {
set_allele_freq_buf[ujj] = 0.5;
}
- fill_int_zero((int32_t*)wtbuf, multiplex);
+ fill_int_zero(multiplex, (int32_t*)wtbuf);
// For each pair (g_j, g_k) of 2-bit PLINK genotypes, we perform the
// following operations:
@@ -8264,7 +8268,7 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
// See the comments at the beginning of this file for discussion of
// the zero exponent special case.
- copy_set_allele_freqs(marker_uidx, marker_exclude, multiplex, marker_idx, marker_ct, NULL, set_allele_freqs, set_allele_freq_buf);
+ copy_set_allele_freqs(marker_uidx, marker_exclude, multiplex, marker_idx, marker_ct, nullptr, set_allele_freqs, set_allele_freq_buf);
if (missing_wt_needed) {
uii = marker_ct - marker_idx;
if (uii > multiplex) {
@@ -8279,11 +8283,11 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
if (ujj < multiplex) {
memset(&(bedbuf[ujj * unfiltered_sample_ct4]), 0, (multiplex - ujj) * unfiltered_sample_ct4);
if (!main_weights) {
- fill_ulong_zero(geno, sample_ct * (MULTIPLEX_2DIST / BITCT));
- fill_ulong_zero(masks, sample_ct * (MULTIPLEX_2DIST / BITCT));
+ fill_ulong_zero(sample_ct * (MULTIPLEX_2DIST / BITCT), geno);
+ fill_ulong_zero(sample_ct * (MULTIPLEX_2DIST / BITCT), masks);
} else {
- fill_ulong_zero(geno, sample_ct);
- fill_ulong_zero(masks, sample_ct);
+ fill_ulong_zero(sample_ct, geno);
+ fill_ulong_zero(sample_ct, masks);
}
}
is_last_block = (marker_idx == marker_ct);
@@ -8368,7 +8372,7 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
join_threads2(threads, dist_thread_ct, uii);
}
} else {
- fill_ulong_zero(mmasks, sample_ct);
+ fill_ulong_zero(sample_ct, mmasks);
for (ukk = 0; ukk < ujj; ukk += MULTIPLEX_DIST_EXP / 2) {
glptr = geno;
glptr2 = masks;
@@ -8417,7 +8421,7 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
printf("\r%" PRIuPTR " markers complete.", marker_idx);
fflush(stdout);
} while (!is_last_block);
- putchar('\r');
+ putc_unlocked('\r', stdout);
logprint("Distance matrix calculation complete.\n");
bigstack_reset(masks);
if (calculation_type & (CALC_PLINK1_DISTANCE_MATRIX | CALC_PLINK1_IBS_MATRIX)) {
@@ -8460,7 +8464,7 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
if (fclose_null(&outfile)) {
goto calc_distance_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
if (!parallel_idx) {
wptr = strcpya(g_logbuf, "Distances (proportions) written to ");
wptr = strcpya(wptr, outname);
@@ -8510,7 +8514,7 @@ int32_t calc_distance(pthread_t* threads, uint32_t parallel_idx, uint32_t parall
if (fclose_null(&outfile)) {
goto calc_distance_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
strcpy(outname_end, ".mibs.id");
retval = write_ids(outname, unfiltered_sample_ct, sample_exclude, sample_ids, max_sample_id_len);
if (retval) {
@@ -8687,27 +8691,27 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
// O(n) space, so it's an excellent complement to --distance/--genome +
// --parallel on very large datasets (e.g. 500k samples), and should be added
// as a special case in the future.
- FILE* outfile = NULL;
- uint32_t* cluster_sorted_ibs_indices = NULL;
+ FILE* outfile = nullptr;
+ uint32_t* cluster_sorted_ibs_indices = nullptr;
#ifdef __LP64__
- // uint64_t* cluster_sorted_ibs_indices_big = NULL;
+ // uint64_t* cluster_sorted_ibs_indices_big = nullptr;
#endif
- uint32_t* sample_to_cluster = NULL;
- double* neighbor_quantiles = NULL;
- double* neighbor_quantile_means = NULL;
- double* neighbor_quantile_stdev_recips = NULL;
- uint32_t* neighbor_qindices = NULL;
- uint32_t* ppc_fail_counts = NULL;
- uint32_t* cur_cluster_sizes = NULL;
- uint32_t* cur_cluster_case_cts = NULL;
- uint32_t* cur_cluster_remap = NULL;
- uint32_t* cluster_index = NULL;
- uintptr_t* collapsed_pheno_c = NULL;
- uint32_t* sample_idx_to_uidx = NULL;
- uint32_t* late_clidx_to_sample_uidx = NULL;
- uintptr_t* ibs_ties = NULL;
+ uint32_t* sample_to_cluster = nullptr;
+ double* neighbor_quantiles = nullptr;
+ double* neighbor_quantile_means = nullptr;
+ double* neighbor_quantile_stdev_recips = nullptr;
+ uint32_t* neighbor_qindices = nullptr;
+ uint32_t* ppc_fail_counts = nullptr;
+ uint32_t* cur_cluster_sizes = nullptr;
+ uint32_t* cur_cluster_case_cts = nullptr;
+ uint32_t* cur_cluster_remap = nullptr;
+ uint32_t* cluster_index = nullptr;
+ uintptr_t* collapsed_pheno_c = nullptr;
+ uint32_t* sample_idx_to_uidx = nullptr;
+ uint32_t* late_clidx_to_sample_uidx = nullptr;
+ uintptr_t* ibs_ties = nullptr;
uint32_t* genome_main = g_genome_main;
- double* dptr = NULL;
+ double* dptr = nullptr;
double min_ppc = cp->ppc;
double min_ibm = cp->min_ibm;
double min_zx = 0.0;
@@ -8786,7 +8790,7 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
}
}
retval = fill_sample_to_cluster(unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, cluster_starts, sample_to_cluster, late_clidx_to_sample_uidx);
- if (!retval) {
+ if (retval) {
goto calc_cluster_neighbor_ret_1;
}
}
@@ -8825,19 +8829,19 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
if (!neighbor_quantile_stdev_recips) {
goto calc_cluster_neighbor_ret_NOMEM;
}
- fill_double_zero(neighbor_quantiles, ulii);
+ fill_double_zero(ulii, neighbor_quantiles);
}
- fill_ulong_zero(cluster_merge_prevented, BITCT_TO_WORDCT(initial_triangle_size));
+ fill_ulong_zero(BITCT_TO_WORDCT(initial_triangle_size), cluster_merge_prevented);
if ((min_ppc != 0.0) || genome_main || read_genome_fname) {
if (do_neighbor && (min_ppc != 0.0)) {
ppc_fail_counts = (uint32_t*)malloc(sample_ct * sizeof(int32_t));
if (!ppc_fail_counts) {
goto calc_cluster_neighbor_ret_NOMEM;
}
- fill_uint_zero(ppc_fail_counts, sample_ct);
+ fill_uint_zero(sample_ct, ppc_fail_counts);
}
if (read_genome_fname) {
- retval = read_genome(read_genome_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, cluster_merge_prevented, use_genome_dists? cluster_sorted_ibs : NULL, neighbor_n2, neighbor_quantiles, neighbor_qindices, ppc_fail_counts, min_ppc, cluster_ct && (!is_group_avg), cluster_ct, cluster_starts, sample_to_cluster);
+ retval = read_genome(read_genome_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, cluster_merge_prevented, use_genome_dists? cluster_sorted_ibs : nullptr, neighbor_n2, neighbor_quantiles, neighbor_qindices, ppc_fail_counts, min_ppc, cluster_ct && (!is_group_avg), cluster_ct, cluster_starts, sample_to_cluster);
if (retval) {
goto calc_cluster_neighbor_ret_1;
}
@@ -8917,7 +8921,7 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
// calculate entire distance matrix, or use already-calculated matrix in
// memory
if (!g_dists) {
- retval = calc_distance(threads, 0, 1, bedfile, bed_offset, outname, outname_end, NULL, NULL, 0.0, 0, DISTANCE_FLAT_MISSING | DISTANCE_CLUSTER, unfiltered_marker_ct, marker_exclude, marker_ct, NULL, 0, set_allele_freqs, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, chrom_info_ptr);
+ retval = calc_distance(threads, 0, 1, bedfile, bed_offset, outname, outname_end, nullptr, nullptr, 0.0, 0, DISTANCE_FLAT_MISSING | DISTANCE_CLUSTER, unfiltered_marker_ct, marker_exclude, marker_ct, nullptr, 0, set_allele_freqs, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, chrom_info_ptr);
if (retval) {
goto calc_cluster_neighbor_ret_1;
}
@@ -9011,7 +9015,7 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
if (min_ppc != 0.0) {
fputs(" PROP_DIFF ", outfile);
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
dxx1 = 1.0 / ((double)((intptr_t)(sample_ct - 1)));
for (sample_idx1 = 0; sample_idx1 < sample_ct; sample_idx1++) {
fam_id = &(sample_ids[sample_idx_to_uidx[sample_idx1] * max_sample_id_len]);
@@ -9196,7 +9200,7 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
if (putc_checked('1', outfile)) {
goto calc_cluster_neighbor_ret_WRITE_FAIL;
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (!genome_main) {
for (sample_idx2 = sample_idx1 + 1; sample_idx2 < sample_ct; sample_idx2++) {
dxx = 1.0 - ((double)((int32_t)(uii + (*(++sample_missing_ptr)) - 2 * missing_dbl_excluded[((sample_idx2 * (sample_idx2 - 1)) >> 1) + sample_idx1]))) * dxx1;
@@ -9221,7 +9225,7 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
}
if ((sample_idx1 + 1) * 100LLU >= sample_ct * ((uint64_t)pct)) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = ((sample_idx1 + 1) * 100LLU) / sample_ct;
printf("\b\b%u%%", pct++);
@@ -9233,7 +9237,7 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
if (fclose_null(&outfile)) {
goto calc_cluster_neighbor_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("IBM matrix written to %s .\n", outname);
if (ibm_warning) {
logerrprint("Warning: Initial cluster assignment violates IBM constraint.\n");
@@ -9261,7 +9265,7 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
goto calc_cluster_neighbor_ret_NOMEM;
}
copy_bitarr_subset_excl(pheno_c, sample_exclude, unfiltered_sample_ct, sample_ct, collapsed_pheno_c);
- fill_uint_zero(cur_cluster_case_cts, cur_cluster_ct);
+ fill_uint_zero(cur_cluster_ct, cur_cluster_case_cts);
if (!cluster_ct) {
for (sample_idx1 = 0; sample_idx1 < sample_ct; sample_idx1++) {
if (IS_SET(collapsed_pheno_c, sample_idx1)) {
@@ -9609,12 +9613,12 @@ int32_t calc_cluster_neighbor(pthread_t* threads, FILE* bedfile, uintptr_t bed_o
goto calc_cluster_neighbor_ret_NOMEM;
}
if (is_mds_cluster || (!read_dists_fname)) {
- fill_double_zero(mds_plot_dmatrix_copy, (ulii * (ulii - 1)) / 2);
+ fill_double_zero((ulii * (ulii - 1)) / 2, mds_plot_dmatrix_copy);
}
if (read_dists_fname) {
- retval = read_dists(read_dists_fname, read_dists_id_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, is_mds_cluster? cluster_ct : 0, is_mds_cluster? cluster_starts : NULL, is_mds_cluster? sample_to_cluster : NULL, 2, 0, mds_plot_dmatrix_copy, 0, NULL, NULL);
+ retval = read_dists(read_dists_fname, read_dists_id_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, is_mds_cluster? cluster_ct : 0, is_mds_cluster? cluster_starts : nullptr, is_mds_cluster? sample_to_cluster : nullptr, 2, 0, mds_plot_dmatrix_copy, 0, nullptr, nullptr);
} else {
- retval = read_genome(read_genome_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, NULL, mds_plot_dmatrix_copy, 0, NULL, NULL, NULL, 0.0, 0, is_mds_cluster? cluster_ct : 0, is_mds_cluster? cluster_starts : NULL, is_mds_cluster? sample_to_cluster : NULL);
+ retval = read_genome(read_genome_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, nullptr, mds_plot_dmatrix_copy, 0, nullptr, nullptr, nullptr, 0.0, 0, is_mds_cluster? cluster_ct : 0, is_mds_cluster? cluster_starts : nullptr, is_mds_cluster? sample_to_cluster : nullptr);
}
if (retval) {
goto calc_cluster_neighbor_ret_1;
@@ -9881,7 +9885,7 @@ int32_t regress_distance(pthread_t* threads, uint64_t calculation_type, double*
dvv += g_calc_result[uii + 1][3];
}
regress_iters = g_jackknife_iters * thread_ct;
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("Jackknife s.e.: %g\n", sqrt(((sample_ct - g_jackknife_d) / ((double)g_jackknife_d)) * (dzz - dyy * dyy / regress_iters) / (regress_iters - 1)));
LOGPRINTF("Jackknife s.e. (y = avg phenotype): %g\n", sqrt(((sample_ct - g_jackknife_d) / ((double)g_jackknife_d)) * (dvv - dww * dww / regress_iters) / (regress_iters - 1)));
while (0) {
diff --git a/plink_cluster.c b/plink_cluster.c
index 75850a9..c323e3e 100644
--- a/plink_cluster.c
+++ b/plink_cluster.c
@@ -4,18 +4,18 @@
#include "plink_matrix.h"
void cluster_init(Cluster_info* cluster_ptr) {
- cluster_ptr->fname = NULL;
- cluster_ptr->match_fname = NULL;
- cluster_ptr->match_missing_str = NULL;
- cluster_ptr->match_type_fname = NULL;
- cluster_ptr->qmatch_fname = NULL;
- cluster_ptr->qmatch_missing_str = NULL;
- cluster_ptr->qt_fname = NULL;
- cluster_ptr->keep_fname = NULL;
- cluster_ptr->remove_fname = NULL;
- cluster_ptr->keep_flattened = NULL;
- cluster_ptr->remove_flattened = NULL;
- cluster_ptr->zerofname = NULL;
+ cluster_ptr->fname = nullptr;
+ cluster_ptr->match_fname = nullptr;
+ cluster_ptr->match_missing_str = nullptr;
+ cluster_ptr->match_type_fname = nullptr;
+ cluster_ptr->qmatch_fname = nullptr;
+ cluster_ptr->qmatch_missing_str = nullptr;
+ cluster_ptr->qt_fname = nullptr;
+ cluster_ptr->keep_fname = nullptr;
+ cluster_ptr->remove_fname = nullptr;
+ cluster_ptr->keep_flattened = nullptr;
+ cluster_ptr->remove_flattened = nullptr;
+ cluster_ptr->zerofname = nullptr;
cluster_ptr->modifier = 0;
cluster_ptr->ppc = 0.0;
cluster_ptr->max_size = 0xffffffffU;
@@ -45,8 +45,8 @@ void cluster_cleanup(Cluster_info* cluster_ptr) {
int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* sample_ids, uintptr_t max_sample_id_len, uint32_t mwithin_col, uint32_t keep_na, uintptr_t* cluster_ct_ptr, uint32_t** cluster_map_ptr, uint32_t** cluster_starts_ptr, char** cluster_ids_ptr, uintptr_t* max_cluster_id_len_ptr, char* keep_fname, char* keep_flattened, char* remove_fname, char* remove_flattened, uint32_t allow_no_samples) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
- uintptr_t* sample_exclude_new = NULL;
+ FILE* infile = nullptr;
+ uintptr_t* sample_exclude_new = nullptr;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t sample_exclude_ct = *sample_exclude_ct_ptr;
uintptr_t sample_ct = unfiltered_sample_ct - sample_exclude_ct;
@@ -60,14 +60,14 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
// one is effectively active (i.e. any names in both lists are deleted from
// the keep list, and then the function proceeds as if --remove-clusters
// wasn't specified); this is tracked by which of
- // sorted_keep_ids/sorted_remove_ids is non-NULL. cluster_kr_ct and
+ // sorted_keep_ids/sorted_remove_ids is non-nullptr. cluster_kr_ct and
// max_cluster_kr_len apply to that array.
- char* sorted_keep_ids = NULL;
- char* sorted_remove_ids = NULL;
+ char* sorted_keep_ids = nullptr;
+ char* sorted_remove_ids = nullptr;
uintptr_t max_cluster_id_len = 0;
uintptr_t assigned_ct = 0;
uintptr_t cluster_ct = 0;
- Ll_str* cluster_names = NULL;
+ Ll_str* cluster_names = nullptr;
uintptr_t* already_seen;
uintptr_t* ulptr;
char* cluster_ids;
@@ -153,7 +153,7 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
}
}
qsort(sorted_keep_ids, cluster_kr_ct, max_cluster_kr_len, strcmp_casted);
- cluster_kr_ct = collapse_duplicate_ids(sorted_keep_ids, cluster_kr_ct, max_cluster_kr_len, NULL);
+ cluster_kr_ct = collapse_duplicate_ids(sorted_keep_ids, cluster_kr_ct, max_cluster_kr_len, nullptr);
if (remove_flattened || remove_fname) {
bigstack_end_mark2 = g_bigstack_end;
// track deletions
@@ -258,7 +258,7 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
}
}
qsort(sorted_remove_ids, cluster_kr_ct, max_cluster_kr_len, strcmp_casted);
- cluster_kr_ct = collapse_duplicate_ids(sorted_remove_ids, cluster_kr_ct, max_cluster_kr_len, NULL);
+ cluster_kr_ct = collapse_duplicate_ids(sorted_remove_ids, cluster_kr_ct, max_cluster_kr_len, nullptr);
}
if (infile) {
if (fclose_null(&infile)) {
@@ -350,8 +350,8 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
goto load_clusters_ret_READ_FAIL;
}
if (cluster_names) {
- if (max_cluster_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: Cluster IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_cluster_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: Cluster IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto load_clusters_ret_INVALID_FORMAT;
}
*max_cluster_id_len_ptr = max_cluster_id_len;
@@ -453,9 +453,9 @@ int32_t load_clusters(char* fname, uintptr_t unfiltered_sample_ct, uintptr_t* sa
// 2. allocate buffer, copy over
// 3. natural sort, remove duplicates, shrink buffer
// 4. initialize other data structures
- if (max_cluster_id_len > MAX_ID_LEN_P1) {
+ if (max_cluster_id_len > MAX_ID_BLEN) {
// max FID len was previously checked
- logerrprint("Error: Cluster IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ logerrprint("Error: Cluster IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto load_clusters_ret_INVALID_FORMAT;
}
@@ -568,7 +568,7 @@ void fill_unfiltered_sample_to_cluster(uintptr_t unfiltered_sample_ct, uintptr_t
uint32_t* cluster_end_ptr;
uint32_t cluster_idx;
// 0xffffffffU cluster index = unassigned
- fill_uint_one(sample_to_cluster, unfiltered_sample_ct);
+ fill_uint_one(unfiltered_sample_ct, sample_to_cluster);
for (cluster_idx = 0; cluster_idx < cluster_ct; cluster_idx++) {
cluster_end_ptr = &(cluster_map[cluster_starts[cluster_idx + 1]]);
do {
@@ -578,7 +578,7 @@ void fill_unfiltered_sample_to_cluster(uintptr_t unfiltered_sample_ct, uintptr_t
}
int32_t fill_sample_to_cluster(uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t* sample_to_cluster, uint32_t* late_clidx_to_sample_uidx) {
- // If late_clidx_to_sample_uidx is not NULL, all samples not in a loaded
+ // If late_clidx_to_sample_uidx is not nullptr, all samples not in a loaded
// cluster are given their own cluster, and late_clidx_to_sample_uidx is
// filled with the cluster index -> sample uidx mapping.
// (Yes, this is a strange interface; it may be switched to filtered sample
@@ -595,7 +595,7 @@ int32_t fill_sample_to_cluster(uintptr_t unfiltered_sample_ct, uintptr_t* sample
goto fill_sample_to_cluster_ret_NOMEM;
}
fill_uidx_to_idx(sample_exclude, unfiltered_sample_ct, sample_ct, uidx_to_idx);
- fill_uint_one(sample_to_cluster, sample_ct);
+ fill_uint_one(sample_ct, sample_to_cluster);
for (cluster_idx = 0; cluster_idx < cluster_ct; cluster_idx++) {
cluster_end_ptr = &(cluster_map[cluster_starts[cluster_idx + 1]]);
do {
@@ -623,7 +623,7 @@ int32_t fill_sample_to_cluster(uintptr_t unfiltered_sample_ct, uintptr_t* sample
int32_t write_clusters(char* outname, char* outname_end, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, uint32_t omit_unassigned, uintptr_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, char* cluster_ids, uintptr_t max_cluster_id_len) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t sample_uidx = 0;
int32_t retval = 0;
uint32_t* sample_to_cluster;
@@ -680,7 +680,7 @@ int32_t write_clusters(char* outname, char* outname_end, uintptr_t unfiltered_sa
int32_t extract_clusters(uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, char* cluster_ids, uintptr_t max_cluster_id_len, char* cluster_names_flattened, char* clusters_fname, uintptr_t** new_sample_exclude_ptr, uintptr_t* new_sample_ct_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t line_idx = 0;
int32_t retval = 0;
@@ -851,8 +851,8 @@ int32_t cluster_include_and_reindex(uintptr_t unfiltered_sample_ct, uintptr_t* s
uint32_t sample_uidx = 0;
uint32_t case_ct = 0;
uint32_t assigned_ct = 0;
- uintptr_t* cluster_cc_perm_preimage = NULL;
- uint32_t* cluster_case_cts = NULL;
+ uintptr_t* cluster_cc_perm_preimage = nullptr;
+ uint32_t* cluster_case_cts = nullptr;
uint32_t* new_cluster_map;
uint32_t* new_cluster_starts;
uint32_t* uidx_to_idx;
@@ -1021,13 +1021,13 @@ int32_t cluster_alloc_and_populate_magic_nums(uint32_t cluster_ct, uint32_t* clu
int32_t read_dists(char* dist_fname, char* id_fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t cluster_ct, uint32_t* cluster_starts, uint32_t* sample_to_cluster, uint32_t for_cluster_flag, uint32_t is_max_dist, double* dists, uint32_t neighbor_n2, double* neighbor_quantiles, uint32_t* neighbor_qindices) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* dist_file = NULL;
- FILE* id_file = NULL;
+ FILE* dist_file = nullptr;
+ FILE* id_file = nullptr;
uintptr_t id_entry_ct = sample_ct;
uintptr_t matching_entry_ct = sample_ct;
uintptr_t line_idx = 0;
char* id_buf = &(g_textbuf[MAXLINELEN]);
- uint64_t* fidx_to_memidx = NULL; // high 32 bits = fidx, low 32 = memidx
+ uint64_t* fidx_to_memidx = nullptr; // high 32 bits = fidx, low 32 = memidx
uint32_t is_presorted = cluster_ct? 0 : 1;
int32_t retval = 0;
char* sorted_ids;
@@ -1060,7 +1060,7 @@ int32_t read_dists(char* dist_fname, char* id_fname, uintptr_t unfiltered_sample
if (bigstack_alloc_ull(sample_ct, &fidx_to_memidx)) {
goto read_dists_ret_NOMEM;
}
- fill_ull_one(fidx_to_memidx, sample_ct);
+ fill_ull_one(sample_ct, fidx_to_memidx);
if (fopen_checked(id_fname, "r", &id_file)) {
goto read_dists_ret_OPEN_FAIL;
}
@@ -1081,7 +1081,7 @@ int32_t read_dists(char* dist_fname, char* id_fname, uintptr_t unfiltered_sample
if (is_eoln_kns(*fam_id)) {
continue;
}
- if (bsearch_read_fam_indiv(fam_id, sorted_ids, max_sample_id_len, sample_ct, NULL, &ii, id_buf)) {
+ if (bsearch_read_fam_indiv(fam_id, sorted_ids, max_sample_id_len, sample_ct, nullptr, &ii, id_buf)) {
LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s has fewer tokens than expected.\n", line_idx, id_fname);
goto read_dists_ret_INVALID_FORMAT_2;
}
@@ -1293,7 +1293,7 @@ void update_neighbor(uintptr_t sample_ct, uint32_t neighbor_n2, uintptr_t sample
int32_t read_genome(char* read_genome_fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* cluster_merge_prevented, double* cluster_sorted_ibs, uint32_t neighbor_n2, double* neighbor_quantiles, uint32_t* neighbor_qindices, uint32_t* ppc_fail_counts, double min_ppc, uint32_t is_max_dist, uintptr_t cluster_ct, uint32_t* cluster_starts, uint32_t* sample_to_cluster) {
unsigned char* bigstack_mark = g_bigstack_base;
- gzFile gz_infile = NULL;
+ gzFile gz_infile = nullptr;
uint32_t neighbor_load_quantiles = neighbor_quantiles && cluster_sorted_ibs;
uint32_t ppc_warning = cluster_merge_prevented? 0 : 1;
uintptr_t loaded_entry_ct = 0;
@@ -1448,10 +1448,10 @@ int32_t read_genome(char* read_genome_fname, uintptr_t unfiltered_sample_ct, uin
int32_t cluster_enforce_match(Cluster_info* cp, int32_t missing_pheno, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t cluster_ct, uint32_t* cluster_starts, uint32_t* sample_to_cluster, uintptr_t* merge_prevented) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* matchfile = NULL;
- FILE* typefile = NULL;
+ FILE* matchfile = nullptr;
+ FILE* typefile = nullptr;
char* id_buf = &(g_textbuf[MAXLINELEN]);
- char* missing_str = NULL;
+ char* missing_str = nullptr;
uintptr_t bigstack_pre_end_address = ((uintptr_t)g_bigstack_end) - MAXLINELEN;
uintptr_t cur_coord = 0;
uint32_t cluster_mismatch_warning = 0;
@@ -1500,7 +1500,7 @@ int32_t cluster_enforce_match(Cluster_info* cp, int32_t missing_pheno, uintptr_t
goto cluster_enforce_match_ret_NOMEM;
}
for (sample_idx1 = 0; sample_idx1 < sample_ct; sample_idx1++) {
- sample_idx_to_match_str[sample_idx1] = NULL;
+ sample_idx_to_match_str[sample_idx1] = nullptr;
}
cov_type_arr = g_bigstack_base;
if (((uintptr_t)cov_type_arr) > bigstack_pre_end_address) {
@@ -1726,7 +1726,7 @@ int32_t cluster_enforce_match(Cluster_info* cp, int32_t missing_pheno, uintptr_t
goto cluster_enforce_match_ret_NOMEM;
}
for (sample_idx1 = 0; sample_idx1 < sample_ct; sample_idx1++) {
- sample_idx_to_dvals[sample_idx1] = NULL;
+ sample_idx_to_dvals[sample_idx1] = nullptr;
}
tol_arr = (double*)g_bigstack_base;
if (bigstack_left() <= MAXLINELEN * 4) {
@@ -2018,7 +2018,7 @@ uint32_t cluster_main(uintptr_t cluster_ct, uintptr_t* merge_prevented, uintptr_
}
if (is_old_tiebreaks && (siptr != tie_end)) {
siptr2 = siptr;
- siptr_best = NULL;
+ siptr_best = nullptr;
for (siptr2 = siptr; siptr2 < tie_end; siptr2++) {
ujj = *siptr2;
if (ujj != 0xffffffffU) {
@@ -2091,7 +2091,7 @@ uint32_t cluster_main(uintptr_t cluster_ct, uintptr_t* merge_prevented, uintptr_
}
if (is_old_tiebreaks && (siptr != tie_end)) {
siptr2 = siptr;
- siptr_best = NULL;
+ siptr_best = nullptr;
for (siptr2 = siptr; siptr2 < tie_end; siptr2++) {
ujj = *siptr2;
if (ujj != 0xffffffffU) {
@@ -2637,11 +2637,11 @@ void write_cluster1(FILE* outfile, uint32_t clidx, char* sample_ids, uintptr_t m
char* sptr2;
uint32_t msidx;
write_cluster1_recurse:
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
sptr = &(sample_ids[sample_idx_to_uidx[clidx] * max_sample_id_len]);
sptr2 = (char*)memchr(sptr, '\t', max_sample_id_len);
fwrite(sptr, 1, sptr2 - sptr, outfile);
- putc('_', outfile);
+ putc_unlocked('_', outfile);
fputs(&(sptr2[1]), outfile);
if (pheno_c) {
if (IS_SET(pheno_c, sample_idx_to_uidx[clidx])) {
@@ -2666,12 +2666,12 @@ void write_cluster1_oitc(FILE* outfile, uint32_t clidx, char* sample_ids, uintpt
uint32_t uii;
uint32_t ujj;
write_cluster1_oitc_recurse:
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (clidx >= orig_within_ct) {
sptr = &(sample_ids[late_clidx_to_sample_uidx[clidx - orig_within_ct] * max_sample_id_len]);
sptr2 = (char*)memchr(sptr, '\t', max_sample_id_len);
fwrite(sptr, 1, sptr2 - sptr, outfile);
- putc('_', outfile);
+ putc_unlocked('_', outfile);
fputs(&(sptr2[1]), outfile);
if (pheno_c) {
if (IS_SET(pheno_c, late_clidx_to_sample_uidx[clidx - orig_within_ct])) {
@@ -2686,7 +2686,7 @@ void write_cluster1_oitc(FILE* outfile, uint32_t clidx, char* sample_ids, uintpt
sptr = &(sample_ids[orig_cluster_map[uii] * max_sample_id_len]);
sptr2 = (char*)memchr(sptr, '\t', max_sample_id_len);
fwrite(sptr, 1, sptr2 - sptr, outfile);
- putc('_', outfile);
+ putc_unlocked('_', outfile);
fputs(&(sptr2[1]), outfile);
if (pheno_c) {
if (IS_SET(pheno_c, orig_cluster_map[uii])) {
@@ -2709,7 +2709,7 @@ void write_cluster1_oitc(FILE* outfile, uint32_t clidx, char* sample_ids, uintpt
int32_t write_cluster_solution(char* outname, char* outname_end, uint32_t* orig_sample_to_cluster, uintptr_t sample_ct, uint32_t* orig_cluster_map, uint32_t* orig_cluster_starts, uint32_t* late_clidx_to_sample_uidx, uint32_t orig_within_ct, uint32_t orig_cluster_ct, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* pheno_c, uint32_t* sample_idx_to_uidx, Cluster_info* cp, uint32_t* cluster_remap, uint32_t* clidx_table_space, uint32_t merge_ct, uint32_t* merge_sequence) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uint32_t only2 = cp->modifier & CLUSTER_ONLY2;
uint32_t report_pheno = (cp->modifier & CLUSTER_CC) || (cp->max_ctrls != 0xffffffffU);
uint32_t pct = 1;
@@ -2768,9 +2768,9 @@ int32_t write_cluster_solution(char* outname, char* outname_end, uint32_t* orig_
goto write_cluster_solution_ret_WRITE_FAIL;
}
if (!orig_sample_to_cluster) {
- write_cluster1(outfile, clidx, sample_ids, max_sample_id_len, report_pheno? pheno_c : NULL, sample_idx_to_uidx, merge_sequence, merge_ct);
+ write_cluster1(outfile, clidx, sample_ids, max_sample_id_len, report_pheno? pheno_c : nullptr, sample_idx_to_uidx, merge_sequence, merge_ct);
} else {
- write_cluster1_oitc(outfile, clidx, sample_ids, max_sample_id_len, report_pheno? pheno_c : NULL, orig_cluster_map, orig_cluster_starts, late_clidx_to_sample_uidx, orig_within_ct, cluster_remap, merge_sequence, merge_ct);
+ write_cluster1_oitc(outfile, clidx, sample_ids, max_sample_id_len, report_pheno? pheno_c : nullptr, orig_cluster_map, orig_cluster_starts, late_clidx_to_sample_uidx, orig_within_ct, cluster_remap, merge_sequence, merge_ct);
}
if (putc_checked('\n', outfile)) {
goto write_cluster_solution_ret_WRITE_FAIL;
@@ -2824,9 +2824,9 @@ int32_t write_cluster_solution(char* outname, char* outname_end, uint32_t* orig_
if (fwrite_checked(sptr, sptr2 - sptr, outfile)) {
goto write_cluster_solution_ret_WRITE_FAIL;
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(&(sptr2[1]), outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
if (orig_sample_to_cluster) {
clidx = orig_sample_to_cluster[sample_idx];
} else {
@@ -2861,23 +2861,23 @@ int32_t write_cluster_solution(char* outname, char* outname_end, uint32_t* orig_
}
if ((sample_idx + 1) * 100LLU >= ((uint64_t)pct * sample_ct)) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (((uint64_t)(sample_idx + 1)) * 100) / sample_ct;
printf("\b\b%u%%", pct++);
fflush(stdout);
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
if (fclose_null(&outfile)) {
goto write_cluster_solution_ret_WRITE_FAIL;
}
*outname_end = '\0';
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPREPRINTFWW("Cluster solution written to %s.cluster1 , %s.cluster2 , and %s.cluster3%s .\n", outname, outname, outname, (cp->modifier & CLUSTER_MISSING)? ".missing" : "");
} else {
*outname_end = '\0';
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPREPRINTFWW("Cluster solution written to %s.cluster2 .\n", outname);
}
logprintb();
@@ -2896,7 +2896,7 @@ int32_t write_cluster_solution(char* outname, char* outname_end, uint32_t* orig_
#ifndef NOLAPACK
int32_t mds_plot(char* outname, char* outname_end, uintptr_t* sample_exclude, uintptr_t sample_ct, uint32_t* sample_idx_to_uidx, char* sample_ids, uint32_t plink_maxfid, uint32_t plink_maxiid, uintptr_t max_sample_id_len, uint32_t cur_cluster_ct, uint32_t merge_ct, uint32_t* orig_sample_to_cluster, uint32_t* cur_cluster_remap, uint32_t dim_ct, uint32_t is_mds_cluster, uint32_t dump_eigvals, double* dists) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t final_cluster_ct = cur_cluster_ct - merge_ct;
double grand_mean = 0.0;
uintptr_t ulii = 0;
@@ -3046,15 +3046,15 @@ int32_t mds_plot(char* outname, char* outname_end, uintptr_t* sample_exclude, ui
bigstack_alloc_d(ulii * ulii, &out_v)) {
goto mds_plot_ret_NOMEM;
}
- // fill_double_zero(sqrt_eigvals, ulii);
- // fill_double_zero(out_u, ulii * ulii);
- // fill_double_zero(out_v, ulii * ulii);
+ // fill_double_zero(ulii, sqrt_eigvals);
+ // fill_double_zero(ulii * ulii, out_u);
+ // fill_double_zero(ulii * ulii, out_v);
iwork = (__CLPK_integer*)bigstack_alloc(8 * ulii * sizeof(__CLPK_integer));
if (!iwork) {
goto mds_plot_ret_NOMEM;
}
- // fill_int_zero(iwork, 8 * mdim);
+ // fill_int_zero(8 * mdim, iwork);
// workspace query
dgesdd_(&jobz, &mdim, &mdim, main_matrix, &mdim, sqrt_eigvals, out_u, &mdim, out_v, &mdim, &optim_lwork, &lwork, iwork, &info);
@@ -3062,7 +3062,7 @@ int32_t mds_plot(char* outname, char* outname_end, uintptr_t* sample_exclude, ui
if (bigstack_alloc_d(lwork, &work)) {
goto mds_plot_ret_NOMEM;
}
- // fill_double_zero(work, lwork);
+ // fill_double_zero(lwork, work);
dgesdd_(&jobz, &mdim, &mdim, main_matrix, &mdim, sqrt_eigvals, out_u, &mdim, out_v, &mdim, work, &lwork, iwork, &info);
// * sqrt_eigvals[0..(ulii-1)] contains singular values
@@ -3194,7 +3194,7 @@ int32_t mds_plot(char* outname, char* outname_end, uintptr_t* sample_exclude, ui
// probably want to factor out common initialization with mds_plot, etc.
int32_t mds_plot_eigendecomp(char* outname, char* outname_end, uintptr_t* sample_exclude, uintptr_t sample_ct, uint32_t* sample_idx_to_uidx, char* sample_ids, uint32_t plink_maxfid, uint32_t plink_maxiid, uintptr_t max_sample_id_len, uint32_t cur_cluster_ct, uint32_t merge_ct, uint32_t* orig_sample_to_cluster, uint32_t* cur_cluster_remap, uint32_t dim_ct, uint32_t is_mds_cluster, uint32_t dump_eigvals, double* dists) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t final_cluster_ct = cur_cluster_ct - merge_ct;
double grand_mean = 0.0;
uintptr_t ulii = 0;
@@ -3357,7 +3357,7 @@ int32_t mds_plot_eigendecomp(char* outname, char* outname_end, uintptr_t* sample
if (!isuppz) {
goto mds_plot_eigendecomp_ret_NOMEM;
}
- fill_int_zero((int32_t*)isuppz, 2 * dim_ct * (sizeof(__CLPK_integer) / sizeof(int32_t)));
+ fill_int_zero(2 * dim_ct * (sizeof(__CLPK_integer) / sizeof(int32_t)), (int32_t*)isuppz);
ldz = mdim;
dsyevr_(&jobz, &range, &uplo, &mdim, main_matrix, &mdim, &nz, &nz, &i1, &i2, &zz, &out_m, out_w, out_z, &ldz, isuppz, &optim_lwork, &lwork, &optim_liwork, &liwork, &info);
@@ -3370,7 +3370,7 @@ int32_t mds_plot_eigendecomp(char* outname, char* outname_end, uintptr_t* sample
if (!iwork) {
goto mds_plot_eigendecomp_ret_NOMEM;
}
- fill_int_zero((int32_t*)iwork, liwork * (sizeof(__CLPK_integer) / sizeof(int32_t)));
+ fill_int_zero(liwork * (sizeof(__CLPK_integer) / sizeof(int32_t)), (int32_t*)iwork);
dsyevr_(&jobz, &range, &uplo, &mdim, main_matrix, &mdim, &nz, &nz, &i1, &i2, &zz, &out_m, out_w, out_z, &ldz, isuppz, work, &lwork, iwork, &liwork, &info);
// * out_w[0..(dim_ct-1)] contains eigenvalues
diff --git a/plink_cnv.c b/plink_cnv.c
index 95f5ccc..e797fe2 100644
--- a/plink_cnv.c
+++ b/plink_cnv.c
@@ -1,8 +1,7 @@
#include "plink_common.h"
-#ifndef HIGH_MAX_CHROM
int32_t cnv_subset_load(char* subset_fname, char** subset_list_ptr, uintptr_t* subset_ct_ptr, uintptr_t* max_subset_name_len_ptr) {
- FILE* subset_file = NULL;
+ FILE* subset_file = nullptr;
uintptr_t subset_ct = 0;
uintptr_t max_subset_name_len = 0;
int32_t retval = open_and_size_string_list(subset_fname, &subset_file, &subset_ct, &max_subset_name_len);
@@ -48,10 +47,12 @@ const char* cnv_intersect_filter_type_to_str(uint32_t intersect_filter_type) {
}
}
-// log_2(MAX_POSSIBLE_CHROM) + SMALL_INTERVAL_BITS must not exceed 32
#define SMALL_INTERVAL_BITS 18
#define SMALL_INTERVAL_MAX_SIZE ((1 << SMALL_INTERVAL_BITS) - 1)
+// log_2(chrom_code_end) + SMALL_INTERVAL_BITS must not exceed 32
+#define CNV_CHROM_CODE_END_MAX (1 << (32 - SMALL_INTERVAL_BITS))
+
int32_t cnv_intersect_load(uint32_t intersect_filter_type, char* intersect_filter_fname, char* subset_list, uintptr_t subset_ct, uintptr_t max_subset_name_len, uintptr_t* il_chrom_start_small, uintptr_t* il_chrom_start_large, uint32_t* il_chrom_max_width_small, uint32_t* il_chrom_max_width_large, uint64_t** il_small_ptr, uint64_t** il_large_ptr, int32_t marker_pos_start, int32_t marker_pos_end, uint32_t allow_extra_chroms, Chrom_info* chrom_info_ptr) {
// We store intervals in sorted order, with the center of each interval in
// the high-order bits, and the size (without adding 1) in the low-order
@@ -71,7 +72,7 @@ int32_t cnv_intersect_load(uint32_t intersect_filter_type, char* intersect_filte
// This way, whenever we check for an intersection, we can usually skip
// almost all the "small tier" intervals regardless of the largest interval
// size.
- FILE* intersect_file = NULL;
+ FILE* intersect_file = nullptr;
uintptr_t max_interval_ct = bigstack_left() / 9;
uintptr_t small_interval_ct = 0;
uintptr_t large_interval_ct = 0;
@@ -89,8 +90,6 @@ int32_t cnv_intersect_load(uint32_t intersect_filter_type, char* intersect_filte
const char* cift_str = cnv_intersect_filter_type_to_str(intersect_filter_type);
int32_t retval = 0;
uint64_t* il_small;
- char* bufptr;
- char* bufptr2;
uint64_t ullii;
uintptr_t ulii;
uintptr_t uljj;
@@ -104,44 +103,52 @@ int32_t cnv_intersect_load(uint32_t intersect_filter_type, char* intersect_filte
uint32_t cur_chrom;
uint32_t uii;
unsigned char ucc;
- if (fopen_checked(intersect_filter_fname, "r", &intersect_file)) {
- goto cnv_intersect_load_ret_OPEN_FAIL;
- }
- while (fgets(g_textbuf, MAXLINELEN, intersect_file)) {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, cift_str);
- goto cnv_intersect_load_ret_INVALID_FORMAT_2;
+ {
+ if (fopen_checked(intersect_filter_fname, "r", &intersect_file)) {
+ goto cnv_intersect_load_ret_OPEN_FAIL;
}
- bufptr = skip_initial_spaces(g_textbuf);
- if (!is_eoln_kns(*bufptr)) {
+ while (fgets(g_textbuf, MAXLINELEN, intersect_file)) {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, cift_str);
+ goto cnv_intersect_load_ret_INVALID_FORMAT_2;
+ }
+ char* textbuf_first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*textbuf_first_token)) {
+ continue;
+ }
// CHR, BP1, BP2, subset name
- bufptr2 = next_token_mult(bufptr, 2);
- if (no_more_tokens_kns(bufptr2)) {
+ char* first_token_end = token_endnn(textbuf_first_token);
+ char* col3_ptr = next_token_mult(first_token_end, 2);
+ if (no_more_tokens_kns(col3_ptr)) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s file has fewer tokens than expected.\n", line_idx, cift_str);
goto cnv_intersect_load_ret_INVALID_FORMAT_2;
}
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- if ((!allow_extra_chroms) || (ii == -1)) {
- sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of %s.\n", line_idx, cift_str);
- goto cnv_intersect_load_ret_INVALID_FORMAT_2;
- }
- retval = resolve_or_add_chrom_name(bufptr, cift_str, line_idx, chrom_info_ptr, &ii);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - textbuf_first_token);
+ *first_token_end = '\0';
+ // don't use get_or_add_chrom_code() since we want to skip the
+ // CNV_CHROM_CODE_END_MAX check when possible
+ int32_t cur_chrom_code = get_chrom_code(textbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
+ retval = try_to_add_chrom_name(textbuf_first_token, cift_str, line_idx, chrom_name_slen, allow_extra_chroms, &cur_chrom_code, chrom_info_ptr);
if (retval) {
goto cnv_intersect_load_ret_1;
}
+ if (cur_chrom_code >= CNV_CHROM_CODE_END_MAX) {
+ logerrprint("Error: Too many distinct nonstandard chromosome/contig names for CNV module.\n");
+ goto cnv_intersect_load_ret_INVALID_FORMAT;
+ }
}
- uljj = ((uint32_t)ii);
+ uljj = ((uint32_t)cur_chrom_code);
if (!IS_SET(chrom_mask, uljj)) {
continue;
}
- bufptr = next_token(bufptr);
- if (scan_uint_defcap(bufptr, (uint32_t*)&jj)) {
+ char* textbuf_iter = skip_initial_spaces(&(first_token_end[1]));
+ if (scan_uint_defcap(textbuf_iter, (uint32_t*)&jj)) {
sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of %s.\n", line_idx, cift_str);
goto cnv_intersect_load_ret_INVALID_FORMAT_2;
}
- if (scan_uint_defcap(bufptr2, (uint32_t*)&ii)) {
+ if (scan_uint_defcap(col3_ptr, (uint32_t*)&ii)) {
sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of %s.\n", line_idx, cift_str);
goto cnv_intersect_load_ret_INVALID_FORMAT_2;
}
@@ -158,16 +165,16 @@ int32_t cnv_intersect_load(uint32_t intersect_filter_type, char* intersect_filte
continue;
}
if (subset_ct) {
- bufptr = next_token(bufptr2);
- if (no_more_tokens_kns(bufptr)) {
+ textbuf_iter = next_token(col3_ptr);
+ if (no_more_tokens_kns(textbuf_iter)) {
continue;
}
- if (bsearch_str(bufptr, strlen_se(bufptr), subset_list, max_subset_name_len, subset_ct) == -1) {
+ if (bsearch_str(textbuf_iter, strlen_se(textbuf_iter), subset_list, max_subset_name_len, subset_ct) == -1) {
continue;
}
}
if (small_interval_ct + large_interval_ct == max_interval_ct) {
- goto cnv_intersect_load_ret_NOMEM;
+ goto cnv_intersect_load_ret_NOMEM;
}
kk = ii - jj;
if (kk > SMALL_INTERVAL_MAX_SIZE) {
@@ -178,117 +185,117 @@ int32_t cnv_intersect_load(uint32_t intersect_filter_type, char* intersect_filte
tmp_il_small[small_interval_ct++] = (((uint64_t)uljj) << (32 + SMALL_INTERVAL_BITS)) | (((uint64_t)(((uint32_t)ii) + ((uint32_t)jj))) << SMALL_INTERVAL_BITS) | ((uint64_t)((uint32_t)kk));
}
}
- }
- if (!feof(intersect_file)) {
- goto cnv_intersect_load_ret_READ_FAIL;
- }
- if (reverse_warning_ct > 1) {
- LOGPRINTF("(%" PRIuPTR " subsequent line%s with [end of range] < [start of range].)\n", reverse_warning_ct - 1, (reverse_warning_ct == 2)? "" : "s");
- }
- *il_large_ptr = il_large;
- il_small = &(il_large[-((intptr_t)small_interval_ct)]);
- *il_small_ptr = il_small;
- chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
- if ((!small_interval_ct) && (!large_interval_ct)) {
- if (subset_ct) {
- fill_ulong_zero(il_chrom_start_small, chrom_code_end + 1);
- fill_ulong_zero(il_chrom_start_large, chrom_code_end + 1);
- logerrprint("Warning: All intervals filtered out by --cnv-subset.\n");
- goto cnv_intersect_load_ret_1;
+ if (!feof(intersect_file)) {
+ goto cnv_intersect_load_ret_READ_FAIL;
}
- sprintf(g_logbuf, "Error: Empty %s.\n", cift_str);
- goto cnv_intersect_load_ret_INVALID_FORMAT_2;
- }
- if (small_interval_ct) {
+ if (reverse_warning_ct > 1) {
+ LOGPRINTF("(%" PRIuPTR " subsequent line%s with [end of range] < [start of range].)\n", reverse_warning_ct - 1, (reverse_warning_ct == 2)? "" : "s");
+ }
+ *il_large_ptr = il_large;
+ il_small = &(il_large[-((intptr_t)small_interval_ct)]);
+ *il_small_ptr = il_small;
+ chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
+ if ((!small_interval_ct) && (!large_interval_ct)) {
+ if (subset_ct) {
+ fill_ulong_zero(chrom_code_end + 1, il_chrom_start_small);
+ fill_ulong_zero(chrom_code_end + 1, il_chrom_start_large);
+ logerrprint("Warning: All intervals filtered out by --cnv-subset.\n");
+ goto cnv_intersect_load_ret_1;
+ }
+ sprintf(g_logbuf, "Error: Empty %s.\n", cift_str);
+ goto cnv_intersect_load_ret_INVALID_FORMAT_2;
+ }
+ if (small_interval_ct) {
#ifdef __cplusplus
- std::sort((int64_t*)tmp_il_small, (int64_t*)(&(tmp_il_small[small_interval_ct])));
+ std::sort((int64_t*)tmp_il_small, (int64_t*)(&(tmp_il_small[small_interval_ct])));
#else
- qsort((int64_t*)tmp_il_small, small_interval_ct, sizeof(int64_t), llcmp);
+ qsort((int64_t*)tmp_il_small, small_interval_ct, sizeof(int64_t), llcmp);
#endif
- il_chrom_start_small[chrom_code_end] = small_interval_ct;
- cur_chrom = chrom_code_end - 1;
- max_width = 0;
- ulii = small_interval_ct;
- do {
- ulii--;
- ullii = tmp_il_small[ulii];
- uii = (uint32_t)(ullii >> (SMALL_INTERVAL_BITS + 32));
- if (uii < cur_chrom) {
- il_chrom_max_width_small[cur_chrom] = max_width;
- do {
- il_chrom_start_small[cur_chrom--] = ulii + 1;
- } while (cur_chrom > uii);
- max_width = 0;
+ il_chrom_start_small[chrom_code_end] = small_interval_ct;
+ cur_chrom = chrom_code_end - 1;
+ max_width = 0;
+ ulii = small_interval_ct;
+ do {
+ ulii--;
+ ullii = tmp_il_small[ulii];
+ uii = (uint32_t)(ullii >> (SMALL_INTERVAL_BITS + 32));
+ if (uii < cur_chrom) {
+ il_chrom_max_width_small[cur_chrom] = max_width;
+ do {
+ il_chrom_start_small[cur_chrom--] = ulii + 1;
+ } while (cur_chrom > uii);
+ max_width = 0;
+ }
+ cur_width = ullii & (SMALL_INTERVAL_MAX_SIZE * 1LLU);
+ if (cur_width > max_width) {
+ max_width = cur_width;
+ }
+ il_small[ulii] = ((ullii >> SMALL_INTERVAL_BITS) << 32) | ((uint64_t)cur_width);
+ } while (ulii);
+ il_chrom_max_width_small[cur_chrom] = max_width;
+ do {
+ il_chrom_start_small[cur_chrom] = 0;
+ } while (cur_chrom--);
+ } else {
+ fill_ulong_zero(chrom_code_end + 1, il_chrom_start_small);
+ }
+ if (large_interval_ct) {
+ if (large_interval_ct > 1) {
+ uljj = large_interval_ct / 2;
+ ulkk = large_interval_ct - 1;
+ for (ulii = 0; ulii < uljj; ulii++) {
+ ucc = tmp_il_large_chroms[ulii];
+ tmp_il_large_chroms[ulii] = tmp_il_large_chroms[ulkk];
+ tmp_il_large_chroms[ulkk--] = ucc;
+ }
}
- cur_width = ullii & (SMALL_INTERVAL_MAX_SIZE * 1LLU);
- if (cur_width > max_width) {
- max_width = cur_width;
+ if (qsort_ext((char*)tmp_il_large_chroms, large_interval_ct, sizeof(char), char_cmp_deref, (char*)il_large, sizeof(int64_t))) {
+ goto cnv_intersect_load_ret_NOMEM;
}
- il_small[ulii] = ((ullii >> SMALL_INTERVAL_BITS) << 32) | ((uint64_t)cur_width);
- } while (ulii);
- il_chrom_max_width_small[cur_chrom] = max_width;
- do {
- il_chrom_start_small[cur_chrom] = 0;
- } while (cur_chrom--);
- } else {
- fill_ulong_zero(il_chrom_start_small, chrom_code_end + 1);
- }
- if (large_interval_ct) {
- if (large_interval_ct > 1) {
- uljj = large_interval_ct / 2;
- ulkk = large_interval_ct - 1;
- for (ulii = 0; ulii < uljj; ulii++) {
- ucc = tmp_il_large_chroms[ulii];
- tmp_il_large_chroms[ulii] = tmp_il_large_chroms[ulkk];
- tmp_il_large_chroms[ulkk--] = ucc;
- }
- }
- if (qsort_ext((char*)tmp_il_large_chroms, large_interval_ct, sizeof(char), char_cmp_deref, (char*)il_large, sizeof(int64_t))) {
- goto cnv_intersect_load_ret_NOMEM;
- }
- il_chrom_start_large[0] = 0;
- cur_chrom = 0;
- for (ulii = 0; ulii < large_interval_ct; ulii++) {
- uii = tmp_il_large_chroms[ulii];
- if (uii > cur_chrom) {
- do {
- il_chrom_start_large[++cur_chrom] = ulii;
- } while (cur_chrom < uii);
+ il_chrom_start_large[0] = 0;
+ cur_chrom = 0;
+ for (ulii = 0; ulii < large_interval_ct; ulii++) {
+ uii = tmp_il_large_chroms[ulii];
+ if (uii > cur_chrom) {
+ do {
+ il_chrom_start_large[++cur_chrom] = ulii;
+ } while (cur_chrom < uii);
+ }
}
- }
- do {
- il_chrom_start_large[++cur_chrom] = large_interval_ct;
- } while (cur_chrom < chrom_code_end);
- ulii = il_chrom_start_large[0];
- for (cur_chrom = 0; cur_chrom < chrom_code_end; cur_chrom++) {
- uljj = il_chrom_start_large[cur_chrom + 1];
- if (uljj > ulii) {
+ do {
+ il_chrom_start_large[++cur_chrom] = large_interval_ct;
+ } while (cur_chrom < chrom_code_end);
+ ulii = il_chrom_start_large[0];
+ for (cur_chrom = 0; cur_chrom < chrom_code_end; cur_chrom++) {
+ uljj = il_chrom_start_large[cur_chrom + 1];
+ if (uljj > ulii) {
#ifdef __cplusplus
- std::sort((int64_t*)(&(il_large[ulii])), (int64_t*)(&(il_large[uljj])));
+ std::sort((int64_t*)(&(il_large[ulii])), (int64_t*)(&(il_large[uljj])));
#else
- qsort((int64_t*)(&(il_large[ulii])), uljj - ulii, sizeof(int64_t), llcmp);
+ qsort((int64_t*)(&(il_large[ulii])), uljj - ulii, sizeof(int64_t), llcmp);
#endif
- ulii = uljj;
+ ulii = uljj;
+ }
}
- }
- ulii = 0;
- for (cur_chrom = 0; cur_chrom < chrom_code_end; cur_chrom++) {
- uljj = il_chrom_start_large[cur_chrom + 1];
- max_width = 0;
- while (ulii < uljj) {
- ullii = il_large[ulii];
- cur_width = ullii & 0x7fffffffLLU;
- if (cur_width > max_width) {
- max_width = cur_width;
+ ulii = 0;
+ for (cur_chrom = 0; cur_chrom < chrom_code_end; cur_chrom++) {
+ uljj = il_chrom_start_large[cur_chrom + 1];
+ max_width = 0;
+ while (ulii < uljj) {
+ ullii = il_large[ulii];
+ cur_width = ullii & 0x7fffffffLLU;
+ if (cur_width > max_width) {
+ max_width = cur_width;
+ }
+ il_large[ulii++] = ((ullii >> 31) << 32) | ((uint64_t)cur_width);
}
- il_large[ulii++] = ((ullii >> 31) << 32) | ((uint64_t)cur_width);
+ il_chrom_max_width_large[cur_chrom] = max_width;
}
- il_chrom_max_width_large[cur_chrom] = max_width;
+ } else {
+ fill_ulong_zero(chrom_code_end + 1, il_chrom_start_large);
}
- } else {
- fill_ulong_zero(il_chrom_start_large, chrom_code_end + 1);
+ bigstack_end_alloc_presized(round_up_pow2(small_interval_ct + large_interval_ct, CACHELINE_INT64) * sizeof(int64_t));
}
- bigstack_end_alloc_presized(round_up_pow2(small_interval_ct + large_interval_ct, CACHELINE_INT64) * sizeof(int64_t));
while (0) {
cnv_intersect_load_ret_NOMEM:
retval = RET_NOMEM;
@@ -301,6 +308,7 @@ int32_t cnv_intersect_load(uint32_t intersect_filter_type, char* intersect_filte
break;
cnv_intersect_load_ret_INVALID_FORMAT_2:
logerrprintb();
+ cnv_intersect_load_ret_INVALID_FORMAT:
retval = RET_INVALID_FORMAT;
break;
}
@@ -451,7 +459,7 @@ int32_t cnv_make_map_write(FILE* new_mapfile, Chrom_info* chrom_info_ptr, uint32
int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, uint32_t min_seglen, uint32_t max_seglen, double min_score, double max_score, uint32_t min_sites, uint32_t max_sites, uintptr_t* il_chrom_start_small, uintptr_t* il_chrom_start_large, uint32_t* il_chrom_max_width_small, uint32_t* il_chrom_max_width_large, uint64_t* il_small, uint64_t* il_large, uint32_t intersect_filter_type, uint32_t overlap_type, double overlap_val, int32_t marker_pos_start, int32_t marker_p [...]
int64_t* marker_pos_arr = (int64_t*)g_bigstack_base;
- FILE* new_mapfile = NULL;
+ FILE* new_mapfile = nullptr;
uintptr_t raw_marker_ct = 0;
uint32_t distinct_marker_ct = 1;
uint32_t req_fields = 3;
@@ -465,8 +473,6 @@ int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, u
uint32_t chrom_code_end;
uintptr_t max_marker_ct;
int32_t retval;
- char* bufptr;
- char* bufptr2;
int64_t llii;
uint64_t ullii;
uintptr_t line_idx;
@@ -478,58 +484,63 @@ int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, u
uint32_t uii;
int32_t ii;
double dxx;
- logprintb();
- if (fopen_checked(new_mapname, "w", &new_mapfile)) {
- goto cnv_make_map_ret_OPEN_FAIL;
- }
- retval = cnv_first_nonheader_line(cnvfile, &line_idx);
- if (retval) {
- goto cnv_make_map_ret_1;
- }
- max_marker_ct = bigstack_left() / sizeof(int64_t);
- // allow SCORE/SITES to be missing if they aren't being filtered on
- if (filter_sites) {
- req_fields = 5;
- } else if (filter_score) {
- req_fields = 4;
- }
- line_idx--;
- do {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .cnv file is pathologically long.\n", line_idx);
- goto cnv_make_map_ret_INVALID_FORMAT_2N;
+ {
+ logprintb();
+ if (fopen_checked(new_mapname, "w", &new_mapfile)) {
+ goto cnv_make_map_ret_OPEN_FAIL;
}
- bufptr = skip_initial_spaces(g_textbuf);
- if (!is_eoln_kns(*bufptr)) {
+ retval = cnv_first_nonheader_line(cnvfile, &line_idx);
+ if (retval) {
+ goto cnv_make_map_ret_1;
+ }
+ max_marker_ct = bigstack_left() / sizeof(int64_t);
+ // allow SCORE/SITES to be missing if they aren't being filtered on
+ if (filter_sites) {
+ req_fields = 5;
+ } else if (filter_score) {
+ req_fields = 4;
+ }
+ line_idx--;
+ do {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .cnv file is pathologically long.\n", line_idx);
+ goto cnv_make_map_ret_INVALID_FORMAT_2N;
+ }
+ char* col3_ptr = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*col3_ptr)) {
+ continue;
+ }
// FID, IID, CHR, BP1, BP2, TYPE, SCORE, SITES
- bufptr = next_token_mult(bufptr, 2);
- bufptr2 = next_token_mult(bufptr, req_fields);
- if (no_more_tokens_kns(bufptr2)) {
+ col3_ptr = next_token_mult(col3_ptr, 2);
+ if (no_more_tokens_kns(next_token_mult(col3_ptr, req_fields))) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .cnv file has fewer tokens than expected.\n", line_idx);
goto cnv_make_map_ret_INVALID_FORMAT_2N;
}
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- if ((!allow_extra_chroms) || (ii == -1)) {
- sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of .cnv file.\n", line_idx);
- goto cnv_make_map_ret_INVALID_FORMAT_2N;
- }
- retval = resolve_or_add_chrom_name(bufptr, ".cnv file", line_idx, chrom_info_ptr, &ii);
+ char* col3_end = token_endnn(col3_ptr);
+ const uint32_t chrom_name_slen = (uintptr_t)(col3_end - col3_ptr);
+ *col3_end = '\0';
+ int32_t cur_chrom_code = get_chrom_code(col3_ptr, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
+ retval = try_to_add_chrom_name(col3_ptr, ".cnv file", line_idx, chrom_name_slen, allow_extra_chroms, &cur_chrom_code, chrom_info_ptr);
if (retval) {
goto cnv_make_map_ret_1;
}
+ if (cur_chrom_code >= CNV_CHROM_CODE_END_MAX) {
+ logerrprint("\nError: Too many distinct nonstandard chromosome/contig names for CNV module.\n");
+ goto cnv_make_map_ret_INVALID_FORMAT;
+ }
}
- chrom_idx = ii;
+ chrom_idx = (uint32_t)cur_chrom_code;
if ((!is_autogen) && (!IS_SET(chrom_mask, chrom_idx))) {
continue;
}
ullii = ((uint64_t)chrom_idx) << 32;
- bufptr2 = next_token(bufptr);
- bufptr = next_token(bufptr2);
- if (scan_uint_defcap(bufptr2, &seg_start) || scan_uint_defcap(bufptr, &seg_end)) {
+ char* col4_ptr = skip_initial_spaces(&(col3_end[1]));
+ char* col5_ptr = next_token(col4_ptr);
+ if (scan_uint_defcap(col4_ptr, &seg_start) || scan_uint_defcap(col5_ptr, &seg_end)) {
sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of .cnv file.\n", line_idx);
- goto cnv_make_map_ret_INVALID_FORMAT_2N;
+ goto cnv_make_map_ret_INVALID_FORMAT_2N;
}
if (seg_end < seg_start) {
sprintf(g_logbuf, "Error: Segment end coordinate smaller than segment start on line %" PRIuPTR " of\n.cnv file.\n", line_idx);
@@ -546,9 +557,10 @@ int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, u
continue;
}
}
+ char* col5_end = token_endnn(col5_ptr);
if (cnv_calc_type & (CNV_DEL | CNV_DUP)) {
- bufptr2 = next_token(bufptr);
- if (scan_uint_defcap(bufptr2, (uint32_t*)&ii)) {
+ char* col6_ptr = next_token(col5_end);
+ if (scan_uint_defcap(col6_ptr, (uint32_t*)&ii)) {
sprintf(g_logbuf, "Error: Invalid variant copy count on line %" PRIuPTR " of .cnv file.\n", line_idx);
goto cnv_make_map_ret_INVALID_FORMAT_2N;
}
@@ -561,9 +573,9 @@ int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, u
}
}
if (filter_score) {
- bufptr2 = next_token_mult(bufptr, 2);
- if (scan_double(bufptr2, &dxx)) {
- sprintf(g_logbuf, "Error: Invalid confidence score on line %" PRIuPTR " of .cnv file.\n", line_idx);
+ char* col7_ptr = next_token_mult(col5_end, 2);
+ if (scan_double(col7_ptr, &dxx)) {
+ sprintf(g_logbuf, "Error: Invalid confidence score on line %" PRIuPTR " of .cnv file.\n", line_idx);
goto cnv_make_map_ret_INVALID_FORMAT_2N;
}
if ((dxx < min_score) || (dxx > max_score)) {
@@ -571,8 +583,8 @@ int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, u
}
}
if (filter_sites) {
- bufptr2 = next_token_mult(bufptr, 3);
- if (scan_posint_defcap(bufptr2, (uint32_t*)&ii)) {
+ char* col8_ptr = next_token_mult(col5_end, 3);
+ if (scan_posint_defcap(col8_ptr, (uint32_t*)&ii)) {
sprintf(g_logbuf, "Error: Invalid probe count on line %" PRIuPTR " of .cnv file.\n", line_idx);
goto cnv_make_map_ret_INVALID_FORMAT_2N;
}
@@ -584,7 +596,7 @@ int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, u
ulii = il_chrom_start_small[chrom_idx];
uljj = il_chrom_start_large[chrom_idx];
if (is_cnv_overlap((uint32_t)((uint64_t)llii), (uint32_t)ullii, overlap_type, overlap_val, il_chrom_max_width_small[chrom_idx], il_chrom_max_width_large[chrom_idx], &(il_small[ulii]), il_chrom_start_small[chrom_idx + 1] - ulii, &(il_large[uljj]), il_chrom_start_large[chrom_idx + 1] - uljj)) {
- if (intersect_filter_type & CNV_EXCLUDE) {
+ if (intersect_filter_type & CNV_EXCLUDE) {
continue;
}
} else if (intersect_filter_type & CNV_INTERSECT) {
@@ -592,65 +604,65 @@ int32_t cnv_make_map(FILE* cnvfile, char* new_mapname, uint32_t cnv_calc_type, u
}
}
if (raw_marker_ct + 2 >= max_marker_ct) {
- goto cnv_make_map_ret_NOMEM;
+ goto cnv_make_map_ret_NOMEM;
}
marker_pos_arr[raw_marker_ct++] = llii;
if (make_map_long && (((uint64_t)llii) != ullii)) {
- marker_pos_arr[raw_marker_ct++] = (int64_t)ullii;
+ marker_pos_arr[raw_marker_ct++] = (int64_t)ullii;
}
marker_pos_arr[raw_marker_ct++] = 1 + (int64_t)ullii;
+ } while (fgets(g_textbuf, MAXLINELEN, cnvfile));
+ if (!feof(cnvfile)) {
+ goto cnv_make_map_ret_READ_FAIL;
+ }
+ if (!raw_marker_ct) {
+ logprint("\n");
+ logerrprint(cnv_calc_type? "Error: No variants.\n" : "Error: No variants after filtering.\n");
+ goto cnv_make_map_ret_INVALID_FORMAT;
}
- } while (fgets(g_textbuf, MAXLINELEN, cnvfile));
- if (!feof(cnvfile)) {
- goto cnv_make_map_ret_READ_FAIL;
- }
- if (!raw_marker_ct) {
- logprint("\n");
- logerrprint(cnv_calc_type? "Error: No variants.\n" : "Error: No variants after filtering.\n");
- goto cnv_make_map_ret_INVALID_FORMAT;
- }
#ifdef __cplusplus
- std::sort(marker_pos_arr, &(marker_pos_arr[raw_marker_ct]));
+ std::sort(marker_pos_arr, &(marker_pos_arr[raw_marker_ct]));
#else
- qsort(marker_pos_arr, raw_marker_ct, sizeof(int64_t), llcmp);
+ qsort(marker_pos_arr, raw_marker_ct, sizeof(int64_t), llcmp);
#endif
- llii = marker_pos_arr[0];
- chrom_idx = (uint32_t)(((uint64_t)llii) >> 32);
- for (uii = 0; uii <= chrom_idx; uii++) {
- marker_chrom_start[uii] = 0;
- }
- if (cnv_make_map_write(new_mapfile, chrom_info_ptr, chrom_idx, (uint32_t)((uint64_t)llii), max_marker_id_len_ptr)) {
- goto cnv_make_map_ret_WRITE_FAIL;
- }
- for (ulii = 1; ulii < raw_marker_ct; ulii++) {
- if (marker_pos_arr[ulii] != llii) {
+ llii = marker_pos_arr[0];
+ chrom_idx = (uint32_t)(((uint64_t)llii) >> 32);
+ for (uii = 0; uii <= chrom_idx; uii++) {
+ marker_chrom_start[uii] = 0;
+ }
+ if (cnv_make_map_write(new_mapfile, chrom_info_ptr, chrom_idx, (uint32_t)((uint64_t)llii), max_marker_id_len_ptr)) {
+ goto cnv_make_map_ret_WRITE_FAIL;
+ }
+ for (ulii = 1; ulii < raw_marker_ct; ulii++) {
+ if (marker_pos_arr[ulii] != llii) {
#ifdef __LP64__
- if ((++distinct_marker_ct) == 0x80000000U) {
- logprint("\n");
- logerrprint("Error: Too many distinct .cnv.map positions (max 2^31 - 1).\n");
- goto cnv_make_map_ret_INVALID_FORMAT;
- }
+ if ((++distinct_marker_ct) == 0x80000000U) {
+ logprint("\n");
+ logerrprint("Error: Too many distinct .cnv.map positions (max 2^31 - 1).\n");
+ goto cnv_make_map_ret_INVALID_FORMAT;
+ }
#endif
- llii = marker_pos_arr[ulii];
- uii = (uint32_t)(((uint64_t)llii) >> 32);
- if (uii > chrom_idx) {
- do {
- marker_chrom_start[++chrom_idx] = distinct_marker_ct;
- } while (chrom_idx < uii);
- }
- if (cnv_make_map_write(new_mapfile, chrom_info_ptr, chrom_idx, (uint32_t)((uint64_t)llii), max_marker_id_len_ptr)) {
- goto cnv_make_map_ret_WRITE_FAIL;
+ llii = marker_pos_arr[ulii];
+ uii = (uint32_t)(((uint64_t)llii) >> 32);
+ if (uii > chrom_idx) {
+ do {
+ marker_chrom_start[++chrom_idx] = distinct_marker_ct;
+ } while (chrom_idx < uii);
+ }
+ if (cnv_make_map_write(new_mapfile, chrom_info_ptr, chrom_idx, (uint32_t)((uint64_t)llii), max_marker_id_len_ptr)) {
+ goto cnv_make_map_ret_WRITE_FAIL;
+ }
}
}
+ chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
+ do {
+ marker_chrom_start[++chrom_idx] = distinct_marker_ct;
+ } while (chrom_idx < chrom_code_end);
+ if (fclose_null(&new_mapfile)) {
+ goto cnv_make_map_ret_WRITE_FAIL;
+ }
+ logprint("done.\n");
}
- chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
- do {
- marker_chrom_start[++chrom_idx] = distinct_marker_ct;
- } while (chrom_idx < chrom_code_end);
- if (fclose_null(&new_mapfile)) {
- goto cnv_make_map_ret_WRITE_FAIL;
- }
- logprint("done.\n");
while (0) {
cnv_make_map_ret_NOMEM:
retval = RET_NOMEM;
@@ -689,114 +701,121 @@ int32_t validate_cnv_map(FILE** mapfile_ptr, char* mapname, int32_t* marker_pos_
int32_t marker_pos_end = 0x7fffffff;
uintptr_t* chrom_mask = chrom_info_ptr->chrom_mask;
uint32_t chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
- char* bufptr;
- char* bufptr2;
uint32_t colskip;
uintptr_t cur_marker_id_len;
int32_t ii;
- if ((*marker_pos_end_ptr) != -1) {
- marker_pos_end = *marker_pos_end_ptr;
- }
- if (fopen_checked(mapname, "r", mapfile_ptr)) {
- goto validate_cnv_map_ret_OPEN_FAIL;
- }
- marker_chrom_start[0] = 0;
- do {
- line_idx++;
- if (!fgets(g_textbuf, MAXLINELEN, *mapfile_ptr)) {
- if (feof(*mapfile_ptr)) {
- logerrprint("Error: Empty .cnv.map file.\n");
- goto validate_cnv_map_ret_INVALID_FORMAT;
- } else {
- goto validate_cnv_map_ret_READ_FAIL;
- }
+ {
+ if ((*marker_pos_end_ptr) != -1) {
+ marker_pos_end = *marker_pos_end_ptr;
}
- if (!g_textbuf[MAXLINELEN - 1]) {
- goto validate_cnv_map_ret_LONG_LINE;
- }
- bufptr = skip_initial_spaces(g_textbuf);
- } while (is_eoln_kns(*bufptr));
- bufptr2 = next_token_mult(bufptr, 2);
- if (is_eoln_kns(*bufptr2)) {
- goto validate_cnv_map_ret_MISSING_TOKENS;
- }
- bufptr2 = next_token(bufptr2);
- if (is_eoln_kns(*bufptr2)) {
- // --map3 autodetect
- colskip = 1;
- } else {
- colskip = 2;
- }
- line_idx--;
- do {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- goto validate_cnv_map_ret_LONG_LINE;
- }
- bufptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
+ if (fopen_checked(mapname, "r", mapfile_ptr)) {
+ goto validate_cnv_map_ret_OPEN_FAIL;
}
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- if ((!allow_extra_chroms) || (ii == -1)) {
- sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of .cnv.map file.\n", line_idx);
- goto validate_cnv_map_ret_INVALID_FORMAT_2;
+ marker_chrom_start[0] = 0;
+ char* textbuf_first_token;
+ do {
+ line_idx++;
+ if (!fgets(g_textbuf, MAXLINELEN, *mapfile_ptr)) {
+ if (feof(*mapfile_ptr)) {
+ logerrprint("Error: Empty .cnv.map file.\n");
+ goto validate_cnv_map_ret_INVALID_FORMAT;
+ } else {
+ goto validate_cnv_map_ret_READ_FAIL;
+ }
}
- retval = resolve_or_add_chrom_name(bufptr, ".cnv.map file", line_idx, chrom_info_ptr, &ii);
- if (retval) {
- goto validate_cnv_map_ret_1;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ goto validate_cnv_map_ret_LONG_LINE;
}
- }
- if (((uint32_t)ii) < chrom_idx) {
- goto validate_cnv_map_ret_UNSORTED;
- }
- bufptr = next_token(bufptr);
- bufptr2 = next_token_mult(bufptr, colskip);
- if (is_eoln_kns(*bufptr2)) {
+ textbuf_first_token = skip_initial_spaces(g_textbuf);
+ } while (is_eoln_kns(*textbuf_first_token));
+ char* textbuf_iter = next_token_mult(textbuf_first_token, 2);
+ if (no_more_tokens_kns(textbuf_iter)) {
goto validate_cnv_map_ret_MISSING_TOKENS;
}
- if (((uint32_t)ii) > chrom_idx) {
- do {
- marker_chrom_start[++chrom_idx] = marker_ct;
- } while (chrom_idx < ((uint32_t)ii));
- last_pos = -1;
- }
- if (*bufptr2 == '-') {
- continue;
- }
- if (scan_uint_defcap(bufptr2, (uint32_t*)&ii)) {
- sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of .cnv.map file.\n", line_idx);
- goto validate_cnv_map_ret_INVALID_FORMAT_2;
- }
- if (ii <= last_pos) {
- goto validate_cnv_map_ret_UNSORTED;
- }
- last_pos = ii;
- if (!IS_SET(chrom_mask, chrom_idx)) {
- continue;
- }
- if ((last_pos <= marker_pos_start_m1) || (last_pos > marker_pos_end)) {
- continue;
- }
- cur_marker_id_len = strlen_se(bufptr);
- bufptr[cur_marker_id_len] = '\0';
- if (cur_marker_id_len >= max_marker_id_len) {
- max_marker_id_len = cur_marker_id_len + 1;
+ textbuf_iter = next_token(textbuf_iter);
+ if (no_more_tokens_kns(textbuf_iter)) {
+ // --map3 autodetect
+ colskip = 1;
+ } else {
+ colskip = 2;
}
- if (++marker_ct == 0x80000000U) {
- logerrprint("Error: Too many entries in .cnv.map file (max 2147483647).\n");
- goto validate_cnv_map_ret_INVALID_FORMAT;
+ line_idx--;
+ do {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ goto validate_cnv_map_ret_LONG_LINE;
+ }
+ textbuf_first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*textbuf_first_token)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(textbuf_first_token);
+ if (!(*first_token_end)) {
+ goto validate_cnv_map_ret_MISSING_TOKENS;
+ }
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - textbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code = get_chrom_code(textbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
+ retval = try_to_add_chrom_name(textbuf_first_token, ".cnv.map file", line_idx, chrom_name_slen, allow_extra_chroms, &cur_chrom_code, chrom_info_ptr);
+ if (retval) {
+ goto validate_cnv_map_ret_1;
+ }
+ if (cur_chrom_code >= CNV_CHROM_CODE_END_MAX) {
+ logerrprint("Error: Too many distinct nonstandard chromosome/contig names for CNV module.\n");
+ goto validate_cnv_map_ret_INVALID_FORMAT;
+ }
+ }
+ if (((uint32_t)cur_chrom_code) < chrom_idx) {
+ goto validate_cnv_map_ret_UNSORTED;
+ }
+ char* col2_ptr = skip_initial_spaces(&(first_token_end[1]));
+ char* bp_col_ptr = next_token_mult(col2_ptr, colskip);
+ if (no_more_tokens_kns(bp_col_ptr)) {
+ goto validate_cnv_map_ret_MISSING_TOKENS;
+ }
+ if (((uint32_t)cur_chrom_code) > chrom_idx) {
+ do {
+ marker_chrom_start[++chrom_idx] = marker_ct;
+ } while (chrom_idx < ((uint32_t)cur_chrom_code));
+ last_pos = -1;
+ }
+ if (*bp_col_ptr == '-') {
+ continue;
+ }
+ if (scan_uint_defcap(bp_col_ptr, (uint32_t*)&ii)) {
+ sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of .cnv.map file.\n", line_idx);
+ goto validate_cnv_map_ret_INVALID_FORMAT_2;
+ }
+ if (ii <= last_pos) {
+ goto validate_cnv_map_ret_UNSORTED;
+ }
+ last_pos = ii;
+ if (!IS_SET(chrom_mask, chrom_idx)) {
+ continue;
+ }
+ if ((last_pos <= marker_pos_start_m1) || (last_pos > marker_pos_end)) {
+ continue;
+ }
+ cur_marker_id_len = strlen_se(col2_ptr);
+ col2_ptr[cur_marker_id_len] = '\0';
+ if (cur_marker_id_len >= max_marker_id_len) {
+ max_marker_id_len = cur_marker_id_len + 1;
+ }
+ if (++marker_ct == 0x80000000U) {
+ logerrprint("Error: Too many entries in .cnv.map file (max 2147483647).\n");
+ goto validate_cnv_map_ret_INVALID_FORMAT;
+ }
+ } while (fgets(g_textbuf, MAXLINELEN, *mapfile_ptr));
+ if (!feof(*mapfile_ptr)) {
+ goto validate_cnv_map_ret_READ_FAIL;
}
- } while (fgets(g_textbuf, MAXLINELEN, *mapfile_ptr));
- if (!feof(*mapfile_ptr)) {
- goto validate_cnv_map_ret_READ_FAIL;
+ do {
+ marker_chrom_start[++chrom_idx] = marker_ct;
+ } while (chrom_idx < chrom_code_end);
+ *max_marker_id_len_ptr = max_marker_id_len;
+ rewind(*mapfile_ptr);
}
- do {
- marker_chrom_start[++chrom_idx] = marker_ct;
- } while (chrom_idx < chrom_code_end);
- *max_marker_id_len_ptr = max_marker_id_len;
- rewind(*mapfile_ptr);
while (0) {
validate_cnv_map_ret_OPEN_FAIL:
retval = RET_OPEN_FAIL;
@@ -829,52 +848,54 @@ int32_t load_cnv_map(FILE* mapfile, int32_t marker_pos_start, int32_t marker_pos
uint32_t chrom_idx = 0;
uintptr_t* chrom_mask = chrom_info_ptr->chrom_mask;
uint32_t colskip;
- char* bufptr;
- char* bufptr2;
- uint32_t cur_marker_id_len;
int32_t cur_pos;
// don't need to worry about invalid format
- if (marker_pos_end == -1) {
- marker_pos_end = 0x7fffffff;
- }
- do {
- if (!fgets(g_textbuf, MAXLINELEN, mapfile)) {
+ {
+ if (marker_pos_end == -1) {
+ marker_pos_end = 0x7fffffff;
+ }
+ char* textbuf_first_token;
+ do {
+ if (!fgets(g_textbuf, MAXLINELEN, mapfile)) {
+ goto load_cnv_map_ret_READ_FAIL;
+ }
+ textbuf_first_token = skip_initial_spaces(g_textbuf);
+ } while (is_eoln_kns(*textbuf_first_token));
+ char* textbuf_iter = next_token_mult(textbuf_first_token, 3);
+ if (no_more_tokens_kns(textbuf_iter)) {
+ colskip = 1;
+ } else {
+ colskip = 2;
+ }
+ do {
+ textbuf_first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*textbuf_first_token)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(textbuf_first_token);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - textbuf_first_token);
+ *first_token_end = '\0';
+ chrom_idx = get_chrom_code(textbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (!IS_SET(chrom_mask, chrom_idx)) {
+ continue;
+ }
+ char* col2_ptr = skip_initial_spaces(&(first_token_end[1]));
+ char* col2_end = token_endnn(col2_ptr);
+ char* bp_col_ptr = next_token_mult(col2_end, colskip);
+ if (*bp_col_ptr == '-') {
+ continue;
+ }
+ scan_uint_defcap(bp_col_ptr, (uint32_t*)&cur_pos);
+ if ((cur_pos < marker_pos_start) || (cur_pos > marker_pos_end)) {
+ continue;
+ }
+ memcpyx(marker_ids, col2_ptr, (uintptr_t)(col2_end - col2_ptr), '\0');
+ marker_ids = &(marker_ids[max_marker_id_len]);
+ *marker_pos++ = (uint32_t)cur_pos;
+ } while (fgets(g_textbuf, MAXLINELEN, mapfile));
+ if (!feof(mapfile)) {
goto load_cnv_map_ret_READ_FAIL;
}
- bufptr = skip_initial_spaces(g_textbuf);
- } while (is_eoln_kns(*bufptr));
- bufptr = next_token_mult(bufptr, 3);
- if (is_eoln_kns(*bufptr)) {
- colskip = 1;
- } else {
- colskip = 2;
- }
- do {
- bufptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
- }
- chrom_idx = get_chrom_code(chrom_info_ptr, bufptr);
- if (!IS_SET(chrom_mask, chrom_idx)) {
- continue;
- }
- bufptr = next_token(bufptr);
- bufptr2 = next_token_mult(bufptr, colskip);
- if (*bufptr2 == '-') {
- continue;
- }
- scan_uint_defcap(bufptr2, (uint32_t*)&cur_pos);
- if ((cur_pos < marker_pos_start) || (cur_pos > marker_pos_end)) {
- continue;
- }
- cur_marker_id_len = strlen_se(bufptr);
- bufptr[cur_marker_id_len] = '\0';
- memcpy(marker_ids, bufptr, cur_marker_id_len + 1);
- marker_ids = &(marker_ids[max_marker_id_len]);
- *marker_pos++ = (uint32_t)cur_pos;
- } while (fgets(g_textbuf, MAXLINELEN, mapfile));
- if (!feof(mapfile)) {
- goto load_cnv_map_ret_READ_FAIL;
}
while (0) {
load_cnv_map_ret_READ_FAIL:
@@ -887,17 +908,17 @@ int32_t load_cnv_map(FILE* mapfile, int32_t marker_pos_start, int32_t marker_pos
int32_t plink_cnv(char* outname, char* outname_end, char* cnvname, char* mapname, char* famname, char* phenoname, char* keepname, char* removename, char* filtername, uint64_t misc_flags, Two_col_params* update_chr, Two_col_params* update_cm, Two_col_params* update_map, Two_col_params* update_name, char* update_ids_fname, char* update_parents_fname, char* update_sex_fname, char* filtervals_flattened, uint64_t filter_flags, uint32_t cnv_calc_type, uint32_t min_seglen, uint32_t max_seglen, [...]
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* cnvfile = NULL;
- FILE* famfile = NULL;
- FILE* mapfile = NULL;
- FILE* outfile = NULL;
- char* subset_list = NULL;
+ FILE* cnvfile = nullptr;
+ FILE* famfile = nullptr;
+ FILE* mapfile = nullptr;
+ FILE* outfile = nullptr;
+ char* subset_list = nullptr;
uintptr_t subset_ct = 0;
uintptr_t max_subset_name_len = 0;
uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
- uint64_t* il_small = NULL; // high-order 32 bits = 2x center pos,
- // low-order 32 bits = interval end - start
- uint64_t* il_large = NULL;
+ uint64_t* il_small = nullptr; // high-order 32 bits = 2x center pos,
+ // low-order 32 bits = interval end - start
+ uint64_t* il_large = nullptr;
uintptr_t* il_chrom_start_small;
uintptr_t* il_chrom_start_large;
unsigned char* bigstack_mark2;
@@ -946,7 +967,7 @@ int32_t plink_cnv(char* outname, char* outname_end, char* cnvname, char* mapname
if ((mapname[0] == '\0') || (!filename_exists("", mapname, sptr))) {
if (mapname[0] == '\0') {
uii = strlen(cnvname);
- if ((uii < 5) || (cnvname[uii - 4] != '.') || (!match_upper_nt(&(cnvname[uii - 3]), "CNV", 3))) {
+ if ((uii < 5) || (cnvname[uii - 4] != '.') || (!match_upper_counted(&(cnvname[uii - 3]), "CNV", 3))) {
logerrprint("Error: No .cnv.map filename specified, and .cnv filename does not unambiguously\nspecify how an autogenerated file should be named. Use --cnv-make-map + --out.\n");
goto plink_cnv_ret_INVALID_CMDLINE;
}
@@ -960,7 +981,7 @@ int32_t plink_cnv(char* outname, char* outname_end, char* cnvname, char* mapname
}
sprintf(g_logbuf, "Autogenerating missing %s ... ", mapname);
wordwrapb(5);
- retval = cnv_make_map(cnvfile, mapname, 0, 0, 0xffffffffU, -DBL_MAX, DBL_MAX, 0, 0xffffffffU, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0.0, -1, -1, allow_extra_chroms, 0, chrom_info_ptr, &max_marker_id_len, marker_chrom_start);
+ retval = cnv_make_map(cnvfile, mapname, 0, 0, 0xffffffffU, -DBL_MAX, DBL_MAX, 0, 0xffffffffU, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, 0, 0.0, -1, -1, allow_extra_chroms, 0, chrom_info_ptr, &max_marker_id_len, marker_chrom_start);
} else {
retval = validate_cnv_map(&mapfile, mapname, &marker_pos_start, &marker_pos_end, allow_extra_chroms, chrom_info_ptr, &max_marker_id_len, marker_chrom_start);
}
@@ -1015,7 +1036,6 @@ int32_t plink_cnv(char* outname, char* outname_end, char* cnvname, char* mapname
bigstack_double_reset(bigstack_mark, bigstack_end_mark);
return 0;
}
-#endif // HIGH_MAX_CHROM
int32_t plink_gvar(char* outname, char* outname_end, char* gvarname, char* mapname, char* famname) {
logerrprint("Error: Common CNP analysis not yet supported.\n");
diff --git a/plink_common.c b/plink_common.c
index ad9a165..a68efdd 100644
--- a/plink_common.c
+++ b/plink_common.c
@@ -18,7 +18,7 @@ const char* g_output_missing_geno_ptr = &(g_one_char_strs[96]);
sfmt_t g_sfmt;
-FILE* g_logfile = NULL;
+FILE* g_logfile = nullptr;
char g_logbuf[MAXLINELEN * 2];
@@ -71,7 +71,7 @@ void logstr(const char* ss) {
if (!g_debug_on) {
fputs(ss, g_logfile);
if (ferror(g_logfile)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fprintf(stderr, "Warning: Logging failure on:\n%s\nFurther logging will not be attempted in this run.\n", ss);
g_log_failed = 1;
@@ -83,7 +83,7 @@ void logstr(const char* ss) {
} else {
fputs(ss, g_logfile);
if (ferror(g_logfile)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
fflush(stdout);
fprintf(stderr, "Error: Debug logging failure. Dumping to stderr:\n%s", ss);
g_log_failed = 1;
@@ -227,7 +227,7 @@ unsigned char* bigstack_alloc(uintptr_t size) {
unsigned char* alloc_ptr;
size = round_up_pow2(size, CACHELINE);
if (bigstack_left() < size) {
- return NULL;
+ return nullptr;
}
alloc_ptr = g_bigstack_base;
g_bigstack_base += size;
@@ -243,7 +243,7 @@ unsigned char* bigstack_end_alloc_presized(uintptr_t size) {
assert(!(size & END_ALLOC_CHUNK_M1));
uintptr_t cur_bigstack_left = bigstack_left();
if (size > cur_bigstack_left) {
- return NULL;
+ return nullptr;
} else {
g_bigstack_end -= size;
return g_bigstack_end;
@@ -261,7 +261,7 @@ uint32_t match_upper(const char* ss, const char* fixed_str) {
return !(*ss);
}
-uint32_t match_upper_nt(const char* ss, const char* fixed_str, uint32_t ct) {
+uint32_t match_upper_counted(const char* ss, const char* fixed_str, uint32_t ct) {
do {
if ((((unsigned char)(*ss++)) & 0xdf) != ((unsigned char)(*fixed_str++))) {
return 0;
@@ -270,134 +270,243 @@ uint32_t match_upper_nt(const char* ss, const char* fixed_str, uint32_t ct) {
return 1;
}
-uint32_t scan_posint_capped(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp) {
- // '0' has ascii code 48
- uint32_t val = (uint32_t)((unsigned char)*ss) - 48;
- uint32_t cur_digit;
- if (val < 10) {
- while (1) {
- scan_posint_capped_main_loop:
- cur_digit = (uint32_t)((unsigned char)(*(++ss))) - 48;
- if (cur_digit >= 10) {
- if (val) {
- *valp = val;
- return 0;
- }
- return 1;
- }
- // avoid integer overflow in middle of computation
- if ((val >= cap_div_10) && ((val > cap_div_10) || (cur_digit > cap_mod_10))) {
+#ifdef __LP64__
+static inline uint32_t scan_uint_capped_finish(const char* ss, uint64_t cap, uint32_t* valp) {
+ uint64_t val = *valp;
+ while (1) {
+ // a little bit of unrolling seems to help
+ const uint64_t cur_digit = (uint64_t)((unsigned char)(*ss++)) - 48;
+ if (cur_digit >= 10) {
+ break;
+ }
+ // val = val * 10 + cur_digit;
+ const uint64_t cur_digit2 = (uint64_t)((unsigned char)(*ss++)) - 48;
+ if (cur_digit2 >= 10) {
+ val = val * 10 + cur_digit;
+ if (val > cap) {
return 1;
}
- val = val * 10 + cur_digit;
+ break;
}
- } else if (val == 0xfffffffbU) {
+ val = val * 100 + cur_digit * 10 + cur_digit2;
+ if (val > cap) {
+ return 1;
+ }
+ }
+ *valp = val;
+ return 0;
+}
+
+uint32_t scan_posint_capped(const char* ss, uint64_t cap, uint32_t* valp) {
+ // '0' has ascii code 48
+ *valp = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (*valp >= 10) {
// permit leading '+' (ascii 43), but not '++' or '+-'
- val = (uint32_t)((unsigned char)(*(++ss))) - 48;
- if (val < 10) {
- goto scan_posint_capped_main_loop;
+ if (*valp != 0xfffffffbU) {
+ return 1;
+ }
+ *valp = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (*valp >= 10) {
+ return 1;
}
}
- return 1;
+ while (!(*valp)) {
+ *valp = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if ((*valp) >= 10) {
+ return 1;
+ }
+ }
+ return scan_uint_capped_finish(ss, cap, valp);
}
-uint32_t scan_uint_capped(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp) {
+uint32_t scan_uint_capped(const char* ss, uint64_t cap, uint32_t* valp) {
// Reads an integer in [0, cap]. Assumes first character is nonspace.
- uint32_t val = (uint32_t)((unsigned char)*ss) - 48;
- uint32_t cur_digit;
- if (val < 10) {
- while (1) {
- scan_uint_capped_main_loop:
- cur_digit = (uint32_t)((unsigned char)(*(++ss))) - 48;
- if (cur_digit >= 10) {
- *valp = val;
- return 0;
- }
- if ((val >= cap_div_10) && ((val > cap_div_10) || (cur_digit > cap_mod_10))) {
+ uint32_t val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ if (val != 0xfffffffbU) {
+ // '-' has ascii code 45, so unsigned 45 - 48 = 0xfffffffdU
+ if ((val != 0xfffffffdU) || (*ss != '0')) {
return 1;
}
- val = val * 10 + cur_digit;
+ // accept "-0", "-00", etc.
+ while (*(++ss) == '0');
+ *valp = 0;
+ return ((uint32_t)((unsigned char)(*ss)) - 48) < 10;
+ }
+ // accept leading '+'
+ val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ return 1;
}
}
- // '-' has ascii code 45, so unsigned 45 - 48 = 0xfffffffdU
- ss++;
- if (val != 0xfffffffdU) {
- if (val == 0xfffffffbU) {
- val = (uint32_t)((unsigned char)(*ss)) - 48;
- if (val < 10) {
- goto scan_uint_capped_main_loop;
- }
+ *valp = val;
+ return scan_uint_capped_finish(ss, cap, valp);
+}
+
+uint32_t scan_int_abs_bounded(const char* ss, uint64_t bound, int32_t* valp) {
+ // Reads an integer in [-bound, bound]. Assumes first character is nonspace.
+ *valp = (uint32_t)((unsigned char)(*ss++)) - 48;
+ int32_t sign = 1;
+ if (((uint32_t)*valp) >= 10) {
+ if (*valp == -3) {
+ sign = -1;
+ } else if (*valp != -5) {
+ return 1;
+ }
+ *valp = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (((uint32_t)*valp) >= 10) {
+ return 1;
}
- return 1;
}
- // accept "-0", "-00", etc.
- if (*ss != '0') {
+ if (scan_uint_capped_finish(ss, bound, (uint32_t*)valp)) {
return 1;
}
- while (*(++ss) == '0');
- *valp = 0;
- return ((uint32_t)((unsigned char)(*ss)) - 48) < 10;
+ *valp *= sign;
+ return 0;
+}
+#else // not __LP64__
+uint32_t scan_posint_capped32(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp) {
+ // '0' has ascii code 48
+ uint32_t val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ if (val != 0xfffffffbU) {
+ return 1;
+ }
+ val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ return 1;
+ }
+ }
+ while (!val) {
+ val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ return 1;
+ }
+ }
+ while (1) {
+ const uint32_t cur_digit = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (cur_digit >= 10) {
+ *valp = val;
+ return 0;
+ }
+ // avoid integer overflow in middle of computation
+ if ((val >= cap_div_10) && ((val > cap_div_10) || (cur_digit > cap_mod_10))) {
+ return 1;
+ }
+ val = val * 10 + cur_digit;
+ }
}
-uint32_t scan_int_abs_bounded(const char* ss, uint32_t bound_div_10, uint32_t bound_mod_10, int32_t* valp) {
- // Reads an integer in [-bound, bound]. Assumes first character is nonspace.
- uint32_t val = (uint32_t)((unsigned char)*ss) - 48;
- int32_t sign = 1;
- uint32_t cur_digit;
- if (val < 10) {
- while (1) {
- scan_int_abs_bounded_main_loop:
- cur_digit = (uint32_t)((unsigned char)(*(++ss))) - 48;
- if (cur_digit >= 10) {
- *valp = sign * ((int32_t)val);
- return 0;
- }
- if ((val >= bound_div_10) && ((val > bound_div_10) || (cur_digit > bound_mod_10))) {
+uint32_t scan_uint_capped32(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp) {
+ // Reads an integer in [0, cap]. Assumes first character is nonspace.
+ uint32_t val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ if (val != 0xfffffffbU) {
+ if ((val != 0xfffffffdU) || (*ss != '0')) {
return 1;
}
- val = val * 10 + cur_digit;
+ while (*(++ss) == '0');
+ *valp = 0;
+ return ((uint32_t)((unsigned char)(*ss)) - 48) < 10;
+ }
+ val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ return 1;
}
}
- if (val == 0xfffffffdU) {
- sign = -1;
- } else if (val != 0xfffffffbU) {
- return 1;
+ while (1) {
+ const uint32_t cur_digit = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (cur_digit >= 10) {
+ *valp = val;
+ return 0;
+ }
+ if ((val >= cap_div_10) && ((val > cap_div_10) || (cur_digit > cap_mod_10))) {
+ return 1;
+ }
+ val = val * 10 + cur_digit;
}
- val = (uint32_t)((unsigned char)(*(++ss))) - 48;
- if (val < 10) {
- goto scan_int_abs_bounded_main_loop;
+}
+
+uint32_t scan_int_abs_bounded32(const char* ss, uint32_t bound_div_10, uint32_t bound_mod_10, int32_t* valp) {
+ // Reads an integer in [-bound, bound]. Assumes first character is nonspace.
+ uint32_t val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ int32_t sign = 1;
+ if (val >= 10) {
+ if (val == 0xfffffffdU) {
+ sign = -1;
+ } else if (val != 0xfffffffbU) {
+ return 1;
+ }
+ val = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ return 1;
+ }
+ }
+ while (1) {
+ const uint32_t cur_digit = (uint32_t)((unsigned char)(*ss++)) - 48;
+ if (cur_digit >= 10) {
+ *valp = sign * ((int32_t)val);
+ return 0;
+ }
+ if ((val >= bound_div_10) && ((val > bound_div_10) || (cur_digit > bound_mod_10))) {
+ return 1;
+ }
+ val = val * 10 + cur_digit;
}
- return 1;
}
+#endif
uint32_t scan_posintptr(const char* ss, uintptr_t* valp) {
// Reads an integer in [1, 2^BITCT - 1]. Assumes first character is
// nonspace.
- uintptr_t val = (uint32_t)((unsigned char)*ss) - 48;
- uintptr_t cur_digit;
- if (val < 10) {
- while (1) {
- scan_posintptr_main_loop:
- cur_digit = (uint32_t)((unsigned char)(*(++ss))) - 48;
- if (cur_digit >= 10) {
- if (val) {
- *valp = val;
- return 0;
- }
- return 1;
- }
- if ((val >= (~ZEROLU) / 10) && ((val > (~ZEROLU) / 10) || (cur_digit > (~ZEROLU) % 10))) {
+ uintptr_t val = (uintptr_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+#ifdef __LP64__
+ if (val != 0xfffffffffffffffbLLU) {
+ return 1;
+ }
+#else
+ if (val != 0xfffffffbU) {
+ return 1;
+ }
+#endif
+ val = (uintptr_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ return 1;
+ }
+ }
+ while (!val) {
+ val = (uintptr_t)((unsigned char)(*ss++)) - 48;
+ if (val >= 10) {
+ return 1;
+ }
+ }
+ // limit is 20 digits (10 when __LP64__ is not defined), we've already read one
+#ifdef __LP64__
+ const char* ss_limit = &(ss[20]);
+#else
+ const char* ss_limit = &(ss[10]);
+#endif
+ while (1) {
+ const uintptr_t cur_digit = (uintptr_t)((unsigned char)(*ss++)) - 48;
+ if (cur_digit >= 10) {
+ *valp = val;
+ return 0;
+ }
+ const uintptr_t cur_digit2 = (uintptr_t)((unsigned char)(*ss++)) - 48;
+ if (ss == ss_limit) {
+ if ((cur_digit2 < 10) || ((val >= (~ZEROLU) / 10) && ((val > (~ZEROLU) / 10) || (cur_digit > (~ZEROLU) % 10)))) {
return 1;
}
- val = val * 10 + cur_digit;
+ *valp = val * 10 + cur_digit;
+ return 0;
}
- } else if (val == 0xfffffffbU) {
- val = (uint32_t)((unsigned char)(*(++ss))) - 48;
- if (val < 10) {
- goto scan_posintptr_main_loop;
+ if (cur_digit2 >= 10) {
+ *valp = val * 10 + cur_digit;
+ return 0;
}
+ val = val * 100 + cur_digit * 10 + cur_digit2;
}
- return 1;
}
/*
@@ -678,7 +787,7 @@ int32_t strcmp_se(const char* s_read, const char* s_const, uint32_t s_const_len)
char* next_token(char* sptr) {
if (!sptr) {
- return NULL;
+ return nullptr;
}
unsigned char ucc = *sptr;
while (ucc > 32) {
@@ -687,13 +796,13 @@ char* next_token(char* sptr) {
while ((ucc == ' ') || (ucc == '\t')) {
ucc = *(++sptr);
}
- return (ucc > 32)? sptr : NULL;
+ return (ucc > 32)? sptr : nullptr;
}
char* next_token_mult(char* sptr, uint32_t ct) {
assert(ct);
if (!sptr) {
- return NULL;
+ return nullptr;
}
unsigned char ucc = *sptr;
do {
@@ -704,7 +813,7 @@ char* next_token_mult(char* sptr, uint32_t ct) {
ucc = *(++sptr);
}
if (ucc <= 32) {
- return NULL;
+ return nullptr;
}
} while (--ct);
return sptr;
@@ -3210,39 +3319,6 @@ char* chrom_print_human(uint32_t num, char* buf) {
}
}
-uint32_t allele_set(const char* newval, uint32_t slen, char** allele_ptr) {
- char* newptr;
- if (slen == 1) {
- newptr = (char*)(&(g_one_char_strs[((unsigned char)*newval) * 2]));
- } else {
- newptr = (char*)malloc(slen + 1);
- if (!newptr) {
- return 1;
- }
- memcpyx(newptr, newval, slen, '\0');
- }
- *allele_ptr = newptr;
- return 0;
-}
-
-uint32_t allele_reset(const char* newval, uint32_t slen, char** allele_ptr) {
- char* newptr;
- if (slen == 1) {
- newptr = (char*)(&(g_one_char_strs[((unsigned char)*newval) * 2]));
- } else {
- newptr = (char*)malloc(slen + 1);
- if (!newptr) {
- return 1;
- }
- memcpyx(newptr, newval, slen, '\0');
- }
- if (allele_ptr[0][1]) {
- free(*allele_ptr);
- }
- *allele_ptr = newptr;
- return 0;
-}
-
void magic_num(uint32_t divisor, uint64_t* multp, uint32_t* __restrict pre_shiftp, uint32_t* __restrict post_shiftp, uint32_t* __restrict incrp) {
// Enables fast integer division by a constant not known until runtime. See
// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html .
@@ -3312,7 +3388,7 @@ void fill_bits(uintptr_t loc_start, uintptr_t len, uintptr_t* bitarr) {
bitarr[maj_start] |= (ONELU << ((loc_start + len) % BITCT)) - (ONELU << (loc_start % BITCT));
} else {
bitarr[maj_start] |= ~((ONELU << (loc_start % BITCT)) - ONELU);
- fill_ulong_one(&(bitarr[maj_start + 1]), maj_end - maj_start - 1);
+ fill_ulong_one(maj_end - maj_start - 1, &(bitarr[maj_start + 1]));
minor = (loc_start + len) % BITCT;
if (minor) {
bitarr[maj_end] |= (ONELU << minor) - ONELU;
@@ -3329,7 +3405,7 @@ void clear_bits(uintptr_t loc_start, uintptr_t len, uintptr_t* bitarr) {
bitarr[maj_start] &= ~((ONELU << ((loc_start + len) % BITCT)) - (ONELU << (loc_start % BITCT)));
} else {
bitarr[maj_start] &= ((ONELU << (loc_start % BITCT)) - ONELU);
- fill_ulong_zero(&(bitarr[maj_start + 1]), maj_end - maj_start - 1);
+ fill_ulong_zero(maj_end - maj_start - 1, &(bitarr[maj_start + 1]));
minor = (loc_start + len) % BITCT;
if (minor) {
bitarr[maj_end] &= ~((ONELU << minor) - ONELU);
@@ -3562,7 +3638,7 @@ int32_t bigstack_calloc_d(uintptr_t ct, double** dp_ptr) {
if (!(*dp_ptr)) {
return 1;
}
- fill_double_zero(*dp_ptr, ct);
+ fill_double_zero(ct, *dp_ptr);
return 0;
}
@@ -3571,7 +3647,7 @@ int32_t bigstack_calloc_f(uintptr_t ct, float** fp_ptr) {
if (!(*fp_ptr)) {
return 1;
}
- fill_float_zero(*fp_ptr, ct);
+ fill_float_zero(ct, *fp_ptr);
return 0;
}
@@ -3580,7 +3656,7 @@ int32_t bigstack_calloc_ui(uintptr_t ct, uint32_t** uip_ptr) {
if (!(*uip_ptr)) {
return 1;
}
- fill_uint_zero(*uip_ptr, ct);
+ fill_uint_zero(ct, *uip_ptr);
return 0;
}
@@ -3589,7 +3665,7 @@ int32_t bigstack_calloc_ul(uintptr_t ct, uintptr_t** ulp_ptr) {
if (!(*ulp_ptr)) {
return 1;
}
- fill_ulong_zero(*ulp_ptr, ct);
+ fill_ulong_zero(ct, *ulp_ptr);
return 0;
}
@@ -3598,7 +3674,7 @@ int32_t bigstack_calloc_ull(uintptr_t ct, uint64_t** ullp_ptr) {
if (!(*ullp_ptr)) {
return 1;
}
- fill_ull_zero(*ullp_ptr, ct);
+ fill_ull_zero(ct, *ullp_ptr);
return 0;
}
@@ -3616,7 +3692,7 @@ int32_t bigstack_end_calloc_d(uintptr_t ct, double** dp_ptr) {
if (!(*dp_ptr)) {
return 1;
}
- fill_double_zero(*dp_ptr, ct);
+ fill_double_zero(ct, *dp_ptr);
return 0;
}
@@ -3625,7 +3701,7 @@ int32_t bigstack_end_calloc_f(uintptr_t ct, float** fp_ptr) {
if (!(*fp_ptr)) {
return 1;
}
- fill_float_zero(*fp_ptr, ct);
+ fill_float_zero(ct, *fp_ptr);
return 0;
}
@@ -3634,7 +3710,7 @@ int32_t bigstack_end_calloc_ui(uintptr_t ct, uint32_t** uip_ptr) {
if (!(*uip_ptr)) {
return 1;
}
- fill_uint_zero(*uip_ptr, ct);
+ fill_uint_zero(ct, *uip_ptr);
return 0;
}
@@ -3643,7 +3719,7 @@ int32_t bigstack_end_calloc_ul(uintptr_t ct, uintptr_t** ulp_ptr) {
if (!(*ulp_ptr)) {
return 1;
}
- fill_ulong_zero(*ulp_ptr, ct);
+ fill_ulong_zero(ct, *ulp_ptr);
return 0;
}
@@ -3652,7 +3728,7 @@ int32_t bigstack_end_calloc_ull(uintptr_t ct, uint64_t** ullp_ptr) {
if (!(*ullp_ptr)) {
return 1;
}
- fill_ull_zero(*ullp_ptr, ct);
+ fill_ull_zero(ct, *ullp_ptr);
return 0;
}
@@ -3803,7 +3879,7 @@ int32_t populate_id_htable(uintptr_t unfiltered_ct, const uintptr_t* exclude_arr
uint32_t top_diff;
uint32_t hash_result;
uint32_t cur_dup;
- fill_uint_one(id_htable, id_htable_size);
+ fill_uint_one(id_htable_size, id_htable);
if (!store_dups) {
for (; item_idx < item_ct; item_uidx++, item_idx++) {
next_unset_ul_unsafe_ck(exclude_arr, &item_uidx);
@@ -4168,26 +4244,273 @@ void get_set_wrange_align(const uintptr_t* __restrict bitarr, uintptr_t word_ct,
*wlen_ptr = 0;
}
-// global since species_str() may be called by functions which don't actually
-// care about Chrom_info
-const char* g_species_singular = NULL;
-const char* g_species_plural = NULL;
+// hashval computation left to caller since this is frequently used with
+// chromosome IDs, where the compiler can optimize the integer modulus
+// operation since the hash table size is preset
+uint32_t unklen_id_htable_find(const char* cur_id, const char* const* item_ids, const uint32_t* id_htable, uint32_t hashval, uint32_t id_htable_size) {
+ // returns 0xffffffffU on failure
+ uint32_t next_incr = 1;
+ while (1) {
+ const uint32_t hash_result = id_htable[hashval];
+ if (hash_result == 0xffffffffU) {
+ return 0xffffffffU;
+ }
+ const char* htable_entry = item_ids[hash_result];
+ if (!strcmp(cur_id, htable_entry)) {
+ return hash_result;
+ }
+ const uint32_t top_diff = id_htable_size - hashval;
+ if (top_diff > next_incr) {
+ hashval += next_incr;
+ } else {
+ hashval = next_incr - top_diff;
+ }
+ next_incr += 2;
+ }
+}
+
+static inline uint32_t nonstd_chrom_name_htable_find(const char* chrom_name, const char* const* nonstd_names, const uint32_t* nonstd_id_htable, uint32_t name_slen) {
+ const uint32_t hashval = murmurhash3_32(chrom_name, name_slen) % CHROM_NAME_HTABLE_SIZE;
+ return unklen_id_htable_find(chrom_name, nonstd_names, nonstd_id_htable, hashval, CHROM_NAME_HTABLE_SIZE);
+}
+
+
+// Global since species_str() may be called by functions which don't actually
+// care about chrom_info. (chrom_info is really a global variable too, but I
+// find it easier to maintain this code when chrom_info dependencies are made
+// explicit in the function signatures; in contrast, g_species_singular and
+// g_species_plural are just for pretty printing and lend no insight into what
+// the functions which reference them are doing.)
+const char* g_species_singular = nullptr;
+const char* g_species_plural = nullptr;
+
+int32_t init_chrom_info(Chrom_info* chrom_info_ptr) {
+ // "constructor". initializes with maximum capacity. doesn't use bigstack.
+ // chrom_mask, haploid_mask: bits
+ // chrom_file_order, chrom_idx_to_foidx: int32s
+ // chrom_fo_vidx_start: int32s, with an extra trailing element
+ // nonstd_names: intptr_ts
+ // nonstd_id_htable: CHROM_NAME_HTABLE_SIZE int32s
+
+ assert(!(MAX_POSSIBLE_CHROM % VEC_BYTES));
+ const uintptr_t vecs_required = 2 * BITCT_TO_VECCT(MAX_POSSIBLE_CHROM) + 3 * (MAX_POSSIBLE_CHROM / VEC_INT32) + 1 + (MAX_POSSIBLE_CHROM / VEC_WORDS) + (CHROM_NAME_HTABLE_SIZE + (VEC_INT32 - 1)) / VEC_INT32;
+
+ // needed for proper cleanup
+ chrom_info_ptr->name_ct = 0;
+ chrom_info_ptr->incl_excl_name_stack = nullptr;
+ if (aligned_malloc(vecs_required * VEC_BYTES, &(chrom_info_ptr->chrom_mask))) {
+ return RET_NOMEM;
+ }
+ uintptr_t* alloc_iter = &(chrom_info_ptr->chrom_mask[BITCT_TO_VECCT(MAX_POSSIBLE_CHROM) * VEC_WORDS]);
+ chrom_info_ptr->haploid_mask = alloc_iter;
+ alloc_iter = &(alloc_iter[BITCT_TO_VECCT(MAX_POSSIBLE_CHROM) * VEC_WORDS]);
+ chrom_info_ptr->chrom_file_order = (uint32_t*)alloc_iter;
+ alloc_iter = &(alloc_iter[(MAX_POSSIBLE_CHROM / VEC_INT32) * VEC_WORDS]);
+ chrom_info_ptr->chrom_fo_vidx_start = (uint32_t*)alloc_iter;
+ alloc_iter = &(alloc_iter[((MAX_POSSIBLE_CHROM / VEC_INT32) + 1) * VEC_WORDS]);
+ chrom_info_ptr->chrom_idx_to_foidx = (uint32_t*)alloc_iter;
+ alloc_iter = &(alloc_iter[(MAX_POSSIBLE_CHROM / VEC_INT32) * VEC_WORDS]);
+ chrom_info_ptr->nonstd_names = (char**)alloc_iter;
+ alloc_iter = &(alloc_iter[MAX_POSSIBLE_CHROM]);
+ chrom_info_ptr->nonstd_id_htable = (uint32_t*)alloc_iter;
+ // alloc_iter = &(alloc_iter[((CHROM_NAME_HTABLE_SIZE + (VEC_INT32 - 1)) / VEC_INT32) * VEC_WORDS]);
+ // postpone nonstd_id_htable initialization until first nonstandard ID is
+ // loaded
+ // fill_uint_one(CHROM_NAME_HTABLE_SIZE, chrom_info_ptr->nonstd_id_htable);
+ return 0;
+}
+
+// if these are defined within init_species(), they may not persist after
+// function exit
+static const char species_singular_constants[][7] = {"person", "cow", "dog", "horse", "mouse", "plant", "sheep", "sample"};
+static const char species_plural_constants[][8] = {"people", "cattle", "dogs", "horses", "mice", "plants", "sheep", "samples"};
+
+void init_species(uint32_t species_code, Chrom_info* chrom_info_ptr) {
+ // human: 22, X, Y, XY, MT
+ // cow: 29, X, Y, MT
+ // dog: 38, X, Y, XY, MT
+ // horse: 31, X, Y
+ // mouse: 19, X, Y
+ // rice: 12
+ // sheep: 26, X, Y
+ const int32_t species_xymt_codes[] = {
+ 23, 24, 25, 26,
+ 30, 31, -1, 33,
+ 39, 40, 41, 42,
+ 32, 33, -1, -1,
+ 20, 21, -1, -1,
+ -1, -1, -1, -1,
+ 27, 28, -1, -1};
+ const uint32_t species_autosome_ct[] = {22, 29, 38, 31, 19, 12, 26};
+ const uint32_t species_max_code[] = {26, 33, 42, 33, 21, 12, 28};
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->haploid_mask);
+ chrom_info_ptr->output_encoding = 0;
+ chrom_info_ptr->zero_extra_chroms = 0;
+ chrom_info_ptr->species = species_code;
+ chrom_info_ptr->is_include_stack = 0;
+ g_species_singular = species_singular_constants[species_code];
+ g_species_plural = species_plural_constants[species_code];
+ if (species_code != SPECIES_UNKNOWN) {
+ // these are assumed to be already initialized in the SPECIES_UNKNOWN case
+ memcpy(chrom_info_ptr->xymt_codes, &(species_xymt_codes[species_code * XYMT_OFFSET_CT]), XYMT_OFFSET_CT * sizeof(int32_t));
+ chrom_info_ptr->autosome_ct = species_autosome_ct[species_code];
+ chrom_info_ptr->max_code = species_max_code[species_code];
+ switch (species_code) {
+ case SPECIES_HUMAN:
+ chrom_info_ptr->haploid_mask[0] = 0x1800000;
+ break;
+ case SPECIES_COW:
+ chrom_info_ptr->haploid_mask[0] = 0xc0000000LU;
+ break;
+ case SPECIES_DOG:
+#ifdef __LP64__
+ chrom_info_ptr->haploid_mask[0] = 0x18000000000LLU;
+#else
+ chrom_info_ptr->haploid_mask[1] = 0x180;
+#endif
+ break;
+ case SPECIES_HORSE:
+#ifdef __LP64__
+ chrom_info_ptr->haploid_mask[0] = 0x300000000LLU;
+#else
+ chrom_info_ptr->haploid_mask[1] = 3;
+#endif
+ break;
+ case SPECIES_MOUSE:
+ chrom_info_ptr->haploid_mask[0] = 0x300000;
+ break;
+ case SPECIES_RICE:
+ chrom_info_ptr->haploid_mask[0] = 0x1fff;
+ break;
+ case SPECIES_SHEEP:
+ chrom_info_ptr->haploid_mask[0] = 0x18000000;
+ break;
+ }
+ }
+}
+
+void init_default_chrom_mask(Chrom_info* chrom_info_ptr) {
+ if (chrom_info_ptr->species != SPECIES_UNKNOWN) {
+ fill_all_bits(chrom_info_ptr->max_code + 1, chrom_info_ptr->chrom_mask);
+ } else {
+ fill_all_bits(chrom_info_ptr->autosome_ct + 1, chrom_info_ptr->chrom_mask);
+ // --chr-set support
+ for (uint32_t xymt_idx = 0; xymt_idx < XYMT_OFFSET_CT; ++xymt_idx) {
+ int32_t cur_code = chrom_info_ptr->xymt_codes[xymt_idx];
+ if (cur_code != -1) {
+ set_bit(chrom_info_ptr->xymt_codes[xymt_idx], chrom_info_ptr->chrom_mask);
+ }
+ }
+ }
+}
+
+void forget_extra_chrom_names(uint32_t reinitialize, Chrom_info* chrom_info_ptr) {
+ const uint32_t name_ct = chrom_info_ptr->name_ct;
+ // guard against init_species() not being called yet
+ if (name_ct) {
+ char** nonstd_names = chrom_info_ptr->nonstd_names;
+ const uint32_t chrom_idx_last = chrom_info_ptr->max_code + name_ct;
+ for (uint32_t chrom_idx = chrom_info_ptr->max_code + 1; chrom_idx <= chrom_idx_last; ++chrom_idx) {
+ free(nonstd_names[chrom_idx]);
+ nonstd_names[chrom_idx] = nullptr;
+ }
+ if (reinitialize) {
+ fill_uint_one(CHROM_NAME_HTABLE_SIZE, chrom_info_ptr->nonstd_id_htable);
+ chrom_info_ptr->name_ct = 0;
+ }
+ }
+}
+
+int32_t finalize_chrom_info(Chrom_info* chrom_info_ptr) {
+ const uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
+ const uint32_t name_ct = chrom_info_ptr->name_ct;
+ const uint32_t chrom_code_end = chrom_info_ptr->max_code + 1 + name_ct;
+ const uint32_t chrom_code_bitvec_ct = BITCT_TO_VECCT(chrom_code_end);
+ const uint32_t chrom_ct_int32vec_ct = (chrom_ct + (VEC_INT32 - 1)) / VEC_INT32;
+ const uint32_t chrom_ct_p1_int32vec_ct = 1 + (chrom_ct / VEC_INT32);
+ const uint32_t chrom_code_end_int32vec_ct = (chrom_code_end + (VEC_INT32 - 1)) / VEC_INT32;
+ const uint32_t chrom_code_end_wordvec_ct = (chrom_code_end + (VEC_WORDS - 1)) / VEC_WORDS;
+ uint32_t final_vecs_required = 2 * chrom_code_bitvec_ct + chrom_ct_int32vec_ct + chrom_ct_p1_int32vec_ct + chrom_code_end_int32vec_ct;
+ if (name_ct) {
+ final_vecs_required += chrom_code_end_wordvec_ct + (CHROM_NAME_HTABLE_SIZE + (VEC_INT32 - 1)) / VEC_INT32;
+ }
+ uintptr_t* new_alloc;
+ if (aligned_malloc(final_vecs_required * VEC_BYTES, &new_alloc)) {
+ return RET_NOMEM;
+ }
+ uintptr_t* old_alloc = chrom_info_ptr->chrom_mask;
+ uintptr_t* new_alloc_iter = new_alloc;
+
+ memcpy(new_alloc_iter, chrom_info_ptr->chrom_mask, chrom_code_bitvec_ct * VEC_BYTES);
+ chrom_info_ptr->chrom_mask = new_alloc_iter;
+ new_alloc_iter = &(new_alloc_iter[chrom_code_bitvec_ct * VEC_WORDS]);
+
+ memcpy(new_alloc_iter, chrom_info_ptr->haploid_mask, chrom_code_bitvec_ct * VEC_BYTES);
+ chrom_info_ptr->haploid_mask = new_alloc_iter;
+ new_alloc_iter = &(new_alloc_iter[chrom_code_bitvec_ct * VEC_WORDS]);
+
+ memcpy(new_alloc_iter, chrom_info_ptr->chrom_file_order, chrom_ct_int32vec_ct * VEC_BYTES);
+ chrom_info_ptr->chrom_file_order = (uint32_t*)new_alloc_iter;
+ new_alloc_iter = &(new_alloc_iter[chrom_ct_int32vec_ct * VEC_WORDS]);
+
+ memcpy(new_alloc_iter, chrom_info_ptr->chrom_fo_vidx_start, chrom_ct_p1_int32vec_ct * VEC_BYTES);
+ chrom_info_ptr->chrom_fo_vidx_start = (uint32_t*)new_alloc_iter;
+ new_alloc_iter = &(new_alloc_iter[chrom_ct_p1_int32vec_ct * VEC_WORDS]);
+
+ memcpy(new_alloc_iter, chrom_info_ptr->chrom_idx_to_foidx, chrom_code_end_int32vec_ct * VEC_BYTES);
+ chrom_info_ptr->chrom_idx_to_foidx = (uint32_t*)new_alloc_iter;
+
+ if (!name_ct) {
+ chrom_info_ptr->nonstd_names = nullptr;
+ chrom_info_ptr->nonstd_id_htable = nullptr;
+ } else {
+ new_alloc_iter = &(new_alloc_iter[chrom_code_end_int32vec_ct * VEC_WORDS]);
+
+ memcpy(new_alloc_iter, chrom_info_ptr->nonstd_names, chrom_code_end_wordvec_ct * VEC_BYTES);
+ chrom_info_ptr->nonstd_names = (char**)new_alloc_iter;
+ new_alloc_iter = &(new_alloc_iter[chrom_code_end_wordvec_ct * VEC_WORDS]);
+
+ memcpy(new_alloc_iter, chrom_info_ptr->nonstd_id_htable, CHROM_NAME_HTABLE_SIZE * sizeof(int32_t));
+ chrom_info_ptr->nonstd_id_htable = (uint32_t*)new_alloc_iter;
+ }
+ aligned_free(old_alloc);
+ return 0;
+}
+
+void cleanup_chrom_info(Chrom_info* chrom_info_ptr) {
+ if (chrom_info_ptr->chrom_mask) {
+ // bugfix: this must happen before the aligned_free() call
+ forget_extra_chrom_names(0, chrom_info_ptr);
+
+ aligned_free(chrom_info_ptr->chrom_mask);
+ chrom_info_ptr->chrom_mask = nullptr;
+ }
+ Ll_str* ll_str_ptr = chrom_info_ptr->incl_excl_name_stack;
+ while (ll_str_ptr) {
+ Ll_str* next_ptr = ll_str_ptr->next;
+ free(ll_str_ptr);
+ ll_str_ptr = next_ptr;
+ }
+ chrom_info_ptr->incl_excl_name_stack = nullptr;
+}
char* chrom_name_std(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, char* buf) {
- uint32_t output_encoding = chrom_info_ptr->output_encoding;
+ const uint32_t output_encoding = chrom_info_ptr->output_encoding;
if (output_encoding & (CHR_OUTPUT_PREFIX | CHR_OUTPUT_0M)) {
if (output_encoding == CHR_OUTPUT_0M) {
// force two chars
if (chrom_idx <= chrom_info_ptr->autosome_ct) {
- buf = memcpya(buf, &(digit2_table[chrom_idx * 2]), 2);
- } else if ((int32_t)chrom_idx == chrom_info_ptr->xy_code) {
- buf = memcpya(buf, "XY", 2);
+ buf = (char*)memcpya(buf, &(digit2_table[chrom_idx * 2]), 2);
+ } else if ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[XY_OFFSET]) {
+ buf = (char*)memcpya(buf, "XY", 2);
} else {
*buf++ = '0';
- if ((int32_t)chrom_idx == chrom_info_ptr->x_code) {
+ if ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]) {
*buf++ = 'X';
} else {
- *buf++ = ((int32_t)chrom_idx == chrom_info_ptr->y_code)? 'Y' : 'M';
+ // assumes only X/Y/XY/MT defined
+ *buf++ = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET])? 'Y' : 'M';
}
}
return buf;
@@ -4196,12 +4519,12 @@ char* chrom_name_std(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, char*
}
if ((!(output_encoding & (CHR_OUTPUT_M | CHR_OUTPUT_MT))) || (chrom_idx <= chrom_info_ptr->autosome_ct)) {
return uint32toa(chrom_idx, buf);
- } else if ((int32_t)chrom_idx == chrom_info_ptr->x_code) {
+ } else if ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]) {
*buf++ = 'X';
- } else if ((int32_t)chrom_idx == chrom_info_ptr->y_code) {
+ } else if ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]) {
*buf++ = 'Y';
- } else if ((int32_t)chrom_idx == chrom_info_ptr->xy_code) {
- buf = memcpya(buf, "XY", 2);
+ } else if ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[XY_OFFSET]) {
+ buf = (char*)memcpya(buf, "XY", 2);
} else {
*buf++ = 'M';
if (output_encoding & CHR_OUTPUT_MT) {
@@ -4251,99 +4574,43 @@ char* chrom_name_buf5w4write(const Chrom_info* chrom_info_ptr, uint32_t chrom_id
return buf5;
}
-uint32_t get_max_chrom_len(const Chrom_info* chrom_info_ptr) {
+uint32_t get_max_chrom_slen(const Chrom_info* chrom_info_ptr) {
// does not include trailing null
// can be overestimate
// if more functions start calling this, it should just be built into
// load_bim() instead
if (chrom_info_ptr->zero_extra_chroms) {
- return 3 + MAX_CHROM_TEXTNUM_LEN;
+ return 3 + MAX_CHROM_TEXTNUM_SLEN;
}
- uint32_t max_chrom_len = 3 + MAX_CHROM_TEXTNUM_LEN;
- uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
- uint32_t max_code = chrom_info_ptr->max_code;
- uint32_t chrom_fo_idx;
- uint32_t chrom_idx;
- uint32_t slen;
- for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
- chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ const uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
+ const uint32_t max_code = chrom_info_ptr->max_code;
+ uint32_t max_chrom_slen = 3 + MAX_CHROM_TEXTNUM_SLEN;
+ for (uint32_t chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
+ const uint32_t chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
if (!is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
continue;
}
if (chrom_idx > max_code) {
- slen = strlen(chrom_info_ptr->nonstd_names[chrom_idx]);
- if (slen > max_chrom_len) {
- max_chrom_len = slen;
+ const uint32_t name_slen = strlen(chrom_info_ptr->nonstd_names[chrom_idx]);
+ if (name_slen > max_chrom_slen) {
+ max_chrom_slen = name_slen;
}
}
}
- return max_chrom_len;
-}
-
-void forget_extra_chrom_names(Chrom_info* chrom_info_ptr) {
- uint32_t name_ct = chrom_info_ptr->name_ct;
- char** nonstd_names;
- uint32_t chrom_name_idx;
- // guard against init_species() not being called yet
- if (name_ct) {
- nonstd_names = &(chrom_info_ptr->nonstd_names[chrom_info_ptr->max_code + 1]);
- for (chrom_name_idx = 0; chrom_name_idx < name_ct; chrom_name_idx++) {
- free(nonstd_names[chrom_name_idx]);
- nonstd_names[chrom_name_idx] = NULL;
- }
- chrom_info_ptr->name_ct = 0;
- }
+ return max_chrom_slen;
}
uint32_t haploid_chrom_present(const Chrom_info* chrom_info_ptr) {
const uintptr_t* chrom_mask = chrom_info_ptr->chrom_mask;
const uintptr_t* haploid_mask = chrom_info_ptr->haploid_mask;
- uint32_t uii;
- for (uii = 0; uii < CHROM_MASK_INITIAL_WORDS; uii++) {
- if (chrom_mask[uii] & haploid_mask[uii]) {
+ for (uint32_t widx = 0; widx < CHROM_MASK_INITIAL_WORDS; widx++) {
+ if (chrom_mask[widx] & haploid_mask[widx]) {
return 1;
}
}
return 0;
}
-uint32_t bsearch_str_idx(const char* sptr, uint32_t slen, char* const* str_array, const uint32_t* __restrict str_sorted_idxs, uint32_t end_idx, uint32_t* __restrict gt_ptr) {
- // return 0 on success, 1 on failure
- // *gt_ptr is number of strings current string is lexicographically after
- // (so, on success, it's the correct index, and on failure, it's the
- // insertion point)
- uint32_t start_idx = 0;
- const char* sptr2;
- uint32_t mid_idx;
- uint32_t slen2;
- int32_t ii;
- while (start_idx < end_idx) {
- mid_idx = (start_idx + end_idx) / 2;
- sptr2 = str_array[str_sorted_idxs[mid_idx]];
- slen2 = strlen(sptr2);
- if (slen2 < slen) {
- ii = memcmp(sptr, sptr2, slen2);
- if (ii >= 0) {
- start_idx = mid_idx + 1;
- } else {
- end_idx = mid_idx;
- }
- } else {
- ii = memcmp(sptr, sptr2, slen);
- if (ii > 0) {
- start_idx = mid_idx + 1;
- } else if ((ii < 0) || (slen != slen2)) {
- end_idx = mid_idx;
- } else {
- *gt_ptr = mid_idx;
- return 0;
- }
- }
- }
- *gt_ptr = start_idx;
- return 1;
-}
-
static inline int32_t single_letter_chrom(uint32_t letter) {
letter &= 0xdf;
if (letter == 'X') {
@@ -4361,110 +4628,94 @@ int32_t get_chrom_code_raw(const char* sptr) {
// any character <= ' ' is considered a terminator
// note that char arithmetic tends to be compiled to int32 operations, so we
// mostly work with ints here
- uint32_t uii;
- uint32_t ujj;
- uint32_t ukk;
- uii = (unsigned char)sptr[0];
- ujj = (unsigned char)sptr[1];
- if ((uii & 0xdf) == 'C') {
- if (((ujj & 0xdf) == 'H') && ((((unsigned char)sptr[2]) & 0xdf) == 'R')) {
+ // assumes MAX_CHROM_TEXTNUM_SLEN == 2
+ uint32_t first_char_code = (unsigned char)sptr[0];
+ uint32_t second_char_code = (unsigned char)sptr[1];
+ if ((first_char_code & 0xdf) == 'C') {
+ if (((second_char_code & 0xdf) == 'H') && ((((unsigned char)sptr[2]) & 0xdf) == 'R')) {
sptr = &(sptr[3]);
- uii = (unsigned char)sptr[0];
- ujj = (unsigned char)sptr[1];
+ first_char_code = (unsigned char)sptr[0];
+ second_char_code = (unsigned char)sptr[1];
} else {
return -1;
}
}
- if (ujj > ' ') {
+ if (second_char_code > ' ') {
if (sptr[2] > ' ') {
return -1;
}
- ukk = uii - '0';
- if (ukk < 10) {
- uii = ujj - '0';
- if (uii < 10) {
- return ukk * 10 + uii;
- } else if (!ukk) {
+ const uint32_t first_char_toi = first_char_code - '0';
+ if (first_char_toi < 10) {
+ const uint32_t second_char_toi = second_char_code - '0';
+ if (second_char_toi < 10) {
+ return first_char_toi * 10 + second_char_toi;
+ } else if (!first_char_toi) {
// accept '0X', '0Y', '0M' emitted by Oxford software
- return single_letter_chrom(ujj);
+ return single_letter_chrom(second_char_code);
}
} else {
- uii &= 0xdf;
- if (uii == 'X') {
- if ((ujj == 'Y') || (ujj == 'y')) {
+ first_char_code &= 0xdf;
+ if (first_char_code == 'X') {
+ if ((second_char_code == 'Y') || (second_char_code == 'y')) {
return CHROM_XY;
}
- } else if (uii == 'M') {
- if ((ujj == 'T') || (ujj == 't')) {
+ } else if (first_char_code == 'M') {
+ if ((second_char_code == 'T') || (second_char_code == 't')) {
return CHROM_MT;
}
}
}
} else {
- ukk = uii - '0';
- if (ukk < 10) {
- return ukk;
+ const uint32_t first_char_toi = first_char_code - '0';
+ if (first_char_toi < 10) {
+ return first_char_toi;
} else {
- return single_letter_chrom(uii);
+ return single_letter_chrom(first_char_code);
}
}
return -1;
}
-int32_t get_chrom_code(const Chrom_info* chrom_info_ptr, const char* sptr) {
- // does not require string to be null-terminated, and does not perform
- // exhaustive error-checking
- // -1 = total fail, -2 = --allow-extra-chr ok
- int32_t ii = get_chrom_code_raw(sptr);
- uint32_t max_code_p1;
- uint32_t uii;
- if (ii >= MAX_POSSIBLE_CHROM) {
- switch (ii) {
- case CHROM_X:
- ii = chrom_info_ptr->x_code;
- break;
- case CHROM_Y:
- ii = chrom_info_ptr->y_code;
- break;
- case CHROM_XY:
- ii = chrom_info_ptr->xy_code;
- break;
- case CHROM_MT:
- ii = chrom_info_ptr->mt_code;
- }
- } else {
- max_code_p1 = chrom_info_ptr->max_code + 1;
- if (ii == -1) {
- if (bsearch_str_idx(sptr, strlen_se(sptr), &(chrom_info_ptr->nonstd_names[max_code_p1]), chrom_info_ptr->nonstd_name_order, chrom_info_ptr->name_ct, &uii)) {
- return -2;
- }
- return chrom_info_ptr->nonstd_name_order[uii] + max_code_p1;
- } else if (((uint32_t)ii) >= max_code_p1) {
- return -1;
+int32_t get_chrom_code(const char* chrom_name, const Chrom_info* chrom_info_ptr, uint32_t name_slen) {
+ // requires chrom_name to be null-terminated
+ // in practice, name_slen will usually already be known, may as well avoid
+ // redundant strlen() calls even though this uglifies the interface
+ // does not perform exhaustive error-checking
+ // -1 = --allow-extra-chr ok, -2 = total fail
+ const int32_t chrom_code_raw = get_chrom_code_raw(chrom_name);
+ if (((const uint32_t)chrom_code_raw) <= chrom_info_ptr->max_code) {
+ return chrom_code_raw;
+ }
+ if (chrom_code_raw != -1) {
+ if (chrom_code_raw >= MAX_POSSIBLE_CHROM) {
+ return chrom_info_ptr->xymt_codes[chrom_code_raw - MAX_POSSIBLE_CHROM];
}
+ return -2;
+ }
+ if (!chrom_info_ptr->name_ct) {
+ return -1;
}
- return ii;
+ // 0xffffffffU gets cast to -1
+ return (int32_t)nonstd_chrom_name_htable_find(chrom_name, (const char* const*)chrom_info_ptr->nonstd_names, chrom_info_ptr->nonstd_id_htable, name_slen);
}
-int32_t get_chrom_code2(const Chrom_info* chrom_info_ptr, char* sptr, uint32_t slen) {
- // when the chromosome name doesn't end with a space
- char* s_end = &(sptr[slen]);
- char tmpc = *s_end;
- int32_t retval;
- *s_end = ' ';
- retval = get_chrom_code(chrom_info_ptr, sptr);
+int32_t get_chrom_code_counted(const Chrom_info* chrom_info_ptr, uint32_t name_slen, char* chrom_name) {
+ // when the chromosome name isn't null-terminated
+ char* s_end = &(chrom_name[name_slen]);
+ const char tmpc = *s_end;
+ *s_end = '\0';
+ const int32_t retval = get_chrom_code(chrom_name, chrom_info_ptr, name_slen);
*s_end = tmpc;
return retval;
}
-uint32_t get_marker_chrom_fo_idx(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx) {
- const uint32_t* marker_binsearch = chrom_info_ptr->chrom_file_order_marker_idx;
+uint32_t get_variant_chrom_fo_idx(const Chrom_info* chrom_info_ptr, uintptr_t variant_uidx) {
+ const uint32_t* variant_binsearch = chrom_info_ptr->chrom_fo_vidx_start;
uint32_t chrom_fo_min = 0;
uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
- uint32_t chrom_fo_cur;
while (chrom_ct - chrom_fo_min > 1) {
- chrom_fo_cur = (chrom_ct + chrom_fo_min) / 2;
- if (marker_binsearch[chrom_fo_cur] > marker_uidx) {
+ const uint32_t chrom_fo_cur = (chrom_ct + chrom_fo_min) / 2;
+ if (variant_binsearch[chrom_fo_cur] > variant_uidx) {
chrom_ct = chrom_fo_cur;
} else {
chrom_fo_min = chrom_fo_cur;
@@ -4473,46 +4724,79 @@ uint32_t get_marker_chrom_fo_idx(const Chrom_info* chrom_info_ptr, uintptr_t mar
return chrom_fo_min;
}
-int32_t resolve_or_add_chrom_name(const char* cur_chrom_name, const char* file_descrip, uintptr_t line_idx, Chrom_info* chrom_info_ptr, int32_t* chrom_idx_ptr) {
- char** nonstd_names = chrom_info_ptr->nonstd_names;
- uint32_t* nonstd_name_order = chrom_info_ptr->nonstd_name_order;
- uint32_t max_code_p1 = chrom_info_ptr->max_code + 1;
- uint32_t name_ct = chrom_info_ptr->name_ct;
- uint32_t chrom_code_end = max_code_p1 + name_ct;
- uint32_t slen = strlen_se(cur_chrom_name);
- Ll_str* name_stack_ptr = chrom_info_ptr->incl_excl_name_stack;
- uint32_t in_name_stack = 0;
- uint32_t chrom_idx;
- uint32_t slen2;
- if (!bsearch_str_idx(cur_chrom_name, slen, &(nonstd_names[max_code_p1]), nonstd_name_order, chrom_info_ptr->name_ct, &chrom_idx)) {
- *chrom_idx_ptr = (int32_t)(chrom_idx + max_code_p1);
- return 0;
+void chrom_error(const char* chrom_name, const char* file_descrip, const Chrom_info* chrom_info_ptr, uintptr_t line_idx, int32_t error_code) {
+ // Logs an "Invalid chromosome code" error for chrom_name (mentioning the line number when line_idx is nonzero), then a hint about why the name was rejected. Assumes chrom_name is null-terminated.
+ const int32_t raw_code = get_chrom_code_raw(chrom_name);
+ logprint("\n");
+ if (line_idx) {
+ LOGERRPRINTFWW("Error: Invalid chromosome code '%s' on line %" PRIuPTR " of %s.\n", chrom_name, line_idx, file_descrip);
+ } else {
+ LOGERRPRINTFWW("Error: Invalid chromosome code '%s' in %s.\n", chrom_name, file_descrip);
}
- if (*cur_chrom_name == '#') {
- // this breaks VCF and PLINK 2 binary
+ if ((raw_code > ((int32_t)chrom_info_ptr->max_code)) && ((raw_code <= MAX_CHROM_TEXTNUM + XYMT_OFFSET_CT) || (raw_code >= MAX_POSSIBLE_CHROM))) { // raw_code is above max_code and in one of the two ranges tested here; the hints below attribute the rejection to the active chromosome set
+ if (chrom_info_ptr->species != SPECIES_UNKNOWN) {
+ if (chrom_info_ptr->species == SPECIES_HUMAN) {
+ logerrprint("(This is disallowed for humans. Check if the problem is with your data, or if\nyou forgot to define a different chromosome set with e.g. --chr-set.)\n");
+ } else {
+ logerrprint("(This is disallowed by the PLINK 1.07 species flag you used. You can\ntemporarily work around this restriction with --chr-set; contact the developers\nif you want the flag to be permanently redefined.)\n");
+ }
+ } else {
+ logerrprint("(This is disallowed by your --chr-set/--autosome-num parameters. Check if the\nproblem is with your data, or your command line.)\n");
+ }
+ } else if (error_code == -1) { // only error_code -1 gets the --allow-extra-chr hint; other codes print no extra hint
+ logerrprint("(Use --allow-extra-chr to force it to be accepted.)\n");
+ }
+}
+
+int32_t try_to_add_chrom_name(const char* chrom_name, const char* file_descrip, uintptr_t line_idx, uint32_t name_slen, uint32_t allow_extra_chroms, int32_t* chrom_idx_ptr, Chrom_info* chrom_info_ptr) {
+ // Registers chrom_name as a new nonstandard chromosome/contig name: copies it into nonstd_names, updates chrom_mask from the include/exclude name stack, stores the new code in *chrom_idx_ptr, and inserts it into nonstd_id_htable.
+ // Requires chrom_name to be null-terminated and nonstandard (i.e. not "2", "chr2", "chrX", etc.).
+ // Assumes *chrom_idx_ptr currently holds the return value of get_chrom_code(). Returns 0 on success, RET_MALFORMED_INPUT or RET_NOMEM on failure.
+ if ((!allow_extra_chroms) || ((*chrom_idx_ptr) == -2)) {
+ chrom_error(chrom_name, file_descrip, chrom_info_ptr, line_idx, *chrom_idx_ptr);
+ return RET_MALFORMED_INPUT;
+ }
+
+ // quasi-bugfix: remove redundant hash table check
+
+ if (chrom_name[0] == '#') {
+ // redundant with some of the comment-skipping loaders, but this isn't
+ // performance-critical
+ logprint("\n");
logerrprint("Error: Chromosome/contig names may not begin with '#'.\n");
- return RET_INVALID_FORMAT;
+ return RET_MALFORMED_INPUT;
}
- if (slen > MAX_ID_LEN) {
+ if (name_slen > MAX_ID_SLEN) {
+ logprint("\n");
if (line_idx) {
- LOGERRPRINTFWW("Error: Line %" PRIuPTR " of %s has an excessively long chromosome/contig name. (The " PROG_NAME_CAPS " limit is " MAX_ID_LEN_STR " characters.)\n", line_idx, file_descrip);
+ LOGERRPRINTFWW("Error: Line %" PRIuPTR " of %s has an excessively long chromosome/contig name. (The " PROG_NAME_CAPS " limit is " MAX_ID_SLEN_STR " characters.)\n", line_idx, file_descrip);
} else {
- LOGERRPRINTFWW("Error: Excessively long chromosome/contig name in %s. (The " PROG_NAME_CAPS " limit is " MAX_ID_LEN_STR " characters.)\n", file_descrip);
+ LOGERRPRINTFWW("Error: Excessively long chromosome/contig name in %s. (The " PROG_NAME_CAPS " limit is " MAX_ID_SLEN_STR " characters.)\n", file_descrip);
}
- return RET_INVALID_FORMAT;
+ return RET_MALFORMED_INPUT;
}
+ const uint32_t max_code_p1 = chrom_info_ptr->max_code + 1;
+ const uint32_t name_ct = chrom_info_ptr->name_ct;
+ const uint32_t chrom_code_end = max_code_p1 + name_ct; // code the new name will receive: first value past the existing nonstandard names
if (chrom_code_end == MAX_POSSIBLE_CHROM) {
+ logprint("\n");
logerrprint("Error: Too many distinct nonstandard chromosome/contig names.\n");
- return RET_INVALID_FORMAT;
+ return RET_MALFORMED_INPUT;
}
- nonstd_names[chrom_code_end] = (char*)malloc(slen + 1);
+ if (!name_ct) {
+ // lazy initialization on the first insert; presumably sets every slot to 0xffffffffU, the empty-slot value tested in the probe loop below -- confirm against fill_uint_one()
+ fill_uint_one(CHROM_NAME_HTABLE_SIZE, chrom_info_ptr->nonstd_id_htable);
+ }
+ char** nonstd_names = chrom_info_ptr->nonstd_names;
+ nonstd_names[chrom_code_end] = (char*)malloc(name_slen + 1);
if (!nonstd_names[chrom_code_end]) {
return RET_NOMEM;
}
+ Ll_str* name_stack_ptr = chrom_info_ptr->incl_excl_name_stack;
+ uint32_t in_name_stack = 0; // set to 1 if chrom_name matches an entry of the include/exclude name stack
while (name_stack_ptr) {
// there shouldn't be many of these, so sorting is unimportant
- slen2 = strlen(name_stack_ptr->ss);
- if ((slen == slen2) && (!memcmp(cur_chrom_name, name_stack_ptr->ss, slen))) {
+ if (!strcmp(chrom_name, name_stack_ptr->ss)) {
in_name_stack = 1;
break;
}
@@ -4521,28 +4805,85 @@ int32_t resolve_or_add_chrom_name(const char* cur_chrom_name, const char* file_d
if ((in_name_stack && chrom_info_ptr->is_include_stack) || ((!in_name_stack) && (!chrom_info_ptr->is_include_stack))) {
SET_BIT(chrom_code_end, chrom_info_ptr->chrom_mask);
}
- memcpy(nonstd_names[chrom_code_end], cur_chrom_name, slen);
- nonstd_names[chrom_code_end][slen] = '\0';
+ memcpy(nonstd_names[chrom_code_end], chrom_name, name_slen + 1); // the +1 copies the null terminator that chrom_name is required to have
*chrom_idx_ptr = (int32_t)chrom_code_end;
- for (slen2 = name_ct; slen2 > chrom_idx; slen2--) {
- nonstd_name_order[slen2] = nonstd_name_order[slen2 - 1];
+ chrom_info_ptr->name_ct = name_ct + 1;
+ uint32_t* id_htable = chrom_info_ptr->nonstd_id_htable;
+ uint32_t hashval = murmurhash3_32(chrom_name, name_slen) % CHROM_NAME_HTABLE_SIZE;
+ uint32_t next_incr = 1;
+ while (1) {
+ if (id_htable[hashval] == 0xffffffffU) { // empty slot: store the new code here and finish
+ id_htable[hashval] = chrom_code_end;
+ return 0;
+ }
+ // no overflow danger here
+ hashval += next_incr;
+ if (hashval >= CHROM_NAME_HTABLE_SIZE) {
+ hashval -= CHROM_NAME_HTABLE_SIZE;
+ }
+ next_incr += 2; // quadratic probing
+ }
+}
+
+uint32_t allele_set(const char* newval, uint32_t slen, char** allele_ptr) { // Points *allele_ptr at a copy of the slen-byte string newval; returns 1 on allocation failure, 0 otherwise. The previous *allele_ptr value is overwritten without being freed.
+ char* newptr;
+ if (slen == 1) {
+ newptr = (char*)(&(g_one_char_strs[((unsigned char)*newval) * 2])); // length-1 strings point into the shared g_one_char_strs table (indexed at 2 bytes per character value) instead of being malloc'd
+ } else {
+ newptr = (char*)malloc(slen + 1);
+ if (!newptr) {
+ return 1;
+ }
+ memcpyx(newptr, newval, slen, '\0'); // presumably copies slen bytes and appends the terminator byte -- confirm against memcpyx()
}
- nonstd_name_order[chrom_idx] = name_ct;
- chrom_info_ptr->name_ct += 1;
+ *allele_ptr = newptr;
return 0;
}
+uint32_t allele_reset(const char* newval, uint32_t slen, char** allele_ptr) { // Like allele_set(), but also frees the old *allele_ptr string when it is longer than one character. Returns 1 on allocation failure (old value left untouched), 0 otherwise.
+ char* newptr;
+ if (slen == 1) {
+ newptr = (char*)(&(g_one_char_strs[((uint8_t)*newval) * 2])); // length-1 strings point into the shared g_one_char_strs table instead of being malloc'd
+ } else {
+ newptr = (char*)malloc(slen + 1);
+ if (!newptr) {
+ return 1;
+ }
+ memcpyx(newptr, newval, slen, '\0'); // presumably copies slen bytes and appends the terminator byte -- confirm against memcpyx()
+ }
+ if (allele_ptr[0][1]) { // nonzero second byte: the old string has more than one character, so it is assumed to be heap-allocated rather than a g_one_char_strs entry
+ free(*allele_ptr);
+ }
+ *allele_ptr = newptr;
+ return 0;
+}
+
+void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_entry_ct, char** allele_storage) { // Frees every allele_storage entry that does not point into the first 512 bytes of g_one_char_strs. Does nothing when allele_storage is null or max_allele_slen <= 1; the allele_storage array itself is not freed here.
+ if (allele_storage && (max_allele_slen > 1)) {
+ const uintptr_t one_char_strs_addr = (uintptr_t)g_one_char_strs;
+ for (uintptr_t idx = 0; idx < allele_storage_entry_ct; ++idx) {
+ char* cur_entry = allele_storage[idx];
+ assert(cur_entry);
+ // take advantage of unsigned wraparound: one comparison rejects addresses below g_one_char_strs as well as those 512 or more bytes past its start
+ if ((((uintptr_t)cur_entry) - one_char_strs_addr) >= 512) {
+ free(cur_entry);
+ }
+ }
+ }
+}
+
void refresh_chrom_info(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx, uint32_t* __restrict chrom_end_ptr, uint32_t* __restrict chrom_fo_idx_ptr, uint32_t* __restrict is_x_ptr, uint32_t* __restrict is_y_ptr, uint32_t* __restrict is_mt_ptr, uint32_t* __restrict is_haploid_ptr) {
+ // Advances *chrom_fo_idx_ptr to the file-order chromosome containing marker_uidx, then refreshes *chrom_end_ptr and the X/Y/MT/haploid flags for it. Assumes we are at the end of the chromosome denoted by chrom_fo_idx. Ok
+ // for chrom_fo_idx == 0xffffffffU (the unsigned + 1 below then wraps around to index 0).
// assumes marker_uidx < unfiltered_marker_ct
- int32_t chrom_idx;
- *chrom_end_ptr = chrom_info_ptr->chrom_file_order_marker_idx[(*chrom_fo_idx_ptr) + 1];
+ *chrom_end_ptr = chrom_info_ptr->chrom_fo_vidx_start[(*chrom_fo_idx_ptr) + 1];
while (marker_uidx >= (*chrom_end_ptr)) {
- *chrom_end_ptr = chrom_info_ptr->chrom_file_order_marker_idx[(++(*chrom_fo_idx_ptr)) + 1];
+ *chrom_end_ptr = chrom_info_ptr->chrom_fo_vidx_start[(++(*chrom_fo_idx_ptr)) + 1]; // step forward one file-order chromosome at a time until its end index exceeds marker_uidx
}
- chrom_idx = chrom_info_ptr->chrom_file_order[*chrom_fo_idx_ptr];
- *is_x_ptr = (chrom_idx == chrom_info_ptr->x_code);
- *is_y_ptr = (chrom_idx == chrom_info_ptr->y_code);
- *is_mt_ptr = (chrom_idx == chrom_info_ptr->mt_code);
+ const int32_t chrom_idx = chrom_info_ptr->chrom_file_order[*chrom_fo_idx_ptr]; // chromosome code stored at this file-order position
+ *is_x_ptr = (chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ *is_y_ptr = (chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
+ *is_mt_ptr = (chrom_idx == chrom_info_ptr->xymt_codes[MT_OFFSET]);
*is_haploid_ptr = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
}
@@ -4551,11 +4892,20 @@ int32_t single_chrom_start(const Chrom_info* chrom_info_ptr, const uintptr_t* ma
// Returns first marker_uidx in chromosome if there is only one, or -1 if
// there's more than one chromosome.
uint32_t first_marker_uidx = next_unset_unsafe(marker_exclude, 0);
- uint32_t last_marker_chrom = get_marker_chrom(chrom_info_ptr, last_clear_bit(marker_exclude, unfiltered_marker_ct));
- if (get_marker_chrom(chrom_info_ptr, first_marker_uidx) == last_marker_chrom) {
- return first_marker_uidx;
+ uint32_t last_marker_chrom = get_variant_chrom(chrom_info_ptr, last_clear_bit(marker_exclude, unfiltered_marker_ct));
+ return (get_variant_chrom(chrom_info_ptr, first_marker_uidx) == last_marker_chrom)? first_marker_uidx : -1;
+}
+
+double get_dmedian(const double* sorted_arr, uintptr_t len) { // Returns the median of sorted_arr[0..len-1], which the caller must already have sorted; returns 0.0 when len is 0.
+ if (len) {
+ if (len % 2) {
+ return sorted_arr[len / 2]; // odd length: the single middle element
+ } else {
+ return (sorted_arr[len / 2] + sorted_arr[(len / 2) - 1]) * 0.5; // even length: mean of the two middle elements
+ }
+ } else {
+ return 0.0;
}
- return -1;
}
#ifdef __cplusplus
@@ -4573,18 +4923,6 @@ double destructive_get_dmedian(uintptr_t len, double* unsorted_arr) {
}
}
#else
-double get_dmedian(const double* sorted_arr, uintptr_t len) {
- if (len) {
- if (len % 2) {
- return sorted_arr[len / 2];
- } else {
- return (sorted_arr[len / 2] + sorted_arr[(len / 2) - 1]) * 0.5;
- }
- } else {
- return 0.0;
- }
-}
-
double destructive_get_dmedian(uintptr_t len, double* unsorted_arr) {
// no, I'm not gonna bother reimplementing introselect just for folks who
// insist on using gcc over g++
@@ -4775,14 +5113,14 @@ char* scan_for_duplicate_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id
return &(sorted_ids[id_idx * max_id_len]);
}
}
- return NULL;
+ return nullptr;
}
char* scan_for_duplicate_or_overlap_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_len, const char* sorted_nonoverlap_ids, uintptr_t nonoverlap_id_ct, uintptr_t max_nonoverlap_id_len) {
// extended scan_for_duplicate_ids() which also verifies that no entry in
// sorted_ids matches any entry in sorted_nonoverlap_ids.
- // nonoverlap_id_ct == 0 and sorted_nonoverlap_ids == NULL ok. id_ct cannot
- // be zero, though.
+ // nonoverlap_id_ct == 0 and sorted_nonoverlap_ids == nullptr ok. id_ct
+ // cannot be zero, though.
uintptr_t nonoverlap_id_idx = 0;
uintptr_t id_idx = 0;
char* cur_id_ptr = sorted_ids;
@@ -4797,7 +5135,7 @@ char* scan_for_duplicate_or_overlap_ids(char* sorted_ids, uintptr_t id_ct, uintp
ii = strcmp(cur_id_ptr, nonoverlap_id_ptr);
if (ii < 0) {
if (++id_idx == id_ct) {
- return NULL;
+ return nullptr;
}
other_id_ptr = &(cur_id_ptr[max_id_len]);
if (!strcmp(cur_id_ptr, other_id_ptr)) {
@@ -5974,8 +6312,9 @@ uint32_t chrom_window_max(const uint32_t* marker_pos, const uintptr_t* marker_ex
return ct_max;
}
// assumes chrom_idx exists
- uint32_t chrom_end = chrom_info_ptr->chrom_end[chrom_idx];
- uint32_t marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_start[chrom_idx], chrom_end);
+ uint32_t chrom_fo_idx = chrom_info_ptr->chrom_idx_to_foidx[chrom_idx];
+ uint32_t chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ uint32_t marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
uint32_t marker_ct = chrom_end - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, chrom_end);
if (marker_ct <= cur_window_max) {
return cur_window_max;
@@ -6079,7 +6418,7 @@ uint32_t window_back(const uint32_t* __restrict marker_pos, const uintptr_t* mar
}
uint32_t window_forward(const uint32_t* __restrict marker_pos, const uintptr_t* marker_exclude, uint32_t marker_uidx_start, uint32_t marker_uidx_last, uint32_t count_max, uint32_t bp_max, uint32_t* __restrict window_lead_ct_ptr) {
- // window_lead_ct_ptr currently cannot be NULL
+ // window_lead_ct_ptr currently cannot be nullptr
if (marker_uidx_start == marker_uidx_last) {
*window_lead_ct_ptr = 0;
return marker_uidx_start;
@@ -7635,7 +7974,7 @@ void fill_all_bits(uintptr_t ct, uintptr_t* bitarr) {
// ok for ct == 0
uintptr_t quotient = ct / BITCT;
uintptr_t remainder = ct % BITCT;
- fill_ulong_one(bitarr, quotient);
+ fill_ulong_one(quotient, bitarr);
if (remainder) {
bitarr[quotient] = (ONELU << remainder) - ONELU;
}
@@ -7652,7 +7991,7 @@ uint32_t numeric_range_list_to_bitarr(const Range_list* range_list_ptr, uint32_t
uint32_t idx1;
uint32_t idx2;
for (name_idx = 0; name_idx < name_ct; name_idx++) {
- if (scan_uint_capped(&(names[name_idx * name_max_len]), idx_max / 10, idx_max % 10, &idx1)) {
+ if (scan_uint_capped(&(names[name_idx * name_max_len]), idx_max, &idx1)) {
if (ignore_overflow) {
continue;
}
@@ -7660,7 +7999,7 @@ uint32_t numeric_range_list_to_bitarr(const Range_list* range_list_ptr, uint32_t
}
if (starts_range[name_idx]) {
name_idx++;
- if (scan_uint_capped(&(names[name_idx * name_max_len]), idx_max / 10, idx_max % 10, &idx2)) {
+ if (scan_uint_capped(&(names[name_idx * name_max_len]), idx_max, &idx2)) {
if (!ignore_overflow) {
return 1;
}
@@ -7748,11 +8087,11 @@ int32_t string_range_list_to_bitarr_alloc(char* header_line, uint32_t item_ct, u
return RET_NOMEM;
}
// kludge to use sort_item_ids()
- fill_ulong_zero((uintptr_t*)seen_idxs, BITCT_TO_WORDCT(name_ct));
+ fill_ulong_zero(BITCT_TO_WORDCT(name_ct), (uintptr_t*)seen_idxs);
if (sort_item_ids(name_ct, (uintptr_t*)seen_idxs, 0, range_list_ptr->names, range_list_ptr->name_max_len, 0, 0, strcmp_deref, &sorted_ids, &id_map)) {
return RET_NOMEM;
}
- fill_int_one(seen_idxs, name_ct);
+ fill_int_one(name_ct, seen_idxs);
retval = string_range_list_to_bitarr(header_line, item_ct, fixed_len, range_list_ptr, sorted_ids, id_map, range_list_flag, file_descrip, *bitarr_ptr, seen_idxs);
bigstack_reset(seen_idxs);
return retval;
@@ -7810,10 +8149,10 @@ int32_t string_range_list_to_bitarr2(const char* __restrict sorted_ids, const ui
uint32_t count_non_autosomal_markers(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t count_x, uint32_t count_mt) {
// for backward compatibility, unplaced markers are considered to be
// autosomal here
+ const int32_t x_code = chrom_info_ptr->xymt_codes[X_OFFSET];
+ const int32_t y_code = chrom_info_ptr->xymt_codes[Y_OFFSET];
+ const int32_t mt_code = chrom_info_ptr->xymt_codes[MT_OFFSET];
uint32_t ct = 0;
- int32_t x_code = chrom_info_ptr->x_code;
- int32_t y_code = chrom_info_ptr->y_code;
- int32_t mt_code = chrom_info_ptr->mt_code;
if (count_x && (x_code != -1)) {
ct += count_chrom_markers(chrom_info_ptr, marker_exclude, x_code);
}
@@ -7827,26 +8166,26 @@ uint32_t count_non_autosomal_markers(const Chrom_info* chrom_info_ptr, const uin
}
int32_t conditional_allocate_non_autosomal_markers(const Chrom_info* chrom_info_ptr, uintptr_t unfiltered_marker_ct, const uintptr_t* marker_exclude_orig, uint32_t marker_ct, uint32_t count_x, uint32_t count_mt, const char* calc_descrip, uintptr_t** marker_exclude_ptr, uint32_t* newly_excluded_ct_ptr) {
- uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
- int32_t x_code = chrom_info_ptr->x_code;
- int32_t y_code = chrom_info_ptr->y_code;
- int32_t mt_code = chrom_info_ptr->mt_code;
- uint32_t x_ct = 0;
- uint32_t y_ct = 0;
- uint32_t mt_ct = 0;
+ // if all markers are autosomal (or pseudoautosomal) diploid, nothing
+ // happens. otherwise, this creates a marker_exclude copy with
+ // non-{autosomal diploid} markers excluded for the caller to use.
+ const uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
+ const int32_t* xymt_codes = chrom_info_ptr->xymt_codes;
+ uint32_t xymt_cts[XYMT_OFFSET_CT];
+ fill_uint_zero(XYMT_OFFSET_CT, xymt_cts);
if (is_set(chrom_info_ptr->haploid_mask, 0)) {
*newly_excluded_ct_ptr = marker_ct;
} else {
- if (count_x && (x_code != -1)) {
- x_ct = count_chrom_markers(chrom_info_ptr, marker_exclude_orig, x_code);
+ if (count_x && (xymt_codes[X_OFFSET] != -1)) {
+ xymt_cts[X_OFFSET] = count_chrom_markers(chrom_info_ptr, marker_exclude_orig, xymt_codes[X_OFFSET]);
}
- if (y_code != -1) {
- y_ct = count_chrom_markers(chrom_info_ptr, marker_exclude_orig, y_code);
+ if (xymt_codes[Y_OFFSET] != -1) {
+ xymt_cts[Y_OFFSET] = count_chrom_markers(chrom_info_ptr, marker_exclude_orig, xymt_codes[Y_OFFSET]);
}
- if (count_mt && (mt_code != -1)) {
- mt_ct = count_chrom_markers(chrom_info_ptr, marker_exclude_orig, mt_code);
+ if (count_mt && (xymt_codes[MT_OFFSET] != -1)) {
+ xymt_cts[MT_OFFSET] = count_chrom_markers(chrom_info_ptr, marker_exclude_orig, xymt_codes[MT_OFFSET]);
}
- *newly_excluded_ct_ptr = x_ct + y_ct + mt_ct;
+ *newly_excluded_ct_ptr = xymt_cts[X_OFFSET] + xymt_cts[Y_OFFSET] + xymt_cts[MT_OFFSET];
}
if (*newly_excluded_ct_ptr) {
LOGPRINTF("Excluding %u variant%s on non-autosomes from %s.\n", *newly_excluded_ct_ptr, (*newly_excluded_ct_ptr == 1)? "" : "s", calc_descrip);
@@ -7862,26 +8201,21 @@ int32_t conditional_allocate_non_autosomal_markers(const Chrom_info* chrom_info_
return RET_NOMEM;
}
memcpy(*marker_exclude_ptr, marker_exclude_orig, unfiltered_marker_ctl * sizeof(intptr_t));
- if (x_ct) {
- fill_bits(chrom_info_ptr->chrom_start[(uint32_t)x_code], chrom_info_ptr->chrom_end[(uint32_t)x_code] - chrom_info_ptr->chrom_start[(uint32_t)x_code], *marker_exclude_ptr);
- }
- if (y_ct) {
- fill_bits(chrom_info_ptr->chrom_start[(uint32_t)y_code], chrom_info_ptr->chrom_end[(uint32_t)y_code] - chrom_info_ptr->chrom_start[(uint32_t)y_code], *marker_exclude_ptr);
- }
- if (mt_ct) {
- fill_bits(chrom_info_ptr->chrom_start[(uint32_t)mt_code], chrom_info_ptr->chrom_end[(uint32_t)mt_code] - chrom_info_ptr->chrom_start[(uint32_t)mt_code], *marker_exclude_ptr);
+ for (uint32_t xymt_idx = 0; xymt_idx < XYMT_OFFSET_CT; ++xymt_idx) {
+ if (xymt_cts[xymt_idx]) {
+ const uint32_t chrom_fo_idx = chrom_info_ptr->chrom_idx_to_foidx[xymt_codes[xymt_idx]];
+ fill_bits(chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1] - chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], *marker_exclude_ptr);
+ }
}
return 0;
}
uint32_t get_max_chrom_size(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t* last_chrom_fo_idx_ptr) {
- uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
+ const uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
uint32_t max_chrom_size = 0;
uint32_t last_chrom_fo_idx = 0;
- uint32_t chrom_fo_idx;
- uint32_t cur_chrom_size;
- for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
- cur_chrom_size = count_chrom_markers(chrom_info_ptr, marker_exclude, chrom_info_ptr->chrom_file_order[chrom_fo_idx]);
+ for (uint32_t chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
+ const uint32_t cur_chrom_size = count_chrom_markers(chrom_info_ptr, marker_exclude, chrom_info_ptr->chrom_file_order[chrom_fo_idx]);
if (cur_chrom_size) {
last_chrom_fo_idx = chrom_fo_idx;
if (cur_chrom_size > max_chrom_size) {
@@ -8925,7 +9259,7 @@ void extract_collapsed_missing_bitfield(uintptr_t* lptr, uintptr_t unfiltered_sa
*missing_bitfield++ = cur_write;
}
} else {
- fill_ulong_zero(missing_bitfield, BITCT_TO_WORDCT(sample_ct));
+ fill_ulong_zero(BITCT_TO_WORDCT(sample_ct), missing_bitfield);
sample_idx = 0;
for (widx = 0; sample_idx < sample_ct; widx++, lptr++) {
cur_mask = *sample_include_quaterarr++;
@@ -9136,7 +9470,7 @@ uint32_t alloc_raw_haploid_filters(uint32_t unfiltered_sample_ct, uint32_t hh_ex
void haploid_fix_multiple(uintptr_t* marker_exclude, uintptr_t marker_uidx_start, uintptr_t marker_ct, Chrom_info* chrom_info_ptr, uint32_t hh_exists, uintptr_t* sample_raw_include2, uintptr_t* sample_raw_male_include2, uintptr_t unfiltered_sample_ct, uintptr_t byte_ct_per_marker, unsigned char* loadbuf) {
uintptr_t marker_idx = 0;
uintptr_t marker_uidx = next_unset_ul_unsafe(marker_exclude, marker_uidx_start);
- uint32_t chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ uint32_t chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
uint32_t chrom_idx;
uint32_t is_x;
uint32_t is_y;
@@ -9146,9 +9480,9 @@ void haploid_fix_multiple(uintptr_t* marker_exclude, uintptr_t marker_uidx_start
while (marker_idx < marker_ct) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- is_x = (chrom_info_ptr->x_code == (int32_t)chrom_idx);
- is_y = (chrom_info_ptr->y_code == (int32_t)chrom_idx);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ is_x = (chrom_info_ptr->xymt_codes[X_OFFSET] == (int32_t)chrom_idx);
+ is_y = (chrom_info_ptr->xymt_codes[Y_OFFSET] == (int32_t)chrom_idx);
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
marker_idx_chrom_end = marker_idx + chrom_end - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, chrom_end);
if (marker_idx_chrom_end > marker_ct) {
@@ -9398,7 +9732,7 @@ int32_t scan_max_strlen(char* fname, uint32_t colnum, uint32_t colnum2, uint32_t
// colnum and colnum2 are 1-based indices. If colnum2 is zero, only colnum
// is scanned.
// Includes terminating null in lengths.
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t loadbuf_size = bigstack_left();
uintptr_t max_str_len = *max_str_len_ptr;
uintptr_t max_str2_len = 0;
@@ -9501,7 +9835,7 @@ int32_t scan_max_fam_indiv_strlen(char* fname, uint32_t colnum, uintptr_t* max_s
// colnum is a 1-based index with the FID column number; IID column is
// assumed to follow.
// Includes terminating null in lengths.
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t loadbuf_size = bigstack_left();
uintptr_t max_sample_id_len = *max_sample_id_len_ptr;
uintptr_t line_idx = 0;
@@ -9603,7 +9937,7 @@ char* alloc_and_init_collapsed_arr(char* item_arr, uintptr_t item_len, uintptr_t
return item_arr;
}
if (bigstack_alloc_c(filtered_ct * item_len, &new_arr)) {
- return NULL;
+ return nullptr;
}
wptr = new_arr;
wptr_end = &(new_arr[filtered_ct * item_len]);
@@ -9629,7 +9963,7 @@ char* alloc_and_init_collapsed_arr_incl(char* item_arr, uintptr_t item_len, uint
return item_arr;
}
if (bigstack_alloc_c(filtered_ct * item_len, &new_arr)) {
- return NULL;
+ return nullptr;
}
wptr = new_arr;
wptr_end = &(new_arr[filtered_ct * item_len]);
@@ -9858,16 +10192,14 @@ void copy_when_nonmissing(uintptr_t* loadbuf, char* source, uintptr_t elem_size,
do {
cur_word = *loadbuf++;
cur_word = cur_word & (~(cur_word >> 1)) & FIVEMASK;
- if (cur_word) {
- do {
- new_missing_idx = sample_idx_offset + (CTZLU(cur_word) / 2);
- diff = new_missing_idx - last_missing_p1;
- if (diff) {
- dest = memcpya(dest, &(source[last_missing_p1 * elem_size]), diff * elem_size);
- }
- last_missing_p1 = new_missing_idx + 1;
- cur_word &= cur_word - 1;
- } while (cur_word);
+ while (cur_word) {
+ new_missing_idx = sample_idx_offset + (CTZLU(cur_word) / 2);
+ diff = new_missing_idx - last_missing_p1;
+ if (diff) {
+ dest = memcpya(dest, &(source[last_missing_p1 * elem_size]), diff * elem_size);
+ }
+ last_missing_p1 = new_missing_idx + 1;
+ cur_word &= cur_word - 1;
}
sample_idx_offset += BITCT2;
} while (loadbuf < loadbuf_end);
@@ -9880,7 +10212,7 @@ void copy_when_nonmissing(uintptr_t* loadbuf, char* source, uintptr_t elem_size,
uint32_t collapse_duplicate_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max_id_len, uint32_t* id_starts) {
// Collapses array of sorted IDs to remove duplicates, and writes
// pre-collapse positions to id_starts (so e.g. duplication count of any
- // sample ID can be determined via subtraction) if it isn't NULL.
+ // sample ID can be determined via subtraction) if it isn't nullptr.
// Returns id_ct of collapsed array.
uintptr_t read_idx;
uintptr_t write_idx;
@@ -9920,8 +10252,8 @@ uint32_t collapse_duplicate_ids(char* sorted_ids, uintptr_t id_ct, uintptr_t max
}
void range_list_init(Range_list* range_list_ptr) {
- range_list_ptr->names = NULL;
- range_list_ptr->starts_range = NULL;
+ range_list_ptr->names = nullptr;
+ range_list_ptr->starts_range = nullptr;
range_list_ptr->name_ct = 0;
range_list_ptr->name_max_len = 0;
}
@@ -9984,7 +10316,7 @@ void generate_perm1_interleaved(uint32_t tot_ct, uint32_t set_ct, uintptr_t perm
magic_num(tot_quotient, &totq_magic, &totq_preshift, &totq_postshift, &totq_incr);
if (set_ct * 2 < tot_ct) {
for (ulii = 0; ulii < tot_ctl; ulii++) {
- fill_ulong_zero(&(perm_buf[perm_idx + (ulii * perm_ct)]), uljj);
+ fill_ulong_zero(uljj, &(perm_buf[perm_idx + (ulii * perm_ct)]));
}
for (; perm_idx < perm_ct; perm_idx++) {
pbptr = &(perm_buf[perm_idx]);
@@ -10003,7 +10335,7 @@ void generate_perm1_interleaved(uint32_t tot_ct, uint32_t set_ct, uintptr_t perm
}
} else {
for (ulii = 0; ulii < tot_ctl; ulii++) {
- fill_ulong_one(&(perm_buf[perm_idx + (ulii * perm_ct)]), uljj);
+ fill_ulong_one(uljj, &(perm_buf[perm_idx + (ulii * perm_ct)]));
}
// "set" has reversed meaning here
set_ct = tot_ct - set_ct;
@@ -10111,7 +10443,7 @@ void join_threads(pthread_t* threads, uint32_t ctp1) {
#else
uint32_t uii;
for (uii = 0; uii < ctp1; uii++) {
- pthread_join(threads[uii], NULL);
+ pthread_join(threads[uii], nullptr);
}
#endif
}
@@ -10128,13 +10460,13 @@ int32_t spawn_threads(pthread_t* threads, void* (*start_routine)(void*), uintptr
}
for (ulii = 1; ulii < ct; ulii++) {
#ifdef _WIN32
- threads[ulii - 1] = (HANDLE)_beginthreadex(NULL, 4096, start_routine, (void*)ulii, 0, NULL);
+ threads[ulii - 1] = (HANDLE)_beginthreadex(nullptr, 4096, start_routine, (void*)ulii, 0, nullptr);
if (!threads[ulii - 1]) {
join_threads(threads, ulii);
return -1;
}
#else
- if (pthread_create(&(threads[ulii - 1]), NULL, start_routine, (void*)ulii)) {
+ if (pthread_create(&(threads[ulii - 1]), nullptr, start_routine, (void*)ulii)) {
join_threads(threads, ulii);
return -1;
}
@@ -10264,7 +10596,7 @@ void join_threads2(pthread_t* threads, uint32_t ctp1, uint32_t is_last_block) {
// keep mutex until next block loaded
} else {
for (uii = 0; uii < ctp1; uii++) {
- pthread_join(threads[uii], NULL);
+ pthread_join(threads[uii], nullptr);
}
// slightly inefficient if there are multiple multithreaded commands being
// run, but if different commands require different numbers of threads,
@@ -10299,11 +10631,11 @@ int32_t spawn_threads2(pthread_t* threads, void* (*start_routine)(void*), uintpt
return 0;
}
for (ulii = 1; ulii < ct; ulii++) {
- g_thread_start_next_event[ulii - 1] = CreateEvent(NULL, FALSE, FALSE, NULL);
- g_thread_cur_block_done_events[ulii - 1] = CreateEvent(NULL, FALSE, FALSE, NULL);
+ g_thread_start_next_event[ulii - 1] = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+ g_thread_cur_block_done_events[ulii - 1] = CreateEvent(nullptr, FALSE, FALSE, nullptr);
}
for (ulii = 1; ulii < ct; ulii++) {
- threads[ulii - 1] = (HANDLE)_beginthreadex(NULL, 4096, start_routine, (void*)ulii, 0, NULL);
+ threads[ulii - 1] = (HANDLE)_beginthreadex(nullptr, 4096, start_routine, (void*)ulii, 0, nullptr);
if (!threads[ulii - 1]) {
join_threads2(threads, ulii, is_last_block);
return -1;
@@ -10325,13 +10657,13 @@ int32_t spawn_threads2(pthread_t* threads, void* (*start_routine)(void*), uintpt
if (ct == 1) {
return 0;
}
- if (pthread_mutex_init(&g_thread_sync_mutex, NULL) ||
- pthread_cond_init(&g_thread_cur_block_done_condvar, NULL) ||
- pthread_cond_init(&g_thread_start_next_condvar, NULL)) {
+ if (pthread_mutex_init(&g_thread_sync_mutex, nullptr) ||
+ pthread_cond_init(&g_thread_cur_block_done_condvar, nullptr) ||
+ pthread_cond_init(&g_thread_start_next_condvar, nullptr)) {
return -1;
}
for (ulii = 1; ulii < ct; ulii++) {
- if (pthread_create(&(threads[ulii - 1]), NULL, start_routine, (void*)ulii)) {
+ if (pthread_create(&(threads[ulii - 1]), nullptr, start_routine, (void*)ulii)) {
join_threads2(threads, ulii, is_last_block);
return -1;
}
diff --git a/plink_common.h b/plink_common.h
index 84eb929..bbd57ae 100644
--- a/plink_common.h
+++ b/plink_common.h
@@ -45,10 +45,19 @@
#define EOLN_STR "\r\n"
#define FOPEN_RB "rb"
#define FOPEN_WB "wb"
+ #ifdef _WIN64
+ #define getc_unlocked _fgetc_nolock
+ #define putc_unlocked _fputc_nolock
+ #else
+ #define getc_unlocked getc
+ #define putc_unlocked putc
+ #endif
+ #define uint64_t unsigned long long
+ #define int64_t long long
#else
#include <pthread.h>
#define THREAD_RET_TYPE void*
- #define THREAD_RETURN return NULL
+ #define THREAD_RETURN return nullptr
#ifdef __cplusplus
#ifndef PRId64
#define PRId64 "lld"
@@ -57,20 +66,15 @@
#define EOLN_STR "\n"
#define FOPEN_RB "r"
#define FOPEN_WB "w"
-#endif
-
-#ifdef __APPLE__
- // fix OS X 10.9 build break; unfortunately, this doesn't work on some other
- // systems...
- typedef unsigned long long uint64_t;
- typedef long long int64_t;
-#else
- #if __GNUC__ < 6
- // not sure how the f*** this inconsistency between GCC 6 and earlier
- // versions is supposed to be handled, hopefully there will be official
- // guidance...
- #define uint64_t unsigned long long
- #define int64_t long long
+ #ifndef __APPLE__
+ // argh
+ // not sure what the right threshold actually is, but this works for now
+ // (may break on gcc <3.0? but that shouldn't matter anymore)
+ // tried defining GCC_VERSION, but that didn't always work
+ #if (__GNUC__ <= 4) && (__GNUC_MINOR__ < 8)
+ #define uint64_t unsigned long long
+ #define int64_t long long
+ #endif
#endif
#endif
@@ -98,6 +102,16 @@
#define HEADER_INLINE static inline
#endif
+#ifndef HAVE_NULLPTR // fallback so the sources can spell null pointers as nullptr; define HAVE_NULLPTR to suppress it
+ #ifndef __cplusplus // compiled as C
+ #define nullptr NULL
+ #else
+ #if __cplusplus <= 199711L // C++98/03, which predates the nullptr keyword
+ #define nullptr NULL
+ #endif
+ #endif
+#endif
+
// It would be useful to disable compilation on big-endian platforms, but I
// don't see a decent portable way to do this (see e.g. discussion at
// http://esr.ibiblio.org/?p=5095 ).
@@ -231,6 +245,9 @@
#define RET_NETWORK 14
#define LOAD_PHENO_LAST_COL 127
+// for 2.0 -> 1.9 backports
+#define RET_MALFORMED_INPUT RET_INVALID_FORMAT
+
#define MISC_AFFECTION_01 1LLU
#define MISC_NONFOUNDERS 2LLU
#define MISC_MAF_SUCC 4LLU
@@ -334,30 +351,31 @@
#define CALC_CMH 0x80000000LLU
#define CALC_HOMOG 0x100000000LLU
#define CALC_LASSO 0x200000000LLU
-#define CALC_WRITE_SET 0x400000000LLU
-#define CALC_LD 0x800000000LLU
-#define CALC_EPI 0x1000000000LLU
-#define CALC_TESTMISS 0x2000000000LLU
-#define CALC_TESTMISHAP 0x4000000000LLU
-#define CALC_SEXCHECK 0x8000000000LLU
-#define CALC_CLUMP 0x10000000000LLU
-#define CALC_PCA 0x20000000000LLU
-#define CALC_BLOCKS 0x40000000000LLU
-#define CALC_SCORE 0x80000000000LLU
-#define CALC_MENDEL 0x100000000000LLU
-#define CALC_HET 0x200000000000LLU
-#define CALC_FLIPSCAN 0x400000000000LLU
-#define CALC_TDT 0x800000000000LLU
-#define CALC_MAKE_PERM_PHENO 0x1000000000000LLU
-#define CALC_QFAM 0x2000000000000LLU
-#define CALC_FST 0x4000000000000LLU
-#define CALC_SHOW_TAGS 0x8000000000000LLU
-#define CALC_MAKE_BIM 0x10000000000000LLU
-#define CALC_MAKE_FAM 0x20000000000000LLU
-#define CALC_WRITE_VAR_RANGES 0x40000000000000LLU
-#define CALC_DUPVAR 0x80000000000000LLU
-#define CALC_RPLUGIN 0x100000000000000LLU
-#define CALC_DFAM 0x200000000000000LLU
+#define CALC_LASSO_LAMBDA 0x400000000LLU
+#define CALC_WRITE_SET 0x800000000LLU
+#define CALC_LD 0x1000000000LLU
+#define CALC_EPI 0x2000000000LLU
+#define CALC_TESTMISS 0x4000000000LLU
+#define CALC_TESTMISHAP 0x8000000000LLU
+#define CALC_SEXCHECK 0x10000000000LLU
+#define CALC_CLUMP 0x20000000000LLU
+#define CALC_PCA 0x40000000000LLU
+#define CALC_BLOCKS 0x80000000000LLU
+#define CALC_SCORE 0x100000000000LLU
+#define CALC_MENDEL 0x200000000000LLU
+#define CALC_HET 0x400000000000LLU
+#define CALC_FLIPSCAN 0x800000000000LLU
+#define CALC_TDT 0x1000000000000LLU
+#define CALC_MAKE_PERM_PHENO 0x2000000000000LLU
+#define CALC_QFAM 0x4000000000000LLU
+#define CALC_FST 0x8000000000000LLU
+#define CALC_SHOW_TAGS 0x10000000000000LLU
+#define CALC_MAKE_BIM 0x20000000000000LLU
+#define CALC_MAKE_FAM 0x40000000000000LLU
+#define CALC_WRITE_VAR_RANGES 0x80000000000000LLU
+#define CALC_DUPVAR 0x100000000000000LLU
+#define CALC_RPLUGIN 0x200000000000000LLU
+#define CALC_DFAM 0x400000000000000LLU
#define CALC_ONLY_BIM (CALC_WRITE_SET | CALC_WRITE_SNPLIST | CALC_WRITE_VAR_RANGES | CALC_LIST_23_INDELS | CALC_MAKE_BIM | CALC_DUPVAR)
#define CALC_ONLY_FAM (CALC_MAKE_PERM_PHENO | CALC_WRITE_COVAR | CALC_MAKE_FAM)
// only room for 6 more basic commands before we need to switch from a single
@@ -375,8 +393,9 @@
// 0 = non-explicit error
#define VCF_HALF_CALL_ERROR 1
-#define VCF_HALF_CALL_HAPLOID 2
-#define VCF_HALF_CALL_MISSING 3
+#define VCF_HALF_CALL_MISSING 2
+#define VCF_HALF_CALL_HAPLOID 3
+#define VCF_HALF_CALL_REFERENCE 4
#define M23_MALE 1
#define M23_FEMALE 2
@@ -642,6 +661,7 @@
#define BYTECT (BITCT / 8)
#define BYTECT4 (BITCT / 32)
#define VEC_WORDS (VEC_BITS / BITCT)
+#define VEC_INT32 (VEC_BYTES / 4)
// assumed number of bytes per cache line, for alignment
#define CACHELINE 64
@@ -718,10 +738,10 @@ HEADER_INLINE uint32_t round_up_pow2_ui(uint32_t val, uint32_t alignment) {
// Maximum length of chromosome, variant, FID, IID, cluster, and set IDs (not
// including terminating null, that's what _P1 is for). This value supports up
// to 8 IDs per line (maximum so far is 5, for e.g. --hom).
-#define MAX_ID_LEN 16000
+#define MAX_ID_SLEN 16000
-#define MAX_ID_LEN_P1 (MAX_ID_LEN + 1)
-#define MAX_ID_LEN_STR "16000"
+#define MAX_ID_BLEN (MAX_ID_SLEN + 1)
+#define MAX_ID_SLEN_STR "16000"
// Maximum size of "dynamically" allocated line load buffer. (This is the
// limit that applies to .vcf and similar files.) Inconvenient to go higher
@@ -817,13 +837,13 @@ HEADER_INLINE void aligned_free_cond(uintptr_t* aligned_ptr) {
HEADER_INLINE void aligned_free_null(uintptr_t** aligned_pp) {
aligned_free(*aligned_pp);
- *aligned_pp = NULL;
+ *aligned_pp = nullptr;
}
HEADER_INLINE void aligned_free_cond_null(uintptr_t** aligned_pp) {
if (*aligned_pp) {
aligned_free(*aligned_pp);
- *aligned_pp = NULL;
+ *aligned_pp = nullptr;
}
}
@@ -916,7 +936,7 @@ void wordwrapb(uint32_t suffix_len);
int32_t fopen_checked(const char* fname, const char* mode, FILE** target_ptr);
HEADER_INLINE int32_t putc_checked(int32_t ii, FILE* outfile) {
- putc(ii, outfile);
+ putc_unlocked(ii, outfile);
return ferror(outfile);
}
@@ -943,7 +963,7 @@ HEADER_INLINE void fclose_cond(FILE* fptr) {
HEADER_INLINE int32_t fclose_null(FILE** fptr_ptr) {
int32_t ii = ferror(*fptr_ptr);
int32_t jj = fclose(*fptr_ptr);
- *fptr_ptr = NULL;
+ *fptr_ptr = nullptr;
return ii || jj;
}
@@ -954,7 +974,7 @@ int32_t gzopen_read_checked(const char* fname, gzFile* gzf_ptr);
HEADER_INLINE int32_t gzclose_null(gzFile* gzf_ptr) {
int32_t ii = gzclose(*gzf_ptr);
- *gzf_ptr = NULL;
+ *gzf_ptr = nullptr;
return (ii != Z_OK);
}
@@ -1204,39 +1224,54 @@ HEADER_INLINE int32_t is_space_or_eoln(unsigned char cc) {
// contains nothing but letters and a null terminator.
uint32_t match_upper(const char* ss, const char* fixed_str);
-uint32_t match_upper_nt(const char* ss, const char* fixed_str, uint32_t ct);
+uint32_t match_upper_counted(const char* ss, const char* fixed_str, uint32_t ct);
// Reads an integer in [1, cap]. Assumes first character is nonspace. Has the
// overflow detection atoi() lacks.
+#ifdef __LP64__
+uint32_t scan_posint_capped(const char* ss, uint64_t cap, uint32_t* valp);
+
+uint32_t scan_uint_capped(const char* ss, uint64_t cap, uint32_t* valp);
+
+uint32_t scan_int_abs_bounded(const char* ss, uint64_t bound, int32_t* valp);
+#else // not __LP64__
+// Need to be more careful in 32-bit case due to overflow.
// A funny-looking div_10/mod_10 interface is used since the cap will usually
// be a constant, and we want the integer division/modulus to occur at compile
// time.
-uint32_t scan_posint_capped(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);
+uint32_t scan_posint_capped32(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);
+
+uint32_t scan_uint_capped32(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);
-uint32_t scan_uint_capped(const char* ss, uint32_t cap_div_10, uint32_t cap_mod_10, uint32_t* valp);
+uint32_t scan_int_abs_bounded32(const char* ss, uint32_t bound_div_10, uint32_t bound_mod_10, int32_t* valp);
-uint32_t scan_int_abs_bounded(const char* ss, uint32_t bound_div_10, uint32_t bound_mod_10, int32_t* valp);
+ #define scan_posint_capped(aa, bb, cc) scan_posint_capped32((aa), (bb) / 10, (bb) % 10, (cc))
+
+ #define scan_uint_capped(aa, bb, cc) scan_uint_capped32((aa), (bb) / 10, (bb) % 10, (cc))
+
+ #define scan_int_abs_bounded(aa, bb, cc) scan_int_abs_bounded32((aa), (bb) / 10, (bb) % 10, (cc))
+#endif
// intentionally rejects -2^31 for now
HEADER_INLINE uint32_t scan_int32(const char* ss, int32_t* valp) {
- return scan_int_abs_bounded(ss, 0x7fffffff / 10, 0x7fffffff % 10, valp);
+ return scan_int_abs_bounded(ss, 0x7fffffff, valp);
}
// default cap = 0x7ffffffe
HEADER_INLINE uint32_t scan_posint_defcap(const char* ss, uint32_t* valp) {
- return scan_posint_capped(ss, 0x7ffffffe / 10, 0x7ffffffe % 10, valp);
+ return scan_posint_capped(ss, 0x7ffffffe, valp);
}
HEADER_INLINE uint32_t scan_uint_defcap(const char* ss, uint32_t* valp) {
- return scan_uint_capped(ss, 0x7ffffffe / 10, 0x7ffffffe % 10, valp);
+ return scan_uint_capped(ss, 0x7ffffffe, valp);
}
HEADER_INLINE uint32_t scan_int_abs_defcap(const char* ss, int32_t* valp) {
- return scan_int_abs_bounded(ss, 0x7ffffffe / 10, 0x7ffffffe % 10, valp);
+ return scan_int_abs_bounded(ss, 0x7ffffffe, valp);
}
HEADER_INLINE uint32_t scan_uint_icap(const char* ss, uint32_t* valp) {
- return scan_uint_capped(ss, 0x7fffffff / 10, 0x7fffffff % 10, valp);
+ return scan_uint_capped(ss, 0x7fffffff, valp);
}
uint32_t scan_posintptr(const char* ss, uintptr_t* valp);
@@ -1301,6 +1336,7 @@ HEADER_INLINE char* memcpyl3a(char* __restrict target, const void* __restrict so
return &(target[3]);
}
+// note that, unlike stpcpy(), this does not copy the null terminator
HEADER_INLINE char* strcpya(char* __restrict target, const void* __restrict source) {
uintptr_t slen = strlen((char*)source);
memcpy(target, source, slen);
@@ -1359,6 +1395,7 @@ void get_top_two_ui(const uint32_t* __restrict uint_arr, uintptr_t uia_size, uin
uint32_t intlen(int32_t num);
// safer than token_endnn(), since it handles length zero
+// "se" = stops at space or eoln character
HEADER_INLINE uintptr_t strlen_se(const char* ss) {
const char* ss2 = ss;
while (!is_space_or_eoln(*ss2)) {
@@ -1593,23 +1630,8 @@ HEADER_INLINE char* dtoa_g_wxp8x(double dxx, uint32_t min_width, char extra_char
return &(penult[1]);
}
-HEADER_INLINE void read_next_terminate(char* __restrict target, const char* __restrict source) {
- while (!is_space_or_eoln(*source)) {
- *target++ = *source++;
- }
- *target = '\0';
-}
-
char* chrom_print_human(uint32_t num, char* buf);
-// newval does not need to be null-terminated, and slen does not include
-// terminator
-// assumes *allele_ptr is not initialized
-uint32_t allele_set(const char* newval, uint32_t slen, char** allele_ptr);
-
-// *allele_ptr must be initialized; frees *allele_ptr if necessary
-uint32_t allele_reset(const char* newval, uint32_t slen, char** allele_ptr);
-
void magic_num(uint32_t divisor, uint64_t* multp, uint32_t* __restrict pre_shiftp, uint32_t* __restrict post_shiftp, uint32_t* __restrict incrp);
HEADER_INLINE uintptr_t tri_coord_no_diag(uintptr_t small_coord, uintptr_t big_coord) {
@@ -1777,14 +1799,7 @@ HEADER_INLINE void prev_unset_unsafe_ck(const uintptr_t* bitarr, uint32_t* loc_p
// These functions seem to optimize better than memset(arr, 0, x) under OS X
// <10.9's gcc, and they should be equivalent for later versions (looks like
// memcpy/memset were redone in gcc 4.3).
-HEADER_INLINE void fill_long_zero(intptr_t* larr, size_t size) {
- size_t ulii;
- for (ulii = 0; ulii < size; ulii++) {
- *larr++ = 0;
- }
-}
-
-HEADER_INLINE void fill_ulong_zero(uintptr_t* ularr, size_t size) {
+HEADER_INLINE void fill_ulong_zero(size_t size, uintptr_t* ularr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*ularr++ = 0;
@@ -1792,31 +1807,24 @@ HEADER_INLINE void fill_ulong_zero(uintptr_t* ularr, size_t size) {
}
#ifdef __LP64__
-HEADER_INLINE void fill_ull_zero(uint64_t* ullarr, size_t size) {
- fill_ulong_zero((uintptr_t*)ullarr, size);
+HEADER_INLINE void fill_ull_zero(size_t size, uint64_t* ullarr) {
+ fill_ulong_zero(size, (uintptr_t*)ullarr);
}
// double v indicates that size is a vector count, not a word count.
-HEADER_INLINE void fill_vec_zero(VECITYPE* vec, size_t size) {
+HEADER_INLINE void fill_vvec_zero(size_t size, VECITYPE* vvec) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
- *vec++ = _mm_setzero_si128();
+ *vvec++ = _mm_setzero_si128();
}
}
#else
-HEADER_INLINE void fill_ull_zero(uint64_t* ullarr, size_t size) {
- fill_ulong_zero((uintptr_t*)ullarr, size * 2);
+HEADER_INLINE void fill_ull_zero(size_t size, uint64_t* ullarr) {
+ fill_ulong_zero(size * 2, (uintptr_t*)ullarr);
}
#endif
-HEADER_INLINE void fill_long_one(intptr_t* larr, size_t size) {
- size_t ulii;
- for (ulii = 0; ulii < size; ulii++) {
- *larr++ = -1;
- }
-}
-
-HEADER_INLINE void fill_ulong_one(uintptr_t* ularr, size_t size) {
+HEADER_INLINE void fill_ulong_one(size_t size, uintptr_t* ularr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*ularr++ = ~ZEROLU;
@@ -1824,51 +1832,51 @@ HEADER_INLINE void fill_ulong_one(uintptr_t* ularr, size_t size) {
}
#ifdef __LP64__
-HEADER_INLINE void fill_ull_one(uint64_t* ullarr, size_t size) {
- fill_ulong_one((uintptr_t*)ullarr, size);
+HEADER_INLINE void fill_ull_one(size_t size, uint64_t* ullarr) {
+ fill_ulong_one(size, (uintptr_t*)ullarr);
}
#else
-HEADER_INLINE void fill_ull_one(uint64_t* ullarr, size_t size) {
- fill_ulong_one((uintptr_t*)ullarr, size * 2);
+HEADER_INLINE void fill_ull_one(size_t size, uint64_t* ullarr) {
+ fill_ulong_one(size * 2, (uintptr_t*)ullarr);
}
#endif
-HEADER_INLINE void fill_int_zero(int32_t* iarr, size_t size) {
+HEADER_INLINE void fill_int_zero(size_t size, int32_t* iarr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*iarr++ = 0;
}
}
-HEADER_INLINE void fill_int_one(int32_t* iarr, size_t size) {
+HEADER_INLINE void fill_int_one(size_t size, int32_t* iarr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*iarr++ = -1;
}
}
-HEADER_INLINE void fill_uint_zero(uint32_t* uiarr, size_t size) {
+HEADER_INLINE void fill_uint_zero(size_t size, uint32_t* uiarr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*uiarr++ = 0;
}
}
-HEADER_INLINE void fill_uint_one(uint32_t* uiarr, size_t size) {
+HEADER_INLINE void fill_uint_one(size_t size, uint32_t* uiarr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*uiarr++ = ~0U;
}
}
-HEADER_INLINE void fill_float_zero(float* farr, size_t size) {
+HEADER_INLINE void fill_float_zero(size_t size, float* farr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*farr++ = 0.0;
}
}
-HEADER_INLINE void fill_double_zero(double* darr, size_t size) {
+HEADER_INLINE void fill_double_zero(size_t size, double* darr) {
size_t ulii;
for (ulii = 0; ulii < size; ulii++) {
*darr++ = 0.0;
@@ -2010,24 +2018,36 @@ void sample_delim_convert(uintptr_t unfiltered_sample_ct, const uintptr_t* sampl
void get_set_wrange_align(const uintptr_t* __restrict bitarr, uintptr_t word_ct, uintptr_t* __restrict firstw_ptr, uintptr_t* __restrict wlen_ptr);
-// Maximum accepted chromosome index is this minus 1. Currently cannot exceed
-// 2^14 due to SMALL_INTERVAL_BITS setting in plink_cnv.c...
-#define MAX_POSSIBLE_CHROM 5120
-// ...unless this is uncommented (it removes the entire CNV module).
-// #define HIGH_MAX_CHROM
+// for hash tables where maximum ID string length is not known in advance.
+uint32_t unklen_id_htable_find(const char* cur_id, const char* const* item_ids, const uint32_t* id_htable, uint32_t hashval, uint32_t id_htable_size);
+
+// okay, time to provide O(c log c)-time instead of O(c^2)-time initialization
+// (c = # of chromosomes/contigs).
+#define MAX_POSSIBLE_CHROM 65280
+
+// get_id_htable_size(MAX_POSSIBLE_CHROM) (use constexpr once sufficient
+// compiler support is available)
+#define CHROM_NAME_HTABLE_SIZE 130579
// assumes MAX_POSSIBLE_CHROM is a multiple of 64, otherwise add round-up
#define CHROM_MASK_WORDS (MAX_POSSIBLE_CHROM / BITCT)
-#define MAX_CHROM_TEXTNUM 59
-// usual PLINK 1.07 chromosome field length is 4. So it's safe to increase
-// MAX_CHROM_TEXTNUM to 9995, but 9996+ creates problems...
// (note that n+1, n+2, n+3, and n+4 are reserved for X/Y/XY/MT)
-#define MAX_CHROM_TEXTNUM_LEN 2
-#define CHROM_X MAX_POSSIBLE_CHROM
-#define CHROM_Y (MAX_POSSIBLE_CHROM + 1)
-#define CHROM_XY (MAX_POSSIBLE_CHROM + 2)
-#define CHROM_MT (MAX_POSSIBLE_CHROM + 3)
+#define MAX_CHROM_TEXTNUM 95
+
+// get_chrom_code_raw() needs to be modified if this changes
+#define MAX_CHROM_TEXTNUM_SLEN 2
+
+#define X_OFFSET 0
+#define Y_OFFSET 1
+#define XY_OFFSET 2
+#define MT_OFFSET 3
+#define XYMT_OFFSET_CT 4
+
+#define CHROM_X (MAX_POSSIBLE_CHROM + X_OFFSET)
+#define CHROM_Y (MAX_POSSIBLE_CHROM + Y_OFFSET)
+#define CHROM_XY (MAX_POSSIBLE_CHROM + XY_OFFSET)
+#define CHROM_MT (MAX_POSSIBLE_CHROM + MT_OFFSET)
#ifdef __LP64__
// dog requires 42 bits, and other species require less
@@ -2037,45 +2057,49 @@ void get_set_wrange_align(const uintptr_t* __restrict bitarr, uintptr_t word_ct,
#endif
typedef struct {
- // no point to dynamic allocation when MAX_POSSIBLE_CHROM is small and
- // there's only one copy of this
+ // Main dynamic block intended to be allocated as a single aligned block of
+ // memory on the heap freeable with vecaligned_free(), with chrom_mask at the
+ // base.
+
+ uintptr_t* chrom_mask; // which chromosomes aren't known to be absent?
+ // This is a misnomer--it includes X and excludes MT. Underlying concept is
+ // "are some calls guaranteed to be homozygous (assuming >= 1 male)", which
+ // is no longer true for MT since heteroplasmy is a thing. (Well, the real
+ // goal with MT is to enable dosage-based analysis, but until all pipelines
+ // have adapted, diploid data handling loses slightly less information than
+ // haploid.)
+ uintptr_t* haploid_mask;
// order of chromosomes in input files
- // currently tolerates out-of-order chromosomes, as long as all markers for
+ // currently tolerates out-of-order chromosomes, as long as all variants for
// any given chromosome are together
- uint32_t chrom_file_order[MAX_POSSIBLE_CHROM];
- uint32_t chrom_ct; // length of chrom_file_order
- uint32_t chrom_file_order_marker_idx[MAX_POSSIBLE_CHROM + 1];
-
- // markers chrom_start[k] to (chrom_end[k] - 1) are part of chromosome k
- uint32_t chrom_start[MAX_POSSIBLE_CHROM];
- uint32_t chrom_end[MAX_POSSIBLE_CHROM];
+ uint32_t* chrom_file_order;
+
+ // if the second chromosome in the dataset is chr5, chrom_file_order[1] == 5,
+ // the raw variant indexes for chr5 are in [chrom_fo_vidx_start[1],
+ // chrom_fo_vidx_start[2]), and chrom_idx_to_foidx[5] == 1.
+ uint32_t* chrom_fo_vidx_start;
+ uint32_t* chrom_idx_to_foidx;
- uintptr_t chrom_mask[CHROM_MASK_WORDS];
+ // --allow-extra-chr support
+ char** nonstd_names;
+ uint32_t* nonstd_id_htable;
+ // end main dynamic block
+ uint32_t chrom_ct; // number of distinct chromosomes/contigs
uint32_t species;
- int32_t x_code;
- int32_t y_code;
- int32_t xy_code;
- int32_t mt_code;
+ int32_t xymt_codes[XYMT_OFFSET_CT]; // x, y, xy, mt
uint32_t max_code;
uint32_t autosome_ct;
- // this is a misnomer--it includes X and excludes MT. Underlying concept is
- // "are some calls guaranteed to be homozygous (assuming >= 1 male)", which
- // is no longer true for MT since heteroplasmy is a thing.
- uintptr_t haploid_mask[CHROM_MASK_WORDS];
-
- // --allow-extra-chr support
+ // yet more --allow-extra-chr support
uint32_t zero_extra_chroms;
uint32_t name_ct;
Ll_str* incl_excl_name_stack;
uint32_t is_include_stack;
uint32_t output_encoding;
- char* nonstd_names[MAX_POSSIBLE_CHROM];
- uint32_t nonstd_name_order[MAX_POSSIBLE_CHROM];
} Chrom_info;
#define SPECIES_HUMAN 0
@@ -2090,6 +2114,31 @@ typedef struct {
extern const char* g_species_singular;
extern const char* g_species_plural;
+int32_t init_chrom_info(Chrom_info* chrom_info_ptr);
+
+void init_species(uint32_t species_code, Chrom_info* chrom_info_ptr);
+
+void init_default_chrom_mask(Chrom_info* chrom_info_ptr);
+
+HEADER_INLINE int32_t init_chrom_info_human(Chrom_info* chrom_info_ptr) {
+ // convenience wrapper
+ if (init_chrom_info(chrom_info_ptr)) {
+ return RET_NOMEM;
+ }
+ init_species(SPECIES_HUMAN, chrom_info_ptr);
+ init_default_chrom_mask(chrom_info_ptr);
+ return 0;
+}
+
+void forget_extra_chrom_names(uint32_t reinitialize, Chrom_info* chrom_info_ptr);
+
+// in the usual case where the number of chromosomes/contigs is much less than
+// MAX_POSSIBLE_CHROM, this reduces chrom_info's memory consumption and
+// improves locality.
+int32_t finalize_chrom_info(Chrom_info* chrom_info_ptr);
+
+void cleanup_chrom_info(Chrom_info* chrom_info_ptr);
+
HEADER_INLINE const char* species_str(uintptr_t ct) {
return (ct == ONELU)? g_species_singular : g_species_plural;
}
@@ -2099,7 +2148,7 @@ HEADER_INLINE const char* species_str(uintptr_t ct) {
#define CHR_OUTPUT_MT 4
#define CHR_OUTPUT_0M 8
-HEADER_INLINE uint32_t all_words_zero(const uintptr_t* word_arr, uintptr_t word_ct) {
+HEADER_INLINE uint32_t are_all_words_zero(const uintptr_t* word_arr, uintptr_t word_ct) {
while (word_ct--) {
if (*word_arr++) {
return 0;
@@ -2112,31 +2161,71 @@ char* chrom_name_write(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, cha
char* chrom_name_buf5w4write(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx, uint32_t* chrom_name_len_ptr, char* buf5);
-uint32_t get_max_chrom_len(const Chrom_info* chrom_info_ptr);
-
-void forget_extra_chrom_names(Chrom_info* chrom_info_ptr);
+uint32_t get_max_chrom_slen(const Chrom_info* chrom_info_ptr);
uint32_t haploid_chrom_present(const Chrom_info* chrom_info_ptr);
+// does not require null-termination
+// only handles 1-99, X, Y, XY, MT, and "chr" prefix
int32_t get_chrom_code_raw(const char* sptr);
-int32_t get_chrom_code(const Chrom_info* chrom_info_ptr, const char* sptr);
+// now requires null-termination
+// now returns -1 when --allow-extra-chr may be ok, and -2 on total fail
+int32_t get_chrom_code(const char* chrom_name, const Chrom_info* chrom_info_ptr, uint32_t name_slen);
+
+// when the chromosome name isn't null-terminated, but we want to preserve the
+// character there
+// requires chrom_name[name_slen] to be mutable
+int32_t get_chrom_code_counted(const Chrom_info* chrom_info_ptr, uint32_t name_slen, char* chrom_name);
+
+// when it's okay to just replace the terminating space/tab with a \0
+HEADER_INLINE int32_t get_chrom_code_destructive(const Chrom_info* chrom_info_ptr, char* chrom_name) {
+ char* chrom_token_end = token_endnn(chrom_name);
+ *chrom_token_end = '\0';
+ return get_chrom_code(chrom_name, chrom_info_ptr, (uintptr_t)(chrom_token_end - chrom_name));
+}
+
+uint32_t get_variant_chrom_fo_idx(const Chrom_info* chrom_info_ptr, uintptr_t variant_uidx);
-// when the chromosome name doesn't end with a space
-// currently requires sptr[slen] to be mutable
-int32_t get_chrom_code2(const Chrom_info* chrom_info_ptr, char* sptr, uint32_t slen);
+HEADER_INLINE uint32_t get_variant_chrom(const Chrom_info* chrom_info_ptr, uintptr_t variant_uidx) {
+ return chrom_info_ptr->chrom_file_order[get_variant_chrom_fo_idx(chrom_info_ptr, variant_uidx)];
+}
-uint32_t get_marker_chrom_fo_idx(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx);
-HEADER_INLINE uint32_t get_marker_chrom(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx) {
- return chrom_info_ptr->chrom_file_order[get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx)];
+// these assume the chromosome is present in the dataset
+HEADER_INLINE uint32_t get_chrom_start_vidx(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx) {
+ return chrom_info_ptr->chrom_fo_vidx_start[chrom_info_ptr->chrom_idx_to_foidx[chrom_idx]];
}
-HEADER_INLINE int32_t chrom_exists(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx) {
- return is_set(chrom_info_ptr->chrom_mask, chrom_idx);
+HEADER_INLINE uint32_t get_chrom_end_vidx(const Chrom_info* chrom_info_ptr, uint32_t chrom_idx) {
+ return chrom_info_ptr->chrom_fo_vidx_start[chrom_info_ptr->chrom_idx_to_foidx[chrom_idx] + 1];
}
-int32_t resolve_or_add_chrom_name(const char* cur_chrom_name, const char* file_descrip, uintptr_t line_idx, Chrom_info* chrom_info_ptr, int32_t* chrom_idx_ptr);
+// now assumes chrom_name is null-terminated
+int32_t try_to_add_chrom_name(const char* chrom_name, const char* file_descrip, uintptr_t line_idx, uint32_t name_slen, uint32_t allow_extra_chroms, int32_t* chrom_idx_ptr, Chrom_info* chrom_info_ptr);
+
+HEADER_INLINE int32_t get_or_add_chrom_code(const char* chrom_name, const char* file_descrip, uintptr_t line_idx, uint32_t name_slen, uint32_t allow_extra_chroms, Chrom_info* chrom_info_ptr, int32_t* chrom_idx_ptr) {
+ *chrom_idx_ptr = get_chrom_code(chrom_name, chrom_info_ptr, name_slen);
+ if (*chrom_idx_ptr >= 0) {
+ return 0;
+ }
+ return try_to_add_chrom_name(chrom_name, file_descrip, line_idx, name_slen, allow_extra_chroms, chrom_idx_ptr, chrom_info_ptr);
+}
+
+HEADER_INLINE int32_t get_or_add_chrom_code_destructive(const char* file_descrip, uintptr_t line_idx, uint32_t allow_extra_chroms, char* chrom_name, char* chrom_name_end, Chrom_info* chrom_info_ptr, int32_t* chrom_idx_ptr) {
+ *chrom_name_end = '\0';
+ return get_or_add_chrom_code(chrom_name, file_descrip, line_idx, (uintptr_t)(chrom_name_end - chrom_name), allow_extra_chroms, chrom_info_ptr, chrom_idx_ptr);
+}
+
+// newval does not need to be null-terminated
+// assumes *allele_ptr is not initialized
+// make last parameter const char** later
+uint32_t allele_set(const char* newval, uint32_t allele_slen, char** allele_ptr);
+
+// *allele_ptr must be initialized; frees *allele_ptr if necessary
+uint32_t allele_reset(const char* newval, uint32_t allele_slen, char** allele_ptr);
+
+void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_entry_ct, char** allele_storage);
// no need for this; code is simpler if we just create a copy of marker_exclude
// with all non-autosomal loci removed
@@ -2167,6 +2256,8 @@ void refresh_chrom_info(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx,
int32_t single_chrom_start(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t unfiltered_marker_ct);
+double get_dmedian(const double* sorted_arr, uintptr_t len);
+
double destructive_get_dmedian(uintptr_t len, double* unsorted_arr);
int32_t strcmp_casted(const void* s1, const void* s2);
@@ -2373,13 +2464,12 @@ int32_t string_range_list_to_bitarr_alloc(char* header_line, uint32_t item_ct, u
int32_t string_range_list_to_bitarr2(const char* __restrict sorted_ids, const uint32_t* id_map, uintptr_t item_ct, uintptr_t max_id_len, const Range_list* __restrict range_list_ptr, const char* __restrict range_list_flag, uintptr_t* bitarr_excl);
HEADER_INLINE uint32_t count_chrom_markers(const Chrom_info* chrom_info_ptr, const uintptr_t* marker_exclude, uint32_t chrom_idx) {
- uint32_t min_idx;
- uint32_t max_idx;
if (!is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
return 0;
}
- min_idx = chrom_info_ptr->chrom_start[chrom_idx];
- max_idx = chrom_info_ptr->chrom_end[chrom_idx];
+ const uint32_t chrom_fo_idx = chrom_info_ptr->chrom_idx_to_foidx[chrom_idx];
+ const uint32_t min_idx = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ const uint32_t max_idx = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
return (max_idx - min_idx) - ((uint32_t)popcount_bit_idx(marker_exclude, min_idx, max_idx));
}
diff --git a/plink_data.c b/plink_data.c
index bd1577f..490bb28 100644
--- a/plink_data.c
+++ b/plink_data.c
@@ -58,8 +58,8 @@ int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintp
// previously used mmap(); turns out this is more portable without being
// noticeably slower.
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
- FILE* outfile = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_marker_ct4 = (unfiltered_marker_ct + 3) / 4;
uintptr_t unfiltered_marker_ctl2 = QUATERCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
@@ -119,7 +119,7 @@ int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintp
sample_idx_base = sample_idx_end;
sample_idx_end = sample_idx_base + 4;
if (sample_idx_end > unfiltered_sample_ct) {
- fill_ulong_zero(&(loadbuf[(unfiltered_sample_ct % 4) * unfiltered_marker_ctl2]), (4 - (unfiltered_sample_ct % 4)) * unfiltered_marker_ctl2);
+ fill_ulong_zero((4 - (unfiltered_sample_ct % 4)) * unfiltered_marker_ctl2, &(loadbuf[(unfiltered_sample_ct % 4) * unfiltered_marker_ctl2]));
sample_idx_end = unfiltered_sample_ct;
}
lptr = loadbuf;
@@ -183,204 +183,170 @@ int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintp
return retval;
}
-uint32_t chrom_error(const char* extension, Chrom_info* chrom_info_ptr, char* chrom_str, uintptr_t line_idx, int32_t error_code, uint32_t allow_extra_chroms) {
- if (allow_extra_chroms && (error_code == -2)) {
- return 0;
- }
- int32_t raw_code = get_chrom_code_raw(chrom_str);
- uint32_t slen = strlen_se(chrom_str);
- chrom_str[slen] = '\0';
- logprint("\n");
- if (line_idx) {
- LOGERRPRINTFWW("Error: Invalid chromosome code '%s' on line %" PRIuPTR " of %s.\n", chrom_str, line_idx, extension);
- } else {
- LOGERRPRINTFWW("Error: Invalid chromosome code '%s' in %s.\n", chrom_str, extension);
- }
- if ((raw_code > ((int32_t)chrom_info_ptr->max_code)) && ((raw_code <= MAX_CHROM_TEXTNUM + 4) || (raw_code >= MAX_POSSIBLE_CHROM))) {
- if (chrom_info_ptr->species != SPECIES_UNKNOWN) {
- if (chrom_info_ptr->species == SPECIES_HUMAN) {
- logerrprint("(This is disallowed for humans. Check if the problem is with your data, or if\nyou forgot to define a different chromosome set with e.g. --chr-set.).\n");
- } else {
- logerrprint("(This is disallowed by the PLINK 1.07 species flag you used. You can\ntemporarily work around this restriction with --chr-set; contact the developers\nif you want the flag to be permanently redefined.)\n");
- }
- } else {
- logerrprint("(This is disallowed by your --chr-set/--autosome-num parameters. Check if the\nproblem is with your data, or your command line.)\n");
- }
- } else if (error_code == -2) {
- logerrprint("(Use --allow-extra-chr to force it to be accepted.)\n");
- }
- return 1;
-}
-
-int32_t load_map(FILE** mapfile_ptr, char* mapname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_len_ptr, uintptr_t** marker_exclude_ptr, char** marker_ids_ptr, Chrom_info* chrom_info_ptr, uint32_t** marker_pos_ptr, uint32_t* map_is_unsorted_ptr, uint32_t allow_extra_chroms, uint32_t allow_no_vars) {
+int32_t load_map(FILE** mapfile_ptr, char* mapname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, char** marker_ids_ptr, Chrom_info* chrom_info_ptr, uint32_t** marker_pos_ptr, uint32_t* map_is_unsorted_ptr, uint32_t allow_extra_chroms, uint32_t allow_no_vars) {
// currently only used by lgen_to_bed()
// todo: some cleanup
uintptr_t marker_exclude_ct = *marker_exclude_ct_ptr;
- uintptr_t max_marker_id_len = 0;
+ uintptr_t max_marker_id_blen = 0;
uintptr_t unfiltered_marker_ct = 0;
uintptr_t line_idx = 0;
uint32_t last_pos = 0;
int32_t last_chrom = -1;
int32_t marker_pos_needed = 0;
- int32_t chroms_encountered_m1 = -1;
+ uint32_t chroms_encountered_m1 = 0xffffffffU; // deliberate overflow
int32_t retval = 0;
uintptr_t loaded_chrom_mask[CHROM_MASK_WORDS];
uintptr_t* marker_exclude;
uintptr_t unfiltered_marker_ctl;
- char* bufptr;
uintptr_t marker_uidx;
uintptr_t ulii;
uint32_t cur_pos;
int32_t ii;
- int32_t jj;
- fill_ulong_zero(loaded_chrom_mask, CHROM_MASK_WORDS);
- if (fopen_checked(mapname, "r", mapfile_ptr)) {
- goto load_map_ret_OPEN_FAIL;
- }
- // first pass: count columns, determine raw marker count, determine maximum
- // marker ID length if necessary.
- g_textbuf[MAXLINELEN - 6] = ' ';
- while (fgets(g_textbuf, MAXLINELEN - 5, *mapfile_ptr)) {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 6]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .map file is pathologically long.\n", line_idx);
- goto load_map_ret_INVALID_FORMAT_2;
- }
- bufptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_or_comment_kns(*bufptr)) {
- continue;
- }
- bufptr = next_token(bufptr);
- if (no_more_tokens_kns(bufptr)) {
- goto load_map_ret_MISSING_TOKENS;
- }
- ulii = strlen_se(bufptr) + 1;
- if (ulii > max_marker_id_len) {
- max_marker_id_len = ulii;
- }
- if (!unfiltered_marker_ct) {
- bufptr = next_token_mult(bufptr, 2);
- if (!bufptr) {
- goto load_map_ret_MISSING_TOKENS;
+ {
+ fill_ulong_zero(CHROM_MASK_WORDS, loaded_chrom_mask);
+ if (fopen_checked(mapname, "r", mapfile_ptr)) {
+ goto load_map_ret_OPEN_FAIL;
+ }
+ // first pass: count columns, determine raw marker count, determine maximum
+ // marker ID length if necessary.
+ g_textbuf[MAXLINELEN - 6] = ' ';
+ while (fgets(g_textbuf, MAXLINELEN - 5, *mapfile_ptr)) {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 6]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .map file is pathologically long.\n", line_idx);
+ goto load_map_ret_INVALID_FORMAT_2;
+ }
+ char* textbuf_first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_or_comment_kns(*textbuf_first_token)) {
+ continue;
+ }
+ char* textbuf_iter = next_token(textbuf_first_token);
+ if (no_more_tokens_kns(textbuf_iter)) {
+ goto load_map_ret_MISSING_TOKENS;
+ }
+ ulii = strlen_se(textbuf_iter) + 1;
+ if (ulii > max_marker_id_blen) {
+ max_marker_id_blen = ulii;
}
- if (*bufptr > ' ') {
- *map_cols_ptr = 4;
+ if (!unfiltered_marker_ct) {
+ textbuf_iter = next_token_mult(textbuf_iter, 2);
+ if (!textbuf_iter) {
+ goto load_map_ret_MISSING_TOKENS;
+ }
+ if (*textbuf_iter > ' ') {
+ *map_cols_ptr = 4;
+ }
}
+ unfiltered_marker_ct++;
}
- unfiltered_marker_ct++;
- }
- if (!feof(*mapfile_ptr)) {
- goto load_map_ret_READ_FAIL;
- }
- if ((!unfiltered_marker_ct) && (!allow_no_vars)) {
- logerrprint("Error: No variants in .map file.\n");
- goto load_map_ret_INVALID_FORMAT;
- }
- *unfiltered_marker_ct_ptr = unfiltered_marker_ct;
- *max_marker_id_len_ptr = max_marker_id_len;
- rewind(*mapfile_ptr);
- unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
+ if (!feof(*mapfile_ptr)) {
+ goto load_map_ret_READ_FAIL;
+ }
+ if ((!unfiltered_marker_ct) && (!allow_no_vars)) {
+ logerrprint("Error: No variants in .map file.\n");
+ goto load_map_ret_INVALID_FORMAT;
+ }
+ *unfiltered_marker_ct_ptr = unfiltered_marker_ct;
+ *max_marker_id_blen_ptr = max_marker_id_blen;
+ rewind(*mapfile_ptr);
+ unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
- // unfiltered_marker_ct can be very large, so use bigstack for all
- // allocations that are a multiple of it
+ // unfiltered_marker_ct can be very large, so use bigstack for all
+ // allocations that are a multiple of it
- // permanent bigstack allocation #1: marker_exclude
- if (bigstack_calloc_ul(unfiltered_marker_ctl, marker_exclude_ptr)) {
- goto load_map_ret_NOMEM;
- }
- marker_exclude = *marker_exclude_ptr;
- fill_uint_zero(chrom_info_ptr->chrom_file_order, MAX_POSSIBLE_CHROM);
- fill_uint_zero(chrom_info_ptr->chrom_file_order_marker_idx, MAX_POSSIBLE_CHROM + 1);
- fill_uint_zero(chrom_info_ptr->chrom_start, MAX_POSSIBLE_CHROM);
- fill_uint_zero(chrom_info_ptr->chrom_end, MAX_POSSIBLE_CHROM);
- // permanent bigstack allocation #2, if needed: marker_pos
- if (marker_pos_needed) {
- if (bigstack_alloc_ui(unfiltered_marker_ct, marker_pos_ptr)) {
+ // permanent bigstack allocation #1: marker_exclude
+ if (bigstack_calloc_ul(unfiltered_marker_ctl, marker_exclude_ptr)) {
goto load_map_ret_NOMEM;
}
- }
- if (bigstack_alloc_c(unfiltered_marker_ct * max_marker_id_len, marker_ids_ptr)) {
- goto load_map_ret_NOMEM;
- }
+ marker_exclude = *marker_exclude_ptr;
+ fill_uint_one(MAX_POSSIBLE_CHROM, chrom_info_ptr->chrom_idx_to_foidx);
- // second pass: actually load stuff
- line_idx = 0;
- for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
- if (get_next_noncomment(*mapfile_ptr, &bufptr, &line_idx)) {
- goto load_map_ret_READ_FAIL;
- }
- jj = get_chrom_code(chrom_info_ptr, bufptr);
- if (jj < 0) {
- if (chrom_error(".map file", chrom_info_ptr, bufptr, line_idx, jj, allow_extra_chroms)) {
- goto load_map_ret_INVALID_FORMAT;
- }
- retval = resolve_or_add_chrom_name(bufptr, ".map file", line_idx, chrom_info_ptr, &jj);
- if (retval) {
- goto load_map_ret_1;
+ // permanent bigstack allocation #2, if needed: marker_pos
+ if (marker_pos_needed) {
+ if (bigstack_alloc_ui(unfiltered_marker_ct, marker_pos_ptr)) {
+ goto load_map_ret_NOMEM;
}
}
- if (jj != last_chrom) {
- if (last_chrom != -1) {
- chrom_info_ptr->chrom_end[last_chrom] = marker_uidx;
- }
- if (jj < last_chrom) {
- *map_is_unsorted_ptr |= UNSORTED_CHROM;
- }
- last_chrom = jj;
- if (is_set(loaded_chrom_mask, jj)) {
- *map_is_unsorted_ptr |= UNSORTED_SPLIT_CHROM | UNSORTED_BP;
- } else {
- set_bit(jj, loaded_chrom_mask);
- chrom_info_ptr->chrom_start[(uint32_t)jj] = marker_uidx;
- chrom_info_ptr->chrom_file_order[++chroms_encountered_m1] = jj;
- chrom_info_ptr->chrom_file_order_marker_idx[chroms_encountered_m1] = marker_uidx;
- }
- last_pos = 0;
+ if (bigstack_alloc_c(unfiltered_marker_ct * max_marker_id_blen, marker_ids_ptr)) {
+ goto load_map_ret_NOMEM;
}
- if (!is_set(chrom_info_ptr->chrom_mask, jj)) {
- SET_BIT(marker_uidx, marker_exclude);
- marker_exclude_ct++;
- } else {
- bufptr = next_token(bufptr);
- if (no_more_tokens_kns(bufptr)) {
- goto load_map_ret_MISSING_TOKENS;
+ // second pass: actually load stuff
+ line_idx = 0;
+ for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
+ char* textbuf_first_token;
+ if (get_next_noncomment(*mapfile_ptr, &textbuf_first_token, &line_idx)) {
+ goto load_map_ret_READ_FAIL;
}
- read_next_terminate(&((*marker_ids_ptr)[marker_uidx * max_marker_id_len]), bufptr);
- bufptr = next_token_mult(bufptr, *map_cols_ptr - 2);
- if (no_more_tokens_kns(bufptr)) {
+ char* textbuf_iter = token_endnn(textbuf_first_token);
+ if (!(*textbuf_iter)) {
goto load_map_ret_MISSING_TOKENS;
}
- if (scan_int_abs_defcap(bufptr, &ii)) {
- sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of .map file.\n", line_idx);
- goto load_map_ret_INVALID_FORMAT_2;
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code_destructive(".map file", line_idx, allow_extra_chroms, textbuf_first_token, textbuf_iter, chrom_info_ptr, &cur_chrom_code);
+ if (retval) {
+ goto load_map_ret_1;
+ }
+ if (cur_chrom_code != last_chrom) {
+ if (cur_chrom_code < last_chrom) {
+ *map_is_unsorted_ptr |= UNSORTED_CHROM;
+ }
+ last_chrom = cur_chrom_code;
+ if (is_set(loaded_chrom_mask, cur_chrom_code)) {
+ *map_is_unsorted_ptr |= UNSORTED_SPLIT_CHROM | UNSORTED_BP;
+ } else {
+ set_bit(cur_chrom_code, loaded_chrom_mask);
+ chrom_info_ptr->chrom_file_order[++chroms_encountered_m1] = cur_chrom_code;
+ chrom_info_ptr->chrom_fo_vidx_start[chroms_encountered_m1] = marker_uidx;
+ chrom_info_ptr->chrom_idx_to_foidx[(uint32_t)cur_chrom_code] = chroms_encountered_m1;
+ }
+ last_pos = 0;
}
- if (ii < 0) {
+
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
SET_BIT(marker_uidx, marker_exclude);
marker_exclude_ct++;
} else {
- cur_pos = ii;
- if (cur_pos < last_pos) {
- *map_is_unsorted_ptr |= UNSORTED_BP;
- } else {
- last_pos = cur_pos;
+ textbuf_iter = skip_initial_spaces(&(textbuf_iter[1]));
+ if (is_eoln_kns(*textbuf_iter)) {
+ goto load_map_ret_MISSING_TOKENS;
}
- if (marker_pos_needed && jj) {
- (*marker_pos_ptr)[marker_uidx] = cur_pos;
+ char* token_end = token_endnn(textbuf_iter);
+ memcpyx(&((*marker_ids_ptr)[marker_uidx * max_marker_id_blen]), textbuf_iter, (uintptr_t)(token_end - textbuf_iter), '\0');
+ textbuf_iter = next_token_mult(token_end, *map_cols_ptr - 2);
+ if (no_more_tokens_kns(textbuf_iter)) {
+ goto load_map_ret_MISSING_TOKENS;
+ }
+ if (scan_int_abs_defcap(textbuf_iter, &ii)) {
+ sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of .map file.\n", line_idx);
+ goto load_map_ret_INVALID_FORMAT_2;
+ }
+ if (ii < 0) {
+ SET_BIT(marker_uidx, marker_exclude);
+ marker_exclude_ct++;
+ } else {
+ cur_pos = ii;
+ if (cur_pos < last_pos) {
+ *map_is_unsorted_ptr |= UNSORTED_BP;
+ } else {
+ last_pos = cur_pos;
+ }
+ if (marker_pos_needed && cur_chrom_code) {
+ (*marker_pos_ptr)[marker_uidx] = cur_pos;
+ }
}
}
}
- }
- chrom_info_ptr->chrom_ct = ++chroms_encountered_m1;
- *marker_exclude_ct_ptr = marker_exclude_ct;
- if (unfiltered_marker_ct) {
- chrom_info_ptr->chrom_end[last_chrom] = marker_uidx;
- if (marker_exclude_ct == unfiltered_marker_ct) {
- logerrprint("Error: All variants excluded from .map file.\n");
- goto load_map_ret_ALL_MARKERS_EXCLUDED;
+ chrom_info_ptr->chrom_ct = ++chroms_encountered_m1;
+ *marker_exclude_ct_ptr = marker_exclude_ct;
+ if (unfiltered_marker_ct) {
+ if (marker_exclude_ct == unfiltered_marker_ct) {
+ logerrprint("Error: All variants excluded from .map file.\n");
+ goto load_map_ret_ALL_MARKERS_EXCLUDED;
+ }
}
+ chrom_info_ptr->chrom_fo_vidx_start[chroms_encountered_m1] = marker_uidx;
}
- chrom_info_ptr->chrom_file_order_marker_idx[chroms_encountered_m1] = marker_uidx;
while (0) {
load_map_ret_NOMEM:
retval = RET_NOMEM;
@@ -503,17 +469,18 @@ static inline uint32_t sf_out_of_range(uint32_t cur_pos, uint32_t chrom_idx, uin
return 1;
}
-int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_len_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_len_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_len, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_ [...]
+int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
// supports .map now too, to make e.g. --snps + --dosage work
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* bimfile = NULL;
+ FILE* bimfile = nullptr;
uintptr_t unfiltered_marker_ct = 0;
uintptr_t marker_exclude_ct = *marker_exclude_ct_ptr;
- uintptr_t max_marker_id_len = *max_marker_id_len_ptr;
- uintptr_t max_marker_allele_len = *max_marker_allele_len_ptr;
+ uintptr_t max_marker_id_blen = *max_marker_id_blen_ptr;
+ uintptr_t max_marker_allele_blen = *max_marker_allele_blen_ptr;
uintptr_t line_idx = 0;
int32_t prev_chrom = -1;
uint32_t last_pos = 0;
+ const uint32_t is_bim = (ftype_str[1] == 'b'); // .map also supported
uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
uint32_t exclude_snp = (filter_flags / FILTER_EXCLUDE_MARKERNAME_SNP) & 1;
@@ -540,24 +507,25 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
uint32_t missing_ids_set = 0;
uint32_t missing_template_base_len = 0;
uint32_t template_insert_ct = 0;
+ uint32_t chrom_header_line_present = 0;
uint32_t uii = 0;
uint32_t ujj = 0;
int32_t exclude_window_start = 0;
int32_t exclude_window_end = -1;
int32_t retval = 0;
char* missing_geno_ptr = (char*)g_missing_geno_ptr;
- uint32_t* sf_start_idxs = NULL;
- uint32_t* sf_pos = NULL;
- uint32_t* sf_str_chroms = NULL;
- uint32_t* sf_str_pos = NULL;
- uint32_t* sf_str_lens = NULL;
- uint32_t* sf_llbuf = NULL;
- char* loadbuf2 = NULL; // on heap, second pass
- char* prev_new_id = NULL;
- char* bufptr2 = NULL;
- char* bufptr4 = NULL;
- char* bufptr5 = NULL;
- char** marker_allele_ptrs = NULL;
+ uint32_t* sf_start_idxs = nullptr;
+ uint32_t* sf_pos = nullptr;
+ uint32_t* sf_str_chroms = nullptr;
+ uint32_t* sf_str_pos = nullptr;
+ uint32_t* sf_str_lens = nullptr;
+ uint32_t* sf_llbuf = nullptr;
+ char* loadbuf2 = nullptr; // on heap, second pass
+ char* prev_new_id = nullptr;
+ char* bufptr2 = nullptr;
+ char* bufptr4 = nullptr;
+ char* bufptr5 = nullptr;
+ char** marker_allele_ptrs = nullptr;
uintptr_t loaded_chrom_mask[CHROM_MASK_WORDS];
uintptr_t sf_mask[CHROM_MASK_WORDS];
uint32_t missing_template_seg_len[5];
@@ -574,7 +542,7 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
uint32_t sf_entry_ct;
uint32_t sf_lltop;
char* bufptr;
- char* bufptr3;
+ char* col2_ptr;
uintptr_t ulii;
uint32_t ukk;
uint32_t umm;
@@ -583,735 +551,769 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
int32_t jj;
uint32_t cur_pos;
char cc;
- fill_ulong_zero(loaded_chrom_mask, CHROM_MASK_WORDS);
- insert_buf[0] = NULL;
- insert_buf[1] = NULL;
- insert_buf[2] = NULL;
- insert_buf[3] = NULL;
- if (sf_ct) {
- sf_start_idxs = (uint32_t*)malloc((MAX_POSSIBLE_CHROM + 1) * sizeof(int32_t));
- if (!sf_start_idxs) {
- goto load_bim_ret_NOMEM;
- }
- if (bigstack_alloc_ui(sf_ct, &sf_str_chroms) ||
- bigstack_alloc_ui(sf_ct, &sf_str_pos) ||
- bigstack_alloc_ui(sf_ct, &sf_str_lens) ||
- bigstack_alloc_ui(3 * (MAX_POSSIBLE_CHROM + sf_ct), &sf_llbuf)) {
- goto load_bim_ret_NOMEM;
- }
- for (uii = 0; uii < sf_ct; uii++) {
- sf_str_chroms[uii] = MAX_POSSIBLE_CHROM;
- sf_str_lens[uii] = strlen(&(sf_range_list_ptr->names[uii * sf_max_len]));
- }
- }
- fill_uint_zero(missing_template_seg_len, 5);
- missing_template_seg[0] = NULL;
- missing_template_seg[1] = NULL;
- missing_template_seg[2] = NULL;
- missing_template_seg[3] = NULL;
- missing_template_seg[4] = NULL;
- if (missing_mid_template) {
- if (!missing_marker_id_match) {
- missing_marker_id_match = &(g_one_char_strs[92]); // '.'
- }
- missing_marker_id_match_len = strlen(missing_marker_id_match);
- bufptr = missing_mid_template;
- missing_template_seg[template_insert_ct] = bufptr; // current segment start
- cc = *bufptr; // template string previously validated
- do {
- if (cc == '@') {
- ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
- ukk = 0;
- goto load_bim_template_match;
- } else if (cc == '#') {
- ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
- ukk = 1;
- goto load_bim_template_match;
- } else if (cc == '$') {
- ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
- cc = *(++bufptr);
- ukk = ((unsigned char)cc) - 47;
- load_bim_template_match:
- missing_template_seg_len[template_insert_ct] = ujj;
- missing_template_base_len += ujj;
- missing_template_seg_order[template_insert_ct++] = ukk;
- missing_template_seg[template_insert_ct] = &(bufptr[1]);
- }
- cc = *(++bufptr);
- } while (cc);
- ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
- missing_template_seg_len[template_insert_ct] = ujj;
- missing_template_base_len += ujj;
- insert_buf[1] = poscharbuf;
- if (template_insert_ct == 4) {
- insert_buf[2] = (char*)malloc(new_id_max_allele_len + 1);
- insert_buf[3] = (char*)malloc(new_id_max_allele_len + 1);
- if ((!insert_buf[2]) || (!insert_buf[3])) {
+ {
+ fill_ulong_zero(CHROM_MASK_WORDS, loaded_chrom_mask);
+ insert_buf[0] = nullptr;
+ insert_buf[1] = nullptr;
+ insert_buf[2] = nullptr;
+ insert_buf[3] = nullptr;
+ if (sf_ct) {
+ sf_start_idxs = (uint32_t*)malloc((MAX_POSSIBLE_CHROM + 1) * sizeof(int32_t));
+ if (!sf_start_idxs) {
goto load_bim_ret_NOMEM;
}
- }
- }
- if (fopen_checked(bimname, "r", &bimfile)) {
- goto load_bim_ret_OPEN_FAIL;
- }
- // first pass: count columns, determine raw marker count, determine maximum
- // marker ID length and/or marker allele length if necessary, save
- // nonstandard chromosome names.
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size <= MAXLINELEN) {
- goto load_bim_ret_NOMEM;
- }
- loadbuf = (char*)g_bigstack_base;
- loadbuf[loadbuf_size - 1] = ' ';
- while (fgets(loadbuf, loadbuf_size, bimfile)) {
- line_idx++;
- if (!loadbuf[loadbuf_size - 1]) {
- if (loadbuf_size == MAXLINEBUFLEN) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, ftype_str);
- goto load_bim_ret_INVALID_FORMAT_2;
- } else {
+ if (bigstack_alloc_ui(sf_ct, &sf_str_chroms) ||
+ bigstack_alloc_ui(sf_ct, &sf_str_pos) ||
+ bigstack_alloc_ui(sf_ct, &sf_str_lens) ||
+ bigstack_alloc_ui(3 * (MAX_POSSIBLE_CHROM + sf_ct), &sf_llbuf)) {
goto load_bim_ret_NOMEM;
}
- }
- uii = strlen(loadbuf);
- if (uii >= max_bim_linelen) {
- max_bim_linelen = uii + 1;
- }
- // bufptr3 = col 1 start
- bufptr3 = skip_initial_spaces(loadbuf);
- if (is_eoln_or_comment_kns(*bufptr3)) {
- continue;
- }
- jj = get_chrom_code(chrom_info_ptr, bufptr3);
- if (jj < 0) {
- if (chrom_error(ftype_str, chrom_info_ptr, bufptr3, line_idx, jj, allow_extra_chroms)) {
- goto load_bim_ret_INVALID_FORMAT;
- }
- retval = resolve_or_add_chrom_name(bufptr3, ftype_str, line_idx, chrom_info_ptr, &jj);
- if (retval) {
- goto load_bim_ret_1;
+ for (uii = 0; uii < sf_ct; uii++) {
+ sf_str_chroms[uii] = MAX_POSSIBLE_CHROM;
+ sf_str_lens[uii] = strlen(&(sf_range_list_ptr->names[uii * sf_max_len]));
+ }
+ }
+ fill_uint_zero(5, missing_template_seg_len);
+ missing_template_seg[0] = nullptr;
+ missing_template_seg[1] = nullptr;
+ missing_template_seg[2] = nullptr;
+ missing_template_seg[3] = nullptr;
+ missing_template_seg[4] = nullptr;
+ if (missing_mid_template) {
+ if (!missing_marker_id_match) {
+ missing_marker_id_match = &(g_one_char_strs[92]); // '.'
+ }
+ missing_marker_id_match_len = strlen(missing_marker_id_match);
+ bufptr = missing_mid_template;
+ missing_template_seg[template_insert_ct] = bufptr; // current segment start
+ cc = *bufptr; // template string previously validated
+ do {
+ if (cc == '@') {
+ ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
+ ukk = 0;
+ goto load_bim_template_match;
+ } else if (cc == '#') {
+ ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
+ ukk = 1;
+ goto load_bim_template_match;
+ } else if (cc == '$') {
+ ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
+ cc = *(++bufptr);
+ ukk = ((unsigned char)cc) - 47;
+ load_bim_template_match:
+ missing_template_seg_len[template_insert_ct] = ujj;
+ missing_template_base_len += ujj;
+ missing_template_seg_order[template_insert_ct++] = ukk;
+ missing_template_seg[template_insert_ct] = &(bufptr[1]);
+ }
+ cc = *(++bufptr);
+ } while (cc);
+ ujj = (uintptr_t)(bufptr - missing_template_seg[template_insert_ct]);
+ missing_template_seg_len[template_insert_ct] = ujj;
+ missing_template_base_len += ujj;
+ insert_buf[1] = poscharbuf;
+ if (template_insert_ct == 4) {
+ insert_buf[2] = (char*)malloc(new_id_max_allele_slen + 1);
+ insert_buf[3] = (char*)malloc(new_id_max_allele_slen + 1);
+ if ((!insert_buf[2]) || (!insert_buf[3])) {
+ goto load_bim_ret_NOMEM;
+ }
}
}
+ if (fopen_checked(bimname, "r", &bimfile)) {
+ goto load_bim_ret_OPEN_FAIL;
+ }
+ // first pass: count columns, determine raw marker count, determine maximum
+ // marker ID length and/or marker allele length if necessary, save
+ // nonstandard chromosome names.
- // bufptr = col 2 start
- bufptr = next_token(bufptr3);
- if (no_more_tokens_kns(bufptr)) {
- goto load_bim_ret_MISSING_TOKENS;
+ // ensure strcmp_se comparison doesn't read past end of buffer
+ loadbuf_size = bigstack_left() - 16;
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size <= MAXLINELEN) {
+ goto load_bim_ret_NOMEM;
}
- ulii = strlen_se(bufptr);
- if (!unfiltered_marker_ct) {
- if (ftype_str[1] == 'b') {
- // .bim: bufptr2 = col 5 start
- bufptr2 = next_token_mult(bufptr, 3);
- } else {
- // .map
- bufptr2 = next_token(bufptr);
- }
- if (no_more_tokens_kns(bufptr2)) {
- goto load_bim_ret_MISSING_TOKENS;
- }
- // check if col 6 exists
- if (*(skip_initial_spaces(token_endnn(bufptr2))) > ' ') {
- *map_cols_ptr = 4;
- mcm2 = 2;
+ loadbuf = (char*)g_bigstack_base;
+ loadbuf[loadbuf_size - 1] = ' ';
+ while (fgets(loadbuf, loadbuf_size, bimfile)) {
+ line_idx++;
+ if (!loadbuf[loadbuf_size - 1]) {
+ if (loadbuf_size == MAXLINEBUFLEN) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, ftype_str);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ } else {
+ goto load_bim_ret_NOMEM;
+ }
}
- }
- if (marker_alleles_needed || missing_marker_id_match_len) {
- bufptr4 = next_token_mult(bufptr, mcm2 + 1);
- bufptr5 = next_token(bufptr4);
- if (no_more_tokens_kns(bufptr5)) {
- goto load_bim_ret_MISSING_TOKENS;
+ uii = strlen(loadbuf);
+ if (uii >= max_bim_linelen) {
+ max_bim_linelen = uii + 1;
}
- uii = strlen_se(bufptr4);
- ujj = strlen_se(bufptr5);
- if (marker_alleles_needed) {
- if (uii >= max_marker_allele_len) {
- max_marker_allele_len = uii + 1;
- }
- if (ujj >= max_marker_allele_len) {
- max_marker_allele_len = ujj + 1;
+ char* loadbuf_first_token = skip_initial_spaces(loadbuf);
+ if (is_eoln_or_comment_kns(*loadbuf_first_token)) {
+ if (is_bim) {
+ if (!strcmp_se(loadbuf_first_token, "#CHROM", 6)) {
+ if (chrom_header_line_present) {
+ sprintf(g_logbuf, "Error: Multiple #CHROM header lines in %s.\n", ftype_str);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ }
+ // support plink 2.x files with default column order; error out on
+ // VCF and other column orders
+ loadbuf_first_token = skip_initial_spaces(&(loadbuf_first_token[6]));
+ if (strcmp_se(loadbuf_first_token, "ID", 2)) {
+ goto load_bim_ret_UNSUPPORTED_COLUMN_ORDER;
+ }
+ loadbuf_first_token = skip_initial_spaces(&(loadbuf_first_token[2]));
+ if (!strcmp_se(loadbuf_first_token, "CM", 2)) {
+ mcm2 = 2;
+ loadbuf_first_token = skip_initial_spaces(&(loadbuf_first_token[2]));
+ }
+ if (strcmp_se(loadbuf_first_token, "POS", 3)) {
+ goto load_bim_ret_UNSUPPORTED_COLUMN_ORDER;
+ }
+ loadbuf_first_token = skip_initial_spaces(&(loadbuf_first_token[3]));
+ if (strcmp_se(loadbuf_first_token, "ALT", 3)) {
+ goto load_bim_ret_UNSUPPORTED_COLUMN_ORDER;
+ }
+ loadbuf_first_token = skip_initial_spaces(&(loadbuf_first_token[3]));
+ if (strcmp_se(loadbuf_first_token, "REF", 3)) {
+ goto load_bim_ret_UNSUPPORTED_COLUMN_ORDER;
+ }
+ chrom_header_line_present = 1;
+ }
+ } else if (*loadbuf_first_token == '#') {
+ sprintf(g_logbuf, "Error: Header lines are not permitted in %ss.\n", ftype_str);
+ goto load_bim_ret_INVALID_FORMAT_2;
}
+ continue;
}
- }
- if ((ulii == missing_marker_id_match_len) && (!memcmp(bufptr, missing_marker_id_match, missing_marker_id_match_len))) {
- bufptr2 = next_token_mult(bufptr, mcm2);
- if (no_more_tokens_kns(bufptr2)) {
+ char* first_token_end = token_endnn(loadbuf_first_token);
+ col2_ptr = skip_initial_spaces(first_token_end);
+ if (is_eoln_kns(*col2_ptr)) {
goto load_bim_ret_MISSING_TOKENS;
}
- insert_buf_len[1] = strlen_se(bufptr2);
- if (insert_buf_len[1] > 11) {
- // permit negative sign and 10 digit number
- goto load_bim_ret_INVALID_BP_COORDINATE;
- }
- insert_buf_len[0] = strlen_se(bufptr3);
- ulii = missing_template_base_len + insert_buf_len[1] + insert_buf_len[0];
- if (template_insert_ct == 4) {
- uii = MINV(uii, new_id_max_allele_len);
- ujj = MINV(ujj, new_id_max_allele_len);
- ulii += uii + ujj;
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - loadbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code(loadbuf_first_token, ftype_str, line_idx, chrom_name_slen, allow_extra_chroms, chrom_info_ptr, &cur_chrom_code);
+ if (retval) {
+ goto load_bim_ret_1;
}
- if (ulii >= max_marker_id_len) {
- if (ulii > MAX_ID_LEN) {
- logerrprint("Error: Variant names are limited to " MAX_ID_LEN_STR " characters.\n");
- goto load_bim_ret_INVALID_FORMAT;
+
+ char* col2_end = token_endnn(col2_ptr);
+ ulii = (uintptr_t)(col2_end - col2_ptr);
+ if (!unfiltered_marker_ct) {
+ if (is_bim) {
+ // .bim: bufptr2 = col 5 start
+ bufptr2 = next_token_mult(col2_end, 3);
+ } else {
+ // .map
+ bufptr2 = skip_initial_spaces(col2_end);
+ }
+ if (no_more_tokens_kns(bufptr2)) {
+ goto load_bim_ret_MISSING_TOKENS;
+ }
+ // check if CM col exists
+ if ((!chrom_header_line_present) && (!is_eoln_kns(*(skip_initial_spaces(token_endnn(bufptr2)))))) {
+ mcm2 = 2;
}
- max_marker_id_len = ulii + 1;
}
- ulii = 0;
- } else {
- if (ulii >= max_marker_id_len) {
- max_marker_id_len = ulii + 1;
+ if (marker_alleles_needed || missing_marker_id_match_len) {
+ bufptr4 = next_token_mult(col2_end, mcm2 + 1);
+ bufptr5 = next_token(bufptr4);
+ if (no_more_tokens_kns(bufptr5)) {
+ goto load_bim_ret_MISSING_TOKENS;
+ }
+ uii = strlen_se(bufptr4);
+ ujj = strlen_se(bufptr5);
+ if (memchr(bufptr4, ',', ujj + ((uintptr_t)(bufptr5 - bufptr4)))) {
+ // this breaks VCF and plink 2.x
+ // may need to add word wrapping if this message is changed
+ sprintf(g_logbuf, "Error: Comma-containing allele code on line %" PRIuPTR " of %s.\n", line_idx, ftype_str);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ }
+ if (marker_alleles_needed) {
+ if (uii >= max_marker_allele_blen) {
+ max_marker_allele_blen = uii + 1;
+ }
+ if (ujj >= max_marker_allele_blen) {
+ max_marker_allele_blen = ujj + 1;
+ }
+ }
}
- }
- if (slen_check) {
- if (!ulii) {
- // --set-missing-var-ids applies
- // safe to clobber buffer contents
- // bufptr3 = chromosome name, umm = chr length
- insert_buf[0] = bufptr3;
- memcpyx(poscharbuf, bufptr2, insert_buf_len[1], '\0');
+ if ((ulii == missing_marker_id_match_len) && (!memcmp(col2_ptr, missing_marker_id_match, missing_marker_id_match_len))) {
+ bufptr2 = next_token_mult(col2_end, mcm2);
+ if (no_more_tokens_kns(bufptr2)) {
+ goto load_bim_ret_MISSING_TOKENS;
+ }
+ insert_buf_len[1] = strlen_se(bufptr2);
+ if (insert_buf_len[1] > 11) {
+ // permit negative sign and 10 digit number
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ insert_buf_len[0] = chrom_name_slen;
+ ulii = missing_template_base_len + insert_buf_len[1] + insert_buf_len[0];
if (template_insert_ct == 4) {
- bufptr4[uii] = '\0';
- bufptr5[ujj] = '\0';
- // ASCII-sort allele names
- if (strcmp(bufptr4, bufptr5) <= 0) {
- memcpy(insert_buf[2], bufptr4, uii);
- insert_buf_len[2] = uii;
- memcpy(insert_buf[3], bufptr5, ujj);
- insert_buf_len[3] = ujj;
- } else {
- memcpy(insert_buf[3], bufptr4, uii);
- insert_buf_len[3] = uii;
- memcpy(insert_buf[2], bufptr5, ujj);
- insert_buf_len[2] = ujj;
- }
+ uii = MINV(uii, new_id_max_allele_slen);
+ ujj = MINV(ujj, new_id_max_allele_slen);
+ ulii += uii + ujj;
}
- bufptr4 = bufptr;
- for (uii = 0; uii < template_insert_ct; uii++) {
- bufptr4 = memcpya(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii]);
- ujj = missing_template_seg_order[uii];
- bufptr4 = memcpya(bufptr4, insert_buf[ujj], insert_buf_len[ujj]);
+ if (ulii >= max_marker_id_blen) {
+ if (ulii > MAX_ID_BLEN) {
+ logerrprint("Error: Variant names are limited to " MAX_ID_SLEN_STR " characters.\n");
+ goto load_bim_ret_INVALID_FORMAT;
+ }
+ max_marker_id_blen = ulii + 1;
}
- bufptr4 = memcpya(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii]);
- ulii = (uintptr_t)(bufptr4 - bufptr);
- bufptr2 = poscharbuf;
+ ulii = 0;
} else {
- bufptr2 = next_token_mult(bufptr, mcm2);
- if (no_more_tokens_kns(bufptr2)) {
- goto load_bim_ret_MISSING_TOKENS;
+ if (ulii >= max_marker_id_blen) {
+ max_marker_id_blen = ulii + 1;
}
- bufptr2[strlen_se(bufptr2)] = '\0';
}
- if (sf_ct) {
- uii = 0;
- do {
- if ((ulii == sf_str_lens[uii]) && (!memcmp(bufptr, &(sf_range_list_ptr->names[uii * sf_max_len]), ulii))) {
- if (sf_str_chroms[uii] != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
- }
- sf_str_chroms[uii] = jj;
- if (scan_uint_defcap(bufptr2, &(sf_str_pos[uii]))) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
+ if (slen_check) {
+ if (!ulii) {
+ // --set-missing-var-ids applies
+ // safe to clobber buffer contents
+ insert_buf[0] = loadbuf_first_token;
+ memcpyx(poscharbuf, bufptr2, insert_buf_len[1], '\0');
+ if (template_insert_ct == 4) {
+ bufptr4[uii] = '\0';
+ bufptr5[ujj] = '\0';
+ // ASCII-sort allele names
+ if (strcmp(bufptr4, bufptr5) <= 0) {
+ memcpy(insert_buf[2], bufptr4, uii);
+ insert_buf_len[2] = uii;
+ memcpy(insert_buf[3], bufptr5, ujj);
+ insert_buf_len[3] = ujj;
+ } else {
+ memcpy(insert_buf[3], bufptr4, uii);
+ insert_buf_len[3] = uii;
+ memcpy(insert_buf[2], bufptr5, ujj);
+ insert_buf_len[2] = ujj;
}
- break;
- }
- } while (++uii < sf_ct);
- } else {
- if ((ulii == from_slen) && (!memcmp(bufptr, markername_from, ulii))) {
- if (from_chrom != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
}
- from_chrom = jj;
- if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_start)) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
+ bufptr4 = col2_ptr;
+ for (uii = 0; uii < template_insert_ct; uii++) {
+ bufptr4 = memcpya(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii]);
+ ujj = missing_template_seg_order[uii];
+ bufptr4 = memcpya(bufptr4, insert_buf[ujj], insert_buf_len[ujj]);
}
- if (to_chrom != MAX_POSSIBLE_CHROM) {
- if (from_chrom != to_chrom) {
- goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
- }
+ bufptr4 = memcpya(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii]);
+ ulii = (uintptr_t)(bufptr4 - col2_ptr);
+ bufptr2 = poscharbuf;
+ } else {
+ bufptr2 = next_token_mult(col2_ptr, mcm2);
+ if (no_more_tokens_kns(bufptr2)) {
+ goto load_bim_ret_MISSING_TOKENS;
}
- fill_ulong_zero(chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS);
- SET_BIT(from_chrom, chrom_info_ptr->chrom_mask);
+ bufptr2[strlen_se(bufptr2)] = '\0';
}
- if ((ulii == to_slen) && (!memcmp(bufptr, markername_to, ulii))) {
- if (to_chrom != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
- }
- to_chrom = jj;
- if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_end)) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
- }
- if (from_chrom != MAX_POSSIBLE_CHROM) {
- if (to_chrom != from_chrom) {
- goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
+ if (sf_ct) {
+ uii = 0;
+ do {
+ if ((ulii == sf_str_lens[uii]) && (!memcmp(col2_ptr, &(sf_range_list_ptr->names[uii * sf_max_len]), ulii))) {
+ if (sf_str_chroms[uii] != MAX_POSSIBLE_CHROM) {
+ goto load_bim_ret_DUPLICATE_ID;
+ }
+ sf_str_chroms[uii] = cur_chrom_code;
+ if (scan_uint_defcap(bufptr2, &(sf_str_pos[uii]))) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ break;
}
+ } while (++uii < sf_ct);
+ } else {
+ if ((ulii == from_slen) && (!memcmp(col2_ptr, markername_from, ulii))) {
+ if (from_chrom != MAX_POSSIBLE_CHROM) {
+ goto load_bim_ret_DUPLICATE_ID;
+ }
+ from_chrom = cur_chrom_code;
+ if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_start)) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ if (to_chrom != MAX_POSSIBLE_CHROM) {
+ if (from_chrom != to_chrom) {
+ goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
+ }
+ }
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+ SET_BIT(from_chrom, chrom_info_ptr->chrom_mask);
}
- fill_ulong_zero(chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS);
- SET_BIT(to_chrom, chrom_info_ptr->chrom_mask);
- }
- if ((ulii == snp_slen) && (!memcmp(bufptr, markername_snp, ulii))) {
- if (snp_chrom != MAX_POSSIBLE_CHROM) {
- goto load_bim_ret_DUPLICATE_ID;
- }
- snp_chrom = jj;
- if (scan_uint_defcap(bufptr2, &snp_pos)) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
+ if ((ulii == to_slen) && (!memcmp(col2_ptr, markername_to, ulii))) {
+ if (to_chrom != MAX_POSSIBLE_CHROM) {
+ goto load_bim_ret_DUPLICATE_ID;
+ }
+ to_chrom = cur_chrom_code;
+ if (scan_uint_defcap(bufptr2, (uint32_t*)&marker_pos_end)) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ if (from_chrom != MAX_POSSIBLE_CHROM) {
+ if (to_chrom != from_chrom) {
+ goto load_bim_ret_FROM_TO_DIFFERENT_CHROM;
+ }
+ }
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+ SET_BIT(to_chrom, chrom_info_ptr->chrom_mask);
}
- if (!exclude_snp) {
- fill_ulong_zero(chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS);
- SET_BIT(snp_chrom, chrom_info_ptr->chrom_mask);
+ if ((ulii == snp_slen) && (!memcmp(col2_ptr, markername_snp, ulii))) {
+ if (snp_chrom != MAX_POSSIBLE_CHROM) {
+ goto load_bim_ret_DUPLICATE_ID;
+ }
+ snp_chrom = cur_chrom_code;
+ if (scan_uint_defcap(bufptr2, &snp_pos)) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ if (!exclude_snp) {
+ fill_ulong_zero(CHROM_MASK_WORDS, chrom_info_ptr->chrom_mask);
+ SET_BIT(snp_chrom, chrom_info_ptr->chrom_mask);
+ }
}
}
}
+ unfiltered_marker_ct++;
}
- unfiltered_marker_ct++;
- }
- if (sf_ct) {
- for (uii = 0; uii < sf_ct; uii++) {
- if (sf_str_chroms[uii] == MAX_POSSIBLE_CHROM) {
- LOGPREPRINTFWW("Error: Variant '%s' not found in %s.\n", &(sf_range_list_ptr->names[uii * sf_max_len]), ftype_str);
- goto load_bim_ret_INVALID_FORMAT_2;
+ if (sf_ct) {
+ for (uii = 0; uii < sf_ct; uii++) {
+ if (sf_str_chroms[uii] == MAX_POSSIBLE_CHROM) {
+ LOGPREPRINTFWW("Error: Variant '%s' not found in %s.\n", &(sf_range_list_ptr->names[uii * sf_max_len]), ftype_str);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ }
}
- }
- // effectively build out one linked list per chromosome
- memcpy(sf_mask, chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
- sf_entry_ct = 0;
- sf_lltop = 0;
- ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
- for (uii = 0; uii <= ujj; uii++) {
- sf_start_idxs[uii] = 1; // impossible (multiples of 3)
- }
- uii = 0;
- do {
- ujj = sf_str_chroms[uii];
- ukk = sf_str_pos[uii];
- if (sf_range_list_ptr->starts_range[uii]) {
- umm = sf_str_chroms[uii + 1];
- unn = sf_str_pos[uii + 1];
- if (ujj != umm) {
- if (ujj > umm) {
- uoo = ujj;
- ujj = umm;
- umm = uoo;
- uoo = ukk;
- ukk = unn;
- unn = uoo;
- }
- if (IS_SET(sf_mask, ujj)) {
- load_bim_sf_insert(ujj, ukk, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
- for (uoo = ujj + 1; uoo < umm; uoo++) {
- if (IS_SET(sf_mask, uoo)) {
- load_bim_sf_insert(uoo, 0, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
+ // effectively build out one linked list per chromosome
+ memcpy(sf_mask, chrom_info_ptr->chrom_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
+ sf_entry_ct = 0;
+ sf_lltop = 0;
+ ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
+ for (uii = 0; uii <= ujj; uii++) {
+ sf_start_idxs[uii] = 1; // impossible (multiples of 3)
+ }
+ uii = 0;
+ do {
+ ujj = sf_str_chroms[uii];
+ ukk = sf_str_pos[uii];
+ if (sf_range_list_ptr->starts_range[uii]) {
+ umm = sf_str_chroms[uii + 1];
+ unn = sf_str_pos[uii + 1];
+ if (ujj != umm) {
+ if (ujj > umm) {
+ uoo = ujj;
+ ujj = umm;
+ umm = uoo;
+ uoo = ukk;
+ ukk = unn;
+ unn = uoo;
+ }
+ if (IS_SET(sf_mask, ujj)) {
+ load_bim_sf_insert(ujj, ukk, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
+ }
+ for (uoo = ujj + 1; uoo < umm; uoo++) {
+ if (IS_SET(sf_mask, uoo)) {
+ load_bim_sf_insert(uoo, 0, 0x7fffffff, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
+ }
+ }
+ if (IS_SET(sf_mask, umm)) {
+ load_bim_sf_insert(umm, 0, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
+ }
+ } else {
+ if (ukk > unn) {
+ umm = ukk;
+ ukk = unn;
+ unn = umm;
+ }
+ if (IS_SET(sf_mask, ujj)) {
+ load_bim_sf_insert(ujj, ukk, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
}
}
- if (IS_SET(sf_mask, umm)) {
- load_bim_sf_insert(umm, 0, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
+ uii += 2;
} else {
- if (ukk > unn) {
- umm = ukk;
- ukk = unn;
- unn = umm;
- }
if (IS_SET(sf_mask, ujj)) {
- load_bim_sf_insert(ujj, ukk, unn, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
+ load_bim_sf_insert(ujj, ukk, ukk, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
}
+ uii++;
}
- uii += 2;
- } else {
- if (IS_SET(sf_mask, ujj)) {
- load_bim_sf_insert(ujj, ukk, ukk, sf_start_idxs, sf_llbuf, &sf_lltop, &sf_entry_ct);
- }
- uii++;
+ } while (uii < sf_ct);
+ // now compactify
+ sf_pos = (uint32_t*)malloc(sf_entry_ct * 2 * sizeof(int32_t));
+ if (!sf_pos) {
+ goto load_bim_ret_NOMEM;
}
- } while (uii < sf_ct);
- // now compactify
- sf_pos = (uint32_t*)malloc(sf_entry_ct * 2 * sizeof(int32_t));
- if (!sf_pos) {
- goto load_bim_ret_NOMEM;
- }
- ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
- ukk = 0;
- for (uii = 0; uii <= ujj; uii++) {
- if (sf_start_idxs[uii] == 1) {
- CLEAR_BIT(uii, sf_mask);
+ ujj = chrom_info_ptr->max_code + chrom_info_ptr->name_ct;
+ ukk = 0;
+ for (uii = 0; uii <= ujj; uii++) {
+ if (sf_start_idxs[uii] == 1) {
+ CLEAR_BIT(uii, sf_mask);
+ sf_start_idxs[uii] = ukk;
+ continue;
+ }
+ umm = sf_start_idxs[uii];
sf_start_idxs[uii] = ukk;
- continue;
+ do {
+ sf_pos[ukk++] = sf_llbuf[umm];
+ sf_pos[ukk++] = sf_llbuf[umm + 1];
+ umm = sf_llbuf[umm + 2];
+ } while (umm != 1);
}
- umm = sf_start_idxs[uii];
- sf_start_idxs[uii] = ukk;
- do {
- sf_pos[ukk++] = sf_llbuf[umm];
- sf_pos[ukk++] = sf_llbuf[umm + 1];
- umm = sf_llbuf[umm + 2];
- } while (umm != 1);
- }
- sf_start_idxs[ujj + 1] = ukk;
- if (!exclude_snp) {
- memcpy(chrom_info_ptr->chrom_mask, sf_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
- }
- bigstack_reset(bigstack_mark);
- }
- if (!feof(bimfile)) {
- goto load_bim_ret_READ_FAIL;
- }
- if ((!unfiltered_marker_ct) && (!allow_no_variants)) {
- sprintf(g_logbuf, "Error: No variants in %s.\n", ftype_str);
- goto load_bim_ret_INVALID_FORMAT_2;
- } else if (unfiltered_marker_ct > 2147483645) {
- // maximum prime < 2^32 is 4294967291; quadratic hashing guarantee breaks
- // down past that divided by 2.
- // PLINK/SEQ now supports a 64-bit count here, and few other tools do, so
- // it's appropriate to explicitly recommend it.
- logerrprint("Error: PLINK does not support more than 2^31 - 3 variants. We recommend other\nsoftware, such as PLINK/SEQ, for very deep studies of small numbers of genomes.\n");
- goto load_bim_ret_INVALID_FORMAT;
- }
- if (from_slen || to_slen) {
- if (from_slen && (from_chrom == MAX_POSSIBLE_CHROM)) {
- LOGPREPRINTFWW("Error: --from variant '%s' not found.\n", markername_from);
- goto load_bim_ret_INVALID_FORMAT_2;
- }
- if (to_slen && (to_chrom == MAX_POSSIBLE_CHROM)) {
- LOGPREPRINTFWW("Error: --to variant '%s' not found.\n", markername_to);
- goto load_bim_ret_INVALID_FORMAT_2;
- }
- if (marker_pos_start == -1) {
- marker_pos_start = 0;
- }
- if (marker_pos_end == -1) {
- marker_pos_end = 0x7fffffff;
+ sf_start_idxs[ujj + 1] = ukk;
+ if (!exclude_snp) {
+ memcpy(chrom_info_ptr->chrom_mask, sf_mask, CHROM_MASK_WORDS * sizeof(intptr_t));
+ }
+ bigstack_reset(bigstack_mark);
}
- if (marker_pos_start > marker_pos_end) {
- jj = marker_pos_start;
- marker_pos_start = marker_pos_end;
- marker_pos_end = jj;
+ if (!feof(bimfile)) {
+ goto load_bim_ret_READ_FAIL;
}
- }
- if (snp_slen) {
- if (snp_chrom == MAX_POSSIBLE_CHROM) {
- LOGPREPRINTFWW("Error: --%ssnp variant '%s' not found.\n", exclude_snp? "exclude-" : "", markername_snp);
+ if ((!unfiltered_marker_ct) && (!allow_no_variants)) {
+ sprintf(g_logbuf, "Error: No variants in %s.\n", ftype_str);
goto load_bim_ret_INVALID_FORMAT_2;
+ } else if (unfiltered_marker_ct > 2147483645) {
+ // maximum prime < 2^32 is 4294967291; quadratic hashing guarantee breaks
+ // down past that divided by 2.
+ // PLINK/SEQ now supports a 64-bit count here, and few other tools do, so
+ // it's appropriate to explicitly recommend it.
+ logerrprint("Error: PLINK does not support more than 2^31 - 3 variants. We recommend other\nsoftware, such as PLINK/SEQ, for very deep studies of small numbers of genomes.\n");
+ goto load_bim_ret_INVALID_FORMAT;
}
- if (!exclude_snp) {
- if (snp_window_size == -1) {
- // no harm in screening on position before variant ID
- uii = 0;
- } else {
- uii = snp_window_size;
+ if (from_slen || to_slen) {
+ if (from_slen && (from_chrom == MAX_POSSIBLE_CHROM)) {
+ LOGPREPRINTFWW("Error: --from variant '%s' not found.\n", markername_from);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ }
+ if (to_slen && (to_chrom == MAX_POSSIBLE_CHROM)) {
+ LOGPREPRINTFWW("Error: --to variant '%s' not found.\n", markername_to);
+ goto load_bim_ret_INVALID_FORMAT_2;
}
- if (uii > snp_pos) {
+ if (marker_pos_start == -1) {
marker_pos_start = 0;
- } else {
- marker_pos_start = snp_pos - uii;
}
- if (uii > (0x7fffffff - snp_pos)) {
+ if (marker_pos_end == -1) {
marker_pos_end = 0x7fffffff;
- } else {
- marker_pos_end = snp_pos + uii;
}
- } else if (snp_window_size != -1) {
- if ((uint32_t)snp_window_size <= snp_pos) {
- exclude_window_start = snp_pos - snp_window_size;
+ if (marker_pos_start > marker_pos_end) {
+ jj = marker_pos_start;
+ marker_pos_start = marker_pos_end;
+ marker_pos_end = jj;
}
- if ((uint32_t)snp_window_size > (0x7fffffff - snp_pos)) {
- exclude_window_end = 0x7fffffff;
- } else {
- exclude_window_end = snp_pos + snp_window_size;
+ }
+ if (snp_slen) {
+ if (snp_chrom == MAX_POSSIBLE_CHROM) {
+ LOGPREPRINTFWW("Error: --%ssnp variant '%s' not found.\n", exclude_snp? "exclude-" : "", markername_snp);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ }
+ if (!exclude_snp) {
+ if (snp_window_size == -1) {
+ // no harm in screening on position before variant ID
+ uii = 0;
+ } else {
+ uii = snp_window_size;
+ }
+ if (uii > snp_pos) {
+ marker_pos_start = 0;
+ } else {
+ marker_pos_start = snp_pos - uii;
+ }
+ if (uii > (0x7fffffff - snp_pos)) {
+ marker_pos_end = 0x7fffffff;
+ } else {
+ marker_pos_end = snp_pos + uii;
+ }
+ } else if (snp_window_size != -1) {
+ if ((uint32_t)snp_window_size <= snp_pos) {
+ exclude_window_start = snp_pos - snp_window_size;
+ }
+ if ((uint32_t)snp_window_size > (0x7fffffff - snp_pos)) {
+ exclude_window_end = 0x7fffffff;
+ } else {
+ exclude_window_end = snp_pos + snp_window_size;
+ }
}
}
- }
- if (max_marker_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: Variant names are limited to " MAX_ID_LEN_STR " characters.\n");
- goto load_bim_ret_INVALID_FORMAT;
- }
- *unfiltered_marker_ct_ptr = unfiltered_marker_ct;
- *max_marker_id_len_ptr = max_marker_id_len;
- rewind(bimfile);
- unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
+ if (max_marker_id_blen > MAX_ID_BLEN) {
+ logerrprint("Error: Variant names are limited to " MAX_ID_SLEN_STR " characters.\n");
+ goto load_bim_ret_INVALID_FORMAT;
+ }
+ *unfiltered_marker_ct_ptr = unfiltered_marker_ct;
+ *max_marker_id_blen_ptr = max_marker_id_blen;
+ rewind(bimfile);
+ unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
- // unfiltered_marker_ct can be very large, so use bigstack for all
- // allocations that are a multiple of it
+ // unfiltered_marker_ct can be very large, so use bigstack for all
+ // allocations that are a multiple of it
- // permanent bigstack allocation #1: marker_exclude
- // permanent bigstack allocation #2: set_allele_freqs
- if (bigstack_calloc_ul(unfiltered_marker_ctl, marker_exclude_ptr)) {
- goto load_bim_ret_NOMEM;
- }
- marker_exclude = *marker_exclude_ptr;
- if (set_allele_freqs_ptr) {
- if (bigstack_alloc_d(unfiltered_marker_ct, set_allele_freqs_ptr)) {
+ // permanent bigstack allocation #1: marker_exclude
+ // permanent bigstack allocation #2: set_allele_freqs
+ if (bigstack_calloc_ul(unfiltered_marker_ctl, marker_exclude_ptr)) {
goto load_bim_ret_NOMEM;
}
- // leave set_allele_freqs uninitialized
- if (nchrobs_ptr) {
- if (bigstack_alloc_ui(unfiltered_marker_ct, nchrobs_ptr)) {
+ marker_exclude = *marker_exclude_ptr;
+ if (set_allele_freqs_ptr) {
+ if (bigstack_alloc_d(unfiltered_marker_ct, set_allele_freqs_ptr)) {
goto load_bim_ret_NOMEM;
}
- // on the other hand, this is not autocomputed
- fill_uint_one(*nchrobs_ptr, unfiltered_marker_ct);
- }
- }
- fill_uint_zero(chrom_info_ptr->chrom_file_order, MAX_POSSIBLE_CHROM);
- fill_uint_zero(chrom_info_ptr->chrom_file_order_marker_idx, MAX_POSSIBLE_CHROM + 1);
- fill_uint_zero(chrom_info_ptr->chrom_start, MAX_POSSIBLE_CHROM);
- fill_uint_zero(chrom_info_ptr->chrom_end, MAX_POSSIBLE_CHROM);
- // permanent bigstack allocation #3, if needed: marker_pos
- if (marker_pos_needed) {
- if (bigstack_alloc_ui(unfiltered_marker_ct, marker_pos_ptr)) {
- goto load_bim_ret_NOMEM;
+ // leave set_allele_freqs uninitialized
+ if (nchrobs_ptr) {
+ if (bigstack_alloc_ui(unfiltered_marker_ct, nchrobs_ptr)) {
+ goto load_bim_ret_NOMEM;
+ }
+ // on the other hand, this is not autocomputed
+ fill_uint_one(unfiltered_marker_ct, *nchrobs_ptr);
+ }
}
- }
- if (marker_alleles_needed) {
- if (snps_only) {
- max_marker_allele_len = 2;
+ fill_uint_one(MAX_POSSIBLE_CHROM, chrom_info_ptr->chrom_idx_to_foidx);
+ // permanent bigstack allocation #3, if needed: marker_pos
+ if (marker_pos_needed) {
+ if (bigstack_alloc_ui(unfiltered_marker_ct, marker_pos_ptr)) {
+ goto load_bim_ret_NOMEM;
+ }
}
- if (max_marker_allele_len > NON_BIGSTACK_MIN - 1) {
- // guard against overflows
- LOGERRPRINTF("Error: Alleles are limited to %u characters.\n", NON_BIGSTACK_MIN - 1);
- goto load_bim_ret_INVALID_FORMAT;
+ if (marker_alleles_needed) {
+ if (snps_only) {
+ max_marker_allele_blen = 2;
+ }
+ if (max_marker_allele_blen > NON_BIGSTACK_MIN - 1) {
+ // guard against overflows
+ LOGERRPRINTF("Error: Alleles are limited to %u characters.\n", NON_BIGSTACK_MIN - 1);
+ goto load_bim_ret_INVALID_FORMAT;
+ }
+ *max_marker_allele_blen_ptr = max_marker_allele_blen;
+ marker_allele_ptrs = (char**)bigstack_alloc(unfiltered_marker_ct * 2 * sizeof(intptr_t));
+ if (!marker_allele_ptrs) {
+ goto load_bim_ret_NOMEM;
+ }
+ *marker_allele_pp = marker_allele_ptrs;
+ ujj = unfiltered_marker_ct * 2;
+ for (uii = 0; uii < ujj; uii++) {
+ marker_allele_ptrs[uii] = missing_geno_ptr;
+ }
}
- *max_marker_allele_len_ptr = max_marker_allele_len;
- marker_allele_ptrs = (char**)bigstack_alloc(unfiltered_marker_ct * 2 * sizeof(intptr_t));
- if (!marker_allele_ptrs) {
+ if (bigstack_alloc_c(unfiltered_marker_ct * max_marker_id_blen, marker_ids_ptr)) {
goto load_bim_ret_NOMEM;
}
- *marker_allele_pp = marker_allele_ptrs;
- ujj = unfiltered_marker_ct * 2;
- for (uii = 0; uii < ujj; uii++) {
- marker_allele_ptrs[uii] = missing_geno_ptr;
+ // todo: check whether marker_cms can be unloaded before
+ // marker_ids/marker_alleles, or vice versa
+ if (marker_cms_needed & MARKER_CMS_FORCED) {
+ if (bigstack_calloc_d(unfiltered_marker_ct, marker_cms_ptr)) {
+ goto load_bim_ret_NOMEM;
+ }
}
- }
- if (bigstack_alloc_c(unfiltered_marker_ct * max_marker_id_len, marker_ids_ptr)) {
- goto load_bim_ret_NOMEM;
- }
- // todo: check whether marker_cms can be unloaded before
- // marker_ids/marker_alleles, or vice versa
- if (marker_cms_needed & MARKER_CMS_FORCED) {
- if (bigstack_calloc_d(unfiltered_marker_ct, marker_cms_ptr)) {
- goto load_bim_ret_NOMEM;
+ if ((filter_flags & FILTER_ZERO_CMS) || (mcm2 == 1)) {
+ marker_cms_needed = 0;
}
- }
- if (filter_flags & FILTER_ZERO_CMS) {
- marker_cms_needed = 0;
- }
- // second pass: actually load stuff
- loadbuf2 = (char*)malloc(max_bim_linelen);
- if (!loadbuf2) {
- goto load_bim_ret_NOMEM;
- }
- if (missing_mid_template) {
- prev_new_id = (char*)malloc(max_marker_id_len);
- if (!prev_new_id) {
+ // second pass: actually load stuff
+ loadbuf2 = (char*)malloc(max_bim_linelen);
+ if (!loadbuf2) {
goto load_bim_ret_NOMEM;
}
- *prev_new_id = '\0';
- }
- line_idx = 0;
- for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
- do {
- line_idx++;
- if (!fgets(loadbuf2, max_bim_linelen, bimfile)) {
- goto load_bim_ret_READ_FAIL;
- }
- bufptr3 = skip_initial_spaces(loadbuf2);
- } while (is_eoln_or_comment_kns(*bufptr3));
- jj = get_chrom_code(chrom_info_ptr, bufptr3);
- if (jj != prev_chrom) {
- if (!split_chrom) {
- if (prev_chrom != -1) {
- chrom_info_ptr->chrom_end[(uint32_t)prev_chrom] = marker_uidx;
- }
- if (jj < prev_chrom) {
- *map_is_unsorted_ptr |= UNSORTED_CHROM;
- }
- prev_chrom = jj;
- if (is_set(loaded_chrom_mask, jj)) {
- if (split_chrom_cmd) {
- sprintf(g_logbuf, "Error: %s has a split chromosome. Use --%s by itself to\nremedy this.\n", ftype_str, split_chrom_cmd);
- goto load_bim_ret_INVALID_FORMAT_2;
+ if (missing_mid_template) {
+ prev_new_id = (char*)malloc(max_marker_id_blen);
+ if (!prev_new_id) {
+ goto load_bim_ret_NOMEM;
+ }
+ *prev_new_id = '\0';
+ }
+ line_idx = 0;
+ for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
+ char* loadbuf_first_token;
+ do {
+ line_idx++;
+ if (!fgets(loadbuf2, max_bim_linelen, bimfile)) {
+ goto load_bim_ret_READ_FAIL;
+ }
+ loadbuf_first_token = skip_initial_spaces(loadbuf2);
+ } while (is_eoln_or_comment_kns(*loadbuf_first_token));
+ char* first_token_end = token_endnn(loadbuf_first_token);
+ col2_ptr = skip_initial_spaces(first_token_end);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - loadbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code = get_chrom_code(loadbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code != prev_chrom) {
+ if (!split_chrom) {
+ if (cur_chrom_code < prev_chrom) {
+ *map_is_unsorted_ptr |= UNSORTED_CHROM;
+ }
+ prev_chrom = cur_chrom_code;
+ if (is_set(loaded_chrom_mask, cur_chrom_code)) {
+ if (split_chrom_cmd) {
+ sprintf(g_logbuf, "Error: %s has a split chromosome. Use --%s by itself to\nremedy this.\n", ftype_str, split_chrom_cmd);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ }
+ split_chrom = 1;
+ *map_is_unsorted_ptr = UNSORTED_CHROM | UNSORTED_BP | UNSORTED_SPLIT_CHROM;
+ } else {
+ chrom_info_ptr->chrom_file_order[++chroms_encountered_m1] = cur_chrom_code;
+ chrom_info_ptr->chrom_fo_vidx_start[chroms_encountered_m1] = marker_uidx;
+ chrom_info_ptr->chrom_idx_to_foidx[(uint32_t)cur_chrom_code] = chroms_encountered_m1;
}
- split_chrom = 1;
- *map_is_unsorted_ptr = UNSORTED_CHROM | UNSORTED_BP | UNSORTED_SPLIT_CHROM;
- } else {
- chrom_info_ptr->chrom_start[(uint32_t)jj] = marker_uidx;
- chrom_info_ptr->chrom_file_order[++chroms_encountered_m1] = jj;
- chrom_info_ptr->chrom_file_order_marker_idx[chroms_encountered_m1] = marker_uidx;
+ last_pos = 0;
}
- last_pos = 0;
+ set_bit(cur_chrom_code, loaded_chrom_mask);
}
- set_bit(jj, loaded_chrom_mask);
- }
- if (is_set(chrom_info_ptr->chrom_mask, jj)) {
- bufptr2 = next_token(bufptr3);
- if (no_more_tokens_kns(bufptr2)) {
- goto load_bim_ret_MISSING_TOKENS;
- }
- uii = strlen_se(bufptr2);
- ujj = (uii == missing_marker_id_match_len) && (!memcmp(bufptr2, missing_marker_id_match, missing_marker_id_match_len));
- if (!ujj) {
- memcpyx(&((*marker_ids_ptr)[marker_uidx * max_marker_id_len]), bufptr2, uii, '\0');
- }
- if (marker_cms_needed) {
- bufptr = next_token(bufptr2);
- if (no_more_tokens_kns(bufptr)) {
+ if (is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
+ if (no_more_tokens_kns(col2_ptr)) {
goto load_bim_ret_MISSING_TOKENS;
}
- if ((*bufptr != '0') || (bufptr[1] > ' ')) {
- if (!(*marker_cms_ptr)) {
- if (bigstack_calloc_d(unfiltered_marker_ct, marker_cms_ptr)) {
- goto load_bim_ret_NOMEM;
- }
- }
- if (scan_double(bufptr, &((*marker_cms_ptr)[marker_uidx]))) {
- sprintf(g_logbuf, "Error: Invalid centimorgan position on line %" PRIuPTR " of %s.\n", line_idx, ftype_str);
- goto load_bim_ret_INVALID_FORMAT_2;
- }
+ uii = strlen_se(col2_ptr);
+ ujj = (uii == missing_marker_id_match_len) && (!memcmp(col2_ptr, missing_marker_id_match, missing_marker_id_match_len));
+ if (!ujj) {
+ memcpyx(&((*marker_ids_ptr)[marker_uidx * max_marker_id_blen]), col2_ptr, uii, '\0');
}
- bufptr = next_token(bufptr);
- } else {
- bufptr = next_token_mult(bufptr2, mcm2);
- }
- if (no_more_tokens_kns(bufptr)) {
- goto load_bim_ret_MISSING_TOKENS;
- }
- if (scan_int_abs_defcap(bufptr, (int32_t*)&cur_pos)) {
- goto load_bim_ret_INVALID_BP_COORDINATE;
- }
- // negative marker positions now have the same effect in .bim as .map
- if ((int32_t)cur_pos < 0) {
- goto load_bim_skip_marker;
- }
- if (cur_pos < last_pos) {
- *map_is_unsorted_ptr |= UNSORTED_BP;
- } else {
- last_pos = cur_pos;
- }
- if ((sf_ct && (exclude_snp ^ sf_out_of_range(cur_pos, (uint32_t)jj, sf_start_idxs, sf_pos))) || ((marker_pos_start != -1) && ((((int32_t)cur_pos) < marker_pos_start) || (((int32_t)cur_pos) > marker_pos_end)))) {
- goto load_bim_skip_marker;
- }
- if (snp_slen) {
- if (snp_window_size == -1) {
- if ((uii == snp_slen) && (!memcmp(bufptr2, markername_snp, snp_slen))) {
- if (exclude_snp) {
- goto load_bim_skip_marker;
+ if (marker_cms_needed) {
+ bufptr = next_token(col2_ptr);
+ if (no_more_tokens_kns(bufptr)) {
+ goto load_bim_ret_MISSING_TOKENS;
+ }
+ if ((*bufptr != '0') || (bufptr[1] > ' ')) {
+ if (!(*marker_cms_ptr)) {
+ if (bigstack_calloc_d(unfiltered_marker_ct, marker_cms_ptr)) {
+ goto load_bim_ret_NOMEM;
+ }
+ }
+ if (scan_double(bufptr, &((*marker_cms_ptr)[marker_uidx]))) {
+ sprintf(g_logbuf, "Error: Invalid centimorgan position on line %" PRIuPTR " of %s.\n", line_idx, ftype_str);
+ goto load_bim_ret_INVALID_FORMAT_2;
}
- } else if (!exclude_snp) {
- goto load_bim_skip_marker;
}
- } else if (exclude_snp && ((((int32_t)cur_pos) <= exclude_window_end) && (((int32_t)cur_pos) >= exclude_window_start) && ((uint32_t)jj == snp_chrom))) {
- goto load_bim_skip_marker;
+ bufptr = next_token(bufptr);
+ } else {
+ bufptr = next_token_mult(col2_ptr, mcm2);
}
- }
- if (marker_pos_needed) {
- (*marker_pos_ptr)[marker_uidx] = cur_pos;
- }
- if (marker_alleles_needed || ujj) {
- bufptr4 = next_token(bufptr);
- bufptr5 = next_token(bufptr4);
- if (!bufptr5) {
+ if (no_more_tokens_kns(bufptr)) {
goto load_bim_ret_MISSING_TOKENS;
}
- ukk = strlen_se(bufptr4);
- umm = strlen_se(bufptr5);
- if (marker_alleles_needed) {
- if (snps_only) {
- if ((ukk != 1) || (umm != 1) || (snps_only_no_di && ((*bufptr4 == 'D') || (*bufptr4 == 'I') || (*bufptr5 == 'D') || (*bufptr5 == 'I')))) {
+ if (scan_int_abs_defcap(bufptr, (int32_t*)&cur_pos)) {
+ goto load_bim_ret_INVALID_BP_COORDINATE;
+ }
+ // negative marker positions now have the same effect in .bim as .map
+ if ((int32_t)cur_pos < 0) {
+ goto load_bim_skip_marker;
+ }
+ if (cur_pos < last_pos) {
+ *map_is_unsorted_ptr |= UNSORTED_BP;
+ } else {
+ last_pos = cur_pos;
+ }
+ if ((sf_ct && (exclude_snp ^ sf_out_of_range(cur_pos, (uint32_t)cur_chrom_code, sf_start_idxs, sf_pos))) || ((marker_pos_start != -1) && ((((int32_t)cur_pos) < marker_pos_start) || (((int32_t)cur_pos) > marker_pos_end)))) {
+ goto load_bim_skip_marker;
+ }
+ if (snp_slen) {
+ if (snp_window_size == -1) {
+ if ((uii == snp_slen) && (!memcmp(col2_ptr, markername_snp, snp_slen))) {
+ if (exclude_snp) {
+ goto load_bim_skip_marker;
+ }
+ } else if (!exclude_snp) {
goto load_bim_skip_marker;
}
- }
- ulii = marker_uidx * 2;
- // possible todo: warn if a comma is present (could use memchr),
- // since that breaks VCF and PLINK 2.0.
- if (allele_set(bufptr4, ukk, &(marker_allele_ptrs[ulii]))) {
- goto load_bim_ret_NOMEM;
- }
- ulii++;
- if (allele_set(bufptr5, umm, &(marker_allele_ptrs[ulii]))) {
- goto load_bim_ret_NOMEM;
+ } else if (exclude_snp && ((((int32_t)cur_pos) <= exclude_window_end) && (((int32_t)cur_pos) >= exclude_window_start) && ((uint32_t)cur_chrom_code == snp_chrom))) {
+ goto load_bim_skip_marker;
}
}
- if (ujj) {
- // --set-missing-var-ids
- // bufptr = position string
- // bufptr3 = chromosome code
- // bufptr4 and bufptr5: alleles (ok to null-terminate)
- // ukk and umm: allele lengths
- insert_buf[0] = bufptr3;
- insert_buf_len[0] = strlen_se(bufptr3);
- insert_buf[1] = bufptr;
- insert_buf_len[1] = strlen_se(bufptr);
- if (template_insert_ct == 4) {
- ukk = MINV(ukk, new_id_max_allele_len);
- umm = MINV(umm, new_id_max_allele_len);
- bufptr4[ukk] = '\0';
- bufptr5[umm] = '\0';
- // ASCII-sort allele names
- if (strcmp(bufptr4, bufptr5) <= 0) {
- memcpy(insert_buf[2], bufptr4, ukk);
- insert_buf_len[2] = ukk;
- memcpy(insert_buf[3], bufptr5, umm);
- insert_buf_len[3] = umm;
- } else {
- memcpy(insert_buf[3], bufptr4, ukk);
- insert_buf_len[3] = ukk;
- memcpy(insert_buf[2], bufptr5, umm);
- insert_buf_len[2] = umm;
+ if (marker_pos_needed) {
+ (*marker_pos_ptr)[marker_uidx] = cur_pos;
+ }
+ if (marker_alleles_needed || ujj) {
+ bufptr4 = next_token(bufptr);
+ bufptr5 = next_token(bufptr4);
+ if (!bufptr5) {
+ goto load_bim_ret_MISSING_TOKENS;
+ }
+ ukk = strlen_se(bufptr4);
+ umm = strlen_se(bufptr5);
+ if (marker_alleles_needed) {
+ if (snps_only) {
+ if ((ukk != 1) || (umm != 1) || (snps_only_no_di && ((*bufptr4 == 'D') || (*bufptr4 == 'I') || (*bufptr5 == 'D') || (*bufptr5 == 'I')))) {
+ goto load_bim_skip_marker;
+ }
+ }
+ ulii = marker_uidx * 2;
+ if (allele_set(bufptr4, ukk, &(marker_allele_ptrs[ulii]))) {
+ goto load_bim_ret_NOMEM;
+ }
+ ulii++;
+ if (allele_set(bufptr5, umm, &(marker_allele_ptrs[ulii]))) {
+ goto load_bim_ret_NOMEM;
}
}
- bufptr5 = &((*marker_ids_ptr)[marker_uidx * max_marker_id_len]);
- bufptr4 = bufptr5;
- for (uii = 0; uii < template_insert_ct; uii++) {
- bufptr4 = memcpya(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii]);
- ujj = missing_template_seg_order[uii];
- bufptr4 = memcpya(bufptr4, insert_buf[ujj], insert_buf_len[ujj]);
- }
- bufptr4 = memcpyax(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii], '\0');
- if (!strcmp(prev_new_id, bufptr5)) {
- LOGERRPRINTFWW("Error: Duplicate ID '%s' generated by --set-missing-var-ids.\n", prev_new_id);
- goto load_bim_ret_INVALID_CMDLINE;
+ if (ujj) {
+ // --set-missing-var-ids
+ // bufptr = position string
+ // loadbuf_first_token = chromosome code
+ // bufptr4 and bufptr5: alleles (ok to null-terminate)
+ // ukk and umm: allele lengths
+ insert_buf[0] = loadbuf_first_token;
+ insert_buf_len[0] = chrom_name_slen;
+ insert_buf[1] = bufptr;
+ insert_buf_len[1] = strlen_se(bufptr);
+ if (template_insert_ct == 4) {
+ ukk = MINV(ukk, new_id_max_allele_slen);
+ umm = MINV(umm, new_id_max_allele_slen);
+ bufptr4[ukk] = '\0';
+ bufptr5[umm] = '\0';
+ // ASCII-sort allele names
+ if (strcmp(bufptr4, bufptr5) <= 0) {
+ memcpy(insert_buf[2], bufptr4, ukk);
+ insert_buf_len[2] = ukk;
+ memcpy(insert_buf[3], bufptr5, umm);
+ insert_buf_len[3] = umm;
+ } else {
+ memcpy(insert_buf[3], bufptr4, ukk);
+ insert_buf_len[3] = ukk;
+ memcpy(insert_buf[2], bufptr5, umm);
+ insert_buf_len[2] = umm;
+ }
+ }
+ bufptr5 = &((*marker_ids_ptr)[marker_uidx * max_marker_id_blen]);
+ bufptr4 = bufptr5;
+ for (uii = 0; uii < template_insert_ct; uii++) {
+ bufptr4 = memcpya(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii]);
+ ujj = missing_template_seg_order[uii];
+ bufptr4 = memcpya(bufptr4, insert_buf[ujj], insert_buf_len[ujj]);
+ }
+ bufptr4 = memcpyax(bufptr4, missing_template_seg[uii], missing_template_seg_len[uii], '\0');
+ if (!strcmp(prev_new_id, bufptr5)) {
+ LOGERRPRINTFWW("Error: Duplicate ID '%s' generated by --set-missing-var-ids.\n", prev_new_id);
+ goto load_bim_ret_INVALID_CMDLINE;
+ }
+ missing_ids_set++;
+ memcpy(prev_new_id, bufptr5, (uintptr_t)(bufptr4 - bufptr5));
}
- missing_ids_set++;
- memcpy(prev_new_id, bufptr5, (uintptr_t)(bufptr4 - bufptr5));
}
- }
- } else {
- load_bim_skip_marker:
- SET_BIT(marker_uidx, marker_exclude);
- marker_exclude_ct++;
- if (marker_pos_needed) {
- // support unfiltered marker_pos search
- (*marker_pos_ptr)[marker_uidx] = last_pos;
+ } else {
+ load_bim_skip_marker:
+ SET_BIT(marker_uidx, marker_exclude);
+ marker_exclude_ct++;
+ if (marker_pos_needed) {
+ // support unfiltered marker_pos search
+ (*marker_pos_ptr)[marker_uidx] = last_pos;
+ }
}
}
- }
- if ((unfiltered_marker_ct == marker_exclude_ct) && (!allow_no_variants)) {
- logerrprint("Error: All variants excluded.\n");
- goto load_bim_ret_ALL_MARKERS_EXCLUDED;
- }
- if (missing_mid_template && ((*map_is_unsorted_ptr) & UNSORTED_BP)) {
- sprintf(g_logbuf, "Error: --set-missing-var-ids requires a sorted %s. Retry this command\nafter using --make-bed to sort your data.\n", ftype_str);
- goto load_bim_ret_INVALID_FORMAT_2;
- }
- for (uii = 0; uii < CHROM_MASK_WORDS; uii++) {
- chrom_info_ptr->chrom_mask[uii] &= loaded_chrom_mask[uii];
- }
- chrom_info_ptr->chrom_end[prev_chrom] = marker_uidx;
- chrom_info_ptr->chrom_ct = ++chroms_encountered_m1;
- chrom_info_ptr->chrom_file_order_marker_idx[chroms_encountered_m1] = marker_uidx;
- *marker_exclude_ct_ptr = marker_exclude_ct;
- LOGPRINTF("%" PRIuPTR " variant%s loaded from %s.\n", unfiltered_marker_ct - marker_exclude_ct, (unfiltered_marker_ct == marker_exclude_ct + 1)? "" : "s", ftype_str);
- if (missing_ids_set) {
- LOGPRINTF("%u missing ID%s set.\n", missing_ids_set, (missing_ids_set == 1)? "" : "s");
- }
+ if ((unfiltered_marker_ct == marker_exclude_ct) && (!allow_no_variants)) {
+ logerrprint("Error: All variants excluded.\n");
+ goto load_bim_ret_ALL_MARKERS_EXCLUDED;
+ }
+ if (missing_mid_template && ((*map_is_unsorted_ptr) & UNSORTED_BP)) {
+ sprintf(g_logbuf, "Error: --set-missing-var-ids requires a sorted %s. Retry this command\nafter using --make-bed to sort your data.\n", ftype_str);
+ goto load_bim_ret_INVALID_FORMAT_2;
+ }
+ for (uii = 0; uii < CHROM_MASK_WORDS; uii++) {
+ chrom_info_ptr->chrom_mask[uii] &= loaded_chrom_mask[uii];
+ }
+ chrom_info_ptr->chrom_ct = ++chroms_encountered_m1;
+ chrom_info_ptr->chrom_fo_vidx_start[chroms_encountered_m1] = marker_uidx;
+ *marker_exclude_ct_ptr = marker_exclude_ct;
+ LOGPRINTF("%" PRIuPTR " variant%s loaded from %s.\n", unfiltered_marker_ct - marker_exclude_ct, (unfiltered_marker_ct == marker_exclude_ct + 1)? "" : "s", ftype_str);
+ if (missing_ids_set) {
+ LOGPRINTF("%u missing ID%s set.\n", missing_ids_set, (missing_ids_set == 1)? "" : "s");
+ }
- if (max_bim_linelen_ptr) {
- *max_bim_linelen_ptr = max_bim_linelen;
+ if (max_bim_linelen_ptr) {
+ *max_bim_linelen_ptr = max_bim_linelen;
+ }
}
while (0) {
load_bim_ret_NOMEM:
@@ -1326,6 +1328,10 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
load_bim_ret_INVALID_CMDLINE:
retval = RET_INVALID_CMDLINE;
break;
+ load_bim_ret_UNSUPPORTED_COLUMN_ORDER:
+ LOGERRPRINTF("Error: Unsupported column order specified on line %" PRIuPTR " of %s.\n", line_idx, ftype_str);
+ retval = RET_INVALID_FORMAT;
+ break;
load_bim_ret_INVALID_BP_COORDINATE:
LOGERRPRINTF("Error: Invalid bp coordinate on line %" PRIuPTR " of %s.\n", line_idx, ftype_str);
retval = RET_INVALID_FORMAT;
@@ -1339,9 +1345,9 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
retval = RET_INVALID_FORMAT;
break;
load_bim_ret_DUPLICATE_ID:
- uii = strlen_se(bufptr);
- bufptr[uii] = '\0';
- LOGPREPRINTFWW("Error: Duplicate variant ID '%s' in %s.\n", bufptr, ftype_str);
+ uii = strlen_se(col2_ptr);
+ col2_ptr[uii] = '\0';
+ LOGPREPRINTFWW("Error: Duplicate variant ID '%s' in %s.\n", col2_ptr, ftype_str);
load_bim_ret_INVALID_FORMAT_2:
logerrprintb();
load_bim_ret_INVALID_FORMAT:
@@ -1364,28 +1370,28 @@ int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_ma
int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* sex_nm, uintptr_t* sex_male, char* sample_ids, uintptr_t max_sample_id_len, double missing_phenod, uint32_t covar_modifier, Range_list* covar_range_list_ptr, uint32_t gxe_mcovar, uintptr_t* covar_ctx_ptr, char** covar_names_ptr, uintptr_t* max_covar_name_len_ptr, uintptr_t* pheno_nm, uintptr_t** covar_nm_ptr, double** covar_d_ptr, uintptr_t** gxe_covar_nm_ptr, [...]
// similar to load_clusters() in plink_cluster.c
- // sex_nm and sex_male should be NULL unless sex is supposed to be added as
- // an extra covariate
- // covar_range_list_ptr is NULL iff --gxe was specified
+ // sex_nm and sex_male should be nullptr unless sex is supposed to be added
+ // as an extra covariate
+ // covar_range_list_ptr is nullptr iff --gxe was specified
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- unsigned char* bigstack_mark2 = NULL;
- FILE* covar_file = NULL;
+ unsigned char* bigstack_mark2 = nullptr;
+ FILE* covar_file = nullptr;
uintptr_t sample_ctl = BITCT_TO_WORDCT(sample_ct);
uintptr_t covar_raw_ct = 0;
uintptr_t loaded_sample_ct = 0;
uintptr_t missing_cov_ct = 0;
- uint32_t* sample_idx_to_uidx = NULL;
- char* sorted_covar_name_flag_ids = NULL;
- uint32_t* covar_name_flag_id_map = NULL;
- int32_t* covar_name_flag_seen_idxs = NULL;
- char* covar_names = NULL;
- uintptr_t* covar_nm = NULL;
- double* covar_d = NULL;
- uintptr_t* gxe_covar_nm = NULL;
- uintptr_t* gxe_covar_c = NULL;
- double* dptr = NULL;
- char* bufptr = NULL;
+ uint32_t* sample_idx_to_uidx = nullptr;
+ char* sorted_covar_name_flag_ids = nullptr;
+ uint32_t* covar_name_flag_id_map = nullptr;
+ int32_t* covar_name_flag_seen_idxs = nullptr;
+ char* covar_names = nullptr;
+ uintptr_t* covar_nm = nullptr;
+ double* covar_d = nullptr;
+ uintptr_t* gxe_covar_nm = nullptr;
+ uintptr_t* gxe_covar_c = nullptr;
+ double* dptr = nullptr;
+ char* bufptr = nullptr;
uintptr_t max_covar_name_len = sex_nm? 4 : 1;
double dxx = 0.0;
uint32_t keep_pheno_on_missing_cov = covar_modifier & COVAR_KEEP_PHENO_ON_MISSING_COV;
@@ -1432,7 +1438,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
}
// kludge to use sort_item_ids_noalloc()
- fill_ulong_zero((uintptr_t*)covar_name_flag_seen_idxs, BITCT_TO_WORDCT(ulii));
+ fill_ulong_zero(BITCT_TO_WORDCT(ulii), (uintptr_t*)covar_name_flag_seen_idxs);
retval = sort_item_ids_noalloc(ulii, (const uintptr_t*)covar_name_flag_seen_idxs, ulii, covar_range_list_ptr->names, covar_range_list_ptr->name_max_len, 0, 0, strcmp_deref, sorted_covar_name_flag_ids, covar_name_flag_id_map);
if (retval) {
if (retval == RET_INVALID_FORMAT) {
@@ -1441,7 +1447,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
}
goto load_covars_ret_1;
}
- fill_int_one(covar_name_flag_seen_idxs, ulii);
+ fill_int_one(ulii, covar_name_flag_seen_idxs);
}
retval = sort_item_ids_noalloc(unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, 0, 1, strcmp_deref, sorted_ids, id_map);
if (retval) {
@@ -1502,11 +1508,11 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
// no header line present?
bufptr2 = next_token(bufptr);
- header_absent = (strcmp_se(bufptr, "FID", 3) || strcmp_se(bufptr2, "IID", 3));
+ header_absent = ((strcmp_se(bufptr, "FID", 3) && (strcmp_se(bufptr, "#FID", 4))) || strcmp_se(bufptr2, "IID", 3));
bufptr = next_token(bufptr2);
if ((covar_modifier & (COVAR_NAME | COVAR_NUMBER)) && covar_raw_ct) {
- fill_ulong_zero(covars_active, covar_raw_ctl);
+ fill_ulong_zero(covar_raw_ctl, covars_active);
if (covar_modifier & COVAR_NUMBER) {
if (numeric_range_list_to_bitarr(covar_range_list_ptr, covar_raw_ct, 1, 0, covars_active)) {
goto load_covars_ret_MISSING_TOKENS;
@@ -1529,7 +1535,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
covar_ct = covar_raw_ct;
} else {
// --gxe only
- fill_ulong_zero(covars_active, covar_raw_ctl);
+ fill_ulong_zero(covar_raw_ctl, covars_active);
covar_ct = 0;
}
covar_ctx = covar_ct + (sex_nm? 1 : 0);
@@ -1567,8 +1573,8 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
// It does track when some covariates are missing and others aren't
// (missing covariates are represented as the --missing-phenotype value).
if (covar_range_list_ptr) {
- if (max_covar_name_len > MAX_ID_LEN_P1) {
- logerrprint("Error: Covariate names are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_covar_name_len > MAX_ID_BLEN) {
+ logerrprint("Error: Covariate names are limited to " MAX_ID_SLEN_STR " characters.\n");
goto load_covars_ret_INVALID_FORMAT;
}
// not only --gxe
@@ -1583,7 +1589,7 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
covar_names = *covar_names_ptr;
covar_nm = *covar_nm_ptr;
covar_d = *covar_d_ptr;
- fill_ulong_zero(covar_nm, sample_ctl);
+ fill_ulong_zero(sample_ctl, covar_nm);
for (covar_idx = 0; covar_idx < ulii; covar_idx++) {
covar_d[covar_idx] = missing_phenod;
}
@@ -1678,7 +1684,10 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
covar_missing = 0;
for (uii = 0; uii < min_covar_col_ct; uii++) {
bufptr = skip_initial_spaces(bufptr2);
+
+ // column count already validated
bufptr2 = token_endnn(bufptr);
+
if (IS_SET(covars_active, uii)) {
if (scan_double(bufptr, &dxx)) {
covar_missing = 1;
@@ -1848,10 +1857,10 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
load_covars_none:
if (covar_modifier & COVAR_ALLOW_NONE) {
*covar_ctx_ptr = 0;
- *covar_names_ptr = NULL;
+ *covar_names_ptr = nullptr;
*max_covar_name_len_ptr = 1;
- *covar_nm_ptr = NULL;
- *covar_d_ptr = NULL;
+ *covar_nm_ptr = nullptr;
+ *covar_d_ptr = nullptr;
// --gxe not possible
bigstack_reset(bigstack_mark);
logerrprint("Warning: ");
@@ -1873,16 +1882,16 @@ int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t
int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modifier, uint32_t write_covar_dummy_max_categories, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, char* paternal_ids, uintptr_t max_paternal_id_len, char* maternal_ids, uintptr_t max_maternal_id_len, uintptr_t* sex_nm, uintptr_t* sex_male, uintptr_t* pheno_nm, uintptr_t* pheno_c, double* pheno_d, double missing_phenod, char* output_ [...]
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uint32_t write_pheno = write_covar_modifier & WRITE_COVAR_PHENO;
uint32_t exclude_parents = write_covar_modifier & WRITE_COVAR_NO_PARENTS;
uint32_t exclude_sex = write_covar_modifier & WRITE_COVAR_NO_SEX;
uint32_t female_2 = write_covar_modifier & WRITE_COVAR_FEMALE_2;
uintptr_t sample_uidx = 0;
- uint32_t* downcoding_level = NULL;
- uint32_t* downcoding_values = NULL;
- char* zbuf = NULL;
- char* out_missing_buf = NULL;
+ uint32_t* downcoding_level = nullptr;
+ uint32_t* downcoding_values = nullptr;
+ char* zbuf = nullptr;
+ char* out_missing_buf = nullptr;
uintptr_t omplen_p1 = strlen(output_missing_pheno) + 1;
uint32_t do_downcoding = (write_covar_modifier & WRITE_COVAR_DUMMY) && (sample_ct > 2);
uint32_t downcoding_no_round = (write_covar_modifier & WRITE_COVAR_DUMMY_NO_ROUND);
@@ -1966,7 +1975,7 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
downcode_category_ct = 0;
if (sample_idx2 > 2) {
covar_nm_ct = sample_idx2;
- fill_uint_one(uiptr, sample_ct);
+ fill_uint_one(sample_ct, uiptr);
#ifdef __cplusplus
std::sort(sorted_downcoding_intbuf, &(sorted_downcoding_intbuf[covar_nm_ct]));
#else
@@ -2037,7 +2046,7 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
} else {
downcoding_level[covar_idx] = 0;
fputs(&(covar_names[covar_idx * max_covar_name_len]), outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
}
} else {
@@ -2058,7 +2067,7 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
downcode_category_ct = 0;
if (sample_idx2 > 2) {
covar_nm_ct = sample_idx2;
- fill_uint_one(uiptr, sample_ct);
+ fill_uint_one(sample_ct, uiptr);
if (qsort_ext((char*)sorted_downcoding_buf, covar_nm_ct, sizeof(double), double_cmp_deref, (char*)downcoding_buf_idxs, sizeof(int32_t))) {
goto write_covars_ret_NOMEM;
}
@@ -2127,7 +2136,7 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
} else {
downcoding_level[covar_idx] = 0;
fputs(&(covar_names[covar_idx * max_covar_name_len]), outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
}
}
@@ -2152,7 +2161,7 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
} else {
for (covar_idx = 0; covar_idx < covar_ct; covar_idx++) {
fputs(&(covar_names[covar_idx * max_covar_name_len]), outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
}
if (putc_checked('\n', outfile)) {
@@ -2167,35 +2176,35 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
goto write_covars_ret_WRITE_FAIL;
}
*wptr = '\t';
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (write_pheno) {
if (!exclude_parents) {
fputs(&(paternal_ids[sample_uidx * max_paternal_id_len]), outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(&(maternal_ids[sample_uidx * max_maternal_id_len]), outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
if (!exclude_sex) {
if (!female_2) {
- putc((uint32_t)(48 + IS_SET(sex_male, sample_uidx)), outfile);
+ putc_unlocked((uint32_t)(48 + IS_SET(sex_male, sample_uidx)), outfile);
} else {
if (IS_SET(sex_nm, sample_uidx)) {
- putc((uint32_t)(50 - IS_SET(sex_male, sample_uidx)), outfile);
+ putc_unlocked((uint32_t)(50 - IS_SET(sex_male, sample_uidx)), outfile);
} else {
- putc('0', outfile);
+ putc_unlocked('0', outfile);
}
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
if (!IS_SET(pheno_nm, sample_uidx)) {
fputs(output_missing_pheno, outfile);
} else if (pheno_c) {
- putc('1' + IS_SET(pheno_c, sample_uidx), outfile);
+ putc_unlocked('1' + IS_SET(pheno_c, sample_uidx), outfile);
} else {
wptr = dtoa_g(pheno_d[sample_uidx], g_textbuf);
fwrite(g_textbuf, 1, wptr - g_textbuf, outfile);
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
if (do_downcoding) {
dptr = &(covar_d[sample_idx * covar_ct]);
@@ -2236,7 +2245,7 @@ int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modi
}
} else {
fputs(output_missing_pheno, outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
}
}
@@ -2282,7 +2291,7 @@ int32_t zero_cluster_init(char* zerofname, uintptr_t unfiltered_marker_ct, uintp
// compress each
// 5. allocate and initialize cluster_zc_masks
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* zcfile = NULL;
+ FILE* zcfile = nullptr;
uintptr_t marker_ctp2l = (marker_ct + (BITCT + 1)) / BITCT;
uintptr_t sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
uintptr_t zc_item_ct = 0;
@@ -2315,9 +2324,9 @@ int32_t zero_cluster_init(char* zerofname, uintptr_t unfiltered_marker_ct, uintp
goto zero_cluster_init_ret_NOMEM;
}
#ifdef __LP64__
- fill_ulong_zero(marker_bitfield_tmp, round_up_pow2(marker_ctp2l, 2));
+ fill_ulong_zero(round_up_pow2(marker_ctp2l, 2), marker_bitfield_tmp);
#else
- fill_ulong_zero(marker_bitfield_tmp, round_up_pow2(marker_ctp2l, 4));
+ fill_ulong_zero(round_up_pow2(marker_ctp2l, 4), marker_bitfield_tmp);
#endif
zc_entries_end = (int64_t*)marker_bitfield_tmp;
zc_entries = &(zc_entries_end[-1]);
@@ -2386,7 +2395,7 @@ int32_t zero_cluster_init(char* zerofname, uintptr_t unfiltered_marker_ct, uintp
range_last = 0;
for (cluster_idx = 0; cluster_idx < cluster_ct; cluster_idx++) {
if (range_first < marker_ct) {
- fill_ulong_zero(marker_bitfield_tmp, marker_ctp2l);
+ fill_ulong_zero(marker_ctp2l, marker_bitfield_tmp);
range_first = marker_ct;
range_last = 0;
bigstack_end_set(zc_entries);
@@ -2455,7 +2464,7 @@ int32_t zero_cluster_init(char* zerofname, uintptr_t unfiltered_marker_ct, uintp
}
int32_t write_fam(char* outname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, char* paternal_ids, uintptr_t max_paternal_id_len, char* maternal_ids, uintptr_t max_maternal_id_len, uintptr_t* sex_nm, uintptr_t* sex_male, uintptr_t* pheno_nm, uintptr_t* pheno_c, double* pheno_d, char* output_missing_pheno, char delim, uint32_t* sample_sort_map) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t sample_uidx = 0;
uintptr_t sample_uidx2 = 0;
uintptr_t omplen = strlen(output_missing_pheno);
@@ -2515,14 +2524,14 @@ int32_t write_fam(char* outname, uintptr_t unfiltered_sample_ct, uintptr_t* samp
}
int32_t write_map_or_bim(char* outname, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, char delim, Chrom_info* chrom_info_ptr) {
- // write a .map if marker_allele_ptrs is NULL, .bim otherwise
- FILE* outfile = NULL;
+ // write a .map if marker_allele_ptrs is nullptr, .bim otherwise
+ FILE* outfile = nullptr;
uintptr_t marker_uidx = 0;
int32_t retval = 0;
uint32_t chrom_end = 0;
uint32_t chrom_fo_idx = 0xffffffffU;
uint32_t chrom_idx = 0;
- char* buf_start = NULL;
+ char* buf_start = nullptr;
uintptr_t marker_idx;
char* bufptr;
if (fopen_checked(outname, "w", &outfile)) {
@@ -2532,7 +2541,7 @@ int32_t write_map_or_bim(char* outname, uintptr_t* marker_exclude, uintptr_t mar
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
while (marker_uidx >= chrom_end) {
chrom_idx = chrom_info_ptr->chrom_file_order[++chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
buf_start = chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf);
*buf_start++ = delim;
}
@@ -2548,12 +2557,12 @@ int32_t write_map_or_bim(char* outname, uintptr_t* marker_exclude, uintptr_t mar
goto write_map_or_bim_ret_WRITE_FAIL;
}
if (marker_allele_ptrs) {
- putc(delim, outfile);
+ putc_unlocked(delim, outfile);
fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
- putc(delim, outfile);
+ putc_unlocked(delim, outfile);
fputs(marker_allele_ptrs[2 * marker_uidx + 1], outfile);
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
if (fclose_null(&outfile)) {
goto write_map_or_bim_ret_WRITE_FAIL;
@@ -2572,37 +2581,39 @@ int32_t write_map_or_bim(char* outname, uintptr_t* marker_exclude, uintptr_t mar
int32_t load_bim_split_chrom(char* bimname, uintptr_t* marker_exclude, uintptr_t marker_ct, Chrom_info* chrom_info_ptr, int64_t* ll_buf, uint32_t max_bim_linelen) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
char* loadbuf = g_textbuf;
- uint32_t marker_uidx = 0xffffffffU; // deliberate overflow
int32_t retval = 0;
- uintptr_t marker_idx;
- char* bufptr;
- uint64_t chrom_idx;
- if (max_bim_linelen > MAXLINELEN) {
- if (bigstack_alloc_c(max_bim_linelen, &loadbuf)) {
- goto load_bim_split_chrom_ret_NOMEM;
- }
- }
- if (fopen_checked(bimname, "r", &infile)) {
- goto load_bim_split_chrom_ret_OPEN_FAIL;
- }
- for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
- load_bim_split_chrom_reread:
- if (!fgets(loadbuf, max_bim_linelen, infile)) {
- goto load_bim_split_chrom_ret_READ_FAIL;
+ {
+ if (max_bim_linelen > MAXLINELEN) {
+ if (bigstack_alloc_c(max_bim_linelen, &loadbuf)) {
+ goto load_bim_split_chrom_ret_NOMEM;
+ }
}
- bufptr = skip_initial_spaces(loadbuf);
- if (is_eoln_or_comment_kns(*bufptr)) {
- goto load_bim_split_chrom_reread;
+ if (fopen_checked(bimname, "r", &infile)) {
+ goto load_bim_split_chrom_ret_OPEN_FAIL;
}
- marker_uidx++;
- if (IS_SET(marker_exclude, marker_uidx)) {
- goto load_bim_split_chrom_reread;
+ uint32_t marker_uidx = 0xffffffffU; // deliberate overflow
+ for (uintptr_t marker_idx = 0; marker_idx < marker_ct;) {
+ if (!fgets(loadbuf, max_bim_linelen, infile)) {
+ goto load_bim_split_chrom_ret_READ_FAIL;
+ }
+ char* loadbuf_first_token = skip_initial_spaces(loadbuf);
+ if (is_eoln_or_comment_kns(*loadbuf_first_token)) {
+ continue;
+ }
+ marker_uidx++;
+ if (IS_SET(marker_exclude, marker_uidx)) {
+ continue;
+ }
+ // already validated
+ char* first_token_end = token_endnn(loadbuf_first_token);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - loadbuf_first_token);
+ *first_token_end = '\0';
+ uint64_t chrom_idx = (uint32_t)get_chrom_code(loadbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ ll_buf[marker_idx] = (int64_t)((chrom_idx << 32) | ((uint64_t)marker_idx));
+ ++marker_idx;
}
- // already validated
- chrom_idx = ((uint32_t)get_chrom_code(chrom_info_ptr, bufptr));
- ll_buf[marker_idx] = (int64_t)((chrom_idx << 32) | ((uint64_t)marker_idx));
}
while (0) {
load_bim_split_chrom_ret_NOMEM:
@@ -2630,7 +2641,7 @@ void fill_ll_buf(uintptr_t* marker_exclude, uintptr_t marker_ct, Chrom_info* chr
next_unset_unsafe_ck(marker_exclude, &marker_uidx);
if (marker_uidx >= chrom_end) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[++chrom_idx_p1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[++chrom_idx_p1];
} while (marker_uidx >= chrom_end);
chrom_idx_shifted = ((uint64_t)(chrom_info_ptr->chrom_file_order[chrom_idx_p1 - 1])) << 32;
}
@@ -2640,7 +2651,7 @@ void fill_ll_buf(uintptr_t* marker_exclude, uintptr_t marker_ct, Chrom_info* chr
int32_t update_marker_chroms(Two_col_params* update_chr, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t allow_extra_chroms, Chrom_info* chrom_info_ptr, int64_t* ll_buf) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
char skipchar = update_chr->skipchar;
uint32_t colid_first = (update_chr->colid < update_chr->colx);
uint32_t marker_ctl = BITCT_TO_WORDCT(marker_ct);
@@ -2660,101 +2671,96 @@ int32_t update_marker_chroms(Two_col_params* update_chr, uintptr_t unfiltered_ma
uint32_t coldiff;
uint32_t slen;
uint32_t marker_idx;
- int32_t sorted_idx;
int32_t retval;
char cc;
- retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
- if (retval) {
- goto update_marker_chroms_ret_1;
- }
- if (bigstack_calloc_ul(marker_ctl, &already_seen) ||
- bigstack_alloc_ui(unfiltered_marker_ct, &marker_uidx_to_idx)) {
- goto update_marker_chroms_ret_NOMEM;
- }
- fill_uidx_to_idx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_uidx_to_idx);
- loadbuf = (char*)g_bigstack_base;
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- }
- if (loadbuf_size <= MAXLINELEN) {
- goto update_marker_chroms_ret_NOMEM;
- }
- retval = open_and_skip_first_lines(&infile, update_chr->fname, loadbuf, loadbuf_size, update_chr->skip);
- if (retval) {
- goto update_marker_chroms_ret_1;
- }
- if (colid_first) {
- colmin = update_chr->colid - 1;
- coldiff = update_chr->colx - update_chr->colid;
- } else {
- colmin = update_chr->colx - 1;
- coldiff = update_chr->colid - update_chr->colx;
- }
- while (fgets(loadbuf, loadbuf_size, infile)) {
- line_idx++;
- if (!(loadbuf[loadbuf_size - 1])) {
- if (loadbuf_size == MAXLINEBUFLEN) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --update-chr file is pathologically long.\n", line_idx);
- goto update_marker_chroms_ret_INVALID_FORMAT_2;
- } else {
- goto update_marker_chroms_ret_NOMEM;
- }
+ {
+ retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
+ if (retval) {
+ goto update_marker_chroms_ret_1;
}
- colid_ptr = skip_initial_spaces(loadbuf);
- cc = *colid_ptr;
- if (is_eoln_kns(cc) || (cc == skipchar)) {
- continue;
+ if (bigstack_calloc_ul(marker_ctl, &already_seen) ||
+ bigstack_alloc_ui(unfiltered_marker_ct, &marker_uidx_to_idx)) {
+ goto update_marker_chroms_ret_NOMEM;
+ }
+ fill_uidx_to_idx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_uidx_to_idx);
+ loadbuf = (char*)g_bigstack_base;
+ loadbuf_size = bigstack_left();
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ }
+ if (loadbuf_size <= MAXLINELEN) {
+ goto update_marker_chroms_ret_NOMEM;
+ }
+ retval = open_and_skip_first_lines(&infile, update_chr->fname, loadbuf, loadbuf_size, update_chr->skip);
+ if (retval) {
+ goto update_marker_chroms_ret_1;
}
if (colid_first) {
- colid_ptr = next_token_multz(colid_ptr, colmin);
- colx_ptr = next_token_mult(colid_ptr, coldiff);
- if (no_more_tokens_kns(colx_ptr)) {
- goto update_marker_chroms_ret_MISSING_TOKENS;
- }
+ colmin = update_chr->colid - 1;
+ coldiff = update_chr->colx - update_chr->colid;
} else {
- colx_ptr = next_token_multz(colid_ptr, colmin);
- colid_ptr = next_token_mult(colx_ptr, coldiff);
- if (no_more_tokens_kns(colid_ptr)) {
- goto update_marker_chroms_ret_MISSING_TOKENS;
- }
+ colmin = update_chr->colx - 1;
+ coldiff = update_chr->colid - update_chr->colx;
}
- slen = strlen_se(colid_ptr);
- marker_uidx = id_htable_find(colid_ptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
- if (marker_uidx == 0xffffffffU) {
- miss_ct++;
- continue;
- }
- marker_idx = marker_uidx_to_idx[marker_uidx];
- if (is_set(already_seen, marker_idx)) {
- colid_ptr[slen] = '\0';
- LOGPREPRINTFWW("Error: Duplicate variant ID '%s' in --update-chr file.\n", colid_ptr);
- goto update_marker_chroms_ret_INVALID_FORMAT_2;
- }
- set_bit(marker_idx, already_seen);
- sorted_idx = get_chrom_code(chrom_info_ptr, colx_ptr);
- if (sorted_idx < 0) {
- if ((!allow_extra_chroms) || (sorted_idx == -1)) {
- sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of --update-chr file.\n", line_idx);
+ while (fgets(loadbuf, loadbuf_size, infile)) {
+ line_idx++;
+ if (!(loadbuf[loadbuf_size - 1])) {
+ if (loadbuf_size == MAXLINEBUFLEN) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --update-chr file is pathologically long.\n", line_idx);
+ goto update_marker_chroms_ret_INVALID_FORMAT_2;
+ } else {
+ goto update_marker_chroms_ret_NOMEM;
+ }
+ }
+ colid_ptr = skip_initial_spaces(loadbuf);
+ cc = *colid_ptr;
+ if (is_eoln_kns(cc) || (cc == skipchar)) {
+ continue;
+ }
+ if (colid_first) {
+ colid_ptr = next_token_multz(colid_ptr, colmin);
+ colx_ptr = next_token_mult(colid_ptr, coldiff);
+ if (no_more_tokens_kns(colx_ptr)) {
+ goto update_marker_chroms_ret_MISSING_TOKENS;
+ }
+ } else {
+ colx_ptr = next_token_multz(colid_ptr, colmin);
+ colid_ptr = next_token_mult(colx_ptr, coldiff);
+ if (no_more_tokens_kns(colid_ptr)) {
+ goto update_marker_chroms_ret_MISSING_TOKENS;
+ }
+ }
+ slen = strlen_se(colid_ptr);
+ marker_uidx = id_htable_find(colid_ptr, slen, marker_id_htable, marker_id_htable_size, marker_ids, max_marker_id_len);
+ if (marker_uidx == 0xffffffffU) {
+ miss_ct++;
+ continue;
+ }
+ marker_idx = marker_uidx_to_idx[marker_uidx];
+ if (is_set(already_seen, marker_idx)) {
+ colid_ptr[slen] = '\0';
+ LOGPREPRINTFWW("Error: Duplicate variant ID '%s' in --update-chr file.\n", colid_ptr);
goto update_marker_chroms_ret_INVALID_FORMAT_2;
}
- retval = resolve_or_add_chrom_name(colx_ptr, "--update-chr file", line_idx, chrom_info_ptr, &sorted_idx);
+ set_bit(marker_idx, already_seen);
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code_destructive("--update-chr file", line_idx, allow_extra_chroms, colx_ptr, token_endnn(colx_ptr), chrom_info_ptr, &cur_chrom_code);
if (retval) {
goto update_marker_chroms_ret_1;
}
+ ll_buf[marker_idx] = (int64_t)((((uint64_t)((uint32_t)cur_chrom_code)) << 32) | (((uint64_t)ll_buf[marker_idx]) & 0xffffffffLLU));
+ hit_ct++;
+ }
+ if (!feof(infile)) {
+ goto update_marker_chroms_ret_READ_FAIL;
}
- ll_buf[marker_idx] = (int64_t)((((uint64_t)((uint32_t)sorted_idx)) << 32) | (((uint64_t)ll_buf[marker_idx]) & 0xffffffffLLU));
- hit_ct++;
+ if (miss_ct) {
+ sprintf(g_logbuf, "--update-chr: %" PRIuPTR " value%s updated, %" PRIuPTR " variant ID%s not present.\n", hit_ct, (hit_ct == 1)? "" : "s", miss_ct, (miss_ct == 1)? "" : "s");
+ } else {
+ sprintf(g_logbuf, "--update-chr: %" PRIuPTR " value%s updated.\n", hit_ct, (hit_ct == 1)? "" : "s");
+ }
+ logprintb();
}
- if (!feof(infile)) {
- goto update_marker_chroms_ret_READ_FAIL;
- }
- if (miss_ct) {
- sprintf(g_logbuf, "--update-chr: %" PRIuPTR " value%s updated, %" PRIuPTR " variant ID%s not present.\n", hit_ct, (hit_ct == 1)? "" : "s", miss_ct, (miss_ct == 1)? "" : "s");
- } else {
- sprintf(g_logbuf, "--update-chr: %" PRIuPTR " value%s updated.\n", hit_ct, (hit_ct == 1)? "" : "s");
- }
- logprintb();
while (0) {
update_marker_chroms_ret_NOMEM:
retval = RET_NOMEM;
@@ -2778,7 +2784,8 @@ int32_t update_marker_chroms(Two_col_params* update_chr, uintptr_t unfiltered_ma
void sort_marker_chrom_pos(int64_t* ll_buf, uintptr_t marker_ct, uint32_t* pos_buf, uint32_t* chrom_start, uint32_t* chrom_id, uint32_t* unpack_map, uint32_t* chrom_ct_ptr) {
// Assumes ll_buf is initially filled with chromosome idxs in high 32 bits,
// and filtered marker indices in low 32 bits. pos_buf is expected to have
- // base-pair positions; lookup is by filtered_index iff unpack_map is NULL.
+ // base-pair positions; lookup is by filtered_index iff unpack_map is
+ // nullptr.
// After this is finished, ll_buf has marker positions in high bits and
// filtered original indices in low bits, while chrom_start[] tracks
// chromosome boundaries.
@@ -2835,9 +2842,9 @@ void sort_marker_chrom_pos(int64_t* ll_buf, uintptr_t marker_ct, uint32_t* pos_b
*chrom_ct_ptr = chrom_ct;
}
-int32_t sort_and_write_bim(uint32_t* map_reverse, uint32_t map_cols, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, int64_t* ll_buf, Chrom_info* chrom_info_ptr) {
+int32_t sort_and_write_bim(uint32_t* map_reverse, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, int64_t* ll_buf, Chrom_info* chrom_info_ptr) {
// caller is expected to pop stuff off stack
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uint32_t max_code = chrom_info_ptr->max_code;
uint32_t chrom_code_end = max_code + 1 + chrom_info_ptr->name_ct;
int32_t retval = 0;
@@ -2898,7 +2905,7 @@ int32_t sort_and_write_bim(uint32_t* map_reverse, uint32_t map_cols, char* outna
goto sort_and_write_bim_ret_WRITE_FAIL;
}
fputs(cond_replace(marker_allele_ptrs[2 * marker_uidx], missing_geno_ptr, output_missing_geno_ptr), outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
fputs(cond_replace(marker_allele_ptrs[2 * marker_uidx + 1], missing_geno_ptr, output_missing_geno_ptr), outfile);
if (putc_checked('\n', outfile)) {
goto sort_and_write_bim_ret_WRITE_FAIL;
@@ -2922,8 +2929,8 @@ int32_t sort_and_write_bim(uint32_t* map_reverse, uint32_t map_cols, char* outna
int32_t load_sort_and_write_map(uint32_t** map_reverse_ptr, FILE* mapfile, uint32_t map_cols, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, uintptr_t max_marker_id_len, int32_t compact_map_reverse, Chrom_info* chrom_info_ptr) {
// get_chrom_code() cannot fail
- FILE* map_outfile = NULL;
- int64_t* ll_buf = NULL;
+ FILE* map_outfile = nullptr;
+ int64_t* ll_buf = nullptr;
uintptr_t line_idx = 0;
uint32_t orig_zec = chrom_info_ptr->zero_extra_chroms;
int32_t retval = 0;
@@ -2943,74 +2950,80 @@ int32_t load_sort_and_write_map(uint32_t** map_reverse_ptr, FILE* mapfile, uint3
uint32_t ujj;
uint32_t cur_chrom;
uint32_t chrom_ct;
- // See sort_and_write_bim() for discussion. Note that marker_ids and
- // marker_cms use filtered instead of unfiltered coordinates, though.
- if (bigstack_alloc_ui(compact_map_reverse? marker_ct : unfiltered_marker_ct, map_reverse_ptr) ||
- bigstack_alloc_ll(marker_ct, &ll_buf) ||
- bigstack_alloc_c(marker_ct * max_marker_id_len, &marker_ids) ||
- bigstack_alloc_d(marker_ct, &marker_cms) ||
- bigstack_alloc_ui(marker_ct, &pos_buf) ||
- bigstack_alloc_ui(marker_ct, &unpack_map) ||
- bigstack_alloc_ui(MAX_POSSIBLE_CHROM + 2, &chrom_start) ||
- bigstack_alloc_ui(MAX_POSSIBLE_CHROM + 1, &chrom_id)) {
- goto load_sort_and_write_map_ret_NOMEM;
- }
- rewind(mapfile);
- marker_idx = 0;
- for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
- if (get_next_noncomment(mapfile, &bufptr, &line_idx)) {
- goto load_sort_and_write_map_ret_READ_FAIL;
- }
- if (IS_SET(marker_exclude, marker_uidx)) {
- continue;
+ {
+ // See sort_and_write_bim() for discussion. Note that marker_ids and
+ // marker_cms use filtered instead of unfiltered coordinates, though.
+ if (bigstack_alloc_ui(compact_map_reverse? marker_ct : unfiltered_marker_ct, map_reverse_ptr) ||
+ bigstack_alloc_ll(marker_ct, &ll_buf) ||
+ bigstack_alloc_c(marker_ct * max_marker_id_len, &marker_ids) ||
+ bigstack_alloc_d(marker_ct, &marker_cms) ||
+ bigstack_alloc_ui(marker_ct, &pos_buf) ||
+ bigstack_alloc_ui(marker_ct, &unpack_map) ||
+ bigstack_alloc_ui(MAX_POSSIBLE_CHROM + 2, &chrom_start) ||
+ bigstack_alloc_ui(MAX_POSSIBLE_CHROM + 1, &chrom_id)) {
+ goto load_sort_and_write_map_ret_NOMEM;
}
- ll_buf[marker_idx] = (((uint64_t)((uint32_t)get_chrom_code(chrom_info_ptr, bufptr))) << 32) + marker_idx;
- bufptr = next_token(bufptr);
- uii = strlen_se(bufptr);
- memcpyx(&(marker_ids[marker_idx * max_marker_id_len]), bufptr, uii, 0);
- bufptr = next_token(bufptr);
- if (map_cols == 4) {
- if (scan_double(bufptr, &(marker_cms[marker_idx]))) {
+ rewind(mapfile);
+ marker_idx = 0;
+ for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
+ char* textbuf_first_token;
+ if (get_next_noncomment(mapfile, &textbuf_first_token, &line_idx)) {
+ goto load_sort_and_write_map_ret_READ_FAIL;
+ }
+ if (IS_SET(marker_exclude, marker_uidx)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(textbuf_first_token);
+ char* textbuf_iter = skip_initial_spaces(first_token_end);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - textbuf_first_token);
+ *first_token_end = '\0';
+ ll_buf[marker_idx] = (((uint64_t)((uint32_t)get_chrom_code(textbuf_first_token, chrom_info_ptr, chrom_name_slen))) << 32) + marker_idx;
+ uii = strlen_se(textbuf_iter);
+ memcpyx(&(marker_ids[marker_idx * max_marker_id_len]), textbuf_iter, uii, 0);
+ textbuf_iter = next_token(textbuf_iter);
+ if (map_cols == 4) {
+ if (scan_double(textbuf_iter, &(marker_cms[marker_idx]))) {
+ marker_cms[marker_idx] = 0.0;
+ }
+ textbuf_iter = next_token(textbuf_iter);
+ } else {
marker_cms[marker_idx] = 0.0;
}
- bufptr = next_token(bufptr);
- } else {
- marker_cms[marker_idx] = 0.0;
+ unpack_map[marker_idx] = marker_uidx;
+ // previously validated
+ scan_uint_defcap(textbuf_iter, &(pos_buf[marker_idx++]));
}
- unpack_map[marker_idx] = marker_uidx;
- // previously validated
- scan_uint_defcap(bufptr, &(pos_buf[marker_idx++]));
- }
- sort_marker_chrom_pos(ll_buf, marker_ct, pos_buf, chrom_start, chrom_id, NULL, &chrom_ct);
+ sort_marker_chrom_pos(ll_buf, marker_ct, pos_buf, chrom_start, chrom_id, nullptr, &chrom_ct);
- strcpy(outname_end, ".map.tmp");
- if (fopen_checked(outname, "w", &map_outfile)) {
- goto load_sort_and_write_map_ret_OPEN_FAIL;
- }
+ strcpy(outname_end, ".map.tmp");
+ if (fopen_checked(outname, "w", &map_outfile)) {
+ goto load_sort_and_write_map_ret_OPEN_FAIL;
+ }
- marker_idx = 0;
- *zstr = '0';
- zstr[1] = '\0';
- chrom_info_ptr->zero_extra_chroms = 0;
- for (uii = 0; uii < chrom_ct; uii++) {
- cur_chrom = chrom_id[uii];
- ujj = chrom_start[uii + 1];
- bufptr0 = chrom_name_write(chrom_info_ptr, cur_chrom, g_textbuf);
- *bufptr0++ = '\t';
- for (; marker_idx < ujj; marker_idx++) {
- marker_idx2 = (uint32_t)ll_buf[marker_idx];
- marker_uidx = unpack_map[marker_idx2];
- bufptr = strcpyax(bufptr0, &(marker_ids[marker_idx2 * max_marker_id_len]), '\t');
- bufptr = dtoa_g_wxp8x(marker_cms[marker_idx2], 1, '\t', bufptr);
- bufptr = uint32toa_x((uint32_t)(ll_buf[marker_idx] >> 32), '\n', bufptr);
- if (fwrite_checked(g_textbuf, bufptr - g_textbuf, map_outfile)) {
- goto load_sort_and_write_map_ret_WRITE_FAIL;
+ marker_idx = 0;
+ *zstr = '0';
+ zstr[1] = '\0';
+ chrom_info_ptr->zero_extra_chroms = 0;
+ for (uii = 0; uii < chrom_ct; uii++) {
+ cur_chrom = chrom_id[uii];
+ ujj = chrom_start[uii + 1];
+ bufptr0 = chrom_name_write(chrom_info_ptr, cur_chrom, g_textbuf);
+ *bufptr0++ = '\t';
+ for (; marker_idx < ujj; marker_idx++) {
+ marker_idx2 = (uint32_t)ll_buf[marker_idx];
+ marker_uidx = unpack_map[marker_idx2];
+ bufptr = strcpyax(bufptr0, &(marker_ids[marker_idx2 * max_marker_id_len]), '\t');
+ bufptr = dtoa_g_wxp8x(marker_cms[marker_idx2], 1, '\t', bufptr);
+ bufptr = uint32toa_x((uint32_t)(ll_buf[marker_idx] >> 32), '\n', bufptr);
+ if (fwrite_checked(g_textbuf, bufptr - g_textbuf, map_outfile)) {
+ goto load_sort_and_write_map_ret_WRITE_FAIL;
+ }
+ (*map_reverse_ptr)[compact_map_reverse? marker_idx2 : marker_uidx] = marker_idx;
}
- (*map_reverse_ptr)[compact_map_reverse? marker_idx2 : marker_uidx] = marker_idx;
}
- }
- if (fclose_null(&map_outfile)) {
- goto load_sort_and_write_map_ret_WRITE_FAIL;
+ if (fclose_null(&map_outfile)) {
+ goto load_sort_and_write_map_ret_WRITE_FAIL;
+ }
}
while (0) {
load_sort_and_write_map_ret_NOMEM:
@@ -3035,12 +3048,12 @@ int32_t load_sort_and_write_map(uint32_t** map_reverse_ptr, FILE* mapfile, uint3
int32_t flip_subset_init(char* flip_fname, char* flip_subset_fname, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uint32_t* sample_sort_map, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* flip_subset_markers, uintptr_t* flip_subset_vec2) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
uintptr_t miss_ct = 0;
uintptr_t line_idx = 0;
- uint32_t* sample_uidx_to_idx = NULL;
+ uint32_t* sample_uidx_to_idx = nullptr;
uint32_t flip_marker_ct = 0;
uint32_t flip_sample_ct = 0;
int32_t retval = 0;
@@ -3059,7 +3072,7 @@ int32_t flip_subset_init(char* flip_fname, char* flip_subset_fname, uintptr_t un
int32_t sorted_idx;
unsigned char ucc;
// load --flip file, then --flip-subset
- fill_ulong_zero(flip_subset_markers, unfiltered_marker_ctl);
+ fill_ulong_zero(unfiltered_marker_ctl, flip_subset_markers);
retval = alloc_and_populate_id_htable(unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, 0, &marker_id_htable_size, &marker_id_htable);
if (retval) {
goto flip_subset_init_ret_1;
@@ -3118,7 +3131,7 @@ int32_t flip_subset_init(char* flip_fname, char* flip_subset_fname, uintptr_t un
if (fopen_checked(flip_subset_fname, "r", &infile)) {
goto flip_subset_init_ret_OPEN_FAIL;
}
- fill_ulong_zero(flip_subset_vec2, sample_ctv2);
+ fill_ulong_zero(sample_ctv2, flip_subset_vec2);
line_idx = 0;
while (fgets(g_textbuf, MAXLINELEN, infile)) {
line_idx++;
@@ -3130,7 +3143,7 @@ int32_t flip_subset_init(char* flip_fname, char* flip_subset_fname, uintptr_t un
if (is_eoln_kns(*bufptr)) {
continue;
}
- if (bsearch_read_fam_indiv(bufptr, sorted_sample_ids, max_sample_id_len, sample_ct, NULL, &sorted_idx, id_buf) || (sorted_idx == -1)) {
+ if (bsearch_read_fam_indiv(bufptr, sorted_sample_ids, max_sample_id_len, sample_ct, nullptr, &sorted_idx, id_buf) || (sorted_idx == -1)) {
miss_ct++;
continue;
}
@@ -3187,11 +3200,11 @@ uint32_t merge_or_split_x(uint32_t mergex, uint32_t splitx_bound1, uint32_t spli
uint32_t marker_idx;
uint32_t cur_pos;
if (mergex) {
- match_chrom = chrom_info_ptr->xy_code;
- new_chrom_shifted = ((uint64_t)((uint32_t)chrom_info_ptr->x_code)) << 32;
+ match_chrom = chrom_info_ptr->xymt_codes[XY_OFFSET];
+ new_chrom_shifted = ((uint64_t)((uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET])) << 32;
} else {
- match_chrom = chrom_info_ptr->x_code;
- new_chrom_shifted = ((uint64_t)((uint32_t)chrom_info_ptr->xy_code)) << 32;
+ match_chrom = chrom_info_ptr->xymt_codes[X_OFFSET];
+ new_chrom_shifted = ((uint64_t)((uint32_t)chrom_info_ptr->xymt_codes[XY_OFFSET])) << 32;
}
for (marker_idx = 0; marker_idx < marker_ct; marker_idx++, marker_uidx++) {
@@ -3307,7 +3320,7 @@ void zeropatch(uintptr_t sample_ctv2, uintptr_t cluster_ct, uintptr_t* cluster_z
if (in_setdef(zcdefs[cluster_idx], marker_idx)) {
if (!at_least_one_cluster) {
at_least_one_cluster = 1;
- fill_ulong_zero(patchbuf, sample_ctv2);
+ fill_ulong_zero(sample_ctv2, patchbuf);
}
bitvec_or(&(cluster_zc_masks[cluster_idx * sample_ctv2]), sample_ctv2, patchbuf);
}
@@ -3384,7 +3397,7 @@ void replace_missing_a2(uintptr_t* writebuf, uintptr_t* subset_vec2, uintptr_t w
#endif
}
-int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t map_cols, char* outname, char* outname_end, uint64_t calculation_type, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_i [...]
+int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, char* outname, char* outname_end, uint64_t calculation_type, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, char* patern [...]
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
@@ -3396,20 +3409,20 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
uintptr_t marker_idx = 0;
uintptr_t trio_ct = 0;
uintptr_t final_mask = get_final_mask(unfiltered_sample_ct);
- FILE* bedoutfile = NULL;
- int64_t* ll_buf = NULL;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* sample_raw_male_include2 = NULL;
- uintptr_t* workbuf = NULL;
- uintptr_t* cluster_zc_masks = NULL;
- uintptr_t* patchbuf = NULL;
- uintptr_t* flip_subset_markers = NULL;
- uintptr_t* flip_subset_vec2 = NULL;
- uint64_t* family_list = NULL;
- uint64_t* trio_list = NULL;
- uint32_t* trio_lookup = NULL;
- uint32_t** zcdefs = NULL;
+ FILE* bedoutfile = nullptr;
+ int64_t* ll_buf = nullptr;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* sample_raw_male_include2 = nullptr;
+ uintptr_t* workbuf = nullptr;
+ uintptr_t* cluster_zc_masks = nullptr;
+ uintptr_t* patchbuf = nullptr;
+ uintptr_t* flip_subset_markers = nullptr;
+ uintptr_t* flip_subset_vec2 = nullptr;
+ uint64_t* family_list = nullptr;
+ uint64_t* trio_list = nullptr;
+ uint32_t* trio_lookup = nullptr;
+ uint32_t** zcdefs = nullptr;
uint64_t mendel_error_ct = 0;
uint32_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
uint32_t family_ct = 0;
@@ -3513,7 +3526,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
goto make_bed_ret_1;
}
} else if (mergex || splitx_bound2) {
- if (splitx_bound2 && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xy_code)) {
+ if (splitx_bound2 && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[XY_OFFSET])) {
logerrprint("Error: --split-x cannot be used when the dataset already contains an XY region.\n");
goto make_bed_ret_INVALID_CMDLINE;
}
@@ -3522,7 +3535,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
if (mergex) {
logerrprint("Error: --merge-x requires XY pseudo-autosomal region data. (Use 'no-fail' to\nforce --make-bed to proceed anyway.\n");
} else {
- if (!is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->x_code)) {
+ if (!is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[X_OFFSET])) {
logerrprint("Error: --split-x requires X chromosome data. (Use 'no-fail' to force\n--make-bed to proceed anyway.\n");
} else {
LOGERRPRINTFWW("Error: No X chromosome loci have bp positions <= %u or >= %u. (Use 'no-fail' to force --make-bed to proceed anyway.)\n", splitx_bound1, splitx_bound2);
@@ -3536,7 +3549,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
fill_ll_buf(marker_exclude, marker_ct, chrom_info_ptr, ll_buf);
}
memcpy(outname_end, ".bim", 5);
- retval = sort_and_write_bim(map_reverse, map_cols, outname, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, ll_buf, chrom_info_ptr);
+ retval = sort_and_write_bim(map_reverse, outname, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, ll_buf, chrom_info_ptr);
if (retval) {
goto make_bed_ret_1;
}
@@ -3593,7 +3606,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
}
if (markers_done >= loop_end) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (markers_done * 100LLU) / marker_ct;
printf("\b\b%u%%", pct);
@@ -3622,7 +3635,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
goto make_bed_ret_NOMEM;
}
if (set_me_missing) {
- retval = get_trios_and_families(unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, NULL, NULL, NULL, NULL, &family_list, &family_ct, &trio_list, &trio_ct, &trio_lookup, mendel_include_duos, mendel_multigen);
+ retval = get_trios_and_families(unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, nullptr, nullptr, nullptr, nullptr, &family_list, &family_ct, &trio_list, &trio_ct, &trio_lookup, mendel_include_duos, mendel_multigen);
if (retval) {
goto make_bed_ret_1;
}
@@ -3693,7 +3706,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -3722,7 +3735,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
} else {
fill_ll_buf(marker_exclude, marker_ct, chrom_info_ptr, ll_buf);
}
- retval = sort_and_write_bim(map_reverse, map_cols, outname, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, ll_buf, chrom_info_ptr);
+ retval = sort_and_write_bim(map_reverse, outname, unfiltered_marker_ct, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, marker_allele_ptrs, ll_buf, chrom_info_ptr);
if (retval) {
goto make_bed_ret_1;
}
@@ -3764,7 +3777,7 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
if (calculation_type & CALC_MAKE_BED) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -3798,9 +3811,9 @@ int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t ma
int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32_t missing_pheno, uint32_t affection_01, uintptr_t* unfiltered_sample_ct_ptr, char** sample_ids_ptr, uintptr_t* max_sample_id_len_ptr, char** paternal_ids_ptr, uintptr_t* max_paternal_id_len_ptr, char** maternal_ids_ptr, uintptr_t* max_maternal_id_len_ptr, uintptr_t** sex_nm_ptr, uintptr_t** sex_male_ptr, uint32_t* affection_ptr, uintptr_t** pheno_nm_ptr, uintptr_t** pheno_c_ptr, double** pheno_d_ptr, uintptr [...]
unsigned char* bigstack_mark = g_bigstack_base;
double missing_phenod = (double)missing_pheno;
- uintptr_t* pheno_c = NULL;
- double* pheno_d = NULL;
- FILE* famfile = NULL;
+ uintptr_t* pheno_c = nullptr;
+ double* pheno_d = nullptr;
+ FILE* famfile = nullptr;
uintptr_t unfiltered_sample_ct = 0;
uintptr_t max_sample_id_len = *max_sample_id_len_ptr;
uintptr_t max_paternal_id_len = *max_paternal_id_len_ptr;
@@ -3855,7 +3868,7 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
}
}
bufptr0 = skip_initial_spaces(loadbuf);
- if (!is_eoln_kns(*bufptr0)) {
+ if (!is_eoln_or_comment_kns(*bufptr0)) {
if (fam_cols & FAM_COL_1) {
bufptr = next_token(bufptr0);
if (!bufptr) {
@@ -3914,8 +3927,8 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
}
// don't yet need to enforce separate FID and IID limits, but in theory this
// may change
- if ((max_sample_id_len > 2 * MAX_ID_LEN_P1) || (max_paternal_id_len > MAX_ID_LEN_P1) || (max_maternal_id_len > MAX_ID_LEN_P1)) {
- logerrprint("Error: FIDs and IIDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if ((max_sample_id_len > 2 * MAX_ID_BLEN) || (max_paternal_id_len > MAX_ID_BLEN) || (max_maternal_id_len > MAX_ID_BLEN)) {
+ logerrprint("Error: FIDs and IIDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto load_fam_ret_INVALID_FORMAT;
}
bigstack_reset(bigstack_mark);
@@ -3940,13 +3953,13 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
goto load_fam_ret_NOMEM;
}
pheno_c = *pheno_c_ptr;
- fill_ulong_zero(pheno_c, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, pheno_c);
} else {
pheno_d = (double*)malloc(unfiltered_sample_ct * sizeof(double));
if (!pheno_d) {
goto load_fam_ret_NOMEM;
}
- fill_double_zero(pheno_d, unfiltered_sample_ct);
+ fill_double_zero(unfiltered_sample_ct, pheno_d);
*pheno_d_ptr = pheno_d;
}
bigstack_mark = g_bigstack_base;
@@ -3974,14 +3987,14 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
pheno_nm = *pheno_nm_ptr;
founder_info = *founder_info_ptr;
if (fam_cols & FAM_COL_34) {
- fill_ulong_zero(founder_info, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, founder_info);
} else {
fill_all_bits(unfiltered_sample_ct, founder_info);
}
- fill_ulong_zero(sex_nm, unfiltered_sample_ctl);
- fill_ulong_zero(sex_male, unfiltered_sample_ctl);
- fill_ulong_zero(*sample_exclude_ptr, unfiltered_sample_ctl);
- fill_ulong_zero(pheno_nm, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, sex_nm);
+ fill_ulong_zero(unfiltered_sample_ctl, sex_male);
+ fill_ulong_zero(unfiltered_sample_ctl, *sample_exclude_ptr);
+ fill_ulong_zero(unfiltered_sample_ctl, pheno_nm);
// ----- .fam read, second pass -----
rewind(famfile);
@@ -3993,7 +4006,7 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
goto load_fam_ret_NOMEM;
}
bufptr0 = skip_initial_spaces(loadbuf);
- if (is_eoln_kns(*bufptr0)) {
+ if (is_eoln_or_comment_kns(*bufptr0)) {
continue;
}
if (fam_cols & FAM_COL_1) {
@@ -4080,16 +4093,16 @@ int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32
int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outname_end, char* single_chr, char* pheno_name, double hard_call_threshold, char* missing_code, int32_t missing_pheno, uint64_t misc_flags, uint32_t is_bgen, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
- gzFile gz_infile = NULL;
- FILE* outfile = NULL;
- FILE* outfile_bim = NULL;
+ FILE* infile = nullptr;
+ gzFile gz_infile = nullptr;
+ FILE* outfile = nullptr;
+ FILE* outfile_bim = nullptr;
uintptr_t mc_ct = 0;
uintptr_t max_mc_len = 0;
uintptr_t line_idx = 0;
double hard_call_floor = 1.0 - hard_call_threshold;
- char* loadbuf = NULL;
- char* sorted_mc = NULL;
+ char* loadbuf = nullptr;
+ char* sorted_mc = nullptr;
char* tbuf2 = &(g_textbuf[MAXLINELEN]); // .fam write
// 0 = not present, otherwise zero-based index (this is fine since first
@@ -4152,999 +4165,993 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
uint16_t uskk;
char cc;
char cc2;
- if (single_chr && (!allow_extra_chroms)) {
- ii = get_chrom_code_raw(single_chr);
- if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
- logerrprint("Error: --oxford-single-chr chromosome code is excluded by chromosome filter.\n");
- goto oxford_to_bed_ret_INVALID_CMDLINE;
- }
- }
- bufptr = int32toa(missing_pheno, missing_pheno_str);
- missing_pheno_len = (uintptr_t)(bufptr - missing_pheno_str);
- if (!missing_code) {
- mc_ct = 1;
- max_mc_len = 3;
- if (bigstack_alloc_c(3, &sorted_mc)) {
- goto oxford_to_bed_ret_NOMEM;
- }
- memcpy(sorted_mc, "NA", 3);
- } else {
- bufptr = missing_code;
- while (*bufptr) {
- while (*bufptr == ',') {
- bufptr++;
- }
- if (!(*bufptr)) {
- break;
- }
- mc_ct++;
- bufptr2 = strchr(bufptr, ',');
- if (!bufptr2) {
- bufptr2 = strchr(bufptr, '\0');
- }
- ulii = (uintptr_t)(bufptr2 - bufptr);
- if (ulii >= max_mc_len) {
- max_mc_len = ulii + 1;
+ {
+ if (single_chr && (!allow_extra_chroms)) {
+ ii = get_chrom_code_raw(single_chr);
+ if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
+ logerrprint("Error: --oxford-single-chr chromosome code is excluded by chromosome filter.\n");
+ goto oxford_to_bed_ret_INVALID_CMDLINE;
}
- bufptr = bufptr2;
}
- if (mc_ct) {
- if (bigstack_alloc_c(mc_ct * max_mc_len, &sorted_mc)) {
+ bufptr = int32toa(missing_pheno, missing_pheno_str);
+ missing_pheno_len = (uintptr_t)(bufptr - missing_pheno_str);
+ if (!missing_code) {
+ mc_ct = 1;
+ max_mc_len = 3;
+ if (bigstack_alloc_c(3, &sorted_mc)) {
goto oxford_to_bed_ret_NOMEM;
}
+ memcpy(sorted_mc, "NA", 3);
+ } else {
bufptr = missing_code;
- ulii = 0; // current missing-code index
- do {
- while (*bufptr == ',') {
- bufptr++;
+ while (*bufptr) {
+ while (*bufptr == ',') {
+ bufptr++;
}
- if (!(*bufptr)) {
+ if (!(*bufptr)) {
break;
}
- bufptr2 = strchr(bufptr, ',');
- if (!bufptr2) {
+ mc_ct++;
+ bufptr2 = strchr(bufptr, ',');
+ if (!bufptr2) {
bufptr2 = strchr(bufptr, '\0');
}
- uljj = (uintptr_t)(bufptr2 - bufptr);
- memcpyx(&(sorted_mc[ulii * max_mc_len]), bufptr, uljj, '\0');
+ ulii = (uintptr_t)(bufptr2 - bufptr);
+ if (ulii >= max_mc_len) {
+ max_mc_len = ulii + 1;
+ }
bufptr = bufptr2;
- ulii++;
- } while (*bufptr);
- qsort(sorted_mc, mc_ct, max_mc_len, strcmp_casted);
- }
- }
- if (fopen_checked(samplename, "r", &infile)) {
- goto oxford_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".fam", 5);
- if (fopen_checked(outname, "w", &outfile)) {
- goto oxford_to_bed_ret_OPEN_FAIL;
- }
- g_textbuf[MAXLINELEN - 1] = ' ';
- do {
- line_idx++;
- if (!fgets(g_textbuf, MAXLINELEN, infile)) {
- if (ferror(infile)) {
- goto oxford_to_bed_ret_READ_FAIL;
}
- logerrprint("Error: Empty --data/--sample file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ if (mc_ct) {
+ if (bigstack_alloc_c(mc_ct * max_mc_len, &sorted_mc)) {
+ goto oxford_to_bed_ret_NOMEM;
+ }
+ bufptr = missing_code;
+ ulii = 0; // current missing-code index
+ do {
+ while (*bufptr == ',') {
+ bufptr++;
+ }
+ if (!(*bufptr)) {
+ break;
+ }
+ bufptr2 = strchr(bufptr, ',');
+ if (!bufptr2) {
+ bufptr2 = strchr(bufptr, '\0');
+ }
+ uljj = (uintptr_t)(bufptr2 - bufptr);
+ memcpyx(&(sorted_mc[ulii * max_mc_len]), bufptr, uljj, '\0');
+ bufptr = bufptr2;
+ ulii++;
+ } while (*bufptr);
+ qsort(sorted_mc, mc_ct, max_mc_len, strcmp_casted);
+ }
}
- if (!g_textbuf[MAXLINELEN - 1]) {
- goto oxford_to_bed_ret_SAMPLE_LONG_LINE;
+ if (fopen_checked(samplename, "r", &infile)) {
+ goto oxford_to_bed_ret_OPEN_FAIL;
}
- bufptr = skip_initial_spaces(g_textbuf);
- } while (is_eoln_kns(*bufptr));
- bufptr2 = token_endnn(bufptr);
- if ((((uintptr_t)(bufptr2 - bufptr)) != 4) || memcmp(bufptr, "ID_1", 4)) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
- }
- bufptr = skip_initial_spaces(bufptr2);
- slen = strlen_se(bufptr);
- if ((slen != 4) || memcmp(bufptr, "ID_2", 4)) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
- }
- bufptr = skip_initial_spaces(&(bufptr[4]));
- slen = strlen_se(bufptr);
- if ((slen != 7) || (!match_upper_nt(bufptr, "MISSING", 7))) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
- }
- bufptr = skip_initial_spaces(&(bufptr[7]));
- if (pheno_name) {
- pheno_name_len = strlen(pheno_name);
- }
- while (!is_eoln_kns(*bufptr)) {
- bufptr2 = token_endnn(bufptr);
- ulii = (uintptr_t)(bufptr2 - bufptr);
- // allow "Sex", "SEX", etc.
- if ((ulii == 3) && (tolower(bufptr[0]) == 's') && (tolower(bufptr[1]) == 'e') && (tolower(bufptr[2]) == 'x')) {
- if (sex_col) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
+ memcpy(outname_end, ".fam", 5);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto oxford_to_bed_ret_OPEN_FAIL;
+ }
+ g_textbuf[MAXLINELEN - 1] = ' ';
+ do {
+ line_idx++;
+ if (!fgets(g_textbuf, MAXLINELEN, infile)) {
+ if (ferror(infile)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ logerrprint("Error: Empty --data/--sample file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
}
- sex_col = col_ct;
- } else if ((ulii == pheno_name_len) && (!memcmp(bufptr, pheno_name, ulii))) {
- if (pheno_col) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ goto oxford_to_bed_ret_SAMPLE_LONG_LINE;
}
- pheno_col = col_ct;
+ bufptr = skip_initial_spaces(g_textbuf);
+ } while (is_eoln_kns(*bufptr));
+ bufptr2 = token_endnn(bufptr);
+ if ((((uintptr_t)(bufptr2 - bufptr)) != 4) || memcmp(bufptr, "ID_1", 4)) {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
}
- col_ct++;
bufptr = skip_initial_spaces(bufptr2);
- }
- if (pheno_name) {
- if (!pheno_col) {
- logerrprint("Error: --oxford-pheno-name parameter not found in .sample file header.\n");
- goto oxford_to_bed_ret_INVALID_CMDLINE;
- } else if (sex_col > pheno_col) {
- logerrprint("Error: .sample phenotype column(s) should be after sex covariate.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ slen = strlen_se(bufptr);
+ if ((slen != 4) || memcmp(bufptr, "ID_2", 4)) {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
}
- }
- do {
- line_idx++;
- if (!fgets(g_textbuf, MAXLINELEN, infile)) {
- if (ferror(infile)) {
- goto oxford_to_bed_ret_READ_FAIL;
+ bufptr = skip_initial_spaces(&(bufptr[4]));
+ slen = strlen_se(bufptr);
+ if ((slen != 7) || (!match_upper_counted(bufptr, "MISSING", 7))) {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
+ }
+ bufptr = skip_initial_spaces(&(bufptr[7]));
+ if (pheno_name) {
+ pheno_name_len = strlen(pheno_name);
+ }
+ while (!is_eoln_kns(*bufptr)) {
+ bufptr2 = token_endnn(bufptr);
+ ulii = (uintptr_t)(bufptr2 - bufptr);
+ // allow "Sex", "SEX", etc.
+ if ((ulii == 3) && (tolower(bufptr[0]) == 's') && (tolower(bufptr[1]) == 'e') && (tolower(bufptr[2]) == 'x')) {
+ if (sex_col) {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
+ }
+ sex_col = col_ct;
+ } else if ((ulii == pheno_name_len) && (!memcmp(bufptr, pheno_name, ulii))) {
+ if (pheno_col) {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_1;
+ }
+ pheno_col = col_ct;
}
- logerrprint("Error: Only one nonempty line in .sample file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ col_ct++;
+ bufptr = skip_initial_spaces(bufptr2);
}
- if (!g_textbuf[MAXLINELEN - 1]) {
- goto oxford_to_bed_ret_SAMPLE_LONG_LINE;
+ if (pheno_name) {
+ if (!pheno_col) {
+ logerrprint("Error: --oxford-pheno-name parameter not found in .sample file header.\n");
+ goto oxford_to_bed_ret_INVALID_CMDLINE;
+ } else if (sex_col > pheno_col) {
+ logerrprint("Error: .sample phenotype column(s) should be after sex covariate.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
}
- bufptr = skip_initial_spaces(g_textbuf);
- } while (is_eoln_kns(*bufptr));
- bufptr2 = token_endnn(bufptr);
- if ((((uintptr_t)(bufptr2 - bufptr)) != 1) || (*bufptr != '0')) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_2;
- }
- bufptr = skip_initial_spaces(bufptr2);
- slen = strlen_se(bufptr);
- if ((slen != 1) || (*bufptr != '0')) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_2;
- }
- bufptr = skip_initial_spaces(&(bufptr[1]));
- slen = strlen_se(bufptr);
- if ((slen != 1) || (*bufptr != '0')) {
- goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_2;
- }
- bufptr++;
- col_idx = 3;
- while (col_idx < col_ct) {
- bufptr = skip_initial_spaces(bufptr);
- if (is_eoln_kns(*bufptr)) {
- logerrprint("Error: Second .sample header line has fewer tokens than the first.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ do {
+ line_idx++;
+ if (!fgets(g_textbuf, MAXLINELEN, infile)) {
+ if (ferror(infile)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ logerrprint("Error: Only one nonempty line in .sample file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ goto oxford_to_bed_ret_SAMPLE_LONG_LINE;
+ }
+ bufptr = skip_initial_spaces(g_textbuf);
+ } while (is_eoln_kns(*bufptr));
+ bufptr2 = token_endnn(bufptr);
+ if ((((uintptr_t)(bufptr2 - bufptr)) != 1) || (*bufptr != '0')) {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_2;
}
- if (bufptr[1] > ' ') {
+ bufptr = skip_initial_spaces(bufptr2);
+ slen = strlen_se(bufptr);
+ if ((slen != 1) || (*bufptr != '0')) {
goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_2;
}
- cc = *bufptr;
- if ((col_idx == sex_col) && (cc != 'D')) {
- logerrprint("Error: .sample sex column is not of type 'D'.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ bufptr = skip_initial_spaces(&(bufptr[1]));
+ slen = strlen_se(bufptr);
+ if ((slen != 1) || (*bufptr != '0')) {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_2;
}
- if (!pheno_col) {
- if ((cc == 'B') || (cc == 'P')) {
- if (sex_col > col_idx) {
- logerrprint("Error: .sample phenotype column(s) should be after sex covariate.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ bufptr++;
+ col_idx = 3;
+ while (col_idx < col_ct) {
+ bufptr = skip_initial_spaces(bufptr);
+ if (is_eoln_kns(*bufptr)) {
+ logerrprint("Error: Second .sample header line has fewer tokens than the first.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (bufptr[1] > ' ') {
+ goto oxford_to_bed_ret_INVALID_SAMPLE_HEADER_2;
+ }
+ cc = *bufptr;
+ if ((col_idx == sex_col) && (cc != 'D')) {
+ logerrprint("Error: .sample sex column is not of type 'D'.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (!pheno_col) {
+ if ((cc == 'B') || (cc == 'P')) {
+ if (sex_col > col_idx) {
+ logerrprint("Error: .sample phenotype column(s) should be after sex covariate.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ pheno_col = col_idx;
+ is_binary_pheno = (cc == 'B');
+ break;
}
- pheno_col = col_idx;
+ } else if (col_idx == pheno_col) {
is_binary_pheno = (cc == 'B');
+ if ((!is_binary_pheno) && (cc != 'P')) {
+ logerrprint("Error: --oxford-pheno-name parameter does not refer to a binary or continuous\nphenotype.\n");
+ goto oxford_to_bed_ret_INVALID_CMDLINE;
+ }
break;
}
- } else if (col_idx == pheno_col) {
- is_binary_pheno = (cc == 'B');
- if ((!is_binary_pheno) && (cc != 'P')) {
- logerrprint("Error: --oxford-pheno-name parameter does not refer to a binary or continuous\nphenotype.\n");
+ col_idx++;
+ bufptr++;
+ }
+ if (is_binary_pheno) {
+ // check for pathological case
+ if ((bsearch_str("0", 1, sorted_mc, max_mc_len, mc_ct) != -1) || (bsearch_str("1", 1, sorted_mc, max_mc_len, mc_ct) != -1)) {
+ logerrprint("Error: '0' and '1' are unacceptable missing case/control phenotype codes.\n");
goto oxford_to_bed_ret_INVALID_CMDLINE;
}
- break;
- }
- col_idx++;
- bufptr++;
- }
- if (is_binary_pheno) {
- // check for pathological case
- if ((bsearch_str("0", 1, sorted_mc, max_mc_len, mc_ct) != -1) || (bsearch_str("1", 1, sorted_mc, max_mc_len, mc_ct) != -1)) {
- logerrprint("Error: '0' and '1' are unacceptable missing case/control phenotype codes.\n");
- goto oxford_to_bed_ret_INVALID_CMDLINE;
- }
- }
- while (fgets(g_textbuf, MAXLINELEN, infile)) {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- goto oxford_to_bed_ret_SAMPLE_LONG_LINE;
- }
- bufptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
- }
- bufptr2 = token_endnn(bufptr);
- wptr = memcpyax(tbuf2, bufptr, bufptr2 - bufptr, '\t');
- bufptr = skip_initial_spaces(bufptr2);
- if (is_eoln_kns(*bufptr)) {
- goto oxford_to_bed_ret_MISSING_TOKENS;
}
- bufptr2 = token_endnn(bufptr);
- wptr = memcpya(wptr, bufptr, bufptr2 - bufptr);
- wptr = memcpya(wptr, "\t0\t0\t", 5);
- col_idx = 2;
- bufptr = bufptr2;
- if (sex_col) {
- while (1) {
- bufptr = skip_initial_spaces(bufptr);
- if (is_eoln_kns(*bufptr)) {
- goto oxford_to_bed_ret_MISSING_TOKENS;
- }
- if (col_idx == sex_col) {
- break;
- }
- bufptr = token_endnn(bufptr);
- col_idx++;
+ while (fgets(g_textbuf, MAXLINELEN, infile)) {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ goto oxford_to_bed_ret_SAMPLE_LONG_LINE;
}
- cc = *bufptr++;
- if ((cc < '0') || (cc > '2') || ((*bufptr) > ' ')) {
- sprintf(g_logbuf, "Error: Invalid sex code on line %" PRIuPTR " of .sample file.\n", line_idx);
- goto oxford_to_bed_ret_INVALID_FORMAT_2;
+ bufptr = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*bufptr)) {
+ continue;
}
- *wptr++ = cc;
- col_idx++;
- } else {
- *wptr++ = '0';
- }
- *wptr++ = '\t';
- if (pheno_col) {
- while (1) {
- bufptr = skip_initial_spaces(bufptr);
- if (is_eoln_kns(*bufptr)) {
- goto oxford_to_bed_ret_MISSING_TOKENS;
+ bufptr2 = token_endnn(bufptr);
+ wptr = memcpyax(tbuf2, bufptr, bufptr2 - bufptr, '\t');
+ bufptr = skip_initial_spaces(bufptr2);
+ if (is_eoln_kns(*bufptr)) {
+ goto oxford_to_bed_ret_MISSING_TOKENS;
+ }
+ bufptr2 = token_endnn(bufptr);
+ wptr = memcpya(wptr, bufptr, bufptr2 - bufptr);
+ wptr = memcpya(wptr, "\t0\t0\t", 5);
+ col_idx = 2;
+ bufptr = bufptr2;
+ if (sex_col) {
+ while (1) {
+ bufptr = skip_initial_spaces(bufptr);
+ if (is_eoln_kns(*bufptr)) {
+ goto oxford_to_bed_ret_MISSING_TOKENS;
+ }
+ if (col_idx == sex_col) {
+ break;
+ }
+ bufptr = token_endnn(bufptr);
+ col_idx++;
}
- if (col_idx == pheno_col) {
- break;
+ cc = *bufptr++;
+ if ((cc < '0') || (cc > '2') || ((*bufptr) > ' ')) {
+ sprintf(g_logbuf, "Error: Invalid sex code on line %" PRIuPTR " of .sample file.\n", line_idx);
+ goto oxford_to_bed_ret_INVALID_FORMAT_2;
}
- bufptr = token_endnn(bufptr);
+ *wptr++ = cc;
col_idx++;
+ } else {
+ *wptr++ = '0';
}
- slen = (uintptr_t)(token_endnn(bufptr) - bufptr);
- if (is_binary_pheno) {
- cc = *bufptr;
- if ((slen != 1) || ((cc != '0') && (cc != '1'))) {
- goto oxford_to_bed_missing_pheno;
- } else {
- *wptr++ = cc + 1;
+ *wptr++ = '\t';
+ if (pheno_col) {
+ while (1) {
+ bufptr = skip_initial_spaces(bufptr);
+ if (is_eoln_kns(*bufptr)) {
+ goto oxford_to_bed_ret_MISSING_TOKENS;
+ }
+ if (col_idx == pheno_col) {
+ break;
+ }
+ bufptr = token_endnn(bufptr);
+ col_idx++;
}
- } else {
- if (bsearch_str(bufptr, slen, sorted_mc, max_mc_len, mc_ct) == -1) {
- if (!scan_double(bufptr, &dxx)) {
- wptr = memcpya(wptr, bufptr, slen);
- } else {
+ slen = (uintptr_t)(token_endnn(bufptr) - bufptr);
+ if (is_binary_pheno) {
+ cc = *bufptr;
+ if ((slen != 1) || ((cc != '0') && (cc != '1'))) {
goto oxford_to_bed_missing_pheno;
+ } else {
+ *wptr++ = cc + 1;
}
} else {
- goto oxford_to_bed_missing_pheno;
+ if (bsearch_str(bufptr, slen, sorted_mc, max_mc_len, mc_ct) == -1) {
+ if (!scan_double(bufptr, &dxx)) {
+ wptr = memcpya(wptr, bufptr, slen);
+ } else {
+ goto oxford_to_bed_missing_pheno;
+ }
+ } else {
+ goto oxford_to_bed_missing_pheno;
+ }
}
+ } else {
+ oxford_to_bed_missing_pheno:
+ wptr = memcpya(wptr, missing_pheno_str, missing_pheno_len);
}
- } else {
- oxford_to_bed_missing_pheno:
- wptr = memcpya(wptr, missing_pheno_str, missing_pheno_len);
+ *wptr++ = '\n';
+ if (fwrite_checked(tbuf2, wptr - tbuf2, outfile)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ sample_ct++;
}
- *wptr++ = '\n';
- if (fwrite_checked(tbuf2, wptr - tbuf2, outfile)) {
+ if ((!sample_ct) && (!allow_no_samples)) {
+ logerrprint("Error: No samples in .sample file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (fclose_null(&infile)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (fclose_null(&outfile)) {
goto oxford_to_bed_ret_WRITE_FAIL;
}
- sample_ct++;
- }
- if ((!sample_ct) && (!allow_no_samples)) {
- logerrprint("Error: No samples in .sample file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- if (fclose_null(&infile)) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (fclose_null(&outfile)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- sample_ct4 = (sample_ct + 3) / 4;
- sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
- if (bigstack_alloc_ul(sample_ctl2, &writebuf)) {
- goto oxford_to_bed_ret_NOMEM;
- }
- memcpy(outname_end, ".bim", 5);
- if (fopen_checked(outname, "w", &outfile_bim)) {
- goto oxford_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".bed", 5);
- if (fopen_checked(outname, FOPEN_WB, &outfile)) {
- goto oxford_to_bed_ret_OPEN_FAIL;
- }
- if (fwrite_checked("l\x1b\x01", 3, outfile)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- if (!is_bgen) {
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size <= MAXLINELEN) {
+ sample_ct4 = (sample_ct + 3) / 4;
+ sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
+ if (bigstack_alloc_ul(sample_ctl2, &writebuf)) {
goto oxford_to_bed_ret_NOMEM;
}
- loadbuf = (char*)g_bigstack_base;
- retval = gzopen_read_checked(genname, &gz_infile);
- if (retval) {
- goto oxford_to_bed_ret_1;
+ memcpy(outname_end, ".bim", 5);
+ if (fopen_checked(outname, "w", &outfile_bim)) {
+ goto oxford_to_bed_ret_OPEN_FAIL;
}
- loadbuf[loadbuf_size - 1] = ' ';
- line_idx = 0;
- while (1) {
- line_idx++;
- if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
- if (!gzeof(gz_infile)) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- break;
- }
- if (!loadbuf[loadbuf_size - 1]) {
- if (loadbuf_size == MAXLINEBUFLEN) {
- LOGERRPRINTF("Error: Line %" PRIuPTR " of .gen file is pathologically long.\n", line_idx);
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
+ memcpy(outname_end, ".bed", 5);
+ if (fopen_checked(outname, FOPEN_WB, &outfile)) {
+ goto oxford_to_bed_ret_OPEN_FAIL;
+ }
+ if (fwrite_checked("l\x1b\x01", 3, outfile)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ if (!is_bgen) {
+ loadbuf_size = bigstack_left();
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size <= MAXLINELEN) {
goto oxford_to_bed_ret_NOMEM;
}
- bufptr = skip_initial_spaces(loadbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
+ loadbuf = (char*)g_bigstack_base;
+ retval = gzopen_read_checked(genname, &gz_infile);
+ if (retval) {
+ goto oxford_to_bed_ret_1;
}
- if (!single_chr) {
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- if (chrom_error(".gen file", chrom_info_ptr, bufptr, line_idx, ii, allow_extra_chroms)) {
- if (!memcmp(bufptr, "---", 3)) {
- logprint("(Did you forget --oxford-single-chr?)\n");
- }
+ loadbuf[loadbuf_size - 1] = ' ';
+ line_idx = 0;
+ while (1) {
+ line_idx++;
+ if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
+ if (!gzeof(gz_infile)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ break;
+ }
+ if (!loadbuf[loadbuf_size - 1]) {
+ if (loadbuf_size == MAXLINEBUFLEN) {
+ LOGERRPRINTF("Error: Line %" PRIuPTR " of .gen file is pathologically long.\n", line_idx);
goto oxford_to_bed_ret_INVALID_FORMAT;
}
- retval = resolve_or_add_chrom_name(bufptr, ".gen file", line_idx, chrom_info_ptr, &ii);
+ goto oxford_to_bed_ret_NOMEM;
+ }
+ char* loadbuf_first_token = skip_initial_spaces(loadbuf);
+ if (is_eoln_kns(*loadbuf_first_token)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(loadbuf_first_token);
+ if (!single_chr) {
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - loadbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code(loadbuf_first_token, ".gen file", line_idx, chrom_name_slen, allow_extra_chroms, chrom_info_ptr, &cur_chrom_code);
if (retval) {
+ if ((chrom_name_slen == 3) && (!memcmp(loadbuf_first_token, "---", 3))) {
+ logerrprint("(Did you forget --oxford-single-chr?)\n");
+ }
goto oxford_to_bed_ret_1;
}
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
+ continue;
+ }
+ *first_token_end = ' ';
}
- if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
- continue;
+ fill_ulong_zero(sample_ctl2, writebuf);
+ if (single_chr) {
+ fputs(single_chr, outfile_bim);
+ putc_unlocked(' ', outfile_bim);
+ bufptr = next_token(first_token_end);
+ bufptr2 = next_token(bufptr);
+ } else {
+ bufptr = loadbuf_first_token;
+ bufptr2 = next_token(skip_initial_spaces(first_token_end));
}
- }
- fill_ulong_zero(writebuf, sample_ctl2);
- if (single_chr) {
- fputs(single_chr, outfile_bim);
- putc(' ', outfile_bim);
- bufptr = next_token(bufptr);
- bufptr2 = next_token(bufptr);
- } else {
- bufptr2 = next_token_mult(bufptr, 2);
- }
- if (!bufptr2) {
- goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
- }
- fwrite(bufptr, 1, bufptr2 - bufptr, outfile_bim);
- putc('0', outfile_bim);
- if (putc_checked(' ', outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- bufptr = next_token(bufptr2);
- bufptr3 = next_token(bufptr);
- if (no_more_tokens_kns(bufptr3)) {
- goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
- }
- // bufptr2 = pos
- // bufptr = allele 1
- // bufptr3 = allele 2
- bufptr4 = token_endnn(bufptr3);
- uii = (uintptr_t)(bufptr4 - bufptr3);
- identical_alleles = (strlen_se(bufptr) == uii) && (!memcmp(bufptr, bufptr3, uii));
- if (identical_alleles) {
- // we treat identical A1 and A2 as a special case, since naive handling
- // prevents e.g. later data merge.
- // maybe add a warning?
- fwrite(bufptr2, 1, strlen_se(bufptr2), outfile_bim);
- fputs(" 0 ", outfile_bim);
- fwrite(bufptr3, 1, bufptr4 - bufptr3, outfile_bim);
- } else {
- fwrite(bufptr2, 1, bufptr4 - bufptr2, outfile_bim);
- }
- if (putc_checked('\n', outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- if (sample_ct) {
- cur_word = 0;
- shiftval = 0;
- ulptr = writebuf;
- bufptr = skip_initial_spaces(bufptr4);
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- if (is_eoln_kns(*bufptr)) {
- goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
- }
- // fast handling of common cases
- cc = bufptr[1];
- if ((cc == ' ') || (cc == '\t')) {
- cc = bufptr[3];
- cc2 = bufptr[5];
- if (((cc == ' ') || (cc == '\t')) && ((cc2 == ' ') || (cc2 == '\t'))) {
- cc = *bufptr;
- if (cc == '0') {
- bufptr2 = &(bufptr[2]);
- cc = *bufptr2;
- cc2 = bufptr2[2];
+ if (!bufptr2) {
+ goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
+ }
+ fwrite(bufptr, 1, bufptr2 - bufptr, outfile_bim);
+ putc_unlocked('0', outfile_bim);
+ if (putc_checked(' ', outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ bufptr = next_token(bufptr2);
+ bufptr3 = next_token(bufptr);
+ if (no_more_tokens_kns(bufptr3)) {
+ goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
+ }
+ // bufptr2 = pos
+ // bufptr = allele 1
+ // bufptr3 = allele 2
+ bufptr4 = token_endnn(bufptr3);
+ uii = (uintptr_t)(bufptr4 - bufptr3);
+ identical_alleles = (strlen_se(bufptr) == uii) && (!memcmp(bufptr, bufptr3, uii));
+ if (identical_alleles) {
+ // we treat identical A1 and A2 as a special case, since naive
+ // handling prevents e.g. later data merge.
+ // maybe add a warning?
+ fwrite(bufptr2, 1, strlen_se(bufptr2), outfile_bim);
+ fputs(" 0 ", outfile_bim);
+ fwrite(bufptr3, 1, bufptr4 - bufptr3, outfile_bim);
+ } else {
+ fwrite(bufptr2, 1, bufptr4 - bufptr2, outfile_bim);
+ }
+ if (putc_checked('\n', outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ if (sample_ct) {
+ cur_word = 0;
+ shiftval = 0;
+ ulptr = writebuf;
+ bufptr = skip_initial_spaces(bufptr4);
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ if (is_eoln_kns(*bufptr)) {
+ goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
+ }
+ // fast handling of common cases
+ cc = bufptr[1];
+ if ((cc == ' ') || (cc == '\t')) {
+ cc = bufptr[3];
+ cc2 = bufptr[5];
+ if (((cc == ' ') || (cc == '\t')) && ((cc2 == ' ') || (cc2 == '\t'))) {
+ cc = *bufptr;
if (cc == '0') {
- if (cc2 == '1') {
- ulii = 3;
- } else if (cc2 == '0') {
- ulii = 1;
+ bufptr2 = &(bufptr[2]);
+ cc = *bufptr2;
+ cc2 = bufptr2[2];
+ if (cc == '0') {
+ if (cc2 == '1') {
+ ulii = 3;
+ } else if (cc2 == '0') {
+ ulii = 1;
+ } else {
+ // could be a space...
+ goto oxford_to_bed_full_parse_2;
+ }
+ } else if ((cc == '1') && (cc2 == '0')) {
+ ulii = 2;
} else {
- // could be a space...
goto oxford_to_bed_full_parse_2;
}
- } else if ((cc == '1') && (cc2 == '0')) {
- ulii = 2;
+ } else if ((cc == '1') && (bufptr[2] == '0') && (bufptr[4] == '0')) {
+ ulii = 0;
} else {
- goto oxford_to_bed_full_parse_2;
+ goto oxford_to_bed_full_parse;
}
- } else if ((cc == '1') && (bufptr[2] == '0') && (bufptr[4] == '0')) {
- ulii = 0;
+ bufptr = &(bufptr[6]);
} else {
goto oxford_to_bed_full_parse;
}
- bufptr = &(bufptr[6]);
} else {
- goto oxford_to_bed_full_parse;
- }
- } else {
- // okay, gotta do things the slow way
- oxford_to_bed_full_parse:
- bufptr2 = token_endnn(bufptr);
- oxford_to_bed_full_parse_2:
- bufptr2 = skip_initial_spaces(bufptr2);
- if (is_eoln_kns(*bufptr2)) {
- goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
- }
- bufptr3 = token_endnn(bufptr2);
- dzz = strtod(bufptr3, &bufptr4);
- if (!is_randomized) {
- if (dzz >= hard_call_floor) {
- ulii = 3;
- } else {
- if (bufptr3 == bufptr4) {
- goto oxford_to_bed_ret_INVALID_DOSAGE;
- }
- dyy = strtod(bufptr2, &bufptr3);
- if (dyy >= hard_call_floor) {
- ulii = 2;
+ // okay, gotta do things the slow way
+ oxford_to_bed_full_parse:
+ bufptr2 = token_endnn(bufptr);
+ oxford_to_bed_full_parse_2:
+ bufptr2 = skip_initial_spaces(bufptr2);
+ if (is_eoln_kns(*bufptr2)) {
+ goto oxford_to_bed_ret_MISSING_TOKENS_GEN;
+ }
+ bufptr3 = token_endnn(bufptr2);
+ dzz = strtod(bufptr3, &bufptr4);
+ if (!is_randomized) {
+ if (dzz >= hard_call_floor) {
+ ulii = 3;
} else {
- if (bufptr2 == bufptr3) {
+ if (bufptr3 == bufptr4) {
goto oxford_to_bed_ret_INVALID_DOSAGE;
}
- dxx = strtod(bufptr, &bufptr2);
- if (dxx >= hard_call_floor) {
- ulii = 0;
+ dyy = strtod(bufptr2, &bufptr3);
+ if (dyy >= hard_call_floor) {
+ ulii = 2;
} else {
- if (bufptr == bufptr2) {
+ if (bufptr2 == bufptr3) {
goto oxford_to_bed_ret_INVALID_DOSAGE;
}
- ulii = 1;
+ dxx = strtod(bufptr, &bufptr2);
+ if (dxx >= hard_call_floor) {
+ ulii = 0;
+ } else {
+ if (bufptr == bufptr2) {
+ goto oxford_to_bed_ret_INVALID_DOSAGE;
+ }
+ ulii = 1;
+ }
}
}
- }
- } else {
- drand = rand_unif();
- if (drand < dzz) {
- ulii = 3;
} else {
- if (bufptr3 == bufptr4) {
- goto oxford_to_bed_ret_INVALID_DOSAGE;
- }
- dyy = strtod(bufptr2, &bufptr3) + dzz;
- if (drand < dyy) {
- ulii = 2;
+ drand = rand_unif();
+ if (drand < dzz) {
+ ulii = 3;
} else {
- if (bufptr2 == bufptr3) {
+ if (bufptr3 == bufptr4) {
goto oxford_to_bed_ret_INVALID_DOSAGE;
}
- dxx = strtod(bufptr, &bufptr2) + dyy;
+ dyy = strtod(bufptr2, &bufptr3) + dzz;
if (drand < dyy) {
- ulii = 0;
- } else if (dxx < 1 - D_EPSILON) {
- ulii = 1;
+ ulii = 2;
} else {
- // fully called genotype probabilities may add up to less
- // than one due to rounding error. If this appears to have
- // happened, do NOT make a missing call; instead rescale
- // everything to add to one and reinterpret the random
- // number. (D_EPSILON is currently set to make 4 decimal
- // place precision safe to use.)
- drand *= dxx;
- if (drand < dzz) {
- ulii = 3;
- } else if (drand < dyy) {
- ulii = 2;
- } else {
+ if (bufptr2 == bufptr3) {
+ goto oxford_to_bed_ret_INVALID_DOSAGE;
+ }
+ dxx = strtod(bufptr, &bufptr2) + dyy;
+ if (drand < dyy) {
ulii = 0;
+ } else if (dxx < 1 - D_EPSILON) {
+ ulii = 1;
+ } else {
+ // fully called genotype probabilities may add up to less
+ // than one due to rounding error. If this appears to
+ // have happened, do NOT make a missing call; instead
+ // rescale everything to add to one and reinterpret the
+ // random number. (D_EPSILON is currently set to make 4
+ // decimal place precision safe to use.)
+ drand *= dxx;
+ if (drand < dzz) {
+ ulii = 3;
+ } else if (drand < dyy) {
+ ulii = 2;
+ } else {
+ ulii = 0;
+ }
}
}
}
}
+ bufptr = skip_initial_spaces(bufptr4);
+ }
+ cur_word |= ulii << shiftval;
+ shiftval += 2;
+ if (shiftval == BITCT) {
+ *ulptr++ = cur_word;
+ cur_word = 0;
+ shiftval = 0;
}
- bufptr = skip_initial_spaces(bufptr4);
}
- cur_word |= ulii << shiftval;
- shiftval += 2;
- if (shiftval == BITCT) {
+ if (shiftval) {
*ulptr++ = cur_word;
- cur_word = 0;
- shiftval = 0;
}
- }
- if (shiftval) {
- *ulptr++ = cur_word;
- }
- if (identical_alleles) {
- // keep missing calls, but convert hom/het A1 to hom A2.
- for (ulptr = writebuf; ulptr < (&(writebuf[sample_ctl2])); ulptr++) {
- ulii = *ulptr;
- *ulptr = ((~ulii) << 1) | ulii | FIVEMASK;
+ if (identical_alleles) {
+ // keep missing calls, but convert hom/het A1 to hom A2.
+ for (ulptr = writebuf; ulptr < (&(writebuf[sample_ctl2])); ulptr++) {
+ ulii = *ulptr;
+ *ulptr = ((~ulii) << 1) | ulii | FIVEMASK;
+ }
+ if (sample_ct % 4) {
+ writebuf[sample_ctl2 - 1] &= (ONELU << (2 * (sample_ct % BITCT2))) - ONELU;
+ }
}
- if (sample_ct % 4) {
- writebuf[sample_ctl2 - 1] &= (ONELU << (2 * (sample_ct % BITCT2))) - ONELU;
+ if (fwrite_checked(writebuf, sample_ct4, outfile)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
}
}
- if (fwrite_checked(writebuf, sample_ct4, outfile)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
+ marker_ct++;
+ if (!(marker_ct % 1000)) {
+ printf("\r--data: %uk variants converted.", marker_ct / 1000);
+ fflush(stdout);
}
}
- marker_ct++;
- if (!(marker_ct % 1000)) {
- printf("\r--data: %uk variants converted.", marker_ct / 1000);
- fflush(stdout);
+ if ((!marker_ct) && (!allow_no_variants)) {
+ logerrprint("Error: Empty .gen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
}
- }
- if ((!marker_ct) && (!allow_no_variants)) {
- logerrprint("Error: Empty .gen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- } else {
- if (fopen_checked(genname, FOPEN_RB, &infile)) {
- goto oxford_to_bed_ret_OPEN_FAIL;
- }
- // supports BGEN v1.0 and v1.1.
- bgen_probs = (uint16_t*)bigstack_alloc(6LU * sample_ct);
- if (!bgen_probs) {
- goto oxford_to_bed_ret_NOMEM;
- }
- loadbuf = (char*)g_bigstack_base;
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size < 3 * 65536) {
- goto oxford_to_bed_ret_NOMEM;
- }
- if (fread(uint_arr, 1, 20, infile) < 20) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (uint_arr[1] > uint_arr[0]) {
- logerrprint("Error: Invalid .bgen header.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- raw_marker_ct = uint_arr[2];
- if ((!raw_marker_ct) && (!allow_no_variants)) {
- logerrprint("Error: .bgen file contains no variants.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- if (uint_arr[3] != sample_ct) {
- logerrprint("Error: --bgen and --sample files contain different numbers of samples.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- if (uint_arr[4] && (uint_arr[4] != 0x6e656762)) {
- logerrprint("Error: Invalid .bgen magic number.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- if (fseeko(infile, uint_arr[1], SEEK_SET)) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (fread(&uii, 1, 4, infile) < 4) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (uii & (~5)) {
- uii = (uii >> 2) & 15;
- if (uii == 2) {
- logerrprint("Error: BGEN v1.2 input requires PLINK 2.0 (under development as of this\nwriting). Use gen-convert to downcode to BGEN v1.1 if you want to process this\ndata with PLINK 1.9.\n");
- } else if (uii > 2) {
- logerrprint("Error: Unrecognized BGEN version. Use gen-convert or a similar tool to\ndowncode to BGEN v1.1 if you want to process this data with PLINK 1.9.\n");
- } else {
- logerrprint("Error: Unrecognized flags in .bgen header. (PLINK 1.9 only supports\nBGEN v1.0 and v1.1.)\n");
+ } else {
+ if (fopen_checked(genname, FOPEN_RB, &infile)) {
+ goto oxford_to_bed_ret_OPEN_FAIL;
}
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- if (fseeko(infile, 4 + uint_arr[0], SEEK_SET)) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- bgen_compressed = uii & 1;
- bgen_multichar_alleles = (uii >> 2) & 1;
- if ((!bgen_multichar_alleles) && (!snpid_chr) && (chrom_info_ptr->species != SPECIES_HUMAN)) {
- logerrprint("Error: BGEN v1.0 files can only support nonhuman genomes if the SNP ID field is\nused for chromosome codes.\n");
- goto oxford_to_bed_ret_INVALID_CMDLINE;
- }
- if (!is_randomized) {
- bgen_hardthresh = 32768 - (int32_t)(hard_call_threshold * 32768);
- }
- memcpyl3(g_textbuf, " 0 ");
- for (marker_uidx = 0; marker_uidx < raw_marker_ct; marker_uidx++) {
- if (fread(&uii, 1, 4, infile) < 4) {
+ // supports BGEN v1.0 and v1.1.
+ bgen_probs = (uint16_t*)bigstack_alloc(6LU * sample_ct);
+ if (!bgen_probs) {
+ goto oxford_to_bed_ret_NOMEM;
+ }
+ loadbuf = (char*)g_bigstack_base;
+ loadbuf_size = bigstack_left();
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size < 3 * 65536) {
+ goto oxford_to_bed_ret_NOMEM;
+ }
+ if (fread(uint_arr, 1, 20, infile) < 20) {
goto oxford_to_bed_ret_READ_FAIL;
}
- if (uii != sample_ct) {
- logerrprint("Error: Unexpected number of samples specified in SNP block header.\n");
+ if (uint_arr[1] > uint_arr[0]) {
+ logerrprint("Error: Invalid .bgen header.\n");
goto oxford_to_bed_ret_INVALID_FORMAT;
}
- if (bgen_multichar_alleles) {
- // v1.1
- if (fread(&usii, 1, 2, infile) < 2) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (!snpid_chr) {
- if (fseeko(infile, usii, SEEK_CUR)) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- bufptr = loadbuf;
+ raw_marker_ct = uint_arr[2];
+ if ((!raw_marker_ct) && (!allow_no_variants)) {
+ logerrprint("Error: .bgen file contains no variants.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (uint_arr[3] != sample_ct) {
+ logerrprint("Error: --bgen and --sample files contain different numbers of samples.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (uint_arr[4] && (uint_arr[4] != 0x6e656762)) {
+ logerrprint("Error: Invalid .bgen magic number.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (fseeko(infile, uint_arr[1], SEEK_SET)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (fread(&uii, 1, 4, infile) < 4) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (uii & (~5)) {
+ uii = (uii >> 2) & 15;
+ if (uii == 2) {
+ logerrprint("Error: BGEN v1.2 input requires PLINK 2.0 (under development as of this\nwriting). Use gen-convert to downcode to BGEN v1.1 if you want to process this\ndata with PLINK 1.9.\n");
+ } else if (uii > 2) {
+ logerrprint("Error: Unrecognized BGEN version. Use gen-convert or a similar tool to\ndowncode to BGEN v1.1 if you want to process this data with PLINK 1.9.\n");
} else {
- if (!usii) {
- logerrprint("Error: Length-0 SNP ID in .bgen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- if (fread(loadbuf, 1, usii, infile) < usii) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- loadbuf[usii] = '\0';
- bufptr = &(loadbuf[usii + 1]);
+ logerrprint("Error: Unrecognized flags in .bgen header. (PLINK 1.9 only supports\nBGEN v1.0 and v1.1.)\n");
}
- if (fread(&usjj, 1, 2, infile) < 2) {
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (fseeko(infile, 4 + uint_arr[0], SEEK_SET)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ bgen_compressed = uii & 1;
+ bgen_multichar_alleles = (uii >> 2) & 1;
+ if ((!bgen_multichar_alleles) && (!snpid_chr) && (chrom_info_ptr->species != SPECIES_HUMAN)) {
+ logerrprint("Error: BGEN v1.0 files can only support nonhuman genomes if the SNP ID field is\nused for chromosome codes.\n");
+ goto oxford_to_bed_ret_INVALID_CMDLINE;
+ }
+ if (!is_randomized) {
+ bgen_hardthresh = 32768 - (int32_t)(hard_call_threshold * 32768);
+ }
+ memcpyl3(g_textbuf, " 0 ");
+ for (marker_uidx = 0; marker_uidx < raw_marker_ct; marker_uidx++) {
+ if (fread(&uii, 1, 4, infile) < 4) {
goto oxford_to_bed_ret_READ_FAIL;
}
- if (!usjj) {
- logerrprint("Error: Length-0 rsID in .bgen file.\n");
+ if (uii != sample_ct) {
+ logerrprint("Error: Unexpected number of samples specified in SNP block header.\n");
goto oxford_to_bed_ret_INVALID_FORMAT;
}
- if (fread(bufptr, 1, usjj, infile) < usjj) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- bufptr2 = &(bufptr[usjj]);
- if (fread(&uskk, 1, 2, infile) < 2) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (!snpid_chr) {
- if (!uskk) {
- logerrprint("Error: Length-0 chromosome ID in .bgen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- usii = uskk;
- if (fread(bufptr2, 1, usii, infile) < usii) {
+ if (bgen_multichar_alleles) {
+ // v1.1
+ if (fread(&usii, 1, 2, infile) < 2) {
goto oxford_to_bed_ret_READ_FAIL;
}
- if ((usii == 2) && (!memcmp(bufptr2, "NA", 2))) {
- // convert 'NA' to 0
- usii = 1;
- memcpy(bufptr2, "0", 2);
+ if (!snpid_chr) {
+ if (fseeko(infile, usii, SEEK_CUR)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ bufptr = loadbuf;
} else {
- bufptr2[usii] = '\0';
+ if (!usii) {
+ logerrprint("Error: Length-0 SNP ID in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (fread(loadbuf, 1, usii, infile) < usii) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ loadbuf[usii] = '\0';
+ bufptr = &(loadbuf[usii + 1]);
}
- } else {
- if (fseeko(infile, uskk, SEEK_CUR)) {
+ if (fread(&usjj, 1, 2, infile) < 2) {
goto oxford_to_bed_ret_READ_FAIL;
}
- bufptr2 = loadbuf;
- }
- if (fread(uint_arr, 1, 8, infile) < 8) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (!uint_arr[1]) {
- logerrprint("Error: Length-0 allele ID in .bgen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- ii = get_chrom_code(chrom_info_ptr, bufptr2);
- if (ii < 0) {
- if (chrom_error(".bgen file", chrom_info_ptr, bufptr2, 0, ii, allow_extra_chroms)) {
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- retval = resolve_or_add_chrom_name(bufptr2, ".bgen file", 0, chrom_info_ptr, &ii);
- if (retval) {
- goto oxford_to_bed_ret_1;
+ if (!usjj) {
+ logerrprint("Error: Length-0 rsID in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
}
- }
- if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
- // skip rest of current SNP
- if (fseeko(infile, uint_arr[1], SEEK_CUR)) {
+ if (fread(bufptr, 1, usjj, infile) < usjj) {
goto oxford_to_bed_ret_READ_FAIL;
}
- if (fread(&uii, 1, 4, infile) < 4) {
+ bufptr2 = &(bufptr[usjj]);
+ if (fread(&uskk, 1, 2, infile) < 2) {
goto oxford_to_bed_ret_READ_FAIL;
}
- if (bgen_compressed) {
- if (fseeko(infile, uii, SEEK_CUR)) {
- goto oxford_to_bed_ret_READ_FAIL;
+ if (!snpid_chr) {
+ if (!uskk) {
+ logerrprint("Error: Length-0 chromosome ID in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
}
- if (fread(&uii, 1, 4, infile) < 4) {
+ usii = uskk;
+ if (fread(bufptr2, 1, usii, infile) < usii) {
goto oxford_to_bed_ret_READ_FAIL;
}
- if (fseeko(infile, uii, SEEK_CUR)) {
- goto oxford_to_bed_ret_READ_FAIL;
+ if ((usii == 2) && (!memcmp(bufptr2, "NA", 2))) {
+ // convert 'NA' to 0
+ usii = 1;
+ memcpy(bufptr2, "0", 2);
+ } else {
+ bufptr2[usii] = '\0';
}
} else {
- if (fseeko(infile, uii + ((uint64_t)sample_ct) * 6, SEEK_CUR)) {
+ if (fseeko(infile, uskk, SEEK_CUR)) {
goto oxford_to_bed_ret_READ_FAIL;
}
+ bufptr2 = loadbuf;
}
- continue;
- }
- fputs(bufptr2, outfile_bim);
- if (putc_checked(' ', outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- fwrite(bufptr, 1, usjj, outfile_bim);
- bufptr = uint32toa_x(uint_arr[0], ' ', &(g_textbuf[3]));
- fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile_bim);
-
- // halve the limit since there are two alleles
- // (may want to enforce NON_BIGSTACK_MIN allele length limit?)
- if (uint_arr[1] >= loadbuf_size / 2) {
- if (loadbuf_size < MAXLINEBUFLEN) {
- goto oxford_to_bed_ret_NOMEM;
- }
- logerrprint("Error: Excessively long allele in .bgen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- if (fread(loadbuf, 1, uint_arr[1], infile) < uint_arr[1]) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- loadbuf[uint_arr[1]] = ' ';
- if (fread(&uii, 1, 4, infile) < 4) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (uii >= loadbuf_size / 2) {
- if (loadbuf_size < MAXLINEBUFLEN) {
- goto oxford_to_bed_ret_NOMEM;
+ if (fread(uint_arr, 1, 8, infile) < 8) {
+ goto oxford_to_bed_ret_READ_FAIL;
}
- logerrprint("Error: Excessively long allele in .bgen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- bufptr = &(loadbuf[uint_arr[1] + 1]);
- if (fread(bufptr, 1, uii, infile) < uii) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- bufptr[uii] = '\n';
- identical_alleles = (uii == uint_arr[1]) && (!memcmp(loadbuf, bufptr, uii));
- if (!identical_alleles) {
- if (fwrite_checked(loadbuf, uint_arr[1] + uii + 2, outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
+ if (!uint_arr[1]) {
+ logerrprint("Error: Length-0 allele ID in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
}
- } else {
- fputs("0 ", outfile_bim);
- if (fwrite_checked(bufptr, uii + 1, outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
+ // bufptr2 (chromosome code) is already zero-terminated, with known
+ // length usii
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code(bufptr2, ".bgen file", 0, usii, allow_extra_chroms, chrom_info_ptr, &cur_chrom_code);
+ if (retval) {
+ goto oxford_to_bed_ret_1;
}
- }
- } else {
- // v1.0
- uii = 0;
- if (fread(&uii, 1, 1, infile) < 1) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (fread(loadbuf, 1, 2 * uii + 9, infile) < (2 * uii + 9)) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- // save marker ID length since we might clobber it
- ukk = (unsigned char)(loadbuf[uii + 1]);
- if (!snpid_chr) {
- ii = ((unsigned char)(loadbuf[2 * uii + 2]));
- if (ii > 24) {
- if (ii == 255) {
- // unknown
- ii = 0;
- } else if (ii > 252) {
- // XY or MT
- ii = ii - 228;
- } else {
- logerrprint("Error: Invalid chromosome code in BGEN v1.0 file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
+ // skip rest of current SNP
+ if (fseeko(infile, uint_arr[1], SEEK_CUR)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (fread(&uii, 1, 4, infile) < 4) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (bgen_compressed) {
+ if (fseeko(infile, uii, SEEK_CUR)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (fread(&uii, 1, 4, infile) < 4) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (fseeko(infile, uii, SEEK_CUR)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ } else {
+ if (fseeko(infile, uii + ((uint64_t)sample_ct) * 6, SEEK_CUR)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ }
+ continue;
+ }
+ fputs(bufptr2, outfile_bim);
+ if (putc_checked(' ', outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ fwrite(bufptr, 1, usjj, outfile_bim);
+ bufptr = uint32toa_x(uint_arr[0], ' ', &(g_textbuf[3]));
+ fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile_bim);
+
+ // halve the limit since there are two alleles
+ // (may want to enforce NON_BIGSTACK_MIN allele length limit?)
+ if (uint_arr[1] >= loadbuf_size / 2) {
+ if (loadbuf_size < MAXLINEBUFLEN) {
+ goto oxford_to_bed_ret_NOMEM;
+ }
+ logerrprint("Error: Excessively long allele in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (fread(loadbuf, 1, uint_arr[1], infile) < uint_arr[1]) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ loadbuf[uint_arr[1]] = ' ';
+ if (fread(&uii, 1, 4, infile) < 4) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (uii >= loadbuf_size / 2) {
+ if (loadbuf_size < MAXLINEBUFLEN) {
+ goto oxford_to_bed_ret_NOMEM;
+ }
+ logerrprint("Error: Excessively long allele in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ bufptr = &(loadbuf[uint_arr[1] + 1]);
+ if (fread(bufptr, 1, uii, infile) < uii) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ bufptr[uii] = '\n';
+ identical_alleles = (uii == uint_arr[1]) && (!memcmp(loadbuf, bufptr, uii));
+ if (!identical_alleles) {
+ if (fwrite_checked(loadbuf, uint_arr[1] + uii + 2, outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ } else {
+ fputs("0 ", outfile_bim);
+ if (fwrite_checked(bufptr, uii + 1, outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
}
}
- uint32toa_x((uint32_t)ii, '\0', loadbuf);
- bufptr = loadbuf;
} else {
- ujj = (unsigned char)loadbuf[0];
- bufptr = &(loadbuf[1]);
- if ((ujj == 2) && (!memcmp(bufptr, "NA", 2))) {
- *bufptr = '0';
- ujj = 1;
- }
- bufptr[ujj] = '\0';
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- if (chrom_error(".bgen file", chrom_info_ptr, bufptr, 0, ii, allow_extra_chroms)) {
- goto oxford_to_bed_ret_INVALID_FORMAT;
+ // v1.0
+ uii = 0;
+ if (fread(&uii, 1, 1, infile) < 1) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (fread(loadbuf, 1, 2 * uii + 9, infile) < (2 * uii + 9)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ // save marker ID length since we might clobber it
+ ukk = (unsigned char)(loadbuf[uii + 1]);
+ int32_t cur_chrom_code;
+ if (!snpid_chr) {
+ cur_chrom_code = ((unsigned char)(loadbuf[2 * uii + 2]));
+ if (cur_chrom_code > 24) {
+ if (cur_chrom_code == 255) {
+ // unknown
+ cur_chrom_code = 0;
+ } else if (cur_chrom_code > 252) {
+ // XY or MT
+ cur_chrom_code = cur_chrom_code - 228;
+ } else {
+ logerrprint("Error: Invalid chromosome code in BGEN v1.0 file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ }
+ uint32toa_x((uint32_t)cur_chrom_code, '\0', loadbuf);
+ bufptr = loadbuf;
+ } else {
+ ujj = (unsigned char)loadbuf[0];
+ bufptr = &(loadbuf[1]);
+ if ((ujj == 2) && (!memcmp(bufptr, "NA", 2))) {
+ *bufptr = '0';
+ ujj = 1;
}
- retval = resolve_or_add_chrom_name(bufptr, ".bgen file", 0, chrom_info_ptr, &ii);
+ bufptr[ujj] = '\0';
+ retval = get_or_add_chrom_code(bufptr, ".bgen file", 0, ujj, allow_extra_chroms, chrom_info_ptr, &cur_chrom_code);
if (retval) {
goto oxford_to_bed_ret_1;
}
}
- }
- if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
- if (bgen_compressed) {
- if (fread(&uii, 1, 4, infile) < 4) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (fseeko(infile, uii, SEEK_CUR)) {
- goto oxford_to_bed_ret_READ_FAIL;
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
+ if (bgen_compressed) {
+ if (fread(&uii, 1, 4, infile) < 4) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ if (fseeko(infile, uii, SEEK_CUR)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ } else {
+ if (fseeko(infile, ((uint64_t)sample_ct) * 6, SEEK_CUR)) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
}
+ continue;
+ }
+ fputs(bufptr, outfile_bim);
+ if (putc_checked(' ', outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ fwrite(&(loadbuf[uii + 2]), 1, ukk, outfile_bim);
+ memcpy(&ujj, &(loadbuf[2 * uii + 3]), 4);
+ bufptr = uint32toa_x(ujj, ' ', &(g_textbuf[3]));
+ identical_alleles = (loadbuf[2 * uii + 7] == loadbuf[2 * uii + 8]);
+ if (!identical_alleles) {
+ *bufptr++ = loadbuf[2 * uii + 7];
} else {
- if (fseeko(infile, ((uint64_t)sample_ct) * 6, SEEK_CUR)) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
+ *bufptr++ = '0';
}
- continue;
- }
- fputs(bufptr, outfile_bim);
- if (putc_checked(' ', outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- fwrite(&(loadbuf[uii + 2]), 1, ukk, outfile_bim);
- memcpy(&ujj, &(loadbuf[2 * uii + 3]), 4);
- bufptr = uint32toa_x(ujj, ' ', &(g_textbuf[3]));
- identical_alleles = (loadbuf[2 * uii + 7] == loadbuf[2 * uii + 8]);
- if (!identical_alleles) {
- *bufptr++ = loadbuf[2 * uii + 7];
- } else {
- *bufptr++ = '0';
- }
- *bufptr++ = ' ';
- *bufptr++ = loadbuf[2 * uii + 8];
- *bufptr++ = '\n';
- if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- }
- if (bgen_compressed) {
- if (fread(&uii, 1, 4, infile) < 4) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- if (uii > loadbuf_size) {
- if (loadbuf_size < MAXLINEBUFLEN) {
- goto oxford_to_bed_ret_NOMEM;
+ *bufptr++ = ' ';
+ *bufptr++ = loadbuf[2 * uii + 8];
+ *bufptr++ = '\n';
+ if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
}
- logerrprint("Error: Excessively long compressed SNP block in .bgen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
}
- if (fread(loadbuf, 1, uii, infile) < uii) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- zlib_ulongf = 6 * sample_ct;
- if (uncompress((Bytef*)bgen_probs, &zlib_ulongf, (Bytef*)loadbuf, uii) != Z_OK) {
- logerrprint("Error: Invalid compressed SNP block in .bgen file.\n");
- goto oxford_to_bed_ret_INVALID_FORMAT;
- }
- } else {
- if (fread(bgen_probs, 1, 6 * sample_ct, infile) < 6 * sample_ct) {
- goto oxford_to_bed_ret_READ_FAIL;
- }
- }
- cur_word = 0;
- shiftval = 0;
- ulptr = writebuf;
- usptr = bgen_probs;
- if (!is_randomized) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, usptr = &(usptr[3])) {
- if (usptr[2] >= bgen_hardthresh) {
- ulii = 3;
- } else if (usptr[1] >= bgen_hardthresh) {
- ulii = 2;
- } else if (usptr[0] >= bgen_hardthresh) {
- ulii = 0;
- } else {
- ulii = 1;
+ if (bgen_compressed) {
+ if (fread(&uii, 1, 4, infile) < 4) {
+ goto oxford_to_bed_ret_READ_FAIL;
}
- cur_word |= ulii << shiftval;
- shiftval += 2;
- if (shiftval == BITCT) {
- *ulptr++ = cur_word;
- cur_word = 0;
- shiftval = 0;
+ if (uii > loadbuf_size) {
+ if (loadbuf_size < MAXLINEBUFLEN) {
+ goto oxford_to_bed_ret_NOMEM;
+ }
+ logerrprint("Error: Excessively long compressed SNP block in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ if (fread(loadbuf, 1, uii, infile) < uii) {
+ goto oxford_to_bed_ret_READ_FAIL;
+ }
+ zlib_ulongf = 6 * sample_ct;
+ if (uncompress((Bytef*)bgen_probs, &zlib_ulongf, (Bytef*)loadbuf, uii) != Z_OK) {
+ logerrprint("Error: Invalid compressed SNP block in .bgen file.\n");
+ goto oxford_to_bed_ret_INVALID_FORMAT;
+ }
+ } else {
+ if (fread(bgen_probs, 1, 6 * sample_ct, infile) < 6 * sample_ct) {
+ goto oxford_to_bed_ret_READ_FAIL;
}
}
- } else {
- uii = 0;
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, usptr = &(usptr[3])) {
- // fast handling of common cases
- ukk = usptr[2];
- if (ukk >= 32768) {
- ulii = 3;
- } else if (usptr[1] >= 32768) {
- ulii = 2;
- } else if (usptr[0] >= 32768) {
- ulii = 0;
- } else {
- while (1) {
- uii >>= 16;
- if (!uii) {
- uii = sfmt_genrand_uint32(&g_sfmt) | 0x80000000U;
- }
- ujj = uii & 32767;
- if (ujj < ukk) {
- ulii = 3;
- break;
- } else {
- ukk += usptr[1];
+ cur_word = 0;
+ shiftval = 0;
+ ulptr = writebuf;
+ usptr = bgen_probs;
+ if (!is_randomized) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, usptr = &(usptr[3])) {
+ if (usptr[2] >= bgen_hardthresh) {
+ ulii = 3;
+ } else if (usptr[1] >= bgen_hardthresh) {
+ ulii = 2;
+ } else if (usptr[0] >= bgen_hardthresh) {
+ ulii = 0;
+ } else {
+ ulii = 1;
+ }
+ cur_word |= ulii << shiftval;
+ shiftval += 2;
+ if (shiftval == BITCT) {
+ *ulptr++ = cur_word;
+ cur_word = 0;
+ shiftval = 0;
+ }
+ }
+ } else {
+ uii = 0;
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, usptr = &(usptr[3])) {
+ // fast handling of common cases
+ ukk = usptr[2];
+ if (ukk >= 32768) {
+ ulii = 3;
+ } else if (usptr[1] >= 32768) {
+ ulii = 2;
+ } else if (usptr[0] >= 32768) {
+ ulii = 0;
+ } else {
+ while (1) {
+ uii >>= 16;
+ if (!uii) {
+ uii = sfmt_genrand_uint32(&g_sfmt) | 0x80000000U;
+ }
+ ujj = uii & 32767;
if (ujj < ukk) {
- ulii = 2;
+ ulii = 3;
break;
} else {
- ukk += usptr[0];
+ ukk += usptr[1];
if (ujj < ukk) {
- ulii = 0;
- break;
- } else if (ukk < 32766) {
- ulii = 1;
+ ulii = 2;
break;
} else {
- ukk = usptr[2];
+ ukk += usptr[0];
+ if (ujj < ukk) {
+ ulii = 0;
+ break;
+ } else if (ukk < 32766) {
+ ulii = 1;
+ break;
+ } else {
+ ukk = usptr[2];
+ }
}
}
}
}
+ cur_word |= ulii << shiftval;
+ shiftval += 2;
+ if (shiftval == BITCT) {
+ *ulptr++ = cur_word;
+ cur_word = 0;
+ shiftval = 0;
+ }
}
- cur_word |= ulii << shiftval;
- shiftval += 2;
- if (shiftval == BITCT) {
- *ulptr++ = cur_word;
- cur_word = 0;
- shiftval = 0;
+ }
+ if (shiftval) {
+ *ulptr++ = cur_word;
+ }
+ if (identical_alleles) {
+ for (ulptr = writebuf; ulptr < (&(writebuf[sample_ctl2])); ulptr++) {
+ ulii = *ulptr;
+ *ulptr = ((~ulii) << 1) | ulii | FIVEMASK;
+ }
+ if (sample_ct % 4) {
+ writebuf[sample_ctl2 - 1] &= (ONELU << (2 * (sample_ct % BITCT2))) - ONELU;
}
}
- }
- if (shiftval) {
- *ulptr++ = cur_word;
- }
- if (identical_alleles) {
- for (ulptr = writebuf; ulptr < (&(writebuf[sample_ctl2])); ulptr++) {
- ulii = *ulptr;
- *ulptr = ((~ulii) << 1) | ulii | FIVEMASK;
+ if (fwrite_checked(writebuf, sample_ct4, outfile)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
}
- if (sample_ct % 4) {
- writebuf[sample_ctl2 - 1] &= (ONELU << (2 * (sample_ct % BITCT2))) - ONELU;
+ marker_ct++;
+ if (!(marker_ct % 1000)) {
+ if (marker_ct == marker_uidx + 1) {
+ printf("\r--bgen: %uk variants converted.", marker_ct / 1000);
+ } else {
+ printf("\r--bgen: %uk variants converted (out of %u).", marker_ct / 1000, marker_uidx + 1);
+ }
+ fflush(stdout);
}
}
- if (fwrite_checked(writebuf, sample_ct4, outfile)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- marker_ct++;
- if (!(marker_ct % 1000)) {
- if (marker_ct == marker_uidx + 1) {
- printf("\r--bgen: %uk variants converted.", marker_ct / 1000);
- } else {
- printf("\r--bgen: %uk variants converted (out of %u).", marker_ct / 1000, marker_uidx + 1);
- }
- fflush(stdout);
+ if (fclose_null(&infile)) {
+ goto oxford_to_bed_ret_READ_FAIL;
}
}
- if (fclose_null(&infile)) {
- goto oxford_to_bed_ret_READ_FAIL;
+ if (fclose_null(&outfile)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
}
+ if (fclose_null(&outfile_bim)) {
+ goto oxford_to_bed_ret_WRITE_FAIL;
+ }
+ putc_unlocked('\r', stdout);
+ *outname_end = '\0';
+ LOGPRINTFWW("--%s: %s.bed + %s.bim + %s.fam written.\n", is_bgen? "bgen" : "data", outname, outname, outname);
}
- if (fclose_null(&outfile)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- if (fclose_null(&outfile_bim)) {
- goto oxford_to_bed_ret_WRITE_FAIL;
- }
- putchar('\r');
- *outname_end = '\0';
- LOGPRINTFWW("--%s: %s.bed + %s.bim + %s.fam written.\n", is_bgen? "bgen" : "data", outname, outname, outname);
while (0) {
oxford_to_bed_ret_NOMEM:
retval = RET_NOMEM;
@@ -5200,7 +5207,8 @@ int32_t oxford_to_bed(char* genname, char* samplename, char* outname, char* outn
return retval;
}
-// side effect: initializes textbuf to first nonempty line of .map/.bim
+// side effect: initializes textbuf to first nonempty noncomment line of
+// .map/.bim
int32_t check_cm_col(FILE* bimfile, char* textbuf, uint32_t is_binary, uint32_t allow_no_variants, uint32_t bufsize, uint32_t* cm_col_exists_ptr, uintptr_t* line_idx_ptr) {
uintptr_t line_idx = 0;
char* bufptr;
@@ -5215,11 +5223,7 @@ int32_t check_cm_col(FILE* bimfile, char* textbuf, uint32_t is_binary, uint32_t
if (no_more_tokens_kns(bufptr)) {
return -1;
}
- if (no_more_tokens_kns(next_token(bufptr))) {
- *cm_col_exists_ptr = 0;
- } else {
- *cm_col_exists_ptr = 1;
- }
+ *cm_col_exists_ptr = !no_more_tokens_kns(next_token(bufptr));
return 0;
}
*line_idx_ptr = 0;
@@ -5254,7 +5258,7 @@ int32_t incr_text_allele_str(char* allele_name, uint32_t an_len, Ll_str* allele_
// Start with preallocated array of 16-byte Ll_strs.
// Ll_str.ss is a null-terminated sequence of ordered, tab-delimited allele
// names. If the starting 8 (or 12 bytes, on 32-bit systems) is adequate,
- // Ll_str.next is NULL. Otherwise, Ll_str.ss stores the first few (or
+ // Ll_str.next is nullptr. Otherwise, Ll_str.ss stores the first few (or
// possibly 0, if the very first allele name is too long) allele names, and
// Ll_str.next is a pointer to a linked list entry storing the next 1+ allele
// names. Worst case, the linked list is of length 4 (beyond that we error
@@ -5273,7 +5277,7 @@ int32_t incr_text_allele_str(char* allele_name, uint32_t an_len, Ll_str* allele_
return RET_NOMEM;
}
allele_list_start->next = ll_ptr;
- ll_ptr->next = NULL;
+ ll_ptr->next = nullptr;
cur_allele_name_start = ll_ptr->ss;
}
memcpyx(cur_allele_name_start, allele_name, an_len, '\0');
@@ -5305,7 +5309,7 @@ int32_t incr_text_allele_str(char* allele_name, uint32_t an_len, Ll_str* allele_
return RET_NOMEM;
}
allele_list_start->next = ll_ptr;
- ll_ptr->next = NULL;
+ ll_ptr->next = nullptr;
cur_allele_name_start = ll_ptr->ss;
memcpyx(cur_allele_name_start, allele_name, an_len, '\0');
}
@@ -5322,7 +5326,7 @@ char* get_llstr(Ll_str* ll_ptr, uint32_t allele_idx) {
if (*cptr == '\0') {
ll_ptr = ll_ptr->next;
if (!ll_ptr) {
- return NULL;
+ return nullptr;
}
cptr = ll_ptr->ss;
}
@@ -5334,7 +5338,7 @@ char* get_llstr(Ll_str* ll_ptr, uint32_t allele_idx) {
} else {
ll_ptr = ll_ptr->next;
if (!ll_ptr) {
- return NULL;
+ return nullptr;
}
cptr = ll_ptr->ss;
}
@@ -5342,9 +5346,8 @@ char* get_llstr(Ll_str* ll_ptr, uint32_t allele_idx) {
return cptr;
}
-static inline char* write_token_nt(char* read_ptr, FILE* outfile) {
+static inline char* write_token_notab(char* read_ptr, FILE* outfile) {
// assumes read_ptr is at the beginning of an item to write
- // nt = "no tab"
uint32_t slen = strlen_se(read_ptr);
fwrite(read_ptr, 1, slen, outfile);
return skip_initial_spaces(&(read_ptr[slen]));
@@ -5353,7 +5356,7 @@ static inline char* write_token_nt(char* read_ptr, FILE* outfile) {
static inline char* write_token(char* read_ptr, FILE* outfile) {
uint32_t slen = strlen_se(read_ptr);
fwrite(read_ptr, 1, slen, outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
return skip_initial_spaces(&(read_ptr[slen]));
}
@@ -5368,12 +5371,12 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
uint32_t pct = 1;
int64_t ped_next_thresh = ped_size / 100;
uint32_t last_pass = 0;
- int64_t* line_starts = NULL;
+ int64_t* line_starts = nullptr;
char* missing_geno_ptr = (char*)g_missing_geno_ptr;
char missing_geno = *missing_geno_ptr;
// do NOT convert missing -> output_missing when autoconverting, since the
// .bim/.fam files are usually read right back in.
- char** marker_allele_ptrs = NULL;
+ char** marker_allele_ptrs = nullptr;
FILE* outfile;
uint32_t pass_ct;
uintptr_t sample_ct4;
@@ -5473,7 +5476,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
if (fwrite_checked(col1_ptr, strlen_se(col1_ptr), outfile)) {
goto ped_to_bed_multichar_allele_ret_WRITE_FAIL;
}
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
bufptr2 = write_token(col2_ptr, outfile);
if (fam_cols & FAM_COL_34) {
bufptr2 = write_token(bufptr2, outfile);
@@ -5482,11 +5485,11 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
fputs("0\t0\t", outfile);
}
if (fam_cols & FAM_COL_5) {
- bufptr2 = write_token_nt(bufptr2, outfile);
+ bufptr2 = write_token_notab(bufptr2, outfile);
} else {
- putc('0', outfile);
+ putc_unlocked('0', outfile);
}
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
if (fam_cols & FAM_COL_6) {
uii = strlen_se(bufptr2);
fwrite(bufptr2, 1, uii, outfile);
@@ -5541,7 +5544,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
if (ftello(*pedfile_ptr) >= ped_next_thresh) {
uii = (ftello(*pedfile_ptr) * 100) / ped_size;
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", uii);
fflush(stdout);
@@ -5551,7 +5554,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
if (!feof(*pedfile_ptr)) {
goto ped_to_bed_multichar_allele_ret_READ_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
logprint(".ped scan complete (for binary autoconversion).\n");
// sample_ct == 0 impossible
if (fclose_null(outfile_ptr)) {
@@ -5631,28 +5634,28 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
if (fwrite_checked(bufptr, uii, outfile)) {
goto ped_to_bed_multichar_allele_ret_WRITE_FAIL;
}
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
bufptr = skip_initial_spaces(&(bufptr[uii + 1]));
bufptr = write_token(bufptr, outfile);
if (cm_col_exists) {
ucc = (unsigned char)(*bufptr);
// should be good enough at detecting nonnumeric values...
if (((ucc >= '0') && (ucc <= '9')) || (ucc == '-') || (ucc == '+')) {
- bufptr = write_token_nt(bufptr, outfile);
+ bufptr = write_token_notab(bufptr, outfile);
} else {
- putc('0', outfile);
+ putc_unlocked('0', outfile);
bufptr = next_token(bufptr);
}
} else {
- putc('0', outfile);
+ putc_unlocked('0', outfile);
}
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
uii = strlen_se(bufptr);
fwrite(bufptr, 1, uii, outfile);
}
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
fputs(aptr1, outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
fputs(aptr2, outfile);
if (putc_checked('\n', outfile)) {
goto ped_to_bed_multichar_allele_ret_WRITE_FAIL;
@@ -5740,6 +5743,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
umm = 0;
for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
aptr1 = bufptr;
+ // already validated
bufptr = token_endnn(bufptr);
alen1 = (uintptr_t)(bufptr - aptr1);
bufptr = skip_initial_spaces(bufptr);
@@ -5815,7 +5819,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
}
}
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -5876,9 +5880,9 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_end, uint32_t fam_cols, uint64_t misc_flags, int32_t missing_pheno, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* mapfile = NULL;
- FILE* pedfile = NULL;
- FILE* outfile = NULL;
+ FILE* mapfile = nullptr;
+ FILE* pedfile = nullptr;
+ FILE* outfile = nullptr;
uintptr_t* marker_exclude;
uintptr_t max_marker_id_len = 0;
uintptr_t unfiltered_marker_ct = 0;
@@ -5896,7 +5900,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
uint32_t ped_col_skip_iid_m1 = ((fam_cols & FAM_COL_34) / (FAM_COL_34 / 2)) + ((fam_cols & FAM_COL_5) / FAM_COL_5) + ((fam_cols & FAM_COL_6) / FAM_COL_6);
uint32_t ped_col_skip = ped_col_skip_iid_m1 + 1 + ((fam_cols & FAM_COL_1) / FAM_COL_1);
uint32_t last_pass = 0;
- int64_t* line_starts = NULL;
+ int64_t* line_starts = nullptr;
uint32_t is_single_char_alleles = 1;
char missing_geno = *g_missing_geno_ptr;
@@ -5923,7 +5927,6 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
uint32_t ujj;
uint32_t ukk;
uint32_t umm;
- int32_t ii;
int32_t jj;
char* loadbuf;
uintptr_t loadbuf_size;
@@ -5940,566 +5943,563 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
unsigned char* wbufptr;
int64_t ped_size;
int64_t ped_next_thresh;
- int32toa_x(missing_pheno, '\0', missing_pheno_str);
- marker_exclude = (uintptr_t*)g_bigstack_base;
- marker_exclude[0] = 0;
- // don't use fopen_checked() here, since we want to customize the error
- // message.
- mapfile = fopen(mapname, "r");
- if (!mapfile) {
- uii = strlen(mapname);
- if ((uii > 8) && ((!memcmp(&(mapname[uii - 8]), ".ped.map", 8)) || (!memcmp(&(mapname[uii - 8]), ".map.map", 8)))) {
- LOGERRPRINTFWW("Error: Failed to open %s. (--file expects a filename *prefix*; '.ped' and '.map' are automatically appended.)\n", mapname);
- } else {
- LOGERRPRINTFWW(g_errstr_fopen, mapname);
- }
- goto ped_to_bed_ret_OPEN_FAIL;
- }
- g_textbuf[MAXLINELEN - 6] = ' ';
- if (check_cm_col(mapfile, g_textbuf, 0, allow_no_variants, MAXLINELEN - 5, &cm_col_exists, &line_idx)) {
- if (line_idx) {
- goto ped_to_bed_ret_MISSING_TOKENS_MAP;
- } else {
- logerrprint("Error: Empty .map file.\n");
- goto ped_to_bed_ret_INVALID_FORMAT;
+ {
+ int32toa_x(missing_pheno, '\0', missing_pheno_str);
+ marker_exclude = (uintptr_t*)g_bigstack_base;
+ marker_exclude[0] = 0;
+ // don't use fopen_checked() here, since we want to customize the error
+ // message.
+ mapfile = fopen(mapname, "r");
+ if (!mapfile) {
+ uii = strlen(mapname);
+ if ((uii > 8) && ((!memcmp(&(mapname[uii - 8]), ".ped.map", 8)) || (!memcmp(&(mapname[uii - 8]), ".map.map", 8)))) {
+ LOGERRPRINTFWW("Error: Failed to open %s. (--file expects a filename *prefix*; '.ped' and '.map' are automatically appended.)\n", mapname);
+ } else {
+ LOGERRPRINTFWW(g_errstr_fopen, mapname);
+ }
+ goto ped_to_bed_ret_OPEN_FAIL;
}
- }
- if (!line_idx) {
- // no variants
- goto ped_to_bed_empty_map_with_allow_no_vars;
- }
- line_idx--;
- unfiltered_marker_ct_limit = bigstack_left();
- if (unfiltered_marker_ct_limit > 0xfffffff) {
- unfiltered_marker_ct_limit = 0x80000000U;
- } else {
- unfiltered_marker_ct_limit *= 8;
- }
- do {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 6]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .map file is pathologically long.\n", line_idx);
- goto ped_to_bed_ret_INVALID_FORMAT_2;
+ g_textbuf[MAXLINELEN - 6] = ' ';
+ if (check_cm_col(mapfile, g_textbuf, 0, allow_no_variants, MAXLINELEN - 5, &cm_col_exists, &line_idx)) {
+ if (line_idx) {
+ goto ped_to_bed_ret_MISSING_TOKENS_MAP;
+ } else {
+ logerrprint("Error: Empty .map file.\n");
+ goto ped_to_bed_ret_INVALID_FORMAT;
+ }
}
- col1_ptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_or_comment_kns(*col1_ptr)) {
- continue;
+ if (!line_idx) {
+ // no variants
+ goto ped_to_bed_empty_map_with_allow_no_vars;
}
- col2_ptr = next_token(col1_ptr);
- bufptr = next_token_mult(col2_ptr, 1 + cm_col_exists);
- if (no_more_tokens_kns(bufptr)) {
- goto ped_to_bed_ret_MISSING_TOKENS_MAP;
+ line_idx--;
+ unfiltered_marker_ct_limit = bigstack_left();
+ if (unfiltered_marker_ct_limit > 0xfffffff) {
+ unfiltered_marker_ct_limit = 0x80000000U;
+ } else {
+ unfiltered_marker_ct_limit *= 8;
}
- ii = get_chrom_code(chrom_info_ptr, col1_ptr);
- if (ii < 0) {
- // guess it's best to extend .map format too
- if (chrom_error(".map file", chrom_info_ptr, col1_ptr, line_idx, ii, allow_extra_chroms)) {
- goto ped_to_bed_ret_INVALID_FORMAT;
+ do {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 6]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .map file is pathologically long.\n", line_idx);
+ goto ped_to_bed_ret_INVALID_FORMAT_2;
+ }
+ col1_ptr = skip_initial_spaces(g_textbuf);
+ if (is_eoln_or_comment_kns(*col1_ptr)) {
+ continue;
+ }
+ char* col1_end = token_endnn(col1_ptr);
+ col2_ptr = skip_initial_spaces(col1_end);
+ bufptr = next_token_mult(col2_ptr, 1 + cm_col_exists);
+ if (no_more_tokens_kns(bufptr)) {
+ goto ped_to_bed_ret_MISSING_TOKENS_MAP;
}
- retval = resolve_or_add_chrom_name(col1_ptr, ".map file", line_idx, chrom_info_ptr, &ii);
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code_destructive(".map file", line_idx, allow_extra_chroms, col1_ptr, col1_end, chrom_info_ptr, &cur_chrom_code);
if (retval) {
goto ped_to_bed_ret_1;
}
- }
- if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
- SET_BIT(unfiltered_marker_ct, marker_exclude);
- marker_exclude_ct++;
- } else {
- if (scan_int_abs_defcap(bufptr, &jj)) {
- sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of .map file.\n", line_idx);
- goto ped_to_bed_ret_INVALID_FORMAT_2;
- }
- if (jj >= 0) {
- if (!map_is_unsorted) {
- if ((ii < last_chrom) || ((ii == last_chrom) && ((uint32_t)jj < last_mpos))) {
- map_is_unsorted = 1;
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
+ SET_BIT(unfiltered_marker_ct, marker_exclude);
+ marker_exclude_ct++;
+ } else {
+ if (scan_int_abs_defcap(bufptr, &jj)) {
+ sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of .map file.\n", line_idx);
+ goto ped_to_bed_ret_INVALID_FORMAT_2;
+ }
+ if (jj >= 0) {
+ if (!map_is_unsorted) {
+ if ((cur_chrom_code < last_chrom) || ((cur_chrom_code == last_chrom) && ((uint32_t)jj < last_mpos))) {
+ map_is_unsorted = 1;
+ }
+ last_chrom = cur_chrom_code;
+ last_mpos = (uint32_t)jj;
}
- last_chrom = ii;
- last_mpos = (uint32_t)jj;
+ uii = strlen_se(col2_ptr) + 1;
+ if (uii > max_marker_id_len) {
+ max_marker_id_len = uii;
+ }
+ } else {
+ SET_BIT(unfiltered_marker_ct, marker_exclude);
+ marker_exclude_ct++;
}
- uii = strlen_se(col2_ptr) + 1;
- if (uii > max_marker_id_len) {
- max_marker_id_len = uii;
+ }
+ unfiltered_marker_ct++;
+ if (unfiltered_marker_ct > 0x7ffffffd) {
+ logprint("Error: Too many variants in .map file (max 2147483645).\n");
+ goto ped_to_bed_ret_INVALID_FORMAT;
+ }
+ if (!(unfiltered_marker_ct & (BITCT - 1))) {
+ if (unfiltered_marker_ct == unfiltered_marker_ct_limit) {
+ goto ped_to_bed_ret_NOMEM;
}
- } else {
- SET_BIT(unfiltered_marker_ct, marker_exclude);
- marker_exclude_ct++;
+ marker_exclude[unfiltered_marker_ct / BITCT] = 0;
}
+ } while (fgets(g_textbuf, MAXLINELEN - 5, mapfile));
+ if (!feof(mapfile)) {
+ goto ped_to_bed_ret_READ_FAIL;
}
- unfiltered_marker_ct++;
- if (unfiltered_marker_ct > 0x7fffffff) {
- logprint("Error: Too many variants in .map file (max 2147483647).\n");
- goto ped_to_bed_ret_INVALID_FORMAT;
+ marker_ct = unfiltered_marker_ct - marker_exclude_ct;
+ if ((!marker_ct) && (!allow_no_variants)) {
+ logprint("Error: No variants in current analysis.\n");
+ goto ped_to_bed_ret_ALL_MARKERS_EXCLUDED;
}
- if (!(unfiltered_marker_ct & (BITCT - 1))) {
- if (unfiltered_marker_ct == unfiltered_marker_ct_limit) {
- goto ped_to_bed_ret_NOMEM;
+ ped_to_bed_empty_map_with_allow_no_vars:
+ bigstack_alloc_ul(BITCT_TO_WORDCT(unfiltered_marker_ct), &marker_exclude);
+
+ if (map_is_unsorted) {
+ retval = load_sort_and_write_map(&map_reverse, mapfile, 3 + cm_col_exists, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, max_marker_id_len, 1, chrom_info_ptr);
+ if (retval) {
+ goto ped_to_bed_ret_1;
}
- marker_exclude[unfiltered_marker_ct / BITCT] = 0;
+ cm_col_exists = 1;
+ fclose_null(&mapfile);
}
- } while (fgets(g_textbuf, MAXLINELEN - 5, mapfile));
- if (!feof(mapfile)) {
- goto ped_to_bed_ret_READ_FAIL;
- }
- marker_ct = unfiltered_marker_ct - marker_exclude_ct;
- if ((!marker_ct) && (!allow_no_variants)) {
- logprint("Error: No variants in current analysis.\n");
- goto ped_to_bed_ret_ALL_MARKERS_EXCLUDED;
- }
- ped_to_bed_empty_map_with_allow_no_vars:
- bigstack_alloc_ul(BITCT_TO_WORDCT(unfiltered_marker_ct), &marker_exclude);
-
- if (map_is_unsorted) {
- retval = load_sort_and_write_map(&map_reverse, mapfile, 3 + cm_col_exists, outname, outname_end, unfiltered_marker_ct, marker_exclude, marker_ct, max_marker_id_len, 1, chrom_info_ptr);
- if (retval) {
- goto ped_to_bed_ret_1;
+ // provisionally assume max_marker_allele_blen == 2
+ // bugfix: allocate this after map_reverse
+ if (bigstack_alloc_c(marker_ct * 2, &marker_alleles_f) ||
+ bigstack_calloc_c(marker_ct * 4, &marker_alleles) ||
+ bigstack_alloc_ui(marker_ct * 4, &marker_allele_cts)) {
+ goto ped_to_bed_ret_NOMEM;
}
- cm_col_exists = 1;
- fclose_null(&mapfile);
- }
- // provisionally assume max_marker_allele_len == 1
- // bugfix: allocate this after map_reverse
- if (bigstack_alloc_c(marker_ct * 2, &marker_alleles_f) ||
- bigstack_calloc_c(marker_ct * 4, &marker_alleles) ||
- bigstack_alloc_ui(marker_ct * 4, &marker_allele_cts)) {
- goto ped_to_bed_ret_NOMEM;
- }
- // first .ped scan: count samples, write .fam, note alleles at each locus
- if (fopen_checked(pedname, FOPEN_RB, &pedfile)) {
- goto ped_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".fam", 5);
- if (fopen_checked(outname, "w", &outfile)) {
- goto ped_to_bed_ret_OPEN_FAIL;
- }
- loadbuf = (char*)g_bigstack_base;
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size <= MAXLINELEN) {
- goto ped_to_bed_ret_NOMEM;
- }
- if (fseeko(pedfile, 0, SEEK_END)) {
- goto ped_to_bed_ret_READ_FAIL;
- }
- ped_size = ftello(pedfile);
- rewind(pedfile);
- logprint("Scanning .ped file...");
- fputs(" 0%", stdout);
- fflush(stdout);
- ped_next_thresh = ped_size / 100;
- loadbuf[loadbuf_size - 1] = ' ';
- pct = 0;
- line_idx = 0;
- while (fgets(loadbuf, loadbuf_size, pedfile)) {
- line_idx++;
- if (!loadbuf[loadbuf_size - 1]) {
- if (loadbuf_size == MAXLINEBUFLEN) {
- logprint("\n");
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .ped file is pathologically long.\n", line_idx);
- goto ped_to_bed_ret_INVALID_FORMAT_2;
- } else {
- goto ped_to_bed_ret_NOMEM;
- }
+ // first .ped scan: count samples, write .fam, note alleles at each locus
+ if (fopen_checked(pedname, FOPEN_RB, &pedfile)) {
+ goto ped_to_bed_ret_OPEN_FAIL;
}
- col1_ptr = skip_initial_spaces(loadbuf);
- if (is_eoln_or_comment_kns(*col1_ptr)) {
- ulii = strlen(loadbuf) + 1;
- if (ulii > ped_buflen) {
- ped_buflen = ulii;
- }
- continue;
- }
- if (fam_cols & FAM_COL_1) {
- col2_ptr = next_token(col1_ptr);
- } else {
- col2_ptr = col1_ptr;
- }
- bufptr = next_token_multz(col2_ptr, ped_col_skip_iid_m1);
- if (no_more_tokens_kns(bufptr)) {
- goto ped_to_bed_ret_MISSING_TOKENS_PED;
- }
- bufptr = token_endnn(bufptr);
- if ((bufptr - col1_ptr) > (MAXLINELEN / 2) - 4) {
- logprint("\n");
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .ped file has a pathologically long token.\n", line_idx);
- goto ped_to_bed_ret_INVALID_FORMAT_2;
- }
- if (fwrite_checked(col1_ptr, strlen_se(col1_ptr), outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
- }
- putc('\t', outfile);
- bufptr2 = write_token(col2_ptr, outfile);
- if (fam_cols & FAM_COL_34) {
- bufptr2 = write_token(bufptr2, outfile);
- bufptr2 = write_token(bufptr2, outfile);
- } else {
- fwrite("0\t0\t", 1, 4, outfile);
- }
- if (fam_cols & FAM_COL_5) {
- bufptr2 = write_token_nt(bufptr2, outfile);
- } else {
- putc('0', outfile);
+ memcpy(outname_end, ".fam", 5);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto ped_to_bed_ret_OPEN_FAIL;
}
- putc('\t', outfile);
- if (fam_cols & FAM_COL_6) {
- fwrite(bufptr2, 1, strlen_se(bufptr2), outfile);
- } else {
- fputs(missing_pheno_str, outfile);
+ loadbuf = (char*)g_bigstack_base;
+ loadbuf_size = bigstack_left();
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size <= MAXLINELEN) {
+ goto ped_to_bed_ret_NOMEM;
}
- if (putc_checked('\n', outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
+ if (fseeko(pedfile, 0, SEEK_END)) {
+ goto ped_to_bed_ret_READ_FAIL;
}
- marker_idx = 0;
- bufptr = skip_initial_spaces(bufptr);
- for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
- cc = *bufptr++;
- if (!cc) {
- goto ped_to_bed_ret_MISSING_TOKENS_PED;
+ ped_size = ftello(pedfile);
+ rewind(pedfile);
+ logprint("Scanning .ped file...");
+ fputs(" 0%", stdout);
+ fflush(stdout);
+ ped_next_thresh = ped_size / 100;
+ loadbuf[loadbuf_size - 1] = ' ';
+ pct = 0;
+ line_idx = 0;
+ while (fgets(loadbuf, loadbuf_size, pedfile)) {
+ line_idx++;
+ if (!loadbuf[loadbuf_size - 1]) {
+ if (loadbuf_size == MAXLINEBUFLEN) {
+ logprint("\n");
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .ped file is pathologically long.\n", line_idx);
+ goto ped_to_bed_ret_INVALID_FORMAT_2;
+ } else {
+ goto ped_to_bed_ret_NOMEM;
+ }
}
- bufptr = skip_initial_spaces(bufptr);
- cc2 = *bufptr++;
- if (!cc2) {
+ col1_ptr = skip_initial_spaces(loadbuf);
+ if (is_eoln_or_comment_kns(*col1_ptr)) {
+ ulii = strlen(loadbuf) + 1;
+ if (ulii > ped_buflen) {
+ ped_buflen = ulii;
+ }
+ continue;
+ }
+ if (fam_cols & FAM_COL_1) {
+ col2_ptr = next_token(col1_ptr);
+ } else {
+ col2_ptr = col1_ptr;
+ }
+ bufptr = next_token_multz(col2_ptr, ped_col_skip_iid_m1);
+ if (no_more_tokens_kns(bufptr)) {
goto ped_to_bed_ret_MISSING_TOKENS_PED;
}
- bufptr = skip_initial_spaces(bufptr);
- if (IS_SET(marker_exclude, marker_uidx)) {
- continue;
+ bufptr = token_endnn(bufptr);
+ if ((bufptr - col1_ptr) > (MAXLINELEN / 2) - 4) {
+ logprint("\n");
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .ped file has a pathologically long token.\n", line_idx);
+ goto ped_to_bed_ret_INVALID_FORMAT_2;
+ }
+ if (fwrite_checked(col1_ptr, strlen_se(col1_ptr), outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
}
- if (cc == missing_geno) {
- if (cc2 != missing_geno) {
+ putc_unlocked('\t', outfile);
+ bufptr2 = write_token(col2_ptr, outfile);
+ if (fam_cols & FAM_COL_34) {
+ bufptr2 = write_token(bufptr2, outfile);
+ bufptr2 = write_token(bufptr2, outfile);
+ } else {
+ fwrite("0\t0\t", 1, 4, outfile);
+ }
+ if (fam_cols & FAM_COL_5) {
+ bufptr2 = write_token_notab(bufptr2, outfile);
+ } else {
+ putc_unlocked('0', outfile);
+ }
+ putc_unlocked('\t', outfile);
+ if (fam_cols & FAM_COL_6) {
+ fwrite(bufptr2, 1, strlen_se(bufptr2), outfile);
+ } else {
+ fputs(missing_pheno_str, outfile);
+ }
+ if (putc_checked('\n', outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
+ }
+ marker_idx = 0;
+ bufptr = skip_initial_spaces(bufptr);
+ for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
+ cc = *bufptr++;
+ if (!cc) {
+ goto ped_to_bed_ret_MISSING_TOKENS_PED;
+ }
+ bufptr = skip_initial_spaces(bufptr);
+ cc2 = *bufptr++;
+ if (!cc2) {
+ goto ped_to_bed_ret_MISSING_TOKENS_PED;
+ }
+ bufptr = skip_initial_spaces(bufptr);
+ if (IS_SET(marker_exclude, marker_uidx)) {
+ continue;
+ }
+ if (cc == missing_geno) {
+ if (cc2 != missing_geno) {
+ is_single_char_alleles = 0;
+ break;
+ }
+ marker_idx++;
+ continue;
+ } else if (cc2 == missing_geno) {
+ is_single_char_alleles = 0;
+ break;
+ }
+ uii = 4 * (map_is_unsorted? map_reverse[marker_idx] : marker_idx);
+ if (incr_text_allele0(cc, &(marker_alleles[uii]), &(marker_allele_cts[uii])) ||
+ incr_text_allele0(cc2, &(marker_alleles[uii]), &(marker_allele_cts[uii]))) {
is_single_char_alleles = 0;
break;
}
marker_idx++;
- continue;
- } else if (cc2 == missing_geno) {
- is_single_char_alleles = 0;
- break;
}
- uii = 4 * (map_is_unsorted? map_reverse[marker_idx] : marker_idx);
- if (incr_text_allele0(cc, &(marker_alleles[uii]), &(marker_allele_cts[uii])) ||
- incr_text_allele0(cc2, &(marker_alleles[uii]), &(marker_allele_cts[uii]))) {
+ if ((!is_single_char_alleles) || (!is_eoln_kns(*bufptr))) {
+ // either multi-character alleles, or invalid format. Restart scan.
+ putc_unlocked('\r', stdout);
+ logstr("\n");
+ if (!marker_ct) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .ped file has more tokens than expected.\n", line_idx);
+ goto ped_to_bed_ret_INVALID_FORMAT_2;
+ }
+ logprint("Possibly irregular .ped line. Restarting scan, assuming multichar alleles.\n");
is_single_char_alleles = 0;
break;
}
- marker_idx++;
- }
- if ((!is_single_char_alleles) || (!is_eoln_kns(*bufptr))) {
- // either multi-character alleles, or invalid format. Restart scan.
- putchar('\r');
- logstr("\n");
- if (!marker_ct) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .ped file has more tokens than expected.\n", line_idx);
- goto ped_to_bed_ret_INVALID_FORMAT_2;
+ ulii = strlen(bufptr) + (uintptr_t)(bufptr - loadbuf) + 1;
+ if (ulii > ped_buflen) {
+ ped_buflen = ulii;
}
- logprint("Possibly irregular .ped line. Restarting scan, assuming multichar alleles.\n");
- is_single_char_alleles = 0;
- break;
- }
- ulii = strlen(bufptr) + (uintptr_t)(bufptr - loadbuf) + 1;
- if (ulii > ped_buflen) {
- ped_buflen = ulii;
- }
- sample_ct++;
- if (ftello(pedfile) >= ped_next_thresh) {
- uii = (ftello(pedfile) * 100) / ped_size;
- if (pct >= 10) {
- putchar('\b');
+ sample_ct++;
+ if (ftello(pedfile) >= ped_next_thresh) {
+ uii = (ftello(pedfile) * 100) / ped_size;
+ if (pct >= 10) {
+ putc_unlocked('\b', stdout);
+ }
+ printf("\b\b%u%%", uii);
+ fflush(stdout);
+ pct = uii;
}
- printf("\b\b%u%%", uii);
- fflush(stdout);
- pct = uii;
- }
- }
- if (is_single_char_alleles) {
- if (!feof(pedfile)) {
- goto ped_to_bed_ret_READ_FAIL;
- }
- if ((!sample_ct) && (!allow_no_samples)) {
- logprint("\n");
- sprintf(g_logbuf, "Error: No %s in .ped file.\n", g_species_plural);
- goto ped_to_bed_ret_INVALID_FORMAT_2;
- }
- if (fclose_null(&outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
- }
- memcpy(outname_end, ".bim", 5);
- if (fopen_checked(outname, "w", &outfile)) {
- goto ped_to_bed_ret_OPEN_FAIL;
}
- if (map_is_unsorted) {
- memcpy(outname_end, ".map.tmp", 9);
- if (fopen_checked(outname, "r", &mapfile)) {
+ if (is_single_char_alleles) {
+ if (!feof(pedfile)) {
+ goto ped_to_bed_ret_READ_FAIL;
+ }
+ if ((!sample_ct) && (!allow_no_samples)) {
+ logprint("\n");
+ sprintf(g_logbuf, "Error: No %s in .ped file.\n", g_species_plural);
+ goto ped_to_bed_ret_INVALID_FORMAT_2;
+ }
+ if (fclose_null(&outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
+ }
+ memcpy(outname_end, ".bim", 5);
+ if (fopen_checked(outname, "w", &outfile)) {
goto ped_to_bed_ret_OPEN_FAIL;
}
- } else {
- rewind(mapfile);
- }
- logstr(" done.\n");
- fputs("\r.ped scan complete (for binary autoconversion).\n", stdout);
- marker_uidx = 0;
- line_idx = 0;
- for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
if (map_is_unsorted) {
- if (!fgets(g_textbuf, MAXLINELEN, mapfile)) {
- goto ped_to_bed_ret_READ_FAIL;
+ memcpy(outname_end, ".map.tmp", 9);
+ if (fopen_checked(outname, "r", &mapfile)) {
+ goto ped_to_bed_ret_OPEN_FAIL;
}
} else {
- if (get_next_noncomment_excl(marker_exclude, mapfile, &bufptr, &line_idx, &marker_uidx)) {
- goto ped_to_bed_ret_READ_FAIL;
- }
+ rewind(mapfile);
}
- if (marker_alleles[marker_idx * 4 + 2]) {
- cc = marker_alleles[marker_idx * 4 + 3];
+ logstr(" done.\n");
+ fputs("\r.ped scan complete (for binary autoconversion).\n", stdout);
+ marker_uidx = 0;
+ line_idx = 0;
+ for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
if (map_is_unsorted) {
- sprintf(g_logbuf, "Warning: Variant %u (post-sort/filter) %sallelic; setting rarest missing.\n", map_reverse[marker_idx] + 1, (cc? "quad" : "tri"));
+ if (!fgets(g_textbuf, MAXLINELEN, mapfile)) {
+ goto ped_to_bed_ret_READ_FAIL;
+ }
} else {
- sprintf(g_logbuf, "Warning: Variant %" PRIuPTR " %sallelic; setting rarest alleles missing.\n", marker_idx + 1, (cc? "quad" : "tri"));
- }
- logerrprintb();
- ujj = (cc? 4 : 3);
- // insertion sort
- for (uii = 1; uii < ujj; uii++) {
- ukk = marker_allele_cts[4 * marker_idx + uii];
- if (marker_allele_cts[4 * marker_idx + uii - 1] < ukk) {
- cc = marker_alleles[4 * marker_idx + uii];
- umm = uii;
- do {
- umm--;
- marker_alleles[4 * marker_idx + umm + 1] = marker_alleles[4 * marker_idx + umm];
- marker_allele_cts[4 * marker_idx + umm + 1] = marker_allele_cts[4 * marker_idx + umm];
- } while (umm && (marker_allele_cts[4 * marker_idx + umm - 1] < ukk));
- marker_alleles[4 * marker_idx + umm] = cc;
- marker_allele_cts[4 * marker_idx + umm] = ukk;
+ if (get_next_noncomment_excl(marker_exclude, mapfile, &bufptr, &line_idx, &marker_uidx)) {
+ goto ped_to_bed_ret_READ_FAIL;
}
}
- cc = marker_alleles[marker_idx * 4 + 1];
- cc2 = marker_alleles[marker_idx * 4];
- } else {
- if (marker_allele_cts[marker_idx * 4] >= marker_allele_cts[marker_idx * 4 + 1]) {
+ if (marker_alleles[marker_idx * 4 + 2]) {
+ cc = marker_alleles[marker_idx * 4 + 3];
+ if (map_is_unsorted) {
+ sprintf(g_logbuf, "Warning: Variant %u (post-sort/filter) %sallelic; setting rarest missing.\n", map_reverse[marker_idx] + 1, (cc? "quad" : "tri"));
+ } else {
+ sprintf(g_logbuf, "Warning: Variant %" PRIuPTR " %sallelic; setting rarest alleles missing.\n", marker_idx + 1, (cc? "quad" : "tri"));
+ }
+ logerrprintb();
+ ujj = (cc? 4 : 3);
+ // insertion sort
+ for (uii = 1; uii < ujj; uii++) {
+ ukk = marker_allele_cts[4 * marker_idx + uii];
+ if (marker_allele_cts[4 * marker_idx + uii - 1] < ukk) {
+ cc = marker_alleles[4 * marker_idx + uii];
+ umm = uii;
+ do {
+ umm--;
+ marker_alleles[4 * marker_idx + umm + 1] = marker_alleles[4 * marker_idx + umm];
+ marker_allele_cts[4 * marker_idx + umm + 1] = marker_allele_cts[4 * marker_idx + umm];
+ } while (umm && (marker_allele_cts[4 * marker_idx + umm - 1] < ukk));
+ marker_alleles[4 * marker_idx + umm] = cc;
+ marker_allele_cts[4 * marker_idx + umm] = ukk;
+ }
+ }
cc = marker_alleles[marker_idx * 4 + 1];
cc2 = marker_alleles[marker_idx * 4];
} else {
- cc = marker_alleles[marker_idx * 4];
- cc2 = marker_alleles[marker_idx * 4 + 1];
+ if (marker_allele_cts[marker_idx * 4] >= marker_allele_cts[marker_idx * 4 + 1]) {
+ cc = marker_alleles[marker_idx * 4 + 1];
+ cc2 = marker_alleles[marker_idx * 4];
+ } else {
+ cc = marker_alleles[marker_idx * 4];
+ cc2 = marker_alleles[marker_idx * 4 + 1];
+ }
}
- }
- marker_alleles_f[marker_idx * 2] = cc;
- marker_alleles_f[marker_idx * 2 + 1] = cc2;
- if (!cc) {
- cc = '0';
- }
- if (!cc2) {
- cc2 = '0';
- }
- if (map_is_unsorted) {
- bufptr = (char*)memchr(g_textbuf, '\n', MAXLINELEN);
- if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
+ marker_alleles_f[marker_idx * 2] = cc;
+ marker_alleles_f[marker_idx * 2 + 1] = cc2;
+ if (!cc) {
+ cc = '0';
}
- } else {
- bufptr = write_token(bufptr, outfile);
- bufptr = write_token(bufptr, outfile);
- if (cm_col_exists) {
- ucc = (unsigned char)(*bufptr);
- if (((ucc >= '0') && (ucc <= '9')) || (ucc == '-') || (ucc == '+')) {
- bufptr = write_token_nt(bufptr, outfile);
- } else {
- putc('0', outfile);
- bufptr = next_token(bufptr);
+ if (!cc2) {
+ cc2 = '0';
+ }
+ if (map_is_unsorted) {
+ bufptr = (char*)memchr(g_textbuf, '\n', MAXLINELEN);
+ if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
}
} else {
- putc('0', outfile);
+ bufptr = write_token(bufptr, outfile);
+ bufptr = write_token(bufptr, outfile);
+ if (cm_col_exists) {
+ ucc = (unsigned char)(*bufptr);
+ if (((ucc >= '0') && (ucc <= '9')) || (ucc == '-') || (ucc == '+')) {
+ bufptr = write_token_notab(bufptr, outfile);
+ } else {
+ putc_unlocked('0', outfile);
+ bufptr = next_token(bufptr);
+ }
+ } else {
+ putc_unlocked('0', outfile);
+ }
+ putc_unlocked('\t', outfile);
+ fwrite(bufptr, 1, strlen_se(bufptr), outfile);
+ }
+ putc_unlocked('\t', outfile);
+ putc_unlocked(cc, outfile);
+ putc_unlocked('\t', outfile);
+ putc_unlocked(cc2, outfile);
+ if (putc_checked('\n', outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
}
- putc('\t', outfile);
- fwrite(bufptr, 1, strlen_se(bufptr), outfile);
+ marker_uidx++;
}
- putc('\t', outfile);
- putc(cc, outfile);
- putc('\t', outfile);
- putc(cc2, outfile);
- if (putc_checked('\n', outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
+ sample_ct4 = (sample_ct + 3) / 4;
+ bigstack_reset(marker_alleles);
+ fclose_null(&mapfile);
+ if (map_is_unsorted) {
+ unlink(outname);
}
- marker_uidx++;
- }
- sample_ct4 = (sample_ct + 3) / 4;
- bigstack_reset(marker_alleles);
- fclose_null(&mapfile);
- if (map_is_unsorted) {
- unlink(outname);
- }
- fclose_null(&outfile);
- if (bigstack_alloc_c(ped_buflen, &loadbuf)) {
- goto ped_to_bed_ret_NOMEM;
- }
- if (bigstack_left() >= marker_ct * sample_ct4) {
- markers_per_pass = marker_ct;
- sprintf(g_logbuf, "Performing single-pass .bed write (%" PRIuPTR " variant%s, %" PRIuPTR " %s).\n", marker_ct, (marker_ct == 1)? "" : "s", sample_ct, species_str(sample_ct));
- pass_ct = (marker_ct * sample_ct4)? 1 : 0;
- } else {
- if (!map_is_unsorted) {
- if (bigstack_alloc_ll(sample_ct, &line_starts)) {
+ fclose_null(&outfile);
+ if (bigstack_alloc_c(ped_buflen, &loadbuf)) {
+ goto ped_to_bed_ret_NOMEM;
+ }
+ if (bigstack_left() >= marker_ct * sample_ct4) {
+ markers_per_pass = marker_ct;
+ sprintf(g_logbuf, "Performing single-pass .bed write (%" PRIuPTR " variant%s, %" PRIuPTR " %s).\n", marker_ct, (marker_ct == 1)? "" : "s", sample_ct, species_str(sample_ct));
+ pass_ct = (marker_ct * sample_ct4)? 1 : 0;
+ } else {
+ if (!map_is_unsorted) {
+ if (bigstack_alloc_ll(sample_ct, &line_starts)) {
+ goto ped_to_bed_ret_NOMEM;
+ }
+ }
+ markers_per_pass = bigstack_left() / sample_ct4;
+ if (!markers_per_pass) {
goto ped_to_bed_ret_NOMEM;
}
+ pass_ct = (marker_ct + markers_per_pass - 1) / markers_per_pass;
+ sprintf(g_logbuf, "Performing %u-pass .bed write (%u/%" PRIuPTR " variant%s/pass, %" PRIuPTR " %s).\n", pass_ct, markers_per_pass, marker_ct, (markers_per_pass == 1)? "" : "s", sample_ct, species_str(sample_ct));
}
- markers_per_pass = bigstack_left() / sample_ct4;
- if (!markers_per_pass) {
- goto ped_to_bed_ret_NOMEM;
+ logprintb();
+ writebuf = g_bigstack_base;
+ memcpy(outname_end, ".bed", 5);
+ if (fopen_checked(outname, FOPEN_WB, &outfile)) {
+ goto ped_to_bed_ret_OPEN_FAIL;
}
- pass_ct = (marker_ct + markers_per_pass - 1) / markers_per_pass;
- sprintf(g_logbuf, "Performing %u-pass .bed write (%u/%" PRIuPTR " variant%s/pass, %" PRIuPTR " %s).\n", pass_ct, markers_per_pass, marker_ct, (markers_per_pass == 1)? "" : "s", sample_ct, species_str(sample_ct));
- }
- logprintb();
- writebuf = g_bigstack_base;
- memcpy(outname_end, ".bed", 5);
- if (fopen_checked(outname, FOPEN_WB, &outfile)) {
- goto ped_to_bed_ret_OPEN_FAIL;
- }
- if (fwrite_checked("l\x1b\x01", 3, outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
- }
- rewind(pedfile);
- umm = 0;
- for (uii = 0; uii < pass_ct; uii++) {
- marker_start = uii * markers_per_pass;
- if (uii + 1 == pass_ct) {
- ujj = marker_ct - marker_start;
- last_pass = 1;
- } else {
- ujj = markers_per_pass;
+ if (fwrite_checked("l\x1b\x01", 3, outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
}
- memset(writebuf, 0, ujj * sample_ct4);
- marker_end = marker_start + ujj;
- fputs("0%", stdout);
- sample_idx = 0;
- // 94 instead of 100 due to big fwrite at the end
- for (pct = 1; pct <= 94; pct++) {
- loop_end = (((uint64_t)pct) * sample_ct) / 94LLU;
- for (; sample_idx < loop_end; sample_idx++) {
- if ((!uii) || map_is_unsorted) {
- do {
- if (!last_pass) {
- ped_next_thresh = ftello(pedfile);
+ rewind(pedfile);
+ umm = 0;
+ for (uii = 0; uii < pass_ct; uii++) {
+ marker_start = uii * markers_per_pass;
+ if (uii + 1 == pass_ct) {
+ ujj = marker_ct - marker_start;
+ last_pass = 1;
+ } else {
+ ujj = markers_per_pass;
+ }
+ memset(writebuf, 0, ujj * sample_ct4);
+ marker_end = marker_start + ujj;
+ fputs("0%", stdout);
+ sample_idx = 0;
+ // 94 instead of 100 due to big fwrite at the end
+ for (pct = 1; pct <= 94; pct++) {
+ loop_end = (((uint64_t)pct) * sample_ct) / 94LLU;
+ for (; sample_idx < loop_end; sample_idx++) {
+ if ((!uii) || map_is_unsorted) {
+ do {
+ if (!last_pass) {
+ ped_next_thresh = ftello(pedfile);
+ }
+ if (!fgets(loadbuf, ped_buflen, pedfile)) {
+ goto ped_to_bed_ret_READ_FAIL;
+ }
+ col1_ptr = skip_initial_spaces(loadbuf);
+ } while (is_eoln_or_comment_kns(*col1_ptr));
+ bufptr = next_token_mult(col1_ptr, ped_col_skip);
+ } else {
+ ped_next_thresh = line_starts[sample_idx];
+ if (fseeko(pedfile, line_starts[sample_idx], SEEK_SET)) {
+ goto ped_to_bed_ret_READ_FAIL;
}
if (!fgets(loadbuf, ped_buflen, pedfile)) {
goto ped_to_bed_ret_READ_FAIL;
}
- col1_ptr = skip_initial_spaces(loadbuf);
- } while (is_eoln_or_comment_kns(*col1_ptr));
- bufptr = next_token_mult(col1_ptr, ped_col_skip);
- } else {
- ped_next_thresh = line_starts[sample_idx];
- if (fseeko(pedfile, line_starts[sample_idx], SEEK_SET)) {
- goto ped_to_bed_ret_READ_FAIL;
+ bufptr = loadbuf;
}
- if (!fgets(loadbuf, ped_buflen, pedfile)) {
- goto ped_to_bed_ret_READ_FAIL;
- }
- bufptr = loadbuf;
- }
- marker_idx = uii * markers_per_pass;
- ii_shift = (sample_idx % 4) * 2;
- wbufptr = &(writebuf[sample_idx / 4]);
- if (map_is_unsorted) {
- // multipass optimizations are possible, but we won't bother,
- // especially since the .map should rarely be unsorted in the first
- // place...
- umm = 0;
- for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
- cc = *bufptr++;
- bufptr = skip_initial_spaces(bufptr);
- cc2 = *bufptr++;
- bufptr = skip_initial_spaces(bufptr);
- if (IS_SET(marker_exclude, marker_uidx)) {
- continue;
+ marker_idx = uii * markers_per_pass;
+ ii_shift = (sample_idx % 4) * 2;
+ wbufptr = &(writebuf[sample_idx / 4]);
+ if (map_is_unsorted) {
+ // multipass optimizations are possible, but we won't bother,
+ // especially since the .map should rarely be unsorted in the
+ // first place...
+ umm = 0;
+ for (marker_uidx = 0; marker_uidx < unfiltered_marker_ct; marker_uidx++) {
+ cc = *bufptr++;
+ bufptr = skip_initial_spaces(bufptr);
+ cc2 = *bufptr++;
+ bufptr = skip_initial_spaces(bufptr);
+ if (IS_SET(marker_exclude, marker_uidx)) {
+ continue;
+ }
+ ukk = map_reverse[umm++];
+ if ((ukk >= marker_start) && (ukk < marker_end)) {
+ ucc = 1;
+ if (cc == marker_alleles_f[2 * ukk + 1]) {
+ if (cc2 == cc) {
+ ucc = 3;
+ } else if (cc2 == marker_alleles_f[2 * ukk]) {
+ ucc = 2;
+ }
+ } else if (cc == marker_alleles_f[2 * ukk]) {
+ if (cc2 == cc) {
+ ucc = 0;
+ } else if (cc2 == marker_alleles_f[2 * ukk + 1]) {
+ ucc = 2;
+ }
+ }
+ wbufptr[(ukk - marker_start) * sample_ct4] |= ucc << ii_shift;
+ marker_idx++;
+ }
}
- ukk = map_reverse[umm++];
- if ((ukk >= marker_start) && (ukk < marker_end)) {
+ } else {
+ for (marker_uidx = umm; marker_idx < marker_end; marker_uidx++) {
+ cc = *bufptr++;
+ bufptr = skip_initial_spaces(bufptr);
+ cc2 = *bufptr++;
+ bufptr = skip_initial_spaces(bufptr);
+ if (IS_SET(marker_exclude, marker_uidx)) {
+ continue;
+ }
ucc = 1;
- if (cc == marker_alleles_f[2 * ukk + 1]) {
+ if (cc == marker_alleles_f[2 * marker_idx + 1]) {
if (cc2 == cc) {
ucc = 3;
- } else if (cc2 == marker_alleles_f[2 * ukk]) {
+ } else if (cc2 == marker_alleles_f[2 * marker_idx]) {
ucc = 2;
}
- } else if (cc == marker_alleles_f[2 * ukk]) {
+ } else if (cc == marker_alleles_f[2 * marker_idx]) {
if (cc2 == cc) {
ucc = 0;
- } else if (cc2 == marker_alleles_f[2 * ukk + 1]) {
+ } else if (cc2 == marker_alleles_f[2 * marker_idx + 1]) {
ucc = 2;
}
}
- wbufptr[(ukk - marker_start) * sample_ct4] |= ucc << ii_shift;
+ *wbufptr |= ucc << ii_shift;
+ wbufptr = &(wbufptr[sample_ct4]);
marker_idx++;
}
- }
- } else {
- for (marker_uidx = umm; marker_idx < marker_end; marker_uidx++) {
- cc = *bufptr++;
- bufptr = skip_initial_spaces(bufptr);
- cc2 = *bufptr++;
- bufptr = skip_initial_spaces(bufptr);
- if (IS_SET(marker_exclude, marker_uidx)) {
- continue;
- }
- ucc = 1;
- if (cc == marker_alleles_f[2 * marker_idx + 1]) {
- if (cc2 == cc) {
- ucc = 3;
- } else if (cc2 == marker_alleles_f[2 * marker_idx]) {
- ucc = 2;
- }
- } else if (cc == marker_alleles_f[2 * marker_idx]) {
- if (cc2 == cc) {
- ucc = 0;
- } else if (cc2 == marker_alleles_f[2 * marker_idx + 1]) {
- ucc = 2;
- }
+ if (!last_pass) {
+ line_starts[sample_idx] = ped_next_thresh + (uintptr_t)(bufptr - loadbuf);
}
- *wbufptr |= ucc << ii_shift;
- wbufptr = &(wbufptr[sample_ct4]);
- marker_idx++;
- }
- if (!last_pass) {
- line_starts[sample_idx] = ped_next_thresh + (uintptr_t)(bufptr - loadbuf);
}
}
+ if (pct > 10) {
+ putc_unlocked('\b', stdout);
+ }
+ printf("\b\b%u%%", pct);
+ fflush(stdout);
}
- if (pct > 10) {
- putchar('\b');
+ if (fwrite_checked(writebuf, ujj * sample_ct4, outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
}
- printf("\b\b%u%%", pct);
- fflush(stdout);
- }
- if (fwrite_checked(writebuf, ujj * sample_ct4, outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
- }
- if (!last_pass) {
- printf("\rPass %u: \b\b\b", uii + 2);
- fflush(stdout);
- if (map_is_unsorted) {
- rewind(pedfile);
- } else {
- umm = marker_uidx;
+ if (!last_pass) {
+ printf("\rPass %u: \b\b\b", uii + 2);
+ fflush(stdout);
+ if (map_is_unsorted) {
+ rewind(pedfile);
+ } else {
+ umm = marker_uidx;
+ }
}
}
+ } else {
+ retval = ped_to_bed_multichar_allele(&pedfile, &outfile, outname, outname_end, &mapfile, unfiltered_marker_ct, marker_exclude, marker_ct, marker_alleles_f, map_is_unsorted, fam_cols, ped_col_skip_iid_m1 + 1, ped_col_skip, cm_col_exists, map_reverse, ped_size, missing_pheno_str);
+ if (retval) {
+ goto ped_to_bed_ret_1;
+ }
}
- } else {
- retval = ped_to_bed_multichar_allele(&pedfile, &outfile, outname, outname_end, &mapfile, unfiltered_marker_ct, marker_exclude, marker_ct, marker_alleles_f, map_is_unsorted, fam_cols, ped_col_skip_iid_m1 + 1, ped_col_skip, cm_col_exists, map_reverse, ped_size, missing_pheno_str);
- if (retval) {
- goto ped_to_bed_ret_1;
- }
- }
- if (fclose_null(&outfile)) {
- goto ped_to_bed_ret_WRITE_FAIL;
+ if (fclose_null(&outfile)) {
+ goto ped_to_bed_ret_WRITE_FAIL;
+ }
+ putc_unlocked('\r', stdout);
+ *outname_end = '\0';
+ LOGPRINTFWW("--file: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
}
- putchar('\r');
- *outname_end = '\0';
- LOGPRINTFWW("--file: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
while (0) {
ped_to_bed_ret_NOMEM:
@@ -6540,42 +6540,44 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
}
int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, uint32_t lgen_modifier, char* lgen_reference_fname, Chrom_info* chrom_info_ptr) {
+ // This code has not been carefully optimized, and also does not support
+ // multipass writes.
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
- FILE* outfile = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
uint32_t lgen_allele_count = lgen_modifier & LGEN_ALLELE_COUNT;
uint32_t allow_extra_chroms = (misc_flags / MISC_ALLOW_EXTRA_CHROMS) & 1;
uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
uint32_t allow_no_vars = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
uint32_t affection_01 = (misc_flags / MISC_AFFECTION_01) & 1;
uint32_t map_cols = 3;
- uintptr_t* marker_exclude = NULL;
+ uintptr_t* marker_exclude = nullptr;
uintptr_t marker_exclude_ct = 0;
uintptr_t max_marker_id_len = 0;
uintptr_t unfiltered_marker_ct = 0;
uintptr_t max_sample_id_len = 4;
uintptr_t marker_ct = 0;
char* missing_geno_ptr = (char*)g_missing_geno_ptr;
- char** marker_allele_ptrs = NULL;
- char* marker_ids = NULL;
- uint32_t* marker_pos = NULL;
+ char** marker_allele_ptrs = nullptr;
+ char* marker_ids = nullptr;
+ uint32_t* marker_pos = nullptr;
uintptr_t sample_ct = 0;
- char* sample_ids = NULL;
- char* paternal_ids = NULL;
+ char* sample_ids = nullptr;
+ char* paternal_ids = nullptr;
uintptr_t max_paternal_id_len = 2;
- char* maternal_ids = NULL;
+ char* maternal_ids = nullptr;
uintptr_t max_maternal_id_len = 2;
- uintptr_t* sex_nm = NULL;
- uintptr_t* sex_male = NULL;
+ uintptr_t* sex_nm = nullptr;
+ uintptr_t* sex_male = nullptr;
uint32_t affection = 0;
- uintptr_t* founder_info = NULL;
- uintptr_t* sample_exclude = NULL;
+ uintptr_t* founder_info = nullptr;
+ uintptr_t* sample_exclude = nullptr;
uint32_t map_is_unsorted = 0;
uint32_t compound_genotypes = 1; // 0 = no, 1 = unresolved, 2 = yes
char missing_geno = *missing_geno_ptr;
- uintptr_t* pheno_nm = NULL;
- uintptr_t* pheno_c = NULL;
- double* pheno_d = NULL;
+ uintptr_t* pheno_nm = nullptr;
+ uintptr_t* pheno_c = nullptr;
+ double* pheno_d = nullptr;
char* sorted_marker_ids;
uint32_t* marker_id_map;
uint32_t* map_reverse;
@@ -6595,7 +6597,6 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
char* a2ptr;
char* sptr;
char* sptr2;
- char** ma_end;
int64_t lgen_size;
int64_t lgen_next_thresh;
uintptr_t loadbuf_size;
@@ -6662,7 +6663,10 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
if (!marker_allele_ptrs) {
goto lgen_to_bed_ret_NOMEM;
}
- memset(marker_allele_ptrs, 0, 2 * marker_ct * sizeof(char*));
+ uii = 2 * marker_ct;
+ for (ujj = 0; ujj < uii; ujj++) {
+ marker_allele_ptrs[ujj] = missing_geno_ptr;
+ }
sample_ct4 = (sample_ct + 3) / 4;
if (bigstack_alloc_uc(((uintptr_t)marker_ct) * sample_ct4, &writebuf)) {
logerrprint("Error: Multipass .lgen -> .bed autoconversions are not yet supported. Try\nusing --chr and/or --memory (perhaps with a better machine).\n");
@@ -6713,7 +6717,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
ii = bsearch_str(cptr, a1len, marker_ids, max_marker_id_len, marker_ct);
if (ii != -1) {
marker_idx = marker_id_map[(uint32_t)ii];
- if (marker_allele_ptrs[2 * marker_idx + 1]) {
+ if (marker_allele_ptrs[2 * marker_idx + 1] != missing_geno_ptr) {
cptr[a1len] = '\0';
LOGPREPRINTFWW("Error: Duplicate variant ID '%s' in .ref file.\n", cptr);
goto lgen_to_bed_ret_INVALID_FORMAT_2;
@@ -6841,7 +6845,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
} else if ((*a2ptr == missing_geno) && (a2len == 1)) {
goto lgen_to_bed_ret_HALF_MISSING;
} else {
- if (!sptr) {
+ if (sptr == missing_geno_ptr) {
if (allele_set(a1ptr, a1len, &(marker_allele_ptrs[2 * marker_idx + 1]))) {
goto lgen_to_bed_ret_NOMEM;
}
@@ -6855,7 +6859,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
}
} else {
sptr2 = marker_allele_ptrs[2 * marker_idx];
- if (!sptr2) {
+ if (sptr2 == missing_geno_ptr) {
if (!strcmp(a1ptr, sptr)) {
if (!strcmp(a2ptr, sptr)) {
uii = 2;
@@ -6874,6 +6878,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
} else if (!strcmp(a2ptr, a1ptr)) {
uii = 0;
} else {
+ printf("\nfail 1\n");
goto lgen_to_bed_ret_NOT_BIALLELIC;
}
}
@@ -6903,7 +6908,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
if (ftello(infile) >= lgen_next_thresh) {
uii = (ftello(infile) * 100) / lgen_size;
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", uii);
fflush(stdout);
@@ -6955,7 +6960,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
if (ftello(infile) >= lgen_next_thresh) {
uii = (ftello(infile) * 100) / lgen_size;
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", uii);
fflush(stdout);
@@ -7004,12 +7009,6 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
if (fopen_checked(outname, "w", &outfile)) {
goto lgen_to_bed_ret_OPEN_FAIL;
}
- uii = 2 * marker_ct;
- for (ujj = 0; ujj < uii; ujj++) {
- if (!marker_allele_ptrs[ujj]) {
- marker_allele_ptrs[ujj] = missing_geno_ptr;
- }
- }
uii = 0;
marker_idx = 0;
while (fgets(g_textbuf, MAXLINELEN, infile)) {
@@ -7030,7 +7029,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
*cptr++ = '\t';
fwrite(g_textbuf, 1, cptr - g_textbuf, outfile);
fputs(marker_allele_ptrs[marker_idx * 2], outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
fputs(marker_allele_ptrs[marker_idx * 2 + 1], outfile);
if (putc_checked('\n', outfile)) {
goto lgen_to_bed_ret_WRITE_FAIL;
@@ -7051,7 +7050,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
}
memcpy(outname_end, ".fam", 5);
#ifdef _WIN32
- uii = GetFullPathName(famname, FNAMESIZE, g_textbuf, NULL);
+ uii = GetFullPathName(famname, FNAMESIZE, g_textbuf, nullptr);
if ((!uii) || (uii > FNAMESIZE))
#else
if (!realpath(famname, g_textbuf))
@@ -7061,7 +7060,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
goto lgen_to_bed_ret_OPEN_FAIL;
}
#ifdef _WIN32
- uii = GetFullPathName(outname, FNAMESIZE, &(g_textbuf[FNAMESIZE + 64]), NULL);
+ uii = GetFullPathName(outname, FNAMESIZE, &(g_textbuf[FNAMESIZE + 64]), nullptr);
if (!(uii && (uii <= FNAMESIZE) && (!strcmp(g_textbuf, &(g_textbuf[FNAMESIZE + 64])))))
#else
cptr = realpath(outname, &(g_textbuf[FNAMESIZE + 64]));
@@ -7138,15 +7137,7 @@ int32_t lgen_to_bed(char* lgenname, char* mapname, char* famname, char* outname,
break;
}
lgen_to_bed_ret_1:
- if (marker_allele_ptrs) {
- ma_end = &(marker_allele_ptrs[2 * marker_ct]);
- while (marker_allele_ptrs < ma_end) {
- sptr = *marker_allele_ptrs++;
- if (sptr && ((sptr < g_one_char_strs) || (sptr >= (&(g_one_char_strs[512]))))) {
- free(sptr);
- }
- }
- }
+ cleanup_allele_storage(2, marker_ct * 2, marker_allele_ptrs);
bigstack_reset(bigstack_mark);
aligned_free_cond(pheno_c);
if (infile) {
@@ -7195,10 +7186,10 @@ void transposed_to_bed_print_pct(uint32_t pct) {
int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* outname_end, uint64_t misc_flags, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
- FILE* bimfile = NULL;
- FILE* outfile = NULL;
- char** marker_allele_ptrs = NULL;
+ FILE* infile = nullptr;
+ FILE* bimfile = nullptr;
+ FILE* outfile = nullptr;
+ char** marker_allele_ptrs = nullptr;
uintptr_t sample_ct = 0;
uintptr_t line_idx = 0;
uint32_t no_extra_cols = 1;
@@ -7211,7 +7202,7 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
uint32_t allele_tot = 0;
uintptr_t marker_ct = 0;
uintptr_t max_marker_id_len = 0;
- uintptr_t max_marker_allele_len = 2; // for .bim.tmp reloading
+ uintptr_t max_marker_allele_blen = 2; // for .bim.tmp reloading
const char* missing_geno_ptr = g_missing_geno_ptr;
char missing_geno = *missing_geno_ptr;
@@ -7264,604 +7255,608 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
uint32_t cur_chrom;
uint32_t chrom_ct;
double* marker_cms;
- if (bigstack_alloc_ui(MAX_POSSIBLE_CHROM + 1, &chrom_start) ||
- bigstack_alloc_ui(MAX_POSSIBLE_CHROM, &chrom_id)) {
- goto transposed_to_bed_ret_NOMEM;
- }
-
- if (fopen_checked(tfamname, "r", &infile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".fam", 5);
- if (fopen_checked(outname, "w", &outfile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- g_textbuf[MAXLINELEN - 1] = ' ';
- while (fgets(g_textbuf, MAXLINELEN, infile)) {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tfam file is pathologically long.\n", line_idx);
- goto transposed_to_bed_ret_INVALID_FORMAT_2R;
- }
- cptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_kns(*cptr)) {
- continue;
- }
- ulii = strlen(cptr);
- if (cptr[ulii - 1] != '\n') {
- cptr[ulii++] = '\n';
- }
- if (fwrite_checked(cptr, ulii, outfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
+ {
+ if (bigstack_alloc_ui(MAX_POSSIBLE_CHROM + 1, &chrom_start) ||
+ bigstack_alloc_ui(MAX_POSSIBLE_CHROM, &chrom_id)) {
+ goto transposed_to_bed_ret_NOMEM;
}
- sample_ct++;
- }
- if (!feof(infile)) {
- goto transposed_to_bed_ret_READ_FAIL;
- }
- if ((!sample_ct) && (!allow_no_samples)) {
- sprintf(g_logbuf, "Error: No %s in .tfam file.\n", g_species_plural);
- goto transposed_to_bed_ret_INVALID_FORMAT_2R;
- }
- sample_ct4 = (sample_ct + 3) / 4;
- fclose_null(&infile);
- fclose_null(&outfile);
- memcpy(outname_end, ".bim.tmp", 9);
- if (fopen_checked(outname, "w", &bimfile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".bed.tmp", 9);
- if (fopen_checked(outname, FOPEN_WB, &outfile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- if (bigstack_alloc_uc(sample_ct4, &writebuf) ||
- bigstack_alloc_uc(sample_ct, &prewritebuf)) {
- goto transposed_to_bed_ret_NOMEM;
- }
- if (bigstack_end_alloc_c(NON_BIGSTACK_MIN, &allele_buf)) {
- goto transposed_to_bed_ret_NOMEM;
- }
- max_markers = bigstack_left() / sizeof(int64_t);
- mapvals = (int64_t*)g_bigstack_base;
- writemap[16] = 1;
- if (fwrite_checked("l\x1b\x01", 3, outfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
-
- if (fopen_checked(tpedname, "r", &infile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- if (fseeko(infile, 0, SEEK_END)) {
- goto transposed_to_bed_ret_READ_FAIL;
- }
- logstr("Processing .tped file.\n");
- transposed_to_bed_print_pct(0);
- fflush(stdout);
- tped_size = ftello(infile);
- rewind(infile);
- tped_next_thresh = tped_size / 100;
-
- line_idx = 0;
- while (1) {
- line_idx++;
- g_textbuf[MAXLINELEN - 1] = ' ';
- if (!fgets(g_textbuf, MAXLINELEN, infile)) {
- break;
+ if (fopen_checked(tfamname, "r", &infile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
}
- // assume first four fields are within MAXLINELEN characters, but after
- // that, anything goes. given e.g. 6MB indels in real datasets, there's
- // legitimate reason for a .tped line to be even longer than 2GB, so we use
- // a custom loading loop.
- cptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_kns(*cptr)) {
- if (!g_textbuf[MAXLINELEN - 1]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has excessive whitespace.\n", line_idx);
- goto transposed_to_bed_ret_INVALID_FORMAT_2R;
- }
- continue;
+ memcpy(outname_end, ".fam", 5);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
}
- cptr2 = next_token(cptr);
- cptr3 = next_token_mult(cptr2, 2);
- cptr4 = next_token(cptr3);
- if (no_more_tokens_kns(cptr4)) {
+ g_textbuf[MAXLINELEN - 1] = ' ';
+ while (fgets(g_textbuf, MAXLINELEN, infile)) {
+ line_idx++;
if (!g_textbuf[MAXLINELEN - 1]) {
- if (strlen_se(cptr) > MAX_ID_LEN) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long\nchromosome/contig name. (The " PROG_NAME_CAPS " limit is " MAX_ID_LEN_STR " characters.)\n", line_idx);
- } else if (cptr2 && (strlen_se(cptr2) > MAX_ID_LEN)) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long variant ID.\n(The " PROG_NAME_CAPS " limit is " MAX_ID_LEN_STR " characters.)\n", line_idx);
- } else if (next_token(cptr2) && (strlen_se(next_token(cptr2)) > MAX_ID_LEN)) {
- // far higher bound than necessary; main point is to ensure that if
- // we fall through to the "excessive whitespace" error message, that
- // complaint is justified.
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long centimorgan\nposition.\n", line_idx);
- } else if (cptr3 && (strlen_se(cptr3) > MAX_ID_LEN)) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long bp coordinate.\n", line_idx);
- } else {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has excessive whitespace.\n", line_idx);
- }
- goto transposed_to_bed_ret_INVALID_FORMAT_2R;
- } else {
- goto transposed_to_bed_ret_MISSING_TOKENS;
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tfam file is pathologically long.\n", line_idx);
+ goto transposed_to_bed_ret_INVALID_FORMAT_2R;
}
- }
- if (ftello(infile) >= tped_next_thresh) {
- uii = (ftello(infile) * 100) / tped_size;
- if (pct >= 10) {
- putchar('\b');
+ cptr = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*cptr)) {
+ continue;
}
- printf("\b\b%u%%", uii);
- fflush(stdout);
- pct = uii;
- tped_next_thresh = ((pct + 1) * tped_size) / 100;
- }
- ii = get_chrom_code(chrom_info_ptr, cptr);
- if (ii < 0) {
- if (chrom_error(".tped file", chrom_info_ptr, cptr, line_idx, ii, allow_extra_chroms)) {
- goto transposed_to_bed_ret_INVALID_FORMAT;
+ ulii = strlen(cptr);
+ if (cptr[ulii - 1] != '\n') {
+ cptr[ulii++] = '\n';
}
- retval = resolve_or_add_chrom_name(cptr, ".tped file", line_idx, chrom_info_ptr, &ii);
- if (retval) {
- goto transposed_to_bed_ret_1;
+ if (fwrite_checked(cptr, ulii, outfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
}
+ sample_ct++;
}
-
- if (scan_int_abs_defcap(cptr3, &jj)) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an invalid bp coordinate.\n", line_idx);
+ if (!feof(infile)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ }
+ if ((!sample_ct) && (!allow_no_samples)) {
+ sprintf(g_logbuf, "Error: No %s in .tfam file.\n", g_species_plural);
goto transposed_to_bed_ret_INVALID_FORMAT_2R;
}
- if ((!is_set(chrom_info_ptr->chrom_mask, ii)) || (jj < 0)) {
- cptr2 = cptr4;
- goto transposed_to_bed_nextline;
+ sample_ct4 = (sample_ct + 3) / 4;
+ fclose_null(&infile);
+ fclose_null(&outfile);
+
+ memcpy(outname_end, ".bim.tmp", 9);
+ if (fopen_checked(outname, "w", &bimfile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
}
- uii = strlen_se(cptr2);
- if (uii >= max_marker_id_len) {
- max_marker_id_len = uii + 1;
+ memcpy(outname_end, ".bed.tmp", 9);
+ if (fopen_checked(outname, FOPEN_WB, &outfile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
}
- cur_mapval = (int64_t)((((uint64_t)((uint32_t)ii)) << 32) | ((uint32_t)jj));
- if (marker_ct == max_markers) {
+ if (bigstack_alloc_uc(sample_ct4, &writebuf) ||
+ bigstack_alloc_uc(sample_ct, &prewritebuf)) {
goto transposed_to_bed_ret_NOMEM;
}
- mapvals[marker_ct++] = cur_mapval;
- if (last_mapval > cur_mapval) {
- map_is_unsorted = 1;
- } else {
- last_mapval = cur_mapval;
- }
- for (uii = 0; uii < 3; uii++) {
- cptr2 = token_endnn(cptr);
- *cptr2++ = '\t';
- fwrite(cptr, 1, cptr2 - cptr, bimfile);
- cptr = skip_initial_spaces(cptr2);
+ if (bigstack_end_alloc_c(NON_BIGSTACK_MIN, &allele_buf)) {
+ goto transposed_to_bed_ret_NOMEM;
}
- cptr2 = token_endnn(cptr);
- *cptr2++ = '\t';
- if (fwrite_checked(cptr, cptr2 - cptr, bimfile)) {
+ max_markers = bigstack_left() / sizeof(int64_t);
+ mapvals = (int64_t*)g_bigstack_base;
+ writemap[16] = 1;
+ if (fwrite_checked("l\x1b\x01", 3, outfile)) {
goto transposed_to_bed_ret_WRITE_FAIL;
}
- cptr2 = cptr4;
- alleles[0] = NULL;
- alleles[1] = NULL;
- alleles[2] = NULL;
- alleles[3] = NULL;
- fill_uint_zero(allele_cts, 4);
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- cptr2 = skip_initial_spaces(cptr2);
- while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
- if (cptr2[-1] == '\n') {
- goto transposed_to_bed_ret_MISSING_TOKENS;
+
+ if (fopen_checked(tpedname, "r", &infile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
+ }
+ if (fseeko(infile, 0, SEEK_END)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ }
+ logstr("Processing .tped file.\n");
+ transposed_to_bed_print_pct(0);
+ fflush(stdout);
+ tped_size = ftello(infile);
+ rewind(infile);
+ tped_next_thresh = tped_size / 100;
+
+ line_idx = 0;
+ while (1) {
+ line_idx++;
+ g_textbuf[MAXLINELEN - 1] = ' ';
+ if (!fgets(g_textbuf, MAXLINELEN, infile)) {
+ break;
+ }
+ // assume first four fields are within MAXLINELEN characters, but after
+ // that, anything goes. given e.g. 6MB indels in real datasets, there's
+ // legitimate reason for a .tped line to be even longer than 2GB, so we
+ // use a custom loading loop.
+ char* textbuf_first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*textbuf_first_token)) {
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has excessive whitespace.\n", line_idx);
+ goto transposed_to_bed_ret_INVALID_FORMAT_2R;
}
- if (!fgets(g_textbuf, MAXLINELEN, infile)) {
- if (ferror(infile)) {
- goto transposed_to_bed_ret_READ_FAIL;
+ continue;
+ }
+ char* first_token_end = token_endnn(textbuf_first_token);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - textbuf_first_token);
+ cptr2 = skip_initial_spaces(first_token_end);
+ cptr3 = next_token_mult(cptr2, 2);
+ cptr4 = next_token(cptr3);
+ if (no_more_tokens_kns(cptr4)) {
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ if (chrom_name_slen > MAX_ID_SLEN) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long\nchromosome/contig name. (The " PROG_NAME_CAPS " limit is " MAX_ID_SLEN_STR " characters.)\n", line_idx);
+ } else if (cptr2 && (strlen_se(cptr2) > MAX_ID_SLEN)) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long variant ID.\n(The " PROG_NAME_CAPS " limit is " MAX_ID_SLEN_STR " characters.)\n", line_idx);
+ } else if (next_token(cptr2) && (strlen_se(next_token(cptr2)) > MAX_ID_SLEN)) {
+ // far higher bound than necessary; main point is to ensure that if
+ // we fall through to the "excessive whitespace" error message,
+ // that complaint is justified.
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long centimorgan\nposition.\n", line_idx);
+ } else if (cptr3 && (strlen_se(cptr3) > MAX_ID_SLEN)) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an excessively long bp coordinate.\n", line_idx);
+ } else {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has excessive whitespace.\n", line_idx);
}
+ goto transposed_to_bed_ret_INVALID_FORMAT_2R;
+ } else {
goto transposed_to_bed_ret_MISSING_TOKENS;
}
- cptr2 = skip_initial_spaces(g_textbuf);
}
- axptr = cptr2;
- axlen = strlen_se(cptr2);
- if (!axlen) {
- goto transposed_to_bed_ret_MISSING_TOKENS;
+ if (ftello(infile) >= tped_next_thresh) {
+ uii = (ftello(infile) * 100) / tped_size;
+ if (pct >= 10) {
+ putc_unlocked('\b', stdout);
+ }
+ printf("\b\b%u%%", uii);
+ fflush(stdout);
+ pct = uii;
+ tped_next_thresh = ((pct + 1) * tped_size) / 100;
}
- cptr2 = &(axptr[axlen]);
- // only way for this to happen if it isn't at end of buffer is if we're
- // at EOF, which is an error anyway
- if (!(*cptr2)) {
- cptr3 = memcpya(allele_buf, axptr, axlen);
- axptr = allele_buf;
- do {
+ *first_token_end = '\0';
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code(textbuf_first_token, ".tped file", line_idx, chrom_name_slen, allow_extra_chroms, chrom_info_ptr, &cur_chrom_code);
+ if (retval) {
+ goto transposed_to_bed_ret_1;
+ }
+
+ if (scan_int_abs_defcap(cptr3, &jj)) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has an invalid bp coordinate.\n", line_idx);
+ goto transposed_to_bed_ret_INVALID_FORMAT_2R;
+ }
+ char* textbuf_iter = textbuf_first_token;
+ if ((!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) || (jj < 0)) {
+ cptr2 = cptr4;
+ goto transposed_to_bed_nextline;
+ }
+ uii = strlen_se(cptr2);
+ if (uii >= max_marker_id_len) {
+ max_marker_id_len = uii + 1;
+ }
+ cur_mapval = (int64_t)((((uint64_t)((uint32_t)cur_chrom_code)) << 32) | ((uint32_t)jj));
+ if (marker_ct == max_markers) {
+ goto transposed_to_bed_ret_NOMEM;
+ }
+ mapvals[marker_ct++] = cur_mapval;
+ if (last_mapval > cur_mapval) {
+ map_is_unsorted = 1;
+ } else {
+ last_mapval = cur_mapval;
+ }
+ for (uii = 0; uii < 3; uii++) {
+ char* token_end = token_endnn(textbuf_iter);
+ *token_end++ = '\t';
+ fwrite(textbuf_iter, 1, token_end - textbuf_iter, bimfile);
+ textbuf_iter = skip_initial_spaces(token_end);
+ }
+ cptr2 = token_endnn(textbuf_iter);
+ *cptr2++ = '\t';
+ if (fwrite_checked(textbuf_iter, cptr2 - textbuf_iter, bimfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
+ }
+ cptr2 = cptr4;
+ alleles[0] = nullptr;
+ alleles[1] = nullptr;
+ alleles[2] = nullptr;
+ alleles[3] = nullptr;
+ fill_uint_zero(4, allele_cts);
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ cptr2 = skip_initial_spaces(cptr2);
+ while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
+ if (cptr2[-1] == '\n') {
+ goto transposed_to_bed_ret_MISSING_TOKENS;
+ }
if (!fgets(g_textbuf, MAXLINELEN, infile)) {
if (ferror(infile)) {
goto transposed_to_bed_ret_READ_FAIL;
}
goto transposed_to_bed_ret_MISSING_TOKENS;
}
- cptr2 = g_textbuf;
- if (!is_space_or_eoln(*cptr2)) {
- cptr2 = token_endnn(cptr2);
+ cptr2 = skip_initial_spaces(g_textbuf);
+ }
+ axptr = cptr2;
+ axlen = strlen_se(cptr2);
+ if (!axlen) {
+ goto transposed_to_bed_ret_MISSING_TOKENS;
+ }
+ cptr2 = &(axptr[axlen]);
+ // only way for this to happen if it isn't at end of buffer is if we're
+ // at EOF, which is an error anyway
+ if (!(*cptr2)) {
+ cptr3 = memcpya(allele_buf, axptr, axlen);
+ axptr = allele_buf;
+ do {
+ if (!fgets(g_textbuf, MAXLINELEN, infile)) {
+ if (ferror(infile)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ }
+ goto transposed_to_bed_ret_MISSING_TOKENS;
+ }
+ cptr2 = g_textbuf;
+ if (!is_space_or_eoln(*cptr2)) {
+ cptr2 = token_endnn(cptr2);
+ }
+ if ((((uintptr_t)(cptr3 - allele_buf)) + ((uintptr_t)(cptr2 - g_textbuf))) >= NON_BIGSTACK_MIN) {
+ goto transposed_to_bed_ret_NOMEM;
+ }
+ cptr3 = memcpya(cptr3, g_textbuf, cptr2 - g_textbuf);
+ } while (!(*cptr2));
+ axlen = (uintptr_t)(cptr3 - allele_buf);
+ }
+ if ((*axptr != missing_geno) || (axlen != 1)) {
+ retval = update_tped_alleles_and_cts(&allele_tot, alleles, alens, allele_cts, axptr, axlen, &uii);
+ if (retval) {
+ if (retval == RET_INVALID_FORMAT) {
+ goto transposed_to_bed_ret_TOO_MANY_ALLELES;
+ }
+ goto transposed_to_bed_ret_NOMEM;
+ }
+ } else {
+ uii = 4;
+ }
+ cptr2 = skip_initial_spaces(cptr2);
+ while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
+ if (cptr2[-1] == '\n') {
+ goto transposed_to_bed_ret_MISSING_TOKENS;
+ }
+ if (!fgets(g_textbuf, MAXLINELEN, infile)) {
+ if (ferror(infile)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ }
+ goto transposed_to_bed_ret_MISSING_TOKENS;
+ }
+ cptr2 = skip_initial_spaces(g_textbuf);
+ }
+ axptr = cptr2;
+ axlen = strlen_se(cptr2);
+ cptr2 = &(axptr[axlen]);
+ if (!(*cptr2)) {
+ if (!axlen) {
+ goto transposed_to_bed_ret_MISSING_TOKENS;
+ }
+ cptr3 = memcpya(allele_buf, axptr, axlen);
+ axptr = allele_buf;
+ do {
+ cptr2 = g_textbuf;
+ if (!fgets(g_textbuf, MAXLINELEN, infile)) {
+ if (ferror(infile)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ } else if (sample_idx != sample_ct - 1) {
+ goto transposed_to_bed_ret_MISSING_TOKENS;
+ } else {
+ g_textbuf[0] = '\0';
+ break;
+ }
+ }
+ if (!is_space_or_eoln(*cptr2)) {
+ cptr2 = token_endnn(cptr2);
+ }
+ if ((((uintptr_t)(cptr3 - allele_buf)) + ((uintptr_t)(cptr2 - g_textbuf))) >= NON_BIGSTACK_MIN) {
+ goto transposed_to_bed_ret_NOMEM;
+ }
+ cptr3 = memcpya(cptr3, g_textbuf, cptr2 - g_textbuf);
+ } while (!(*cptr2));
+ axlen = (uintptr_t)(cptr3 - allele_buf);
+ }
+ if ((*axptr != missing_geno) || (axlen != 1)) {
+ if (uii == 4) {
+ goto transposed_to_bed_ret_HALF_MISSING;
}
- if ((((uintptr_t)(cptr3 - allele_buf)) + ((uintptr_t)(cptr2 - g_textbuf))) >= NON_BIGSTACK_MIN) {
+ retval = update_tped_alleles_and_cts(&allele_tot, alleles, alens, allele_cts, axptr, axlen, &ujj);
+ if (retval) {
+ if (retval == RET_INVALID_FORMAT) {
+ goto transposed_to_bed_ret_TOO_MANY_ALLELES;
+ }
goto transposed_to_bed_ret_NOMEM;
}
- cptr3 = memcpya(cptr3, g_textbuf, cptr2 - g_textbuf);
- } while (!(*cptr2));
- axlen = (uintptr_t)(cptr3 - allele_buf);
- }
- if ((*axptr != missing_geno) || (axlen != 1)) {
- retval = update_tped_alleles_and_cts(&allele_tot, alleles, alens, allele_cts, axptr, axlen, &uii);
- if (retval) {
- if (retval == RET_INVALID_FORMAT) {
- goto transposed_to_bed_ret_TOO_MANY_ALLELES;
+ prewritebuf[sample_idx] = uii * 4 + ujj;
+ } else {
+ if (uii != 4) {
+ goto transposed_to_bed_ret_HALF_MISSING;
}
- goto transposed_to_bed_ret_NOMEM;
+ prewritebuf[sample_idx] = 16;
}
- } else {
- uii = 4;
}
- cptr2 = skip_initial_spaces(cptr2);
- while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
- if (cptr2[-1] == '\n') {
- goto transposed_to_bed_ret_MISSING_TOKENS;
+
+ memcpy(salleles, alleles, 4 * sizeof(intptr_t));
+ for (uii = 1; uii < 4; uii++) {
+ ujj = allele_cts[uii];
+ if (allele_cts[uii - 1] < ujj) {
+ axptr = salleles[uii];
+ ii = uii;
+ do {
+ ii--;
+ salleles[((uint32_t)ii) + 1] = salleles[(uint32_t)ii];
+ allele_cts[((uint32_t)ii) + 1] = allele_cts[(uint32_t)ii];
+ } while (ii && (allele_cts[((uint32_t)ii) - 1] < ujj));
+ salleles[(uint32_t)ii] = axptr;
+ allele_cts[(uint32_t)ii] = ujj;
}
- if (!fgets(g_textbuf, MAXLINELEN, infile)) {
- if (ferror(infile)) {
- goto transposed_to_bed_ret_READ_FAIL;
+ }
+ if (allele_cts[2]) {
+ putc_unlocked('\r', stdout);
+ LOGPRINTF("Note: Variant %" PRIuPTR " is %sallelic. Setting rarest alleles to missing.\n", marker_ct - 1, allele_cts[3]? "quad" : "tri");
+ transposed_to_bed_print_pct(pct);
+ }
+ for (uii = 0; uii < 4; uii++) {
+ axptr = alleles[uii];
+ ucptr = &(writemap[4 * uii]);
+ if (!axptr) {
+ memset(ucptr, 1, 4);
+ } else if (axptr == salleles[0]) {
+ for (ujj = 0; ujj < 4; ujj++) {
+ axptr = alleles[ujj];
+ if (!axptr) {
+ *ucptr++ = 1;
+ } else if (axptr == salleles[0]) {
+ *ucptr++ = 3;
+ } else if (axptr == salleles[1]) {
+ *ucptr++ = 2;
+ } else {
+ *ucptr++ = 1;
+ }
}
- goto transposed_to_bed_ret_MISSING_TOKENS;
+ } else if (axptr == salleles[1]) {
+ for (ujj = 0; ujj < 4; ujj++) {
+ axptr = alleles[ujj];
+ if (!axptr) {
+ *ucptr++ = 1;
+ } else if (axptr == salleles[0]) {
+ *ucptr++ = 2;
+ } else if (axptr == salleles[1]) {
+ *ucptr++ = 0;
+ } else {
+ *ucptr++ = 1;
+ }
+ }
+ } else {
+ memset(ucptr, 1, 4);
}
- cptr2 = skip_initial_spaces(g_textbuf);
}
- axptr = cptr2;
- axlen = strlen_se(cptr2);
- cptr2 = &(axptr[axlen]);
- if (!(*cptr2)) {
- if (!axlen) {
- goto transposed_to_bed_ret_MISSING_TOKENS;
+ uii = sample_ct & (~3U);
+ ucptr = writebuf;
+ for (ujj = 0; ujj < uii; ujj += 4) {
+ *ucptr++ = writemap[prewritebuf[ujj]] | (writemap[prewritebuf[ujj + 1]] << 2) | (writemap[prewritebuf[ujj + 2]] << 4) | (writemap[prewritebuf[ujj + 3]] << 6);
+ }
+ ucc = 0;
+ ucptr2 = &(prewritebuf[uii]);
+ uii = sample_ct % 4;
+ if (uii) {
+ for (ujj = 0; ujj < uii; ujj++) {
+ ucc |= (writemap[*ucptr2++]) << (ujj * 2);
}
- cptr3 = memcpya(allele_buf, axptr, axlen);
- axptr = allele_buf;
- do {
+ *ucptr = ucc;
+ }
+ fwrite(writebuf, 1, sample_ct4, outfile);
+ if (!salleles[1]) {
+ putc_unlocked(missing_geno, bimfile);
+ } else {
+ uii = strlen(salleles[1]);
+ if (uii >= max_marker_allele_blen) {
+ max_marker_allele_blen = uii + 1;
+ }
+ fputs(salleles[1], bimfile);
+ }
+ putc_unlocked('\t', bimfile);
+ if (!salleles[0]) {
+ putc_unlocked(missing_geno, bimfile);
+ } else {
+ uii = strlen(salleles[0]);
+ if (uii >= max_marker_allele_blen) {
+ max_marker_allele_blen = uii + 1;
+ }
+ fputs(salleles[0], bimfile);
+ }
+ for (uii = 0; uii < allele_tot; uii++) {
+ if (alleles[uii][1]) {
+ free(alleles[uii]);
+ }
+ }
+ allele_tot = 0;
+ if (putc_checked('\n', bimfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
+ }
+ if (no_extra_cols) {
+ cptr2 = skip_initial_spaces(cptr2);
+ while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
+ if (cptr2[-1] == '\n') {
+ break;
+ }
cptr2 = g_textbuf;
if (!fgets(g_textbuf, MAXLINELEN, infile)) {
if (ferror(infile)) {
goto transposed_to_bed_ret_READ_FAIL;
- } else if (sample_idx != sample_ct - 1) {
- goto transposed_to_bed_ret_MISSING_TOKENS;
- } else {
- g_textbuf[0] = '\0';
- break;
}
+ g_textbuf[0] = '\0';
+ break;
}
- if (!is_space_or_eoln(*cptr2)) {
- cptr2 = token_endnn(cptr2);
- }
- if ((((uintptr_t)(cptr3 - allele_buf)) + ((uintptr_t)(cptr2 - g_textbuf))) >= NON_BIGSTACK_MIN) {
- goto transposed_to_bed_ret_NOMEM;
- }
- cptr3 = memcpya(cptr3, g_textbuf, cptr2 - g_textbuf);
- } while (!(*cptr2));
- axlen = (uintptr_t)(cptr3 - allele_buf);
- }
- if ((*axptr != missing_geno) || (axlen != 1)) {
- if (uii == 4) {
- goto transposed_to_bed_ret_HALF_MISSING;
+ cptr2 = skip_initial_spaces(cptr2);
}
- retval = update_tped_alleles_and_cts(&allele_tot, alleles, alens, allele_cts, axptr, axlen, &ujj);
- if (retval) {
- if (retval == RET_INVALID_FORMAT) {
- goto transposed_to_bed_ret_TOO_MANY_ALLELES;
- }
- goto transposed_to_bed_ret_NOMEM;
+ if (!is_space_or_eoln(*cptr2)) {
+ no_extra_cols = 0;
+ putc_unlocked('\r', stdout);
+ logerrprint("Warning: Extra columns in .tped file. Ignoring.\n");
+ transposed_to_bed_print_pct(pct);
+ goto transposed_to_bed_nextline;
}
- prewritebuf[sample_idx] = uii * 4 + ujj;
} else {
- if (uii != 4) {
- goto transposed_to_bed_ret_HALF_MISSING;
- }
- prewritebuf[sample_idx] = 16;
- }
- }
-
- memcpy(salleles, alleles, 4 * sizeof(intptr_t));
- for (uii = 1; uii < 4; uii++) {
- ujj = allele_cts[uii];
- if (allele_cts[uii - 1] < ujj) {
- axptr = salleles[uii];
- ii = uii;
- do {
- ii--;
- salleles[((uint32_t)ii) + 1] = salleles[(uint32_t)ii];
- allele_cts[((uint32_t)ii) + 1] = allele_cts[(uint32_t)ii];
- } while (ii && (allele_cts[((uint32_t)ii) - 1] < ujj));
- salleles[(uint32_t)ii] = axptr;
- allele_cts[(uint32_t)ii] = ujj;
- }
- }
- if (allele_cts[2]) {
- putchar('\r');
- LOGPRINTF("Note: Variant %" PRIuPTR " is %sallelic. Setting rarest alleles to missing.\n", marker_ct - 1, allele_cts[3]? "quad" : "tri");
- transposed_to_bed_print_pct(pct);
- }
- for (uii = 0; uii < 4; uii++) {
- axptr = alleles[uii];
- ucptr = &(writemap[4 * uii]);
- if (!axptr) {
- memset(ucptr, 1, 4);
- } else if (axptr == salleles[0]) {
- for (ujj = 0; ujj < 4; ujj++) {
- axptr = alleles[ujj];
- if (!axptr) {
- *ucptr++ = 1;
- } else if (axptr == salleles[0]) {
- *ucptr++ = 3;
- } else if (axptr == salleles[1]) {
- *ucptr++ = 2;
- } else {
- *ucptr++ = 1;
- }
- }
- } else if (axptr == salleles[1]) {
- for (ujj = 0; ujj < 4; ujj++) {
- axptr = alleles[ujj];
- if (!axptr) {
- *ucptr++ = 1;
- } else if (axptr == salleles[0]) {
- *ucptr++ = 2;
- } else if (axptr == salleles[1]) {
- *ucptr++ = 0;
- } else {
- *ucptr++ = 1;
+ transposed_to_bed_nextline:
+ cptr2 = (char*)memchr(cptr2, 0, MAXLINELEN - ((uintptr_t)(cptr2 - g_textbuf)));
+ while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
+ if (cptr2[-1] == '\n') {
+ break;
}
+ if (!fgets(g_textbuf, MAXLINELEN, infile)) {
+ if (ferror(infile)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ }
+ break;
+ }
+ cptr2 = (char*)memchr(g_textbuf, 0, MAXLINELEN);
}
- } else {
- memset(ucptr, 1, 4);
}
}
- uii = sample_ct & (~3U);
- ucptr = writebuf;
- for (ujj = 0; ujj < uii; ujj += 4) {
- *ucptr++ = writemap[prewritebuf[ujj]] | (writemap[prewritebuf[ujj + 1]] << 2) | (writemap[prewritebuf[ujj + 2]] << 4) | (writemap[prewritebuf[ujj + 3]] << 6);
+ bigstack_end_reset(bigstack_end_mark);
+ if (fclose_null(&infile)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ }
+ if (fclose_null(&bimfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
}
- ucc = 0;
- ucptr2 = &(prewritebuf[uii]);
- uii = sample_ct % 4;
- if (uii) {
- for (ujj = 0; ujj < uii; ujj++) {
- ucc |= (writemap[*ucptr2++]) << (ujj * 2);
- }
- *ucptr = ucc;
+ if (fclose_null(&outfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
}
- fwrite(writebuf, 1, sample_ct4, outfile);
- if (!salleles[1]) {
- putc(missing_geno, bimfile);
- } else {
- uii = strlen(salleles[1]);
- if (uii >= max_marker_allele_len) {
- max_marker_allele_len = uii + 1;
- }
- fputs(salleles[1], bimfile);
+ if ((!marker_ct) && (!allow_no_variants)) {
+ fputs("\b\b\b\b\b \r", stdout);
+ logerrprint("Error: Empty .tped file.\n");
+ goto transposed_to_bed_ret_INVALID_FORMAT;
}
- putc('\t', bimfile);
- if (!salleles[0]) {
- putc(missing_geno, bimfile);
- } else {
- uii = strlen(salleles[0]);
- if (uii >= max_marker_allele_len) {
- max_marker_allele_len = uii + 1;
+
+ chrom_info_ptr->zero_extra_chroms = 0;
+ if (map_is_unsorted) {
+ loadbuf_size = 2 * max_marker_allele_blen + MAXLINELEN;
+ bigstack_alloc(marker_ct * sizeof(int64_t)); // mapvals
+
+ if (bigstack_alloc_ll(marker_ct, &ll_buf) ||
+ bigstack_alloc_ui(marker_ct, &pos_buf) ||
+ bigstack_alloc_c(marker_ct * max_marker_id_len, &marker_ids) ||
+ bigstack_alloc_d(marker_ct, &marker_cms) ||
+ bigstack_alloc_c(loadbuf_size, &loadbuf)) {
+ goto transposed_to_bed_ret_NOMEM;
}
- fputs(salleles[0], bimfile);
- }
- for (uii = 0; uii < allele_tot; uii++) {
- if (alleles[uii][1]) {
- free(alleles[uii]);
+ marker_allele_ptrs = (char**)bigstack_alloc(marker_ct * 2 * sizeof(intptr_t));
+ if (!marker_allele_ptrs) {
+ goto transposed_to_bed_ret_NOMEM;
}
- }
- allele_tot = 0;
- if (putc_checked('\n', bimfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
- if (no_extra_cols) {
- cptr2 = skip_initial_spaces(cptr2);
- while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
- if (cptr2[-1] == '\n') {
- break;
+ // prevent cleanup from failing
+ uint32_t allele_idx_end = marker_ct * 2;
+ for (uint32_t allele_idx = 0; allele_idx < allele_idx_end; ++allele_idx) {
+ marker_allele_ptrs[allele_idx] = (char*)missing_geno_ptr;
+ }
+
+ for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
+ pos_buf[marker_idx] = (uint32_t)((uint64_t)mapvals[marker_idx]);
+ ll_buf[marker_idx] = (mapvals[marker_idx] & 0xffffffff00000000LLU) | marker_idx;
+ }
+ sort_marker_chrom_pos(ll_buf, marker_ct, pos_buf, chrom_start, chrom_id, nullptr, &chrom_ct);
+
+ memcpy(outname_end, ".bim.tmp", 9);
+ if (fopen_checked(outname, "r", &infile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
+ }
+ outname_end[4] = '\0';
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
+ }
+ marker_idx = 0;
+ line_idx = 0;
+ while (fgets(loadbuf, loadbuf_size, infile)) {
+ line_idx++;
+ // .tmp file, guaranteed to be no spaces in front
+ cptr = skip_initial_spaces(token_endnn(loadbuf));
+ cptr2 = token_endnn(cptr);
+ cptr3 = skip_initial_spaces(cptr2);
+ cptr4 = next_token_mult(cptr3, 2);
+ uii = cptr2 - cptr;
+ memcpyx(&(marker_ids[marker_idx * max_marker_id_len]), cptr, uii, '\0');
+ if (scan_double(cptr3, &(marker_cms[marker_idx]))) {
+ sprintf(g_logbuf, "Error: Invalid centimorgan position on line %" PRIuPTR " of .tped file\n", line_idx);
+ goto transposed_to_bed_ret_INVALID_FORMAT_2R;
+ }
+ uii = strlen_se(cptr4);
+ if (allele_set(cptr4, uii, &(marker_allele_ptrs[2 * marker_idx]))) {
+ goto transposed_to_bed_ret_NOMEM;
}
- cptr2 = g_textbuf;
- if (!fgets(g_textbuf, MAXLINELEN, infile)) {
- if (ferror(infile)) {
- goto transposed_to_bed_ret_READ_FAIL;
- }
- g_textbuf[0] = '\0';
- break;
+ cptr4 = skip_initial_spaces(&(cptr4[uii + 1]));
+ uii = strlen_se(cptr4);
+ if (allele_set(cptr4, uii, &(marker_allele_ptrs[2 * marker_idx + 1]))) {
+ goto transposed_to_bed_ret_NOMEM;
}
- cptr2 = skip_initial_spaces(cptr2);
+ marker_idx++;
}
- if (!is_space_or_eoln(*cptr2)) {
- no_extra_cols = 0;
- putchar('\r');
- logerrprint("Warning: Extra columns in .tped file. Ignoring.\n");
- transposed_to_bed_print_pct(pct);
- goto transposed_to_bed_nextline;
+ if (!feof(infile)) {
+ goto transposed_to_bed_ret_READ_FAIL;
}
- } else {
- transposed_to_bed_nextline:
- cptr2 = (char*)memchr(cptr2, 0, MAXLINELEN - ((uintptr_t)(cptr2 - g_textbuf)));
- while (cptr2 == &(g_textbuf[MAXLINELEN - 1])) {
- if (cptr2[-1] == '\n') {
- break;
- }
- if (!fgets(g_textbuf, MAXLINELEN, infile)) {
- if (ferror(infile)) {
- goto transposed_to_bed_ret_READ_FAIL;
+ fclose_null(&infile);
+ marker_idx = 0;
+ for (uii = 0; uii < chrom_ct; uii++) {
+ cur_chrom = chrom_id[uii];
+ ujj = chrom_start[uii + 1];
+ cptr2 = chrom_name_write(chrom_info_ptr, cur_chrom, &(g_textbuf[MAXLINELEN]));
+ *cptr2++ = '\t';
+ for (; marker_idx < ujj; marker_idx++) {
+ marker_uidx = (uint32_t)ll_buf[marker_idx];
+ fwrite(&(g_textbuf[MAXLINELEN]), 1, cptr2 - (&(g_textbuf[MAXLINELEN])), outfile);
+ fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
+ g_textbuf[0] = '\t';
+ cptr = dtoa_gx(marker_cms[marker_uidx], '\t', &(g_textbuf[1]));
+ cptr = uint32toa_x((uint32_t)(ll_buf[marker_idx] >> 32), '\t', cptr);
+ if (fwrite_checked(g_textbuf, (uintptr_t)(cptr - g_textbuf), outfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
+ }
+ fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
+ putc_unlocked('\t', outfile);
+ fputs(marker_allele_ptrs[2 * marker_uidx + 1], outfile);
+ if (putc_checked('\n', outfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
}
- break;
}
- cptr2 = (char*)memchr(g_textbuf, 0, MAXLINELEN);
}
- }
- }
- bigstack_end_reset(bigstack_end_mark);
- if (fclose_null(&infile)) {
- goto transposed_to_bed_ret_READ_FAIL;
- }
- if (fclose_null(&bimfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
- if (fclose_null(&outfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
- if ((!marker_ct) && (!allow_no_variants)) {
- fputs("\b\b\b\b\b \r", stdout);
- logerrprint("Error: Empty .tped file.\n");
- goto transposed_to_bed_ret_INVALID_FORMAT;
- }
-
- chrom_info_ptr->zero_extra_chroms = 0;
- if (map_is_unsorted) {
- loadbuf_size = 2 * max_marker_allele_len + MAXLINELEN;
- bigstack_alloc(marker_ct * sizeof(int64_t)); // mapvals
-
- if (bigstack_alloc_ll(marker_ct, &ll_buf) ||
- bigstack_alloc_ui(marker_ct, &pos_buf) ||
- bigstack_alloc_c(marker_ct * max_marker_id_len, &marker_ids) ||
- bigstack_alloc_d(marker_ct, &marker_cms) ||
- bigstack_alloc_c(loadbuf_size, &loadbuf)) {
- goto transposed_to_bed_ret_NOMEM;
- }
- marker_allele_ptrs = (char**)bigstack_alloc(marker_ct * 2 * sizeof(intptr_t));
- if (!marker_allele_ptrs) {
- goto transposed_to_bed_ret_NOMEM;
- }
- // prevent cleanup from failing
- memset(marker_allele_ptrs, 0, marker_ct * 2 * sizeof(intptr_t));
+ if (fclose_null(&outfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
+ }
- for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
- pos_buf[marker_idx] = (uint32_t)((uint64_t)mapvals[marker_idx]);
- ll_buf[marker_idx] = (mapvals[marker_idx] & 0xffffffff00000000LLU) | marker_idx;
- }
- sort_marker_chrom_pos(ll_buf, marker_ct, pos_buf, chrom_start, chrom_id, NULL, &chrom_ct);
+ outname_end[4] = '.';
+ unlink(outname);
- memcpy(outname_end, ".bim.tmp", 9);
- if (fopen_checked(outname, "r", &infile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- outname_end[4] = '\0';
- if (fopen_checked(outname, "w", &outfile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- marker_idx = 0;
- line_idx = 0;
- while (fgets(loadbuf, loadbuf_size, infile)) {
- line_idx++;
- // .tmp file, guaranteed to be no spaces in front
- cptr = skip_initial_spaces(token_endnn(loadbuf));
- cptr2 = token_endnn(cptr);
- cptr3 = skip_initial_spaces(cptr2);
- cptr4 = next_token_mult(cptr3, 2);
- uii = cptr2 - cptr;
- memcpyx(&(marker_ids[marker_idx * max_marker_id_len]), cptr, uii, '\0');
- if (scan_double(cptr3, &(marker_cms[marker_idx]))) {
- sprintf(g_logbuf, "Error: Invalid centimorgan position on line %" PRIuPTR " of .tped file\n", line_idx);
- goto transposed_to_bed_ret_INVALID_FORMAT_2R;
+ outname_end[2] = 'e';
+ outname_end[3] = 'd';
+ if (fopen_checked(outname, FOPEN_RB, &infile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
}
- uii = strlen_se(cptr4);
- if (allele_set(cptr4, uii, &(marker_allele_ptrs[2 * marker_idx]))) {
- goto transposed_to_bed_ret_NOMEM;
+ outname_end[4] = '\0';
+ if (fopen_checked(outname, FOPEN_WB, &outfile)) {
+ goto transposed_to_bed_ret_OPEN_FAIL;
}
- cptr4 = skip_initial_spaces(&(cptr4[uii + 1]));
- uii = strlen_se(cptr4);
- if (allele_set(cptr4, uii, &(marker_allele_ptrs[2 * marker_idx + 1]))) {
- goto transposed_to_bed_ret_NOMEM;
+ if (fwrite_checked("l\x1b\x01", 3, outfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
}
- marker_idx++;
- }
- if (!feof(infile)) {
- goto transposed_to_bed_ret_READ_FAIL;
- }
- fclose_null(&infile);
- marker_idx = 0;
- for (uii = 0; uii < chrom_ct; uii++) {
- cur_chrom = chrom_id[uii];
- ujj = chrom_start[uii + 1];
- cptr2 = chrom_name_write(chrom_info_ptr, cur_chrom, &(g_textbuf[MAXLINELEN]));
- *cptr2++ = '\t';
- for (; marker_idx < ujj; marker_idx++) {
- marker_uidx = (uint32_t)ll_buf[marker_idx];
- fwrite(&(g_textbuf[MAXLINELEN]), 1, cptr2 - (&(g_textbuf[MAXLINELEN])), outfile);
- fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- g_textbuf[0] = '\t';
- cptr = dtoa_gx(marker_cms[marker_uidx], '\t', &(g_textbuf[1]));
- cptr = uint32toa_x((uint32_t)(ll_buf[marker_idx] >> 32), '\t', cptr);
- if (fwrite_checked(g_textbuf, (uintptr_t)(cptr - g_textbuf), outfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
- fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
- putc('\t', outfile);
- fputs(marker_allele_ptrs[2 * marker_uidx + 1], outfile);
- if (putc_checked('\n', outfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
+ uii = 0xfffffffeU; // last marker uidx
+ for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
+ marker_uidx = (uint32_t)(ll_buf[marker_idx]);
+ if (marker_uidx != uii + 1) {
+ if (fseeko(infile, 3 + ((uint64_t)marker_uidx) * sample_ct4, SEEK_SET)) {
+ goto transposed_to_bed_ret_READ_FAIL;
+ }
}
- }
- }
- if (fclose_null(&outfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
-
- outname_end[4] = '.';
- unlink(outname);
-
- outname_end[2] = 'e';
- outname_end[3] = 'd';
- if (fopen_checked(outname, FOPEN_RB, &infile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- outname_end[4] = '\0';
- if (fopen_checked(outname, FOPEN_WB, &outfile)) {
- goto transposed_to_bed_ret_OPEN_FAIL;
- }
- if (fwrite_checked("l\x1b\x01", 3, outfile)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
- uii = 0xfffffffeU; // last marker uidx
- for (marker_idx = 0; marker_idx < marker_ct; marker_idx++) {
- marker_uidx = (uint32_t)(ll_buf[marker_idx]);
- if (marker_uidx != uii + 1) {
- if (fseeko(infile, 3 + ((uint64_t)marker_uidx) * sample_ct4, SEEK_SET)) {
+ if (load_raw(sample_ct4, infile, (uintptr_t*)writebuf)) {
goto transposed_to_bed_ret_READ_FAIL;
}
+ if (fwrite_checked(writebuf, sample_ct4, outfile)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
+ }
+ uii = marker_uidx;
}
- if (load_raw(sample_ct4, infile, (uintptr_t*)writebuf)) {
- goto transposed_to_bed_ret_READ_FAIL;
+ fclose_null(&infile);
+ outname_end[4] = '.';
+ unlink(outname);
+ outname_end[4] = '\0';
+ } else {
+ uii = (outname_end - outname);
+ memcpy(outname_end, ".bim.tmp", 9);
+ memcpy(g_textbuf, outname, 9 + uii);
+ outname_end[4] = '\0';
+ if (rename(g_textbuf, outname)) {
+ goto transposed_to_bed_ret_WRITE_FAIL;
}
- if (fwrite_checked(writebuf, sample_ct4, outfile)) {
+ g_textbuf[uii + 2] = 'e';
+ g_textbuf[uii + 3] = 'd';
+ outname_end[2] = 'e';
+ outname_end[3] = 'd';
+ if (rename(g_textbuf, outname)) {
goto transposed_to_bed_ret_WRITE_FAIL;
}
- uii = marker_uidx;
- }
- fclose_null(&infile);
- outname_end[4] = '.';
- unlink(outname);
- outname_end[4] = '\0';
- } else {
- uii = (outname_end - outname);
- memcpy(outname_end, ".bim.tmp", 9);
- memcpy(g_textbuf, outname, 9 + uii);
- outname_end[4] = '\0';
- if (rename(g_textbuf, outname)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
- }
- g_textbuf[uii + 2] = 'e';
- g_textbuf[uii + 3] = 'd';
- outname_end[2] = 'e';
- outname_end[3] = 'd';
- if (rename(g_textbuf, outname)) {
- goto transposed_to_bed_ret_WRITE_FAIL;
}
+ fputs("\rProcessing .tped file... done.\n", stdout);
+ *outname_end = '\0';
+ LOGPRINTFWW("%s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
}
- fputs("\rProcessing .tped file... done.\n", stdout);
- *outname_end = '\0';
- LOGPRINTFWW("%s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
while (0) {
transposed_to_bed_ret_NOMEM:
@@ -7877,7 +7872,7 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
retval = RET_WRITE_FAIL;
break;
transposed_to_bed_ret_MISSING_TOKENS:
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGERRPRINTF("Error: Line %" PRIuPTR " of .tped file has fewer tokens than expected.\n", line_idx);
retval = RET_INVALID_FORMAT;
break;
@@ -7887,31 +7882,27 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
transposed_to_bed_ret_HALF_MISSING:
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of .tped file has a half-missing call.\n", line_idx);
transposed_to_bed_ret_INVALID_FORMAT_2R:
- putchar('\r');
+ putc_unlocked('\r', stdout);
logerrprintb();
retval = RET_INVALID_FORMAT;
break;
transposed_to_bed_ret_TOO_MANY_ALLELES:
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGERRPRINTF("Error: More than four alleles at variant %" PRIuPTR ".\n", marker_ct - 1);
// retval already set
break;
}
transposed_to_bed_ret_1:
chrom_info_ptr->zero_extra_chroms = orig_zec;
+ printf("step 1\n");
for (uii = 0; uii < allele_tot; uii++) {
if (alleles[uii][1]) {
free(alleles[uii]);
}
}
- if (marker_allele_ptrs && (max_marker_allele_len > 2)) {
- for (marker_idx = 0; marker_idx < marker_ct * 2; marker_idx++) {
- cptr = marker_allele_ptrs[marker_idx];
- if (cptr && ((cptr < g_one_char_strs) || (cptr >= (&(g_one_char_strs[512]))))) {
- free(cptr);
- }
- }
- }
+ printf("step 2\n");
+ cleanup_allele_storage(max_marker_allele_blen - 1, marker_ct * 2, marker_allele_ptrs);
+ printf("step 3\n");
fclose_cond(infile);
fclose_cond(bimfile);
fclose_cond(outfile);
@@ -7920,7 +7911,7 @@ int32_t transposed_to_bed(char* tpedname, char* tfamname, char* outname, char* o
}
int32_t vcf_sample_line(char* outname, char* outname_end, int32_t missing_pheno, char* bufptr, char* const_fid, uint32_t double_id, char id_delim, char vcf_idspace_to, char flag_char, uintptr_t* sample_ct_ptr) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t const_fid_len = 0;
uintptr_t sample_ct = 0;
int32_t retval = 0;
@@ -7967,8 +7958,8 @@ int32_t vcf_sample_line(char* outname, char* outname_end, int32_t missing_pheno,
slen = strlen_se(bufptr);
bufptr2 = &(bufptr[slen]);
}
- if (slen > MAX_ID_LEN) {
- sprintf(g_logbuf, "Error: --%ccf does not support sample IDs longer than " MAX_ID_LEN_STR " characters.\n", flag_char);
+ if (slen > MAX_ID_SLEN) {
+ sprintf(g_logbuf, "Error: --%ccf does not support sample IDs longer than " MAX_ID_SLEN_STR " characters.\n", flag_char);
goto vcf_sample_line_ret_INVALID_FORMAT_2;
}
if ((*bufptr == '0') && (slen == 1)) {
@@ -8096,11 +8087,11 @@ uint32_t vcf_gp_diploid_invalid(char* bufptr, char* bufptr2, double vcf_min_gp,
int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, char* const_fid, char id_delim, char vcf_idspace_to, double vcf_min_qual, char* vcf_filter_exceptions_flattened, double vcf_min_gq, double vcf_min_gp, uint32_t vcf_half_call, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- gzFile gz_infile = NULL;
- FILE* outfile = NULL;
- FILE* bimfile = NULL;
- FILE* skip3file = NULL;
- char* sorted_fexcepts = NULL;
+ gzFile gz_infile = nullptr;
+ FILE* outfile = nullptr;
+ FILE* bimfile = nullptr;
+ FILE* skip3file = nullptr;
+ char* sorted_fexcepts = nullptr;
uintptr_t line_idx = 0;
uintptr_t fexcept_ct = 0;
uintptr_t max_fexcept_len = 5;
@@ -8150,7 +8141,6 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
uintptr_t ulkk;
uintptr_t alt_allele_idx;
double dxx;
- uint32_t chrom_len;
uint32_t marker_id_len;
uint32_t alt_idx;
uint32_t alt_ct;
@@ -8158,347 +8148,480 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
uint32_t uii;
uint32_t ujj;
uint32_t ukk;
- int32_t ii;
char cc;
- if (vcf_half_call_explicit_error) {
- vcf_half_call = 0;
- }
- retval = gzopen_read_checked(vcfname, &gz_infile);
- if (retval) {
- goto vcf_to_bed_ret_1;
- }
- if (misc_flags & MISC_VCF_FILTER) {
- // automatically include "." and "PASS"
- fexcept_ct = 2;
- if (vcf_filter_exceptions_flattened) {
- fexcept_ct += count_and_measure_multistr(vcf_filter_exceptions_flattened, &max_fexcept_len);
+ {
+ if (vcf_half_call_explicit_error) {
+ vcf_half_call = 0;
}
- if (bigstack_alloc_c(fexcept_ct * max_fexcept_len, &sorted_fexcepts)) {
- goto vcf_to_bed_ret_NOMEM;
+ retval = gzopen_read_checked(vcfname, &gz_infile);
+ if (retval) {
+ goto vcf_to_bed_ret_1;
}
- memcpy(sorted_fexcepts, ".", 2);
- memcpy(&(sorted_fexcepts[max_fexcept_len]), "PASS", 5);
- if (vcf_filter_exceptions_flattened) {
- bufptr = vcf_filter_exceptions_flattened;
- for (ulii = 2; ulii < fexcept_ct; ulii++) {
- slen = strlen(bufptr) + 1;
- memcpy(&(sorted_fexcepts[ulii * max_fexcept_len]), bufptr, slen);
- bufptr = &(bufptr[slen]);
+ if (misc_flags & MISC_VCF_FILTER) {
+ // automatically include "." and "PASS"
+ fexcept_ct = 2;
+ if (vcf_filter_exceptions_flattened) {
+ fexcept_ct += count_and_measure_multistr(vcf_filter_exceptions_flattened, &max_fexcept_len);
+ }
+ if (bigstack_alloc_c(fexcept_ct * max_fexcept_len, &sorted_fexcepts)) {
+ goto vcf_to_bed_ret_NOMEM;
+ }
+ memcpy(sorted_fexcepts, ".", 2);
+ memcpy(&(sorted_fexcepts[max_fexcept_len]), "PASS", 5);
+ if (vcf_filter_exceptions_flattened) {
+ bufptr = vcf_filter_exceptions_flattened;
+ for (ulii = 2; ulii < fexcept_ct; ulii++) {
+ slen = strlen(bufptr) + 1;
+ memcpy(&(sorted_fexcepts[ulii * max_fexcept_len]), bufptr, slen);
+ bufptr = &(bufptr[slen]);
+ }
+ qsort(sorted_fexcepts, fexcept_ct, max_fexcept_len, strcmp_casted);
+ fexcept_ct = collapse_duplicate_ids(sorted_fexcepts, fexcept_ct, max_fexcept_len, nullptr);
+ // there can't be many filter exceptions, so don't bother to free
+ // unused memory in corner case
}
- qsort(sorted_fexcepts, fexcept_ct, max_fexcept_len, strcmp_casted);
- fexcept_ct = collapse_duplicate_ids(sorted_fexcepts, fexcept_ct, max_fexcept_len, NULL);
- // there can't be many filter exceptions, so don't bother to free unused
- // memory in corner case
}
- }
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size <= MAXLINELEN) {
- goto vcf_to_bed_ret_NOMEM;
- }
-
- loadbuf = (char*)g_bigstack_base;
- loadbuf[loadbuf_size - 1] = ' ';
- while (1) {
- line_idx++;
- if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
- goto vcf_to_bed_ret_READ_FAIL;
- }
- if (!loadbuf[loadbuf_size - 1]) {
- if (loadbuf_size == MAXLINEBUFLEN) {
- goto vcf_to_bed_ret_LONG_LINE;
- }
+ loadbuf_size = bigstack_left();
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size <= MAXLINELEN) {
goto vcf_to_bed_ret_NOMEM;
}
- bufptr = skip_initial_spaces(loadbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
- }
- if (*bufptr != '#') {
- logerrprint("Error: Missing header line in .vcf file.\n");
- goto vcf_to_bed_ret_INVALID_FORMAT;
- }
- if (bufptr[1] != '#') {
- break;
- }
- }
- if (memcmp(bufptr, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", 38)) {
- logerrprint("Error: Improperly formatted .vcf header line.\n");
- goto vcf_to_bed_ret_INVALID_FORMAT;
- }
- bufptr = &(bufptr[38]);
- if (!memcmp(bufptr, "\tFORMAT\t", 8)) {
- retval = vcf_sample_line(outname, outname_end, missing_pheno, &(bufptr[8]), const_fid, double_id, id_delim, vcf_idspace_to, 'v', &sample_ct);
- if (retval) {
- goto vcf_to_bed_ret_1;
- }
- } else if (allow_no_samples) {
- memcpy(outname_end, ".fam", 5);
- if (fopen_checked(outname, "w", &outfile)) {
- goto vcf_to_bed_ret_OPEN_FAIL;
- }
- if (fclose_null(&outfile)) {
- goto vcf_to_bed_ret_WRITE_FAIL;
- }
- }
- if ((!sample_ct) && (!allow_no_samples)) {
- logerrprint("Error: No samples in .vcf file.\n");
- goto vcf_to_bed_ret_INVALID_FORMAT;
- }
- sample_ct4 = (sample_ct + 3) / 4;
- sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
- sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
- final_mask = (~ZEROLU) >> (2 * ((0x7fffffe0 - sample_ct) % BITCT2));
- if (bigstack_alloc_ul(sample_ctv2 * 10, &base_bitfields) ||
- bigstack_alloc_ui(MAX_VCF_ALT, &vcf_alt_cts)) {
- goto vcf_to_bed_ret_NOMEM;
- }
- memcpy(outname_end, ".bim", 5);
- if (fopen_checked(outname, "w", &bimfile)) {
- goto vcf_to_bed_ret_OPEN_FAIL;
- }
- memcpyl3(&(outname_end[2]), "ed");
- if (fopen_checked(outname, FOPEN_WB, &outfile)) {
- goto vcf_to_bed_ret_OPEN_FAIL;
- }
- if (fwrite_checked("l\x1b\x01", 3, outfile)) {
- goto vcf_to_bed_ret_WRITE_FAIL;
- }
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size <= MAXLINELEN) {
- goto vcf_to_bed_ret_NOMEM;
- }
-
- loadbuf = (char*)g_bigstack_base;
- loadbuf[loadbuf_size - 1] = ' ';
- while (1) {
- line_idx++;
- if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
- if (!gzeof(gz_infile)) {
- goto vcf_to_bed_ret_READ_FAIL;
+
+ loadbuf = (char*)g_bigstack_base;
+ loadbuf[loadbuf_size - 1] = ' ';
+ while (1) {
+ line_idx++;
+ if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
+ goto vcf_to_bed_ret_READ_FAIL;
+ }
+ if (!loadbuf[loadbuf_size - 1]) {
+ if (loadbuf_size == MAXLINEBUFLEN) {
+ goto vcf_to_bed_ret_LONG_LINE;
+ }
+ goto vcf_to_bed_ret_NOMEM;
+ }
+ bufptr = skip_initial_spaces(loadbuf);
+ if (is_eoln_kns(*bufptr)) {
+ continue;
}
- break;
- }
- if (!loadbuf[loadbuf_size - 1]) {
- if (loadbuf_size == MAXLINEBUFLEN) {
- goto vcf_to_bed_ret_LONG_LINE;
+ if (*bufptr != '#') {
+ logerrprint("Error: Missing header line in .vcf file.\n");
+ goto vcf_to_bed_ret_INVALID_FORMAT;
+ }
+ if (bufptr[1] != '#') {
+ break;
}
- goto vcf_to_bed_ret_NOMEM;
- }
- bufptr = skip_initial_spaces(loadbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
}
- // strchr instead of memchr since we explicitly need to catch premature \0
- // here
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
+ if (memcmp(bufptr, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", 38)) {
+ logerrprint("Error: Improperly formatted .vcf header line.\n");
+ goto vcf_to_bed_ret_INVALID_FORMAT;
}
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- if (chrom_error(".vcf file", chrom_info_ptr, bufptr, line_idx, ii, allow_extra_chroms)) {
- goto vcf_to_bed_ret_INVALID_FORMAT;
- }
- retval = resolve_or_add_chrom_name(bufptr, ".vcf file", line_idx, chrom_info_ptr, &ii);
+ bufptr = &(bufptr[38]);
+ if (!memcmp(bufptr, "\tFORMAT\t", 8)) {
+ retval = vcf_sample_line(outname, outname_end, missing_pheno, &(bufptr[8]), const_fid, double_id, id_delim, vcf_idspace_to, 'v', &sample_ct);
if (retval) {
- logprint("\n");
- goto vcf_to_bed_ret_1;
+ goto vcf_to_bed_ret_1;
+ }
+ } else if (allow_no_samples) {
+ memcpy(outname_end, ".fam", 5);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto vcf_to_bed_ret_OPEN_FAIL;
+ }
+ if (fclose_null(&outfile)) {
+ goto vcf_to_bed_ret_WRITE_FAIL;
}
}
- if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
- marker_skip_ct++;
- continue;
+ if ((!sample_ct) && (!allow_no_samples)) {
+ logerrprint("Error: No samples in .vcf file.\n");
+ goto vcf_to_bed_ret_INVALID_FORMAT;
}
- chrom_ptr = bufptr;
- chrom_len = (uintptr_t)(bufptr2 - bufptr);
- pos_str = ++bufptr2;
- marker_id = strchr(bufptr2, '\t');
- if (!marker_id) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
+ sample_ct4 = (sample_ct + 3) / 4;
+ sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
+ sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
+ final_mask = (~ZEROLU) >> (2 * ((0x7fffffe0 - sample_ct) % BITCT2));
+ if (bigstack_alloc_ul(sample_ctv2 * 10, &base_bitfields) ||
+ bigstack_alloc_ui(MAX_VCF_ALT, &vcf_alt_cts)) {
+ goto vcf_to_bed_ret_NOMEM;
+ }
+ memcpy(outname_end, ".bim", 5);
+ if (fopen_checked(outname, "w", &bimfile)) {
+ goto vcf_to_bed_ret_OPEN_FAIL;
}
- if ((((unsigned char)(*pos_str)) - '0') >= 10) {
- sprintf(g_logbuf, "Error: Invalid variant bp coordinate on line %" PRIuPTR " of .vcf file.\n", line_idx);
- goto vcf_to_bed_ret_INVALID_FORMAT_2N;
+ memcpyl3(&(outname_end[2]), "ed");
+ if (fopen_checked(outname, FOPEN_WB, &outfile)) {
+ goto vcf_to_bed_ret_OPEN_FAIL;
}
- ref_allele_ptr = strchr(++marker_id, '\t');
- if (!ref_allele_ptr) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
+ if (fwrite_checked("l\x1b\x01", 3, outfile)) {
+ goto vcf_to_bed_ret_WRITE_FAIL;
}
- marker_id_len = (uintptr_t)(ref_allele_ptr - marker_id);
- bufptr = strchr(++ref_allele_ptr, '\t');
- // now ref_allele_ptr finally points to the ref allele
- if (!bufptr) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
+ loadbuf_size = bigstack_left();
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size <= MAXLINELEN) {
+ goto vcf_to_bed_ret_NOMEM;
}
- ref_allele_len = (uintptr_t)(bufptr - ref_allele_ptr);
- alt_ct = 1;
- alt_alleles = ++bufptr;
- cc = *bufptr;
- // ',' < '.'
+
+ loadbuf = (char*)g_bigstack_base;
+ loadbuf[loadbuf_size - 1] = ' ';
while (1) {
- if ((unsigned char)cc <= ',' && (unsigned char)cc != '*') {
- sprintf(g_logbuf, "Error: Invalid alternate allele on line %" PRIuPTR " of .vcf file.\n", line_idx);
- goto vcf_to_bed_ret_INVALID_FORMAT_2N;
- }
- bufptr2 = bufptr;
- do {
- cc = *(++bufptr);
- // allow GATK 3.4 <*:DEL> symbolic allele
- } while (((unsigned char)cc > ',') || (cc == '*'));
- if (((uintptr_t)(bufptr - bufptr2) == ref_allele_len) && (!memcmp(ref_allele_ptr, bufptr2, ref_allele_len))) {
- if ((alt_ct != 1) || (cc == ',')) {
- sprintf(g_logbuf, "Error: ALT allele duplicates REF allele on line %" PRIuPTR " of .vcf file.\n", line_idx);
- goto vcf_to_bed_ret_INVALID_FORMAT_2N;
+ line_idx++;
+ if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
+ if (!gzeof(gz_infile)) {
+ goto vcf_to_bed_ret_READ_FAIL;
}
- *alt_alleles = '.'; // tolerate SHAPEIT output
- }
- if (cc != ',') {
break;
}
- cc = *(++bufptr);
- alt_ct++;
- }
- if (cc != '\t') {
- sprintf(g_logbuf, "Error: Malformed ALT field on line %" PRIuPTR " of .vcf file.\n", line_idx);
- goto vcf_to_bed_ret_INVALID_FORMAT_2N;
- }
- if (biallelic_strict && (alt_ct > 1)) {
- goto vcf_to_bed_skip3;
- }
- bufptr++;
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
- }
- if (check_qual) {
- if (*bufptr == '.') {
- marker_skip_ct++;
+ if (!loadbuf[loadbuf_size - 1]) {
+ if (loadbuf_size == MAXLINEBUFLEN) {
+ goto vcf_to_bed_ret_LONG_LINE;
+ }
+ goto vcf_to_bed_ret_NOMEM;
+ }
+ bufptr = skip_initial_spaces(loadbuf);
+ if (is_eoln_kns(*bufptr)) {
continue;
}
- if (scan_double(bufptr, &dxx)) {
- sprintf(g_logbuf, "Error: Invalid QUAL value on line %" PRIuPTR " of .vcf file.\n", line_idx);
- goto vcf_to_bed_ret_INVALID_FORMAT_2N;
+ // strchr instead of memchr since we explicitly need to catch premature
+ // \0 here
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
}
- if (dxx < vcf_min_qual) {
- marker_skip_ct++;
- continue;
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code_destructive(".vcf file", line_idx, allow_extra_chroms, bufptr, bufptr2, chrom_info_ptr, &cur_chrom_code);
+ if (retval) {
+ goto vcf_to_bed_ret_1;
}
- }
- bufptr = &(bufptr2[1]);
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
- }
- bufptr2++;
- if (fexcept_ct) {
- // bugfix: recognize semicolon delimiter
- bufptr2[-1] = ';';
- vcf_to_bed_check_filter:
- delimiter_ptr = (char*)memchr(bufptr, ';', (uintptr_t)(bufptr2 - bufptr));
- if (bsearch_str(bufptr, (uintptr_t)(delimiter_ptr - bufptr), sorted_fexcepts, max_fexcept_len, fexcept_ct) == -1) {
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
marker_skip_ct++;
- // if we replace the vcf_to_bed_check_filter goto with a while loop,
- // can't use "continue" here
continue;
}
- bufptr = &(delimiter_ptr[1]);
- if (bufptr != bufptr2) {
- goto vcf_to_bed_check_filter;
+ chrom_ptr = bufptr;
+ pos_str = ++bufptr2;
+ marker_id = strchr(bufptr2, '\t');
+ if (!marker_id) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
}
- bufptr2[-1] = '\t';
- }
- if (!sample_ct) {
- alt_allele_idx = 1;
- goto vcf_to_bed_skip_genotype_write;
- }
- bufptr = bufptr2;
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
- }
- bufptr = &(bufptr2[1]);
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
- }
- if (memcmp(bufptr, "GT", 2)) {
- // We previously always skipped this case, but that's inconsistent with
- // how we now handle zero-sample VCFs.
- if (require_gt) {
- marker_skip_ct++;
- continue;
+ if ((((unsigned char)(*pos_str)) - '0') >= 10) {
+ sprintf(g_logbuf, "Error: Invalid variant bp coordinate on line %" PRIuPTR " of .vcf file.\n", line_idx);
+ goto vcf_to_bed_ret_INVALID_FORMAT_2N;
}
- fill_quatervec_55(sample_ct, base_bitfields);
- missing_gt_ct++;
- alt_allele_idx = 1;
- goto vcf_to_bed_genotype_write;
- }
- bufptr2++;
- if (vcf_min_gq != -1) {
- gq_field_pos = 0;
- bufptr2[-1] = ':';
- gq_scan_ptr = bufptr;
- do {
- gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
- if (++gq_scan_ptr == bufptr2) {
- gq_field_pos = 0;
- break;
+ ref_allele_ptr = strchr(++marker_id, '\t');
+ if (!ref_allele_ptr) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ marker_id_len = (uintptr_t)(ref_allele_ptr - marker_id);
+ bufptr = strchr(++ref_allele_ptr, '\t');
+ // now ref_allele_ptr finally points to the ref allele
+ if (!bufptr) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ ref_allele_len = (uintptr_t)(bufptr - ref_allele_ptr);
+ alt_ct = 1;
+ alt_alleles = ++bufptr;
+ cc = *bufptr;
+ // ',' < '.'
+ while (1) {
+ if ((unsigned char)cc <= ',' && (unsigned char)cc != '*') {
+ sprintf(g_logbuf, "Error: Invalid alternate allele on line %" PRIuPTR " of .vcf file.\n", line_idx);
+ goto vcf_to_bed_ret_INVALID_FORMAT_2N;
}
- gq_field_pos++;
- } while (memcmp(gq_scan_ptr, "GQ:", 3));
- bufptr2[-1] = '\t';
- }
- if (vcf_min_gp != -1) {
- gp_field_pos = 0;
- bufptr2[-1] = ':';
- do {
- bufptr = (char*)memchr(bufptr, ':', (uintptr_t)(bufptr2 - bufptr));
- if (++bufptr == bufptr2) {
- gp_field_pos = 0;
+ bufptr2 = bufptr;
+ do {
+ cc = *(++bufptr);
+ // allow GATK 3.4 <*:DEL> symbolic allele
+ } while (((unsigned char)cc > ',') || (cc == '*'));
+ if (((uintptr_t)(bufptr - bufptr2) == ref_allele_len) && (!memcmp(ref_allele_ptr, bufptr2, ref_allele_len))) {
+ if ((alt_ct != 1) || (cc == ',')) {
+ sprintf(g_logbuf, "Error: ALT allele duplicates REF allele on line %" PRIuPTR " of .vcf file.\n", line_idx);
+ goto vcf_to_bed_ret_INVALID_FORMAT_2N;
+ }
+ *alt_alleles = '.'; // tolerate SHAPEIT output
+ }
+ if (cc != ',') {
break;
}
- gp_field_pos++;
- } while (memcmp(bufptr, "GP:", 3));
- bufptr2[-1] = '\t';
- }
- bufptr = bufptr2;
- // okay, finally done with the line header
- if (alt_ct < 10) {
- // slightly faster parsing for the usual case
- fill_ulong_zero(base_bitfields, (alt_ct + 1) * sample_ctv2);
- if ((!biallelic_only) || (alt_ct == 1)) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, bufptr = &(bufptr2[1])) {
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- if (sample_idx != sample_ct - 1) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
+ cc = *(++bufptr);
+ alt_ct++;
+ }
+ if (cc != '\t') {
+ sprintf(g_logbuf, "Error: Malformed ALT field on line %" PRIuPTR " of .vcf file.\n", line_idx);
+ goto vcf_to_bed_ret_INVALID_FORMAT_2N;
+ }
+ if (biallelic_strict && (alt_ct > 1)) {
+ goto vcf_to_bed_skip3;
+ }
+ bufptr++;
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ if (check_qual) {
+ if (*bufptr == '.') {
+ marker_skip_ct++;
+ continue;
+ }
+ if (scan_double(bufptr, &dxx)) {
+ sprintf(g_logbuf, "Error: Invalid QUAL value on line %" PRIuPTR " of .vcf file.\n", line_idx);
+ goto vcf_to_bed_ret_INVALID_FORMAT_2N;
+ }
+ if (dxx < vcf_min_qual) {
+ marker_skip_ct++;
+ continue;
+ }
+ }
+ bufptr = &(bufptr2[1]);
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ bufptr2++;
+ if (fexcept_ct) {
+ // bugfix: recognize semicolon delimiter
+ bufptr2[-1] = ';';
+ vcf_to_bed_check_filter:
+ delimiter_ptr = (char*)memchr(bufptr, ';', (uintptr_t)(bufptr2 - bufptr));
+ if (bsearch_str(bufptr, (uintptr_t)(delimiter_ptr - bufptr), sorted_fexcepts, max_fexcept_len, fexcept_ct) == -1) {
+ marker_skip_ct++;
+ // if we replace the vcf_to_bed_check_filter goto with a while loop,
+ // can't use "continue" here
+ continue;
+ }
+ bufptr = &(delimiter_ptr[1]);
+ if (bufptr != bufptr2) {
+ goto vcf_to_bed_check_filter;
+ }
+ bufptr2[-1] = '\t';
+ }
+ if (!sample_ct) {
+ alt_allele_idx = 1;
+ goto vcf_to_bed_skip_genotype_write;
+ }
+ bufptr = bufptr2;
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ bufptr = &(bufptr2[1]);
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ if (memcmp(bufptr, "GT", 2)) {
+ // We previously always skipped this case, but that's inconsistent with
+ // how we now handle zero-sample VCFs.
+ if (require_gt) {
+ marker_skip_ct++;
+ continue;
+ }
+ fill_quatervec_55(sample_ct, base_bitfields);
+ missing_gt_ct++;
+ alt_allele_idx = 1;
+ goto vcf_to_bed_genotype_write;
+ }
+ bufptr2++;
+ if (vcf_min_gq != -1) {
+ gq_field_pos = 0;
+ bufptr2[-1] = ':';
+ gq_scan_ptr = bufptr;
+ do {
+ gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
+ if (++gq_scan_ptr == bufptr2) {
+ gq_field_pos = 0;
+ break;
+ }
+ gq_field_pos++;
+ } while (memcmp(gq_scan_ptr, "GQ:", 3));
+ bufptr2[-1] = '\t';
+ }
+ if (vcf_min_gp != -1) {
+ gp_field_pos = 0;
+ bufptr2[-1] = ':';
+ do {
+ bufptr = (char*)memchr(bufptr, ':', (uintptr_t)(bufptr2 - bufptr));
+ if (++bufptr == bufptr2) {
+ gp_field_pos = 0;
+ break;
+ }
+ gp_field_pos++;
+ } while (memcmp(bufptr, "GP:", 3));
+ bufptr2[-1] = '\t';
+ }
+ bufptr = bufptr2;
+ // okay, finally done with the line header
+ if (alt_ct < 10) {
+ // slightly faster parsing for the usual case
+ fill_ulong_zero((alt_ct + 1) * sample_ctv2, base_bitfields);
+ if ((!biallelic_only) || (alt_ct == 1)) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, bufptr = &(bufptr2[1])) {
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ if (sample_idx != sample_ct - 1) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ bufptr2 = &(bufptr[strlen_se(bufptr)]);
+ }
+ uii = (unsigned char)(*bufptr) - '0';
+ // time to provide proper support for VCF import; that means, among
+ // other things, providing a useful error message instead of
+ // segfaulting on an invalid GT field, to help other tool
+ // developers.
+
+ if (uii <= 9) {
+ // no GQ field with ./. calls, so this check cannot occur earlier
+ if (gq_field_pos) {
+ // to test: does splitting this off in an entirely separate
+ // loop noticeably speed up common case parsing? I hope
+ // not--this is a predictable branch--but one can never be too
+ // paranoid about this sort of performance leak when hundreds
+ // of GB are involved...
+ gq_scan_ptr = bufptr;
+ for (ujj = 0; ujj < gq_field_pos; ujj++) {
+ gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
+ if (!gq_scan_ptr) {
+ // non-GT fields are allowed to be missing
+ goto vcf_to_bed_missing_gq_1;
+ }
+ gq_scan_ptr++;
+ }
+ if ((!scan_double(gq_scan_ptr, &dxx)) && (dxx < vcf_min_gq)) {
+ continue;
+ }
+ }
+
+ vcf_to_bed_missing_gq_1:
+ cc = bufptr[1];
+ if ((cc != '/') && (cc != '|')) {
+ // haploid
+ vcf_to_bed_haploid_1:
+ if (gp_field_pos) {
+ if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
+ if (ukk) {
+ goto vcf_to_bed_ret_INVALID_GP;
+ }
+ continue;
+ }
+ }
+ set_bit_ul(sample_idx * 2 + 1, &(base_bitfields[uii * sample_ctv2]));
+ } else {
+ cc = bufptr[3];
+ if (((cc != '/') && (cc != '|')) || (bufptr[4] == '.')) {
+ // code triploids, etc. as missing
+ // might want to subject handling of 0/0/. to --vcf-half-call
+ // control
+ ujj = ((unsigned char)bufptr[2]) - '0';
+ if (ujj > 9) {
+ if (ujj != (uint32_t)(((unsigned char)'.') - '0')) {
+ goto vcf_to_bed_ret_INVALID_GT;
+ }
+ if (!vcf_half_call) {
+ goto vcf_to_bed_ret_HALF_CALL_ERROR;
+ } else if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
+ goto vcf_to_bed_haploid_1;
+ } else if (vcf_half_call == VCF_HALF_CALL_REFERENCE) {
+ ujj = 0;
+ goto vcf_to_bed_reference_1;
+ }
+ // fall through on VCF_HALF_CALL_MISSING
+ } else {
+ vcf_to_bed_reference_1:
+ if (gp_field_pos) {
+ if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
+ if (ukk) {
+ goto vcf_to_bed_ret_INVALID_GP;
+ }
+ continue;
+ }
+ }
+ set_bit_ul(sample_idx * 2, &(base_bitfields[uii * sample_ctv2]));
+ base_bitfields[ujj * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
+ }
+ }
+ }
+ } else if (uii != (uint32_t)(((unsigned char)'.') - '0')) {
+ goto vcf_to_bed_ret_INVALID_GT;
+ } else if (vcf_half_call > VCF_HALF_CALL_MISSING) {
+ cc = bufptr[2];
+ if ((cc != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
+ uii = ((unsigned char)cc) - '0';
+ if (uii > 9) {
+ goto vcf_to_bed_ret_INVALID_GT;
+ }
+ if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
+ goto vcf_to_bed_haploid_1;
+ } else {
+ // VCF_HALF_CALL_REFERENCE
+ ujj = 0;
+ goto vcf_to_bed_reference_1;
+ }
+ }
}
- bufptr2 = &(bufptr[strlen_se(bufptr)]);
}
- uii = (unsigned char)(*bufptr) - '0';
- // time to provide proper support for VCF import; that means, among
- // other things, providing a useful error message instead of
- // segfaulting on an invalid GT field, to help other tool
- // developers.
- if (uii <= 9) {
- // no GQ field with ./. calls, so this check cannot occur earlier
+ alt_allele_idx = 1;
+ if (alt_ct > 1) {
+ ulii = popcount2_longs(&(base_bitfields[sample_ctv2]), sample_ctl2);
+ for (alt_idx = 2; alt_idx <= alt_ct; alt_idx++) {
+ uljj = popcount2_longs(&(base_bitfields[sample_ctv2 * alt_idx]), sample_ctl2);
+ if (uljj > ulii) {
+ ulii = uljj;
+ alt_allele_idx = alt_idx;
+ }
+ }
+ }
+ } else {
+ // --biallelic-only, expect early termination in this case
+ alt_allele_idx = 0;
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, bufptr = &(bufptr2[1])) {
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ if (sample_idx != sample_ct - 1) {
+ goto vcf_to_bed_ret_MISSING_TOKENS;
+ }
+ bufptr2 = &(bufptr[strlen_se(bufptr)]);
+ }
+ uii = (unsigned char)(*bufptr) - '0';
+ if (uii && (uii != alt_allele_idx)) {
+ if (uii == (uint32_t)(((unsigned char)'.') - '0')) {
+ if (vcf_half_call <= VCF_HALF_CALL_MISSING) {
+ continue;
+ }
+ cc = bufptr[2];
+ if ((cc == '.') || ((bufptr[1] != '/') && (bufptr[1] != '|'))) {
+ continue;
+ }
+ uii = ((unsigned char)cc) - '0';
+ if (uii > 9) {
+ goto vcf_to_bed_ret_INVALID_GT;
+ }
+ if (uii) {
+ if (!alt_allele_idx) {
+ alt_allele_idx = uii;
+ } else if (uii != alt_allele_idx) {
+ goto vcf_to_bed_skip3;
+ }
+ }
+ if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
+ goto vcf_to_bed_haploid_2;
+ } else {
+ // VCF_HALF_CALL_REFERENCE
+ ujj = 0;
+ goto vcf_to_bed_reference_2;
+ }
+ } else if (uii > 9) {
+ goto vcf_to_bed_ret_INVALID_GT;
+ } else if (alt_allele_idx) {
+ goto vcf_to_bed_skip3;
+ }
+ alt_allele_idx = uii;
+ }
if (gq_field_pos) {
- // to test: does splitting this off in an entirely separate loop
- // noticeably speed up common case parsing? I hope not--this is
- // a predictable branch--but one can never be too paranoid about
- // this sort of performance leak when hundreds of GB are
- // involved...
gq_scan_ptr = bufptr;
for (ujj = 0; ujj < gq_field_pos; ujj++) {
gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
if (!gq_scan_ptr) {
- // non-GT fields are allowed to be missing
- goto vcf_to_bed_missing_gq_1;
+ goto vcf_to_bed_missing_gq_2;
}
gq_scan_ptr++;
}
@@ -8506,11 +8629,10 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
continue;
}
}
- vcf_to_bed_missing_gq_1:
+ vcf_to_bed_missing_gq_2:
cc = bufptr[1];
if ((cc != '/') && (cc != '|')) {
- // haploid
- vcf_to_bed_haploid_1:
+ vcf_to_bed_haploid_2:
if (gp_field_pos) {
if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
if (ukk) {
@@ -8523,51 +8645,52 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
} else {
cc = bufptr[3];
if (((cc != '/') && (cc != '|')) || (bufptr[4] == '.')) {
- // code triploids, etc. as missing
- // might want to subject handling of 0/0/. to --vcf-half-call
- // control
ujj = ((unsigned char)bufptr[2]) - '0';
- if (ujj > 9) {
- if (ujj != (uint32_t)(((unsigned char)'.') - '0')) {
+ if (ujj && (ujj != alt_allele_idx)) {
+ if (ujj == (uint32_t)(((unsigned char)'.') - '0')) {
+ if (!vcf_half_call) {
+ goto vcf_to_bed_ret_HALF_CALL_ERROR;
+ } else if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
+ goto vcf_to_bed_haploid_2;
+ } else if (vcf_half_call == VCF_HALF_CALL_REFERENCE) {
+ ujj = 0;
+ goto vcf_to_bed_reference_2;
+ }
+ continue;
+ } else if (ujj > 9) {
goto vcf_to_bed_ret_INVALID_GT;
+ } else if (alt_allele_idx) {
+ goto vcf_to_bed_skip3;
}
- if (!vcf_half_call) {
- goto vcf_to_bed_ret_HALF_CALL_ERROR;
- } else if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
- goto vcf_to_bed_haploid_1;
- }
- } else {
- if (gp_field_pos) {
- if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
- if (ukk) {
- goto vcf_to_bed_ret_INVALID_GP;
- }
- continue;
+ alt_allele_idx = ujj;
+ }
+ vcf_to_bed_reference_2:
+ if (gp_field_pos) {
+ if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
+ if (ukk) {
+ goto vcf_to_bed_ret_INVALID_GP;
}
+ continue;
}
- set_bit_ul(sample_idx * 2, &(base_bitfields[uii * sample_ctv2]));
- base_bitfields[ujj * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
}
+ set_bit_ul(sample_idx * 2, &(base_bitfields[uii * sample_ctv2]));
+ base_bitfields[ujj * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
}
}
- } else if (uii != (uint32_t)(((unsigned char)'.') - '0')) {
- goto vcf_to_bed_ret_INVALID_GT;
}
- }
- alt_allele_idx = 1;
- if (alt_ct > 1) {
- ulii = popcount2_longs(&(base_bitfields[sample_ctv2]), sample_ctl2);
- for (alt_idx = 2; alt_idx <= alt_ct; alt_idx++) {
- uljj = popcount2_longs(&(base_bitfields[sample_ctv2 * alt_idx]), sample_ctl2);
- if (uljj > ulii) {
- ulii = uljj;
- alt_allele_idx = alt_idx;
- }
+ if (!alt_allele_idx) {
+ alt_allele_idx = 1;
}
}
+ alt_bitfield = &(base_bitfields[alt_allele_idx * sample_ctv2]);
} else {
- // expect early termination in this case
- alt_allele_idx = 0;
+ // bleah, multi-digit genotype codes
+ // two-pass read: determine most common alt allele, then actually load
+ // it
+ fill_ulong_zero(2 * sample_ctv2, base_bitfields);
+ alt_bitfield = &(base_bitfields[sample_ctv2]);
+ fill_uint_zero(alt_ct, vcf_alt_cts);
+ geno_start = bufptr;
for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, bufptr = &(bufptr2[1])) {
bufptr2 = strchr(bufptr, '\t');
if (!bufptr2) {
@@ -8577,110 +8700,166 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
bufptr2 = &(bufptr[strlen_se(bufptr)]);
}
uii = (unsigned char)(*bufptr) - '0';
- if (uii && (uii != alt_allele_idx)) {
- if (uii == (uint32_t)(((unsigned char)'.') - '0')) {
- continue;
- } else if (uii > 9) {
- goto vcf_to_bed_ret_INVALID_GT;
- } else if (alt_allele_idx) {
- goto vcf_to_bed_skip3;
- }
- alt_allele_idx = uii;
- }
- if (gq_field_pos) {
- gq_scan_ptr = bufptr;
- for (ujj = 0; ujj < gq_field_pos; ujj++) {
- gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
- if (!gq_scan_ptr) {
- goto vcf_to_bed_missing_gq_2;
+ if (uii <= 9) {
+ if (gq_field_pos) {
+ gq_scan_ptr = bufptr;
+ for (ujj = 0; ujj < gq_field_pos; ujj++) {
+ gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
+ if (!gq_scan_ptr) {
+ goto vcf_to_bed_missing_gq_3;
+ }
+ gq_scan_ptr++;
+ }
+ if ((!scan_double(gq_scan_ptr, &dxx)) && (dxx < vcf_min_gq)) {
+ continue;
}
- gq_scan_ptr++;
}
- if ((!scan_double(gq_scan_ptr, &dxx)) && (dxx < vcf_min_gq)) {
- continue;
+ vcf_to_bed_missing_gq_3:
+ while (1) {
+ ujj = ((unsigned char)(*(++bufptr))) - 48;
+ if (ujj > 9) {
+ break;
+ }
+ uii = uii * 10 + ujj;
}
- }
- vcf_to_bed_missing_gq_2:
- cc = bufptr[1];
- if ((cc != '/') && (cc != '|')) {
- vcf_to_bed_haploid_2:
- if (gp_field_pos) {
- if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
- if (ukk) {
- goto vcf_to_bed_ret_INVALID_GP;
+ // '/' = ascii 47, '|' = ascii 124
+ if ((ujj != 0xffffffffU) && (ujj != 76)) {
+ // haploid, count 2x
+ vcf_to_bed_haploid_3:
+ if (gp_field_pos) {
+ if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
+ if (ukk) {
+ goto vcf_to_bed_ret_INVALID_GP;
+ }
+ continue;
}
- continue;
}
- }
- set_bit_ul(sample_idx * 2 + 1, &(base_bitfields[uii * sample_ctv2]));
- } else {
- cc = bufptr[3];
- if (((cc != '/') && (cc != '|')) || (bufptr[4] == '.')) {
- ujj = ((unsigned char)bufptr[2]) - '0';
- if (ujj && (ujj != alt_allele_idx)) {
+ if (!uii) {
+ set_bit_ul(sample_idx * 2 + 1, base_bitfields);
+ } else {
+ vcf_alt_cts[uii - 1] += 2;
+ }
+ } else {
+ ujj = (unsigned char)(*(++bufptr)) - '0';
+ if (ujj > 9) {
if (ujj == (uint32_t)(((unsigned char)'.') - '0')) {
if (!vcf_half_call) {
goto vcf_to_bed_ret_HALF_CALL_ERROR;
} else if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
- goto vcf_to_bed_haploid_2;
+ goto vcf_to_bed_haploid_3;
+ } else if (vcf_half_call == VCF_HALF_CALL_REFERENCE) {
+ ujj = 0;
+ goto vcf_to_bed_reference_3;
+ } else {
+ continue;
}
- continue;
- } else if (ujj > 9) {
- goto vcf_to_bed_ret_INVALID_GT;
- } else if (alt_allele_idx) {
- goto vcf_to_bed_skip3;
}
- alt_allele_idx = ujj;
+ goto vcf_to_bed_ret_INVALID_GT;
}
- if (gp_field_pos) {
- if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
- if (ukk) {
- goto vcf_to_bed_ret_INVALID_GP;
+ while (1) {
+ ukk = ((unsigned char)(*(++bufptr))) - 48;
+ if (ukk > 9) {
+ break;
+ }
+ ujj = ujj * 10 + ukk;
+ }
+ if (((ukk != 0xffffffffU) && (ukk != 76)) || (bufptr[1] == '.')) {
+ // diploid; triploid+ skipped
+ vcf_to_bed_reference_3:
+ if (gp_field_pos) {
+ if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
+ if (ukk) {
+ goto vcf_to_bed_ret_INVALID_GP;
+ }
+ continue;
}
- continue;
}
+ if (!uii) {
+ set_bit_ul(sample_idx * 2, base_bitfields);
+ } else {
+ vcf_alt_cts[uii - 1] += 1;
+ }
+ if (!ujj) {
+ base_bitfields[sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
+ } else {
+ vcf_alt_cts[ujj - 1] += 1;
+ }
+ }
+ }
+ } else if (uii != (uint32_t)(((unsigned char)'.') - '0')) {
+ goto vcf_to_bed_ret_INVALID_GT;
+ } else if ((vcf_half_call > VCF_HALF_CALL_MISSING) && (bufptr[2] != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
+ bufptr = &(bufptr[2]);
+ uii = ((unsigned char)(*bufptr)) - '0';
+ if (uii > 9) {
+ goto vcf_to_bed_ret_INVALID_GT;
+ }
+ while (1) {
+ ujj = ((unsigned char)(*(++bufptr))) - 48;
+ if (ujj > 9) {
+ break;
}
- set_bit_ul(sample_idx * 2, &(base_bitfields[uii * sample_ctv2]));
- base_bitfields[ujj * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
+ uii = uii * 10 + ujj;
+ }
+ if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
+ goto vcf_to_bed_haploid_3;
+ } else {
+ ujj = 0;
+ goto vcf_to_bed_reference_3;
}
}
}
- if (!alt_allele_idx) {
- alt_allele_idx = 1;
+ alt_allele_idx = 0;
+ uii = vcf_alt_cts[0];
+ for (alt_idx = 1; alt_idx < alt_ct; alt_idx++) {
+ ujj = vcf_alt_cts[alt_idx];
+ if (biallelic_only && ujj && uii) {
+ goto vcf_to_bed_skip3;
+ }
+ if (ujj > uii) {
+ alt_allele_idx = alt_idx;
+ uii = vcf_alt_cts[alt_idx];
+ }
}
- }
- alt_bitfield = &(base_bitfields[alt_allele_idx * sample_ctv2]);
- } else {
- // bleah, multi-digit genotype codes
- // two-pass read: determine most common alt allele, then actually load it
- fill_ulong_zero(base_bitfields, 2 * sample_ctv2);
- alt_bitfield = &(base_bitfields[sample_ctv2]);
- fill_uint_zero(vcf_alt_cts, alt_ct);
- geno_start = bufptr;
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, bufptr = &(bufptr2[1])) {
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- if (sample_idx != sample_ct - 1) {
- goto vcf_to_bed_ret_MISSING_TOKENS;
- }
- bufptr2 = &(bufptr[strlen_se(bufptr)]);
- }
- uii = (unsigned char)(*bufptr) - '0';
- if (uii <= 9) {
+ alt_allele_idx++;
+ bufptr = geno_start;
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, bufptr = &(bufptr2[1])) {
+ bufptr2 = strchr(bufptr, '\t');
+ if (!bufptr2) {
+ bufptr2 = &(bufptr[strlen_se(bufptr)]);
+ }
+ if (*bufptr == '.') {
+ // validated on first pass
+ if ((vcf_half_call > VCF_HALF_CALL_MISSING) && (bufptr[2] != '.') && ((bufptr[1] == '/') || (bufptr[1] == '|'))) {
+ bufptr = &(bufptr[2]);
+ uii = ((unsigned char)(*bufptr)) - '0';
+ while (1) {
+ ujj = ((unsigned char)(*(++bufptr))) - 48;
+ if (ujj > 9) {
+ break;
+ }
+ uii = uii * 10 + ujj;
+ }
+ if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
+ goto vcf_to_bed_haploid_4;
+ } else {
+ ujj = 0;
+ goto vcf_to_bed_reference_4;
+ }
+ }
+ continue;
+ }
if (gq_field_pos) {
gq_scan_ptr = bufptr;
for (ujj = 0; ujj < gq_field_pos; ujj++) {
gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
- if (!gq_scan_ptr) {
- goto vcf_to_bed_missing_gq_3;
- }
gq_scan_ptr++;
}
if ((!scan_double(gq_scan_ptr, &dxx)) && (dxx < vcf_min_gq)) {
continue;
}
}
- vcf_to_bed_missing_gq_3:
+ uii = (unsigned char)(*bufptr) - '0';
while (1) {
ujj = ((unsigned char)(*(++bufptr))) - 48;
if (ujj > 9) {
@@ -8688,37 +8867,26 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
}
uii = uii * 10 + ujj;
}
- // '/' = ascii 47, '|' = ascii 124
if ((ujj != 0xffffffffU) && (ujj != 76)) {
- // haploid, count 2x
- vcf_to_bed_haploid_3:
- if (gp_field_pos) {
+ if (uii == alt_allele_idx) {
+ vcf_to_bed_haploid_4:
if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
- if (ukk) {
- goto vcf_to_bed_ret_INVALID_GP;
- }
+ // no need for ukk check since already validated
continue;
}
+ set_bit_ul(sample_idx * 2 + 1, alt_bitfield);
}
- if (!uii) {
- set_bit_ul(sample_idx * 2 + 1, base_bitfields);
- } else {
- vcf_alt_cts[uii - 1] += 2;
- }
- } else {
- ujj = (unsigned char)(*(++bufptr)) - '0';
- if (ujj > 9) {
- if (ujj == (uint32_t)(((unsigned char)'.') - '0')) {
- if (!vcf_half_call) {
- goto vcf_to_bed_ret_HALF_CALL_ERROR;
- } else if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
- goto vcf_to_bed_haploid_3;
- } else {
- continue;
- }
+ } else if (*(++bufptr) == '.') {
+ if (uii == alt_allele_idx) {
+ if (vcf_half_call == VCF_HALF_CALL_HAPLOID) {
+ goto vcf_to_bed_haploid_4;
+ } else if (vcf_half_call == VCF_HALF_CALL_REFERENCE) {
+ ujj = 0;
+ goto vcf_to_bed_reference_4;
}
- goto vcf_to_bed_ret_INVALID_GT;
}
+ } else {
+ ujj = (unsigned char)(*bufptr) - '0';
while (1) {
ukk = ((unsigned char)(*(++bufptr))) - 48;
if (ukk > 9) {
@@ -8727,205 +8895,118 @@ int32_t vcf_to_bed(char* vcfname, char* outname, char* outname_end, int32_t miss
ujj = ujj * 10 + ukk;
}
if (((ukk != 0xffffffffU) && (ukk != 76)) || (bufptr[1] == '.')) {
- // diploid; triploid+ skipped
- if (gp_field_pos) {
- if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
- if (ukk) {
- goto vcf_to_bed_ret_INVALID_GP;
- }
- continue;
- }
+ vcf_to_bed_reference_4:
+ if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
+ continue;
}
- if (!uii) {
- set_bit_ul(sample_idx * 2, base_bitfields);
- } else {
- vcf_alt_cts[uii - 1] += 1;
+ if (uii == alt_allele_idx) {
+ set_bit_ul(sample_idx * 2, alt_bitfield);
}
- if (!ujj) {
- base_bitfields[sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
- } else {
- vcf_alt_cts[ujj - 1] += 1;
+ if (ujj == alt_allele_idx) {
+ alt_bitfield[sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
}
}
}
- } else if (uii != (uint32_t)(((unsigned char)'.') - '0')) {
- goto vcf_to_bed_ret_INVALID_GT;
}
}
- alt_allele_idx = 0;
- uii = vcf_alt_cts[0];
- for (alt_idx = 1; alt_idx < alt_ct; alt_idx++) {
- ujj = vcf_alt_cts[alt_idx];
- if (biallelic_only && ujj && uii) {
- goto vcf_to_bed_skip3;
- }
- if (ujj > uii) {
- alt_allele_idx = alt_idx;
- uii = vcf_alt_cts[alt_idx];
- }
+ ref_ptr = base_bitfields;
+ alt_ptr = alt_bitfield;
+ for (sample_idx = 0; sample_idx < sample_ctl2; sample_idx++) {
+ // take ref, then:
+ // * if ref + alt is not two, force to 01
+ // * otherwise, if ref is nonzero, add 1 to match PLINK binary encoding
+ ulii = *ref_ptr;
+ uljj = *alt_ptr++;
+ ulkk = (ulii + uljj) & AAAAMASK;
+ uljj = ulii + ((ulii | (ulii >> 1)) & FIVEMASK);
+ ulii = ulkk | (ulkk >> 1); // 11 in nonmissing positions
+ *ref_ptr++ = (uljj & ulii) | (((~ulkk) >> 1) & FIVEMASK);
}
- alt_allele_idx++;
- bufptr = geno_start;
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, bufptr = &(bufptr2[1])) {
- bufptr2 = strchr(bufptr, '\t');
- if (!bufptr2) {
- bufptr2 = &(bufptr[strlen_se(bufptr)]);
- }
- if (*bufptr == '.') {
- // validated on first pass
- continue;
- }
- if (gq_field_pos) {
- gq_scan_ptr = bufptr;
- for (ujj = 0; ujj < gq_field_pos; ujj++) {
- gq_scan_ptr = (char*)memchr(gq_scan_ptr, ':', (uintptr_t)(bufptr2 - gq_scan_ptr));
- gq_scan_ptr++;
- }
- if ((!scan_double(gq_scan_ptr, &dxx)) && (dxx < vcf_min_gq)) {
- continue;
- }
+ ref_ptr[-1] &= final_mask;
+ vcf_to_bed_genotype_write:
+ if (fwrite_checked(base_bitfields, sample_ct4, outfile)) {
+ goto vcf_to_bed_ret_WRITE_FAIL;
+ }
+ vcf_to_bed_skip_genotype_write:
+ // chrom_ptr already null-terminated
+ fputs(chrom_ptr, bimfile);
+ putc_unlocked('\t', bimfile);
+ fwrite(marker_id, 1, marker_id_len + 1, bimfile);
+ putc_unlocked('0', bimfile);
+ putc_unlocked('\t', bimfile);
+ fwrite(pos_str, 1, marker_id - pos_str, bimfile);
+
+ if (*alt_alleles == '.') {
+ putc_unlocked(missing_geno, bimfile);
+ } else {
+ bufptr = alt_alleles;
+ for (alt_idx = 1; alt_idx < alt_allele_idx; alt_idx++) {
+ bufptr = strchr(bufptr, ',');
+ bufptr++;
}
- uii = (unsigned char)(*bufptr) - '0';
- while (1) {
- ujj = ((unsigned char)(*(++bufptr))) - 48;
- if (ujj > 9) {
- break;
+ bufptr2 = strchr(bufptr, (alt_allele_idx == alt_ct)? '\t' : ',');
+ *bufptr2 = '\0';
+ fputs(bufptr, bimfile);
+ }
+ putc_unlocked('\t', bimfile);
+ alt_alleles[-1] = '\n';
+ *alt_alleles = '\0';
+ if (((((unsigned char)ref_allele_ptr[0]) & 0xdf) == 'N') && (ref_allele_ptr[1] == '\t')) {
+ *ref_allele_ptr = missing_geno;
+ }
+ if (fputs_checked(ref_allele_ptr, bimfile)) {
+ goto vcf_to_bed_ret_WRITE_FAIL;
+ }
+ marker_ct++;
+ if (!(marker_ct % 1000)) {
+ printf("\r--vcf: %uk variants complete.", marker_ct / 1000);
+ fflush(stdout);
+ }
+ continue;
+ vcf_to_bed_skip3:
+ if (skip3_list) {
+ if (!marker_skip_ct) {
+ memcpy(outname_end, ".skip.3allele", 14);
+ if (fopen_checked(outname, "w", &skip3file)) {
+ goto vcf_to_bed_ret_OPEN_FAIL;
}
- uii = uii * 10 + ujj;
+ memcpy(outname_end, ".bed", 5);
}
- if ((ujj != 0xffffffffU) && (ujj != 76)) {
- if (uii == alt_allele_idx) {
- vcf_to_bed_haploid_4:
- if (vcf_gp_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, &ukk)) {
- // no need for ukk check since already validated
- continue;
- }
- set_bit_ul(sample_idx * 2 + 1, alt_bitfield);
- }
- } else if (*(++bufptr) == '.') {
- if ((vcf_half_call == VCF_HALF_CALL_HAPLOID) && (uii == alt_allele_idx)) {
- goto vcf_to_bed_haploid_4;
- }
- } else {
- ujj = (unsigned char)(*bufptr) - '0';
- while (1) {
- ukk = ((unsigned char)(*(++bufptr))) - 48;
- if (ukk > 9) {
- break;
- }
- ujj = ujj * 10 + ukk;
- }
- if (((ukk != 0xffffffffU) && (ukk != 76)) || (bufptr[1] == '.')) {
- if (vcf_gp_diploid_invalid(bufptr, bufptr2, vcf_min_gp, gp_field_pos, uii, ujj, &ukk)) {
- continue;
- }
- if (uii == alt_allele_idx) {
- set_bit_ul(sample_idx * 2, alt_bitfield);
- }
- if (ujj == alt_allele_idx) {
- alt_bitfield[sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
- }
- }
+ marker_id[marker_id_len] = '\0';
+ if (fputs_checked(marker_id, skip3file)) {
+ goto vcf_to_bed_ret_WRITE_FAIL;
}
+ putc_unlocked('\n', skip3file);
}
+ marker_skip_ct++;
}
- ref_ptr = base_bitfields;
- alt_ptr = alt_bitfield;
- for (sample_idx = 0; sample_idx < sample_ctl2; sample_idx++) {
- // take ref, then:
- // * if ref + alt is not two, force to 01
- // * otherwise, if ref is nonzero, add 1 to match PLINK binary encoding
- ulii = *ref_ptr;
- uljj = *alt_ptr++;
- ulkk = (ulii + uljj) & AAAAMASK;
- uljj = ulii + ((ulii | (ulii >> 1)) & FIVEMASK);
- ulii = ulkk | (ulkk >> 1); // 11 in nonmissing positions
- *ref_ptr++ = (uljj & ulii) | (((~ulkk) >> 1) & FIVEMASK);
- }
- ref_ptr[-1] &= final_mask;
- vcf_to_bed_genotype_write:
- if (fwrite_checked(base_bitfields, sample_ct4, outfile)) {
- goto vcf_to_bed_ret_WRITE_FAIL;
- }
- vcf_to_bed_skip_genotype_write:
- chrom_ptr[chrom_len] = '\0';
- fputs(chrom_ptr, bimfile);
- putc('\t', bimfile);
- fwrite(marker_id, 1, marker_id_len + 1, bimfile);
- putc('0', bimfile);
- putc('\t', bimfile);
- fwrite(pos_str, 1, marker_id - pos_str, bimfile);
-
- if (*alt_alleles == '.') {
- putc(missing_geno, bimfile);
- } else {
- bufptr = alt_alleles;
- for (alt_idx = 1; alt_idx < alt_allele_idx; alt_idx++) {
- bufptr = strchr(bufptr, ',');
- bufptr++;
- }
- bufptr2 = strchr(bufptr, (alt_allele_idx == alt_ct)? '\t' : ',');
- *bufptr2 = '\0';
- fputs(bufptr, bimfile);
- }
- putc('\t', bimfile);
- alt_alleles[-1] = '\n';
- *alt_alleles = '\0';
- if (((((unsigned char)ref_allele_ptr[0]) & 0xdf) == 'N') && (ref_allele_ptr[1] == '\t')) {
- *ref_allele_ptr = missing_geno;
- }
- if (fputs_checked(ref_allele_ptr, bimfile)) {
+ if (fclose_null(&bimfile) || fclose_null(&outfile)) {
goto vcf_to_bed_ret_WRITE_FAIL;
}
- marker_ct++;
- if (!(marker_ct % 1000)) {
- printf("\r--vcf: %uk variants complete.", marker_ct / 1000);
- fflush(stdout);
- }
- continue;
- vcf_to_bed_skip3:
- if (skip3_list) {
- if (!marker_skip_ct) {
- memcpy(outname_end, ".skip.3allele", 14);
- if (fopen_checked(outname, "w", &skip3file)) {
- goto vcf_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".bed", 5);
- }
- marker_id[marker_id_len] = '\0';
- if (fputs_checked(marker_id, skip3file)) {
+ if (skip3file) {
+ if (fclose_null(&skip3file)) {
goto vcf_to_bed_ret_WRITE_FAIL;
}
- putc('\n', skip3file);
}
- marker_skip_ct++;
- }
- if (fclose_null(&bimfile) || fclose_null(&outfile)) {
- goto vcf_to_bed_ret_WRITE_FAIL;
- }
- if (skip3file) {
- if (fclose_null(&skip3file)) {
- goto vcf_to_bed_ret_WRITE_FAIL;
+ putc_unlocked('\r', stdout);
+ if ((!marker_ct) && (!allow_no_variants)) {
+ if (marker_skip_ct) {
+ logerrprint("Error: All variants in VCF skipped.\n");
+ retval = RET_ALL_MARKERS_EXCLUDED;
+ goto vcf_to_bed_ret_1;
+ } else {
+ logerrprint("Error: No variants in VCF file.\n");
+ goto vcf_to_bed_ret_INVALID_FORMAT;
+ }
}
- }
- putchar('\r');
- if ((!marker_ct) && (!allow_no_variants)) {
+ *outname_end = '\0';
+ LOGPRINTFWW("--vcf: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
if (marker_skip_ct) {
- logerrprint("Error: All variants in VCF skipped.\n");
- retval = RET_ALL_MARKERS_EXCLUDED;
- goto vcf_to_bed_ret_1;
- } else {
- logerrprint("Error: No variants in VCF file.\n");
- goto vcf_to_bed_ret_INVALID_FORMAT;
+ LOGPRINTF("(%" PRIuPTR " variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
+ }
+ if (missing_gt_ct) {
+ LOGERRPRINTF("Warning: %" PRIuPTR " variant record%s had no GT field.\n", missing_gt_ct, (missing_gt_ct == 1)? "" : "s");
}
- }
- *outname_end = '\0';
- LOGPRINTFWW("--vcf: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
- if (marker_skip_ct) {
- LOGPRINTF("(%" PRIuPTR " variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
- }
- if (missing_gt_ct) {
- LOGERRPRINTF("Warning: %" PRIuPTR " variant record%s had no GT field.\n", missing_gt_ct, (missing_gt_ct == 1)? "" : "s");
}
while (0) {
vcf_to_bed_ret_NOMEM:
@@ -9078,14 +9159,14 @@ int32_t read_bcf_typed_string(gzFile gz_infile, char* readbuf, uint32_t maxlen,
int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t missing_pheno, uint64_t misc_flags, char* const_fid, char id_delim, char vcf_idspace_to, double vcf_min_qual, char* vcf_filter_exceptions_flattened, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- gzFile gz_infile = NULL;
- FILE* outfile = NULL;
- FILE* bimfile = NULL;
- FILE* skip3file = NULL;
- char* sorted_fexcepts = NULL;
- uintptr_t* fexcept_bitfield = NULL;
- uint32_t* fexcept_idxs = NULL;
- Ll_str* contig_list = NULL;
+ gzFile gz_infile = nullptr;
+ FILE* outfile = nullptr;
+ FILE* bimfile = nullptr;
+ FILE* skip3file = nullptr;
+ char* sorted_fexcepts = nullptr;
+ uintptr_t* fexcept_bitfield = nullptr;
+ uint32_t* fexcept_idxs = nullptr;
+ Ll_str* contig_list = nullptr;
char* tbuf2 = &(g_textbuf[MAXLINELEN]);
uintptr_t contig_ct = 0;
uintptr_t max_contig_len = 0;
@@ -9154,536 +9235,508 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
uint32_t ujj;
uint32_t ukk;
int32_t ii;
- if (check_qual) {
- if (vcf_min_qual > FLT_MAXD) {
- logerrprint("Error: --vcf-min-qual parameter too large.\n");
- goto bcf_to_bed_ret_INVALID_CMDLINE;
- }
- vcf_min_qualf = (float)vcf_min_qual;
- memcpy(&vcf_min_qualf_compare_bits, &vcf_min_qualf, 4);
- // +infinity = 0x7f800000; this should pass the comparison
- // quiet nan = 0x7f800001; this (and other nans) should fail
- vcf_min_qualf_compare_bits += 0x807fffffU;
- }
- // todo: check if a specialized bgzf reader can do faster forward seeks when
- // we don't have precomputed virtual offsets
- retval = gzopen_read_checked(bcfname, &gz_infile);
- if (retval) {
- goto bcf_to_bed_ret_1;
- }
- if (gzread(gz_infile, g_textbuf, 5) < 5) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
- }
- if (memcmp(g_textbuf, "BCF\2", 4)) {
- if (memcmp(g_textbuf, "BCF\4", 4)) {
- LOGPREPRINTFWW("Error: %s is not a BCF2 file.\n", bcfname);
- } else {
- LOGPREPRINTFWW("Error: %s appears to be a BCF1 file; --bcf only supports BCF2. Use 'bcftools view' to convert it to a PLINK-readable VCF.\n", bcfname);
+ {
+ if (check_qual) {
+ if (vcf_min_qual > FLT_MAXD) {
+ logerrprint("Error: --vcf-min-qual parameter too large.\n");
+ goto bcf_to_bed_ret_INVALID_CMDLINE;
+ }
+ vcf_min_qualf = (float)vcf_min_qual;
+ memcpy(&vcf_min_qualf_compare_bits, &vcf_min_qualf, 4);
+ // +infinity = 0x7f800000; this should pass the comparison
+ // quiet nan = 0x7f800001; this (and other nans) should fail
+ vcf_min_qualf_compare_bits += 0x807fffffU;
+ }
+ // todo: check if a specialized bgzf reader can do faster forward seeks
+ // when we don't have precomputed virtual offsets
+ retval = gzopen_read_checked(bcfname, &gz_infile);
+ if (retval) {
+ goto bcf_to_bed_ret_1;
}
- goto bcf_to_bed_ret_INVALID_FORMAT_2;
- }
- if (((unsigned char)(g_textbuf[4])) > 2) {
- // defend against 0x82-0x87 being given a meaning in 8-bit int vectors,
- // etc.
- LOGPREPRINTFWW("Error: %s appears to be formatted as BCFv2.%u; this PLINK build only supports v2.0-2.2. You may need to obtain an updated version of PLINK.\n", bcfname, ((unsigned char)(g_textbuf[4])));
- goto bcf_to_bed_ret_INVALID_FORMAT_2;
- }
- if (gzread(gz_infile, &header_size, 4) < 4) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
- }
- // must have at least fileformat, and first eight fields of #CHROM line. GT
- // not required with --allow-no-samples, contig not require with
- // --allow-no-vars.
- if (header_size < 59) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
- }
- if (vcf_filter_exceptions_flattened) {
- // vcf_filter guaranteed to be true
- fexcept_ct = count_and_measure_multistr(vcf_filter_exceptions_flattened, &max_fexcept_len);
- if (bigstack_end_alloc_c(fexcept_ct * max_fexcept_len, &sorted_fexcepts)) {
- goto bcf_to_bed_ret_NOMEM;
+ if (gzread(gz_infile, g_textbuf, 5) < 5) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
}
- bufptr = vcf_filter_exceptions_flattened;
- for (ulii = 0; ulii < fexcept_ct; ulii++) {
- slen = strlen(bufptr) + 1;
- memcpy(&(sorted_fexcepts[ulii * max_fexcept_len]), bufptr, slen);
- bufptr = &(bufptr[slen]);
+ if (memcmp(g_textbuf, "BCF\2", 4)) {
+ if (memcmp(g_textbuf, "BCF\4", 4)) {
+ LOGPREPRINTFWW("Error: %s is not a BCF2 file.\n", bcfname);
+ } else {
+ LOGPREPRINTFWW("Error: %s appears to be a BCF1 file; --bcf only supports BCF2. Use 'bcftools view' to convert it to a PLINK-readable VCF.\n", bcfname);
+ }
+ goto bcf_to_bed_ret_INVALID_FORMAT_2;
}
- qsort(sorted_fexcepts, fexcept_ct, max_fexcept_len, strcmp_casted);
- fexcept_ct = collapse_duplicate_ids(sorted_fexcepts, fexcept_ct, max_fexcept_len, NULL);
- if (bigstack_end_calloc_ui(fexcept_ct, &fexcept_idxs)) {
- goto bcf_to_bed_ret_NOMEM;
+ if (((unsigned char)(g_textbuf[4])) > 2) {
+ // defend against 0x82-0x87 being given a meaning in 8-bit int vectors,
+ // etc.
+ LOGPREPRINTFWW("Error: %s appears to be formatted as BCFv2.%u; this PLINK build only supports v2.0-2.2. You may need to obtain an updated version of PLINK.\n", bcfname, ((unsigned char)(g_textbuf[4])));
+ goto bcf_to_bed_ret_INVALID_FORMAT_2;
}
- }
- if (bigstack_left() <= header_size) {
- goto bcf_to_bed_ret_NOMEM;
- }
- loadbuf = (char*)bigstack_alloc(header_size + 1);
- if ((uint32_t)((uint64_t)gzread(gz_infile, loadbuf, header_size)) != header_size) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
- }
- if (!(*loadbuf)) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
- }
- if (!loadbuf[header_size - 1]) {
- loadbuf_end = &(loadbuf[header_size - 2]);
- while (!(*loadbuf_end)) {
- loadbuf_end--;
+ if (gzread(gz_infile, &header_size, 4) < 4) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
}
- loadbuf_end++;
- header_size = (uintptr_t)(loadbuf_end - loadbuf);
- } else {
- loadbuf_end = &(loadbuf[header_size]);
- }
- *loadbuf_end = '\n';
- header_size++;
- linebuf = loadbuf;
- while (1) {
- linebuf_end = (char*)memchr(linebuf, '\n', header_size);
- if (linebuf[0] != '#') {
+ // must have at least fileformat, and first eight fields of #CHROM line.
+ // GT not required with --allow-no-samples, contig not required with
+ // --allow-no-vars.
+ if (header_size < 59) {
goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- if (linebuf[1] != '#') {
- if (linebuf[1] == 'C') {
- break; // end of meta-info
+ if (vcf_filter_exceptions_flattened) {
+ // vcf_filter guaranteed to be true
+ fexcept_ct = count_and_measure_multistr(vcf_filter_exceptions_flattened, &max_fexcept_len);
+ if (bigstack_end_alloc_c(fexcept_ct * max_fexcept_len, &sorted_fexcepts)) {
+ goto bcf_to_bed_ret_NOMEM;
+ }
+ bufptr = vcf_filter_exceptions_flattened;
+ for (ulii = 0; ulii < fexcept_ct; ulii++) {
+ slen = strlen(bufptr) + 1;
+ memcpy(&(sorted_fexcepts[ulii * max_fexcept_len]), bufptr, slen);
+ bufptr = &(bufptr[slen]);
+ }
+ qsort(sorted_fexcepts, fexcept_ct, max_fexcept_len, strcmp_casted);
+ fexcept_ct = collapse_duplicate_ids(sorted_fexcepts, fexcept_ct, max_fexcept_len, nullptr);
+ if (bigstack_end_calloc_ui(fexcept_ct, &fexcept_idxs)) {
+ goto bcf_to_bed_ret_NOMEM;
}
+ }
+ if (bigstack_left() <= header_size) {
+ goto bcf_to_bed_ret_NOMEM;
+ }
+ loadbuf = (char*)bigstack_alloc(header_size + 1);
+ if ((uint32_t)((uint64_t)gzread(gz_infile, loadbuf, header_size)) != header_size) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
+ }
+ if (!(*loadbuf)) {
goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- if (linebuf[2] == 'F') {
- if (!memcmp(&(linebuf[3]), "ORMAT=<ID=", 10)) {
- if (!memcmp(&(linebuf[13]), "GT,", 3)) {
- if (gt_idx) {
- logerrprint("Error: Duplicate GT format specifier in .bcf file.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
+ if (!loadbuf[header_size - 1]) {
+ loadbuf_end = &(loadbuf[header_size - 2]);
+ while (!(*loadbuf_end)) {
+ loadbuf_end--;
+ }
+ loadbuf_end++;
+ header_size = (uintptr_t)(loadbuf_end - loadbuf);
+ } else {
+ loadbuf_end = &(loadbuf[header_size]);
+ }
+ *loadbuf_end = '\n';
+ header_size++;
+ linebuf = loadbuf;
+ while (1) {
+ linebuf_end = (char*)memchr(linebuf, '\n', header_size);
+ if (linebuf[0] != '#') {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ if (linebuf[1] != '#') {
+ if (linebuf[1] == 'C') {
+ break; // end of meta-info
+ }
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ if (linebuf[2] == 'F') {
+ if (!memcmp(&(linebuf[3]), "ORMAT=<ID=", 10)) {
+ if (!memcmp(&(linebuf[13]), "GT,", 3)) {
+ if (gt_idx) {
+ logerrprint("Error: Duplicate GT format specifier in .bcf file.\n");
+ goto bcf_to_bed_ret_INVALID_FORMAT;
+ }
+ if (memcmp(&(linebuf[16]), "Number=1,Type=String,Description=", 33)) {
+ logerrprint("Error: Unrecognized GT field format in .bcf file.\n");
+ goto bcf_to_bed_ret_INVALID_FORMAT;
+ }
+ gt_idx = stringdict_ct;
}
- if (memcmp(&(linebuf[16]), "Number=1,Type=String,Description=", 33)) {
- logerrprint("Error: Unrecognized GT field format in .bcf file.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
+ stringdict_ct++;
+ } else if (!memcmp(&(linebuf[3]), "ILTER=<ID=", 10)) {
+ bufptr = &(linebuf[13]);
+ bufptr2 = (char*)memchr(bufptr, ',', linebuf_end - bufptr);
+ if (bufptr2 == linebuf_end) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ if (memcmp(bufptr, "PASS,", 5)) {
+ if (fexcept_ct) {
+ ii = bsearch_str(bufptr, (uintptr_t)(bufptr2 - bufptr), sorted_fexcepts, max_fexcept_len, fexcept_ct);
+ if (ii != -1) {
+ if (fexcept_idxs[(uint32_t)ii]) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ fexcept_idxs[(uint32_t)ii] = stringdict_ct;
+ }
+ }
+ stringdict_ct++;
}
- gt_idx = stringdict_ct;
}
+ } else if (!memcmp(&(linebuf[2]), "INFO=<ID=", 9)) {
stringdict_ct++;
- } else if (!memcmp(&(linebuf[3]), "ILTER=<ID=", 10)) {
+ } else if (!memcmp(&(linebuf[2]), "contig=<ID=", 11)) {
bufptr = &(linebuf[13]);
bufptr2 = (char*)memchr(bufptr, ',', linebuf_end - bufptr);
if (bufptr2 == linebuf_end) {
goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- if (memcmp(bufptr, "PASS,", 5)) {
- if (fexcept_ct) {
- ii = bsearch_str(bufptr, (uintptr_t)(bufptr2 - bufptr), sorted_fexcepts, max_fexcept_len, fexcept_ct);
- if (ii != -1) {
- if (fexcept_idxs[(uint32_t)ii]) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
- }
- fexcept_idxs[(uint32_t)ii] = stringdict_ct;
- }
- }
- stringdict_ct++;
+ slen = (uintptr_t)(bufptr2 - bufptr);
+ if (slen >= max_contig_len) {
+ max_contig_len = slen + 1;
}
+ if (bigstack_end_alloc_llstr(slen + 1, &ll_ptr)) {
+ goto bcf_to_bed_ret_NOMEM;
+ }
+ ll_ptr->next = contig_list;
+ memcpyx(ll_ptr->ss, bufptr, slen, '\0');
+ contig_list = ll_ptr;
+ contig_ct++;
}
- } else if (!memcmp(&(linebuf[2]), "INFO=<ID=", 9)) {
- stringdict_ct++;
- } else if (!memcmp(&(linebuf[2]), "contig=<ID=", 11)) {
- bufptr = &(linebuf[13]);
- bufptr2 = (char*)memchr(bufptr, ',', linebuf_end - bufptr);
- if (bufptr2 == linebuf_end) {
+ linebuf = &(linebuf_end[1]);
+ if (linebuf >= loadbuf_end) {
goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- slen = (uintptr_t)(bufptr2 - bufptr);
- if (slen >= max_contig_len) {
- max_contig_len = slen + 1;
- }
- if (bigstack_end_alloc_llstr(slen + 1, &ll_ptr)) {
- goto bcf_to_bed_ret_NOMEM;
- }
- ll_ptr->next = contig_list;
- memcpyx(ll_ptr->ss, bufptr, slen, '\0');
- contig_list = ll_ptr;
- contig_ct++;
- }
- linebuf = &(linebuf_end[1]);
- if (linebuf >= loadbuf_end) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
- }
- }
- if ((!allow_no_variants) && (!contig_ct)) {
- logerrprint("Error: No contig fields in .bcf header.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
- }
- if (memcmp(linebuf, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", 38)) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
- }
- if (!memcmp(&(linebuf[38]), "\tFORMAT\t", 8)) {
- *linebuf_end = '\0';
- retval = vcf_sample_line(outname, outname_end, missing_pheno, &(linebuf[46]), const_fid, double_id, id_delim, vcf_idspace_to, 'b', &ulii);
- if (retval) {
- goto bcf_to_bed_ret_1;
}
- if (ulii >= 0x1000000) {
- // variant records only have 24 bits allocated for n_sample
- logerrprint("Error: .bcf file contains >= 2^24 sample IDs.\n");
+ if ((!allow_no_variants) && (!contig_ct)) {
+ logerrprint("Error: No contig fields in .bcf header.\n");
goto bcf_to_bed_ret_INVALID_FORMAT;
}
- sample_ct = ulii;
- } else if (allow_no_samples) {
- gt_idx = 0;
- memcpy(outname_end, ".fam", 5);
- if (fopen_checked(outname, "w", &outfile)) {
- goto bcf_to_bed_ret_OPEN_FAIL;
- }
- if (fclose_null(&outfile)) {
- goto bcf_to_bed_ret_WRITE_FAIL;
+ if (memcmp(linebuf, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", 38)) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- }
- if ((!sample_ct) && (!allow_no_samples)) {
- logerrprint("Error: No samples in .bcf file.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
- }
- sample_ct4 = (sample_ct + 3) / 4;
- sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
- sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
- bigstack_reset(loadbuf);
- ulii = BITCT_TO_WORDCT(contig_ct);
- if (bigstack_calloc_ul(ulii, &contig_bitfield) ||
- bigstack_alloc_c(contig_ct * max_contig_len, &contigdict)) {
- goto bcf_to_bed_ret_NOMEM;
- }
- ulii = contig_ct;
- while (ulii) {
- ulii--;
- ii = get_chrom_code(chrom_info_ptr, contig_list->ss);
- if (ii < 0) {
- if (chrom_error(".bcf file", chrom_info_ptr, contig_list->ss, 0, ii, allow_extra_chroms)) {
+ if (!memcmp(&(linebuf[38]), "\tFORMAT\t", 8)) {
+ *linebuf_end = '\0';
+ retval = vcf_sample_line(outname, outname_end, missing_pheno, &(linebuf[46]), const_fid, double_id, id_delim, vcf_idspace_to, 'b', &ulii);
+ if (retval) {
+ goto bcf_to_bed_ret_1;
+ }
+ if (ulii >= 0x1000000) {
+ // variant records only have 24 bits allocated for n_sample
+ logerrprint("Error: .bcf file contains >= 2^24 sample IDs.\n");
goto bcf_to_bed_ret_INVALID_FORMAT;
}
- retval = resolve_or_add_chrom_name(contig_list->ss, ".bcf file", 0, chrom_info_ptr, &ii);
- if (retval) {
- goto bcf_to_bed_ret_1;
+ sample_ct = ulii;
+ } else if (allow_no_samples) {
+ gt_idx = 0;
+ memcpy(outname_end, ".fam", 5);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto bcf_to_bed_ret_OPEN_FAIL;
+ }
+ if (fclose_null(&outfile)) {
+ goto bcf_to_bed_ret_WRITE_FAIL;
}
}
- if (is_set(chrom_info_ptr->chrom_mask, ii)) {
- set_bit_ul(ulii, contig_bitfield);
- strcpy(&(contigdict[ulii * max_contig_len]), contig_list->ss);
+ if ((!sample_ct) && (!allow_no_samples)) {
+ logerrprint("Error: No samples in .bcf file.\n");
+ goto bcf_to_bed_ret_INVALID_FORMAT;
}
- contig_list = contig_list->next;
- }
- if (vcf_filter) {
- uii = BITCT_TO_WORDCT(stringdict_ct);
- if (bigstack_calloc_ul(uii, &fexcept_bitfield)) {
+ sample_ct4 = (sample_ct + 3) / 4;
+ sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
+ sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
+ bigstack_reset(loadbuf);
+ ulii = BITCT_TO_WORDCT(contig_ct);
+ if (bigstack_calloc_ul(ulii, &contig_bitfield) ||
+ bigstack_alloc_c(contig_ct * max_contig_len, &contigdict)) {
goto bcf_to_bed_ret_NOMEM;
}
- fexcept_bitfield[0] = 1; // 'PASS'
- for (ulii = 0; ulii < fexcept_ct; ulii++) {
- // fexcept_idxs[] not dereferenced if --vcf-filter had no parameters
- SET_BIT(fexcept_idxs[ulii], fexcept_bitfield);
+ ulii = contig_ct;
+ while (ulii) {
+ ulii--;
+ const uint32_t chrom_name_slen = strlen(contig_list->ss);
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code(contig_list->ss, ".bcf file", 0, chrom_name_slen, allow_extra_chroms, chrom_info_ptr, &cur_chrom_code);
+ if (retval) {
+ goto bcf_to_bed_ret_1;
+ }
+ if (is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
+ set_bit_ul(ulii, contig_bitfield);
+ strcpy(&(contigdict[ulii * max_contig_len]), contig_list->ss);
+ }
+ contig_list = contig_list->next;
}
- }
- bigstack_end_reset(bigstack_end_mark);
-
- final_mask = (~ZEROLU) >> (2 * ((0x7fffffe0 - sample_ct) % BITCT2));
- if (bigstack_alloc_c(sample_ct * 12, &loadbuf) ||
- bigstack_alloc_c(65536, &marker_id) ||
- bigstack_alloc_c(NON_BIGSTACK_MIN, &allele_buf) ||
- bigstack_alloc_ui(65535, &allele_lens) ||
- bigstack_alloc_ui(MAX_VCF_ALT, &vcf_alt_cts)) {
- goto bcf_to_bed_ret_NOMEM;
- }
- allele_ptrs = (char**)bigstack_alloc(65535 * sizeof(intptr_t));
- if (!allele_ptrs) {
- goto bcf_to_bed_ret_NOMEM;
- }
- max_allele_ct = bigstack_left() / (sample_ctv2 * sizeof(intptr_t));
- if (max_allele_ct < 3) {
- goto bcf_to_bed_ret_NOMEM;
- } else if (max_allele_ct > 65535) {
- max_allele_ct = 65535;
- }
- bigstack_alloc_ul(sample_ctv2 * max_allele_ct, &base_bitfields);
- memcpy(outname_end, ".bim", 5);
- if (fopen_checked(outname, "w", &bimfile)) {
- goto bcf_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".bed", 5);
- if (fopen_checked(outname, FOPEN_WB, &outfile)) {
- goto bcf_to_bed_ret_OPEN_FAIL;
- }
- if (fwrite_checked("l\x1b\x01", 3, outfile)) {
- goto bcf_to_bed_ret_WRITE_FAIL;
- }
- if ((!gt_idx) && require_gt) {
- if (!allow_no_variants) {
- logerrprint("Error: .bcf header doesn't define FORMAT:GT.\n");
- retval = RET_ALL_MARKERS_EXCLUDED;
- goto bcf_to_bed_ret_1;
+ if (vcf_filter) {
+ uii = BITCT_TO_WORDCT(stringdict_ct);
+ if (bigstack_calloc_ul(uii, &fexcept_bitfield)) {
+ goto bcf_to_bed_ret_NOMEM;
+ }
+ fexcept_bitfield[0] = 1; // 'PASS'
+ for (ulii = 0; ulii < fexcept_ct; ulii++) {
+ // fexcept_idxs[] not dereferenced if --vcf-filter had no parameters
+ SET_BIT(fexcept_idxs[ulii], fexcept_bitfield);
+ }
}
- logerrprint("Warning: Skipping all variants since .bcf header doesn't define FORMAT:GT.\n");
- goto bcf_to_bed_skip_all_variants;
- }
- // possible todo: optimize other no-GT cases. e.g. if no sample information
- // is needed, don't write the .bed or .fam.
+ bigstack_end_reset(bigstack_end_mark);
- memcpyl3(tbuf2, "\t0\t");
- while (1) {
- lastloc = gztell(gz_infile) + 8;
- if (gzread(gz_infile, bcf_var_header, 32) < 32) {
- break;
+ final_mask = (~ZEROLU) >> (2 * ((0x7fffffe0 - sample_ct) % BITCT2));
+ if (bigstack_alloc_c(sample_ct * 12, &loadbuf) ||
+ bigstack_alloc_c(65536, &marker_id) ||
+ bigstack_alloc_c(NON_BIGSTACK_MIN, &allele_buf) ||
+ bigstack_alloc_ui(65535, &allele_lens) ||
+ bigstack_alloc_ui(MAX_VCF_ALT, &vcf_alt_cts)) {
+ goto bcf_to_bed_ret_NOMEM;
}
- if ((bcf_var_header[0] <= 24) || (bcf_var_header[2] >= contig_ct)) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ allele_ptrs = (char**)bigstack_alloc(65535 * sizeof(intptr_t));
+ if (!allele_ptrs) {
+ goto bcf_to_bed_ret_NOMEM;
}
- if ((!bcf_var_header[1]) || (!is_set(contig_bitfield, bcf_var_header[2]))) {
- goto bcf_to_bed_marker_skip;
+ max_allele_ct = bigstack_left() / (sample_ctv2 * sizeof(intptr_t));
+ if (max_allele_ct < 3) {
+ goto bcf_to_bed_ret_NOMEM;
+ } else if (max_allele_ct > 65535) {
+ max_allele_ct = 65535;
}
- if (check_qual) {
- if (bcf_var_header[5] + 0x807fffffU < vcf_min_qualf_compare_bits) {
- goto bcf_to_bed_marker_skip;
- }
+ bigstack_alloc_ul(sample_ctv2 * max_allele_ct, &base_bitfields);
+ memcpy(outname_end, ".bim", 5);
+ if (fopen_checked(outname, "w", &bimfile)) {
+ goto bcf_to_bed_ret_OPEN_FAIL;
}
- retval = read_bcf_typed_string(gz_infile, marker_id, 65535, &marker_id_len);
- if (retval) {
- goto bcf_to_bed_ret_1;
+ memcpy(outname_end, ".bed", 5);
+ if (fopen_checked(outname, FOPEN_WB, &outfile)) {
+ goto bcf_to_bed_ret_OPEN_FAIL;
}
- n_allele = bcf_var_header[6] >> 16;
- if (biallelic_strict && (n_allele > 2)) {
- goto bcf_to_bed_skip3;
+ if (fwrite_checked("l\x1b\x01", 3, outfile)) {
+ goto bcf_to_bed_ret_WRITE_FAIL;
}
- if (n_allele > max_allele_ct) {
- goto bcf_to_bed_ret_NOMEM;
+ if ((!gt_idx) && require_gt) {
+ if (!allow_no_variants) {
+ logerrprint("Error: .bcf header doesn't define FORMAT:GT.\n");
+ retval = RET_ALL_MARKERS_EXCLUDED;
+ goto bcf_to_bed_ret_1;
+ }
+ logerrprint("Warning: Skipping all variants since .bcf header doesn't define FORMAT:GT.\n");
+ goto bcf_to_bed_skip_all_variants;
}
- ujj = NON_BIGSTACK_MIN; // remaining allele name buffer space
- bufptr = allele_buf;
- if (n_allele) {
- for (uii = 0; uii < n_allele; uii++) {
- retval = read_bcf_typed_string(gz_infile, bufptr, ujj, &ukk);
- if (retval) {
- goto bcf_to_bed_ret_1;
- }
- if ((!uii) && ((!ukk) || ((ukk == 1) && (*bufptr == 'N')))) {
- // convert ref 'N' or '.' to missing genotype. ('.' case was skipped
- // the past, and 'N' was not converted.)
- allele_lens[0] = 1;
- allele_ptrs[0] = bufptr;
- *bufptr++ = missing_geno;
- } else {
- allele_lens[uii] = ukk;
- allele_ptrs[uii] = bufptr;
- bufptr = &(bufptr[ukk]);
+ // possible todo: optimize other no-GT cases. e.g. if no sample
+ // information is needed, don't write the .bed or .fam.
+
+ memcpyl3(tbuf2, "\t0\t");
+ while (1) {
+ lastloc = gztell(gz_infile) + 8;
+ if (gzread(gz_infile, bcf_var_header, 32) < 32) {
+ break;
+ }
+ if ((bcf_var_header[0] <= 24) || (bcf_var_header[2] >= contig_ct)) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ if ((!bcf_var_header[1]) || (!is_set(contig_bitfield, bcf_var_header[2]))) {
+ goto bcf_to_bed_marker_skip;
+ }
+ if (check_qual) {
+ if (bcf_var_header[5] + 0x807fffffU < vcf_min_qualf_compare_bits) {
+ goto bcf_to_bed_marker_skip;
}
}
- } else {
- // n_allele == 0 case was previously skipped, but it might have a place
- // with --allow-no-samples.
- allele_lens[0] = 1;
- allele_ptrs[0] = bufptr;
- *bufptr = missing_geno;
- }
- if (vcf_filter) {
- ii = gzgetc(gz_infile);
- if (ii == -1) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
- } else {
- ujj = ((uint32_t)ii) >> 4;
- if (ujj == 15) {
- retval = read_bcf_typed_nonnegative_integer(gz_infile, &ujj);
+ retval = read_bcf_typed_string(gz_infile, marker_id, 65535, &marker_id_len);
+ if (retval) {
+ goto bcf_to_bed_ret_1;
+ }
+ n_allele = bcf_var_header[6] >> 16;
+ if (biallelic_strict && (n_allele > 2)) {
+ goto bcf_to_bed_skip3;
+ }
+ if (n_allele > max_allele_ct) {
+ goto bcf_to_bed_ret_NOMEM;
+ }
+ ujj = NON_BIGSTACK_MIN; // remaining allele name buffer space
+ bufptr = allele_buf;
+ if (n_allele) {
+ for (uii = 0; uii < n_allele; uii++) {
+ retval = read_bcf_typed_string(gz_infile, bufptr, ujj, &ukk);
if (retval) {
goto bcf_to_bed_ret_1;
}
+ if ((!uii) && ((!ukk) || ((ukk == 1) && (*bufptr == 'N')))) {
+ // convert ref 'N' or '.' to missing genotype. ('.' case was
+ // skipped the past, and 'N' was not converted.)
+ allele_lens[0] = 1;
+ allele_ptrs[0] = bufptr;
+ *bufptr++ = missing_geno;
+ } else {
+ allele_lens[uii] = ukk;
+ allele_ptrs[uii] = bufptr;
+ bufptr = &(bufptr[ukk]);
+ }
}
- if (ujj) {
- uii = ((uint32_t)ii) & 0x0f;
- if ((uii < 1) || (uii > 3)) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ } else {
+ // n_allele == 0 case was previously skipped, but it might have a place
+ // with --allow-no-samples.
+ allele_lens[0] = 1;
+ allele_ptrs[0] = bufptr;
+ *bufptr = missing_geno;
+ }
+ if (vcf_filter) {
+ ii = gzgetc(gz_infile);
+ if (ii == -1) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
+ } else {
+ ujj = ((uint32_t)ii) >> 4;
+ if (ujj == 15) {
+ retval = read_bcf_typed_nonnegative_integer(gz_infile, &ujj);
+ if (retval) {
+ goto bcf_to_bed_ret_1;
+ }
}
- if (uii == 1) {
- if (ujj > 256) {
+ if (ujj) {
+ uii = ((uint32_t)ii) & 0x0f;
+ if ((uii < 1) || (uii > 3)) {
goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- ucptr = (unsigned char*)g_textbuf;
- if ((uint32_t)((uint64_t)gzread(gz_infile, ucptr, ujj)) < ujj) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
- }
- for (ukk = 0; ukk < ujj; ukk++) {
- if (ucptr[ukk] >= stringdict_ct) {
+ if (uii == 1) {
+ if (ujj > 256) {
goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- if (!is_set(fexcept_bitfield, ucptr[ukk])) {
- goto bcf_to_bed_marker_skip;
- }
- }
- } else if (uii == 2) {
- if (ujj > 65536) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
- }
- ui16ptr = (uint16_t*)g_textbuf;
- if ((uint32_t)((uint64_t)gzread(gz_infile, ui16ptr, ujj * sizeof(int16_t))) < ujj * sizeof(int16_t)) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
- }
- for (ukk = 0; ukk < ujj; ukk++) {
- if (ui16ptr[ukk] >= stringdict_ct) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ ucptr = (unsigned char*)g_textbuf;
+ if ((uint32_t)((uint64_t)gzread(gz_infile, ucptr, ujj)) < ujj) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
}
- if (!is_set(fexcept_bitfield, ui16ptr[ukk])) {
- goto bcf_to_bed_marker_skip;
+ for (ukk = 0; ukk < ujj; ukk++) {
+ if (ucptr[ukk] >= stringdict_ct) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ if (!is_set(fexcept_bitfield, ucptr[ukk])) {
+ goto bcf_to_bed_marker_skip;
+ }
}
- }
- } else {
- // a bit more care required to avoid buffer overflow, if for some
- // reason there are more than 32k filters...
- uiptr = (uint32_t*)g_textbuf;
- do {
- if (ujj > (MAXLINELEN / sizeof(int32_t))) {
- ukk = MAXLINELEN / sizeof(int32_t);
- } else {
- ukk = ujj;
+ } else if (uii == 2) {
+ if (ujj > 65536) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- if ((uint32_t)((uint64_t)gzread(gz_infile, uiptr, ukk * sizeof(int32_t))) < ukk * sizeof(int32_t)) {
+ ui16ptr = (uint16_t*)g_textbuf;
+ if ((uint32_t)((uint64_t)gzread(gz_infile, ui16ptr, ujj * sizeof(int16_t))) < ujj * sizeof(int16_t)) {
goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
}
- for (umm = 0; umm < ukk; umm++) {
- if (uiptr[umm] >= stringdict_ct) {
+ for (ukk = 0; ukk < ujj; ukk++) {
+ if (ui16ptr[ukk] >= stringdict_ct) {
goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
}
- if (!is_set(fexcept_bitfield, uiptr[umm])) {
+ if (!is_set(fexcept_bitfield, ui16ptr[ukk])) {
goto bcf_to_bed_marker_skip;
}
}
- ujj -= ukk;
- } while (ujj);
+ } else {
+ // a bit more care required to avoid buffer overflow, if for some
+ // reason there are more than 32k filters...
+ uiptr = (uint32_t*)g_textbuf;
+ do {
+ if (ujj > (MAXLINELEN / sizeof(int32_t))) {
+ ukk = MAXLINELEN / sizeof(int32_t);
+ } else {
+ ukk = ujj;
+ }
+ if ((uint32_t)((uint64_t)gzread(gz_infile, uiptr, ukk * sizeof(int32_t))) < ukk * sizeof(int32_t)) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
+ }
+ for (umm = 0; umm < ukk; umm++) {
+ if (uiptr[umm] >= stringdict_ct) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ if (!is_set(fexcept_bitfield, uiptr[umm])) {
+ goto bcf_to_bed_marker_skip;
+ }
+ }
+ ujj -= ukk;
+ } while (ujj);
+ }
}
}
}
- }
- alt_allele_idx = 1;
- if ((!gt_idx) || (!bcf_var_header[1])) {
- if (require_gt) {
- goto bcf_to_bed_marker_skip;
- }
- ulljj = gztell(gz_infile);
- ullii = lastloc + bcf_var_header[0] + bcf_var_header[1];
- if (!sample_ct) {
- goto bcf_to_bed_skip_genotype_write;
+ alt_allele_idx = 1;
+ if ((!gt_idx) || (!bcf_var_header[1])) {
+ if (require_gt) {
+ goto bcf_to_bed_marker_skip;
+ }
+ ulljj = gztell(gz_infile);
+ ullii = lastloc + bcf_var_header[0] + bcf_var_header[1];
+ if (!sample_ct) {
+ goto bcf_to_bed_skip_genotype_write;
+ }
+ missing_gt_ct++;
+ fill_quatervec_55(sample_ct, base_bitfields);
+ goto bcf_to_bed_genotype_write;
}
- missing_gt_ct++;
- fill_quatervec_55(sample_ct, base_bitfields);
- goto bcf_to_bed_genotype_write;
- }
- // skip INFO
- ullii = lastloc + bcf_var_header[0];
- if (gzseek(gz_infile, ullii, SEEK_SET) == -1) {
- goto bcf_to_bed_ret_READ_FAIL;
- }
- ullii += bcf_var_header[1];
- while (1) {
- retval = read_bcf_typed_nonnegative_integer(gz_infile, &uii);
- if (retval) {
- goto bcf_to_bed_ret_1;
- }
- ii = gzgetc(gz_infile);
- if (ii == -1) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
+ // skip INFO
+ ullii = lastloc + bcf_var_header[0];
+ if (gzseek(gz_infile, ullii, SEEK_SET) == -1) {
+ goto bcf_to_bed_ret_READ_FAIL;
}
- ujj = ((uint32_t)ii) >> 4;
- if (ujj == 15) {
- retval = read_bcf_typed_nonnegative_integer(gz_infile, &ujj);
+ ullii += bcf_var_header[1];
+ while (1) {
+ retval = read_bcf_typed_nonnegative_integer(gz_infile, &uii);
if (retval) {
goto bcf_to_bed_ret_1;
}
- }
- if (ujj) {
- ukk = ((uint32_t)ii) & 0x0f;
- if ((ukk == 3) || (ukk == 5)) {
- umm = 4; // int32, float = 4 bytes
- } else if ((!ukk) || (ukk > 2)) {
- logerrprint("Error: Unrecognized type in .bcf file.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
- } else {
- umm = ukk;
+ ii = gzgetc(gz_infile);
+ if (ii == -1) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
}
- }
- ulljj = gztell(gz_infile) + ((uint64_t)ujj) * umm * sample_ct;
- // uii = format key
- // ujj = for GT, max ploidy
- // ukk = integer/float/character type code
- // umm = bytes per entry
- if (ulljj > ullii) {
- goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
- }
- if (uii == gt_idx) {
- break;
- }
- // possible todo: --vcf-min-gq and --vcf-min-gp support
- if (ujj) {
- if (gzseek(gz_infile, ((uint64_t)ujj) * umm * sample_ct, SEEK_CUR) == -1) {
- goto bcf_to_bed_ret_READ_FAIL;
+ ujj = ((uint32_t)ii) >> 4;
+ if (ujj == 15) {
+ retval = read_bcf_typed_nonnegative_integer(gz_infile, &ujj);
+ if (retval) {
+ goto bcf_to_bed_ret_1;
+ }
}
- if (ulljj == ullii) {
- if (require_gt) {
- goto bcf_to_bed_marker_skip2;
+ if (ujj) {
+ ukk = ((uint32_t)ii) & 0x0f;
+ if ((ukk == 3) || (ukk == 5)) {
+ umm = 4; // int32, float = 4 bytes
+ } else if ((!ukk) || (ukk > 2)) {
+ logerrprint("Error: Unrecognized type in .bcf file.\n");
+ goto bcf_to_bed_ret_INVALID_FORMAT;
} else {
- missing_gt_ct++;
- fill_quatervec_55(sample_ct, base_bitfields);
- goto bcf_to_bed_genotype_write;
+ umm = ukk;
}
}
- }
- }
- if (!ujj) {
- // ploidy zero previously caused the variant to be skipped
- fill_quatervec_55(sample_ct, base_bitfields);
- goto bcf_to_bed_genotype_write;
- }
- if (ukk == 5) {
- logerrprint("Error: GT field cannot contain floating point values.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
- }
- if (ujj * umm > 12) {
- // 12 = 12-ploid, or 6-ploid and >= 127 alleles, or triploid and >= 32767
- // alleles. this is pretty darn generous.
- logerrprint("Error: --bcf does not support GT vectors requiring >12 bytes per sample.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
- }
- // ujj * umm <= 12 and sample_ct < 2^24, so no uint64_t cast needed there
- if ((uint32_t)((uint64_t)gzread(gz_infile, loadbuf, ujj * umm * sample_ct)) < ujj * umm * sample_ct) {
- goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
- }
- if (n_allele < 2) {
- fill_ulong_zero(base_bitfields, 2 * sample_ctv2);
- } else {
- fill_ulong_zero(base_bitfields, n_allele * sample_ctv2);
- }
- if (ukk == 1) {
- ucptr = (unsigned char*)loadbuf;
- if (ujj == 2) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, ucptr++) {
- // discard all phase bits for now
- // missing = 0x80 or 0x81
- ulii = (*ucptr++) & 0x7e;
- if (ulii) {
- ulii = ((ulii / 2) - 1) * sample_ctv2;
- uljj = (*ucptr) & 0x7e;
- if (uljj) {
- set_bit(sample_idx * 2, &(base_bitfields[ulii]));
- base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
+ ulljj = gztell(gz_infile) + ((uint64_t)ujj) * umm * sample_ct;
+ // uii = format key
+ // ujj = for GT, max ploidy
+ // ukk = integer/float/character type code
+ // umm = bytes per entry
+ if (ulljj > ullii) {
+ goto bcf_to_bed_ret_INVALID_FORMAT_GENERIC;
+ }
+ if (uii == gt_idx) {
+ break;
+ }
+ // possible todo: --vcf-min-gq and --vcf-min-gp support
+ if (ujj) {
+ if (gzseek(gz_infile, ((uint64_t)ujj) * umm * sample_ct, SEEK_CUR) == -1) {
+ goto bcf_to_bed_ret_READ_FAIL;
+ }
+ if (ulljj == ullii) {
+ if (require_gt) {
+ goto bcf_to_bed_marker_skip2;
} else {
- // could be MT or male X. don't validate for now
- set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
+ missing_gt_ct++;
+ fill_quatervec_55(sample_ct, base_bitfields);
+ goto bcf_to_bed_genotype_write;
}
}
}
- } else if (ujj == 1) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- ulii = (*ucptr++) & 0x7e;
- if (ulii) {
- set_bit(sample_idx * 2 + 1, &(base_bitfields[((ulii / 2) - 1) * sample_ctv2]));
- }
- }
+ }
+ if (!ujj) {
+ // ploidy zero previously caused the variant to be skipped
+ fill_quatervec_55(sample_ct, base_bitfields);
+ goto bcf_to_bed_genotype_write;
+ }
+ if (ukk == 5) {
+ logerrprint("Error: GT field cannot contain floating point values.\n");
+ goto bcf_to_bed_ret_INVALID_FORMAT;
+ }
+ if (ujj * umm > 12) {
+ // 12 = 12-ploid, or 6-ploid and >= 127 alleles, or triploid and >=
+ // 32767 alleles. this is pretty darn generous.
+ logerrprint("Error: --bcf does not support GT vectors requiring >12 bytes per sample.\n");
+ goto bcf_to_bed_ret_INVALID_FORMAT;
+ }
+ // ujj * umm <= 12 and sample_ct < 2^24, so no uint64_t cast needed there
+ if ((uint32_t)((uint64_t)gzread(gz_infile, loadbuf, ujj * umm * sample_ct)) < ujj * umm * sample_ct) {
+ goto bcf_to_bed_ret_READ_OR_FORMAT_FAIL;
+ }
+ if (n_allele < 2) {
+ fill_ulong_zero(2 * sample_ctv2, base_bitfields);
} else {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- if (ucptr[2]) {
- ucptr = &(ucptr[ujj]);
- } else {
+ fill_ulong_zero(n_allele * sample_ctv2, base_bitfields);
+ }
+ if (ukk == 1) {
+ ucptr = (unsigned char*)loadbuf;
+ if (ujj == 2) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, ucptr++) {
+ // discard all phase bits for now
+ // missing = 0x80 or 0x81
ulii = (*ucptr++) & 0x7e;
if (ulii) {
ulii = ((ulii / 2) - 1) * sample_ctv2;
@@ -9692,46 +9745,47 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
set_bit(sample_idx * 2, &(base_bitfields[ulii]));
base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
} else {
+ // could be MT or male X. don't validate for now
set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
}
}
- ucptr = &(ucptr[ujj - 1]);
- }
- }
- }
- } else if (ukk == 2) {
- ui16ptr = (uint16_t*)loadbuf;
- // bleah, this should totally use templates instead of cut-and-paste
- if (ujj == 2) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, ui16ptr++) {
- ulii = (*ui16ptr++) & 0x7ffe;
- if (ulii) {
- ulii = ((ulii / 2) - 1) * sample_ctv2;
- uljj = (*ui16ptr) & 0x7ffe;
- if (uljj) {
- set_bit(sample_idx * 2, &(base_bitfields[ulii]));
- base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
- } else {
- set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
+ }
+ } else if (ujj == 1) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ ulii = (*ucptr++) & 0x7e;
+ if (ulii) {
+ set_bit(sample_idx * 2 + 1, &(base_bitfields[((ulii / 2) - 1) * sample_ctv2]));
}
}
- }
- } else if (ujj == 1) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- ulii = (*ui16ptr++) & 0x7ffe;
- if (ulii) {
- set_bit(sample_idx * 2 + 1, &(base_bitfields[((ulii / 2) - 1) * sample_ctv2]));
+ } else {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ if (ucptr[2]) {
+ ucptr = &(ucptr[ujj]);
+ } else {
+ ulii = (*ucptr++) & 0x7e;
+ if (ulii) {
+ ulii = ((ulii / 2) - 1) * sample_ctv2;
+ uljj = (*ucptr) & 0x7e;
+ if (uljj) {
+ set_bit(sample_idx * 2, &(base_bitfields[ulii]));
+ base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
+ } else {
+ set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
+ }
+ }
+ ucptr = &(ucptr[ujj - 1]);
+ }
}
}
- } else {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- if (ui16ptr[2]) {
- ui16ptr = &(ui16ptr[ujj]);
- } else {
+ } else if (ukk == 2) {
+ ui16ptr = (uint16_t*)loadbuf;
+ // bleah, this should totally use templates instead of cut-and-paste
+ if (ujj == 2) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, ui16ptr++) {
ulii = (*ui16ptr++) & 0x7ffe;
if (ulii) {
ulii = ((ulii / 2) - 1) * sample_ctv2;
- uljj = (*ui16ptr) & 0x7ffe;
+ uljj = (*ui16ptr) & 0x7ffe;
if (uljj) {
set_bit(sample_idx * 2, &(base_bitfields[ulii]));
base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
@@ -9739,42 +9793,42 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
}
}
- ui16ptr = &(ui16ptr[ujj - 1]);
}
- }
- }
- } else {
- uiptr = (uint32_t*)loadbuf;
- if (ujj == 2) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, uiptr++) {
- ulii = (*uiptr++) & 0x7ffffffe;
- if (ulii) {
- ulii = ((ulii / 2) - 1) * sample_ctv2;
- uljj = (*uiptr) & 0x7ffffffe;
- if (uljj) {
- set_bit(sample_idx * 2, &(base_bitfields[ulii]));
- base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
- } else {
- set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
+ } else if (ujj == 1) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ ulii = (*ui16ptr++) & 0x7ffe;
+ if (ulii) {
+ set_bit(sample_idx * 2 + 1, &(base_bitfields[((ulii / 2) - 1) * sample_ctv2]));
}
}
- }
- } else if (ujj == 1) {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- ulii = (*uiptr++) & 0x7ffffffe;
- if (ulii) {
- set_bit(sample_idx * 2 + 1, &(base_bitfields[((ulii / 2) - 1) * sample_ctv2]));
+ } else {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ if (ui16ptr[2]) {
+ ui16ptr = &(ui16ptr[ujj]);
+ } else {
+ ulii = (*ui16ptr++) & 0x7ffe;
+ if (ulii) {
+ ulii = ((ulii / 2) - 1) * sample_ctv2;
+ uljj = (*ui16ptr) & 0x7ffe;
+ if (uljj) {
+ set_bit(sample_idx * 2, &(base_bitfields[ulii]));
+ base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
+ } else {
+ set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
+ }
+ }
+ ui16ptr = &(ui16ptr[ujj - 1]);
+ }
}
}
} else {
- for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
- if (uiptr[2]) {
- uiptr = &(uiptr[ujj]);
- } else {
+ uiptr = (uint32_t*)loadbuf;
+ if (ujj == 2) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++, uiptr++) {
ulii = (*uiptr++) & 0x7ffffffe;
if (ulii) {
ulii = ((ulii / 2) - 1) * sample_ctv2;
- uljj = (*uiptr) & 0x7ffffffe;
+ uljj = (*uiptr) & 0x7ffffffe;
if (uljj) {
set_bit(sample_idx * 2, &(base_bitfields[ulii]));
base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
@@ -9782,138 +9836,163 @@ int32_t bcf_to_bed(char* bcfname, char* outname, char* outname_end, int32_t miss
set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
}
}
- uiptr = &(uiptr[ujj - 1]);
}
- }
- }
- }
- if (n_allele > 2) {
- ulii = popcount2_longs(&(base_bitfields[sample_ctv2]), sample_ctl2);
- for (ulkk = 2; ulkk < n_allele; ulkk++) {
- uljj = popcount2_longs(&(base_bitfields[sample_ctv2 * ulkk]), sample_ctl2);
- if (!biallelic_only) {
- if (uljj <= ulii) {
- continue;
+ } else if (ujj == 1) {
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ ulii = (*uiptr++) & 0x7ffffffe;
+ if (ulii) {
+ set_bit(sample_idx * 2 + 1, &(base_bitfields[((ulii / 2) - 1) * sample_ctv2]));
+ }
}
} else {
- if (!uljj) {
- continue;
+ for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
+ if (uiptr[2]) {
+ uiptr = &(uiptr[ujj]);
+ } else {
+ ulii = (*uiptr++) & 0x7ffffffe;
+ if (ulii) {
+ ulii = ((ulii / 2) - 1) * sample_ctv2;
+ uljj = (*uiptr) & 0x7ffffffe;
+ if (uljj) {
+ set_bit(sample_idx * 2, &(base_bitfields[ulii]));
+ base_bitfields[((uljj / 2) - 1) * sample_ctv2 + sample_idx / BITCT2] += ONELU << (2 * (sample_idx % BITCT2));
+ } else {
+ set_bit(sample_idx * 2 + 1, &(base_bitfields[ulii]));
+ }
+ }
+ uiptr = &(uiptr[ujj - 1]);
+ }
}
- if (ulii) {
- goto bcf_to_bed_skip3;
+ }
+ }
+ if (n_allele > 2) {
+ ulii = popcount2_longs(&(base_bitfields[sample_ctv2]), sample_ctl2);
+ for (ulkk = 2; ulkk < n_allele; ulkk++) {
+ uljj = popcount2_longs(&(base_bitfields[sample_ctv2 * ulkk]), sample_ctl2);
+ if (!biallelic_only) {
+ if (uljj <= ulii) {
+ continue;
+ }
+ } else {
+ if (!uljj) {
+ continue;
+ }
+ if (ulii) {
+ goto bcf_to_bed_skip3;
+ }
}
+ ulii = uljj;
+ alt_allele_idx = ulkk;
}
- ulii = uljj;
- alt_allele_idx = ulkk;
}
- }
- ref_ptr = base_bitfields;
- alt_ptr = &(base_bitfields[alt_allele_idx * sample_ctv2]);
- for (sample_idx = 0; sample_idx < sample_ctl2; sample_idx++) {
- ulii = *ref_ptr;
- uljj = *alt_ptr++;
- ulkk = (ulii + uljj) & AAAAMASK;
- uljj = ulii + ((ulii | (ulii >> 1)) & FIVEMASK);
- ulii = ulkk | (ulkk >> 1); // nonmissing?
- *ref_ptr++ = (uljj & ulii) | (((~ulkk) >> 1) & FIVEMASK);
- }
- ref_ptr[-1] &= final_mask;
- bcf_to_bed_genotype_write:
- if (fwrite_checked(base_bitfields, sample_ct4, outfile)) {
- goto bcf_to_bed_ret_WRITE_FAIL;
- }
- bcf_to_bed_skip_genotype_write:
- fputs(&(contigdict[bcf_var_header[2] * max_contig_len]), bimfile);
- putc('\t', bimfile);
- if (marker_id_len) {
- fwrite(marker_id, 1, marker_id_len, bimfile);
- } else {
- putc('.', bimfile);
- }
- // bcf2 coordinates are 0-based while vcf is 1-based... (seriously, whose
- // idea was this? this is basically a bug in the spec due to how e.g.
- // telomeres are supposed to be encoded, but we have to play along)
- bufptr = uint32toa_x(bcf_var_header[3] + 1, '\t', &(tbuf2[3]));
- if (fwrite_checked(tbuf2, bufptr - tbuf2, bimfile)) {
- goto bcf_to_bed_ret_WRITE_FAIL;
- }
- if (n_allele > 1) {
- fwrite(allele_ptrs[alt_allele_idx], 1, allele_lens[alt_allele_idx], bimfile);
- } else {
- putc(missing_geno, bimfile);
- }
- putc('\t', bimfile);
- fwrite(allele_ptrs[0], 1, allele_lens[0], bimfile);
- if (putc_checked('\n', bimfile)) {
- goto bcf_to_bed_ret_WRITE_FAIL;
- }
- marker_ct++;
- if (!(marker_ct % 1000)) {
- printf("\r--bcf: %uk variants complete.", marker_ct / 1000);
- fflush(stdout);
- }
- if (ulljj < ullii) {
- if (gzseek(gz_infile, ullii, SEEK_SET) == -1) {
- goto bcf_to_bed_ret_READ_FAIL;
+ ref_ptr = base_bitfields;
+ alt_ptr = &(base_bitfields[alt_allele_idx * sample_ctv2]);
+ for (sample_idx = 0; sample_idx < sample_ctl2; sample_idx++) {
+ ulii = *ref_ptr;
+ uljj = *alt_ptr++;
+ ulkk = (ulii + uljj) & AAAAMASK;
+ uljj = ulii + ((ulii | (ulii >> 1)) & FIVEMASK);
+ ulii = ulkk | (ulkk >> 1); // nonmissing?
+ *ref_ptr++ = (uljj & ulii) | (((~ulkk) >> 1) & FIVEMASK);
}
- }
- continue;
- bcf_to_bed_skip3:
- if (skip3_list) {
- if (!marker_skip_ct) {
- memcpy(outname_end, ".skip.3allele", 14);
- if (fopen_checked(outname, "w", &skip3file)) {
- goto bcf_to_bed_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".bed", 5);
+ ref_ptr[-1] &= final_mask;
+ bcf_to_bed_genotype_write:
+ if (fwrite_checked(base_bitfields, sample_ct4, outfile)) {
+ goto bcf_to_bed_ret_WRITE_FAIL;
}
+ bcf_to_bed_skip_genotype_write:
+ fputs(&(contigdict[bcf_var_header[2] * max_contig_len]), bimfile);
+ putc_unlocked('\t', bimfile);
if (marker_id_len) {
- fwrite(marker_id, 1, marker_id_len, skip3file);
+ fwrite(marker_id, 1, marker_id_len, bimfile);
} else {
- // up to the user to figure this out...
- putc('.', skip3file);
+ putc_unlocked('.', bimfile);
}
- if (putc_checked('\n', skip3file)) {
- goto bcf_to_bed_ret_OPEN_FAIL;
+ // bcf2 coordinates are 0-based while vcf is 1-based... (seriously, whose
+ // idea was this? this is basically a bug in the spec due to how e.g.
+ // telomeres are supposed to be encoded, but we have to play along)
+ bufptr = uint32toa_x(bcf_var_header[3] + 1, '\t', &(tbuf2[3]));
+ if (fwrite_checked(tbuf2, bufptr - tbuf2, bimfile)) {
+ goto bcf_to_bed_ret_WRITE_FAIL;
+ }
+ if (n_allele > 1) {
+ fwrite(allele_ptrs[alt_allele_idx], 1, allele_lens[alt_allele_idx], bimfile);
+ } else {
+ putc_unlocked(missing_geno, bimfile);
+ }
+ putc_unlocked('\t', bimfile);
+ fwrite(allele_ptrs[0], 1, allele_lens[0], bimfile);
+ if (putc_checked('\n', bimfile)) {
+ goto bcf_to_bed_ret_WRITE_FAIL;
+ }
+ marker_ct++;
+ if (!(marker_ct % 1000)) {
+ printf("\r--bcf: %uk variants complete.", marker_ct / 1000);
+ fflush(stdout);
+ }
+ if (ulljj < ullii) {
+ if (gzseek(gz_infile, ullii, SEEK_SET) == -1) {
+ goto bcf_to_bed_ret_READ_FAIL;
+ }
+ }
+ continue;
+ bcf_to_bed_skip3:
+ if (skip3_list) {
+ if (!marker_skip_ct) {
+ memcpy(outname_end, ".skip.3allele", 14);
+ if (fopen_checked(outname, "w", &skip3file)) {
+ goto bcf_to_bed_ret_OPEN_FAIL;
+ }
+ memcpy(outname_end, ".bed", 5);
+ }
+ if (marker_id_len) {
+ fwrite(marker_id, 1, marker_id_len, skip3file);
+ } else {
+ // up to the user to figure this out...
+ putc_unlocked('.', skip3file);
+ }
+ if (putc_checked('\n', skip3file)) {
+ goto bcf_to_bed_ret_OPEN_FAIL;
+ }
+ }
+ bcf_to_bed_marker_skip:
+ if (gzseek(gz_infile, (lastloc + bcf_var_header[0]) + bcf_var_header[1], SEEK_SET) == -1) {
+ goto bcf_to_bed_ret_READ_FAIL;
+ }
+ bcf_to_bed_marker_skip2:
+ marker_skip_ct++;
+ }
+ if ((!marker_ct) && (!allow_no_variants)) {
+ if (marker_skip_ct) {
+ logerrprint("Error: All variants in .bcf file skipped.\n");
+ retval = RET_ALL_MARKERS_EXCLUDED;
+ goto bcf_to_bed_ret_1;
+ } else {
+ logerrprint("Error: No variants in .bcf file.\n");
+ goto bcf_to_bed_ret_INVALID_FORMAT;
}
}
- bcf_to_bed_marker_skip:
- if (gzseek(gz_infile, (lastloc + bcf_var_header[0]) + bcf_var_header[1], SEEK_SET) == -1) {
+ if (gzclose(gz_infile) != Z_OK) {
+ gz_infile = nullptr;
goto bcf_to_bed_ret_READ_FAIL;
}
- bcf_to_bed_marker_skip2:
- marker_skip_ct++;
- }
- if ((!marker_ct) && (!allow_no_variants)) {
+ gz_infile = nullptr;
+ if (fclose_null(&bimfile)) {
+ goto bcf_to_bed_ret_WRITE_FAIL;
+ }
+ if (fclose_null(&outfile)) {
+ goto bcf_to_bed_ret_WRITE_FAIL;
+ }
+ putc_unlocked('\r', stdout);
+ bcf_to_bed_skip_all_variants:
+ *outname_end = '\0';
+ LOGPRINTFWW("--bcf: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
if (marker_skip_ct) {
- logerrprint("Error: All variants in .bcf file skipped.\n");
- retval = RET_ALL_MARKERS_EXCLUDED;
- goto bcf_to_bed_ret_1;
- } else {
- logerrprint("Error: No variants in .bcf file.\n");
- goto bcf_to_bed_ret_INVALID_FORMAT;
+ LOGPRINTF("(%" PRIuPTR " variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
+ }
+ if (missing_gt_ct) {
+ LOGERRPRINTF("Warning: %" PRIuPTR " variant record%s had no GT field.\n", missing_gt_ct, (missing_gt_ct == 1)? "" : "s");
}
- }
- if (gzclose(gz_infile) != Z_OK) {
- gz_infile = NULL;
- goto bcf_to_bed_ret_READ_FAIL;
- }
- gz_infile = NULL;
- if (fclose_null(&bimfile)) {
- goto bcf_to_bed_ret_WRITE_FAIL;
- }
- if (fclose_null(&outfile)) {
- goto bcf_to_bed_ret_WRITE_FAIL;
- }
- putchar('\r');
- bcf_to_bed_skip_all_variants:
- *outname_end = '\0';
- LOGPRINTFWW("--bcf: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
- if (marker_skip_ct) {
- LOGPRINTF("(%" PRIuPTR " variant%s skipped.)\n", marker_skip_ct, (marker_skip_ct == 1)? "" : "s");
- }
- if (missing_gt_ct) {
- LOGERRPRINTF("Warning: %" PRIuPTR " variant record%s had no GT field.\n", missing_gt_ct, (missing_gt_ct == 1)? "" : "s");
}
while (0) {
bcf_to_bed_ret_NOMEM:
@@ -9962,7 +10041,7 @@ uint32_t write_23_cached_chrom(char* write_cache, uint32_t markers_left, char ch
if (putc_checked('2', outfile_bim)) {
return 1;
}
- putc(chrom_second_char, outfile_bim);
+ putc_unlocked(chrom_second_char, outfile_bim);
if (fwrite_checked(write_cache, uii, outfile_bim)) {
return 1;
}
@@ -9976,9 +10055,9 @@ uint32_t write_23_cached_chrom(char* write_cache, uint32_t markers_left, char ch
int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_t modifier_23, char* fid_23, char* iid_23, double pheno_23, uint64_t misc_flags, char* paternal_id_23, char* maternal_id_23, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile_23 = NULL;
- FILE* outfile_bed = NULL;
- FILE* outfile_txt = NULL;
+ FILE* infile_23 = nullptr;
+ FILE* outfile_bed = nullptr;
+ FILE* outfile_txt = nullptr;
uintptr_t line_idx = 0;
uint32_t is_male = modifier_23 & M23_MALE;
uint32_t is_female = modifier_23 & M23_FEMALE;
@@ -10004,224 +10083,224 @@ int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_
uint32_t allele_calls;
uint32_t null_chrom;
uint32_t uii;
- int32_t ii;
char cc;
char cc2;
unsigned char ucc;
- if (bigstack_alloc_c(MAXLINELEN, &writebuf2)) {
- goto bed_from_23_ret_NOMEM;
- }
- if (fopen_checked(infile_name, "r", &infile_23)) {
- goto bed_from_23_ret_OPEN_FAIL;
- }
- memcpy(outname_end, ".bim", 5);
- if (fopen_checked(outname, "w", &outfile_txt)) {
- goto bed_from_23_ret_OPEN_FAIL;
- }
- memcpy(&(outname_end[2]), "ed", 2);
- if (fopen_checked(outname, FOPEN_WB, &outfile_bed)) {
- goto bed_from_23_ret_OPEN_FAIL;
- }
- if (bigstack_left() < MAXLINELEN) {
- goto bed_from_23_ret_NOMEM;
- }
- writebuf_cur = (unsigned char*)memcpyl3a((char*)writebuf, "l\x1b\x01");
- writebuf_end = &(writebuf[MAXLINELEN]);
- g_textbuf[MAXLINELEN - 1] = ' ';
- while (fgets(g_textbuf, MAXLINELEN, infile_23)) {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, infile_name);
- goto bed_from_23_ret_INVALID_FORMAT_2;
+ {
+ if (bigstack_alloc_c(MAXLINELEN, &writebuf2)) {
+ goto bed_from_23_ret_NOMEM;
}
- id_start = skip_initial_spaces(g_textbuf);
- cc = *id_start;
- if (is_eoln_kns(cc) || (cc == '#')) {
- continue;
+ if (fopen_checked(infile_name, "r", &infile_23)) {
+ goto bed_from_23_ret_OPEN_FAIL;
}
- chrom_start = token_endnn(id_start);
- id_len = (uintptr_t)(chrom_start - id_start);
- chrom_start = skip_initial_spaces(chrom_start);
- pos_start = next_token(chrom_start);
- allele_start = next_token(pos_start);
- if (no_more_tokens_kns(allele_start)) {
- goto bed_from_23_ret_MISSING_TOKENS;
- }
- allele_calls = strlen_se(allele_start);
- if (allele_calls > 2) {
- LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s has more allele calls than expected.\n", line_idx, infile_name);
- goto bed_from_23_ret_INVALID_FORMAT_2;
- }
- ii = get_chrom_code(chrom_info_ptr, chrom_start);
- if (ii < 0) {
- sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of %s.\n", line_idx, infile_name);
- goto bed_from_23_ret_INVALID_FORMAT_2;
- }
- uii = (uint32_t)ii;
- if (!(chrom_mask_23 & (1 << uii))) {
- continue;
+ memcpy(outname_end, ".bim", 5);
+ if (fopen_checked(outname, "w", &outfile_txt)) {
+ goto bed_from_23_ret_OPEN_FAIL;
}
- if (!uii) {
- null_chrom = 1;
- } else {
- if (uii < cur_chrom) {
- LOGPREPRINTFWW("Error: Chromosomes in %s are out of order.\n", infile_name);
+ memcpy(&(outname_end[2]), "ed", 2);
+ if (fopen_checked(outname, FOPEN_WB, &outfile_bed)) {
+ goto bed_from_23_ret_OPEN_FAIL;
+ }
+ if (bigstack_left() < MAXLINELEN) {
+ goto bed_from_23_ret_NOMEM;
+ }
+ writebuf_cur = (unsigned char*)memcpyl3a((char*)writebuf, "l\x1b\x01");
+ writebuf_end = &(writebuf[MAXLINELEN]);
+ g_textbuf[MAXLINELEN - 1] = ' ';
+ while (fgets(g_textbuf, MAXLINELEN, infile_23)) {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, infile_name);
+ goto bed_from_23_ret_INVALID_FORMAT_2;
+ }
+ id_start = skip_initial_spaces(g_textbuf);
+ if (is_eoln_or_comment_kns(*id_start)) {
+ continue;
+ }
+ chrom_start = token_endnn(id_start);
+ id_len = (uintptr_t)(chrom_start - id_start);
+ chrom_start = skip_initial_spaces(chrom_start);
+ pos_start = next_token(chrom_start);
+ allele_start = next_token(pos_start);
+ if (no_more_tokens_kns(allele_start)) {
+ goto bed_from_23_ret_MISSING_TOKENS;
+ }
+ allele_calls = strlen_se(allele_start);
+ if (allele_calls > 2) {
+ LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s has more allele calls than expected.\n", line_idx, infile_name);
+ goto bed_from_23_ret_INVALID_FORMAT_2;
+ }
+ int32_t cur_chrom_code = get_chrom_code_raw(chrom_start);
+ if (cur_chrom_code < 0) {
+ sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of %s.\n", line_idx, infile_name);
goto bed_from_23_ret_INVALID_FORMAT_2;
- } else if (uii > cur_chrom) {
- cur_chrom = uii;
- if (cur_chrom == 23) {
- x_present = 1;
- } else if (cur_chrom == 24) {
- y_present = 1;
+ }
+ uii = (uint32_t)cur_chrom_code;
+ if (!(chrom_mask_23 & (1 << uii))) {
+ continue;
+ }
+ if (!uii) {
+ null_chrom = 1;
+ } else {
+ if (uii < cur_chrom) {
+ LOGPREPRINTFWW("Error: Chromosomes in %s are out of order.\n", infile_name);
+ goto bed_from_23_ret_INVALID_FORMAT_2;
+ } else if (uii > cur_chrom) {
+ cur_chrom = uii;
+ if (cur_chrom == 23) {
+ x_present = 1;
+ } else if (cur_chrom == 24) {
+ y_present = 1;
+ }
}
+ null_chrom = 0;
}
- null_chrom = 0;
- }
- cc2 = allele_start[0];
- if ((cur_chrom == 24) && (!nonmissing_y_present)) {
- if (cc2 != '-') {
- nonmissing_y_present = 1;
+ cc2 = allele_start[0];
+ if ((cur_chrom == 24) && (!nonmissing_y_present)) {
+ if (cc2 != '-') {
+ nonmissing_y_present = 1;
+ }
}
- }
- if (cc2 == '-') {
- ucc = 1;
- cc = '0';
- cc2 = '0';
- } else if (allele_calls == 2) {
- cc = allele_start[1];
- if (cc == cc2) {
+ if (cc2 == '-') {
+ ucc = 1;
cc = '0';
- ucc = 3;
+ cc2 = '0';
+ } else if (allele_calls == 2) {
+ cc = allele_start[1];
+ if (cc == cc2) {
+ cc = '0';
+ ucc = 3;
+ } else {
+ ucc = 2;
+ }
} else {
- ucc = 2;
+ cc = '0';
+ ucc = 3;
}
- } else {
- cc = '0';
- ucc = 3;
- }
- if ((cc2 == 'D') || (cc2 == 'I')) {
- indel_ct++;
- cc = (char)(((unsigned char)cc2) ^ 13); // swaps D and I
- }
- if (!null_chrom) {
- if ((cur_chrom == 25) && (allele_calls != 2)) {
- goto bed_from_23_ret_MISSING_ALLELE_CALLS;
+ if ((cc2 == 'D') || (cc2 == 'I')) {
+ indel_ct++;
+ cc = (char)(((unsigned char)cc2) ^ 13); // swaps D and I
}
- if ((allele_calls == 1) && (cur_chrom <= 23)) {
- if ((cur_chrom == 23) && (!is_female)) {
- is_male = 1;
- haploid_x_present = 1;
- } else {
+ if (!null_chrom) {
+ if ((cur_chrom == 25) && (allele_calls != 2)) {
goto bed_from_23_ret_MISSING_ALLELE_CALLS;
}
- } else if ((cur_chrom == 24) && (cc2 != '0') && (!is_male)) {
- if (!is_female) {
- is_male = 1;
- } else {
- LOGPREPRINTFWW("Error: Nonmissing female allele call on line %" PRIuPTR " of %s.\n", line_idx, infile_name);
- goto bed_from_23_ret_INVALID_FORMAT_2;
+ if ((allele_calls == 1) && (cur_chrom <= 23)) {
+ if ((cur_chrom == 23) && (!is_female)) {
+ is_male = 1;
+ haploid_x_present = 1;
+ } else {
+ goto bed_from_23_ret_MISSING_ALLELE_CALLS;
+ }
+ } else if ((cur_chrom == 24) && (cc2 != '0') && (!is_male)) {
+ if (!is_female) {
+ is_male = 1;
+ } else {
+ LOGPREPRINTFWW("Error: Nonmissing female allele call on line %" PRIuPTR " of %s.\n", line_idx, infile_name);
+ goto bed_from_23_ret_INVALID_FORMAT_2;
+ }
}
+ } else if (allele_calls == 1) {
+ goto bed_from_23_ret_MISSING_ALLELE_CALLS;
+ }
+ if (!null_chrom) {
+ writebuf2_cur = uint32toa(cur_chrom, writebuf2);
+ } else {
+ writebuf2[0] = '0';
+ writebuf2_cur = &(writebuf2[1]);
+ }
+ *writebuf2_cur++ = '\t';
+ writebuf2_cur = memcpya(writebuf2_cur, id_start, id_len);
+ writebuf2_cur = memcpyl3a(writebuf2_cur, "\t0\t");
+ writebuf2_cur = memcpyax(writebuf2_cur, pos_start, strlen_se(pos_start), '\t');
+ *writebuf2_cur++ = cc;
+ *writebuf2_cur++ = '\t';
+ *writebuf2_cur++ = cc2;
+ *writebuf2_cur++ = '\n';
+ if (fwrite_checked(writebuf2, (uintptr_t)(writebuf2_cur - writebuf2), outfile_txt)) {
+ goto bed_from_23_ret_WRITE_FAIL;
}
- } else if (allele_calls == 1) {
- goto bed_from_23_ret_MISSING_ALLELE_CALLS;
+ if (writebuf_cur == writebuf_end) {
+ if (fwrite_checked(writebuf, (uintptr_t)(writebuf_cur - writebuf), outfile_bed)) {
+ goto bed_from_23_ret_WRITE_FAIL;
+ }
+ writebuf_cur = writebuf;
+ }
+ *writebuf_cur++ = (char)ucc;
}
- if (!null_chrom) {
- writebuf2_cur = uint32toa(cur_chrom, writebuf2);
- } else {
- writebuf2[0] = '0';
- writebuf2_cur = &(writebuf2[1]);
- }
- *writebuf2_cur++ = '\t';
- writebuf2_cur = memcpya(writebuf2_cur, id_start, id_len);
- writebuf2_cur = memcpyl3a(writebuf2_cur, "\t0\t");
- writebuf2_cur = memcpyax(writebuf2_cur, pos_start, strlen_se(pos_start), '\t');
- *writebuf2_cur++ = cc;
- *writebuf2_cur++ = '\t';
- *writebuf2_cur++ = cc2;
- *writebuf2_cur++ = '\n';
- if (fwrite_checked(writebuf2, (uintptr_t)(writebuf2_cur - writebuf2), outfile_txt)) {
- goto bed_from_23_ret_WRITE_FAIL;
+ if (!feof(infile_23)) {
+ goto bed_from_23_ret_READ_FAIL;
}
- if (writebuf_cur == writebuf_end) {
- if (fwrite_checked(writebuf, (uintptr_t)(writebuf_cur - writebuf), outfile_bed)) {
- goto bed_from_23_ret_WRITE_FAIL;
+ if ((writebuf_cur == &(writebuf[3])) && (writebuf[0] == 'l') && (!allow_no_variants)) {
+ if (chrom_mask_23 == 0x7ffffff) {
+ logerrprint("Error: No --23file variants.\n");
+ goto bed_from_23_ret_INVALID_FORMAT;
+ } else {
+ logerrprint("Error: No --23file variants pass chromosome filter.\n");
+ goto bed_from_23_ret_INVALID_CMDLINE;
}
- writebuf_cur = writebuf;
}
- *writebuf_cur++ = (char)ucc;
- }
- if (!feof(infile_23)) {
- goto bed_from_23_ret_READ_FAIL;
- }
- if ((writebuf_cur == &(writebuf[3])) && (writebuf[0] == 'l') && (!allow_no_variants)) {
- if (chrom_mask_23 == 0x7ffffff) {
- logerrprint("Error: No --23file variants.\n");
- goto bed_from_23_ret_INVALID_FORMAT;
+ if (fwrite_checked(writebuf, (uintptr_t)(writebuf_cur - writebuf), outfile_bed)) {
+ goto bed_from_23_ret_WRITE_FAIL;
+ }
+ if (fclose_null(&outfile_txt)) {
+ goto bed_from_23_ret_WRITE_FAIL;
+ }
+ memcpy(outname_end, ".fam", 5);
+ if (fopen_checked(outname, "w", &outfile_txt)) {
+ goto bed_from_23_ret_OPEN_FAIL;
+ }
+ if (fid_23) {
+ fputs(fid_23, outfile_txt);
+ putc_unlocked(' ', outfile_txt);
} else {
- logerrprint("Error: No --23file variants pass chromosome filter.\n");
- goto bed_from_23_ret_INVALID_CMDLINE;
+ fputs("FAM001 ", outfile_txt);
}
- }
- if (fwrite_checked(writebuf, (uintptr_t)(writebuf_cur - writebuf), outfile_bed)) {
- goto bed_from_23_ret_WRITE_FAIL;
- }
- if (fclose_null(&outfile_txt)) {
- goto bed_from_23_ret_WRITE_FAIL;
- }
- memcpy(outname_end, ".fam", 5);
- if (fopen_checked(outname, "w", &outfile_txt)) {
- goto bed_from_23_ret_OPEN_FAIL;
- }
- if (fid_23) {
- fputs(fid_23, outfile_txt);
- putc(' ', outfile_txt);
- } else {
- fputs("FAM001 ", outfile_txt);
- }
- if (iid_23) {
- fputs(iid_23, outfile_txt);
- putc(' ', outfile_txt);
- } else {
- fputs("ID001 ", outfile_txt);
- }
- if (paternal_id_23) {
- fputs(paternal_id_23, outfile_txt);
- putc(' ', outfile_txt);
- } else {
- fputs("0 ", outfile_txt);
- }
- if (maternal_id_23) {
- fputs(maternal_id_23, outfile_txt);
- } else {
- putc('0', outfile_txt);
- }
- if (modifier_23 & M23_FORCE_MISSING_SEX) {
- cc = '0';
- } else if (is_male) {
- cc = '1';
- } else {
- cc = '2';
- }
- fprintf(outfile_txt, " %c %g\n", cc, pheno_23);
- if (fclose_null(&outfile_txt)) {
- goto bed_from_23_ret_WRITE_FAIL;
- }
- *outname_end = '\0';
- LOGPRINTFWW("--23file: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
- if (indel_ct) {
- LOGPRINTF("%u variants with indel calls present. '--snps-only no-DI' or\n--list-23-indels may be useful here.\n", indel_ct);
- }
- if (!(modifier_23 & M23_SEX)) {
- LOGPRINTF("Inferred sex: %smale.\n", is_male? "" : "fe");
- }
- if (modifier_23 & M23_MALE) {
- if (y_present && (!nonmissing_y_present)) {
- if (x_present) {
- if (!haploid_x_present) {
- logerrprint("Warning: No explicit haploid calls on X chromosome, and no nonmissing calls on\nY chromosome. Double-check whether this is really a male sample.\n");
+ if (iid_23) {
+ fputs(iid_23, outfile_txt);
+ putc_unlocked(' ', outfile_txt);
+ } else {
+ fputs("ID001 ", outfile_txt);
+ }
+ if (paternal_id_23) {
+ fputs(paternal_id_23, outfile_txt);
+ putc_unlocked(' ', outfile_txt);
+ } else {
+ fputs("0 ", outfile_txt);
+ }
+ if (maternal_id_23) {
+ fputs(maternal_id_23, outfile_txt);
+ } else {
+ putc_unlocked('0', outfile_txt);
+ }
+ if (modifier_23 & M23_FORCE_MISSING_SEX) {
+ cc = '0';
+ } else if (is_male) {
+ cc = '1';
+ } else {
+ cc = '2';
+ }
+ fprintf(outfile_txt, " %c %g\n", cc, pheno_23);
+ if (fclose_null(&outfile_txt)) {
+ goto bed_from_23_ret_WRITE_FAIL;
+ }
+ *outname_end = '\0';
+ LOGPRINTFWW("--23file: %s.bed + %s.bim + %s.fam written.\n", outname, outname, outname);
+ if (indel_ct) {
+ LOGPRINTF("%u variants with indel calls present. '--snps-only no-DI' or\n--list-23-indels may be useful here.\n", indel_ct);
+ }
+ if (!(modifier_23 & M23_SEX)) {
+ LOGPRINTF("Inferred sex: %smale.\n", is_male? "" : "fe");
+ }
+ if (modifier_23 & M23_MALE) {
+ if (y_present && (!nonmissing_y_present)) {
+ if (x_present) {
+ if (!haploid_x_present) {
+ logerrprint("Warning: No explicit haploid calls on X chromosome, and no nonmissing calls on\nY chromosome. Double-check whether this is really a male sample.\n");
+ }
+ } else {
+ logerrprint("Warning: No nonmissing calls on Y chromosome. Double-check whether this is\nreally a male sample.\n");
}
- } else {
- logerrprint("Warning: No nonmissing calls on Y chromosome. Double-check whether this is\nreally a male sample.\n");
}
}
}
@@ -10261,7 +10340,7 @@ int32_t bed_from_23(char* infile_name, char* outname, char* outname_end, uint32_
}
int32_t generate_dummy(char* outname, char* outname_end, uint32_t flags, uintptr_t marker_ct, uintptr_t sample_ct, double geno_mrate, double pheno_mrate, int32_t missing_pheno) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t sample_ct4 = (sample_ct + 3) / 4;
uintptr_t urand = 0;
@@ -10456,19 +10535,19 @@ int32_t generate_dummy(char* outname, char* outname_end, uint32_t flags, uintptr
reverse_loadbuf(sample_ct, writebuf);
}
if (fwrite_checked(writebuf, sample_ct4, outfile)) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
goto generate_dummy_ret_WRITE_FAIL;
}
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
*outname_end = '\0';
LOGPRINTFWW("Dummy data (%" PRIuPTR " %s, %" PRIuPTR " SNP%s) written to %s.bed + %s.bim + %s.fam .\n", sample_ct, species_str(sample_ct), marker_ct, (marker_ct == 1)? "" : "s", outname, outname, outname);
while (0) {
@@ -10819,16 +10898,16 @@ void simulate_init_freqs_cc(uint32_t do_haps, double dprime, double* freqs, doub
}
int32_t simulate_dataset(char* outname, char* outname_end, uint32_t flags, char* simulate_fname, uint32_t case_ct, uint32_t ctrl_ct, double prevalence, uint32_t sample_ct, double missing_freq, char* name_prefix) {
- FILE* infile = NULL;
- FILE* outfile_txt = NULL;
- FILE* outfile_simfreq = NULL;
- FILE* outfile_bed = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile_txt = nullptr;
+ FILE* outfile_simfreq = nullptr;
+ FILE* outfile_bed = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
- double* qt_vals = NULL;
+ double* qt_vals = nullptr;
char* cur_snp_label = &(g_textbuf[MAXLINELEN]);
- char* marker_freq_lb_ptr = NULL;
- char* marker_ld_ptr = NULL;
- uintptr_t* writebuf2 = NULL;
+ char* marker_freq_lb_ptr = nullptr;
+ char* marker_ld_ptr = nullptr;
+ uintptr_t* writebuf2 = nullptr;
double dxx = 0;
double dyy = 0;
double qt_totvar = 0;
@@ -10973,7 +11052,7 @@ int32_t simulate_dataset(char* outname, char* outname_end, uint32_t flags, char*
if (!ullii) {
sprintf(g_logbuf, "Error: --simulate%s input file specifies zero SNPs.\n", is_qt? "-qt" : "");
goto simulate_ret_INVALID_FORMAT_2N;
- } else if (ullii > (do_haps? 0x3fffffff : 0x7fffffff)) {
+ } else if (ullii > (do_haps? 0x3ffffffe : 0x7ffffffd)) {
sprintf(g_logbuf, "Error: --simulate%s input file specifies too many SNPs.\n", is_qt? "-qt" : "");
goto simulate_ret_INVALID_FORMAT_2N;
}
@@ -11055,14 +11134,14 @@ int32_t simulate_dataset(char* outname, char* outname_end, uint32_t flags, char*
sprintf(g_logbuf, "Error: Invalid heterozygote disease odds ratio on line %" PRIuPTR " of\n--simulate file.\n", line_idx);
goto simulate_ret_INVALID_FORMAT_2N;
}
- if ((strlen_se(last_ptr) == 4) && match_upper_nt(last_ptr, "MULT", 4)) {
+ if ((strlen_se(last_ptr) == 4) && match_upper_counted(last_ptr, "MULT", 4)) {
hom0_odds = het_odds * het_odds;
} else if (scan_double(last_ptr, &hom0_odds) || (hom0_odds < 0)) {
sprintf(g_logbuf, "Error: Invalid homozygote disease odds ratio on line %" PRIuPTR " of --simulate\nfile.\n", line_idx);
goto simulate_ret_INVALID_FORMAT_2N;
}
if ((!zero_odds_ratio_warning_given) && ((het_odds == 0) || (hom0_odds == 0))) {
- putchar('\r');
+ putc_unlocked('\r', stdout);
logstr("\n");
logerrprint("Warning: Zero odds ratio present in --simulate input file. Did you mean\n--simulate-qt instead?\n");
zero_odds_ratio_warning_given = 1;
@@ -11329,7 +11408,7 @@ int32_t simulate_dataset(char* outname, char* outname_end, uint32_t flags, char*
}
if (cur_marker_idx >= loop_end) {
if (pct > 9) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = ((cur_marker_idx + marker_idx_offset) * 100LLU) / marker_ct;
printf("\b\b%u%%", pct);
@@ -11395,7 +11474,7 @@ int32_t simulate_dataset(char* outname, char* outname_end, uint32_t flags, char*
}
*outname_end = '\0';
if (pct > 9) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -11430,7 +11509,7 @@ int32_t simulate_dataset(char* outname, char* outname_end, uint32_t flags, char*
int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_allele_name, char*** allele_missing_ptr, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* recode_allele_reverse, char* recode_allele_extra) {
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* rafile = NULL;
+ FILE* rafile = nullptr;
uint32_t missing_allele = 0;
uint32_t marker_id_htable_size = get_id_htable_size(marker_ct);
uintptr_t rae_size = 0;
@@ -11514,7 +11593,7 @@ int32_t recode_allele_load(char* loadbuf, uintptr_t loadbuf_size, char* recode_a
recode_allele_extra = (char*)bigstack_alloc(rae_size);
} else {
bigstack_reset(*allele_missing_ptr);
- *allele_missing_ptr = NULL;
+ *allele_missing_ptr = nullptr;
}
return retval;
}
@@ -11566,19 +11645,19 @@ static inline int32_t recode_write_first_cols(FILE* outfile, uintptr_t sample_ui
char* cptr = &(sample_ids[sample_uidx * max_sample_id_len]);
uintptr_t ulii = strlen_se(cptr);
fwrite(cptr, 1, ulii, outfile);
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
fputs(&(cptr[ulii + 1]), outfile);
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
fputs(paternal_ids? (&(paternal_ids[sample_uidx * max_paternal_id_len])) : "0", outfile);
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
fputs(maternal_ids? (&(maternal_ids[sample_uidx * max_maternal_id_len])) : "0", outfile);
- putc(delimiter, outfile);
- putc(sexchar(sex_nm, sex_male, sample_uidx), outfile);
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
+ putc_unlocked(sexchar(sex_nm, sex_male, sample_uidx), outfile);
+ putc_unlocked(delimiter, outfile);
if (!IS_SET(pheno_nm, sample_uidx)) {
fputs(output_missing_pheno, outfile);
} else if (pheno_c) {
- putc('1' + IS_SET(pheno_c, sample_uidx), outfile);
+ putc_unlocked('1' + IS_SET(pheno_c, sample_uidx), outfile);
} else {
cptr = dtoa_g(pheno_d[sample_uidx], wbuf);
fwrite(wbuf, 1, cptr - wbuf, outfile);
@@ -11844,7 +11923,7 @@ uint32_t write_ped_lines(FILE* outfile, unsigned char* loadbuf, uintptr_t* marke
return 1;
}
} else {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
}
@@ -11859,7 +11938,7 @@ uint32_t write_haploview_map(FILE* outfile, uintptr_t* marker_exclude, uintptr_t
for (marker_idx = 0; marker_idx < marker_ct; marker_uidx_start++, marker_idx++) {
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx_start);
fputs(&(marker_ids[marker_uidx_start * max_marker_id_len]), outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
wptr = uint32toa_x(marker_pos[marker_uidx_start], '\n', wbuf);
fwrite(wbuf, 1, wptr - wbuf, outfile);
}
@@ -11920,10 +11999,10 @@ int32_t flexbputc_checked(unsigned char ucc, uint32_t output_bgz, FILE* outfile,
}
int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, char* recode_allele_name, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uint32_t* marker_pos, uintptr_t* marker_reverse, char* sample_ids, [...]
- FILE* outfile = NULL;
- FILE* outfile2 = NULL;
- BGZF* bgz_outfile = NULL;
- char* pzwritep = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile2 = nullptr;
+ BGZF* bgz_outfile = nullptr;
+ char* pzwritep = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
@@ -11935,21 +12014,21 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
char delimiter = (recode_modifier & RECODE_TAB)? '\t' : ' ';
- uintptr_t* recode_allele_reverse = NULL;
- uintptr_t* sample_exclude_y = NULL;
- uintptr_t* cur_sample_exclude = NULL;
+ uintptr_t* recode_allele_reverse = nullptr;
+ uintptr_t* sample_exclude_y = nullptr;
+ uintptr_t* cur_sample_exclude = nullptr;
char** mk_allele_ptrs = marker_allele_ptrs;
- char** allele_missing = NULL;
- char* recode_allele_extra = NULL;
- unsigned char* overflow_buf = NULL;
+ char** allele_missing = nullptr;
+ char* recode_allele_extra = nullptr;
+ unsigned char* overflow_buf = nullptr;
const char* missing_geno_ptr = g_missing_geno_ptr;
char delim2 = delimiter;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_include2_y = NULL;
- uintptr_t* cur_sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* sample_male_include2_y = NULL;
- uintptr_t* cur_sample_male_include2 = NULL;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_include2_y = nullptr;
+ uintptr_t* cur_sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* sample_male_include2_y = nullptr;
+ uintptr_t* cur_sample_male_include2 = nullptr;
uint32_t lgen_ref = (recode_modifier & RECODE_LGEN_REF);
uint32_t rlist = (recode_modifier & RECODE_RLIST);
uint32_t beagle_nomap = (recode_modifier & RECODE_BEAGLE_NOMAP);
@@ -11973,17 +12052,17 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
uint32_t last_pos = 0;
char missing_geno = *g_missing_geno_ptr;
char output_missing_geno = *g_output_missing_geno_ptr;
- uintptr_t* loadbuf_collapsed = NULL;
- uintptr_t* loadbuf_collapsed_end = NULL;
- char* sample_ids_collapsed = NULL;
- char* sample_ids_collapsed_y = NULL;
- char* cur_sample_ids_collapsed = NULL;
- char* writebuf = NULL;
- char* writebuf2 = NULL;
- char* writebuf3 = NULL;
- uint32_t* fid_map = NULL;
- uint32_t* missing_cts = NULL;
- char* cur_mk_allelesx_buf = NULL;
+ uintptr_t* loadbuf_collapsed = nullptr;
+ uintptr_t* loadbuf_collapsed_end = nullptr;
+ char* sample_ids_collapsed = nullptr;
+ char* sample_ids_collapsed_y = nullptr;
+ char* cur_sample_ids_collapsed = nullptr;
+ char* writebuf = nullptr;
+ char* writebuf2 = nullptr;
+ char* writebuf3 = nullptr;
+ uint32_t* fid_map = nullptr;
+ uint32_t* missing_cts = nullptr;
+ char* cur_mk_allelesx_buf = nullptr;
int32_t retval = 0;
char* writebufl[4];
char* writebuflp[4];
@@ -12135,9 +12214,9 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
cptr = &(sample_ids[sample_uidx * max_sample_id_len]);
aptr = (char*)memchr(cptr, '\t', max_sample_id_len);
- ulii = (uintptr_t)(aptr - cptr);
- wbufptr = memcpyax(wbufptr, cptr, ulii, ' ');
- wbufptr = memcpyax(wbufptr, cptr, ulii, ' ');
+ uljj = (uintptr_t)(aptr - cptr);
+ wbufptr = memcpyax(wbufptr, cptr, uljj, ' ');
+ wbufptr = memcpyax(wbufptr, cptr, uljj, ' ');
}
wbufptr = memcpya(wbufptr, "\nI IID ", 7);
sample_uidx = 0;
@@ -12146,9 +12225,9 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
cptr = &(sample_ids[sample_uidx * max_sample_id_len]);
cptr = (char*)memchr(cptr, '\t', max_sample_id_len);
cptr++;
- ulii = strlen(cptr);
- wbufptr = memcpyax(wbufptr, cptr, ulii, ' ');
- wbufptr = memcpyax(wbufptr, cptr, ulii, ' ');
+ uljj = strlen(cptr);
+ wbufptr = memcpyax(wbufptr, cptr, uljj, ' ');
+ wbufptr = memcpyax(wbufptr, cptr, uljj, ' ');
}
sample_uidx = 0;
if (pheno_c) {
@@ -12339,7 +12418,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
bigstack_alloc_c(16, &writebuf2)) {
goto recode_ret_NOMEM;
}
- fill_uint_zero(fid_map, fid_ct);
+ fill_uint_zero(fid_ct, fid_map);
} else {
if (recode_modifier & RECODE_A_TRANSPOSE) {
// format is new to PLINK 1.9, so use tab delimiter unless 'spacex'
@@ -12388,8 +12467,8 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
ulii = 0;
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
uljj = 0;
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- for (marker_uidx = next_unset_ul(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end); marker_uidx < chrom_end;) {
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ for (marker_uidx = next_unset_ul(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end); marker_uidx < chrom_end;) {
alen = strlen(mk_allele_ptrs[marker_uidx * 2]);
alen2 = strlen(mk_allele_ptrs[marker_uidx * 2 + 1]);
uljj += MAXV(alen, alen2) + 1;
@@ -12445,7 +12524,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_NOMEM;
}
recode_allele_extra = (char*)g_bigstack_base;
- fill_ulong_zero((uintptr_t*)allele_missing, unfiltered_marker_ct);
+ fill_ulong_zero(unfiltered_marker_ct, (uintptr_t*)allele_missing);
ulii = round_up_pow2(max_marker_allele_len + MAXLINELEN, END_ALLOC_CHUNK);
loadbuf = (unsigned char*)bigstack_end_alloc_presized(ulii);
if (!loadbuf) {
@@ -12582,7 +12661,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -12605,9 +12684,9 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
}
aptr = (char*)memchr(cptr, '\t', max_sample_id_len);
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
fwrite(cptr, 1, (uintptr_t)(aptr - cptr), outfile);
- putc('_', outfile);
+ putc_unlocked('_', outfile);
fputs(&(aptr[1]), outfile);
}
if (putc_checked('\n', outfile)) {
@@ -12648,12 +12727,12 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
uii = IS_NONNULL_AND_SET(recode_allele_reverse, marker_uidx);
if (allele_missing && allele_missing[marker_uidx]) {
fputs(allele_missing[marker_uidx], outfile);
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
fputs(mk_allele_ptrs[2 * marker_uidx + uii], outfile);
- putc(',', outfile);
+ putc_unlocked(',', outfile);
} else {
fputs(mk_allele_ptrs[2 * marker_uidx + uii], outfile);
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
}
fputs(mk_allele_ptrs[2 * marker_uidx + 1 - uii], outfile);
wbufptr = writebuf;
@@ -12700,7 +12779,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -12754,7 +12833,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
cptr = memcpya(cptr, ",length=", 8);
if (!(map_is_unsorted & UNSORTED_BP)) {
- cptr = uint32toa(marker_pos[chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1] - 1] + 1, cptr);
+ cptr = uint32toa(marker_pos[chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1] - 1] + 1, cptr);
} else {
cptr = memcpya(cptr, "2147483645", 10); // unknown
}
@@ -12942,7 +13021,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -12953,10 +13032,10 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (output_bgz) {
if (bgzf_close(bgz_outfile)) {
- bgz_outfile = NULL;
+ bgz_outfile = nullptr;
goto recode_ret_WRITE_FAIL;
}
- bgz_outfile = NULL;
+ bgz_outfile = nullptr;
}
} else if (recode_modifier & RECODE_OXFORD) {
memcpy(outname_end, output_gen_gz? ".gen.gz" : ".gen", output_gen_gz? 8 : 5);
@@ -13027,7 +13106,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -13046,8 +13125,8 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
, outfile)) {
goto recode_ret_WRITE_FAIL;
}
- putc(pheno_d? 'P' : 'B', outfile);
- putc('\n', outfile);
+ putc_unlocked(pheno_d? 'P' : 'B', outfile);
+ putc_unlocked('\n', outfile);
dxx = 1.0 / ((double)((intptr_t)marker_ct));
for (sample_idx = 0, sample_uidx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
@@ -13114,7 +13193,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
if (fseeko(bedfile, bed_offset + (sample_uidx / 4) + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
goto recode_ret_READ_FAIL;
}
- ii = fgetc(bedfile);
+ ii = getc_unlocked(bedfile);
if (ii == EOF) {
goto recode_ret_READ_FAIL;
}
@@ -13153,8 +13232,8 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
// for backward compatibility, also exclude XY. don't exclude custom name
// chromosomes, though, since chromosome 0 was actually processed
autosomal_marker_ct = marker_ct - count_non_autosomal_markers(chrom_info_ptr, marker_exclude, 1, 1);
- if (chrom_info_ptr->xy_code != -1) {
- autosomal_marker_ct -= count_chrom_markers(chrom_info_ptr, marker_exclude, chrom_info_ptr->xy_code);
+ if (chrom_info_ptr->xymt_codes[XY_OFFSET] != -1) {
+ autosomal_marker_ct -= count_chrom_markers(chrom_info_ptr, marker_exclude, chrom_info_ptr->xymt_codes[XY_OFFSET]);
}
if (!autosomal_marker_ct) {
// could allow this?
@@ -13173,7 +13252,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
fputs("0%", stdout);
fflush(stdout);
marker_uidx = next_unset_unsafe(marker_exclude, 0);
- retval = recode_beagle_new_chrom(outname, &(outname_end[5]), marker_exclude, chrom_info_ptr, &marker_uidx, &chrom_fo_idx, &chrom_idx, &chrom_end, bedfile, bed_offset, unfiltered_sample_ct4, &outfile, beagle_nomap? NULL : (&outfile2), writebuf2, header_len);
+ retval = recode_beagle_new_chrom(outname, &(outname_end[5]), marker_exclude, chrom_info_ptr, &marker_uidx, &chrom_fo_idx, &chrom_idx, &chrom_end, bedfile, bed_offset, unfiltered_sample_ct4, &outfile, beagle_nomap? nullptr : (&outfile2), writebuf2, header_len);
if (retval) {
goto recode_ret_1;
}
@@ -13184,7 +13263,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
if (fwrite_checked(writebuf2, header_len, outfile)) {
goto recode_ret_WRITE_FAIL;
}
- outfile2 = NULL;
+ outfile2 = nullptr;
}
for (pct = 1; pct <= 100; pct++) {
loop_end = (((uint64_t)pct) * autosomal_marker_ct) / 100;
@@ -13202,7 +13281,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_WRITE_FAIL;
}
}
- retval = recode_beagle_new_chrom(outname, &(outname_end[5]), marker_exclude, chrom_info_ptr, &marker_uidx, &chrom_fo_idx, &chrom_idx, &chrom_end, bedfile, bed_offset, unfiltered_sample_ct4, &outfile, beagle_nomap? NULL : (&outfile2), writebuf2, header_len);
+ retval = recode_beagle_new_chrom(outname, &(outname_end[5]), marker_exclude, chrom_info_ptr, &marker_uidx, &chrom_fo_idx, &chrom_idx, &chrom_end, bedfile, bed_offset, unfiltered_sample_ct4, &outfile, beagle_nomap? nullptr : (&outfile2), writebuf2, header_len);
if (retval) {
goto recode_ret_1;
}
@@ -13215,12 +13294,12 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_WRITE_FAIL;
}
fputs(cptr, outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (outfile2) {
if (fputs_checked(cptr, outfile2)) {
goto recode_ret_WRITE_FAIL;
}
- putc('\t', outfile2);
+ putc_unlocked('\t', outfile2);
wbufptr = uint32toa_x(marker_pos[marker_uidx], '\t', g_textbuf);
fwrite(g_textbuf, 1, wbufptr - g_textbuf, outfile2);
}
@@ -13235,7 +13314,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
memcpy(wbufptr, aptr2, alen2);
if (outfile2) {
fputs((aptr != missing_geno_ptr)? aptr : "X", outfile2);
- putc('\t', outfile2);
+ putc_unlocked('\t', outfile2);
fputs((aptr2 != missing_geno_ptr)? aptr2 : "X", outfile2);
}
cmalen[2] = alen + alen2 + 2;
@@ -13271,7 +13350,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -13314,8 +13393,8 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
wbufptr = uint32toa(marker_pos[marker_uidx], &(writebuf2[1]));
if (ulii) {
if (marker_uidx >= chrom_end) {
- chrom_idx = get_marker_chrom(chrom_info_ptr, marker_uidx);
- chrom_end = chrom_info_ptr->chrom_end[chrom_idx];
+ chrom_idx = get_variant_chrom(chrom_info_ptr, marker_uidx);
+ chrom_end = get_chrom_end_vidx(chrom_info_ptr, chrom_idx);
}
*wbufptr++ = ' ';
wbufptr = chrom_name_write(chrom_info_ptr, chrom_idx, wbufptr);
@@ -13337,7 +13416,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
if (IS_SET(pheno_nm, sample_uidx)) {
- putc('1' + IS_SET(pheno_c, sample_uidx), outfile);
+ putc_unlocked('1' + IS_SET(pheno_c, sample_uidx), outfile);
} else {
fputs(output_missing_pheno, outfile);
}
@@ -13376,7 +13455,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
for (sample_idx = 0; sample_idx < sample_ct; sample_uidx++, sample_idx++) {
next_unset_ul_unsafe_ck(sample_exclude, &sample_uidx);
cptr = (char*)memchr(&(sample_ids[sample_uidx * max_sample_id_len]), '\t', max_sample_id_len);
- putc(',', outfile);
+ putc_unlocked(',', outfile);
fputs(&(cptr[1]), outfile);
}
if (putc_checked('\n', outfile)) {
@@ -13448,11 +13527,11 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_WRITE_FAIL;
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -13505,7 +13584,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_1;
}
marker_uidx_start = marker_uidx;
- if (recode_load_to(loadbuf, bedfile, bed_offset, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1], 0, ulii, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
+ if (recode_load_to(loadbuf, bedfile, bed_offset, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1], 0, ulii, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
goto recode_ret_READ_FAIL;
}
if (set_hh_missing) {
@@ -13528,7 +13607,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
cptr = (char*)memchr(&(sample_ids[sample_uidx * max_sample_id_len]), '\t', max_sample_id_len);
fputs(&(cptr[1]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
bufptr = &(loadbuf[sample_uidx / 4]);
shiftval = (sample_uidx % 4) * 2;
aptr = writebuf;
@@ -13566,7 +13645,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
} while (marker_idx < ulii);
fwrite(writebuf, 1, ulii, outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
fwrite(writebuf2, 1, ulii, outfile);
if (putc_checked('\n', outfile)) {
goto recode_ret_WRITE_FAIL;
@@ -13574,7 +13653,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (recode_modifier & RECODE_FASTPHASE) {
if (chrom_idx > onechar_max) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
}
if (fclose_null(&outfile)) {
@@ -13660,11 +13739,11 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
if ((aptr[0] != missing_geno) || aptr[1] || (aptr2[0] != missing_geno) || aptr2[1]) {
fputs(wbufptr, outfile2);
if ((aptr[0] != missing_geno) || aptr[1]) {
- putc(delimiter, outfile2);
+ putc_unlocked(delimiter, outfile2);
fputs(aptr, outfile2);
}
if ((aptr2[0] != missing_geno) || aptr2[1]) {
- putc(delimiter, outfile2);
+ putc_unlocked(delimiter, outfile2);
fputs(aptr2, outfile2);
}
if (putc_checked('\n', outfile2)) {
@@ -13700,7 +13779,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -13727,22 +13806,22 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
} else {
aptr = mk_allele_ptrs[2 * marker_uidx + uii];
}
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
fputs(cptr, outfile);
- putc('_', outfile);
+ putc_unlocked('_', outfile);
fputs(aptr, outfile);
if (recode_modifier & RECODE_INCLUDE_ALT) {
- putc('(', outfile);
- putc('/', outfile);
+ putc_unlocked('(', outfile);
+ putc_unlocked('/', outfile);
if (allele_missing && allele_missing[marker_uidx]) {
fputs(mk_allele_ptrs[2 * marker_uidx + uii], outfile);
- putc(',', outfile);
+ putc_unlocked(',', outfile);
}
fputs(mk_allele_ptrs[2 * marker_uidx + 1 - uii], outfile);
- putc(')', outfile);
+ putc_unlocked(')', outfile);
}
if (recode_modifier & RECODE_AD) {
- putc(delimiter, outfile);
+ putc_unlocked(delimiter, outfile);
fputs(cptr, outfile);
fputs("_HET", outfile);
}
@@ -13823,12 +13902,12 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_WRITE_FAIL;
}
} else {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -14016,7 +14095,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -14065,7 +14144,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
goto recode_ret_OPEN_FAIL;
}
marker_uidx_start = marker_uidx;
- if (recode_load_to(loadbuf, bedfile, bed_offset, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1], 0, ulii, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
+ if (recode_load_to(loadbuf, bedfile, bed_offset, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1], 0, ulii, marker_exclude, marker_reverse, &marker_uidx, unfiltered_sample_ct)) {
goto recode_ret_READ_FAIL;
}
if (set_hh_missing && marker_ct) {
@@ -14091,7 +14170,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
}
if (recode_modifier & RECODE_HV) {
if (chrom_idx > onechar_max) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
*wbufptr = '\0';
LOGPREPRINTFWW("%s.ped + %s.info created.\n", outname, outname);
@@ -14113,7 +14192,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
for (marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
if (putc_checked('\n', outfile)) {
goto recode_ret_WRITE_FAIL;
@@ -14125,7 +14204,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
if (marker_uidx >= chrom_end) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1];
} while (marker_uidx >= chrom_end);
fputs("-1 ", outfile);
} else {
@@ -14178,11 +14257,11 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
if (fwrite_checked(writebuf, wbufptr - writebuf, outfile)) {
goto recode_ret_WRITE_FAIL;
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -14215,7 +14294,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
sample_idx = loop_end;
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -14237,7 +14316,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
delimiter = ' ';
}
}
- retval = write_fam(outname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm, pheno_c, pheno_d, output_missing_pheno, delimiter, NULL);
+ retval = write_fam(outname, unfiltered_sample_ct, sample_exclude, sample_ct, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, sex_nm, sex_male, pheno_nm, pheno_c, pheno_d, output_missing_pheno, delimiter, nullptr);
if (retval) {
goto recode_ret_1;
}
@@ -14245,7 +14324,7 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
if (!(recode_modifier & (RECODE_TRANSPOSE | RECODE_23 | RECODE_A | RECODE_A_TRANSPOSE | RECODE_AD | RECODE_BEAGLE | RECODE_BEAGLE_NOMAP | RECODE_BIMBAM | RECODE_BIMBAM_1CHR | RECODE_FASTPHASE | RECODE_FASTPHASE_1CHR | RECODE_HV | RECODE_HV_1CHR | RECODE_LIST | RECODE_STRUCTURE | RECODE_VCF))) {
strcpy(outname_end, ".map");
- retval = write_map_or_bim(outname, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, NULL, ((recode_modifier & (RECODE_TAB | RECODE_DELIMX)) == RECODE_DELIMX)? ' ' : '\t', chrom_info_ptr);
+ retval = write_map_or_bim(outname, marker_exclude, marker_ct, marker_ids, max_marker_id_len, marker_cms, marker_pos, nullptr, ((recode_modifier & (RECODE_TAB | RECODE_DELIMX)) == RECODE_DELIMX)? ' ' : '\t', chrom_info_ptr);
if (retval) {
goto recode_ret_1;
}
@@ -14314,9 +14393,9 @@ int32_t recode(uint32_t recode_modifier, FILE* bedfile, uintptr_t bed_offset, ch
int32_t sample_sort_file_map(char* sample_sort_fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, uint32_t** sample_sort_map_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t sample_ctl = BITCT_TO_WORDCT(sample_ct);
- FILE* infile = NULL;
+ FILE* infile = nullptr;
// temporary: sample_id_map[ascii-sorted idx] = uidx in input fileset
- uint32_t* sample_id_map = NULL;
+ uint32_t* sample_id_map = nullptr;
uintptr_t line_idx = 0;
uint32_t cur_seq = 0;
int32_t retval = 0;
@@ -14363,7 +14442,7 @@ int32_t sample_sort_file_map(char* sample_sort_fname, uintptr_t unfiltered_sampl
if (is_eoln_kns(*bufptr)) {
continue;
}
- if (bsearch_read_fam_indiv(bufptr, sorted_sample_ids, max_sample_id_len, sample_ct, NULL, &ii, idbuf)) {
+ if (bsearch_read_fam_indiv(bufptr, sorted_sample_ids, max_sample_id_len, sample_ct, nullptr, &ii, idbuf)) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --indiv-sort file has fewer tokens than expected.\n", line_idx);
goto sample_sort_file_map_ret_INVALID_FORMAT_2;
}
@@ -14468,7 +14547,7 @@ int32_t merge_fam_id_scan(char* bedname, char* famname, uint32_t allow_no_sample
uint64_t tot_sample_ct = *tot_sample_ct_ptr;
uintptr_t max_sample_id_len = *max_sample_id_len_ptr;
uintptr_t line_idx = 0;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uint32_t max_sample_full_len = *max_sample_full_len_ptr;
uint32_t is_dichot_pheno = *is_dichot_pheno_ptr;
uint32_t orig_idx = *orig_idx_ptr;
@@ -14591,7 +14670,7 @@ int32_t merge_fam_id_scan(char* bedname, char* famname, uint32_t allow_no_sample
if (bigstack_end_alloc_llfam(tot_len, &llfam_ptr)) {
goto merge_fam_id_scan_ret_NOMEM;
}
- llfam_ptr->next = NULL;
+ llfam_ptr->next = nullptr;
llfam_ptr->pheno = pheno;
llfam_ptr->orig_order = orig_idx++;
wptr = memcpyax(memcpyax(memcpyax(memcpyax(llfam_ptr->idstr, col1_start_ptr, col1_len, '\t'), col2_start_ptr, col2_len, '\t'), col3_start_ptr, col3_len, '\t'), col4_start_ptr, col4_len, '\t');
@@ -14668,7 +14747,7 @@ int32_t merge_sample_sortf(char* sample_sort_fname, char* sample_fids, uintptr_t
for (sample_uidx = 0; sample_uidx < tot_sample_ct; sample_uidx++) {
strcpy(&(sample_ids[sample_uidx * max_sample_id_len]), &(sample_fids[sample_uidx * max_sample_full_len]));
}
- retval = sample_sort_file_map(sample_sort_fname, tot_sample_ct, NULL, tot_sample_ct, sample_ids, max_sample_id_len, &map_reverse);
+ retval = sample_sort_file_map(sample_sort_fname, tot_sample_ct, nullptr, tot_sample_ct, sample_ids, max_sample_id_len, &map_reverse);
bigstack_reset(bigstack_mark);
return retval;
}
@@ -14682,7 +14761,7 @@ int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uint32_t allow_no_vari
uint64_t position_warning_ct = *position_warning_ct_ptr;
uint32_t cur_marker_ct = 0;
double cm = 0.0;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
int32_t retval = 0;
uint32_t alen1 = 1;
uint32_t alen2 = 1;
@@ -14705,243 +14784,243 @@ int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uint32_t allow_no_vari
uint32_t uii;
uint32_t ujj;
uint32_t ukk;
- int32_t ii;
int32_t jj;
- if (fopen_checked(bimname, "r", &infile)) {
- goto merge_bim_scan_ret_OPEN_FAIL;
- }
- if (is_binary) {
- loadbuf_size = (bigstack_left() / 2) & (~(CACHELINE - ONELU));
- if (bigstack_left() > 0x3fffffc0) {
- loadbuf_size = 0x3fffffc0;
- } else if (loadbuf_size <= MAXLINELEN) {
- goto merge_bim_scan_ret_NOMEM;
+ {
+ if (fopen_checked(bimname, "r", &infile)) {
+ goto merge_bim_scan_ret_OPEN_FAIL;
}
- }
- bigstack_alloc_c(loadbuf_size, &loadbuf);
- loadbuf[loadbuf_size - 1] = ' ';
- if (check_cm_col(infile, loadbuf, is_binary, allow_no_variants, loadbuf_size, &cm_col_exists, &line_idx)) {
- goto merge_bim_scan_ret_MISSING_TOKENS;
- }
- if (!line_idx) {
- // no variants
- *cur_marker_ct_ptr = 0;
- goto merge_bim_scan_ret_1;
- }
- line_idx--;
- do {
- line_idx++;
- if (!loadbuf[loadbuf_size - 1]) {
- if ((loadbuf_size == 0x3fffffc0) || ((!is_binary) && (loadbuf_size == MAXLINELEN))) {
- LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, bimname);
- goto merge_bim_scan_ret_INVALID_FORMAT_2;
- } else {
+ if (is_binary) {
+ loadbuf_size = (bigstack_left() / 2) & (~(CACHELINE - ONELU));
+ if (bigstack_left() > 0x3fffffc0) {
+ loadbuf_size = 0x3fffffc0;
+ } else if (loadbuf_size <= MAXLINELEN) {
goto merge_bim_scan_ret_NOMEM;
}
}
- uii = strlen(loadbuf);
- if (uii >= max_bim_linelen) {
- max_bim_linelen = uii + 1;
+ bigstack_alloc_c(loadbuf_size, &loadbuf);
+ loadbuf[loadbuf_size - 1] = ' ';
+ if (check_cm_col(infile, loadbuf, is_binary, allow_no_variants, loadbuf_size, &cm_col_exists, &line_idx)) {
+ goto merge_bim_scan_ret_MISSING_TOKENS;
}
- bufptr = skip_initial_spaces(loadbuf);
- if (is_eoln_or_comment_kns(*bufptr)) {
- continue;
+ if (!line_idx) {
+ // no variants
+ *cur_marker_ct_ptr = 0;
+ goto merge_bim_scan_ret_1;
}
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- if (chrom_error(bimname, chrom_info_ptr, bufptr, line_idx, ii, allow_extra_chroms)) {
- goto merge_bim_scan_ret_INVALID_FORMAT;
+ line_idx--;
+ do {
+ line_idx++;
+ if (!loadbuf[loadbuf_size - 1]) {
+ if ((loadbuf_size == 0x3fffffc0) || ((!is_binary) && (loadbuf_size == MAXLINELEN))) {
+ LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, bimname);
+ goto merge_bim_scan_ret_INVALID_FORMAT_2;
+ } else {
+ goto merge_bim_scan_ret_NOMEM;
+ }
+ }
+ uii = strlen(loadbuf);
+ if (uii >= max_bim_linelen) {
+ max_bim_linelen = uii + 1;
+ }
+ bufptr = skip_initial_spaces(loadbuf);
+ if (is_eoln_or_comment_kns(*bufptr)) {
+ continue;
}
- retval = resolve_or_add_chrom_name(bufptr, bimname, line_idx, chrom_info_ptr, &ii);
+ char* chrom_token_end = token_endnn(bufptr);
+ if (!(*chrom_token_end)) {
+ goto merge_bim_scan_ret_MISSING_TOKENS;
+ }
+ int32_t cur_chrom_code;
+ retval = get_or_add_chrom_code_destructive(bimname, line_idx, allow_extra_chroms, bufptr, chrom_token_end, chrom_info_ptr, &cur_chrom_code);
if (retval) {
goto merge_bim_scan_ret_1;
}
- }
- // do not filter on chrom_mask here, since that happens later
- bufptr = skip_initial_spaces(token_endnn(bufptr));
- if (is_eoln_kns(*bufptr)) {
- goto merge_bim_scan_ret_MISSING_TOKENS;
- }
- bufptr2 = token_endnn(bufptr);
- uii = bufptr2 - bufptr;
- bufptr2 = skip_initial_spaces(bufptr2);
- if (is_eoln_kns(*bufptr2)) {
- goto merge_bim_scan_ret_MISSING_TOKENS;
- }
- if (cm_col_exists) {
- if (scan_double(bufptr2, &cm)) {
- cm = 0;
+ // do not filter on chrom_mask here, since that happens later
+ bufptr = skip_initial_spaces(&(chrom_token_end[1]));
+ if (is_eoln_kns(*bufptr)) {
+ goto merge_bim_scan_ret_MISSING_TOKENS;
}
- bufptr2 = next_token(bufptr2);
- if (no_more_tokens_kns(bufptr2)) {
+ bufptr2 = token_endnn(bufptr);
+ uii = bufptr2 - bufptr;
+ bufptr2 = skip_initial_spaces(bufptr2);
+ if (is_eoln_kns(*bufptr2)) {
goto merge_bim_scan_ret_MISSING_TOKENS;
}
- }
- if (scan_int_abs_defcap(bufptr2, &jj)) {
- LOGPREPRINTFWW("Error: Invalid bp coordinate on line %" PRIuPTR " of %s.\n", line_idx, bimname);
- goto merge_bim_scan_ret_INVALID_FORMAT_2;
- }
- if (jj >= 0) {
- if (is_binary) {
- aptr1 = next_token(bufptr2);
- aptr2 = next_token(aptr1);
- if (no_more_tokens_kns(aptr2)) {
- goto merge_bim_scan_ret_MISSING_TOKENS;
- }
- alen1 = strlen_se(aptr1);
- alen2 = strlen_se(aptr2);
- aptr1[alen1] = '\0';
- aptr2[alen2] = '\0';
- if ((alen1 == 1) && (*aptr1 == '0')) {
- aptr1 = NULL;
+ if (cm_col_exists) {
+ if (scan_double(bufptr2, &cm)) {
+ cm = 0;
}
- if (aptr1 && (alen1 == alen2) && (!memcmp(aptr1, aptr2, alen1))) {
- LOGPREPRINTFWW("Error: Identical A1 and A2 alleles on line %" PRIuPTR " of %s.\n", line_idx, bimname);
- goto merge_bim_scan_ret_INVALID_FORMAT_2;
+ bufptr2 = next_token(bufptr2);
+ if (no_more_tokens_kns(bufptr2)) {
+ goto merge_bim_scan_ret_MISSING_TOKENS;
}
- if ((alen2 == 1) && (*aptr2 == '0')) {
- aptr2 = NULL;
+ }
+ if (scan_int_abs_defcap(bufptr2, &jj)) {
+ LOGPREPRINTFWW("Error: Invalid bp coordinate on line %" PRIuPTR " of %s.\n", line_idx, bimname);
+ goto merge_bim_scan_ret_INVALID_FORMAT_2;
+ }
+ if (jj >= 0) {
+ if (is_binary) {
+ aptr1 = next_token(bufptr2);
+ aptr2 = next_token(aptr1);
+ if (no_more_tokens_kns(aptr2)) {
+ goto merge_bim_scan_ret_MISSING_TOKENS;
+ }
+ alen1 = strlen_se(aptr1);
+ alen2 = strlen_se(aptr2);
+ aptr1[alen1] = '\0';
+ aptr2[alen2] = '\0';
+ if ((alen1 == 1) && (*aptr1 == '0')) {
+ aptr1 = nullptr;
+ }
+ if (aptr1 && (alen1 == alen2) && (!memcmp(aptr1, aptr2, alen1))) {
+ LOGPREPRINTFWW("Error: Identical A1 and A2 alleles on line %" PRIuPTR " of %s.\n", line_idx, bimname);
+ goto merge_bim_scan_ret_INVALID_FORMAT_2;
+ }
+ if ((alen2 == 1) && (*aptr2 == '0')) {
+ aptr2 = nullptr;
+ }
+ } else {
+ aptr1 = nullptr;
+ aptr2 = nullptr;
}
- } else {
- aptr1 = NULL;
- aptr2 = NULL;
- }
- llxx = (((uint64_t)((uint32_t)ii)) << 32) + ((uint32_t)jj);
- ujj = hashval2(bufptr, uii);
- llbim_pptr = &(htable_bim[ujj]);
- llbim_ptr = *llbim_pptr;
- name_match = 0;
- bufptr[uii++] = '\0';
- while (llbim_ptr) {
- if (!strcmp(llbim_ptr->idstr, bufptr)) {
- if (is_binary) {
- bufptr2 = llbim_ptr->allele[0];
- allele_ct = 0;
- if (bufptr2) {
- cur_alleles[0] = bufptr2;
- allele_ct = 1;
- }
- bufptr3 = llbim_ptr->allele[1];
- if (bufptr3) {
- cur_alleles[allele_ct++] = bufptr3;
- }
- if (aptr2) {
- for (ukk = 0; ukk < allele_ct; ukk++) {
- if (!strcmp(aptr2, cur_alleles[ukk])) {
- break;
- }
+ llxx = (((uint64_t)((uint32_t)cur_chrom_code)) << 32) + ((uint32_t)jj);
+ ujj = hashval2(bufptr, uii);
+ llbim_pptr = &(htable_bim[ujj]);
+ llbim_ptr = *llbim_pptr;
+ name_match = 0;
+ bufptr[uii++] = '\0';
+ while (llbim_ptr) {
+ if (!strcmp(llbim_ptr->idstr, bufptr)) {
+ if (is_binary) {
+ bufptr2 = llbim_ptr->allele[0];
+ allele_ct = 0;
+ if (bufptr2) {
+ cur_alleles[0] = bufptr2;
+ allele_ct = 1;
}
- if (ukk == allele_ct) {
- if (allele_ct == 2) {
- if (bigstack_end_alloc_llstr(uii, &llstr_new_ptr)) {
- goto merge_bim_scan_ret_NOMEM;
- }
- llstr_new_ptr->next = *non_biallelics_ptr;
- memcpy(llstr_new_ptr->ss, bufptr, uii);
- *non_biallelics_ptr = llstr_new_ptr;
- } else {
- if (allele_set(aptr2, alen2, &new_aptr)) {
- goto merge_bim_scan_ret_NOMEM;
+ bufptr3 = llbim_ptr->allele[1];
+ if (bufptr3) {
+ cur_alleles[allele_ct++] = bufptr3;
+ }
+ if (aptr2) {
+ for (ukk = 0; ukk < allele_ct; ukk++) {
+ if (!strcmp(aptr2, cur_alleles[ukk])) {
+ break;
}
- if (!llbim_ptr->allele[1]) {
- llbim_ptr->allele[1] = new_aptr;
+ }
+ if (ukk == allele_ct) {
+ if (allele_ct == 2) {
+ if (bigstack_end_alloc_llstr(uii, &llstr_new_ptr)) {
+ goto merge_bim_scan_ret_NOMEM;
+ }
+ llstr_new_ptr->next = *non_biallelics_ptr;
+ memcpy(llstr_new_ptr->ss, bufptr, uii);
+ *non_biallelics_ptr = llstr_new_ptr;
} else {
- llbim_ptr->allele[0] = new_aptr;
+ if (allele_set(aptr2, alen2, &new_aptr)) {
+ goto merge_bim_scan_ret_NOMEM;
+ }
+ if (!llbim_ptr->allele[1]) {
+ llbim_ptr->allele[1] = new_aptr;
+ } else {
+ llbim_ptr->allele[0] = new_aptr;
+ }
+ cur_alleles[allele_ct++] = new_aptr;
}
- cur_alleles[allele_ct++] = new_aptr;
- }
- }
- }
- if (aptr1) {
- for (ukk = 0; ukk < allele_ct; ukk++) {
- if (!strcmp(aptr1, cur_alleles[ukk])) {
- break;
}
}
- if (ukk == allele_ct) {
- if (allele_ct == 2) {
- if (bigstack_end_alloc_llstr(uii, &llstr_new_ptr)) {
- goto merge_bim_scan_ret_NOMEM;
- }
- llstr_new_ptr->next = *non_biallelics_ptr;
- memcpy(llstr_new_ptr->ss, bufptr, uii);
- *non_biallelics_ptr = llstr_new_ptr;
- } else {
- if (allele_set(aptr1, alen1, &new_aptr)) {
- goto merge_bim_scan_ret_NOMEM;
+ if (aptr1) {
+ for (ukk = 0; ukk < allele_ct; ukk++) {
+ if (!strcmp(aptr1, cur_alleles[ukk])) {
+ break;
}
- if (!llbim_ptr->allele[1]) {
- llbim_ptr->allele[1] = new_aptr;
+ }
+ if (ukk == allele_ct) {
+ if (allele_ct == 2) {
+ if (bigstack_end_alloc_llstr(uii, &llstr_new_ptr)) {
+ goto merge_bim_scan_ret_NOMEM;
+ }
+ llstr_new_ptr->next = *non_biallelics_ptr;
+ memcpy(llstr_new_ptr->ss, bufptr, uii);
+ *non_biallelics_ptr = llstr_new_ptr;
} else {
- llbim_ptr->allele[0] = new_aptr;
+ if (allele_set(aptr1, alen1, &new_aptr)) {
+ goto merge_bim_scan_ret_NOMEM;
+ }
+ if (!llbim_ptr->allele[1]) {
+ llbim_ptr->allele[1] = new_aptr;
+ } else {
+ llbim_ptr->allele[0] = new_aptr;
+ }
+ cur_alleles[allele_ct++] = new_aptr;
}
- cur_alleles[allele_ct++] = new_aptr;
}
}
}
- }
- if (llbim_ptr->pos != llxx) {
- if ((((uint64_t)llbim_ptr->pos) >> 32) == (((uint64_t)llxx) >> 32)) {
- LOGPREPRINTFWW("Warning: Multiple positions seen for variant '%s'.\n", bufptr);
- if (position_warning_ct < 3) {
- logerrprintb();
+ if (llbim_ptr->pos != llxx) {
+ if ((((uint64_t)llbim_ptr->pos) >> 32) == (((uint64_t)llxx) >> 32)) {
+ LOGPREPRINTFWW("Warning: Multiple positions seen for variant '%s'.\n", bufptr);
+ if (position_warning_ct < 3) {
+ logerrprintb();
+ } else {
+ logstr(g_logbuf);
+ }
+ position_warning_ct++;
} else {
- logstr(g_logbuf);
+ LOGERRPRINTFWW("Warning: Multiple chromosomes seen for variant '%s'.\n", bufptr);
}
- position_warning_ct++;
- } else {
- LOGERRPRINTFWW("Warning: Multiple chromosomes seen for variant '%s'.\n", bufptr);
}
+ name_match = 1;
+ break;
}
- name_match = 1;
- break;
- }
- llbim_pptr = &(llbim_ptr->next);
- llbim_ptr = *llbim_pptr;
- }
- if (!name_match) {
- if (uii > max_marker_id_len) {
- max_marker_id_len = uii;
- }
- if (bigstack_end_alloc_llbim(uii, &llbim_ptr)) {
- goto merge_bim_scan_ret_NOMEM;
+ llbim_pptr = &(llbim_ptr->next);
+ llbim_ptr = *llbim_pptr;
}
- llbim_ptr->next = NULL;
- llbim_ptr->pos = llxx;
- llbim_ptr->cm = cm;
- if (aptr1) {
- if (allele_set(aptr1, alen1, &(llbim_ptr->allele[0]))) {
- goto merge_bim_scan_ret_NOMEM;
+ if (!name_match) {
+ if (uii > max_marker_id_len) {
+ max_marker_id_len = uii;
}
- } else {
- llbim_ptr->allele[0] = NULL;
- }
- if (aptr2) {
- if (allele_set(aptr2, alen2, &(llbim_ptr->allele[1]))) {
+ if (bigstack_end_alloc_llbim(uii, &llbim_ptr)) {
goto merge_bim_scan_ret_NOMEM;
}
- } else {
- llbim_ptr->allele[1] = NULL;
+ llbim_ptr->next = nullptr;
+ llbim_ptr->pos = llxx;
+ llbim_ptr->cm = cm;
+ if (aptr1) {
+ if (allele_set(aptr1, alen1, &(llbim_ptr->allele[0]))) {
+ goto merge_bim_scan_ret_NOMEM;
+ }
+ } else {
+ llbim_ptr->allele[0] = nullptr;
+ }
+ if (aptr2) {
+ if (allele_set(aptr2, alen2, &(llbim_ptr->allele[1]))) {
+ goto merge_bim_scan_ret_NOMEM;
+ }
+ } else {
+ llbim_ptr->allele[1] = nullptr;
+ }
+ memcpy(llbim_ptr->idstr, bufptr, uii);
+ *llbim_pptr = llbim_ptr;
+ tot_marker_ct++;
}
- memcpy(llbim_ptr->idstr, bufptr, uii);
- *llbim_pptr = llbim_ptr;
- tot_marker_ct++;
+ cur_marker_ct++;
}
- cur_marker_ct++;
+ } while (fgets(loadbuf, loadbuf_size, infile));
+ if (!feof(infile)) {
+ goto merge_bim_scan_ret_READ_FAIL;
}
- } while (fgets(loadbuf, loadbuf_size, infile));
- if (!feof(infile)) {
- goto merge_bim_scan_ret_READ_FAIL;
- }
- if (!cur_marker_ct) {
- LOGPREPRINTFWW("Error: No variants in %s.\n", bimname);
- goto merge_bim_scan_ret_INVALID_FORMAT_2;
+ if (!cur_marker_ct) {
+ LOGPREPRINTFWW("Error: No variants in %s.\n", bimname);
+ goto merge_bim_scan_ret_INVALID_FORMAT_2;
+ }
+ *max_marker_id_len_ptr = max_marker_id_len;
+ *max_bim_linelen_ptr = max_bim_linelen;
+ *tot_marker_ct_ptr = tot_marker_ct;
+ *cur_marker_ct_ptr = cur_marker_ct;
+ *position_warning_ct_ptr = position_warning_ct;
}
- *max_marker_id_len_ptr = max_marker_id_len;
- *max_bim_linelen_ptr = max_bim_linelen;
- *tot_marker_ct_ptr = tot_marker_ct;
- *cur_marker_ct_ptr = cur_marker_ct;
- *position_warning_ct_ptr = position_warning_ct;
while (0) {
merge_bim_scan_ret_NOMEM:
@@ -14957,7 +15036,6 @@ int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uint32_t allow_no_vari
LOGPREPRINTFWW("Error: Line %" PRIuPTR " of %s has fewer tokens than expected.\n", line_idx, bimname);
merge_bim_scan_ret_INVALID_FORMAT_2:
logerrprintb();
- merge_bim_scan_ret_INVALID_FORMAT:
retval = RET_INVALID_FORMAT;
}
merge_bim_scan_ret_1:
@@ -14967,7 +15045,7 @@ int32_t merge_bim_scan(char* bimname, uint32_t is_binary, uint32_t allow_no_vari
}
int32_t report_non_biallelics(char* outname, char* outname_end, Ll_str* non_biallelics) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
int32_t retval = 0;
uintptr_t nbmarker_ct_dup = 0;
uintptr_t nbmarker_ct = 1;
@@ -15005,7 +15083,7 @@ int32_t report_non_biallelics(char* outname, char* outname_end, Ll_str* non_bial
if (fputs_checked(id_arr_ptr, outfile)) {
goto report_non_biallelics_ret_WRITE_FAIL;
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
id_arr_end = &(id_arr[(nbmarker_ct_dup - 1) * max_nbmarker_id_len]);
while (id_arr_ptr != id_arr_end) {
id_arr_ptr_old = id_arr_ptr;
@@ -15059,8 +15137,8 @@ void merge_alleles_update_str(char* marker_allele_ptr, char** allele_ptrs, uint3
uint32_t merge_alleles(char** marker_allele_ptrs, uint32_t marker_uidx, uint32_t marker_uidx2) {
uint32_t distinct_allele_ct = 0;
char* allele_ptrs[2];
- allele_ptrs[0] = NULL;
- allele_ptrs[1] = NULL;
+ allele_ptrs[0] = nullptr;
+ allele_ptrs[1] = nullptr;
// reverse order so --keep-allele-order works
merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx + 1], allele_ptrs, &distinct_allele_ct);
merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx], allele_ptrs, &distinct_allele_ct);
@@ -15218,8 +15296,8 @@ int32_t merge_diff_print(FILE* outfile, char* idbuf, char* marker_id, char* samp
int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbuf, uint32_t max_bim_linelen, uint32_t tot_sample_ct, uint32_t tot_marker_ct, uint32_t dedup_marker_ct, uint32_t start_marker_idx, uint32_t marker_window_size, char** marker_allele_ptrs, char* marker_ids, uintptr_t max_marker_id_len, char* sample_ids, uintptr_t max_sample_id_len, uint32_t merge_nsort, uint32_t* sample_nsmap, uint32_t* flex_map, uint32_t* marker_map, char* idbuf, unsigned char* readbuf, unsigne [...]
// flex_map maps samples for binary filesets, and markers for text filesets.
uint32_t is_binary = famname? 1 : 0;
- FILE* bedfile = NULL;
- FILE* infile2 = NULL;
+ FILE* bedfile = nullptr;
+ FILE* infile2 = nullptr;
int32_t retval = 0;
// bugfix: there was a potential integer overflow back when these were
// uint32_t
@@ -15229,8 +15307,8 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
uint32_t marker_in_idx = 0xffffffffU; // overflow to zero on first add
uint32_t last_marker_in_idx = 0xfffffffeU;
uint32_t cur_sample_ct = 0;
- uintptr_t* mbufptr = NULL; // merge mode 1, 4, 6, 7
- uintptr_t* readbuf_w = NULL; // used for main binary load
+ uintptr_t* mbufptr = nullptr; // merge mode 1, 4, 6, 7
+ uintptr_t* readbuf_w = nullptr; // used for main binary load
const char* missing_geno_ptr = g_missing_geno_ptr;
uint64_t diff_total_overlap = 0;
uint64_t diff_not_both_genotyped = 0;
@@ -15858,8 +15936,8 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
}
int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outname, char* outname_end, char* mergename1, char* mergename2, char* mergename3, char* sample_sort_fname, uint64_t calculation_type, uint32_t merge_type, uint32_t sample_sort, uint64_t misc_flags, Chrom_info* chrom_info_ptr) {
- FILE* mergelistfile = NULL;
- FILE* outfile = NULL;
+ FILE* mergelistfile = nullptr;
+ FILE* outfile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
uintptr_t max_sample_id_len = 0;
@@ -15874,16 +15952,16 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
uint32_t merge_equal_pos = (merge_type / MERGE_EQUAL_POS) & 1;
uint32_t allow_no_samples = (misc_flags / MISC_ALLOW_NO_SAMPLES) & 1;
uint32_t allow_no_variants = (misc_flags / MISC_ALLOW_NO_VARS) & 1;
- Ll_str* non_biallelics = NULL;
+ Ll_str* non_biallelics = nullptr;
uint32_t ped_buflen = MAXLINELEN;
uint32_t max_bim_linelen = 0;
char* missing_geno_ptr = (char*)g_missing_geno_ptr;
- char* pheno_c_char = NULL;
- double* pheno_d = NULL;
- uint32_t* sample_nsmap = NULL;
+ char* pheno_c_char = nullptr;
+ double* pheno_d = nullptr;
+ uint32_t* sample_nsmap = nullptr;
uint32_t max_cur_sample_ct = 0;
uint32_t max_cur_marker_text_ct = 0;
- uintptr_t* markbuf = NULL; // needed for merge modes 1, 4, 6, 7
+ uintptr_t* markbuf = nullptr; // needed for merge modes 1, 4, 6, 7
uint64_t diff_total_overlap = 0;
uint64_t diff_not_both_genotyped = 0;
uint64_t diff_discordant = 0;
@@ -15892,11 +15970,11 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
uint32_t cur_marker_ct = 0;
uint32_t tot_marker_ct = 0;
int32_t retval = 0;
- uint32_t* map_reverse = NULL;
- uintptr_t* reversed = NULL;
- char* bim_loadbuf = NULL;
+ uint32_t* map_reverse = nullptr;
+ uintptr_t* reversed = nullptr;
+ char* bim_loadbuf = nullptr;
// N.B. marker_allele_ptrs are ordered by marker_id instead of position
- char** marker_allele_ptrs = NULL;
+ char** marker_allele_ptrs = nullptr;
Ll_fam** htable_fam;
Ll_bim** htable_bim;
uintptr_t* pcptr;
@@ -16052,7 +16130,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
mergelist_bim[mlpos] = bufptr4;
bufptr4 = memcpyax(bufptr4, bufptr3, ulii, 0);
if (no_more_tokens_kns(bufptr)) {
- mergelist_fam[mlpos] = NULL;
+ mergelist_fam[mlpos] = nullptr;
} else {
bufptr2 = token_endnn(bufptr);
ulii = bufptr2 - bufptr;
@@ -16075,7 +16153,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
mergelist_fam = (char**)bigstack_alloc(2 * sizeof(intptr_t));
mergelist_bed[1] = mergename1;
mergelist_bim[1] = mergename2;
- mergelist_fam[1] = (merge_type & MERGE_BINARY)? mergename3 : NULL;
+ mergelist_fam[1] = (merge_type & MERGE_BINARY)? mergename3 : nullptr;
}
if (famname[0]) {
mergelist_bed[0] = bedname;
@@ -16094,7 +16172,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
goto merge_datasets_ret_NOMEM;
}
for (uii = 0; uii < HASHSIZE_S; uii++) {
- htable_fam[uii] = NULL;
+ htable_fam[uii] = nullptr;
}
ullxx = 0;
@@ -16319,7 +16397,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
goto merge_datasets_ret_NOMEM;
}
for (uii = 0; uii < HASHSIZE; uii++) {
- htable_bim[uii] = NULL;
+ htable_bim[uii] = nullptr;
}
ullxx = 0;
@@ -16352,8 +16430,8 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
fprintf(stderr, "%" PRIu64 " more multiple-position warning%s: see log file.\n", position_warning_ct - 3, (position_warning_ct == 4)? "" : "s");
}
#ifdef __LP64__
- if (ullxx > 0x7fffffff) {
- logerrprint("Error: Too many variants (max 2147483647).\n");
+ if (ullxx > 0x7ffffffd) {
+ logerrprint("Error: Too many variants (max 2147483645).\n");
goto merge_datasets_ret_INVALID_FORMAT;
}
#else
@@ -16375,8 +16453,9 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
if (!marker_allele_ptrs) {
goto merge_datasets_ret_NOMEM;
}
+ // prevent cleanup from failing
for (uii = 0; uii < tot_marker_ct * 2; uii++) {
- marker_allele_ptrs[uii] = NULL;
+ marker_allele_ptrs[uii] = missing_geno_ptr;
}
if (max_bim_linelen) {
max_bim_linelen++;
@@ -16429,14 +16508,12 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
bufptr = llbim_ptr->allele[0];
if (bufptr) {
marker_allele_ptrs[ujj * 2] = bufptr;
- } else {
- marker_allele_ptrs[ujj * 2] = missing_geno_ptr;
}
+ // already initialized to missing_geno_ptr otherwise
+
bufptr = llbim_ptr->allele[1];
if (bufptr) {
marker_allele_ptrs[ujj * 2 + 1] = bufptr;
- } else {
- marker_allele_ptrs[ujj * 2 + 1] = missing_geno_ptr;
}
marker_cms_tmp[ujj] = llbim_ptr->cm;
ll_buf[ujj] = (((uint64_t)llxx) & 0xffffffff00000000LL) | ujj;
@@ -16444,10 +16521,10 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
} while (llbim_ptr);
}
}
- sort_marker_chrom_pos(ll_buf, tot_marker_ct, pos_buf, chrom_start, chrom_id, NULL, &chrom_ct);
+ sort_marker_chrom_pos(ll_buf, tot_marker_ct, pos_buf, chrom_start, chrom_id, nullptr, &chrom_ct);
// bugfix: when chromosomes are filtered out, flag the corresponding markers
// in marker_map[]
- fill_uint_one(marker_map, tot_marker_ct);
+ fill_uint_one(tot_marker_ct, marker_map);
if (merge_post_msort_update_maps(marker_ids, max_marker_id_len, marker_map, marker_cms, marker_cms_tmp, pos_buf, ll_buf, chrom_start, chrom_id, chrom_ct, &dedup_marker_ct, merge_equal_pos, marker_allele_ptrs, chrom_info_ptr)) {
goto merge_datasets_ret_INVALID_FORMAT;
}
@@ -16546,7 +16623,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
memset(writebuf, 0x55, ((uintptr_t)ujj) * tot_sample_ct4);
}
if (merge_must_track_write(merge_mode)) {
- fill_ulong_zero(markbuf, ujj * ulii);
+ fill_ulong_zero(ujj * ulii, markbuf);
}
for (mlpos = 0; mlpos < merge_ct; mlpos++) {
retval = merge_main(mergelist_bed[mlpos], mergelist_bim[mlpos], mergelist_fam[mlpos], bim_loadbuf, max_bim_linelen, tot_sample_ct, tot_marker_ct, dedup_marker_ct, uii * markers_per_pass, ujj, marker_allele_ptrs, marker_ids, max_marker_id_len, sample_ids, max_sample_id_len, merge_nsort, sample_nsmap, flex_map, marker_map, idbuf, readbuf, writebuf, mlpos? merge_mode : merge_first_mode(merge_mode, merge_equal_pos), markbuf, outfile, &diff_total_overlap, &diff_not_both_genotyped, &diff [...]
@@ -16630,7 +16707,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
LOGPRINTF("%" PRIu64 " overlapping call%s, %" PRIu64 " nonmissing in both filesets.\n%" PRIu64 " concordant, for a concordance rate of %g.\n", diff_total_overlap, (diff_total_overlap == 1)? "" : "s", diff_not_both_genotyped, diff_not_both_genotyped - diff_discordant, 1.0 - (((double)diff_discordant) / ((double)diff_not_both_genotyped)));
}
- forget_extra_chrom_names(chrom_info_ptr);
+ forget_extra_chrom_names(1, chrom_info_ptr);
while (0) {
merge_datasets_ret_NOMEM:
retval = RET_NOMEM;
@@ -16651,14 +16728,7 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
break;
}
merge_datasets_ret_1:
- if (marker_allele_ptrs) {
- for (uii = 0; uii < tot_marker_ct * 2; uii++) {
- bufptr = marker_allele_ptrs[uii];
- if (bufptr && ((bufptr < g_one_char_strs) || (bufptr >= (&(g_one_char_strs[512]))))) {
- free(bufptr);
- }
- }
- }
+ cleanup_allele_storage(2, tot_marker_ct * 2, marker_allele_ptrs);
fclose_cond(mergelistfile);
fclose_cond(outfile);
bigstack_double_reset(bigstack_mark, bigstack_end_mark);
diff --git a/plink_data.h b/plink_data.h
index 0dd15ac..400e8c0 100644
--- a/plink_data.h
+++ b/plink_data.h
@@ -3,13 +3,13 @@
int32_t sample_major_to_snp_major(char* sample_major_fname, char* outname, uintptr_t unfiltered_marker_ct, uintptr_t unfiltered_sample_ct, uint64_t fsize);
-int32_t load_bim(char* bimname, uint32_t* map_cols_ptr, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_len_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_len_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_len, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_ [...]
+int32_t load_bim(char* bimname, uintptr_t* unfiltered_marker_ct_ptr, uintptr_t* marker_exclude_ct_ptr, uintptr_t* max_marker_id_blen_ptr, uintptr_t** marker_exclude_ptr, double** set_allele_freqs_ptr, uint32_t** nchrobs_ptr, char*** marker_allele_pp, uintptr_t* max_marker_allele_blen_ptr, char** marker_ids_ptr, char* missing_mid_template, uint32_t new_id_max_allele_slen, const char* missing_marker_id_match, Chrom_info* chrom_info_ptr, double** marker_cms_ptr, uint32_t** marker_pos_ptr, u [...]
int32_t load_covars(char* covar_fname, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* sex_nm, uintptr_t* sex_male, char* sample_ids, uintptr_t max_sample_id_len, double missing_phenod, uint32_t covar_modifier, Range_list* covar_range_list_ptr, uint32_t gxe_mcovar, uintptr_t* covar_ctx_ptr, char** covar_names_ptr, uintptr_t* max_covar_name_len_ptr, uintptr_t* pheno_nm, uintptr_t** covar_nm_ptr, double** covar_d_ptr, uintptr_t** gxe_covar_nm_ptr, [...]
int32_t write_covars(char* outname, char* outname_end, uint32_t write_covar_modifier, uint32_t write_covar_dummy_max_categories, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, char* paternal_ids, uintptr_t max_paternal_id_len, char* maternal_ids, uintptr_t max_maternal_id_len, uintptr_t* sex_nm, uintptr_t* sex_male, uintptr_t* pheno_nm, uintptr_t* pheno_c, double* pheno_d, double missing_phenod, char* output_ [...]
-int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, uint32_t map_cols, char* outname, char* outname_end, uint64_t calculation_type, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_i [...]
+int32_t make_bed(FILE* bedfile, uintptr_t bed_offset, char* bimname, char* outname, char* outname_end, uint64_t calculation_type, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, double* marker_cms, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, char* patern [...]
int32_t load_fam(char* famname, uint32_t fam_cols, uint32_t tmp_fam_col_6, int32_t missing_pheno, uint32_t affection_01, uintptr_t* unfiltered_sample_ct_ptr, char** sample_ids_ptr, uintptr_t* max_sample_id_len_ptr, char** paternal_ids_ptr, uintptr_t* max_paternal_id_len_ptr, char** maternal_ids_ptr, uintptr_t* max_maternal_id_len_ptr, uintptr_t** sex_nm_ptr, uintptr_t** sex_male_ptr, uint32_t* affection_ptr, uintptr_t** pheno_nm_ptr, uintptr_t** pheno_c_ptr, double** pheno_d_ptr, uintptr [...]
diff --git a/plink_dosage.c b/plink_dosage.c
index bf7ceb1..4865d2c 100644
--- a/plink_dosage.c
+++ b/plink_dosage.c
@@ -10,7 +10,7 @@
#include "pigz.h"
void dosage_init(Dosage_info* doip) {
- doip->fname = NULL;
+ doip->fname = nullptr;
doip->modifier = 0;
doip->skip0 = 0;
doip->skip1 = 0;
@@ -33,7 +33,7 @@ int32_t dosage_load_score_files(Score_info* sc_ip, char* outname, char* outname_
// the relevant variant IDs and check for duplicates; third pass, save
// allele codes and scores.
// 2. If --q-score-range was specified, load those files.
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t score_marker_ct = 0;
uintptr_t max_score_marker_id_len = 0;
uintptr_t miss_ct = 0;
@@ -41,8 +41,8 @@ int32_t dosage_load_score_files(Score_info* sc_ip, char* outname, char* outname_
uintptr_t qrange_ct = 0;
uintptr_t max_qrange_name_len = 0;
uint64_t allele_code_buf_len = 0;
- uintptr_t* score_qrange_key_exists = NULL;
- double* score_qrange_keys = NULL;
+ uintptr_t* score_qrange_key_exists = nullptr;
+ double* score_qrange_keys = nullptr;
uint32_t modifier = sc_ip->modifier;
int32_t retval = 0;
char* bufptr_arr[3];
@@ -467,82 +467,82 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
// sucks to duplicate so much, but this code will be thrown out later so
// there's no long-term maintenance problem
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* phenofile = NULL;
- FILE* infile = NULL;
- FILE* profile_outfile = NULL;
- gzFile* gz_infiles = NULL;
- char* marker_ids = NULL;
- char* sample_ids = NULL;
- char* paternal_ids = NULL;
- char* maternal_ids = NULL;
- char* cluster_ids = NULL;
- char* covar_names = NULL;
- char* sorted_sample_ids = NULL;
- char* sep_fnames = NULL;
- char* cur_marker_id_buf = NULL;
- char* score_marker_ids = NULL;
- char* score_qrange_names = NULL;
- char** score_allele_codes = NULL;
- char* a1_ptr = NULL;
- char* a2_ptr = NULL;
- char* pzwritep = NULL;
- uintptr_t* marker_exclude = NULL;
- uintptr_t* sample_exclude = NULL;
- uintptr_t* sex_nm = NULL;
- uintptr_t* sex_male = NULL;
- uintptr_t* pheno_nm = NULL;
- uintptr_t* pheno_c = NULL;
- uintptr_t* pheno_nm_collapsed = NULL;
- uintptr_t* pheno_c_collapsed = NULL;
- uintptr_t* founder_info = NULL;
- uintptr_t* covar_nm = NULL;
- uintptr_t* perm_vec = NULL;
- uintptr_t* perm_fails = NULL; // need to enforce alignment
- uintptr_t* score_qrange_key_exists = NULL;
- double* pheno_d = NULL;
- double* covar_d = NULL;
- double* cur_dosages2 = NULL;
- double* score_effect_sizes = NULL;
- double* score_qrange_keys = NULL;
- double* score_qrange_bounds = NULL;
- double* cur_scores = NULL;
- double* score_bases = NULL;
+ FILE* phenofile = nullptr;
+ FILE* infile = nullptr;
+ FILE* profile_outfile = nullptr;
+ gzFile* gz_infiles = nullptr;
+ char* marker_ids = nullptr;
+ char* sample_ids = nullptr;
+ char* paternal_ids = nullptr;
+ char* maternal_ids = nullptr;
+ char* cluster_ids = nullptr;
+ char* covar_names = nullptr;
+ char* sorted_sample_ids = nullptr;
+ char* sep_fnames = nullptr;
+ char* cur_marker_id_buf = nullptr;
+ char* score_marker_ids = nullptr;
+ char* score_qrange_names = nullptr;
+ char** score_allele_codes = nullptr;
+ char* a1_ptr = nullptr;
+ char* a2_ptr = nullptr;
+ char* pzwritep = nullptr;
+ uintptr_t* marker_exclude = nullptr;
+ uintptr_t* sample_exclude = nullptr;
+ uintptr_t* sex_nm = nullptr;
+ uintptr_t* sex_male = nullptr;
+ uintptr_t* pheno_nm = nullptr;
+ uintptr_t* pheno_c = nullptr;
+ uintptr_t* pheno_nm_collapsed = nullptr;
+ uintptr_t* pheno_c_collapsed = nullptr;
+ uintptr_t* founder_info = nullptr;
+ uintptr_t* covar_nm = nullptr;
+ uintptr_t* perm_vec = nullptr;
+ uintptr_t* perm_fails = nullptr; // need to enforce alignment
+ uintptr_t* score_qrange_key_exists = nullptr;
+ double* pheno_d = nullptr;
+ double* covar_d = nullptr;
+ double* cur_dosages2 = nullptr;
+ double* score_effect_sizes = nullptr;
+ double* score_qrange_keys = nullptr;
+ double* score_qrange_bounds = nullptr;
+ double* cur_scores = nullptr;
+ double* score_bases = nullptr;
#ifndef NOLAPACK
- double* pheno_d2 = NULL;
- double* covars_cov_major_buf = NULL;
- double* covars_sample_major_buf = NULL;
- double* pheno_d_collapsed = NULL;
- double* param_2d_buf = NULL;
- double* param_2d_buf2 = NULL;
- double* regression_results = NULL;
- double* dgels_a = NULL;
- double* dgels_b = NULL;
- double* dgels_work = NULL;
- MATRIX_INVERT_BUF1_TYPE* mi_buf = NULL;
+ double* pheno_d2 = nullptr;
+ double* covars_cov_major_buf = nullptr;
+ double* covars_sample_major_buf = nullptr;
+ double* pheno_d_collapsed = nullptr;
+ double* param_2d_buf = nullptr;
+ double* param_2d_buf2 = nullptr;
+ double* regression_results = nullptr;
+ double* dgels_a = nullptr;
+ double* dgels_b = nullptr;
+ double* dgels_work = nullptr;
+ MATRIX_INVERT_BUF1_TYPE* mi_buf = nullptr;
#endif
- float* covar_f = NULL;
- float* covars_cov_major_f_buf = NULL;
- float* coef_f = NULL;
- float* pp_f = NULL;
- float* sample_1d_buf_f = NULL;
- float* pheno_buf_f = NULL;
- float* param_1d_buf_f = NULL;
- float* param_1d_buf2_f = NULL;
- float* param_2d_buf_f = NULL;
- float* param_2d_buf2_f = NULL;
- float* regression_results_f = NULL;
- Ll_ctstr_entry** htable = NULL;
- uint32_t* marker_pos = NULL;
- uint32_t* cluster_map = NULL;
- uint32_t* cluster_starts = NULL;
- uint32_t* marker_id_htable = NULL;
- uint32_t* sample_id_map = NULL;
- uint32_t* batch_sizes = NULL;
- uint32_t* score_range_obs_cts = NULL;
- uint32_t* score_miss_cts = NULL;
- uint32_t* uiptr = NULL;
- uint32_t* uiptr2 = NULL;
- uint32_t* uiptr3 = NULL;
+ float* covar_f = nullptr;
+ float* covars_cov_major_f_buf = nullptr;
+ float* coef_f = nullptr;
+ float* pp_f = nullptr;
+ float* sample_1d_buf_f = nullptr;
+ float* pheno_buf_f = nullptr;
+ float* param_1d_buf_f = nullptr;
+ float* param_1d_buf2_f = nullptr;
+ float* param_2d_buf_f = nullptr;
+ float* param_2d_buf2_f = nullptr;
+ float* regression_results_f = nullptr;
+ Ll_ctstr_entry** htable = nullptr;
+ uint32_t* marker_pos = nullptr;
+ uint32_t* cluster_map = nullptr;
+ uint32_t* cluster_starts = nullptr;
+ uint32_t* marker_id_htable = nullptr;
+ uint32_t* sample_id_map = nullptr;
+ uint32_t* batch_sizes = nullptr;
+ uint32_t* score_range_obs_cts = nullptr;
+ uint32_t* score_miss_cts = nullptr;
+ uint32_t* uiptr = nullptr;
+ uint32_t* uiptr2 = nullptr;
+ uint32_t* uiptr3 = nullptr;
uintptr_t unfiltered_marker_ct = 0;
uintptr_t marker_exclude_ct = 0;
uintptr_t max_marker_id_len = 0;
@@ -586,7 +586,6 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
uint32_t standard_beta = glm_modifier & GLM_STANDARD_BETA;
uint32_t score_center = sc_ip->modifier & SCORE_CENTER;
uint32_t score_mean_impute = !(sc_ip->modifier & SCORE_NO_MEAN_IMPUTATION);
- uint32_t map_cols = 3;
uint32_t map_is_unsorted = 0;
uint32_t affection = 0;
uint32_t infile_ct = 0;
@@ -675,7 +674,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
int32_t ii;
pzwrite_init_null(&ps);
if (load_map) {
- retval = load_bim(mapname, &map_cols, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, NULL, NULL, NULL, &ulii, &marker_ids, NULL, 0, NULL, chrom_info_ptr, NULL, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, &map_is_unsorted, do_glm || min_bp_space || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)), 0, 0, NULL, ".map file", NULL);
+ retval = load_bim(mapname, &unfiltered_marker_ct, &marker_exclude_ct, &max_marker_id_len, &marker_exclude, nullptr, nullptr, nullptr, &ulii, &marker_ids, nullptr, 0, nullptr, chrom_info_ptr, nullptr, &marker_pos, misc_flags, filter_flags, marker_pos_start, marker_pos_end, snp_window_size, markername_from, markername_to, markername_snp, snps_range_list_ptr, &map_is_unsorted, do_glm || min_bp_space || (misc_flags & (MISC_EXTRACT_RANGE | MISC_EXCLUDE_RANGE)), 0, 0, nullptr, ".map file", [...]
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -744,7 +743,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
goto plink1_dosage_ret_1;
}
} else if (phenofile) {
- retval = load_pheno(phenofile, unfiltered_sample_ct, 0, sorted_sample_ids, max_sample_id_len, sample_id_map, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, NULL, 0);
+ retval = load_pheno(phenofile, unfiltered_sample_ct, 0, sorted_sample_ids, max_sample_id_len, sample_id_map, missing_pheno, (misc_flags / MISC_AFFECTION_01) & 1, mpheno_col, phenoname_str, pheno_nm, &pheno_c, &pheno_d, nullptr, 0);
if (retval) {
if (retval == LOAD_PHENO_LAST_COL) {
logprintb();
@@ -964,7 +963,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
if (filter_flags & (FILTER_BINARY_FOUNDERS | FILTER_BINARY_NONFOUNDERS)) {
ii = sample_exclude_ct;
- filter_samples_bitfields(unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, (filter_flags / FILTER_BINARY_FOUNDERS) & 1, NULL);
+ filter_samples_bitfields(unfiltered_sample_ct, sample_exclude, &sample_exclude_ct, founder_info, (filter_flags / FILTER_BINARY_FOUNDERS) & 1, nullptr);
if (sample_exclude_ct == unfiltered_sample_ct) {
LOGERRPRINTF("Error: All %s removed due to founder status (--filter-%s).\n", g_species_plural, (filter_flags & FILTER_BINARY_FOUNDERS)? "founders" : "nonfounders");
goto plink1_dosage_ret_ALL_SAMPLES_EXCLUDED;
@@ -1018,7 +1017,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
if (!do_glm) {
logerrprint("Warning: Ignoring --covar since no commands reference the covariates.\n");
} else {
- retval = load_covars(covar_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sex_covar? sex_nm : NULL, sex_covar? sex_male : NULL, sample_ids, max_sample_id_len, missing_phenod, covar_modifier, covar_range_list_ptr, 0, &covar_ct, &covar_names, &max_covar_name_len, pheno_nm, &covar_nm, &covar_d, NULL, NULL);
+ retval = load_covars(covar_fname, unfiltered_sample_ct, sample_exclude, sample_ct, sex_covar? sex_nm : nullptr, sex_covar? sex_male : nullptr, sample_ids, max_sample_id_len, missing_phenod, covar_modifier, covar_range_list_ptr, 0, &covar_ct, &covar_names, &max_covar_name_len, pheno_nm, &covar_nm, &covar_d, nullptr, nullptr);
if (retval) {
goto plink1_dosage_ret_1;
}
@@ -1357,7 +1356,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
bigstack_alloc_ui(sample_ct, &read_idx_to_sample_idx) ||
bigstack_alloc_ui(sample_ct, &skip_vals) ||
bigstack_alloc_d(sample_ct, &cur_dosages) ||
- bigstack_alloc_c(MAX_ID_LEN, &cur_marker_id_buf)) {
+ bigstack_alloc_c(MAX_ID_SLEN, &cur_marker_id_buf)) {
goto plink1_dosage_ret_NOMEM;
}
gz_infiles = (gzFile*)bigstack_alloc(infile_ct * sizeof(gzFile));
@@ -1366,7 +1365,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
goto plink1_dosage_ret_NOMEM;
}
for (uii = 0; uii < infile_ct; uii++) {
- gz_infiles[uii] = NULL;
+ gz_infiles[uii] = nullptr;
}
if (noheader) {
if (infile_ct != 1) {
@@ -1377,7 +1376,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
}
}
- // sorted_sample_ids = NULL;
+ // sorted_sample_ids = nullptr;
} else {
retval = sort_item_ids(unfiltered_sample_ct, sample_exclude, sample_exclude_ct, sample_ids, max_sample_id_len, 0, 1, strcmp_deref, &sorted_sample_ids, &sample_id_map);
if (retval) {
@@ -1501,7 +1500,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
goto plink1_dosage_ret_NOMEM;
}
for (uii = 0; uii < HASHSIZE; uii++) {
- htable[uii] = NULL;
+ htable[uii] = nullptr;
}
bufptr2 = memcpyb(outname_end, ".occur.dosage", 14);
} else if (!do_score) {
@@ -1551,7 +1550,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
loadbuf = (char*)g_bigstack_base;
loadbuf[loadbuf_size - 1] = ' ';
- fill_ulong_zero(batch_samples, sample_ctl);
+ fill_ulong_zero(sample_ctl, batch_samples);
bufptr = memcpya(g_logbuf, "--dosage: Reading from ", 23);
if (cur_batch_size == 1) {
bufptr = strcpya(bufptr, &(fnames[file_idx_start * max_fn_len]));
@@ -1610,10 +1609,10 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
}
if (gzclose(gz_infiles[file_idx]) != Z_OK) {
- gz_infiles[file_idx] = NULL;
+ gz_infiles[file_idx] = nullptr;
goto plink1_dosage_ret_READ_FAIL;
}
- gz_infiles[file_idx] = NULL;
+ gz_infiles[file_idx] = nullptr;
if (read_idx_start == read_idx) {
sprintf(g_logbuf, "Error: %s is empty.\n", &(sep_fnames[(file_idx + file_idx_start) * max_sepheader_len]));
goto plink1_dosage_ret_INVALID_FORMAT_WW;
@@ -1742,7 +1741,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
bufptr4 = token_endnn(bufptr3);
bufptr6 = token_endnn(bufptr5);
slen = (uintptr_t)(bufptr2 - bufptr);
- if (slen > MAX_ID_LEN) {
+ if (slen > MAX_ID_SLEN) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s has an excessively long variant ID.\n", line_idx, &(fnames[(file_idx + file_idx_start) * max_fn_len]));
goto plink1_dosage_ret_INVALID_FORMAT_WW;
}
@@ -1816,7 +1815,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
} else {
goto plink1_dosage_ret_NOMEM;
}
- ll_ptr->next = NULL;
+ ll_ptr->next = nullptr;
memcpy(ll_ptr->ss, cur_marker_id_buf, slen);
if (slen > max_occur_id_len) {
max_occur_id_len = slen;
@@ -1940,7 +1939,7 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
}
}
if (load_map) {
- pzwritep = width_force(4, pzwritep, chrom_name_write(chrom_info_ptr, get_marker_chrom(chrom_info_ptr, marker_idx), pzwritep));
+ pzwritep = width_force(4, pzwritep, chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, marker_idx), pzwritep));
*pzwritep++ = ' ';
pzwritep = fw_strcpyn(11, cur_marker_id_len, cur_marker_id_buf, pzwritep);
pzwritep = memseta(pzwritep, 32, 2);
@@ -2151,20 +2150,20 @@ int32_t plink1_dosage(Dosage_info* doip, char* famname, char* mapname, char* out
if (a1_ptr[1]) {
free(a1_ptr);
}
- a1_ptr = NULL;
+ a1_ptr = nullptr;
if (a2_ptr[1]) {
free(a2_ptr);
}
- a2_ptr = NULL;
+ a2_ptr = nullptr;
}
}
plink1_dosage_end_loop:
for (file_idx = 0; file_idx < cur_batch_size; file_idx++) {
if (gzclose(gz_infiles[file_idx]) != Z_OK) {
- gz_infiles[file_idx] = NULL;
+ gz_infiles[file_idx] = nullptr;
goto plink1_dosage_ret_READ_FAIL;
}
- gz_infiles[file_idx] = NULL;
+ gz_infiles[file_idx] = nullptr;
}
bigstack_reset(bigstack_mark);
}
diff --git a/plink_family.c b/plink_family.c
index d18a3ac..d8056f5 100644
--- a/plink_family.c
+++ b/plink_family.c
@@ -70,7 +70,7 @@ int32_t get_trios_and_families(uintptr_t unfiltered_sample_ct, uintptr_t* sample
// fids is a list of null-terminated FIDs using trio_list indices, and iids
// is a list of IIDs using regular unfiltered indices. If include_duos is
// set, iids has a trailing entry set to '0'. (fids_ptr, iids_ptr, and the
- // corresponding lengths can be NULL.)
+ // corresponding lengths can be nullptr.)
//
// PLINK 1.07 enforces <= 1 father and <= 1 mother per sample (and ambiguous
// sex parents are not permitted), but the IDs CAN be reversed in the .fam
@@ -79,10 +79,10 @@ int32_t get_trios_and_families(uintptr_t unfiltered_sample_ct, uintptr_t* sample
// It won't be replicated in PLINK 2.0.
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- uint64_t* edge_list = NULL;
- uint32_t* toposort_queue = NULL;
- char* fids = NULL;
- char* iids = NULL;
+ uint64_t* edge_list = nullptr;
+ uint32_t* toposort_queue = nullptr;
+ char* fids = nullptr;
+ char* iids = nullptr;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctp1l = 1 + (unfiltered_sample_ct / BITCT);
uintptr_t sample_uidx = next_unset_unsafe(sample_exclude, 0);
@@ -661,15 +661,15 @@ void fill_mendel_errstr(uint32_t error_code, char** allele_ptrs, uint32_t* alens
int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t plink_maxfid, uint32_t plink_maxiid, uint32_t plink_maxsnp, uint32_t allow_no_variants, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t* sample_ [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- FILE* outfile_l = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* error_locs = NULL;
- char* varptr = NULL;
- char* chrom_name_ptr = NULL;
- unsigned char* cur_errors = NULL;
- uint64_t* family_error_cts = NULL;
- uint32_t* child_cts = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_l = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* error_locs = nullptr;
+ char* varptr = nullptr;
+ char* chrom_name_ptr = nullptr;
+ unsigned char* cur_errors = nullptr;
+ uint64_t* family_error_cts = nullptr;
+ uint32_t* child_cts = nullptr;
uintptr_t marker_ct = unfiltered_marker_ct - *marker_exclude_ct_ptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctp1l2 = 1 + (unfiltered_sample_ct / BITCT2);
@@ -769,7 +769,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
}
loadbuf[unfiltered_sample_ctp1l2 - 1] = 0;
hh_exists &= XMHH_EXISTS;
- if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_exists, 0, sample_exclude, sex_male, NULL, &sample_male_include2)) {
+ if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_exists, 0, sample_exclude, sex_male, nullptr, &sample_male_include2)) {
goto mendel_error_scan_ret_NOMEM;
}
alens[0] = 0;
@@ -816,16 +816,16 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
fprintf(outfile_l, g_textbuf, "SNP");
} else {
// suppress warning
- fill_ulong_zero((uintptr_t*)errstrs, 10);
+ fill_ulong_zero(10, (uintptr_t*)errstrs);
}
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- is_x = (((uint32_t)chrom_info_ptr->x_code) == chrom_idx);
- if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x)) || (((uint32_t)chrom_info_ptr->mt_code) == chrom_idx)) {
+ is_x = (((uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]) == chrom_idx);
+ if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x)) || (((uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET]) == chrom_idx)) {
continue;
}
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- uii = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ uii = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
if (uii == chrom_end) {
continue;
}
@@ -857,7 +857,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
varlen = strlen(varptr);
alens[0] = 0;
alens[1] = 0;
- fill_uint_zero(errstr_lens, 11);
+ fill_uint_zero(11, errstr_lens);
}
if (!multigen) {
for (trio_idx = 0; trio_idx < trio_ct; trio_idx++) {
@@ -948,7 +948,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
goto mendel_error_scan_ret_WRITE_FAIL;
}
}
- fill_ulong_zero(error_locs, trio_ctl);
+ fill_ulong_zero(trio_ctl, error_locs);
}
}
if (calc_mendel) {
@@ -982,7 +982,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
error_cts_tmp[trio_idx] += error_cts_tmp2[trio_idx];
}
#endif
- fill_uint_zero(error_cts_tmp2, trio_ct);
+ fill_uint_zero(trio_ct, error_cts_tmp2);
}
error_ct_fill++;
if (error_ct_fill == 255) {
@@ -996,7 +996,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
*uiptr += uii >> 16;
uiptr++;
}
- fill_uint_zero(error_cts_tmp, trio_ct);
+ fill_uint_zero(trio_ct, error_cts_tmp);
error_ct_fill = 0;
}
}
@@ -1045,8 +1045,8 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
}
sprintf(g_textbuf, "%%%us %%%us %%%us CHLD N\n", plink_maxfid, plink_maxiid, plink_maxiid);
fprintf(outfile, g_textbuf, "FID", "PAT", "MAT");
- fill_ull_zero(family_error_cts, family_ct * 3);
- fill_uint_zero(child_cts, family_ct);
+ fill_ull_zero(family_ct * 3, family_error_cts);
+ fill_uint_zero(family_ct, child_cts);
for (trio_idx = 0; trio_idx < trio_ct; trio_idx++) {
uii = (uint32_t)(trio_list[trio_idx] >> 32);
child_cts[uii] += 1;
@@ -1128,7 +1128,7 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
ukk = (uint32_t)(family_code >> 32);
if (ukk != unfiltered_sample_ct) {
if (ujj != unfiltered_sample_ct) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
wptr = fw_strcpy(plink_maxiid, &(iids[ukk * max_iid_len]), &(g_textbuf[plink_maxfid + 1]));
*wptr++ = ' ';
@@ -1141,8 +1141,8 @@ int32_t mendel_error_scan(Family_info* fam_ip, FILE* bedfile, uintptr_t bed_offs
goto mendel_error_scan_ret_WRITE_FAIL;
}
}
- putc(' ', outfile); // PLINK 1.07 formatting quirk
- putc('\n', outfile);
+ putc_unlocked(' ', outfile); // PLINK 1.07 formatting quirk
+ putc_unlocked('\n', outfile);
}
wptr = fw_strcpy(plink_maxiid, &(iids[((uint32_t)trio_code) * max_iid_len]), &(g_textbuf[plink_maxfid + 1]));
*wptr++ = ' ';
@@ -1243,10 +1243,10 @@ int32_t populate_pedigree_rel_info(Pedigree_rel_info* pri_ptr, uintptr_t unfilte
uintptr_t max_family_id_len = 0;
uintptr_t max_indiv_id_len = 0;
uintptr_t max_pm_id_len = MAXV(max_paternal_id_len, max_maternal_id_len);
- char* last_family_id = NULL;
- double* tmp_rel_space = NULL;
- double* tmp_rel_writer = NULL;
- uint32_t* uiptr2 = NULL;
+ char* last_family_id = nullptr;
+ double* tmp_rel_space = nullptr;
+ double* tmp_rel_writer = nullptr;
+ uint32_t* uiptr2 = nullptr;
int32_t max_family_nf = 0;
unsigned char* bigstack_mark;
unsigned char* bigstack_mark2;
@@ -1459,7 +1459,7 @@ int32_t populate_pedigree_rel_info(Pedigree_rel_info* pri_ptr, uintptr_t unfilte
if (bigstack_alloc_ul(unfiltered_sample_ctl + ulii, &processed_samples)) {
return RET_NOMEM;
}
- fill_ulong_one(&(processed_samples[unfiltered_sample_ctl]), ulii);
+ fill_ulong_one(ulii, &(processed_samples[unfiltered_sample_ctl]));
bigstack_mark2 = g_bigstack_base;
for (fidx = 0; fidx < family_id_ct; fidx++) {
@@ -1707,7 +1707,7 @@ int32_t populate_pedigree_rel_info(Pedigree_rel_info* pri_ptr, uintptr_t unfilte
}
int32_t tdt_poo(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double output_min_p, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct_ax, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sex_male, uintptr_t* sample_male_include2, uint32_t* trio_nuclear_lookup, uint32_ [...]
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uint64_t mendel_error_ct = 0;
double pat_a2transmit_recip = 0.0;
double mat_a1transmit_recip = 0.0;
@@ -1784,12 +1784,12 @@ int32_t tdt_poo(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* o
fflush(stdout);
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- is_x = ((int32_t)chrom_idx == chrom_info_ptr->x_code);
- if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x)) || (((uint32_t)chrom_info_ptr->mt_code) == chrom_idx)) {
+ is_x = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x)) || (((uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET]) == chrom_idx)) {
continue;
}
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- uii = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ uii = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
if (uii == chrom_end) {
continue;
}
@@ -1914,7 +1914,7 @@ int32_t tdt_poo(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* o
}
if (++markers_done >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (markers_done * 100LLU) / marker_ct_ax;
if (pct < 100) {
@@ -1941,7 +1941,7 @@ int32_t tdt_poo(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* o
if (fclose_null(&outfile)) {
goto tdt_poo_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("--tdt poo: Report written to %s .\n", outname);
while (0) {
tdt_poo_ret_OPEN_FAIL:
@@ -1960,9 +1960,9 @@ int32_t tdt_poo(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* o
int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double ci_size, double ci_zt, double pfilter, double output_min_p, uint32_t mtest_adjust, double adjust_lambda, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, uintptr_t unfilter [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
char* textbuf = g_textbuf;
- double* orig_chisq = NULL; // pval if exact test
+ double* orig_chisq = nullptr; // pval if exact test
uint64_t last_parents = 0;
// uint64_t mendel_error_ct = 0;
double chisq = 0;
@@ -2170,7 +2170,7 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
loadbuf[unfiltered_sample_ctl2 - 1] = 0;
workbuf[unfiltered_sample_ctp1l2 - 1] = 0;
hh_exists &= XMHH_EXISTS;
- if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_exists, 1, sample_exclude, sex_male, NULL, &sample_male_include2)) {
+ if (alloc_raw_haploid_filters(unfiltered_sample_ct, hh_exists, 1, sample_exclude, sex_male, nullptr, &sample_male_include2)) {
goto tdt_ret_NOMEM;
}
if (fam_ip->tdt_modifier & (TDT_PERM | TDT_MPERM)) {
@@ -2178,7 +2178,7 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
retval = RET_CALC_NOT_YET_SUPPORTED;
goto tdt_ret_1;
}
- ulii = 2 * max_marker_allele_len + plink_maxsnp + MAX_ID_LEN + 256;
+ ulii = 2 * max_marker_allele_len + plink_maxsnp + MAX_ID_SLEN + 256;
if (ulii > MAXLINELEN) {
if (bigstack_alloc_c(ulii, &textbuf)) {
goto tdt_ret_NOMEM;
@@ -2224,12 +2224,12 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
fflush(stdout);
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- is_x = ((int32_t)chrom_idx == chrom_info_ptr->x_code);
- if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x)) || (((uint32_t)chrom_info_ptr->mt_code) == chrom_idx)) {
+ is_x = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x)) || (((uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET]) == chrom_idx)) {
continue;
}
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- uii = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ uii = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
if (uii == chrom_end) {
continue;
}
@@ -2442,7 +2442,7 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
if (++markers_done >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (markers_done * 100LLU) / marker_ct;
if (pct < 100) {
@@ -2466,7 +2466,7 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("--tdt: Report written to %s .\n", outname);
if (mtest_adjust) {
tdt_multcomp:
@@ -2479,13 +2479,13 @@ int32_t tdt(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outna
memcpy(marker_exclude_tmp, marker_exclude, ulii * sizeof(intptr_t));
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- if ((is_set(chrom_info_ptr->haploid_mask, chrom_idx) && ((int32_t)chrom_idx != chrom_info_ptr->x_code)) || ((int32_t)chrom_idx == chrom_info_ptr->mt_code)) {
- uii = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- fill_bits(uii, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1] - uii, marker_exclude_tmp);
+ if ((is_set(chrom_info_ptr->haploid_mask, chrom_idx) && ((int32_t)chrom_idx != chrom_info_ptr->xymt_codes[X_OFFSET])) || ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ uii = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ fill_bits(uii, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1] - uii, marker_exclude_tmp);
}
}
fill_idx_to_uidx(marker_exclude_tmp, unfiltered_marker_ct, marker_ct, marker_idx_to_uidx);
- retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, is_exact? NULL : orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, NULL, is_exact? orig_chisq : NULL);
+ retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, is_exact? nullptr : orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, nullptr, is_exact? orig_chisq : nullptr);
if (retval) {
goto tdt_ret_1;
}
@@ -2530,9 +2530,9 @@ int32_t get_sibship_info(uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclu
uintptr_t sample_ctl = BITCT_TO_WORDCT(sample_ct);
uintptr_t max_merged_id_len = max_fid_len + max_paternal_id_len + max_maternal_id_len + sizeof(int32_t);
uintptr_t trio_idx = 0;
- uintptr_t* tmp_within2_founder = NULL;
- uintptr_t* lm_within2_founder = NULL;
- uintptr_t* lm_eligible = NULL;
+ uintptr_t* tmp_within2_founder = nullptr;
+ uintptr_t* lm_within2_founder = nullptr;
+ uintptr_t* lm_eligible = nullptr;
uint32_t is_within2 = (test_type == QFAM_WITHIN2);
uint32_t family_idx = 0;
uint32_t fssc_idx = 0;
@@ -2611,7 +2611,7 @@ int32_t get_sibship_info(uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclu
}
bitarr_invert_copy(sample_exclude, unfiltered_sample_ct, not_in_family);
- fill_uint_one(sample_to_fss_idx, sample_ct);
+ fill_uint_one(sample_ct, sample_to_fss_idx);
fill_uidx_to_idx(sample_exclude, unfiltered_sample_ct, sample_ct, sample_uidx_to_idx);
if (family_ct) {
// iterate over all parents
@@ -2951,7 +2951,7 @@ void dfam_sibship_or_unrelated_perm_calc(uintptr_t* loadbuf_ptr, const uint32_t*
uint32_t max_incr8;
uint32_t uii;
// first check if all genotypes are identical
- fill_uint_zero(cur_genotype_cts, 4);
+ fill_uint_zero(4, cur_genotype_cts);
for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
sample_idx = cur_dfam_ptr[sib_idx];
cur_geno = EXTRACT_2BIT_GENO(loadbuf_ptr, sample_idx);
@@ -2982,13 +2982,13 @@ void dfam_sibship_or_unrelated_perm_calc(uintptr_t* loadbuf_ptr, const uint32_t*
}
#ifdef __LP64__
- fill_vec_zero(acc4, acc4_vec_ct);
- fill_vec_zero(acc8, acc8_vec_ct);
+ fill_vvec_zero(acc4_vec_ct, acc4);
+ fill_vvec_zero(acc8_vec_ct, acc8);
#else
- fill_ulong_zero(acc4, acc4_word_ct);
- fill_ulong_zero(acc8, acc8_word_ct);
+ fill_ulong_zero(acc4_word_ct, acc4);
+ fill_ulong_zero(acc8_word_ct, acc8);
#endif
- fill_uint_zero(cur_case_a1_cts, perm_vec_ct);
+ fill_uint_zero(perm_vec_ct, cur_case_a1_cts);
max_incr4 = 0;
max_incr8 = 0;
for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
@@ -3106,13 +3106,13 @@ void dfam_sibship_or_unrelated_perm_calc(uintptr_t* loadbuf_ptr, const uint32_t*
}
#ifdef __LP64__
- fill_vec_zero(acc4, acc4_vec_ct);
- fill_vec_zero(acc8, acc8_vec_ct);
+ fill_vvec_zero(acc4_vec_ct, acc4);
+ fill_vvec_zero(acc8_vec_ct, acc8);
#else
- fill_ulong_zero(acc4, acc4_word_ct);
- fill_ulong_zero(acc8, acc8_word_ct);
+ fill_ulong_zero(acc4_word_ct, acc4);
+ fill_ulong_zero(acc8_word_ct, acc8);
#endif
- fill_uint_zero(cur_case_missing_cts, perm_vec_ct);
+ fill_uint_zero(perm_vec_ct, cur_case_missing_cts);
uii = 0;
for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
sample_idx = cur_dfam_ptr[sib_idx];
@@ -3233,16 +3233,16 @@ THREAD_RET_TYPE dfam_perm_thread(void* arg) {
uint32_t* __restrict__ perm_attempt_ct = g_perm_attempt_ct;
uint32_t* __restrict__ perm_2success_ct = g_perm_2success_ct;
double* __restrict__ mperm_save_all = g_mperm_save_all;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
double* numers = &(g_dfam_numers[tidx * perm_vec_cta128]);
double* denoms = &(g_dfam_denoms[tidx * perm_vec_cta128]);
const uint32_t* dfam_iteration_order = g_dfam_iteration_order;
- unsigned char* perm_adapt_stop = NULL;
+ unsigned char* perm_adapt_stop = nullptr;
double adaptive_intercept = 0.0;
double adaptive_slope = 0.0;
double adaptive_ci_zt = 0.0;
double aperm_alpha = 0.0;
- double* maxt_results = NULL;
+ double* maxt_results = nullptr;
uint32_t perm_adapt = g_test_type;
uint32_t next_adapt_check = 0;
uint32_t cur_case_a1_ct_flip[2];
@@ -3362,9 +3362,9 @@ THREAD_RET_TYPE dfam_perm_thread(void* arg) {
}
quad_denom = 0;
twice_numer_subtract = 0;
- fill_uint_zero(total_counts, perm_vec_ct);
- fill_double_zero(numers, perm_vec_ct);
- fill_double_zero(denoms, perm_vec_ct);
+ fill_uint_zero(perm_vec_ct, total_counts);
+ fill_double_zero(perm_vec_ct, numers);
+ fill_double_zero(perm_vec_ct, denoms);
cur_dfam_ptr = dfam_iteration_order;
success_2start = perm_2success_ct[marker_idx];
@@ -3372,7 +3372,7 @@ THREAD_RET_TYPE dfam_perm_thread(void* arg) {
chisq_high = orig_chisq[marker_idx] + EPSILON;
chisq_low = orig_chisq[marker_idx] - EPSILON;
#ifdef __LP64__
- fill_vec_zero(case_a1_ct_acc8, acc8_vec_ct);
+ fill_vvec_zero(acc8_vec_ct, case_a1_ct_acc8);
max_incr4 = 0;
max_incr8 = 0;
#endif
@@ -3487,13 +3487,13 @@ THREAD_RET_TYPE dfam_perm_thread(void* arg) {
quad_denom += (2 - (parental_a1_ct & 1)) * nonmissing_sib_ct;
cur_flipa = &(flipa[fs_idx * perm_vec_wcta]);
- fill_uint_zero(cur_case_a1_cts, perm_vec_ct);
+ fill_uint_zero(perm_vec_ct, cur_case_a1_cts);
#ifdef __LP64__
- fill_vec_zero(acc4, acc4_vec_ct);
- fill_vec_zero(acc8, acc8_vec_ct);
+ fill_vvec_zero(acc4_vec_ct, acc4);
+ fill_vvec_zero(acc8_vec_ct, acc8);
#else
- fill_ulong_zero(acc4, acc4_word_ct);
- fill_ulong_zero(acc8, acc8_word_ct);
+ fill_ulong_zero(acc4_word_ct, acc4);
+ fill_ulong_zero(acc8_word_ct, acc8);
#endif
// compute (unflipped) case_a1_ct for each permutation
max_incr4 = 0; // maximum possible value in acc4
@@ -3597,13 +3597,13 @@ THREAD_RET_TYPE dfam_perm_thread(void* arg) {
// case, but we focus our attention on the far more common sparse
// missingness scenario.)
cur_max_incr = 0;
- fill_uint_zero(cur_case_missing_cts, perm_vec_ct);
+ fill_uint_zero(perm_vec_ct, cur_case_missing_cts);
#ifdef __LP64__
- fill_vec_zero(acc4, acc4_vec_ct);
- fill_vec_zero(acc8, acc8_vec_ct);
+ fill_vvec_zero(acc4_vec_ct, acc4);
+ fill_vvec_zero(acc8_vec_ct, acc8);
#else
- fill_ulong_zero(acc4, acc4_word_ct);
- fill_ulong_zero(acc8, acc8_word_ct);
+ fill_ulong_zero(acc4_word_ct, acc4);
+ fill_ulong_zero(acc8_word_ct, acc8);
#endif
for (sib_idx = 0; sib_idx < sibling_ct; sib_idx++) {
sample_idx = cur_dfam_ptr[sib_idx];
@@ -3776,8 +3776,8 @@ void dfam_flipa_shuffle(uintptr_t* perms, uintptr_t* shuffled_perms, uint32_t pe
int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double pfilter, double output_min_p, uint32_t mtest_adjust, double adjust_lambda, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude_orig, uintptr_t marker_ct_orig, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- FILE* outfile_msa = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_msa = nullptr;
char* textbuf = g_textbuf;
uintptr_t marker_ct_orig_autosomal = marker_ct_orig;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
@@ -3792,9 +3792,9 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
uintptr_t perm_vec_wcta = 0;
uintptr_t perm_vec_ctcl8m = 0;
uintptr_t* marker_exclude_orig_autosomal = marker_exclude_orig;
- uintptr_t* founder_pnm = NULL;
- double* orig_chisq = NULL;
- double* maxt_extreme_stat = NULL;
+ uintptr_t* founder_pnm = nullptr;
+ double* orig_chisq = nullptr;
+ double* maxt_extreme_stat = nullptr;
uint32_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
uint32_t multigen = (fam_ip->mendel_modifier / MENDEL_MULTIGEN) & 1;
uint32_t is_set_test = fam_ip->dfam_modifier & DFAM_SET_TEST;
@@ -3918,9 +3918,9 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
memcpy(marker_exclude_orig_autosomal, marker_exclude_orig, unfiltered_marker_ctl * sizeof(intptr_t));
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- if (is_set(chrom_info_ptr->haploid_mask, chrom_idx) || ((int32_t)chrom_idx == chrom_info_ptr->mt_code)) {
- uii = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- fill_bits(uii, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1] - uii, marker_exclude_orig_autosomal);
+ if (is_set(chrom_info_ptr->haploid_mask, chrom_idx) || ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ uii = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ fill_bits(uii, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1] - uii, marker_exclude_orig_autosomal);
}
}
} else if (is_set(chrom_info_ptr->haploid_mask, 0)) {
@@ -3946,13 +3946,13 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
}
memcpy(founder_pnm, pheno_nm, unfiltered_sample_ctl * sizeof(intptr_t));
bitvec_and(founder_info, unfiltered_sample_ctl, founder_pnm);
- if (extract_set_union_unfiltered(sip, NULL, unfiltered_marker_ct, marker_exclude_orig_autosomal, &marker_exclude, &marker_ct)) {
+ if (extract_set_union_unfiltered(sip, nullptr, unfiltered_marker_ct, marker_exclude_orig_autosomal, &marker_exclude, &marker_ct)) {
goto dfam_ret_NOMEM;
}
}
// no --mendel-duos support for now
- retval = get_trios_and_families(unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, NULL, &max_fid_len, NULL, NULL, &family_list, &family_ct, &trio_list, &trio_ct, &trio_error_lookup, 0, multigen);
+ retval = get_trios_and_families(unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, nullptr, &max_fid_len, nullptr, nullptr, &family_list, &family_ct, &trio_list, &trio_ct, &trio_error_lookup, 0, multigen);
if (retval) {
goto dfam_ret_1;
}
@@ -3962,7 +3962,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
goto dfam_ret_INVALID_CMDLINE;
}
#endif
- if (get_sibship_info(unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, founder_info, sample_ids, max_sample_id_len, max_fid_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, family_list, trio_list, family_ct, trio_ct, 0, &size_one_sibships, NULL, NULL, &fs_starts, &fss_contents, &sample_to_fss_idx, &fs_ct, NULL, NULL)) {
+ if (get_sibship_info(unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, founder_info, sample_ids, max_sample_id_len, max_fid_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, family_list, trio_list, family_ct, trio_ct, 0, &size_one_sibships, nullptr, nullptr, &fs_starts, &fss_contents, &sample_to_fss_idx, &fs_ct, nullptr, nullptr)) {
goto dfam_ret_NOMEM;
}
// Prepare final family, sibship, and unrelated cluster data structures.
@@ -4082,11 +4082,11 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
// (no unrelated cluster at all) than no --within at all (one big unrelated
// cluster) in PLINK 1.07. Replicate this for now.
if (within_cmdflag) {
- if (fill_sample_to_cluster(unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, cluster_starts, sample_to_cluster, NULL)) {
+ if (fill_sample_to_cluster(unfiltered_sample_ct, sample_exclude, sample_ct, cluster_ct, cluster_map, cluster_starts, sample_to_cluster, nullptr)) {
goto dfam_ret_NOMEM;
}
} else {
- fill_uint_zero(sample_to_cluster, sample_ct);
+ fill_uint_zero(sample_ct, sample_to_cluster);
cluster_ct = 1;
}
for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
@@ -4259,7 +4259,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
}
}
- ulii = 2 * max_marker_allele_len + plink_maxsnp + MAX_ID_LEN + 256;
+ ulii = 2 * max_marker_allele_len + plink_maxsnp + MAX_ID_SLEN + 256;
if (ulii > MAXLINELEN) {
if (bigstack_alloc_c(ulii, &textbuf)) {
goto dfam_ret_NOMEM;
@@ -4269,8 +4269,8 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
// permutation test boilerplate mostly copied from qassoc() in plink_assoc.c,
// since it's also restricted to autosomes
g_perms_done = 0;
- g_mperm_save_all = NULL;
- g_perm_vecs = NULL;
+ g_mperm_save_all = nullptr;
+ g_perm_vecs = nullptr;
if (perm_maxt_nst) {
perms_total = fam_ip->dfam_mperm_val;
if (bigstack_calloc_d(perms_total, &maxt_extreme_stat)) {
@@ -4714,7 +4714,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
}
pval = chiprob_p(chisq, 1);
if ((pfilter == 2.0) || ((pval <= pfilter) && (pval >= 0.0))) {
- wptr = width_force(4, textbuf, chrom_name_write(chrom_info_ptr, get_marker_chrom(chrom_info_ptr, marker_uidx2), textbuf));
+ wptr = width_force(4, textbuf, chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, marker_uidx2), textbuf));
*wptr++ = ' ';
wptr = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx2 * max_marker_id_len]), wptr);
*wptr++ = ' ';
@@ -4746,7 +4746,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
if ((!perm_pass_idx) && (marker_idx >= loop_end)) {
if (marker_idx < marker_unstopped_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_unstopped_ct;
printf("\b\b%u%%", pct);
@@ -4757,7 +4757,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
} while (marker_idx < marker_unstopped_ct);
if (!perm_pass_idx) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -4776,7 +4776,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
goto dfam_ret_NOMEM;
}
fill_idx_to_uidx(marker_exclude, unfiltered_marker_ct, marker_ct, idx_to_uidx);
- retval = multcomp(outname, outname_end, idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, NULL, NULL);
+ retval = multcomp(outname, outname_end, idx_to_uidx, marker_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, orig_chisq, pfilter, output_min_p, mtest_adjust, 0, adjust_lambda, nullptr, nullptr);
if (retval) {
goto dfam_ret_1;
}
@@ -4817,7 +4817,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u %s permutation%s complete.\n", g_perms_done, perm_maxt_nst? "max(T)" : "adaptive", (g_perms_done != 1)? "s" : "");
if (perm_adapt_nst) {
memcpy(outname_end2, ".perm", 6);
@@ -4850,7 +4850,7 @@ int32_t dfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
dxx = 0.5 * dyy;
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, uii, g_textbuf));
@@ -5201,14 +5201,14 @@ THREAD_RET_TYPE qfam_thread(void* arg) {
double* pheno_d2 = g_pheno_d2;
double* beta_sum = g_beta_sum;
double* beta_ssq = g_beta_ssq;
- uint32_t* qfam_permute = only_within? NULL : g_qfam_permute;
- uint32_t* permute_edit_buf = only_within? NULL : (&(g_permute_edit[tidx * round_up_pow2(fss_ct, CACHELINE_INT32)]));
+ uint32_t* qfam_permute = only_within? nullptr : g_qfam_permute;
+ uint32_t* permute_edit_buf = only_within? nullptr : (&(g_permute_edit[tidx * round_up_pow2(fss_ct, CACHELINE_INT32)]));
uint32_t* perm_2success_ct = g_perm_2success_ct;
uint32_t* perm_attempt_ct = g_perm_attempt_ct;
uint32_t* fs_starts = g_fs_starts;
uint32_t* fss_contents = g_fss_contents;
uint32_t* sample_lm_to_fss_idx = g_sample_lm_to_fss_idx;
- uint32_t* perm_ptr = NULL;
+ uint32_t* perm_ptr = nullptr;
uint32_t* beta_fail_cts = g_beta_fail_cts;
uintptr_t cur_perm_ct = g_cur_perm_ct;
uintptr_t sample_ct = g_qfam_sample_ct;
@@ -5371,7 +5371,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
// Fortunately, this can use some of qassoc()'s logic instead of punting to
// LAPACK, since it doesn't support covariates.
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctp1l2 = 1 + (unfiltered_sample_ct / BITCT2);
@@ -5382,8 +5382,8 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
double geno_sum = 0.0;
double geno_ssq = 0.0;
double qt_g_prod = 0.0;
- double* orig_beta = NULL;
- char* chrom_name_ptr = NULL;
+ double* orig_beta = nullptr;
+ char* chrom_name_ptr = nullptr;
uint32_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
uint32_t test_type = fam_ip->qfam_modifier & QFAM_TEST;
uint32_t perm_adapt = fam_ip->qfam_modifier & QFAM_PERM;
@@ -5397,12 +5397,12 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
uint32_t chrom_name_len = 0;
uint32_t regress_fail_ct = 0;
uint32_t pct = 0;
- int32_t mt_code = chrom_info_ptr->mt_code;
+ int32_t mt_code = chrom_info_ptr->xymt_codes[MT_OFFSET];
int32_t retval = 0;
const char qfam_flag_suffixes[][8] = {"within", "parents", "total", "between"};
const char qfam_test_str[][6] = {"WITH ", " TOT ", " BET "};
const char* qfam_test_ptr = qfam_test_str[0];
- char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_LEN];
+ char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_SLEN];
uint32_t mu_table[MODEL_BLOCKSIZE];
const char* flag_suffix;
uintptr_t* lm_within2_founder;
@@ -5491,7 +5491,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
goto qfam_ret_INVALID_CMDLINE;
}
// no --mendel-duos support for now
- retval = get_trios_and_families(unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, NULL, &max_fid_len, NULL, NULL, &family_list, &family_ct, &trio_list, &trio_ct, &trio_error_lookup, 0, multigen);
+ retval = get_trios_and_families(unfiltered_sample_ct, sample_exclude, sample_ct, founder_info, sex_nm, sex_male, sample_ids, max_sample_id_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, nullptr, &max_fid_len, nullptr, nullptr, &family_list, &family_ct, &trio_list, &trio_ct, &trio_error_lookup, 0, multigen);
if (retval) {
goto qfam_ret_1;
}
@@ -5506,7 +5506,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
goto qfam_ret_INVALID_CMDLINE;
}
#endif
- if (get_sibship_info(unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, founder_info, sample_ids, max_sample_id_len, max_fid_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, family_list, trio_list, family_ct, trio_ct, test_type, NULL, &lm_eligible, &lm_within2_founder, &fs_starts, &fss_contents, &sample_lm_to_fss_idx, &fs_ct, &lm_ct, &singleton_ct)) {
+ if (get_sibship_info(unfiltered_sample_ct, sample_exclude, sample_ct, pheno_nm, founder_info, sample_ids, max_sample_id_len, max_fid_len, paternal_ids, max_paternal_id_len, maternal_ids, max_maternal_id_len, family_list, trio_list, family_ct, trio_ct, test_type, nullptr, &lm_eligible, &lm_within2_founder, &fs_starts, &fss_contents, &sample_lm_to_fss_idx, &fs_ct, &lm_ct, &singleton_ct)) {
goto qfam_ret_NOMEM;
}
fss_ct = fs_ct + singleton_ct;
@@ -5557,7 +5557,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
g_adaptive_intercept = apip->init_interval;
g_adaptive_slope = apip->interval_slope;
} else {
- g_perm_attempt_ct = NULL;
+ g_perm_attempt_ct = nullptr;
perms_total = fam_ip->qfam_mperm_val;
g_first_adapt_check = perms_total + 1;
g_aperm_alpha = 0.0;
@@ -5594,9 +5594,9 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
goto qfam_ret_NOMEM;
}
} else {
- g_beta_sum = NULL;
- g_beta_ssq = NULL;
- g_beta_fail_cts = NULL;
+ g_beta_sum = nullptr;
+ g_beta_ssq = nullptr;
+ g_beta_fail_cts = nullptr;
}
if (bigstack_alloc_ul(MODEL_BLOCKSIZE * sample_ctl2, &g_loadbuf) ||
bigstack_alloc_d(marker_ct, &g_orig_stat) ||
@@ -5635,7 +5635,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
for (uii = 0; uii < fss_ct; uii++) {
dummy_perm[uii] = uii;
}
- fill_ulong_zero(dummy_flip, fss_ctl);
+ fill_ulong_zero(fss_ctl, dummy_flip);
LOGPRINTFWW("--qfam-%s: Permuting %" PRIuPTR " families/singletons, and including %u %s in linear regression.\n", flag_suffix, fss_ct, lm_ct, g_species_plural);
LOGPRINTFWW5("Writing report to %s ... ", outname);
@@ -5666,7 +5666,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
// permutation generation, since integer divison/modulus sucks that badly?
// Todo: test with a dataset with ~10k samples.)
if (only_within) {
- fill_ulong_zero(g_qfam_flip, cur_perm_ct * lm_ctl);
+ fill_ulong_zero(cur_perm_ct * lm_ctl, g_qfam_flip);
ujj = fss_ctl * (BITCT / 32);
ulptr = g_qfam_flip;
for (ulii = 0; ulii < cur_perm_ct; ulii++) {
@@ -5681,7 +5681,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
}
ulptr = &(ulptr[lm_ctl]);
}
- fill_ulong_zero(dummy_flip, fss_ctl);
+ fill_ulong_zero(fss_ctl, dummy_flip);
} else {
for (ulii = 0; ulii < cur_perm_ct; ulii++) {
uint32_permute(&(g_qfam_permute[ulii * fss_ct]), &(precomputed_mods[-1]), &g_sfmt, fss_ct);
@@ -5711,7 +5711,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
if (marker_uidx >= chrom_end) {
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
if ((!IS_SET(chrom_info_ptr->haploid_mask, chrom_idx)) && (chrom_idx != (uint32_t)mt_code)) {
@@ -5752,9 +5752,9 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
// note that chrom_fo_idx/chrom_end state actually needs to be
// restored at the end of this loop. fortunately, that
// automatically happens.
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx_cur);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx_cur);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
chrom_name_ptr = chrom_name_buf5w4write(chrom_info_ptr, chrom_idx, &chrom_name_len, chrom_name_buf);
}
bufptr = memcpyax(g_textbuf, chrom_name_ptr, chrom_name_len, ' ');
@@ -5808,7 +5808,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
marker_idx2_base += block_size;
if ((!perms_done) && (marker_idx2_base >= loop_end) && (marker_idx2_base != marker_unstopped_ct)) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx2_base * 100LLU) / marker_unstopped_ct;
printf("\b\b%u%%", pct);
@@ -5822,7 +5822,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
goto qfam_ret_WRITE_FAIL;
}
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -5838,7 +5838,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
fflush(stdout);
marker_unstopped_ct = marker_ct - popcount_longs((uintptr_t*)perm_adapt_stop, (marker_ct + sizeof(intptr_t) - 1) / sizeof(intptr_t));
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
memcpy(outname_end, ".perm", 6);
if (fopen_checked(outname, "w", &outfile)) {
goto qfam_ret_OPEN_FAIL;
@@ -5852,7 +5852,7 @@ int32_t qfam(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outn
if (marker_uidx >= chrom_end) {
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
if ((!IS_SET(chrom_info_ptr->haploid_mask, chrom_idx)) && (chrom_idx != (uint32_t)mt_code)) {
diff --git a/plink_filter.c b/plink_filter.c
index e2f44ce..77013ac 100644
--- a/plink_filter.c
+++ b/plink_filter.c
@@ -8,11 +8,11 @@
void oblig_missing_init(Oblig_missing_info* om_ip) {
om_ip->cluster_ct = 0;
om_ip->entry_ct = 0;
- om_ip->entries = NULL;
- om_ip->cluster_ref_cts = NULL;
- om_ip->sample_lookup = NULL;
- om_ip->marker_fname = NULL;
- om_ip->sample_fname = NULL;
+ om_ip->entries = nullptr;
+ om_ip->cluster_ref_cts = nullptr;
+ om_ip->sample_lookup = nullptr;
+ om_ip->marker_fname = nullptr;
+ om_ip->sample_fname = nullptr;
}
void oblig_missing_cleanup(Oblig_missing_info* om_ip) {
@@ -22,7 +22,7 @@ void oblig_missing_cleanup(Oblig_missing_info* om_ip) {
free_cond(om_ip->sample_lookup);
free_cond(om_ip->marker_fname);
free_cond(om_ip->sample_fname);
- om_ip->marker_fname = NULL;
+ om_ip->marker_fname = nullptr;
}
}
@@ -42,13 +42,13 @@ const char* keep_or_remove_flag_str(uint32_t flags) {
case 3:
return remove_fam_str;
}
- return NULL;
+ return nullptr;
}
int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr, uint32_t flags, uint32_t allow_no_samples) {
- FILE* infile = NULL;
+ FILE* infile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
- uintptr_t* exclude_arr_new = NULL;
+ uintptr_t* exclude_arr_new = nullptr;
uintptr_t unfiltered_ctl = BITCT_TO_WORDCT(unfiltered_ct);
uintptr_t duplicate_ct = 0;
uintptr_t line_idx = 0;
@@ -80,7 +80,7 @@ int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, u
if (bigstack_alloc_c(max_id_len, &id_buf)) {
goto keep_or_remove_ret_NOMEM;
}
- while (fgets(g_textbuf, MAXLINELEN, infile) != NULL) {
+ while (fgets(g_textbuf, MAXLINELEN, infile) != nullptr) {
line_idx++;
if (!g_textbuf[MAXLINELEN - 1]) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --%s file is pathologically long.\n", line_idx, keep_or_remove_flag_str(flags));
@@ -91,7 +91,7 @@ int32_t keep_or_remove(char* fname, char* sorted_ids, uintptr_t sorted_ids_ct, u
continue;
}
if (!families_only) {
- if (bsearch_read_fam_indiv(bufptr0, sorted_ids, max_id_len, sorted_ids_ct, NULL, &ii, id_buf)) {
+ if (bsearch_read_fam_indiv(bufptr0, sorted_ids, max_id_len, sorted_ids_ct, nullptr, &ii, id_buf)) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --%s file has fewer tokens than expected.\n", line_idx, keep_or_remove_flag_str(flags));
goto keep_or_remove_ret_INVALID_FORMAT_2;
}
@@ -237,7 +237,7 @@ void extract_exclude_process_token(const char* tok_start, const uint32_t* marker
int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t do_exclude, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t allow_no_variants) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t duplicate_ct = 0;
// needs to be synced with populate_id_htable
@@ -347,13 +347,13 @@ int32_t extract_exclude_flag_norange(char* fname, uint32_t* marker_id_htable, ui
}
int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uint32_t id_htable_size, uint32_t allow_no_variants, char* item_ids, uintptr_t max_id_len, uintptr_t unfiltered_ct, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr) {
- gzFile gz_infile = NULL;
+ gzFile gz_infile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t include_ct = 0;
uintptr_t unfiltered_ctl = BITCT_TO_WORDCT(unfiltered_ct);
- char* sorted_pos_match = NULL;
- char* sorted_neg_match = NULL;
- char* bufptr2 = NULL;
+ char* sorted_pos_match = nullptr;
+ char* sorted_neg_match = nullptr;
+ char* bufptr2 = nullptr;
uint32_t pos_match_ct = 0;
uint32_t neg_match_ct = 0;
uintptr_t max_pos_match_len = 0;
@@ -380,8 +380,8 @@ int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uin
}
fill_all_bits(unfiltered_ct, exclude_arr_new);
if (condition_str) {
- // allow NULL condition_str; this means all samples/variants named in the
- // file are included
+ // allow nullptr condition_str; this means all samples/variants named in
+ // the file are included
cond_ptr = condition_str;
while (1) {
while (*cond_ptr == ',') {
@@ -578,14 +578,14 @@ int32_t filter_attrib(char* fname, char* condition_str, uint32_t* id_htable, uin
int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids, uintptr_t sorted_ids_ct, uintptr_t max_id_len, uint32_t* id_map, uintptr_t unfiltered_ct, uint32_t allow_no_samples, uintptr_t* exclude_arr, uintptr_t* exclude_ct_ptr) {
// re-merge this with filter_attrib() after making sample ID lookup
// hash-based
- gzFile gz_infile = NULL;
+ gzFile gz_infile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t include_ct = 0;
uintptr_t unfiltered_ctl = BITCT_TO_WORDCT(unfiltered_ct);
- char* sorted_pos_match = NULL;
- char* sorted_neg_match = NULL;
- char* id_buf = NULL;
- char* bufptr2 = NULL;
+ char* sorted_pos_match = nullptr;
+ char* sorted_neg_match = nullptr;
+ char* id_buf = nullptr;
+ char* bufptr2 = nullptr;
uint32_t pos_match_ct = 0;
uint32_t neg_match_ct = 0;
uintptr_t max_pos_match_len = 0;
@@ -613,8 +613,8 @@ int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids,
}
fill_all_bits(unfiltered_ct, exclude_arr_new);
if (condition_str) {
- // allow NULL condition_str; this means all samples/variants named in the
- // file are included
+ // allow nullptr condition_str; this means all samples/variants named in
+ // the file are included
cond_ptr = condition_str;
while (1) {
while (*cond_ptr == ',') {
@@ -812,7 +812,7 @@ int32_t filter_attrib_sample(char* fname, char* condition_str, char* sorted_ids,
int32_t filter_qual_scores(Two_col_params* qual_filter, double qual_min_thresh, double qual_max_thresh, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, uint32_t allow_no_variants, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t miss_ct = 0;
uint32_t varid_first = (qual_filter->colid < qual_filter->colx);
@@ -1082,9 +1082,9 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
// 4. scan through .bed sequentially, update oblig_missing_..._cts
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
char* idbuf = &(g_textbuf[MAXLINELEN]);
- Ll_str* cluster_names = NULL;
+ Ll_str* cluster_names = nullptr;
uint64_t tot_missing = 0;
uintptr_t marker_ct = unfiltered_marker_ct - marker_exclude_ct;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
@@ -1096,7 +1096,7 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
uintptr_t y_start = 0;
uintptr_t y_end = 0;
uintptr_t line_idx = 0;
- int32_t y_code = chrom_info_ptr->y_code;
+ int32_t y_code = chrom_info_ptr->xymt_codes[Y_OFFSET];
uint32_t y_present = ((y_code != -1) && is_set(chrom_info_ptr->chrom_mask, y_code));
int32_t retval = 0;
Ll_str* ll_ptr;
@@ -1127,8 +1127,8 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
int32_t ii;
if (y_present) {
- y_start = chrom_info_ptr->chrom_start[(uint32_t)y_code];
- y_end = chrom_info_ptr->chrom_end[(uint32_t)y_code];
+ y_start = get_chrom_start_vidx(chrom_info_ptr, (uint32_t)y_code);
+ y_end = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)y_code);
}
if (bigstack_alloc_ul(unfiltered_sample_ctl2, &loadbuf)) {
goto load_oblig_missing_ret_NOMEM;
@@ -1141,7 +1141,7 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
// two-pass load, same as load_clusters()
// use loadbuf as duplicate IID detector
- fill_ulong_zero(loadbuf, sorted_sample_ctl);
+ fill_ulong_zero(sorted_sample_ctl, loadbuf);
while (fgets(g_textbuf, MAXLINELEN, infile)) {
line_idx++;
if (!g_textbuf[MAXLINELEN - 1]) {
@@ -1195,7 +1195,7 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
}
bigstack_end_reset(bigstack_end_mark);
qsort(cluster_ids, possible_distinct_ct, max_cluster_id_len, strcmp_casted);
- cluster_ct = collapse_duplicate_ids(cluster_ids, possible_distinct_ct, max_cluster_id_len, NULL);
+ cluster_ct = collapse_duplicate_ids(cluster_ids, possible_distinct_ct, max_cluster_id_len, nullptr);
bigstack_shrink_top(cluster_ids, cluster_ct * max_cluster_id_len);
cluster_mct = cluster_ct * (y_present + 1);
sample_lookup = (uint32_t*)malloc(unfiltered_sample_ct * sizeof(int32_t));
@@ -1207,7 +1207,7 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
bigstack_calloc_ul(cluster_mct * unfiltered_sample_ctl2, &cluster_zmask2s)) {
goto load_oblig_missing_ret_NOMEM;
}
- fill_uint_one(sample_lookup, unfiltered_sample_ct);
+ fill_uint_one(unfiltered_sample_ct, sample_lookup);
// second pass
rewind(infile);
@@ -1251,7 +1251,7 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
goto load_oblig_missing_ret_NOMEM;
}
om_ip->cluster_ref_cts = cluster_ref_cts;
- fill_uint_zero(cluster_ref_cts, cluster_ct * 2);
+ fill_uint_zero(cluster_ct * 2, cluster_ref_cts);
retval = sort_item_ids(unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, 0, 0, strcmp_deref, &sorted_marker_ids, &marker_id_map);
if (retval) {
goto load_oblig_missing_ret_1;
@@ -1372,7 +1372,7 @@ int32_t load_oblig_missing(FILE* bedfile, uintptr_t bed_offset, uintptr_t unfilt
}
int32_t filter_samples_file(char* filtername, char* sorted_sample_ids, uintptr_t sorted_ids_len, uintptr_t max_sample_id_len, uint32_t* id_map, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sample_exclude_ct_ptr, char* filtervals_flattened, uint32_t mfilter_col, uint32_t allow_no_samples) {
- FILE* infile = NULL;
+ FILE* infile = nullptr;
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t include_ct = 0;
@@ -1510,7 +1510,7 @@ int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
return 0;
}
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uint32_t marker_ct = unfiltered_marker_ct - marker_exclude_ct;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
@@ -1519,12 +1519,12 @@ int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
uintptr_t marker_idx = 0;
uintptr_t y_start = 0;
uintptr_t y_end = 0;
- uintptr_t* sample_male_include2 = NULL;
+ uintptr_t* sample_male_include2 = nullptr;
uint32_t unfiltered_sample_ctl2m1 = (unfiltered_sample_ct - 1) / BITCT2;
uint32_t sample_uidx = 0;
uint32_t sample_idx = 0;
uint32_t removed_ct = 0;
- int32_t y_code = chrom_info_ptr->y_code;
+ int32_t y_code = chrom_info_ptr->xymt_codes[Y_OFFSET];
uint32_t y_present = (y_code != -1) && is_set(chrom_info_ptr->chrom_mask, y_code);
uint32_t nony_marker_ct = marker_ct;
int32_t retval = 0;
@@ -1545,8 +1545,8 @@ int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
uintptr_t ulii;
if (y_present) {
- y_start = chrom_info_ptr->chrom_start[(uint32_t)y_code];
- y_end = chrom_info_ptr->chrom_end[(uint32_t)y_code];
+ y_start = get_chrom_start_vidx(chrom_info_ptr, (uint32_t)y_code);
+ y_end = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)y_code);
if (bigstack_alloc_ul(unfiltered_sample_ctl2, &sample_male_include2)) {
goto mind_filter_ret_NOMEM;
}
@@ -1598,7 +1598,7 @@ int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
}
}
}
- fill_ulong_zero(newly_excluded, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, newly_excluded);
if (!om_ip->entry_ct) {
mind_int_thresh[0] = (int32_t)(mind_thresh * ((int32_t)nony_marker_ct) * (1 + SMALL_EPSILON));
mind_int_thresh[1] = (int32_t)(mind_thresh * ((int32_t)marker_ct) * (1 + SMALL_EPSILON));
@@ -1652,7 +1652,7 @@ int32_t mind_filter(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
for (sample_idx = 0; sample_idx < removed_ct; sample_idx++, sample_uidx++) {
next_set_unsafe_ck(newly_excluded, &sample_uidx);
fputs(&(sample_ids[sample_uidx * max_sample_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
if (fclose_null(&outfile)) {
goto mind_filter_ret_WRITE_FAIL;
@@ -2141,7 +2141,7 @@ static inline void haploid_single_marker_freqs(uintptr_t unfiltered_sample_ct, u
}
int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_exclude_ct, char* sample_ids, uintptr_t max_sample_id_len, uintptr_t* founder_info, int32_t nonfounders, int32_t maf_succ, double* set_allele_freqs, uintptr_t bed_offset, uint32_t hwe_needed, uint32_t hwe_all, uin [...]
- FILE* hhfile = NULL;
+ FILE* hhfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctv2 = 2 * unfiltered_sample_ctl;
@@ -2169,9 +2169,9 @@ int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uint
uint32_t nonmissing_nonmale_y = 0;
int32_t ii = chrom_info_ptr->chrom_file_order[0];
uint32_t is_haploid = is_set(chrom_info_ptr->haploid_mask, ii);
- uint32_t next_chrom_start = chrom_info_ptr->chrom_file_order_marker_idx[1];
- uint32_t is_x = (ii == chrom_info_ptr->x_code);
- uint32_t is_y = (ii == chrom_info_ptr->y_code);
+ uint32_t next_chrom_start = chrom_info_ptr->chrom_fo_vidx_start[1];
+ uint32_t is_x = (ii == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ uint32_t is_y = (ii == chrom_info_ptr->xymt_codes[Y_OFFSET]);
uint32_t ll_ct = 0;
uint32_t lh_ct = 0;
uint32_t hh_ct = 0;
@@ -2181,26 +2181,26 @@ int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uint
uint32_t ukk = 0;
uint32_t cur_oblig_missing = 0;
uint32_t om_cluster_ct = 0;
- uint32_t* om_cluster_sizes = NULL;
- int32_t* hwe_lls = NULL;
- int32_t* hwe_lhs = NULL;
- int32_t* hwe_hhs = NULL;
- int32_t* hwe_ll_cases = NULL;
- int32_t* hwe_lh_cases = NULL;
- int32_t* hwe_hh_cases = NULL;
- int32_t* hwe_ll_allfs = NULL;
- int32_t* hwe_lh_allfs = NULL;
- int32_t* hwe_hh_allfs = NULL;
- uintptr_t* sample_nonmale_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* founder_nonmale_include2 = NULL;
- uintptr_t* founder_ctrl_nonmale_include2 = NULL;
- uintptr_t* founder_male_include2 = NULL;
- uintptr_t* founder_case_include2 = NULL;
- uintptr_t* founder_case_nonmale_include2 = NULL;
- uintptr_t* geno_excl_bitfield = NULL;
- uintptr_t* ac_excl_bitfield = NULL;
- uint64_t* om_entry_ptr = NULL;
+ uint32_t* om_cluster_sizes = nullptr;
+ int32_t* hwe_lls = nullptr;
+ int32_t* hwe_lhs = nullptr;
+ int32_t* hwe_hhs = nullptr;
+ int32_t* hwe_ll_cases = nullptr;
+ int32_t* hwe_lh_cases = nullptr;
+ int32_t* hwe_hh_cases = nullptr;
+ int32_t* hwe_ll_allfs = nullptr;
+ int32_t* hwe_lh_allfs = nullptr;
+ int32_t* hwe_hh_allfs = nullptr;
+ uintptr_t* sample_nonmale_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* founder_nonmale_include2 = nullptr;
+ uintptr_t* founder_ctrl_nonmale_include2 = nullptr;
+ uintptr_t* founder_male_include2 = nullptr;
+ uintptr_t* founder_case_include2 = nullptr;
+ uintptr_t* founder_case_nonmale_include2 = nullptr;
+ uintptr_t* geno_excl_bitfield = nullptr;
+ uintptr_t* ac_excl_bitfield = nullptr;
+ uint64_t* om_entry_ptr = nullptr;
uint32_t sample_nonmale_ct = 0;
uint32_t sample_f_nonmale_ct = 0;
uint32_t sample_f_ctl_nonmale_ct = 0;
@@ -2293,9 +2293,9 @@ int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uint
loadbuf[unfiltered_sample_ctv2 - 2] = 0;
loadbuf[unfiltered_sample_ctv2 - 1] = 0;
init_quaterarr_from_inverted_bitarr(sample_exclude, unfiltered_sample_ct, sample_include2);
- ii = chrom_info_ptr->x_code;
+ ii = chrom_info_ptr->xymt_codes[X_OFFSET];
nonmales_needed = (!is_split_chrom) && (ii != -1) && is_set(chrom_info_ptr->chrom_mask, ii);
- ii = chrom_info_ptr->y_code;
+ ii = chrom_info_ptr->xymt_codes[Y_OFFSET];
males_needed = nonmales_needed || ((!is_split_chrom) && (ii != -1) && is_set(chrom_info_ptr->chrom_mask, ii));
if (bigstack_alloc_ul(unfiltered_sample_ctv2, &sample_male_include2)) {
goto calc_freqs_and_hwe_ret_NOMEM;
@@ -2452,12 +2452,12 @@ int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uint
}
if (marker_uidx >= next_chrom_start) {
do {
- next_chrom_start = chrom_info_ptr->chrom_file_order_marker_idx[(++cur_chrom_idx) + 1];
+ next_chrom_start = chrom_info_ptr->chrom_fo_vidx_start[(++cur_chrom_idx) + 1];
} while (marker_uidx >= next_chrom_start);
ii = chrom_info_ptr->chrom_file_order[cur_chrom_idx];
is_haploid = is_set(chrom_info_ptr->haploid_mask, ii);
- is_x = (ii == chrom_info_ptr->x_code);
- is_y = (ii == chrom_info_ptr->y_code);
+ is_x = (ii == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (ii == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
if (om_entry_ptr) {
cur_oblig_missing = 0;
@@ -2590,9 +2590,9 @@ int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uint
while (ulii) {
ukk = sample_uidx * BITCT2 + CTZLU(ulii) / 2;
fputs(&(sample_ids[ukk * max_sample_id_len]), hhfile);
- putc('\t', hhfile);
+ putc_unlocked('\t', hhfile);
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), hhfile);
- putc('\n', hhfile);
+ putc_unlocked('\n', hhfile);
ulii &= ulii - ONELU;
}
}
@@ -2603,9 +2603,9 @@ int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uint
while (ulii) {
ukk = sample_uidx * BITCT2 + CTZLU(ulii) / 2;
fputs(&(sample_ids[ukk * max_sample_id_len]), hhfile);
- putc('\t', hhfile);
+ putc_unlocked('\t', hhfile);
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), hhfile);
- putc('\n', hhfile);
+ putc_unlocked('\n', hhfile);
ulii &= ulii - ONELU;
}
}
@@ -2645,7 +2645,7 @@ int32_t calc_freqs_and_hwe(FILE* bedfile, char* outname, char* outname_end, uint
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -2689,20 +2689,20 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctv2 = round_up_pow2(unfiltered_sample_ctl2, 2);
uintptr_t marker_ct_y = 0;
- uintptr_t* sample_male_include2 = NULL;
- uint64_t* om_entry_ptr = NULL;
- uintptr_t* cur_omidxs = NULL;
- char* pzwritep = NULL;
- uint32_t* sample_to_cluster = NULL;
- uint32_t* missing_ct_by_cluster = NULL;
- uint32_t* oblig_missing_ct_by_cluster = NULL;
- uint32_t* cluster_sizes = NULL;
- uint32_t* cluster_sizes_y = NULL;
- uint32_t* om_cluster_sizes = NULL;
- uint32_t* om_sample_lookup = NULL;
- uint32_t* om_cluster_ref_cts = NULL;
+ uintptr_t* sample_male_include2 = nullptr;
+ uint64_t* om_entry_ptr = nullptr;
+ uintptr_t* cur_omidxs = nullptr;
+ char* pzwritep = nullptr;
+ uint32_t* sample_to_cluster = nullptr;
+ uint32_t* missing_ct_by_cluster = nullptr;
+ uint32_t* oblig_missing_ct_by_cluster = nullptr;
+ uint32_t* cluster_sizes = nullptr;
+ uint32_t* cluster_sizes_y = nullptr;
+ uint32_t* om_cluster_sizes = nullptr;
+ uint32_t* om_sample_lookup = nullptr;
+ uint32_t* om_cluster_ref_cts = nullptr;
uint64_t cur_om_entry = 0;
- int32_t y_code = chrom_info_ptr->y_code;
+ int32_t y_code = chrom_info_ptr->xymt_codes[Y_OFFSET];
uint32_t y_present = (y_code != -1) && is_set(chrom_info_ptr->chrom_mask, y_code);
uint32_t sample_uidx = 0;
uint32_t sample_idx = 0;
@@ -2754,7 +2754,7 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
memcpy(sample_male_include2, sample_include2, unfiltered_sample_ctv2 * sizeof(intptr_t));
apply_bitarr_mask_to_quaterarr_01(sex_male, unfiltered_sample_ct, sample_male_include2);
if (y_present) {
- marker_ct_y = count_chrom_markers(chrom_info_ptr, marker_exclude, chrom_info_ptr->y_code);
+ marker_ct_y = count_chrom_markers(chrom_info_ptr, marker_exclude, chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
marker_ct_nony = marker_ct - marker_ct_y;
if (fseeko(bedfile, bed_offset, SEEK_SET)) {
@@ -2826,11 +2826,11 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
pzwritep += sprintf(pzwritep, g_textbuf, "SNP");
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
if (marker_uidx < chrom_end) {
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
if (!is_y) {
cur_nm = sample_include2;
@@ -2864,7 +2864,7 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
cur_om_entry = *(++om_entry_ptr);
}
if (cluster_ct) {
- fill_uint_zero(oblig_missing_ct_by_cluster, cluster_ct);
+ fill_uint_zero(cluster_ct, oblig_missing_ct_by_cluster);
}
}
if (!cluster_ct) {
@@ -2894,7 +2894,7 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
goto write_missingness_reports_ret_WRITE_FAIL;
}
} else {
- fill_uint_zero(missing_ct_by_cluster, cluster_ct);
+ fill_uint_zero(cluster_ct, missing_ct_by_cluster);
if ((!om_entry_ptr) || ((cur_om_entry >> 32) != marker_uidx)) {
for (uii = 0; uii < ujj; uii += BITCT2) {
ulii = *lptr++;
@@ -2910,7 +2910,7 @@ int32_t write_missingness_reports(FILE* bedfile, uintptr_t bed_offset, char* out
}
}
} else {
- fill_ulong_zero(cur_omidxs, om_cluster_ctl);
+ fill_ulong_zero(om_cluster_ctl, cur_omidxs);
do {
set_bit(((uint32_t)cur_om_entry) - om_ycorr, cur_omidxs);
cur_om_entry = *(++om_entry_ptr);
@@ -3071,7 +3071,7 @@ int32_t hardy_report_write_line(Pigz_state* ps_ptr, char** pzwritep_ptr, char* p
int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, int32_t* hwe_lls, int32_t* hwe_lhs, int32_t* hwe_hhs, uint32_t hwe_modifier, uint32_t nonfounders, int32_t* hwe_ll_cases, int32_t* hwe_lh_cases, int32_t* hwe_hh_cases, int [...]
unsigned char* bigstack_mark = g_bigstack_base;
- char* pzwritep = NULL;
+ char* pzwritep = nullptr;
uintptr_t marker_ct = unfiltered_marker_ct - marker_exclude_ct;
uintptr_t marker_uidx = 0;
uintptr_t marker_idx = 0;
@@ -3188,7 +3188,7 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -3237,7 +3237,7 @@ int32_t hardy_report(char* outname, char* outname_end, double output_min_p, uint
}
if (pct < 100) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
printf("\b\b%u%%", pct);
fflush(stdout);
@@ -3291,11 +3291,11 @@ uint32_t enforce_hwe_threshold(double hwe_thresh, uintptr_t unfiltered_marker_ct
}
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
if (marker_uidx < chrom_end) {
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- if ((((int32_t)chrom_idx) == chrom_info_ptr->mt_code) || (is_set(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x))) {
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ if ((((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[MT_OFFSET]) || (is_set(chrom_info_ptr->haploid_mask, chrom_idx) && (!is_x))) {
continue;
}
// okay if min_obs_x is an underestimate
@@ -3387,7 +3387,7 @@ void enforce_min_bp_space(int32_t min_bp_space, uint32_t unfiltered_marker_ct, u
int32_t last_pos;
int32_t cur_pos;
for (chrom_fo_idx_p1 = 1; chrom_fo_idx_p1 <= chrom_ct; chrom_fo_idx_p1++) {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx_p1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx_p1];
if (marker_uidx >= chrom_end) {
continue;
}
diff --git a/plink_glm.c b/plink_glm.c
index 2d1c0c0..0310da9 100644
--- a/plink_glm.c
+++ b/plink_glm.c
@@ -75,8 +75,8 @@ int32_t glm_scan_conditions(char* condition_mname, char* condition_fname, uintpt
// side effects: load_mask and sample_valid_ct potentially updated,
// condition_ct should be changed, condition_uidxs should be malloc'd
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* condition_file = NULL;
- uint32_t* condition_uidxs = NULL;
+ FILE* condition_file = nullptr;
+ uint32_t* condition_uidxs = nullptr;
uintptr_t marker_ctl = BITCT_TO_WORDCT(marker_ct);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
@@ -210,10 +210,10 @@ int32_t glm_scan_conditions(char* condition_mname, char* condition_fname, uintpt
if (load_raw(unfiltered_sample_ct4, bedfile, loadbuf_raw)) {
goto glm_scan_conditions_ret_READ_FAIL;
}
- chrom_idx = get_marker_chrom(chrom_info_ptr, marker_uidx);
- if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->mt_code)) {
- is_x = ((int32_t)chrom_idx == chrom_info_ptr->x_code);
- is_y = ((int32_t)chrom_idx == chrom_info_ptr->y_code);
+ chrom_idx = get_variant_chrom(chrom_info_ptr, marker_uidx);
+ if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ is_x = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
haploid_fix(hh_or_mt_exists, sample_raw_include2, sample_raw_male_include2, unfiltered_sample_ct, is_x, is_y, (unsigned char*)loadbuf_raw);
}
// clear loadbuf_mask bits where loadbuf is 01.
@@ -275,7 +275,7 @@ int32_t glm_scan_conditions(char* condition_mname, char* condition_fname, uintpt
}
uint32_t glm_loadbuf_to_doubles(uintptr_t* loadbuf_collapsed, uint32_t sample_valid_ct, double* covar_row, double* geno_map, uintptr_t* cur_missing) {
- // ok for cur_missing to be NULL if there can't possibly be any missing
+ // ok for cur_missing to be nullptr if there can't possibly be any missing
uintptr_t* ulptr_end = &(loadbuf_collapsed[sample_valid_ct / BITCT2]);
uint32_t cur_missing_ct = 0;
uint32_t sample_idx = 0;
@@ -332,7 +332,7 @@ uint32_t glm_loadbuf_to_doubles_x(uintptr_t* loadbuf_collapsed, uintptr_t* sex_m
}
uint32_t glm_loadbuf_to_floats(uintptr_t* loadbuf_collapsed, uint32_t sample_valid_ct, float* covar_row, float* geno_map, uintptr_t* cur_missing) {
- // ok for cur_missing to be NULL if there can't possibly be any missing
+ // ok for cur_missing to be nullptr if there can't possibly be any missing
uintptr_t* ulptr_end = &(loadbuf_collapsed[sample_valid_ct / BITCT2]);
uint32_t cur_missing_ct = 0;
uint32_t sample_idx = 0;
@@ -496,7 +496,7 @@ uint32_t glm_linear(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t samp
double dxx;
double dyy;
double dzz;
- fill_ulong_zero(perm_fails, BITCT_TO_WORDCT(cur_batch_size));
+ fill_ulong_zero(BITCT_TO_WORDCT(cur_batch_size), perm_fails);
col_major_matrix_multiply((uint32_t)param_ct, (uint32_t)param_ct, (uint32_t)sample_valid_ct, covars_sample_major, covars_cov_major, param_2d_buf);
if (invert_matrix((uint32_t)param_ct, param_2d_buf, mi_buf, param_2d_buf2)) {
return 1;
@@ -1354,7 +1354,7 @@ static inline void mult_tmatrix_nxd_vect_d(const float* tm, const float* vect, f
float vect_val;
uint32_t col_idx;
uint32_t row_idx;
- fill_float_zero(dest, col_ct);
+ fill_float_zero(col_ct, dest);
for (row_idx = 0; row_idx < row_ct; row_idx++) {
vect_val = vect[row_idx];
tm_ptr = &(tm[row_idx * col_cta4]);
@@ -1599,7 +1599,7 @@ uint32_t logistic_regression(uint32_t sample_ct, uint32_t param_ct, float* vv, f
float fxx;
uint32_t param_idx;
- fill_float_zero(ll, param_ct * param_cta4);
+ fill_float_zero(param_ct * param_cta4, ll);
while (1) {
iteration++;
@@ -1619,7 +1619,7 @@ uint32_t logistic_regression(uint32_t sample_ct, uint32_t param_ct, float* vv, f
cholesky_decomposition(hh, ll, param_ct);
- fill_float_zero(dcoef, param_ct);
+ fill_float_zero(param_ct, dcoef);
solve_linear_system(ll, grad, dcoef, param_ct);
delta_coef = 0.0;
@@ -1678,7 +1678,7 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
float* fptr2;
double dxx;
float fxx;
- fill_ulong_zero(perm_fails, BITCT_TO_WORDCT(cur_batch_size));
+ fill_ulong_zero(BITCT_TO_WORDCT(cur_batch_size), perm_fails);
for (perm_idx = 0; perm_idx < cur_batch_size; perm_idx++) {
fptr = pheno_buf;
if (!missing_ct) {
@@ -1709,7 +1709,7 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
// compute S
// param_2d_buf = S, param_1d_buf = y, param_1d_buf2 = x
for (param_idx = 0; param_idx < param_ct; param_idx++) {
- fill_float_zero(param_1d_buf, param_ct);
+ fill_float_zero(param_ct, param_1d_buf);
param_1d_buf[param_idx] = 1.0;
solve_linear_system(param_2d_buf2, param_1d_buf, param_1d_buf2, param_ct);
// S does *not* currently have 16-byte aligned rows
@@ -1762,7 +1762,7 @@ uint32_t glm_logistic(uintptr_t cur_batch_size, uintptr_t param_ct, uintptr_t sa
}
if (0) {
glm_logistic_fail:
- fill_float_zero(&(logistic_results[perm_idx * param_ctx_msi]), param_ct_msi);
+ fill_float_zero(param_ct_msi, &(logistic_results[perm_idx * param_ctx_msi]));
SET_BIT(perm_idx, perm_fails);
perm_fail_ct++;
if (joint_test_requested) {
@@ -2178,7 +2178,7 @@ uint32_t glm_fill_design(uintptr_t* loadbuf_collapsed, double* fixed_covars_cov_
dzz = dxx / ((double)((intptr_t)cur_sample_valid_ct));
dyy = sqrt((dyy - dxx * dzz) / ((double)((intptr_t)(cur_sample_valid_ct - 1))));
if (dyy == 0) {
- fill_double_zero(dptr, cur_sample_valid_ct);
+ fill_double_zero(cur_sample_valid_ct, dptr);
} else {
dyy = 1.0 / dyy;
for (sample_idx = 0; sample_idx < cur_sample_valid_ct; sample_idx++) {
@@ -2226,7 +2226,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
for (sample_idx = 0; sample_idx < cur_sample_valid_ct; sample_idx++) {
*fptr++ = 1;
}
- fill_float_zero(fptr, align_skip);
+ fill_float_zero(align_skip, fptr);
fptr = &(fptr[align_skip]);
if (IS_SET(active_params, 1)) {
ulptr = loadbuf_collapsed;
@@ -2318,7 +2318,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
sample_idx_stop = sample_valid_ct;
}
}
- fill_float_zero(fptr, align_skip);
+ fill_float_zero(align_skip, fptr);
fptr = &(fptr[align_skip]);
}
if (genotypic_or_hethom && (!is_nonx_haploid) && IS_SET(active_params, 2)) {
@@ -2344,7 +2344,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
ulptr_end++;
sample_idx_stop = sample_valid_ct;
}
- fill_float_zero(fptr, align_skip);
+ fill_float_zero(align_skip, fptr);
fptr = &(fptr[align_skip]);
}
for (fixed_covar_idx = 0; fixed_covar_idx < fixed_covar_nonsex_ct; fixed_covar_idx++) {
@@ -2352,7 +2352,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
continue;
}
copy_when_nonmissing(loadbuf_collapsed, (char*)(&(fixed_covars_cov_major[fixed_covar_idx * sample_valid_ct])), sizeof(float), sample_valid_ct, missing_ct, (char*)fptr);
- fill_float_zero(&(fptr[cur_sample_valid_ct]), align_skip);
+ fill_float_zero(align_skip, &(fptr[cur_sample_valid_ct]));
fptr = &(fptr[cur_sample_valid_cta4]);
}
if (interactions_present) {
@@ -2447,7 +2447,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
sample_idx_stop = sample_valid_ct;
}
}
- fill_float_zero(fptr, align_skip);
+ fill_float_zero(align_skip, fptr);
fptr = &(fptr[align_skip]);
}
if (genotypic_or_hethom) {
@@ -2477,7 +2477,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
ulptr_end++;
sample_idx_stop = sample_valid_ct;
}
- fill_float_zero(fptr, align_skip);
+ fill_float_zero(align_skip, fptr);
fptr = &(fptr[align_skip]);
}
}
@@ -2486,7 +2486,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
if (include_sex) {
if (IS_SET(active_params, sex_start_idx)) {
copy_when_nonmissing(loadbuf_collapsed, (char*)(&(fixed_covars_cov_major[fixed_covar_nonsex_ct * sample_valid_ct])), sizeof(float), sample_valid_ct, missing_ct, (char*)fptr);
- fill_float_zero(&(fptr[cur_sample_valid_ct]), align_skip);
+ fill_float_zero(align_skip, &(fptr[cur_sample_valid_ct]));
fptr = &(fptr[cur_sample_valid_cta4]);
}
if (interactions_present) {
@@ -2579,7 +2579,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
sample_idx_stop = sample_valid_ct;
}
}
- fill_float_zero(fptr, align_skip);
+ fill_float_zero(align_skip, fptr);
fptr = &(fptr[align_skip]);
}
if (genotypic_or_hethom && (!is_nonx_haploid) && is_set(active_params, sex_start_idx + 2)) {
@@ -2607,7 +2607,7 @@ uint32_t glm_fill_design_float(uintptr_t* loadbuf_collapsed, float* fixed_covars
ulptr_end++;
sample_idx_stop = sample_valid_ct;
}
- fill_float_zero(fptr, align_skip);
+ fill_float_zero(align_skip, fptr);
fptr = &(fptr[align_skip]);
}
}
@@ -2976,7 +2976,7 @@ THREAD_RET_TYPE glm_logistic_adapt_thread(void* arg) {
success_2incr = 0;
cur_fail_ct = 0;
// todo: try better starting position
- fill_float_zero(coef, cur_param_cta4 * perm_vec_ct);
+ fill_float_zero(cur_param_cta4 * perm_vec_ct, coef);
glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, 1, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, cur_constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
for (pidx = 0; pidx < perm_vec_ct;) {
if (!IS_SET(perm_fails, pidx)) {
@@ -3069,7 +3069,7 @@ THREAD_RET_TYPE glm_linear_maxt_thread(void* arg) {
uint32_t is_nonx_haploid = g_min_ploidy_1 && (!g_is_x);
double* fixed_covars_cov_major = g_fixed_covars_cov_major;
double* constraints_con_major = g_constraints_con_major;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
double* param_2d_buf = g_linear_mt[tidx].param_2d_buf;
double* param_2d_buf2 = g_linear_mt[tidx].param_2d_buf2;
MATRIX_INVERT_BUF1_TYPE* mi_buf = g_linear_mt[tidx].mi_buf;
@@ -3239,7 +3239,7 @@ THREAD_RET_TYPE glm_logistic_maxt_thread(void* arg) {
uint32_t is_nonx_haploid = g_min_ploidy_1 && (!g_is_x);
float* fixed_covars_cov_major = g_fixed_covars_cov_major_f;
double* constraints_con_major = g_constraints_con_major;
- double* msa_ptr = NULL;
+ double* msa_ptr = nullptr;
float* cur_covars_cov_major = g_logistic_mt[tidx].cur_covars_cov_major;
float* coef = g_logistic_mt[tidx].coef;
float* pp = g_logistic_mt[tidx].pp;
@@ -3299,7 +3299,7 @@ THREAD_RET_TYPE glm_logistic_maxt_thread(void* arg) {
cur_sample_valid_ct = sample_valid_ct - cur_missing_ct;
success_2incr = 0;
// todo: try better starting position
- fill_float_zero(coef, cur_param_cta4 * perm_vec_ct);
+ fill_float_zero(cur_param_cta4 * perm_vec_ct, coef);
perm_fail_ct = glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, 1, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, cur_constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
if (!IS_SET(perm_fails, pidx)) {
@@ -3435,7 +3435,7 @@ THREAD_RET_TYPE glm_linear_set_thread(void* arg) {
}
dgels_nrhs = (int32_t)((uint32_t)perm_vec_ct);
dgels_(&dgels_trans, &dgels_m, &dgels_n, &dgels_nrhs, dgels_a, &dgels_m, dgels_b, &dgels_ldb, dgels_work, &dgels_lwork, &dgels_info);
- glm_linear(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, loadbuf_ptr, standard_beta, pheno_sum_base, pheno_ssq_base, cur_covars_cov_major, cur_covars_sample_major, perm_pmajor, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, &perm_fail_ct, perm_fails);
+ glm_linear(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, loadbuf_ptr, standard_beta, pheno_sum_base, pheno_ssq_base, cur_covars_cov_major, cur_covars_sample_major, perm_pmajor, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, 0, nullptr, nullptr, nullptr, nullptr, nullptr, &perm_fail_ct, perm_fails);
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
if (!IS_SET(perm_fails, pidx)) {
dxx = fabs(dgels_b[pidx * cur_sample_valid_ct + 1] / sqrt(regression_results[pidx * param_ct_m1]));
@@ -3501,8 +3501,8 @@ THREAD_RET_TYPE glm_logistic_set_thread(void* arg) {
cur_missing_ct = glm_fill_design_float(loadbuf_ptr, fixed_covars_cov_major, sample_valid_ct, cur_param_ct, coding_flags, glm_xchr_model, condition_list_start_idx, interaction_start_idx, sex_start_idx, active_params, haploid_params, include_sex, male_x_01, sex_male_collapsed, is_nonx_haploid, cur_covars_cov_major);
cur_sample_valid_ct = sample_valid_ct - cur_missing_ct;
// todo: try better starting position
- fill_float_zero(coef, cur_param_cta4 * perm_vec_ct);
- glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, 1, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, perm_fails);
+ fill_float_zero(cur_param_cta4 * perm_vec_ct, coef);
+ glm_logistic(perm_vec_ct, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, 1, loadbuf_ptr, cur_covars_cov_major, perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, perm_fails);
for (pidx = 0; pidx < perm_vec_ct; pidx++) {
if (!IS_SET(perm_fails, pidx)) {
dxx = (double)coef[pidx * cur_param_cta4 + 1];
@@ -3531,10 +3531,10 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
uintptr_t condition_ct = 0;
uintptr_t np_sex = 0;
uintptr_t param_idx_end = 2;
- uintptr_t* load_mask = NULL;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uint32_t* condition_uidxs = NULL;
+ uintptr_t* load_mask = nullptr;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uint32_t* condition_uidxs = nullptr;
uint32_t covar_interactions = (glm_modifier / GLM_INTERACTION) & 1;
uint32_t genotypic = glm_modifier & GLM_GENOTYPIC;
uint32_t genotypic_or_hethom = (glm_modifier & (GLM_GENOTYPIC | GLM_HETHOM))? 1 : 0;
@@ -3542,7 +3542,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
uint32_t slen_add = 0;
uint32_t sex_covar_everywhere = glm_modifier & GLM_SEX;
uint32_t x_sex_interaction = (glm_xchr_model == 3);
- uint32_t x_present = (chrom_info_ptr->x_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->x_code);
+ uint32_t x_present = (chrom_info_ptr->xymt_codes[X_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[X_OFFSET]);
uint32_t hide_covar = glm_modifier & GLM_HIDE_COVAR;
uint32_t variation_in_sex = 0; // zero if no-x-sex specified
int32_t retval = 0;
@@ -3568,7 +3568,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
uint32_t slen;
uint32_t uii;
uint32_t ujj;
- g_joint_test_params = NULL;
+ g_joint_test_params = nullptr;
if (max_marker_allele_len > MAXLINELEN) {
if (bigstack_alloc_c(max_marker_allele_len + MAXLINELEN, writebuf_ptr)) {
goto glm_common_init_ret_NOMEM;
@@ -3632,8 +3632,8 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
// need to set to null for next alloc_collapsed_haploid_filters() call to
// work properly
- sample_include2 = NULL;
- sample_male_include2 = NULL;
+ sample_include2 = nullptr;
+ sample_male_include2 = nullptr;
np_base_raw += condition_ct * (1 + covar_interactions);
}
np_base_raw += covar_ct * (1 + covar_interactions);
@@ -3740,7 +3740,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
g_sex_male_collapsed = sex_male_collapsed;
if (parameters_range_list_ptr->name_ct) {
- fill_ulong_zero(active_params, param_raw_ctl);
+ fill_ulong_zero(param_raw_ctl, active_params);
active_params[0] = 1;
numeric_range_list_to_bitarr(parameters_range_list_ptr, param_raw_ct_max, 0, 1, active_params);
if ((!(active_params[0] & 2)) && ((!np_diploid_raw) || (active_params[0] & 4)) && ((!covar_interactions) || ((!popcount_bit_idx(active_params, interaction_start_idx, sex_start_idx)) && ((!variation_in_sex) || (!popcount_bit_idx(active_params, sex_start_idx + 1, param_raw_ct_max)))))) {
@@ -3910,7 +3910,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
}
g_haploid_params = haploid_params;
if (genotypic_or_hethom) {
- fill_ulong_zero(haploid_params, param_ctl_max);
+ fill_ulong_zero(param_ctl_max, haploid_params);
ujj = np_base;
for (uii = 0, param_idx = 0; uii < ujj; uii++, param_idx++) {
next_set_unsafe_ck(active_params, &uii);
@@ -3926,7 +3926,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
if (bigstack_alloc_ul(param_ctl_max, &g_joint_test_params)) {
goto glm_common_init_ret_NOMEM;
}
- fill_ulong_zero(g_joint_test_params, param_ctl_max);
+ fill_ulong_zero(param_ctl_max, g_joint_test_params);
if (tests_range_list_ptr->name_ct) {
numeric_range_list_to_bitarr(tests_range_list_ptr, param_ct_max - 1, 1, 1, g_joint_test_params);
constraint_ct_max = popcount_longs(g_joint_test_params, param_ctl_max);
@@ -3955,7 +3955,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
param_ctx_max++;
} else {
bigstack_reset(g_joint_test_params);
- g_joint_test_params = NULL;
+ g_joint_test_params = nullptr;
constraint_ct_max = 0;
logerrprint("Warning: Ignoring --tests since too few parameter indices are in range.\n");
}
@@ -3972,7 +3972,7 @@ int32_t glm_common_init(FILE* bedfile, uintptr_t bed_offset, uint32_t glm_modifi
g_perm_cluster_ct = 0;
g_perm_pheno_nm_ct = sample_valid_ct;
g_perms_done = 0;
- g_mperm_save_all = NULL;
+ g_mperm_save_all = nullptr;
if ((!do_perms) || is_set_test) {
*perm_batch_size_ptr = 1;
}
@@ -4028,16 +4028,16 @@ int32_t glm_linear_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t b
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t cur_param_ct = 0;
uintptr_t* marker_exclude = marker_exclude_mid;
- uintptr_t* unstopped_markers = NULL;
+ uintptr_t* unstopped_markers = nullptr;
uintptr_t* loadbuf = g_loadbuf;
- uintptr_t* perm_adapt_set_unstopped = NULL;
+ uintptr_t* perm_adapt_set_unstopped = nullptr;
double* orig_stats = g_orig_stats;
- double* sorted_chisq_buf = NULL;
- uint32_t* marker_idx_to_uidx = NULL;
- uint32_t* sorted_marker_idx_buf = NULL;
- uint32_t* proxy_arr = NULL;
- uint32_t* perm_2success_ct = NULL;
- uint32_t* perm_attempt_ct = NULL;
+ double* sorted_chisq_buf = nullptr;
+ uint32_t* marker_idx_to_uidx = nullptr;
+ uint32_t* sorted_marker_idx_buf = nullptr;
+ uint32_t* proxy_arr = nullptr;
+ uint32_t* perm_2success_ct = nullptr;
+ uint32_t* perm_attempt_ct = nullptr;
uintptr_t marker_ct = marker_ct_mid;
uintptr_t set_ct = 0;
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
@@ -4301,7 +4301,7 @@ int32_t glm_linear_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t b
goto glm_linear_assoc_set_test_more_perms;
}
glm_linear_assoc_set_test_perms_done:
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u permutation%s complete.\n", perms_done, (perms_done != 1)? "s" : "");
glm_linear_assoc_set_test_write:
if (glm_modifier & GLM_PERM) {
@@ -4331,8 +4331,8 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
- FILE* outfile = NULL;
- FILE* outfile_msa = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_msa = nullptr;
uintptr_t marker_ct = marker_ct_orig;
uintptr_t sample_uidx = 0;
uintptr_t cur_constraint_ct = 0;
@@ -4370,16 +4370,16 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
__CLPK_integer dgels_nrhs = 0;
__CLPK_integer dgels_ldb = 0;
uintptr_t* marker_exclude = marker_exclude_orig;
- uintptr_t* founder_pnm = NULL;
- uintptr_t* regression_skip = NULL;
- double* constraints_con_major = NULL;
- double* orig_pvals = NULL;
- uint32_t* condition_uidxs = NULL;
- uint32_t* marker_idx_to_uidx = NULL;
- uint32_t* tcnt = NULL;
- char* cur_param_names = NULL;
- char* haploid_param_names = NULL;
- char* wptr_start = NULL;
+ uintptr_t* founder_pnm = nullptr;
+ uintptr_t* regression_skip = nullptr;
+ double* constraints_con_major = nullptr;
+ double* orig_pvals = nullptr;
+ uint32_t* condition_uidxs = nullptr;
+ uint32_t* marker_idx_to_uidx = nullptr;
+ uint32_t* tcnt = nullptr;
+ char* cur_param_names = nullptr;
+ char* haploid_param_names = nullptr;
+ char* wptr_start = nullptr;
double geno_map[12];
uint32_t mu_table[GLM_BLOCKSIZE];
char numbuf[16];
@@ -4468,7 +4468,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
uint32_t ujj;
uint32_t ukk;
numbuf[0] = ' ';
- if ((chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code)) {
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (is_set_test) {
@@ -4477,7 +4477,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
memcpy(founder_pnm, pheno_nm, unfiltered_sample_ctl * sizeof(intptr_t));
bitvec_and(founder_info, unfiltered_sample_ctl, founder_pnm);
- if (extract_set_union_unfiltered(sip, NULL, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
+ if (extract_set_union_unfiltered(sip, nullptr, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
goto glm_linear_assoc_ret_NOMEM;
}
if (glm_modifier & GLM_PERM) {
@@ -4516,7 +4516,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
goto glm_linear_assoc_ret_NOMEM;
}
} else {
- g_perm_adapt_stop = NULL;
+ g_perm_adapt_stop = nullptr;
if (bigstack_calloc_ul(BITCT_TO_WORDCT(marker_initial_ct), &regression_skip)) {
goto glm_linear_assoc_ret_NOMEM;
}
@@ -4547,7 +4547,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
// 0..3: diploid chromosomes, X chromosome female
// 4..7: X chromosome male
// 8..11: haploid
- fill_double_zero(geno_map, 12);
+ fill_double_zero(12, geno_map);
geno_map[0] = 1;
geno_map[2] = 1;
geno_map[4] = 1;
@@ -4568,11 +4568,11 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
if (load_and_collapse_incl(unfiltered_sample_ct, sample_valid_ct, load_mask, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, g_loadbuf)) {
goto glm_linear_assoc_ret_READ_FAIL;
}
- chrom_idx = get_marker_chrom(chrom_info_ptr, marker_uidx);
+ chrom_idx = get_variant_chrom(chrom_info_ptr, marker_uidx);
geno_map_ptr = geno_map;
- if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx)) || (chrom_idx == (uint32_t)chrom_info_ptr->mt_code)) {
- g_is_x = ((int32_t)chrom_idx == chrom_info_ptr->x_code);
- g_is_y = ((int32_t)chrom_idx == chrom_info_ptr->y_code);
+ if ((IS_SET(chrom_info_ptr->haploid_mask, chrom_idx)) || (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ g_is_x = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ g_is_y = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
if (hh_or_mt_exists) {
haploid_fix(hh_or_mt_exists, sample_include2, sample_male_include2, sample_valid_ct, g_is_x, g_is_y, (unsigned char*)g_loadbuf);
}
@@ -4583,9 +4583,9 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
g_is_x = 0;
}
if (!g_is_x) {
- glm_loadbuf_to_doubles(g_loadbuf, sample_valid_ct, &(g_fixed_covars_cov_major[param_idx_fixed * sample_valid_ct]), geno_map_ptr, NULL);
+ glm_loadbuf_to_doubles(g_loadbuf, sample_valid_ct, &(g_fixed_covars_cov_major[param_idx_fixed * sample_valid_ct]), geno_map_ptr, nullptr);
} else {
- glm_loadbuf_to_doubles_x(g_loadbuf, sex_male_collapsed, sample_valid_ct, &(g_fixed_covars_cov_major[param_idx_fixed * sample_valid_ct]), geno_map_ptr, NULL);
+ glm_loadbuf_to_doubles_x(g_loadbuf, sex_male_collapsed, sample_valid_ct, &(g_fixed_covars_cov_major[param_idx_fixed * sample_valid_ct]), geno_map_ptr, nullptr);
}
if (is_set(active_params, param_idx_fixed + condition_list_start_idx)) {
strcpy(&(param_names[param_idx * max_param_name_len]), &(marker_ids[marker_uidx * max_marker_id_len]));
@@ -4655,7 +4655,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
if (variation_in_sex) {
dptr = &(g_fixed_covars_cov_major[param_idx_fixed * sample_valid_ct]);
- fill_double_zero(dptr, sample_valid_ct);
+ fill_double_zero(sample_valid_ct, dptr);
sample_idx = 0;
while (1) {
next_set_ul_ck(sex_male_collapsed, sample_valid_ct, &sample_idx);
@@ -4785,7 +4785,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
if (do_perms) {
if (cluster_starts) {
// Pointless to include size-1 clusters in permutation.
- retval = cluster_include_and_reindex(unfiltered_sample_ct, load_mask, 1, NULL, sample_valid_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, NULL, NULL);
+ retval = cluster_include_and_reindex(unfiltered_sample_ct, load_mask, 1, nullptr, sample_valid_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, nullptr, nullptr);
if (retval) {
goto glm_linear_assoc_ret_1;
}
@@ -4999,7 +4999,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
wptr_start = width_force(4, writebuf, chrom_name_write(chrom_info_ptr, uii, writebuf));
*wptr_start++ = ' ';
- fill_double_zero(constraints_con_major, constraint_ct_max * param_ct_max);
+ fill_double_zero(constraint_ct_max * param_ct_max, constraints_con_major);
g_male_x_01 = 0;
if (g_is_x) {
cur_param_ct = param_ct_max;
@@ -5331,7 +5331,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
if ((!perm_pass_idx) && (marker_idx >= loop_end)) {
if (marker_idx < marker_initial_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_initial_ct;
printf("\b\b%u%%", pct);
@@ -5343,7 +5343,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
// if more permutations, reevaluate marker_unstopped_ct, etc.
if (!perm_pass_idx) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -5352,7 +5352,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
if (!is_set_test) {
if (mtest_adjust) {
- retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_initial_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, constraint_ct_max? NULL : g_orig_stats, pfilter, output_min_p, mtest_adjust, constraint_ct_max, adjust_lambda, constraint_ct_max? NULL : tcnt, constraint_ct_max? orig_pvals : NULL);
+ retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_initial_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, constraint_ct_max? nullptr : g_orig_stats, pfilter, output_min_p, mtest_adjust, constraint_ct_max, adjust_lambda, constraint_ct_max? nullptr : tcnt, constraint_ct_max? orig_pvals : nullptr);
if (retval) {
goto glm_linear_assoc_ret_1;
}
@@ -5372,7 +5372,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
if (do_perms_nst) {
if (mperm_save_all) {
if (perm_pass_idx) {
- putchar(' ');
+ putc_unlocked(' ', stdout);
}
fputs("[dumping stats]", stdout);
fflush(stdout);
@@ -5430,7 +5430,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u %s permutation%s complete.\n", g_perms_done, perm_maxt_nst? "max(T)" : "(adaptive)", (g_perms_done != 1)? "s" : "");
if (perm_adapt_nst) {
@@ -5492,7 +5492,7 @@ int32_t glm_linear_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
marker_idx = 0;
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, uii, g_textbuf));
@@ -5581,16 +5581,16 @@ int32_t glm_logistic_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t cur_param_ct = 0;
uintptr_t* marker_exclude = marker_exclude_mid;
- uintptr_t* unstopped_markers = NULL;
+ uintptr_t* unstopped_markers = nullptr;
uintptr_t* loadbuf = g_loadbuf;
- uintptr_t* perm_adapt_set_unstopped = NULL;
+ uintptr_t* perm_adapt_set_unstopped = nullptr;
double* orig_stats = g_orig_stats;
- double* sorted_chisq_buf = NULL;
- uint32_t* marker_idx_to_uidx = NULL;
- uint32_t* sorted_marker_idx_buf = NULL;
- uint32_t* proxy_arr = NULL;
- uint32_t* perm_2success_ct = NULL;
- uint32_t* perm_attempt_ct = NULL;
+ double* sorted_chisq_buf = nullptr;
+ uint32_t* marker_idx_to_uidx = nullptr;
+ uint32_t* sorted_marker_idx_buf = nullptr;
+ uint32_t* proxy_arr = nullptr;
+ uint32_t* perm_2success_ct = nullptr;
+ uint32_t* perm_attempt_ct = nullptr;
uintptr_t marker_ct = marker_ct_mid;
uintptr_t set_ct = 0;
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
@@ -5830,7 +5830,7 @@ int32_t glm_logistic_assoc_set_test(pthread_t* threads, FILE* bedfile, uintptr_t
goto glm_logistic_assoc_set_test_more_perms;
}
glm_logistic_assoc_set_test_perms_done:
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u permutation%s complete.\n", perms_done, (perms_done != 1)? "s" : "");
glm_logistic_assoc_set_test_write:
if (glm_modifier & GLM_PERM) {
@@ -5859,8 +5859,8 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
- FILE* outfile = NULL;
- FILE* outfile_msa = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_msa = nullptr;
uintptr_t marker_ct = marker_ct_orig;
uintptr_t sample_uidx = 0;
uintptr_t cur_constraint_ct = 0;
@@ -5890,17 +5890,17 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
uint32_t hh_or_mt_exists = hh_exists;
uint32_t skip_intercept = !(glm_modifier & GLM_INTERCEPT);
int32_t retval = 0;
- double* constraints_con_major = NULL;
- double* orig_pvals = NULL;
+ double* constraints_con_major = nullptr;
+ double* orig_pvals = nullptr;
uintptr_t* marker_exclude = marker_exclude_orig;
- uintptr_t* founder_pnm = NULL;
- uintptr_t* regression_skip = NULL;
- uintptr_t* pheno_c_collapsed = NULL;
- uint32_t* condition_uidxs = NULL;
- uint32_t* marker_idx_to_uidx = NULL;
- char* cur_param_names = NULL;
- char* haploid_param_names = NULL;
- char* wptr_start = NULL;
+ uintptr_t* founder_pnm = nullptr;
+ uintptr_t* regression_skip = nullptr;
+ uintptr_t* pheno_c_collapsed = nullptr;
+ uint32_t* condition_uidxs = nullptr;
+ uint32_t* marker_idx_to_uidx = nullptr;
+ char* cur_param_names = nullptr;
+ char* haploid_param_names = nullptr;
+ char* wptr_start = nullptr;
float geno_map[12];
uint32_t mu_table[GLM_BLOCKSIZE];
char numbuf[16];
@@ -5990,7 +5990,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
logerrprint("Warning: Skipping --logistic since less than two phenotypes are present.\n");
goto glm_logistic_assoc_ret_1;
}
- if ((chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code)) {
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (is_set_test) {
@@ -5999,7 +5999,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
}
memcpy(founder_pnm, pheno_nm, unfiltered_sample_ctl * sizeof(intptr_t));
bitvec_and(founder_info, unfiltered_sample_ctl, founder_pnm);
- if (extract_set_union_unfiltered(sip, NULL, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
+ if (extract_set_union_unfiltered(sip, nullptr, unfiltered_marker_ct, marker_exclude_orig, &marker_exclude, &marker_ct)) {
goto glm_logistic_assoc_ret_NOMEM;
}
if (glm_modifier & GLM_PERM) {
@@ -6040,7 +6040,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
goto glm_logistic_assoc_ret_NOMEM;
}
} else {
- g_perm_adapt_stop = NULL;
+ g_perm_adapt_stop = nullptr;
if (bigstack_calloc_ul(BITCT_TO_WORDCT(marker_initial_ct), &regression_skip)) {
goto glm_logistic_assoc_ret_NOMEM;
}
@@ -6071,7 +6071,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
// 0..3: diploid chromosomes, X chromosome female
// 4..7: X chromosome male
// 8..11: haploid
- fill_float_zero(geno_map, 12);
+ fill_float_zero(12, geno_map);
geno_map[0] = 1;
geno_map[2] = 1;
geno_map[4] = 1;
@@ -6092,11 +6092,11 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
if (load_and_collapse_incl(unfiltered_sample_ct, sample_valid_ct, load_mask, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, g_loadbuf)) {
goto glm_logistic_assoc_ret_READ_FAIL;
}
- chrom_idx = get_marker_chrom(chrom_info_ptr, marker_uidx);
+ chrom_idx = get_variant_chrom(chrom_info_ptr, marker_uidx);
geno_map_ptr = geno_map;
- if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->mt_code)) {
- g_is_x = ((int32_t)chrom_idx == chrom_info_ptr->x_code);
- g_is_y = ((int32_t)chrom_idx == chrom_info_ptr->y_code);
+ if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ g_is_x = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ g_is_y = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
if (hh_or_mt_exists) {
haploid_fix(hh_or_mt_exists, sample_include2, sample_male_include2, sample_valid_ct, g_is_x, g_is_y, (unsigned char*)g_loadbuf);
}
@@ -6107,9 +6107,9 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
g_is_x = 0;
}
if (!g_is_x) {
- glm_loadbuf_to_floats(g_loadbuf, sample_valid_ct, &(g_fixed_covars_cov_major_f[param_idx_fixed * sample_valid_ct]), geno_map_ptr, NULL);
+ glm_loadbuf_to_floats(g_loadbuf, sample_valid_ct, &(g_fixed_covars_cov_major_f[param_idx_fixed * sample_valid_ct]), geno_map_ptr, nullptr);
} else {
- glm_loadbuf_to_floats_x(g_loadbuf, sex_male_collapsed, sample_valid_ct, &(g_fixed_covars_cov_major_f[param_idx_fixed * sample_valid_ct]), geno_map_ptr, NULL);
+ glm_loadbuf_to_floats_x(g_loadbuf, sex_male_collapsed, sample_valid_ct, &(g_fixed_covars_cov_major_f[param_idx_fixed * sample_valid_ct]), geno_map_ptr, nullptr);
}
if (is_set(active_params, param_idx_fixed + condition_list_start_idx)) {
strcpy(&(param_names[param_idx * max_param_name_len]), &(marker_ids[marker_uidx * max_marker_id_len]));
@@ -6178,7 +6178,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
}
if (variation_in_sex) {
fptr = &(g_fixed_covars_cov_major_f[param_idx_fixed * sample_valid_ct]);
- fill_float_zero(fptr, sample_valid_ct);
+ fill_float_zero(sample_valid_ct, fptr);
sample_idx = 0;
while (1) {
next_set_ul_ck(sex_male_collapsed, sample_valid_ct, &sample_idx);
@@ -6355,13 +6355,13 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
goto glm_logistic_assoc_ret_NOMEM;
}
} else {
- g_logistic_mt[tidx].mi_buf = NULL;
- g_logistic_mt[tidx].param_1d_dbuf = NULL;
- g_logistic_mt[tidx].param_2d_dbuf = NULL;
- g_logistic_mt[tidx].param_2d_dbuf2 = NULL;
- g_logistic_mt[tidx].param_df_dbuf = NULL;
- g_logistic_mt[tidx].df_df_dbuf = NULL;
- g_logistic_mt[tidx].df_dbuf = NULL;
+ g_logistic_mt[tidx].mi_buf = nullptr;
+ g_logistic_mt[tidx].param_1d_dbuf = nullptr;
+ g_logistic_mt[tidx].param_2d_dbuf = nullptr;
+ g_logistic_mt[tidx].param_2d_dbuf2 = nullptr;
+ g_logistic_mt[tidx].param_df_dbuf = nullptr;
+ g_logistic_mt[tidx].df_df_dbuf = nullptr;
+ g_logistic_mt[tidx].df_dbuf = nullptr;
}
}
@@ -6473,7 +6473,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
wptr_start = width_force(4, writebuf, chrom_name_write(chrom_info_ptr, uii, writebuf));
*wptr_start++ = ' ';
- fill_double_zero(constraints_con_major, constraint_ct_max * param_ct_max);
+ fill_double_zero(constraint_ct_max * param_ct_max, constraints_con_major);
g_male_x_01 = 0;
if (g_is_x) {
cur_param_ct = param_ct_max;
@@ -6586,7 +6586,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
g_nm_cts[marker_idx3] = cur_sample_valid_ct;
if (cur_sample_valid_ct > cur_param_ct) {
// todo: try better starting position
- fill_float_zero(g_logistic_mt[0].coef, round_up_pow2(cur_param_ct, 4));
+ fill_float_zero(round_up_pow2(cur_param_ct, 4), g_logistic_mt[0].coef);
regression_fail = glm_logistic(1, cur_param_ct, cur_sample_valid_ct, cur_missing_ct, skip_intercept, loadbuf_ptr, g_logistic_mt[0].cur_covars_cov_major, pheno_c_collapsed, g_logistic_mt[0].coef, g_logistic_mt[0].pp, g_logistic_mt[0].sample_1d_buf, g_logistic_mt[0].pheno_buf, g_logistic_mt[0].param_1d_buf, g_logistic_mt[0].param_1d_buf2, g_logistic_mt[0].param_2d_buf, g_logistic_mt[0].param_2d_buf2, g_logistic_mt[0].regression_results, cur_constraint_ct, constraints_con_major, g_logist [...]
} else {
regression_fail = 1;
@@ -6766,7 +6766,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
if ((!perm_pass_idx) && (marker_idx >= loop_end)) {
if (marker_idx < marker_initial_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_initial_ct;
printf("\b\b%u%%", pct);
@@ -6778,7 +6778,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
// if more permutations, reevaluate marker_unstopped_ct, etc.
if (!perm_pass_idx) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -6787,7 +6787,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
}
if (!is_set_test) {
if (mtest_adjust) {
- retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_initial_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, NULL, pfilter, output_min_p, mtest_adjust, constraint_ct_max, adjust_lambda, NULL, orig_pvals);
+ retval = multcomp(outname, outname_end, marker_idx_to_uidx, marker_initial_ct, marker_ids, max_marker_id_len, plink_maxsnp, chrom_info_ptr, nullptr, pfilter, output_min_p, mtest_adjust, constraint_ct_max, adjust_lambda, nullptr, orig_pvals);
if (retval) {
goto glm_logistic_assoc_ret_1;
}
@@ -6807,7 +6807,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
if (do_perms_nst) {
if (mperm_save_all) {
if (perm_pass_idx) {
- putchar(' ');
+ putc_unlocked(' ', stdout);
}
fputs("[dumping stats]", stdout);
fflush(stdout);
@@ -6865,7 +6865,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
}
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u %s permutation%s complete.\n", g_perms_done, perm_maxt_nst? "max(T)" : "(adaptive)", (g_perms_done != 1)? "s" : "");
if (perm_adapt_nst) {
@@ -6927,7 +6927,7 @@ int32_t glm_logistic_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
marker_idx = 0;
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, uii, g_textbuf));
@@ -7016,7 +7016,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctv2 = 2 * unfiltered_sample_ctl;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t sample_uidx = 0;
uintptr_t max_param_name_len = 2;
uintptr_t param_raw_ct = 1;
@@ -7040,31 +7040,31 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
__CLPK_integer dgels_m = 0;
__CLPK_integer dgels_n = 0;
__CLPK_integer dgels_nrhs = 0;
- double* dgels_a = NULL;
- double* dgels_b = NULL;
- double* dgels_work = NULL;
- double* param_df_buf = NULL;
- double* param_df_buf2 = NULL;
+ double* dgels_a = nullptr;
+ double* dgels_b = nullptr;
+ double* dgels_work = nullptr;
+ double* param_df_buf = nullptr;
+ double* param_df_buf2 = nullptr;
__CLPK_integer dgels_ldb = 0;
__CLPK_integer dgels_lwork = -1;
__CLPK_integer dgels_info;
double dzz;
- double* regression_results = NULL;
- double* mperm_save_stats = NULL;
- double* constraints_con_major = NULL;
- double* df_df_buf = NULL;
- double* df_buf = NULL;
- uintptr_t* loadbuf_raw = NULL;
- uintptr_t* loadbuf_collapsed = NULL;
- uintptr_t* load_mask = NULL;
- uintptr_t* sex_male_collapsed = NULL;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* active_params = NULL;
- uintptr_t* joint_test_params = NULL;
- uintptr_t* perm_fails = NULL;
- uint32_t* perm_2success_ct = NULL;
- uint32_t* condition_uidxs = NULL;
+ double* regression_results = nullptr;
+ double* mperm_save_stats = nullptr;
+ double* constraints_con_major = nullptr;
+ double* df_df_buf = nullptr;
+ double* df_buf = nullptr;
+ uintptr_t* loadbuf_raw = nullptr;
+ uintptr_t* loadbuf_collapsed = nullptr;
+ uintptr_t* load_mask = nullptr;
+ uintptr_t* sex_male_collapsed = nullptr;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* active_params = nullptr;
+ uintptr_t* joint_test_params = nullptr;
+ uintptr_t* perm_fails = nullptr;
+ uint32_t* perm_2success_ct = nullptr;
+ uint32_t* condition_uidxs = nullptr;
double geno_map[12];
double* geno_map_ptr;
double* param_2d_buf;
@@ -7114,7 +7114,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
goto glm_linear_nosnp_ret_NOMEM;
}
sample_valid_ct = popcount_longs(load_mask, unfiltered_sample_ctl);
- if ((chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code)) {
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (condition_mname || condition_fname) {
@@ -7199,7 +7199,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
goto glm_linear_nosnp_ret_NOMEM;
}
if (parameters_range_list_ptr->name_ct) {
- fill_ulong_zero(active_params, param_raw_ctl);
+ fill_ulong_zero(param_raw_ctl, active_params);
active_params[0] = 1;
numeric_range_list_to_bitarr(parameters_range_list_ptr, param_raw_ct, 0, 1, active_params);
param_ct = popcount_longs(active_params, param_raw_ctl);
@@ -7245,7 +7245,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
if (aligned_malloc(ulii * sizeof(intptr_t), &joint_test_params)) {
goto glm_linear_nosnp_ret_NOMEM;
}
- fill_ulong_zero(joint_test_params, ulii);
+ fill_ulong_zero(ulii, joint_test_params);
if (tests_range_list_ptr->name_ct) {
numeric_range_list_to_bitarr(tests_range_list_ptr, param_ct - 1, 1, 1, joint_test_params);
constraint_ct = popcount_longs(joint_test_params, ulii);
@@ -7281,7 +7281,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
covars_cov_major[sample_idx] = 1;
}
param_idx = 1;
- fill_double_zero(geno_map, 12);
+ fill_double_zero(12, geno_map);
geno_map[0] = 1;
geno_map[2] = 1;
geno_map[4] = 1;
@@ -7304,11 +7304,11 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
if (load_and_collapse_incl(unfiltered_sample_ct, sample_valid_ct, load_mask, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, loadbuf_collapsed)) {
goto glm_linear_nosnp_ret_READ_FAIL;
}
- chrom_idx = get_marker_chrom(chrom_info_ptr, marker_uidx);
+ chrom_idx = get_variant_chrom(chrom_info_ptr, marker_uidx);
geno_map_ptr = geno_map;
- if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->mt_code)) {
- is_x = ((int32_t)chrom_idx == chrom_info_ptr->x_code);
- is_y = ((int32_t)chrom_idx == chrom_info_ptr->y_code);
+ if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ is_x = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
if (hh_or_mt_exists) {
haploid_fix(hh_or_mt_exists, sample_include2, sample_male_include2, sample_valid_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
}
@@ -7319,9 +7319,9 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
is_x = 0;
}
if (!is_x) {
- glm_loadbuf_to_doubles(loadbuf_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_ct]), geno_map_ptr, NULL);
+ glm_loadbuf_to_doubles(loadbuf_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_ct]), geno_map_ptr, nullptr);
} else {
- glm_loadbuf_to_doubles_x(loadbuf_collapsed, sex_male_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_ct]), geno_map_ptr, NULL);
+ glm_loadbuf_to_doubles_x(loadbuf_collapsed, sex_male_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_ct]), geno_map_ptr, nullptr);
}
strcpy(&(param_names[param_idx * max_param_name_len]), &(marker_ids[marker_uidx * max_marker_id_len]));
param_idx++;
@@ -7434,7 +7434,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
dzz = dxx / ((double)((intptr_t)sample_valid_ct));
dyy = sqrt((dyy - dxx * dzz) / ((double)((intptr_t)(sample_valid_ct - 1))));
if (dyy == 0) {
- fill_double_zero(dptr, sample_valid_ct);
+ fill_double_zero(sample_valid_ct, dptr);
} else {
dyy = 1.0 / dyy;
for (sample_idx = 0; sample_idx < sample_valid_ct; sample_idx++) {
@@ -7492,7 +7492,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
bigstack_alloc_d(perm_batch_size * sample_valid_ct, &dgels_b)) {
goto glm_linear_nosnp_ret_NOMEM;
}
- fill_double_zero(regression_results, param_ctx - 1);
+ fill_double_zero(param_ctx - 1, regression_results);
memcpy(dgels_a, covars_cov_major, param_ct * sample_valid_ct * sizeof(double));
memcpy(dgels_b, g_perm_pheno_d2, sample_valid_ct * sizeof(double));
dgels_(&dgels_trans, &dgels_m, &dgels_n, &dgels_nrhs, dgels_a, &dgels_m, dgels_b, &dgels_ldb, &dxx, &dgels_lwork, &dgels_info);
@@ -7524,7 +7524,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
max_thread_ct = uii;
}
if (cluster_starts && do_perms) {
- retval = cluster_include_and_reindex(unfiltered_sample_ct, load_mask, 1, NULL, sample_valid_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, NULL, NULL);
+ retval = cluster_include_and_reindex(unfiltered_sample_ct, load_mask, 1, nullptr, sample_valid_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, nullptr, nullptr);
if (retval) {
goto glm_linear_nosnp_ret_1;
}
@@ -7548,7 +7548,7 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
transpose_copy(param_ct, sample_valid_ct, covars_cov_major, covars_sample_major);
- if (glm_linear(1, param_ct, sample_valid_ct, 0, NULL, 0, 0, 0, covars_cov_major, covars_sample_major, g_perm_pheno_d2, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_df_buf, param_df_buf2, df_df_buf, df_buf, &perm_fail_ct, perm_fails) || perm_fail_ct) {
+ if (glm_linear(1, param_ct, sample_valid_ct, 0, nullptr, 0, 0, 0, covars_cov_major, covars_sample_major, g_perm_pheno_d2, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_df_buf, param_df_buf2, df_df_buf, df_buf, &perm_fail_ct, perm_fails) || perm_fail_ct) {
logerrprint("Warning: Skipping --linear no-snp due to multicollinearity.\n");
goto glm_linear_nosnp_ret_1;
}
@@ -7680,11 +7680,11 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
join_threads(threads, g_perm_generation_thread_ct);
dgels_nrhs = cur_batch_size;
- fill_double_zero(regression_results, (param_ctx - 1) * cur_batch_size);
+ fill_double_zero((param_ctx - 1) * cur_batch_size, regression_results);
memcpy(dgels_a, covars_cov_major, param_ct * sample_valid_ct * sizeof(double));
memcpy(dgels_b, g_perm_pmajor, cur_batch_size * sample_valid_ct * sizeof(double));
dgels_(&dgels_trans, &dgels_m, &dgels_n, &dgels_nrhs, dgels_a, &dgels_m, dgels_b, &dgels_ldb, dgels_work, &dgels_lwork, &dgels_info);
- if (glm_linear(cur_batch_size, param_ct, sample_valid_ct, 0, NULL, 0, 0, 0, covars_cov_major, covars_sample_major, g_perm_pmajor, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_df_buf, param_df_buf2, df_df_buf, df_buf, &perm_fail_ct, perm_fails)) {
+ if (glm_linear(cur_batch_size, param_ct, sample_valid_ct, 0, nullptr, 0, 0, 0, covars_cov_major, covars_sample_major, g_perm_pmajor, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_df_buf, param_df_buf2, df_df_buf, df_buf, &perm_fail_ct, perm_fails)) {
perm_fail_ct = cur_batch_size;
fill_bits(0, cur_batch_size, perm_fails);
}
@@ -7740,12 +7740,12 @@ int32_t glm_linear_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
}
perms_done += cur_batch_size;
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u permutation%s complete.", perms_done, (perms_done != 1)? "s" : "");
fflush(stdout);
}
if (do_perms) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
memcpy(outname_end2, ".mperm", 7);
if (fopen_checked(outname, "w", &outfile)) {
goto glm_linear_nosnp_ret_OPEN_FAIL;
@@ -7879,7 +7879,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctv2 = 2 * unfiltered_sample_ctl;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t sample_uidx = 0;
uintptr_t max_param_name_len = 2;
uintptr_t param_raw_ct = 1;
@@ -7899,26 +7899,26 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
uint32_t max_thread_ct = g_thread_ct;
uint32_t skip_intercept = !(glm_modifier & GLM_INTERCEPT);
int32_t retval = 0;
- uintptr_t* loadbuf_raw = NULL;
- uintptr_t* loadbuf_collapsed = NULL;
- uintptr_t* load_mask = NULL;
- uintptr_t* sex_male_collapsed = NULL;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* active_params = NULL;
- uintptr_t* joint_test_params = NULL;
- double* mperm_save_stats = NULL;
- double* constraints_con_major = NULL;
- double* param_1d_dbuf = NULL;
- double* param_2d_dbuf = NULL;
- double* param_2d_dbuf2 = NULL;
- double* param_df_dbuf = NULL;
- double* df_df_dbuf = NULL;
- MATRIX_INVERT_BUF1_TYPE* mi_buf = NULL;
- double* df_dbuf = NULL;
- uintptr_t* perm_fails = NULL;
- uint32_t* perm_2success_ct = NULL;
- uint32_t* condition_uidxs = NULL;
+ uintptr_t* loadbuf_raw = nullptr;
+ uintptr_t* loadbuf_collapsed = nullptr;
+ uintptr_t* load_mask = nullptr;
+ uintptr_t* sex_male_collapsed = nullptr;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* active_params = nullptr;
+ uintptr_t* joint_test_params = nullptr;
+ double* mperm_save_stats = nullptr;
+ double* constraints_con_major = nullptr;
+ double* param_1d_dbuf = nullptr;
+ double* param_2d_dbuf = nullptr;
+ double* param_2d_dbuf2 = nullptr;
+ double* param_df_dbuf = nullptr;
+ double* df_df_dbuf = nullptr;
+ MATRIX_INVERT_BUF1_TYPE* mi_buf = nullptr;
+ double* df_dbuf = nullptr;
+ uintptr_t* perm_fails = nullptr;
+ uint32_t* perm_2success_ct = nullptr;
+ uint32_t* condition_uidxs = nullptr;
float geno_map[12];
float* geno_map_ptr;
char* param_names;
@@ -7975,7 +7975,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
goto glm_logistic_nosnp_ret_NOMEM;
}
sample_valid_ct = popcount_longs(load_mask, unfiltered_sample_ctl);
- if ((chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code)) {
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (condition_mname || condition_fname) {
@@ -8062,7 +8062,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
goto glm_logistic_nosnp_ret_NOMEM;
}
if (parameters_range_list_ptr->name_ct) {
- fill_ulong_zero(active_params, param_raw_ctl);
+ fill_ulong_zero(param_raw_ctl, active_params);
active_params[0] = 1;
numeric_range_list_to_bitarr(parameters_range_list_ptr, param_raw_ct, 0, 1, active_params);
param_ct = popcount_longs(active_params, param_raw_ctl);
@@ -8109,7 +8109,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
if (aligned_malloc(ulii * sizeof(intptr_t), &joint_test_params)) {
goto glm_logistic_nosnp_ret_NOMEM;
}
- fill_ulong_zero(joint_test_params, ulii);
+ fill_ulong_zero(ulii, joint_test_params);
if (tests_range_list_ptr->name_ct) {
numeric_range_list_to_bitarr(tests_range_list_ptr, param_ct - 1, 1, 1, joint_test_params);
constraint_ct = popcount_longs(joint_test_params, ulii);
@@ -8148,11 +8148,11 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
// need to zero out end of every row
uii = sample_valid_cta4 - sample_valid_ct;
for (param_idx = 0; param_idx < param_ct; param_idx++) {
- fill_float_zero(&(covars_cov_major[param_idx * sample_valid_cta4 + sample_valid_ct]), uii);
+ fill_float_zero(uii, &(covars_cov_major[param_idx * sample_valid_cta4 + sample_valid_ct]));
}
}
param_idx = 1;
- fill_float_zero(geno_map, 12);
+ fill_float_zero(12, geno_map);
geno_map[0] = 1;
geno_map[2] = 1;
geno_map[4] = 1;
@@ -8175,11 +8175,11 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
if (load_and_collapse_incl(unfiltered_sample_ct, sample_valid_ct, load_mask, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, loadbuf_collapsed)) {
goto glm_logistic_nosnp_ret_READ_FAIL;
}
- chrom_idx = get_marker_chrom(chrom_info_ptr, marker_uidx);
+ chrom_idx = get_variant_chrom(chrom_info_ptr, marker_uidx);
geno_map_ptr = geno_map;
- if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->mt_code)) {
- is_x = ((int32_t)chrom_idx == chrom_info_ptr->x_code);
- is_y = ((int32_t)chrom_idx == chrom_info_ptr->y_code);
+ if (IS_SET(chrom_info_ptr->haploid_mask, chrom_idx) || (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ is_x = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = ((int32_t)chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
if (hh_or_mt_exists) {
haploid_fix(hh_or_mt_exists, sample_include2, sample_male_include2, sample_valid_ct, is_x, is_y, (unsigned char*)loadbuf_collapsed);
}
@@ -8190,9 +8190,9 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
is_x = 0;
}
if (!is_x) {
- glm_loadbuf_to_floats(loadbuf_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_cta4]), geno_map_ptr, NULL);
+ glm_loadbuf_to_floats(loadbuf_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_cta4]), geno_map_ptr, nullptr);
} else {
- glm_loadbuf_to_floats_x(loadbuf_collapsed, sex_male_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_cta4]), geno_map_ptr, NULL);
+ glm_loadbuf_to_floats_x(loadbuf_collapsed, sex_male_collapsed, sample_valid_ct, &(covars_cov_major[param_idx * sample_valid_cta4]), geno_map_ptr, nullptr);
}
strcpy(&(param_names[param_idx * max_param_name_len]), &(marker_ids[marker_uidx * max_marker_id_len]));
param_idx++;
@@ -8218,7 +8218,7 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
wptr = uint32toa(constraint_ct, wptr);
memcpy(wptr, "DF", 3);
}
- fill_uint_zero(perm_2success_ct, param_ctx - 1);
+ fill_uint_zero(param_ctx - 1, perm_2success_ct);
sample_uidx = 0;
sample_idx = 0;
uii = condition_ct + 1;
@@ -8333,8 +8333,8 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
g_perm_is_1bit = 1;
}
- fill_float_zero(coef, param_cta4);
- if (glm_logistic(1, param_ct, sample_valid_ct, 0, skip_intercept, NULL, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails)) {
+ fill_float_zero(param_cta4, coef);
+ if (glm_logistic(1, param_ct, sample_valid_ct, 0, skip_intercept, nullptr, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails)) {
logerrprint("Warning: Skipping --logistic no-snp due to multicollinearity.\n");
goto glm_logistic_nosnp_ret_1;
}
@@ -8472,8 +8472,8 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
generate_cc_cluster_perms_thread((void*)ulii);
}
join_threads(threads, g_perm_generation_thread_ct);
- fill_float_zero(coef, cur_batch_size * param_cta4);
- perm_fail_total += glm_logistic(cur_batch_size, param_ct, sample_valid_ct, 0, 1, NULL, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
+ fill_float_zero(cur_batch_size * param_cta4, coef);
+ perm_fail_total += glm_logistic(cur_batch_size, param_ct, sample_valid_ct, 0, 1, nullptr, covars_cov_major, g_perm_vecs, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, constraint_ct, constraints_con_major, param_1d_dbuf, param_2d_dbuf, param_2d_dbuf2, param_df_dbuf, df_df_dbuf, mi_buf, df_dbuf, perm_fails);
ulii = param_ct - 1;
uljj = param_ctx - 1;
for (perm_idx = 0; perm_idx < cur_batch_size; perm_idx++) {
@@ -8524,12 +8524,12 @@ int32_t glm_logistic_nosnp(pthread_t* threads, FILE* bedfile, uintptr_t bed_offs
}
}
perms_done += cur_batch_size;
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("%u permutation%s complete.", perms_done, (perms_done != 1)? "s" : "");
fflush(stdout);
}
if (do_perms) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
memcpy(outname_end2, ".mperm", 7);
if (fopen_checked(outname, "w", &outfile)) {
goto glm_logistic_nosnp_ret_OPEN_FAIL;
@@ -8751,11 +8751,11 @@ uint32_t glm_linear_dosage(uintptr_t sample_ct, uintptr_t* cur_samples, uintptr_
return 0;
}
transpose_copy(param_ct, sample_valid_ct, covars_cov_major, covars_sample_major);
- fill_double_zero(regression_results, param_ct - 1);
+ fill_double_zero(param_ct - 1, regression_results);
memcpy(dgels_a, covars_cov_major, param_ct * sample_valid_ct * sizeof(double));
memcpy(dgels_b, pheno_d2, sample_valid_ct * sizeof(double));
dgels_(&dgels_trans, &dgels_m, &dgels_n, &dgels_nrhs, dgels_a, &dgels_m, dgels_b, &dgels_ldb, dgels_work, &dgels_lwork, &dgels_info);
- glm_linear(1, param_ct, sample_valid_ct, 0, NULL, 0, 0, 0, covars_cov_major, covars_sample_major, pheno_d2, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, &perm_fail_ct, perm_fails);
+ glm_linear(1, param_ct, sample_valid_ct, 0, nullptr, 0, 0, 0, covars_cov_major, covars_sample_major, pheno_d2, dgels_b, param_2d_buf, mi_buf, param_2d_buf2, regression_results, 0, nullptr, nullptr, nullptr, nullptr, nullptr, &perm_fail_ct, perm_fails);
if (perm_fail_ct) {
return 0;
}
@@ -8811,8 +8811,8 @@ uint32_t glm_logistic_dosage(uintptr_t sample_ct, uintptr_t* cur_samples, uintpt
}
covar_f++;
}
- fill_float_zero(coef, param_cta4);
- if (glm_logistic(1, param_ct, sample_valid_ct, 0, 1, NULL, covars_cov_major, perm_vec, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, perm_fails) || perm_fails[0]) {
+ fill_float_zero(param_cta4, coef);
+ if (glm_logistic(1, param_ct, sample_valid_ct, 0, 1, nullptr, covars_cov_major, perm_vec, coef, pp, sample_1d_buf, pheno_buf, param_1d_buf, param_1d_buf2, param_2d_buf, param_2d_buf2, regression_results, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, perm_fails) || perm_fails[0]) {
return 0;
}
dxx = (double)coef[1];
diff --git a/plink_help.c b/plink_help.c
index d4e97e2..232687b 100644
--- a/plink_help.c
+++ b/plink_help.c
@@ -169,7 +169,7 @@ void help_print(const char* cur_params, Help_ctrl* help_ctrl_ptr, uint32_t postp
payload_end = (char*)(&(payload[payload_len]));
}
if (help_ctrl_ptr->preprint_newline) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
}
help_ctrl_ptr->preprint_newline = postprint_newline;
payload_ptr = (char*)payload;
@@ -206,9 +206,9 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
help_ctrl.param_ct = param_ct;
help_ctrl.argv = argv;
help_ctrl.unmatched_ct = param_ct;
- help_ctrl.param_lens = NULL;
- help_ctrl.all_match_arr = NULL;
- help_ctrl.argv = NULL;
+ help_ctrl.param_lens = nullptr;
+ help_ctrl.all_match_arr = nullptr;
+ help_ctrl.argv = nullptr;
if (param_ct) {
help_ctrl.param_lens = (uint32_t*)malloc(param_ct * sizeof(int32_t));
if (!help_ctrl.param_lens) {
@@ -247,12 +247,12 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
for (arg_idx = 0; arg_idx < param_ct; arg_idx++) {
help_ctrl.param_lens[arg_idx] = strlen(help_ctrl.argv[arg_idx]);
}
- fill_ulong_zero(help_ctrl.all_match_arr, param_ctl * 3);
+ fill_ulong_zero(param_ctl * 3, help_ctrl.all_match_arr);
help_ctrl.prefix_match_arr = &(help_ctrl.all_match_arr[param_ctl]);
help_ctrl.perfect_match_arr = &(help_ctrl.all_match_arr[param_ctl * 2]);
help_ctrl.preprint_newline = 1;
} else {
- help_ctrl.argv = NULL;
+ help_ctrl.argv = nullptr;
fputs(
"\nIn the command line flag definitions that follow,\n"
" * [square brackets] denote a required parameter, where the text between the\n"
@@ -376,7 +376,7 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" used with only .bim/.fam input.\n"
" USE THESE CAUTIOUSLY. It is very easy to desynchronize your binary\n"
" genotype data and your .bim/.fam indexes if you use these commands\n"
-" improperly. If you have any doubt, stick with --make-bed.\n"
+" improperly. If you have any doubt, stick with --make-bed.\n\n"
);
help_print("recode\trecode12\ttab\ttranspose\trecode-lgen\trecodeAD\trecodead\trecodeA\trecodea\trecode-rlist\trecode-allele\tlist\twith-reference\trecode-vcf\tfid\tiid\trecode-beagle\trecode-bimbam\trecode-fastphase\trecodeHV\trecodehv\trecode-structure", &help_ctrl, 1,
@@ -1372,10 +1372,11 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --vcf-min-gp [val] : No-call a genotype when 0-1 scaled GP is\n"
" below the given threshold.\n"
" --vcf-half-call [m] : Specify how '0/.' and similar VCF GT values should be\n"
-" handled. The following three modes are supported:\n"
+" handled. The following four modes are supported:\n"
" * 'error'/'e' (default) errors out and reports line #.\n"
" * 'haploid'/'h' treats them as haploid calls.\n"
" * 'missing'/'m' treats them as missing.\n"
+" * 'reference'/'r' treats the missing value as 0.\n"
);
help_print("oxford-single-chr\tdata\tgen", &help_ctrl, 0,
" --oxford-single-chr [chr nm] : Specify single-chromosome .gen file with\n"
@@ -1971,6 +1972,12 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
" --lasso-select-covars {cov(s)...} : Subject some or all covariates to LASSO\n"
" model selection.\n"
);
+#ifndef STABLE_BUILD
+ help_print("lasso\tlasso-lambda", &help_ctrl, 0,
+" --lasso-lambda [iters] {h2} : Customize LASSO warm-start procedure.\n"
+" (h2 required if not used with --lasso.)\n"
+ );
+#endif
help_print("adjust\tgc\tlog10\tqq-plot", &help_ctrl, 0,
" --adjust <gc> <log10> <qq-plot> : Report some multiple-testing corrections.\n"
);
@@ -2226,24 +2233,24 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
if (help_ctrl.unmatched_ct) {
if (net_unmatched_ct == 2) {
if (help_ctrl.param_lens[arg_uidx] + col_num > 76) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
col_num = 2 + help_ctrl.param_lens[arg_uidx];
} else {
- putchar(' ');
+ putc_unlocked(' ', stdout);
col_num += 3 + help_ctrl.param_lens[arg_uidx];
}
- putchar('\'');
+ putc_unlocked('\'', stdout);
fputs(argv[arg_uidx], stdout);
- putchar('\'');
+ putc_unlocked('\'', stdout);
} else {
if (help_ctrl.param_lens[arg_uidx] + col_num > 75) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
col_num = 3 + help_ctrl.param_lens[arg_uidx];
} else {
- putchar(' ');
+ putc_unlocked(' ', stdout);
col_num += 4 + help_ctrl.param_lens[arg_uidx];
}
- putchar('\'');
+ putc_unlocked('\'', stdout);
fputs(argv[arg_uidx], stdout);
fputs("',", stdout);
}
@@ -2257,8 +2264,8 @@ int32_t disp_help(uint32_t param_ct, char** argv) {
}
}
} else {
- putchar((help_ctrl.param_lens[arg_uidx] + col_num > 75)? '\n' : ' ');
- putchar('\'');
+ putc_unlocked((help_ctrl.param_lens[arg_uidx] + col_num > 75)? '\n' : ' ', stdout);
+ putc_unlocked('\'', stdout);
fputs(argv[arg_uidx], stdout);
fputs("\'.\n", stdout);
}
diff --git a/plink_homozyg.c b/plink_homozyg.c
index 0364b81..65fceca 100644
--- a/plink_homozyg.c
+++ b/plink_homozyg.c
@@ -222,7 +222,7 @@ void save_confirmed_roh_extend(uint32_t uidx_first, uint32_t older_uidx, uint32_
uint32_t roh_update(Homozyg_info* hp, uintptr_t* readbuf_cur, uintptr_t* swbuf_cur, uint32_t* het_cts, uint32_t* missing_cts, uint32_t* marker_pos, uintptr_t* cur_sample_male, uint32_t sample_ct, uint32_t swhit_min, uint32_t older_uidx, uint32_t old_uidx, uint32_t marker_cidx, uintptr_t max_roh_ct, uint32_t* swhit_cts, uint32_t* cur_roh_uidx_starts, uint32_t* cur_roh_cidx_starts, uint32_t* cur_roh_het_cts, uint32_t* cur_roh_missing_cts, uintptr_t* sample_to_last_roh, uint32_t* roh_list, [...]
// Finishes processing current marker, saving all ROH that end at the
- // previous one. If readbuf_cur is NULL, the previous marker is assumed to
+ // previous one. If readbuf_cur is nullptr, the previous marker is assumed to
// be the last one on the chromosome.
uint32_t* roh_list_cur = &(roh_list[(*roh_ct_ptr) * ROH_ENTRY_INTS]);
uint32_t min_snp = hp->min_snp;
@@ -253,7 +253,7 @@ uint32_t roh_update(Homozyg_info* hp, uintptr_t* readbuf_cur, uintptr_t* swbuf_c
cur_word >>= 2;
}
if (cur_sample_male && IS_SET(cur_sample_male, sample_idx)) {
- // skip males on Xchr (cur_sample_male should be NULL when not Xchr)
+ // skip males on Xchr (cur_sample_male should be nullptr when not Xchr)
continue;
}
if (readbuf_cur) {
@@ -394,11 +394,11 @@ uint32_t roh_update(Homozyg_info* hp, uintptr_t* readbuf_cur, uintptr_t* swbuf_c
int32_t write_main_roh_reports(char* outname, char* outname_end, uintptr_t* marker_exclude, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uintptr_t sample_ct, uintptr_t* sample_exclude, char* sample_ids, uint32_t plink_maxfid, uint32_t plink_maxiid, uintptr_t max_sample_id_len, uintptr_t* pheno_nm, uintptr_t* pheno_c, double* pheno_d, char* missing_pheno_str, uint32_t omp_is_numeric, uint32_t missing_pheno_len, uin [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- FILE* outfile_indiv = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_indiv = nullptr;
char* wptr_iid = &(g_textbuf[plink_maxfid + 1]);
char* wptr_phe = &(g_textbuf[plink_maxfid + plink_maxiid + 2]);
- int32_t* roh_ct_aff_adj = NULL;
+ int32_t* roh_ct_aff_adj = nullptr;
uintptr_t next_roh_idx = 0;
uint32_t max_pool_size = 0;
uint32_t max_roh_len = 0;
@@ -483,7 +483,7 @@ int32_t write_main_roh_reports(char* outname, char* outname_end, uintptr_t* mark
cur_roh = &(roh_list[cur_roh_idx * ROH_ENTRY_INTS]);
marker_uidx1 = cur_roh[0];
marker_uidx2 = cur_roh[1];
- wptr = width_force(4, wptr_chr, chrom_name_write(chrom_info_ptr, get_marker_chrom(chrom_info_ptr, marker_uidx1), wptr_chr));
+ wptr = width_force(4, wptr_chr, chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, marker_uidx1), wptr_chr));
*wptr++ = ' ';
cptr = &(marker_ids[marker_uidx1 * max_marker_id_len]);
slen = strlen(cptr);
@@ -562,8 +562,8 @@ int32_t write_main_roh_reports(char* outname, char* outname_end, uintptr_t* mark
chrom_roh_start = roh_list_chrom_starts[chrom_fo_idx];
chrom_roh_ct = roh_list_chrom_starts[chrom_fo_idx + 1] - chrom_roh_start;
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_start = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- chrom_len = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1] - chrom_start;
+ chrom_start = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ chrom_len = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1] - chrom_start;
bigstack_reset(bigstack_mark);
if (bigstack_calloc_i(chrom_len + 1, &roh_ct_unaff_adj)) {
goto write_main_roh_reports_ret_NOMEM;
@@ -768,7 +768,7 @@ void initialize_roh_slot(uint32_t* cur_roh, uint32_t chrom_start, uint32_t* mark
#else
roh_slot[0] = 0x15555555 >> (2 * (15 - uii));
#endif
- fill_ulong_zero(&(roh_slot[cur_bidx]), end_bidx - cur_bidx);
+ fill_ulong_zero(end_bidx - cur_bidx, &(roh_slot[cur_bidx]));
uii = cidx_last & (BITCT2 - 1);
#ifdef __LP64__
if (cidx_last & 32) {
@@ -1116,7 +1116,7 @@ static inline uint32_t is_allelic_match(double mismatch_max, uintptr_t* roh_slot
}
void compute_allelic_match_matrix(double mismatch_max, uintptr_t roh_slot_wsize, uint32_t pool_size, uintptr_t* roh_slots, uintptr_t* roh_slot_occupied, uintptr_t* roh_slot_uncached, uint32_t* roh_slot_cidx_start, uint32_t* roh_slot_cidx_end, uint64_t* roh_slot_map, uint32_t overlap_cidx_start, uint32_t overlap_cidx_end, uint32_t* allelic_match_cts, uintptr_t* allelic_match_matrix) {
- // consensus_match in effect iff roh_slot_uncached is NULL
+ // consensus_match in effect iff roh_slot_uncached is nullptr
// may want to make this multithreaded in the future
uint32_t cidx_end_idxl = 0;
uint32_t skip_cached = 0;
@@ -1135,7 +1135,7 @@ void compute_allelic_match_matrix(double mismatch_max, uintptr_t roh_slot_wsize,
uint32_t block_start_idxl;
uint32_t block_start_idxs;
uint32_t uii;
- fill_uint_zero(allelic_match_cts, pool_size);
+ fill_uint_zero(pool_size, allelic_match_cts);
if (roh_slot_uncached) {
// count cached results
map_first_unset = next_unset(roh_slot_uncached, 0, pool_size);
@@ -1315,7 +1315,7 @@ char* roh_pool_write_middle(char* wptr, char* marker_ids, uintptr_t max_marker_i
int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* outname, char* outname_end, uintptr_t* rawbuf, uintptr_t* marker_exclude, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uintptr_t sample_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, char* sample_ids, uint32_t plink_maxfid, uint32_t plin [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uint64_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
double mismatch_max = 1 - (hp->overlap_min * (1 - EPSILON)); // fuzz
@@ -1329,11 +1329,11 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
uintptr_t pool_list_size = 0;
uint32_t pool_ct = 0;
uint32_t onechar_max = (chrom_info_ptr->max_code > 9)? 9 : chrom_info_ptr->max_code;
- uintptr_t* roh_slot_uncached = NULL;
- uint64_t* verbose_group_sort_buf = NULL;
- uint32_t* verbose_uidx_bounds = NULL;
- uint32_t* verbose_sample_uidx = NULL;
- char* writebuf = NULL;
+ uintptr_t* roh_slot_uncached = nullptr;
+ uint64_t* verbose_group_sort_buf = nullptr;
+ uint32_t* verbose_uidx_bounds = nullptr;
+ uint32_t* verbose_sample_uidx = nullptr;
+ char* writebuf = nullptr;
int32_t retval = 0;
char* allele_strs[4];
@@ -1417,7 +1417,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
if (roh_list_chrom_starts[chrom_fo_idx] == roh_list_chrom_starts[chrom_fo_idx + 1]) {
continue;
}
- chrom_len = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1] - chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
+ chrom_len = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1] - chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
if (chrom_len > uii) {
uii = chrom_len;
}
@@ -1465,7 +1465,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
cur_roh_heap = &(roh_slot_map[-1]);
sample_uidx_sort_buf = roh_slot_cidx_start;
- fill_ulong_one(pool_size_first_plidx, pool_size_ct);
+ fill_ulong_one(pool_size_ct, pool_size_first_plidx);
pool_list = (uintptr_t*)g_bigstack_base;
max_pool_list_size = bigstack_left() / sizeof(intptr_t);
@@ -1638,8 +1638,8 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
}
fflush(stdout);
- chrom_start = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- marker_uidx2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_start = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ marker_uidx2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
marker_cidx = 0;
for (marker_uidx1 = chrom_start; marker_uidx1 < marker_uidx2; marker_uidx1++) {
if (IS_SET(marker_exclude, marker_uidx1)) {
@@ -1647,11 +1647,11 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
}
marker_uidx_to_cidx[marker_uidx1 - chrom_start] = marker_cidx++;
}
- fill_ulong_zero(roh_slot_occupied, max_pool_sizel);
+ fill_ulong_zero(max_pool_sizel, roh_slot_occupied);
// an extra slot because this is a "1-terminated" list
- fill_ulong_one((uintptr_t*)roh_slot_map, (max_pool_size + 1) * (sizeof(int64_t) / sizeof(intptr_t)));
- fill_uint_zero(roh_slot_end_uidx, max_pool_size);
- fill_ulong_zero(allelic_match_matrix, (((uintptr_t)max_pool_size) * (max_pool_size - 1)) / 2);
+ fill_ulong_one((max_pool_size + 1) * (sizeof(int64_t) / sizeof(intptr_t)), (uintptr_t*)roh_slot_map);
+ fill_uint_zero(max_pool_size, roh_slot_end_uidx);
+ fill_ulong_zero((((uintptr_t)max_pool_size) * (max_pool_size - 1)) / 2, allelic_match_matrix);
lookahead_end_uidx = next_unset_unsafe(marker_exclude, chrom_start);
cur_lookahead_start = 0;
cur_lookahead_size = 0;
@@ -1684,7 +1684,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
slot_idx1++;
}
} else {
- fill_ulong_zero(roh_slot_uncached, BITCT_TO_WORDCT(pool_size));
+ fill_ulong_zero(BITCT_TO_WORDCT(pool_size), roh_slot_uncached);
}
slot_idx1 = 0;
while (1) {
@@ -1913,7 +1913,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
goto roh_pool_ret_WRITE_FAIL;
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
wptr = memseta(g_textbuf, 32, plink_maxsnp - 3);
wptr = memcpya(wptr, "SNP ", 4);
fwrite(g_textbuf, 1, wptr - g_textbuf, outfile);
@@ -1930,7 +1930,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
for (marker_uidx1 = union_uidx1; marker_uidx1 <= union_uidx2; marker_uidx1++) {
next_unset_unsafe_ck(marker_exclude, &marker_uidx1);
if (marker_uidx1 == con_uidx1) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
ulii = marker_cidx + cur_lookahead_start - lookahead_first_cidx;
if (ulii >= max_lookahead) {
@@ -1980,7 +1980,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
goto roh_pool_ret_WRITE_FAIL;
}
if (marker_uidx1 == con_uidx2) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
marker_cidx++;
}
@@ -2045,7 +2045,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
for (marker_uidx1 = union_uidx1; marker_uidx1 <= union_uidx2; marker_uidx1++) {
next_unset_unsafe_ck(marker_exclude, &marker_uidx1);
if (marker_uidx1 == con_uidx1) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
ulii = marker_cidx + cur_lookahead_start - lookahead_first_cidx;
if (ulii >= max_lookahead) {
@@ -2120,7 +2120,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
goto roh_pool_ret_WRITE_FAIL;
}
if (marker_uidx1 == con_uidx2) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
marker_cidx++;
}
@@ -2138,7 +2138,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
for (marker_uidx1 = union_uidx1; marker_uidx1 <= union_uidx2; marker_uidx1++) {
next_unset_unsafe_ck(marker_exclude, &marker_uidx1);
if (marker_uidx1 == con_uidx1) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
ulii = marker_cidx + cur_lookahead_start - lookahead_first_cidx;
if (ulii >= max_lookahead) {
@@ -2179,16 +2179,16 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
} else if (ujj > ukk) {
fputs(allele_strs[0], outfile);
} else {
- putc('?', outfile);
+ putc_unlocked('?', outfile);
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
slot_idx1 = group_slot_end;
} while (slot_idx1 < pool_size);
if (putc_checked('\n', outfile)) {
goto roh_pool_ret_WRITE_FAIL;
}
if (marker_uidx1 == con_uidx2) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
marker_cidx++;
}
@@ -2201,7 +2201,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
}
}
if (chrom_info_ptr->chrom_file_order[chrom_fo_idx] > onechar_max) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
}
fputs("\b\b\b\b\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b\b\b\b\b\bdone.\n", stdout);
@@ -2234,7 +2234,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
union_uidx1 = cur_roh[0];
con_uidx2 = cur_roh[1];
union_uidx2 = cur_roh[1];
- chrom_start = get_marker_chrom(chrom_info_ptr, con_uidx1);
+ chrom_start = get_variant_chrom(chrom_info_ptr, con_uidx1);
// sort pool members primarily by allelic-match group number, then by
// internal ID
for (slot_idx1 = 0; slot_idx1 < pool_size; slot_idx1++) {
@@ -2363,7 +2363,7 @@ int32_t roh_pool(Homozyg_info* hp, FILE* bedfile, uint64_t bed_offset, char* out
goto roh_pool_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("ROH pool report written to %s .\n", outname);
if (is_verbose) {
wptr = strcpya(g_logbuf, "Per-pool report");
@@ -2417,17 +2417,17 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
double hit_threshold = hp->hit_threshold;
uint32_t is_new_lengths = 1 ^ ((hp->modifier / HOMOZYG_OLD_LENGTHS) & 1);
uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
- int32_t x_code = chrom_info_ptr->x_code;
- int32_t mt_code = chrom_info_ptr->mt_code;
+ int32_t x_code = chrom_info_ptr->xymt_codes[X_OFFSET];
+ int32_t mt_code = chrom_info_ptr->xymt_codes[MT_OFFSET];
uintptr_t* haploid_mask = chrom_info_ptr->haploid_mask;
uintptr_t roh_ct = 0;
uintptr_t final_mask = get_final_mask(sample_ct);
- uintptr_t* sample_male = NULL;
+ uintptr_t* sample_male = nullptr;
// support for new 'extend' modifier
- uint32_t* prev_roh_end_cidxs = NULL;
- uint32_t* end_nonhom_uidxs = NULL;
- uint32_t* cur_roh_earliest_extend_uidxs = NULL;
+ uint32_t* prev_roh_end_cidxs = nullptr;
+ uint32_t* end_nonhom_uidxs = nullptr;
+ uint32_t* cur_roh_earliest_extend_uidxs = nullptr;
uint32_t swhit_min = 0;
int32_t retval = 0;
@@ -2544,12 +2544,12 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
if (fseeko(bedfile, bed_offset, SEEK_SET)) {
goto calc_homozyg_ret_READ_FAIL;
}
- fill_ulong_one(sample_to_last_roh, sample_ct);
+ fill_ulong_one(sample_ct, sample_to_last_roh);
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
roh_list_chrom_starts[chrom_fo_idx] = roh_ct;
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
if ((x_code == -1) || (uii != ((uint32_t)x_code))) {
if (IS_SET(haploid_mask, uii) || (uii == (uint32_t)mt_code)) {
marker_uidx = chrom_end;
@@ -2558,7 +2558,7 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
}
continue;
}
- cur_sample_male = NULL;
+ cur_sample_male = nullptr;
} else {
cur_sample_male = sample_male;
}
@@ -2568,10 +2568,10 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
fputs("\r--homozyg: Scanning chromosome **.\b", stdout);
}
fflush(stdout);
- marker_uidx = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- fill_ulong_zero(swbuf, sample_ctl * window_size);
- fill_uint_zero(het_cts, sample_ct);
- fill_uint_zero(missing_cts, sample_ct);
+ marker_uidx = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ fill_ulong_zero(sample_ctl * window_size, swbuf);
+ fill_uint_zero(sample_ct, het_cts);
+ fill_uint_zero(sample_ct, missing_cts);
for (widx = 0; widx < window_size; widx++) {
if (IS_SET(marker_exclude, marker_uidx)) {
marker_uidx = next_unset_ul(marker_exclude, marker_uidx, chrom_end);
@@ -2592,16 +2592,16 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
}
if (widx == window_size) {
marker_uidx--;
- fill_ulong_zero(swbuf, window_size * sample_ctl);
- fill_uint_zero(swhit_cts, sample_ct);
- fill_uint_one(cur_roh_cidx_starts, sample_ct);
+ fill_ulong_zero(window_size * sample_ctl, swbuf);
+ fill_uint_zero(sample_ct, swhit_cts);
+ fill_uint_one(sample_ct, cur_roh_cidx_starts);
widx = 0;
swbuf_full = 0;
marker_cidx = 0;
old_uidx = uidx_buf[0];
if (prev_roh_end_cidxs) {
- fill_uint_one(prev_roh_end_cidxs, sample_ct);
+ fill_uint_one(sample_ct, prev_roh_end_cidxs);
for (sample_idx = 0; sample_idx < sample_ct; sample_idx++) {
end_nonhom_uidxs[sample_idx] = old_uidx;
}
@@ -2614,7 +2614,7 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
swbuf_cur = &(swbuf[widx * sample_ctl]);
if (swbuf_full) {
vertical_bitct_subtract(swbuf_cur, sample_ct, swhit_cts);
- fill_ulong_zero(swbuf_cur, sample_ctl);
+ fill_ulong_zero(sample_ctl, swbuf_cur);
} else {
swhit_min = (int32_t)(((double)((int32_t)(widx + 1))) * hit_threshold + 1.0 - EPSILON);
}
@@ -2665,9 +2665,9 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
vertical_bitct_subtract(swbuf_cur, sample_ct, swhit_cts);
swhit_min = (int32_t)(((double)((int32_t)(marker_cidx_max - marker_cidx))) * hit_threshold + 1.0 - EPSILON);
} else {
- readbuf_cur = NULL;
+ readbuf_cur = nullptr;
}
- if (roh_update(hp, readbuf_cur, NULL, het_cts, missing_cts, marker_pos, cur_sample_male, sample_ct, swhit_min, older_uidx, old_uidx, marker_cidx, max_roh_ct, swhit_cts, cur_roh_uidx_starts, cur_roh_cidx_starts, cur_roh_het_cts, cur_roh_missing_cts, sample_to_last_roh, roh_list, &roh_ct, marker_exclude, prev_roh_end_cidxs, end_nonhom_uidxs, cur_roh_earliest_extend_uidxs)) {
+ if (roh_update(hp, readbuf_cur, nullptr, het_cts, missing_cts, marker_pos, cur_sample_male, sample_ct, swhit_min, older_uidx, old_uidx, marker_cidx, max_roh_ct, swhit_cts, cur_roh_uidx_starts, cur_roh_cidx_starts, cur_roh_het_cts, cur_roh_missing_cts, sample_to_last_roh, roh_list, &roh_ct, marker_exclude, prev_roh_end_cidxs, end_nonhom_uidxs, cur_roh_earliest_extend_uidxs)) {
goto calc_homozyg_ret_NOMEM;
}
widx_next = widx + 1;
@@ -2681,7 +2681,7 @@ int32_t calc_homozyg(Homozyg_info* hp, FILE* bedfile, uintptr_t bed_offset, uint
} while (marker_cidx <= marker_cidx_max);
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTF("--homozyg: Scan complete, found %" PRIuPTR " ROH.\n", roh_ct);
roh_list_chrom_starts[chrom_ct] = roh_ct;
// "truncate" the completed list so we can start making workspace allocations
diff --git a/plink_lasso.c b/plink_lasso.c
index f45833c..999b478 100644
--- a/plink_lasso.c
+++ b/plink_lasso.c
@@ -4,7 +4,7 @@
#include "plink_matrix.h"
// need to force to 64-bit integer if >= 2^16
-#define WARM_START_ITERS 1000
+#define DEFAULT_WARM_START_ITERS 1000
#define NLAMBDA 100
#define DELTA_THRESHOLD 0.0001
@@ -195,7 +195,7 @@ int32_t lasso_bigmem(FILE* bedfile, uintptr_t bed_offset, uintptr_t* marker_excl
}
*polymorphic_marker_ct_ptr = polymorphic_marker_ct;
if (!polymorphic_marker_ct) {
- putchar('\n');
+ putc_unlocked('\n', stdout);
logerrprint("Warning: Skipping --lasso since no polymorphic loci are present.\n");
return 0;
}
@@ -205,21 +205,21 @@ int32_t lasso_bigmem(FILE* bedfile, uintptr_t bed_offset, uintptr_t* marker_excl
sige = sqrt(1.0 - lasso_h2 + 1.0 / ((double)((intptr_t)sample_valid_ct)));
zz = sige * sqrt_n_recip;
if (rand_matrix) {
- bigstack_alloc_d(WARM_START_ITERS * WARM_START_ITERS, &prod_matrix);
+ bigstack_alloc_d(DEFAULT_WARM_START_ITERS * DEFAULT_WARM_START_ITERS, &prod_matrix);
fputs("\r--lasso: Initializing warm start matrix...", stdout);
fflush(stdout);
- fill_double_zero(misc_arr, WARM_START_ITERS);
+ fill_double_zero(DEFAULT_WARM_START_ITERS, misc_arr);
for (col_idx = 0; col_idx < col_ct;) {
- ulii = col_idx + WARM_START_ITERS;
+ ulii = col_idx + DEFAULT_WARM_START_ITERS;
if (ulii > col_ct) {
ulii = col_ct;
}
// splitting this into square blocks reduces memory consumption without
// slowing things down (may even be faster due to locality).
- col_major_matrix_multiply(WARM_START_ITERS, ulii - col_idx, sample_valid_ct, rand_matrix, &(data_arr[col_idx * sample_valid_ct]), prod_matrix);
+ col_major_matrix_multiply(DEFAULT_WARM_START_ITERS, ulii - col_idx, sample_valid_ct, rand_matrix, &(data_arr[col_idx * sample_valid_ct]), prod_matrix);
dptr = prod_matrix;
for (; col_idx < ulii; col_idx++) {
- for (uii = 0; uii < WARM_START_ITERS; uii++) {
+ for (uii = 0; uii < DEFAULT_WARM_START_ITERS; uii++) {
dxx = fabs(*dptr++);
if (dxx > misc_arr[uii]) {
misc_arr[uii] = dxx;
@@ -227,7 +227,7 @@ int32_t lasso_bigmem(FILE* bedfile, uintptr_t bed_offset, uintptr_t* marker_excl
}
}
}
- lambda_min = destructive_get_dmedian(WARM_START_ITERS, misc_arr) * zz;
+ lambda_min = destructive_get_dmedian(DEFAULT_WARM_START_ITERS, misc_arr) * zz;
logstr("--lasso:");
LOGPRINTF(" using min lambda = %g.\n", lambda_min);
bigstack_reset(prod_matrix);
@@ -409,6 +409,216 @@ uint32_t load_and_normalize(FILE* bedfile, uintptr_t* loadbuf_raw, uintptr_t unf
}
+// this needs to work in very-low-memory contexts
+#define LASSO_LAMBDA_BLOCK_SIZE 64
+
+int32_t lasso_lambda(const uintptr_t* marker_exclude, const uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t* sex_male, uintptr_t* pheno_nm, const uintptr_t* covar_nm, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t marker_ct, uintptr_t unfiltered_sample_ct, uintptr_t pheno_nm_ct, uint32_t hh_or_mt_exists, uint32_t lasso_lambda_iters, double lasso_h2, FILE* bedfile, char* outname, char* outname_end, double* lasso_minlambda_ptr) {
+ // standalone memory-efficient lambda calculation, since even 1000 x
+ // sample_ct matrices may be too large.
+ //
+ // Runs lasso_lambda_iters random-projection iterations in blocks of
+ // LASSO_LAMBDA_BLOCK_SIZE; bedfile is re-read from bed_offset once per
+ // block. For each iteration, the largest absolute entry of
+ // rand_matrix * (normalized genotype columns) is tracked, then scaled by
+ // sqrt(1 - lasso_h2 + 1/n) / sqrt(n), where n is the valid sample count.
+ //
+ // covar_nm may be null, in which case all pheno_nm_ct samples are used;
+ // otherwise only samples whose covar_nm bit is set are kept.
+ //
+ // On success, the scaled values are sorted and written one per line to
+ // <outname>.lambdamin, and *lasso_minlambda_ptr receives the result of
+ // get_dmedian() on the sorted array.
+ //
+ // Returns 0 on success, or RET_NOMEM / RET_OPEN_FAIL / RET_READ_FAIL /
+ // RET_WRITE_FAIL. All bigstack allocations made here are released before
+ // returning.
+ unsigned char* bigstack_mark = g_bigstack_base;
+ FILE* outfile = nullptr;
+ int32_t retval = 0;
+ {
+ uintptr_t sample_valid_ct;
+ if (!covar_nm) {
+ sample_valid_ct = pheno_nm_ct;
+ } else {
+ sample_valid_ct = popcount_longs(covar_nm, BITCT_TO_WORDCT(pheno_nm_ct));
+ }
+ const uintptr_t final_mask = get_final_mask(sample_valid_ct);
+ uintptr_t* pheno_nm2;
+ if (sample_valid_ct == pheno_nm_ct) {
+ pheno_nm2 = pheno_nm;
+ } else {
+ const uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
+ if (bigstack_calloc_ul(unfiltered_sample_ctl, &pheno_nm2)) {
+ goto lasso_lambda_ret_NOMEM;
+ }
+ uintptr_t sample_uidx;
+ uintptr_t sample_idx;
+ for (sample_uidx = 0, sample_idx = 0; sample_idx < pheno_nm_ct; sample_uidx++, sample_idx++) {
+ next_set_ul_unsafe_ck(pheno_nm, &sample_uidx);
+ if (IS_SET(covar_nm, sample_idx)) {
+ SET_BIT(sample_uidx, pheno_nm2);
+ }
+ }
+ }
+ double* rand_matrix;
+ double* max_empirical_lambdas;
+ double* data_window;
+ double* prod_matrix;
+ if (bigstack_alloc_d(sample_valid_ct * LASSO_LAMBDA_BLOCK_SIZE, &rand_matrix) ||
+ bigstack_alloc_d(lasso_lambda_iters, &max_empirical_lambdas) ||
+ bigstack_alloc_d(sample_valid_ct * LASSO_LAMBDA_BLOCK_SIZE, &data_window) ||
+ bigstack_alloc_d(LASSO_LAMBDA_BLOCK_SIZE * LASSO_LAMBDA_BLOCK_SIZE, &prod_matrix)) {
+ goto lasso_lambda_ret_NOMEM;
+ }
+ fill_double_zero(lasso_lambda_iters, max_empirical_lambdas);
+
+ const uintptr_t sample_valid_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_valid_ct);
+ const uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
+ const double sqrt_n_recip = sqrt(1.0 / ((double)((intptr_t)sample_valid_ct)));
+ uintptr_t* sample_include2;
+ uintptr_t* loadbuf_raw;
+ uintptr_t* loadbuf_collapsed;
+ double* residuals;
+ if (bigstack_alloc_ul(sample_valid_ctv2, &sample_include2) ||
+ bigstack_alloc_ul(unfiltered_sample_ctv2, &loadbuf_raw) ||
+ bigstack_alloc_ul(sample_valid_ctv2, &loadbuf_collapsed) ||
+ bigstack_alloc_d(sample_valid_ct, &residuals)) {
+ goto lasso_lambda_ret_NOMEM;
+ }
+ fill_quatervec_55(sample_valid_ct, sample_include2);
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
+ hh_or_mt_exists |= NXMHH_EXISTS;
+ }
+ uintptr_t* sample_male_include2;
+ if (alloc_collapsed_haploid_filters(pheno_nm2, sex_male, unfiltered_sample_ct, sample_valid_ct, hh_or_mt_exists, 1, &sample_include2, &sample_male_include2)) {
+ goto lasso_lambda_ret_NOMEM;
+ }
+
+ const uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
+ const uint32_t block_ct = (lasso_lambda_iters + LASSO_LAMBDA_BLOCK_SIZE - 1) / LASSO_LAMBDA_BLOCK_SIZE;
+ double* max_empirical_lambdas_iter = max_empirical_lambdas;
+ for (uint32_t block_idx = 0; block_idx < block_ct; ++block_idx) {
+ printf("\r--lasso-lambda: %u iterations complete.", block_idx * LASSO_LAMBDA_BLOCK_SIZE);
+ fflush(stdout);
+ uint32_t cur_block_size = LASSO_LAMBDA_BLOCK_SIZE;
+ if (block_idx == block_ct - 1) {
+ cur_block_size = 1 + ((lasso_lambda_iters - 1) % LASSO_LAMBDA_BLOCK_SIZE);
+ }
+
+ const uintptr_t rand_matrix_size = cur_block_size * sample_valid_ct;
+ // might populate an extra value past the end of the array, that's okay
+ for (uintptr_t rand_matrix_pos = 0; rand_matrix_pos < rand_matrix_size; rand_matrix_pos += 2) {
+ rand_matrix[rand_matrix_pos] = rand_normal(&(rand_matrix[rand_matrix_pos + 1]));
+ }
+
+ uint32_t chrom_fo_idx = 0xffffffffU; // exploit overflow
+ uint32_t chrom_end = 0;
+ uintptr_t marker_idx = 0;
+ uint32_t marker_uidx = 0;
+ uint32_t is_x = 0;
+ uint32_t is_y = 0;
+ // Must live outside the marker loop: refresh_chrom_info() only updates it
+ // when a new chromosome is entered, but load_and_normalize() reads it for
+ // every marker.
+ uint32_t min_ploidy_1 = 0;
+ double* dptr = data_window;
+ uint32_t partial_marker_idx = 0;
+ if (fseeko(bedfile, bed_offset, SEEK_SET)) {
+ goto lasso_lambda_ret_READ_FAIL;
+ }
+ for (; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
+ // only care about rand_matrix multiply here
+ if (IS_SET(marker_exclude, marker_uidx)) {
+ marker_uidx = next_unset_unsafe(marker_exclude, marker_uidx);
+ if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
+ goto lasso_lambda_ret_READ_FAIL;
+ }
+ }
+ uint32_t uii;
+ if (marker_uidx >= chrom_end) {
+ chrom_fo_idx++;
+ refresh_chrom_info(chrom_info_ptr, marker_uidx, &chrom_end, &chrom_fo_idx, &is_x, &is_y, &uii, &min_ploidy_1);
+ }
+ uii = load_and_normalize(bedfile, loadbuf_raw, unfiltered_sample_ct, loadbuf_collapsed, sample_valid_ct, pheno_nm2, final_mask, IS_SET(marker_reverse, marker_uidx), min_ploidy_1, hh_or_mt_exists, sample_include2, sample_male_include2, is_x, is_y, sqrt_n_recip, dptr);
+ if (uii == 2) {
+ goto lasso_lambda_ret_READ_FAIL;
+ }
+ if (uii == 1) {
+ continue;
+ }
+ partial_marker_idx++;
+ if (partial_marker_idx == LASSO_LAMBDA_BLOCK_SIZE) {
+ col_major_matrix_multiply(cur_block_size, LASSO_LAMBDA_BLOCK_SIZE, sample_valid_ct, rand_matrix, data_window, prod_matrix);
+ dptr = prod_matrix;
+ for (uintptr_t col_idx = 0; col_idx < LASSO_LAMBDA_BLOCK_SIZE; col_idx++) {
+ for (uintptr_t ulii = 0; ulii < cur_block_size; ulii++) {
+ double dxx = fabs(*dptr++);
+ if (dxx > max_empirical_lambdas_iter[ulii]) {
+ max_empirical_lambdas_iter[ulii] = dxx;
+ }
+ }
+ }
+ partial_marker_idx = 0;
+ dptr = data_window;
+ } else {
+ dptr = &(dptr[sample_valid_ct]);
+ }
+ }
+ if (partial_marker_idx) {
+ col_major_matrix_multiply(cur_block_size, partial_marker_idx, sample_valid_ct, rand_matrix, data_window, prod_matrix);
+ dptr = prod_matrix;
+ for (uintptr_t col_idx = 0; col_idx < partial_marker_idx; col_idx++) {
+ for (uintptr_t ulii = 0; ulii < cur_block_size; ulii++) {
+ double dxx = fabs(*dptr++);
+ if (dxx > max_empirical_lambdas_iter[ulii]) {
+ max_empirical_lambdas_iter[ulii] = dxx;
+ }
+ }
+ }
+ }
+ max_empirical_lambdas_iter = &(max_empirical_lambdas_iter[LASSO_LAMBDA_BLOCK_SIZE]);
+ }
+ const double sige = sqrt(1.0 - lasso_h2 + 1.0 / ((double)((intptr_t)sample_valid_ct)));
+ const double zz = sige * sqrt_n_recip;
+ for (uint32_t iter_idx = 0; iter_idx < lasso_lambda_iters; ++iter_idx) {
+ max_empirical_lambdas[iter_idx] *= zz;
+ }
+#ifdef __cplusplus
+ std::sort(max_empirical_lambdas, &(max_empirical_lambdas[lasso_lambda_iters]));
+#else
+ qsort(max_empirical_lambdas, lasso_lambda_iters, sizeof(double), double_cmp);
+#endif
+
+ double lambda_min = get_dmedian(max_empirical_lambdas, lasso_lambda_iters);
+ putc_unlocked('\r', stdout);
+ LOGPRINTF("--lasso-lambda (%u iteration%s): min lambda = %g.\n", lasso_lambda_iters, (lasso_lambda_iters == 1)? "" : "s", lambda_min);
+ *lasso_minlambda_ptr = lambda_min;
+ memcpy(outname_end, ".lambdamin", 11);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto lasso_lambda_ret_OPEN_FAIL;
+ }
+ char* wptr = g_textbuf;
+ char* wptr_flush = &(wptr[MAXLINELEN]);
+ for (uint32_t iter_idx = 0; iter_idx < lasso_lambda_iters; ++iter_idx) {
+ wptr = dtoa_g(max_empirical_lambdas[iter_idx], wptr);
+ *wptr++ = '\n';
+ if (wptr >= wptr_flush) {
+ if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
+ goto lasso_lambda_ret_WRITE_FAIL;
+ }
+ wptr = g_textbuf;
+ }
+ }
+ // Flush whatever is left after the last in-loop flush. The loop resets
+ // wptr to g_textbuf whenever it reaches wptr_flush, so "buffer empty" is
+ // wptr == g_textbuf.
+ if (wptr != g_textbuf) {
+ if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
+ goto lasso_lambda_ret_WRITE_FAIL;
+ }
+ }
+ if (fclose_null(&outfile)) {
+ goto lasso_lambda_ret_WRITE_FAIL;
+ }
+ LOGPRINTFWW("Lambda distribution written to %s .\n", outname);
+ }
+ while (0) {
+ lasso_lambda_ret_NOMEM:
+ retval = RET_NOMEM;
+ break;
+ lasso_lambda_ret_OPEN_FAIL:
+ retval = RET_OPEN_FAIL;
+ break;
+ lasso_lambda_ret_READ_FAIL:
+ retval = RET_READ_FAIL;
+ break;
+ lasso_lambda_ret_WRITE_FAIL:
+ retval = RET_WRITE_FAIL;
+ break;
+ }
+ fclose_cond(outfile);
+ bigstack_reset(bigstack_mark);
+ return retval;
+}
+
+
int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, uintptr_t* marker_exclude, uintptr_t marker_ct, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm2, double lasso_h2, double lasso_minlambda, uint32_t select_covars, uintptr_t* select_covars_bitfield, double* pheno_d_collapsed, uintptr_t covar_ct, char* covar_names, uintptr_t max_covar_name_len, uintptr_t* covar_nm, double* covar_d, uint32_t hh_or_mt_exi [...]
// Instead of populating and normalizing data_arr before the coordinate
// descent, we reload and renormalize the data every iteration.
@@ -416,7 +626,7 @@ int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
// there's also more computational work to do per sample, multithreading is
// more profitable here than in the bigmem case.
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
- double* covar_data_arr = NULL;
+ double* covar_data_arr = nullptr;
double sqrt_n_recip = sqrt(1.0 / ((double)((intptr_t)sample_valid_ct)));
double lambda_max = 0.0;
double err_cur = 0.0;
@@ -432,7 +642,7 @@ int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
uint32_t min_ploidy_1 = 0;
uint32_t partial_marker_idx = 0;
int32_t retval = 0;
- double* prod_matrix = NULL;
+ double* prod_matrix = nullptr;
double* data_window;
double* xhat;
double* dptr;
@@ -514,13 +724,13 @@ int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
goto lasso_smallmem_ret_NOMEM;
}
if (rand_matrix) {
- if (bigstack_alloc_d(sample_valid_ct * WARM_START_ITERS, &data_window) ||
- bigstack_alloc_d(WARM_START_ITERS * WARM_START_ITERS, &prod_matrix)) {
+ if (bigstack_alloc_d(sample_valid_ct * DEFAULT_WARM_START_ITERS, &data_window) ||
+ bigstack_alloc_d(DEFAULT_WARM_START_ITERS * DEFAULT_WARM_START_ITERS, &prod_matrix)) {
goto lasso_smallmem_ret_NOMEM;
}
fputs("\r--lasso: Initializing warm start matrix...", stdout);
fflush(stdout);
- fill_double_zero(misc_arr, WARM_START_ITERS);
+ fill_double_zero(DEFAULT_WARM_START_ITERS, misc_arr);
} else {
if (bigstack_alloc_d(sample_valid_ct, &data_window)) {
goto lasso_smallmem_ret_NOMEM;
@@ -558,12 +768,12 @@ int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
if (dxx > lambda_max) {
lambda_max = dxx;
}
- if (partial_marker_idx == WARM_START_ITERS) {
+ if (partial_marker_idx == DEFAULT_WARM_START_ITERS) {
if (rand_matrix) {
- col_major_matrix_multiply(WARM_START_ITERS, WARM_START_ITERS, sample_valid_ct, rand_matrix, data_window, prod_matrix);
+ col_major_matrix_multiply(DEFAULT_WARM_START_ITERS, DEFAULT_WARM_START_ITERS, sample_valid_ct, rand_matrix, data_window, prod_matrix);
dptr = prod_matrix;
- for (col_idx = 0; col_idx < WARM_START_ITERS; col_idx++) {
- for (ulii = 0; ulii < WARM_START_ITERS; ulii++) {
+ for (col_idx = 0; col_idx < DEFAULT_WARM_START_ITERS; col_idx++) {
+ for (ulii = 0; ulii < DEFAULT_WARM_START_ITERS; ulii++) {
dxx = fabs(*dptr++);
if (dxx > misc_arr[ulii]) {
misc_arr[ulii] = dxx;
@@ -571,7 +781,7 @@ int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
}
}
}
- polymorphic_marker_ct += WARM_START_ITERS;
+ polymorphic_marker_ct += DEFAULT_WARM_START_ITERS;
partial_marker_idx = 0;
dptr = data_window;
} else if (!rand_matrix) {
@@ -586,10 +796,10 @@ int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
}
if (rand_matrix) {
if (partial_marker_idx) {
- col_major_matrix_multiply(WARM_START_ITERS, partial_marker_idx, sample_valid_ct, rand_matrix, data_window, prod_matrix);
+ col_major_matrix_multiply(DEFAULT_WARM_START_ITERS, partial_marker_idx, sample_valid_ct, rand_matrix, data_window, prod_matrix);
dptr = prod_matrix;
for (col_idx = 0; col_idx < partial_marker_idx; col_idx++) {
- for (ulii = 0; ulii < WARM_START_ITERS; ulii++) {
+ for (ulii = 0; ulii < DEFAULT_WARM_START_ITERS; ulii++) {
dxx = fabs(*dptr++);
if (dxx > misc_arr[ulii]) {
misc_arr[ulii] = dxx;
@@ -597,7 +807,7 @@ int32_t lasso_smallmem(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset,
}
}
}
- lambda_min = destructive_get_dmedian(WARM_START_ITERS, misc_arr) * zz;
+ lambda_min = destructive_get_dmedian(DEFAULT_WARM_START_ITERS, misc_arr) * zz;
logstr("--lasso:");
LOGPRINTF(" using min lambda = %g.\n", lambda_min);
}
@@ -774,18 +984,18 @@ int32_t lasso(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* out
// Not yet multithreaded. (Main loop is fairly tightly coupled, so getting
// a performance benefit will be a bit tricky.)
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t polymorphic_marker_ct = 0;
uint64_t iter_tot = 0;
- double* xhat = NULL;
- double* rand_matrix = NULL;
- double* misc_arr = NULL;
- uintptr_t* sample_male_include2 = NULL;
- uintptr_t* select_covars_bitfield = NULL;
- char* wptr_start = NULL;
+ double* xhat = nullptr;
+ double* rand_matrix = nullptr;
+ double* misc_arr = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ uintptr_t* select_covars_bitfield = nullptr;
+ char* wptr_start = nullptr;
uint32_t report_zeroes = (misc_flags / MISC_LASSO_REPORT_ZEROES) & 1;
uint32_t select_covars = (misc_flags / MISC_LASSO_SELECT_COVARS) & 1;
uint32_t chrom_fo_idx = 0xffffffffU; // exploit overflow
@@ -860,8 +1070,8 @@ int32_t lasso(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* out
goto lasso_ret_NOMEM;
}
if (lasso_minlambda == -1) {
- if (bigstack_alloc_d(sample_valid_ct * WARM_START_ITERS, &rand_matrix) ||
- bigstack_alloc_d(WARM_START_ITERS, &misc_arr)) {
+ if (bigstack_alloc_d(sample_valid_ct * DEFAULT_WARM_START_ITERS, &rand_matrix) ||
+ bigstack_alloc_d(DEFAULT_WARM_START_ITERS, &misc_arr)) {
goto lasso_ret_NOMEM;
}
}
@@ -910,8 +1120,8 @@ int32_t lasso(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* out
dptr++;
}
fill_quatervec_55(sample_valid_ct, sample_include2);
- fill_ulong_zero(polymorphic_markers, unfiltered_marker_ctl);
- if ((chrom_info_ptr->mt_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->mt_code)) {
+ fill_ulong_zero(unfiltered_marker_ctl, polymorphic_markers);
+ if ((chrom_info_ptr->xymt_codes[MT_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[MT_OFFSET])) {
hh_or_mt_exists |= NXMHH_EXISTS;
}
if (alloc_collapsed_haploid_filters(pheno_nm2, sex_male, unfiltered_sample_ct, sample_valid_ct, hh_or_mt_exists, 1, &sample_include2, &sample_male_include2)) {
@@ -935,16 +1145,17 @@ int32_t lasso(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* out
// 2a. xhat: col_ct * sizeof(double)
// 2b. active_set: col_ctl * sizeof(intptr_t)
// or
- // 3. prod_matrix: WARM_START_ITERS * WARM_START_ITERS * sizeof(double)
+ // 3. prod_matrix: DEFAULT_WARM_START_ITERS * DEFAULT_WARM_START_ITERS *
+ // sizeof(double)
// (whichever is larger)
ullii = round_up_pow2(((uint64_t)uii) * sizeof(double), CACHELINE) + round_up_pow2((uii + 7) / 8, CACHELINE);
- // assumes WARM_START_ITERS is even
+ // assumes DEFAULT_WARM_START_ITERS is even
if (rand_matrix) {
- uljj = (sample_valid_ct * WARM_START_ITERS) - 1;
+ uljj = (sample_valid_ct * DEFAULT_WARM_START_ITERS) - 1;
for (ulii = 0; ulii < uljj; ulii += 2) {
rand_matrix[ulii] = rand_normal(&(rand_matrix[ulii + 1]));
}
- ulljj = round_up_pow2(WARM_START_ITERS * WARM_START_ITERS * sizeof(double), CACHELINE);
+ ulljj = round_up_pow2(DEFAULT_WARM_START_ITERS * DEFAULT_WARM_START_ITERS * sizeof(double), CACHELINE);
if (ullii < ulljj) {
ullii = ulljj;
}
@@ -1022,7 +1233,7 @@ int32_t lasso(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* out
if (fclose_null(&outfile)) {
goto lasso_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("--lasso report written to %s. Total iterations: %" PRIu64 ".\n", outname, iter_tot);
while (0) {
diff --git a/plink_lasso.h b/plink_lasso.h
index 4472770..f8261f2 100644
--- a/plink_lasso.h
+++ b/plink_lasso.h
@@ -1,6 +1,8 @@
#ifndef __PLINK_LASSO_H__
#define __PLINK_LASSO_H__
+int32_t lasso_lambda(const uintptr_t* marker_exclude, const uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t* sex_male, uintptr_t* pheno_nm, const uintptr_t* covar_nm, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t marker_ct, uintptr_t unfiltered_sample_ct, uintptr_t pheno_nm_ct, uint32_t hh_or_mt_exists, uint32_t lasso_lambda_iters, double lasso_h2, FILE* bedfile, char* outname, char* outname_end, double* lasso_minlambda_ptr);
+
int32_t lasso(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t pheno_nm_ct, double lasso_h2, double lasso_minlambda, Range_list* select_covars_range_list_ptr, uint64_t misc_flags, uintptr_t* pheno_nm [...]
#endif // __PLINK_LASSO_H__
diff --git a/plink_ld.c b/plink_ld.c
index df5e2e6..b3d97fb 100644
--- a/plink_ld.c
+++ b/plink_ld.c
@@ -30,28 +30,28 @@ void ld_epi_init(Ld_info* ldip, Epi_info* epi_ip, Clump_info* clump_ip) {
ldip->flipscan_thresh = 0.5;
ldip->show_tags_bp = 250000;
ldip->show_tags_r2 = 0.8;
- ldip->snpstr = NULL;
- ldip->show_tags_fname = NULL;
+ ldip->snpstr = nullptr;
+ ldip->show_tags_fname = nullptr;
range_list_init(&(ldip->snps_rl));
epi_ip->modifier = 0;
epi_ip->case_only_gap = 1000000;
epi_ip->epi1 = 0.0;
epi_ip->epi2 = 0.01;
epi_ip->je_cellmin = 5;
- epi_ip->ld_mkr1 = NULL;
- epi_ip->ld_mkr2 = NULL;
- epi_ip->twolocus_mkr1 = NULL;
- epi_ip->twolocus_mkr2 = NULL;
- epi_ip->summary_merge_prefix = NULL;
+ epi_ip->ld_mkr1 = nullptr;
+ epi_ip->ld_mkr2 = nullptr;
+ epi_ip->twolocus_mkr1 = nullptr;
+ epi_ip->twolocus_mkr2 = nullptr;
+ epi_ip->summary_merge_prefix = nullptr;
clump_ip->modifier = 0;
clump_ip->fname_ct = 0;
clump_ip->bp_radius = 249999;
clump_ip->range_border = 0;
- clump_ip->fnames_flattened = NULL;
- clump_ip->annotate_flattened = NULL;
- clump_ip->snpfield_search_order = NULL;
- clump_ip->pfield_search_order = NULL;
- clump_ip->range_fname = NULL;
+ clump_ip->fnames_flattened = nullptr;
+ clump_ip->annotate_flattened = nullptr;
+ clump_ip->snpfield_search_order = nullptr;
+ clump_ip->pfield_search_order = nullptr;
+ clump_ip->range_fname = nullptr;
clump_ip->p1 = 1e-4;
clump_ip->p2 = 1e-2;
clump_ip->r2 = 0.5;
@@ -621,20 +621,20 @@ uint32_t ld_prune_next_valid_chrom_start(uintptr_t* marker_exclude, uint32_t cur
uint32_t chrom_idx;
cur_uidx = next_unset(marker_exclude, cur_uidx, unfiltered_marker_ct);
while (cur_uidx < unfiltered_marker_ct) {
- chrom_idx = get_marker_chrom(chrom_info_ptr, cur_uidx);
+ chrom_idx = get_variant_chrom(chrom_info_ptr, cur_uidx);
// --aec 0 support
if (chrom_idx && (chrom_idx < chrom_code_end)) {
return cur_uidx;
}
- cur_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_end[chrom_idx], unfiltered_marker_ct);
+ cur_uidx = next_unset(marker_exclude, get_chrom_end_vidx(chrom_info_ptr, chrom_idx), unfiltered_marker_ct);
}
return cur_uidx;
}
void ld_prune_start_chrom(uint32_t ld_window_kb, uint32_t* cur_chrom_ptr, uint32_t* chrom_end_ptr, uint32_t window_unfiltered_start, uint32_t* live_indices, uint32_t* start_arr, uint32_t* window_unfiltered_end_ptr, uint32_t ld_window_size, uint32_t* cur_window_size_ptr, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, Chrom_info* chrom_info_ptr, uint32_t* marker_pos, uint32_t* is_haploid_ptr, uint32_t* is_x_ptr, uint32_t* is_y_ptr) {
- uint32_t cur_chrom = get_marker_chrom(chrom_info_ptr, window_unfiltered_start);
+ uint32_t cur_chrom = get_variant_chrom(chrom_info_ptr, window_unfiltered_start);
uint32_t window_unfiltered_end = window_unfiltered_start + 1;
- uint32_t chrom_end = chrom_info_ptr->chrom_end[cur_chrom];
+ uint32_t chrom_end = get_chrom_end_vidx(chrom_info_ptr, cur_chrom);
uint32_t uii = 0;
uint32_t window_size;
live_indices[0] = window_unfiltered_start;
@@ -666,54 +666,59 @@ void ld_prune_start_chrom(uint32_t ld_window_kb, uint32_t* cur_chrom_ptr, uint32
*chrom_end_ptr = chrom_end;
*window_unfiltered_end_ptr = window_unfiltered_end;
*is_haploid_ptr = IS_SET(chrom_info_ptr->haploid_mask, cur_chrom);
- *is_x_ptr = (((int32_t)cur_chrom) == chrom_info_ptr->x_code);
- *is_y_ptr = (((int32_t)cur_chrom) == chrom_info_ptr->y_code);
+ *is_x_ptr = (((int32_t)cur_chrom) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ *is_y_ptr = (((int32_t)cur_chrom) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
int32_t ld_prune_write(char* outname, char* outname_end, uintptr_t* marker_exclude, uintptr_t* pruned_arr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, uint32_t chrom_code_end) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
int32_t retval = 0;
- uint32_t cur_chrom;
- uint32_t chrom_end;
- uint32_t marker_uidx;
- fputs("Writing...", stdout);
- fflush(stdout);
- strcpy(outname_end, ".prune.in");
- if (fopen_checked(outname, "w", &outfile)) {
- goto ld_prune_write_ret_OPEN_FAIL;
- }
- for (cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
- chrom_end = chrom_info_ptr->chrom_end[cur_chrom];
- for (marker_uidx = chrom_info_ptr->chrom_start[cur_chrom]; marker_uidx < chrom_end; marker_uidx++) {
- // pruned_arr initialized to marker_exclude
- if (!IS_SET(pruned_arr, marker_uidx)) {
- fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ {
+ fputs("Writing...", stdout);
+ fflush(stdout);
+ strcpy(outname_end, ".prune.in");
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto ld_prune_write_ret_OPEN_FAIL;
+ }
+ for (uint32_t cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
+ continue;
+ }
+ const uint32_t chrom_end = get_chrom_end_vidx(chrom_info_ptr, cur_chrom);
+ for (uint32_t marker_uidx = get_chrom_start_vidx(chrom_info_ptr, cur_chrom); marker_uidx < chrom_end; marker_uidx++) {
+ // pruned_arr initialized to marker_exclude
+ if (!IS_SET(pruned_arr, marker_uidx)) {
+ fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
+ putc_unlocked('\n', outfile);
+ }
}
}
- }
- if (fclose_null(&outfile)) {
- goto ld_prune_write_ret_WRITE_FAIL;
- }
- strcpy(outname_end, ".prune.out");
- if (fopen_checked(outname, "w", &outfile)) {
- goto ld_prune_write_ret_OPEN_FAIL;
- }
- for (cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
- chrom_end = chrom_info_ptr->chrom_end[cur_chrom];
- for (marker_uidx = chrom_info_ptr->chrom_start[cur_chrom]; marker_uidx < chrom_end; marker_uidx++) {
- if ((!IS_SET(marker_exclude, marker_uidx)) && IS_SET(pruned_arr, marker_uidx)) {
- fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ if (fclose_null(&outfile)) {
+ goto ld_prune_write_ret_WRITE_FAIL;
+ }
+ strcpy(outname_end, ".prune.out");
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto ld_prune_write_ret_OPEN_FAIL;
+ }
+ for (uint32_t cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
+ continue;
+ }
+ const uint32_t chrom_end = get_chrom_end_vidx(chrom_info_ptr, cur_chrom);
+ for (uint32_t marker_uidx = get_chrom_start_vidx(chrom_info_ptr, cur_chrom); marker_uidx < chrom_end; marker_uidx++) {
+ if ((!IS_SET(marker_exclude, marker_uidx)) && IS_SET(pruned_arr, marker_uidx)) {
+ fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
+ putc_unlocked('\n', outfile);
+ }
}
}
+ if (fclose_null(&outfile)) {
+ goto ld_prune_write_ret_WRITE_FAIL;
+ }
+ *outname_end = '\0';
+ putc_unlocked('\r', stdout);
+ LOGPRINTFWW("Marker lists written to %s.prune.in and %s.prune.out .\n", outname, outname);
}
- if (fclose_null(&outfile)) {
- goto ld_prune_write_ret_WRITE_FAIL;
- }
- *outname_end = '\0';
- putchar('\r');
- LOGPRINTFWW("Marker lists written to %s.prune.in and %s.prune.out .\n", outname, outname);
while (0) {
ld_prune_write_ret_OPEN_FAIL:
retval = RET_OPEN_FAIL;
@@ -763,16 +768,16 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
double ld_last_param = ldip->prune_last_param;
uint32_t nonmale_founder_ct = 0;
uintptr_t window_max = 1;
- uintptr_t* geno = NULL;
- uintptr_t* founder_include2 = NULL;
- uintptr_t* founder_male_include2 = NULL;
- uintptr_t* nonmale_geno = NULL;
- uintptr_t* nonmale_masks = NULL;
- double* cov_matrix = NULL;
- double* new_cov_matrix = NULL;
- MATRIX_INVERT_BUF1_TYPE* irow = NULL;
- double* work = NULL;
- uint32_t* idx_remap = NULL;
+ uintptr_t* geno = nullptr;
+ uintptr_t* founder_include2 = nullptr;
+ uintptr_t* founder_male_include2 = nullptr;
+ uintptr_t* nonmale_geno = nullptr;
+ uintptr_t* nonmale_masks = nullptr;
+ double* cov_matrix = nullptr;
+ double* new_cov_matrix = nullptr;
+ MATRIX_INVERT_BUF1_TYPE* irow = nullptr;
+ double* work = nullptr;
+ uint32_t* idx_remap = nullptr;
uint32_t tot_exclude_ct = 0;
uint32_t at_least_one_prune = 0;
uint32_t chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
@@ -793,6 +798,7 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
uint32_t ukk;
int32_t ii;
uint32_t cur_chrom;
+ uint32_t chrom_start;
uint32_t chrom_end;
uint32_t is_haploid;
uint32_t is_x;
@@ -861,7 +867,7 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
if (window_is_kb) {
// determine maximum number of markers that may need to be loaded at once
for (cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
- if (chrom_exists(chrom_info_ptr, cur_chrom)) {
+ if (is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
window_max = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, cur_chrom, 0x7fffffff, ld_window_size * 1000, window_max);
}
}
@@ -910,11 +916,11 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
}
}
for (ulii = 1; ulii <= window_max; ulii++) {
- fill_ulong_zero(&(geno[ulii * founder_ct_192_long - founder_trail_ct - 2]), founder_trail_ct + 2);
- fill_ulong_zero(&(geno_masks[ulii * founder_ct_192_long - founder_trail_ct - 2]), founder_trail_ct + 2);
+ fill_ulong_zero(founder_trail_ct + 2, &(geno[ulii * founder_ct_192_long - founder_trail_ct - 2]));
+ fill_ulong_zero(founder_trail_ct + 2, &(geno_masks[ulii * founder_ct_192_long - founder_trail_ct - 2]));
if (weighted_x) {
- fill_ulong_zero(&(nonmale_geno[ulii * founder_ct_192_long - founder_trail_ct - 2]), founder_trail_ct + 2);
- fill_ulong_zero(&(nonmale_masks[ulii * founder_ct_192_long - founder_trail_ct - 2]), founder_trail_ct + 2);
+ fill_ulong_zero(founder_trail_ct + 2, &(nonmale_geno[ulii * founder_ct_192_long - founder_trail_ct - 2]));
+ fill_ulong_zero(founder_trail_ct + 2, &(nonmale_masks[ulii * founder_ct_192_long - founder_trail_ct - 2]));
}
}
if (!pairwise) {
@@ -969,7 +975,8 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
}
}
pct = 1;
- pct_thresh = window_unfiltered_start + ((uint64_t)pct * (chrom_end - chrom_info_ptr->chrom_start[cur_chrom])) / 100;
+ chrom_start = get_chrom_start_vidx(chrom_info_ptr, cur_chrom);
+ pct_thresh = window_unfiltered_start + ((uint64_t)pct * (chrom_end - chrom_start)) / 100;
while ((window_unfiltered_start < chrom_end) || (cur_window_size > 1)) {
if (cur_window_size > 1) {
do {
@@ -1192,10 +1199,10 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
break;
}
if (window_unfiltered_start >= pct_thresh) {
- pct = ((window_unfiltered_start - chrom_info_ptr->chrom_start[cur_chrom]) * 100LLU) / (chrom_end - chrom_info_ptr->chrom_start[cur_chrom]);
+ pct = ((window_unfiltered_start - chrom_start) * 100LLU) / (chrom_end - chrom_start);
printf("\r%u%%", pct++);
fflush(stdout);
- pct_thresh = chrom_info_ptr->chrom_start[cur_chrom] + (((uint64_t)pct * (chrom_end - chrom_info_ptr->chrom_start[cur_chrom])) / 100);
+ pct_thresh = chrom_start + (((uint64_t)pct * (chrom_end - chrom_start)) / 100);
}
ujj = 0;
@@ -1278,9 +1285,8 @@ int32_t ld_prune(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
start_arr[cur_window_size] = window_unfiltered_end;
}
}
- uii = get_marker_chrom(chrom_info_ptr, window_unfiltered_start - 1);
- putchar('\r');
- LOGPRINTF("Pruned %" PRIuPTR " variant%s from chromosome %u, leaving %" PRIuPTR ".\n", cur_exclude_ct, (cur_exclude_ct == 1)? "" : "s", uii, chrom_info_ptr->chrom_end[uii] - chrom_info_ptr->chrom_start[uii] - popcount_bit_idx(marker_exclude, chrom_info_ptr->chrom_start[uii], chrom_info_ptr->chrom_end[uii]) - cur_exclude_ct);
+ putc_unlocked('\r', stdout);
+ LOGPRINTF("Pruned %" PRIuPTR " variant%s from chromosome %u, leaving %" PRIuPTR ".\n", cur_exclude_ct, (cur_exclude_ct == 1)? "" : "s", cur_chrom, chrom_end - chrom_start - popcount_bit_idx(marker_exclude, chrom_start, chrom_end) - cur_exclude_ct);
tot_exclude_ct += cur_exclude_ct;
// advance chromosomes as necessary
@@ -1429,10 +1435,10 @@ uint32_t ld_missing_ct_intersect(uintptr_t* lptr1, uintptr_t* lptr2, uintptr_t w
int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, Chrom_info* chrom_info_ptr, double* set_allele_freqs, uint32_t* marker_pos, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uintptr_t* pheno_c, uintptr_t* founder_info, uintptr_t* s [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- FILE* outfile_verbose = NULL;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_verbose = nullptr;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
double min_corr = ldip->flipscan_thresh * (1 - SMALL_EPSILON);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
@@ -1549,13 +1555,20 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
}
for (is_case = 0; is_case < 2; is_case++) {
pheno_ctl[is_case] = BITCT_TO_WORDCT(pheno_ct[is_case]);
+
+ // ulii == total number of blocks, all but last is size MULTIPLEX_LD
ulii = (pheno_ct[is_case] + MULTIPLEX_LD - 1) / MULTIPLEX_LD;
pheno_ct_mld_m1[is_case] = ulii - 1;
+
+ // number of size-{48,192} sub-blocks in trailing block
#ifdef __LP64__
pheno_ct_mld_rem[is_case] = (MULTIPLEX_LD / 192) - (ulii * MULTIPLEX_LD - pheno_ct[is_case]) / 192;
#else
pheno_ct_mld_rem[is_case] = (MULTIPLEX_LD / 48) - (ulii * MULTIPLEX_LD - pheno_ct[is_case]) / 48;
#endif
+
+ // number of genotype words per variant, rounded up to the next 192-sample
+ // boundary
pheno_ct_192_long[is_case] = pheno_ct_mld_m1[is_case] * (MULTIPLEX_LD / BITCT2) + pheno_ct_mld_rem[is_case] * (192 / BITCT2);
}
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
@@ -1581,6 +1594,9 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
for (uljj = 0; uljj < max_window_size; uljj++) {
neg_uidx_buf[uljj * ulii] = 0.0;
neg_uidx_buf[uljj * ulii + 1] = 0.0;
+ // bugfix: initialize r_matrix diagonal
+ r_matrix[uljj * ulii] = 0.0;
+ r_matrix[uljj * ulii + 1] = 0.0;
}
for (is_case = 0; is_case < 2; is_case++) {
quaterarr_collapse_init(sex_male, unfiltered_sample_ct, founder_phenos[is_case], pheno_ct[is_case], pheno_male_include2[is_case]);
@@ -1589,8 +1605,8 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
cur_192_long = pheno_ct_192_long[is_case];
ulii = 2 + pheno_ct_192_long[is_case] - pheno_ctl[is_case] * 2;
for (uljj = 1; uljj <= max_window_size; uljj++) {
- fill_ulong_zero(&(window_geno_ptr[uljj * cur_192_long - ulii]), ulii);
- fill_ulong_zero(&(window_mask_ptr[uljj * cur_192_long - ulii]), ulii);
+ fill_ulong_zero(ulii, &(window_geno_ptr[uljj * cur_192_long - ulii]));
+ fill_ulong_zero(ulii, &(window_mask_ptr[uljj * cur_192_long - ulii]));
}
}
@@ -1624,8 +1640,8 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
fflush(stdout);
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
chrom_marker_ct = chrom_end - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, chrom_end);
if (chrom_marker_ct < 2) {
marker_idx += chrom_marker_ct;
@@ -1634,8 +1650,8 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
wptr_start = width_force(6, textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, textbuf));
*wptr_start++ = ' ';
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (chrom_idx == ((uint32_t)chrom_info_ptr->x_code));
- is_y = (chrom_idx == ((uint32_t)chrom_info_ptr->y_code));
+ is_x = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]));
+ is_y = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]));
if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
goto flipscan_ret_READ_FAIL;
}
@@ -1792,7 +1808,7 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
if (neg_r_ct) {
for (ulii = 0; ulii < neg_r_ct; ulii++) {
if (ulii) {
- putc('|', outfile);
+ putc_unlocked('|', outfile);
}
fputs(&(marker_ids[neg_uidx_buf[ulii] * max_marker_id_len]), outfile);
}
@@ -1824,10 +1840,10 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
}
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
if (++marker_idx >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_ct;
if (pct < 100) {
@@ -1859,7 +1875,7 @@ int32_t flipscan(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t m
goto flipscan_ret_WRITE_FAIL;
}
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
// not actually possible to have exactly one problem variant, heh
LOGPRINTF("--flip-scan%s: %u variants with at least one negative LD match.\n", verbose? " verbose" : "", problem_ct);
if (verbose) {
@@ -1968,8 +1984,8 @@ THREAD_RET_TYPE ld_block_thread(void* arg) {
uint32_t keep_sign = g_ld_keep_sign;
double* results = g_ld_results;
float* results_f = g_ld_results_f;
- double* rptr = NULL;
- float* rptr_f = NULL;
+ double* rptr = nullptr;
+ float* rptr_f = nullptr;
int32_t dp_result[5];
uintptr_t* geno_fixed_vec_ptr;
uintptr_t* geno_var_vec_ptr;
@@ -2135,7 +2151,7 @@ uint32_t ld_matrix_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
}
int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t parallel_tot, uintptr_t* sex_male, uintptr_t* founder_include2, uintptr_t* founder_male_include2, uintptr_t* loadbuf, char* outname, uint32_t hh_exists) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uint32_t ld_modifier = ldip->modifier;
uint32_t is_binary = ld_modifier & (LD_MATRIX_BIN | LD_MATRIX_BIN4);
uint32_t is_square = ((ld_modifier & LD_MATRIX_SHAPEMASK) == LD_MATRIX_SQ);
@@ -2240,12 +2256,12 @@ int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
bigstack_alloc_ui(idx1_block_size * 2, &g_ld_interval1);
if (!output_single_prec) {
- // may want to set g_ld_results_f to NULL
+ // may want to set g_ld_results_f to nullptr
if (bigstack_alloc_d(marker_ctm8 * idx1_block_size, &g_ld_results)) {
goto ld_report_matrix_ret_NOMEM;
}
} else {
- g_ld_results = NULL;
+ g_ld_results = nullptr;
if (bigstack_alloc_f(marker_ctm8 * idx1_block_size, &g_ld_results_f)) {
goto ld_report_matrix_ret_NOMEM;
}
@@ -2280,12 +2296,12 @@ int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
}
uljj = founder_trail_ct + 2;
for (ulii = 1; ulii <= idx1_block_size; ulii++) {
- fill_ulong_zero(&(g_ld_geno1[ulii * founder_ct_192_long - uljj]), uljj);
- fill_ulong_zero(&(g_ld_geno_masks1[ulii * founder_ct_192_long - uljj]), uljj);
+ fill_ulong_zero(uljj, &(g_ld_geno1[ulii * founder_ct_192_long - uljj]));
+ fill_ulong_zero(uljj, &(g_ld_geno_masks1[ulii * founder_ct_192_long - uljj]));
}
for (ulii = 1; ulii <= idx2_block_size; ulii++) {
- fill_ulong_zero(&(g_ld_geno2[ulii * founder_ct_192_long - uljj]), uljj);
- fill_ulong_zero(&(g_ld_geno_masks2[ulii * founder_ct_192_long - uljj]), uljj);
+ fill_ulong_zero(uljj, &(g_ld_geno2[ulii * founder_ct_192_long - uljj]));
+ fill_ulong_zero(uljj, &(g_ld_geno_masks2[ulii * founder_ct_192_long - uljj]));
}
if (is_square) {
for (ulii = 0; ulii < idx1_block_size; ulii++) {
@@ -2300,9 +2316,9 @@ int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
if (is_square0) {
if (is_binary) {
if (!output_single_prec) {
- fill_double_zero((double*)g_textbuf, MAXLINELEN / sizeof(double));
+ fill_double_zero(MAXLINELEN / sizeof(double), (double*)g_textbuf);
} else {
- fill_float_zero((float*)g_textbuf, MAXLINELEN / sizeof(float));
+ fill_float_zero(MAXLINELEN / sizeof(float), (float*)g_textbuf);
}
} else {
ulptr = (uintptr_t*)g_textbuf;
@@ -2351,11 +2367,11 @@ int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
}
}
if (marker_uidx1 >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx1), bedfile, loadbuf, &(g_ld_geno1[block_idx1 * founder_ct_192_long]))) {
goto ld_report_matrix_ret_READ_FAIL;
@@ -2398,11 +2414,11 @@ int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
}
}
if (marker_uidx2 >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf, &(g_ld_geno2[block_idx2 * founder_ct_192_long]))) {
goto ld_report_matrix_ret_READ_FAIL;
@@ -2508,7 +2524,7 @@ int32_t ld_report_matrix(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
}
if (tests_completed >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (tests_completed * 100LLU) / job_size;
if (pct < 100) {
@@ -2562,8 +2578,8 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
uint32_t* ld_interval1 = g_ld_interval1;
double* results = g_ld_results;
double* set_allele_freqs = g_ld_set_allele_freqs;
- char* fixed_a1 = NULL;
- char* fixed_a2 = NULL;
+ char* fixed_a1 = nullptr;
+ char* fixed_a2 = nullptr;
uintptr_t max_marker_id_len = g_ld_max_marker_id_len;
uintptr_t marker_uidx1 = g_ld_marker_uidx1;
uintptr_t block_idx1 = g_ld_block_idx1;
@@ -2579,9 +2595,9 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
uint32_t is_dprime = (g_ld_modifier / LD_DPRIME) & 1;
uint32_t is_r2 = g_ld_is_r2;
uint32_t prefix_len = g_ld_prefix_len;
- uint32_t chrom_fo_idx1 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
+ uint32_t chrom_fo_idx1 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
uint32_t chrom_idx1 = chrom_info_ptr->chrom_file_order[chrom_fo_idx1];
- uint32_t chrom_end1 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx1 + 1];
+ uint32_t chrom_end1 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx1 + 1];
uint32_t chrom_fo_idx2 = 0;
uint32_t chrom_idx2 = 0;
uint32_t fixed_a1_len = 0;
@@ -2625,9 +2641,9 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
marker_uidx1++;
next_unset_ul_unsafe_ck(marker_exclude_idx1, &marker_uidx1);
if (marker_uidx1 >= chrom_end1) {
- chrom_fo_idx1 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
+ chrom_fo_idx1 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1);
chrom_idx1 = chrom_info_ptr->chrom_file_order[chrom_fo_idx1];
- chrom_end1 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx1 + 1];
+ chrom_end1 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx1 + 1];
}
block_idx2 = ld_interval1[2 * block_idx1];
if (block_idx2_start < block_idx2) {
@@ -2667,9 +2683,9 @@ uint32_t ld_regular_emitn(uint32_t overflow_ct, unsigned char* readbuf) {
sptr_cur = memcpya(sptr_cur, g_textbuf, prefix_len);
if (is_inter_chr) {
if (marker_uidx2 >= chrom_end2) {
- chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
+ chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
- chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+ chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
}
sptr_cur = width_force(6, sptr_cur, chrom_name_write(chrom_info_ptr, chrom_idx2, sptr_cur));
sptr_cur = memseta(sptr_cur, 32, 3);
@@ -3053,7 +3069,7 @@ static inline void two_locus_3x3_zmiss_tablev(__m128i* veca0, __m128i* vecb0, ui
static void two_locus_count_table_zmiss1(uintptr_t* lptr1, uintptr_t* lptr2, uint32_t* counts_3x3, uint32_t sample_ctv3, uint32_t is_zmiss2) {
#ifdef __LP64__
- fill_uint_zero(counts_3x3, 6);
+ fill_uint_zero(6, counts_3x3);
if (is_zmiss2) {
two_locus_3x3_zmiss_tablev((__m128i*)lptr1, (__m128i*)lptr2, counts_3x3, sample_ctv3 / 2);
} else {
@@ -3075,7 +3091,7 @@ static void two_locus_count_table_zmiss1(uintptr_t* lptr1, uintptr_t* lptr2, uin
static void two_locus_count_table(uintptr_t* lptr1, uintptr_t* lptr2, uint32_t* counts_3x3, uint32_t sample_ctv3, uint32_t is_zmiss2) {
#ifdef __LP64__
uint32_t uii;
- fill_uint_zero(counts_3x3, 9);
+ fill_uint_zero(9, counts_3x3);
if (!is_zmiss2) {
two_locus_3x3_tablev((__m128i*)lptr1, (__m128i*)lptr2, counts_3x3, sample_ctv3 / 2, 3);
} else {
@@ -3358,7 +3374,7 @@ static uint32_t* g_epi_fail_ct1;
static uintptr_t* g_epi_geno2;
static uintptr_t* g_epi_zmiss2;
static uint32_t* g_epi_tot2;
-static double* g_epi_boost_precalc2 = NULL;
+static double* g_epi_boost_precalc2 = nullptr;
static double* g_epi_best_chisq2;
static uint32_t* g_epi_best_id2;
static uint32_t* g_epi_n_sig_ct2;
@@ -3708,10 +3724,10 @@ THREAD_RET_TYPE fast_epi_thread(void* arg) {
uint32_t is_first_half = 0;
uintptr_t* geno1 = g_epi_geno1;
uintptr_t* zmiss1 = g_epi_zmiss1;
- uintptr_t* cur_geno1 = NULL;
- uintptr_t* cur_geno1_ctrls = NULL;
- double* cur_boost_precalc2 = NULL;
- double* p_bc_ptr = NULL;
+ uintptr_t* cur_geno1 = nullptr;
+ uintptr_t* cur_geno1_ctrls = nullptr;
+ double* cur_boost_precalc2 = nullptr;
+ double* p_bc_ptr = nullptr;
uint32_t* geno1_offsets = g_epi_geno1_offsets;
uint32_t* best_id1 = &(g_epi_best_id1[idx1_block_start16]);
double* alpha1sq_ptr = g_epi_alpha1sq;
@@ -4280,7 +4296,7 @@ THREAD_RET_TYPE epi_linear_thread(void* arg) {
cur_sample_ct = pheno_nm_ct;
cur_sum_ab_pheno = 0.0;
- fill_uint_zero(cur_minor_cts, 4);
+ fill_uint_zero(4, cur_minor_cts);
for (widx = 0; widx < pheno_nm_ctl2; widx++) {
sample_idx = widx * BITCT2;
cur_word1 = cur_geno1[widx];
@@ -4698,21 +4714,21 @@ THREAD_RET_TYPE epi_logistic_thread(void* arg) {
}
if (cur_sample_ct < cur_sample_cta4) {
loop_end = cur_sample_cta4 - cur_sample_ct;
- fill_float_zero(fptr, loop_end);
- fill_float_zero(&(fptr[cur_sample_cta4]), loop_end);
- fill_float_zero(&(fptr[2 * cur_sample_cta4]), loop_end);
- fill_float_zero(&(fptr[3 * cur_sample_cta4]), loop_end);
- fill_float_zero(fptr2, loop_end);
+ fill_float_zero(loop_end, fptr);
+ fill_float_zero(loop_end, &(fptr[cur_sample_cta4]));
+ fill_float_zero(loop_end, &(fptr[2 * cur_sample_cta4]));
+ fill_float_zero(loop_end, &(fptr[3 * cur_sample_cta4]));
+ fill_float_zero(loop_end, fptr2);
}
- fill_float_zero(coef, 4);
+ fill_float_zero(4, coef);
if (logistic_regression(cur_sample_ct, 4, sample_1d_buf, param_2d_buf, param_1d_buf, param_2d_buf2, param_1d_buf2, covars_cov_major, pheno_buf, coef, pp)) {
goto epi_logistic_thread_regression_fail;
}
// compute S
for (param_idx = 0; param_idx < 4; param_idx++) {
- fill_float_zero(param_1d_buf, 4);
+ fill_float_zero(4, param_1d_buf);
param_1d_buf[param_idx] = 1.0;
solve_linear_system(param_2d_buf2, param_1d_buf, param_1d_buf2, 4);
memcpy(&(param_2d_buf[param_idx * 4]), param_1d_buf2, 4 * sizeof(float));
@@ -4996,7 +5012,7 @@ uint32_t em_phase_hethet_nobase(uint32_t* counts, uint32_t is_x1, uint32_t is_x2
known22 -= ((double)(2 * counts[17] + counts[14])) * (1.0 - SQRT_HALF);
}
}
- return em_phase_hethet(known11, known12, known21, known22, counts[4], freq1x_ptr, freq2x_ptr, freqx1_ptr, freqx2_ptr, freq11_ptr, NULL);
+ return em_phase_hethet(known11, known12, known21, known22, counts[4], freq1x_ptr, freq2x_ptr, freqx1_ptr, freqx2_ptr, freq11_ptr, nullptr);
}
THREAD_RET_TYPE ld_dprime_thread(void* arg) {
@@ -5010,7 +5026,7 @@ THREAD_RET_TYPE ld_dprime_thread(void* arg) {
uintptr_t* geno1 = g_ld_geno1;
uintptr_t* zmiss1 = g_epi_zmiss1;
uintptr_t* sex_male = g_ld_sex_male;
- uintptr_t* cur_geno1_male = NULL;
+ uintptr_t* cur_geno1_male = nullptr;
uint32_t* ld_interval1 = g_ld_interval1;
uint32_t is_r2 = g_ld_is_r2;
uint32_t xstart1 = g_ld_xstart1;
@@ -5048,7 +5064,7 @@ THREAD_RET_TYPE ld_dprime_thread(void* arg) {
cur_geno1_male = &(g_ld_thread_wkspace[tidx * round_up_pow2(founder_ctsplit, CACHELINE_WORD)]);
}
// suppress warning
- fill_uint_zero(&(tot1[3]), 3);
+ fill_uint_zero(3, &(tot1[3]));
while (1) {
idx2_block_size = g_ld_idx2_block_size;
idx2_block_start = g_ld_idx2_block_start;
@@ -5194,7 +5210,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
uint32_t marker_uidx2_fwd2 = 0;
uint32_t window_trail_ct = 0;
uint32_t window_lead_ct = 0;
- int32_t x_code = chrom_info_ptr->x_code;
+ int32_t x_code = chrom_info_ptr->xymt_codes[X_OFFSET];
uint32_t xstart = 0;
uint32_t xend = 0;
int32_t retval = 0;
@@ -5232,10 +5248,10 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
loadbuf[founder_ctl * 2 - 2] = 0;
loadbuf[founder_ctl * 2 - 1] = 0;
fill_all_bits(founder_ct, dummy_nm);
- g_ld_thread_wkspace = NULL;
+ g_ld_thread_wkspace = nullptr;
if ((x_code != -1) && is_set(chrom_info_ptr->chrom_mask, x_code)) {
- uii = chrom_info_ptr->chrom_start[(uint32_t)x_code];
- chrom_end = chrom_info_ptr->chrom_end[(uint32_t)x_code];
+ uii = get_chrom_start_vidx(chrom_info_ptr, (uint32_t)x_code);
+ chrom_end = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)x_code);
chrom_end = chrom_end - uii - popcount_bit_idx(marker_exclude, uii, chrom_end);
if (chrom_end) {
if (bigstack_alloc_ul(round_up_pow2(founder_ctsplit, CACHELINE_WORD) * thread_ct, &g_ld_thread_wkspace)) {
@@ -5326,8 +5342,8 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
if (idx1_subset) {
if (!is_inter_chr) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
- marker_uidx2_base = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, &uii);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
+ marker_uidx2_base = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, &uii);
marker_idx2_base = marker_uidx2_base - popcount_bit_idx(marker_exclude, 0, marker_uidx2_base);
marker_idx2 = marker_idx2_base + uii;
} else {
@@ -5347,7 +5363,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
goto ld_report_dprime_ret_READ_FAIL;
}
chrom_end = 0;
- fill_ulong_zero(g_epi_zmiss1, BITCT_TO_WORDCT(idx1_block_size));
+ fill_ulong_zero(BITCT_TO_WORDCT(idx1_block_size), g_epi_zmiss1);
for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx1_tmp++, block_idx1++, marker_idx2++) {
if (IS_SET(marker_exclude_idx1, marker_uidx1_tmp)) {
ulii = next_unset_ul_unsafe(marker_exclude_idx1, marker_uidx1_tmp);
@@ -5362,19 +5378,19 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
}
}
if (marker_uidx1_tmp >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
chrom_last = prev_unset_unsafe(marker_exclude, chrom_end);
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
uii = 1;
}
if (!is_inter_chr) {
if (uii) {
if (idx1_subset) {
- marker_uidx2_back = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, &window_trail_ct);
+ marker_uidx2_back = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, &window_trail_ct);
}
marker_uidx2_fwd = window_forward(marker_pos, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, &window_lead_ct);
marker_uidx2_fwd2 = marker_uidx2_fwd;
@@ -5441,7 +5457,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
if (is_haploid && hh_exists) {
haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
}
- load_and_split3(NULL, loadbuf, founder_ct, &(g_ld_geno1[block_idx1 * founder_ctsplit]), dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulii);
+ load_and_split3(nullptr, loadbuf, founder_ct, &(g_ld_geno1[block_idx1 * founder_ctsplit]), dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulii);
if (ulii == 3) {
SET_BIT(block_idx1, g_epi_zmiss1);
}
@@ -5469,7 +5485,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
g_ld_xstart2 = uii - marker_idx2;
g_ld_xend2 = MINV(xend, marker_idx2 + cur_idx2_block_size) - uii;
}
- fill_ulong_zero(g_epi_zmiss2, BITCT_TO_WORDCT(cur_idx2_block_size));
+ fill_ulong_zero(BITCT_TO_WORDCT(cur_idx2_block_size), g_epi_zmiss2);
for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
if (IS_SET(marker_exclude, marker_uidx2)) {
marker_uidx2 = next_unset_ul_unsafe(marker_exclude, marker_uidx2);
@@ -5478,11 +5494,11 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
}
}
if (marker_uidx2 >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf_raw, loadbuf)) {
goto ld_report_dprime_ret_READ_FAIL;
@@ -5491,7 +5507,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
}
ulptr = &(g_ld_geno2[block_idx2 * founder_ctsplit]);
- load_and_split3(NULL, loadbuf, founder_ct, ulptr, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulii);
+ load_and_split3(nullptr, loadbuf, founder_ct, ulptr, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulii);
uiptr = &(g_epi_tot2[block_idx2 * 3]);
uiptr[0] = popcount_longs(ulptr, founder_ctv3);
uiptr[1] = popcount_longs(&(ulptr[founder_ctv3]), founder_ctv3);
@@ -5530,7 +5546,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
if (marker_idx1 >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = ((marker_idx1 - marker_idx1_start) * 100LLU) / job_size;
if (pct < 100) {
@@ -5563,7 +5579,7 @@ int32_t ld_report_dprime(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintp
}
int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t parallel_idx, uint32_t parallel_tot, uintptr_t* sex_male, uintptr_t* founder_include2, uintptr_t* founder_male_include2, uintptr_t* loadbuf, char* outname, uint32_t hh_exists) {
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t* marker_exclude = g_ld_marker_exclude;
char* marker_ids = g_ld_marker_ids;
uintptr_t max_marker_id_len = g_ld_max_marker_id_len;
@@ -5806,12 +5822,12 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
}
uljj = founder_trail_ct + 2;
for (ulii = 1; ulii <= idx1_block_size; ulii++) {
- fill_ulong_zero(&(g_ld_geno1[ulii * founder_ct_192_long - uljj]), uljj);
- fill_ulong_zero(&(g_ld_geno_masks1[ulii * founder_ct_192_long - uljj]), uljj);
+ fill_ulong_zero(uljj, &(g_ld_geno1[ulii * founder_ct_192_long - uljj]));
+ fill_ulong_zero(uljj, &(g_ld_geno_masks1[ulii * founder_ct_192_long - uljj]));
}
for (ulii = 1; ulii <= idx2_block_size; ulii++) {
- fill_ulong_zero(&(g_ld_geno2[ulii * founder_ct_192_long - uljj]), uljj);
- fill_ulong_zero(&(g_ld_geno_masks2[ulii * founder_ct_192_long - uljj]), uljj);
+ fill_ulong_zero(uljj, &(g_ld_geno2[ulii * founder_ct_192_long - uljj]));
+ fill_ulong_zero(uljj, &(g_ld_geno_masks2[ulii * founder_ct_192_long - uljj]));
}
marker_uidx1 = next_unset_unsafe(marker_exclude_idx1, 0);
if (marker_idx1) {
@@ -5836,8 +5852,8 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
if (idx1_subset) {
if (!is_inter_chr) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
- marker_uidx2_base = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, &uii);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
+ marker_uidx2_base = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1, window_size_m1, window_bp, &uii);
marker_idx2_base = marker_uidx2_base - popcount_bit_idx(marker_exclude, 0, marker_uidx2_base);
marker_idx2 = marker_idx2_base + uii;
} else {
@@ -5871,19 +5887,19 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
}
}
if (marker_uidx1_tmp >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx1_tmp);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
chrom_last = prev_unset_unsafe(marker_exclude, chrom_end);
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
uii = 1;
}
if (!is_inter_chr) {
if (uii) {
if (idx1_subset) {
- marker_uidx2_back = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, &window_trail_ct);
+ marker_uidx2_back = window_back(marker_pos, marker_exclude, next_unset_unsafe(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx]), marker_uidx1_tmp, window_size_m1, window_bp, &window_trail_ct);
}
marker_uidx2_fwd = window_forward(marker_pos, marker_exclude, marker_uidx1_tmp, chrom_last, window_size_m1, window_bp, &window_lead_ct);
marker_uidx2_fwd2 = marker_uidx2_fwd;
@@ -5981,12 +5997,12 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
}
}
if (marker_uidx2 >= chrom_end2) {
- chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
+ chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
- chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+ chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx2);
- is_x = (((int32_t)chrom_idx2) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx2) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx2) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx2) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx2), bedfile, loadbuf, &(g_ld_geno2[block_idx2 * founder_ct_192_long]))) {
goto ld_report_regular_ret_READ_FAIL;
@@ -6027,7 +6043,7 @@ int32_t ld_report_regular(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uint
fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
if (marker_idx1 >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = ((marker_idx1 - marker_idx1_start) * 100LLU) / job_size;
if (pct < 100) {
@@ -6075,8 +6091,8 @@ int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t be
unsigned char* bigstack_mark = g_bigstack_base;
uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
uintptr_t founder_ct = popcount_longs(founder_info, unfiltered_sample_ctv2 / 2);
- uintptr_t* founder_include2 = NULL;
- uintptr_t* founder_male_include2 = NULL;
+ uintptr_t* founder_include2 = nullptr;
+ uintptr_t* founder_male_include2 = nullptr;
uintptr_t founder_ct_mld = (founder_ct + MULTIPLEX_LD - 1) / MULTIPLEX_LD;
uint32_t founder_ct_mld_m1 = ((uint32_t)founder_ct_mld) - 1;
#ifdef __LP64__
@@ -6101,7 +6117,7 @@ int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t be
g_ld_marker_ct = marker_ct;
g_ld_chrom_info_ptr = chrom_info_ptr;
g_ld_thread_ct = g_thread_ct;
- g_ld_set_allele_freqs = (ld_modifier & LD_WITH_FREQS)? set_allele_freqs : NULL;
+ g_ld_set_allele_freqs = (ld_modifier & LD_WITH_FREQS)? set_allele_freqs : nullptr;
if (founder_ct < 2) {
LOGERRPRINTF("Warning: Skipping --r%s since there are less than two founders.\n(--make-founders may come in handy here.)\n", g_ld_is_r2? "2" : "");
goto ld_report_ret_1;
@@ -6138,13 +6154,13 @@ int32_t ld_report(pthread_t* threads, Ld_info* ldip, FILE* bedfile, uintptr_t be
}
*bufptr = '\0';
if (ld_modifier & LD_INPHASE) {
- if (max_marker_allele_len * 4 + plink_maxsnp * 2 + get_max_chrom_len(chrom_info_ptr) * 2 + 128 > MAXLINELEN) {
+ if (max_marker_allele_len * 4 + plink_maxsnp * 2 + get_max_chrom_slen(chrom_info_ptr) * 2 + 128 > MAXLINELEN) {
logerrprint("Error: --r/--r2 in-phase does not support very long allele codes.\n");
goto ld_report_ret_INVALID_CMDLINE;
}
g_ld_marker_allele_ptrs = marker_allele_ptrs;
} else {
- g_ld_marker_allele_ptrs = NULL;
+ g_ld_marker_allele_ptrs = nullptr;
}
if (ld_modifier & (LD_MATRIX_SQ | LD_MATRIX_SQ0 | LD_MATRIX_TRI)) {
retval = ld_report_matrix(threads, ldip, bedfile, bed_offset, unfiltered_marker_ct, marker_exclude, marker_reverse, unfiltered_sample_ct, founder_info, parallel_idx, parallel_tot, sex_male, founder_include2, founder_male_include2, loadbuf, outname, hh_exists);
@@ -6183,12 +6199,12 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
uintptr_t max_window_size = 1;
uintptr_t pct = 1;
uintptr_t pct_thresh = marker_ct / 100;
- FILE* infile = NULL;
- FILE* outfile = NULL;
- uintptr_t* final_set = NULL;
- uintptr_t* founder_include2 = NULL;
- uintptr_t* founder_male_include2 = NULL;
- char* chrom_name_ptr = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
+ uintptr_t* final_set = nullptr;
+ uintptr_t* founder_include2 = nullptr;
+ uintptr_t* founder_male_include2 = nullptr;
+ char* chrom_name_ptr = nullptr;
double tag_thresh = ldip->show_tags_r2 * (1 - SMALL_EPSILON);
uint32_t tags_list = (ldip->modifier & LD_SHOW_TAGS_LIST_ALL) || (!ldip->show_tags_fname);
uint32_t twocolumn = ldip->modifier & LD_SHOW_TAGS_MODE2;
@@ -6197,7 +6213,7 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
uint32_t target_ct = 0;
uint32_t chrom_name_len = 0;
int32_t retval = 0;
- char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_LEN];
+ char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_SLEN];
int32_t dp_result[5];
uintptr_t founder_ct_192_long;
uintptr_t founder_ctwd12;
@@ -6268,7 +6284,7 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
}
loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
if (ldip->show_tags_fname) {
- fill_ulong_zero(targets, unfiltered_marker_ctl);
+ fill_ulong_zero(unfiltered_marker_ctl, targets);
retval = sort_item_ids(unfiltered_marker_ct, marker_exclude, unfiltered_marker_ct - marker_ct, marker_ids, max_marker_id_len, 0, 0, strcmp_deref, &sorted_marker_ids, &marker_id_map);
if (retval) {
goto show_tags_ret_1;
@@ -6365,8 +6381,8 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
}
uii = 2 + founder_ct_192_long - founder_ctl * 2;
for (ulii = 1; ulii <= max_window_size; ulii++) {
- fill_ulong_zero(&(geno[ulii * founder_ct_192_long - uii]), uii);
- fill_ulong_zero(&(geno_masks[ulii * founder_ct_192_long - uii]), uii);
+ fill_ulong_zero(uii, &(geno[ulii * founder_ct_192_long - uii]));
+ fill_ulong_zero(uii, &(geno_masks[ulii * founder_ct_192_long - uii]));
}
if (tags_list) {
@@ -6380,8 +6396,8 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
printf("--show-tags%s: 0%%", final_set? "" : " all");
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
chrom_marker_ct = chrom_end - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, chrom_end);
if (chrom_marker_ct < 2) {
marker_idx += chrom_marker_ct;
@@ -6389,8 +6405,8 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
}
chrom_name_ptr = chrom_name_buf5w4write(chrom_info_ptr, chrom_idx, &chrom_name_len, chrom_name_buf);
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (chrom_idx == ((uint32_t)chrom_info_ptr->x_code));
- is_y = (chrom_idx == ((uint32_t)chrom_info_ptr->y_code));
+ is_x = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]));
+ is_y = (chrom_idx == ((uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]));
if (fseeko(bedfile, bed_offset + (marker_uidx * ((uint64_t)unfiltered_sample_ct4)), SEEK_SET)) {
goto show_tags_ret_READ_FAIL;
}
@@ -6413,7 +6429,7 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
window_cidx_starts[window_cidx] = window_cidx2;
geno_fixed_vec_ptr = &(geno[window_cidx * founder_ct_192_long]);
mask_fixed_vec_ptr = &(geno_masks[window_cidx * founder_ct_192_long]);
- fill_ulong_zero(&(tag_matrix[window_cidx * max_window_ctl]), max_window_ctl);
+ fill_ulong_zero(max_window_ctl, &(tag_matrix[window_cidx * max_window_ctl]));
if (load_and_collapse_incl(unfiltered_sample_ct, founder_ct, founder_info, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, geno_fixed_vec_ptr)) {
goto show_tags_ret_READ_FAIL;
}
@@ -6520,19 +6536,19 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
window_cidx3 = next_set_unsafe(tag_matrix_row_ptr, 0);
}
if (uii) {
- putc('|', outfile);
+ putc_unlocked('|', outfile);
}
fputs(&(marker_ids[window_uidxs[window_cidx3] * max_marker_id_len]), outfile);
}
if (!tag_ct) {
fputs("NONE", outfile);
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
if (++marker_idx >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_ct;
if (pct < 100) {
@@ -6554,7 +6570,7 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
}
} while (chrom_marker_idx < chrom_marker_ct);
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
if (tags_list) {
if (fclose_null(&outfile)) {
goto show_tags_ret_WRITE_FAIL;
@@ -6572,7 +6588,7 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
marker_uidx = next_set(final_set, 0, unfiltered_marker_ct);
while (marker_uidx < unfiltered_marker_ct) {
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
marker_uidx++;
next_set_ul_ck(final_set, unfiltered_marker_ct, &marker_uidx);
}
@@ -6580,9 +6596,9 @@ int32_t show_tags(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uintptr_t
for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- putc('\t', outfile);
- putc('0' + IS_SET(final_set, marker_uidx), outfile);
- putc('\n', outfile);
+ putc_unlocked('\t', outfile);
+ putc_unlocked('0' + IS_SET(final_set, marker_uidx), outfile);
+ putc_unlocked('\n', outfile);
}
}
if (fclose_null(&outfile)) {
@@ -6949,12 +6965,12 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
- FILE* outfile = NULL;
- FILE* outfile_det = NULL;
+ FILE* outfile = nullptr;
+ FILE* outfile_det = nullptr;
// circular. [2n] = numStrong, [2n+1] = numRec
- uintptr_t* strong_rec_cts = NULL;
- uintptr_t* founder_include2 = NULL;
- uintptr_t* founder_male_include2 = NULL;
+ uintptr_t* strong_rec_cts = nullptr;
+ uintptr_t* founder_include2 = nullptr;
+ uintptr_t* founder_male_include2 = nullptr;
uintptr_t marker_uidx = 0;
uintptr_t block_idx_first = 0;
uintptr_t block_uidx_first = 0;
@@ -7095,7 +7111,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
goto haploview_blocks_ret_1;
}
pct_thresh = marker_ct / 100;
- fill_ulong_zero(in_haploblock, unfiltered_marker_ctl);
+ fill_ulong_zero(unfiltered_marker_ctl, in_haploblock);
loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
founder_ctl2 = QUATERCT_TO_WORDCT(founder_ct);
founder_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(founder_ct);
@@ -7120,8 +7136,8 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
fputs("--blocks: 0%", stdout);
fflush(stdout);
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++, markers_done += cur_marker_ct) {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- chrom_start = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ chrom_start = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
cur_marker_ct = chrom_end - chrom_start - popcount_bit_idx(marker_exclude, chrom_start, chrom_end);
if (cur_marker_ct < 2) {
continue;
@@ -7140,8 +7156,8 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
}
#endif
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
bigstack_reset(bigstack_mark2);
// Need to compute full 3x3 count tables, but only for a limited window;
// more similar to --clump than --fast-epistasis, so we don't bother with
@@ -7197,7 +7213,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
bigstack_alloc_ui(max_candidates * 3, &candidate_pairs);
candidate_ct = 0;
cur_block_size = 0;
- fill_uint_zero(recent_ci_types, 3);
+ fill_uint_zero(3, recent_ci_types);
// count down instead of up so more memory accesses are sequential
block_cidx = max_block_size;
forward_scan_uidx = marker_uidx;
@@ -7396,7 +7412,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
}
if (markers_done + marker_idx >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = ((markers_done + marker_idx) * 100LLU) / marker_ct;
printf("\b\b%u%%", pct++);
@@ -7434,7 +7450,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
wptr_start = memseta(wptr_start, 32, 3);
for (candidate_idx = 0; candidate_idx < ulii; candidate_idx++) {
- putc('*', outfile);
+ putc_unlocked('*', outfile);
block_cidx = candidate_pairs[2 * candidate_idx];
block_cidx2 = candidate_pairs[2 * candidate_idx + 1];
marker_uidx = block_cidx;
@@ -7450,15 +7466,15 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
for (marker_uidx = block_cidx; marker_uidx <= block_cidx2; marker_uidx++) {
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
sptr = &(marker_ids[marker_uidx * max_marker_id_len]);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(sptr, outfile);
if (marker_uidx != block_cidx) {
- putc('|', outfile_det);
+ putc_unlocked('|', outfile_det);
}
fputs(sptr, outfile_det);
}
- putc('\n', outfile);
- putc('\n', outfile_det);
+ putc_unlocked('\n', outfile);
+ putc_unlocked('\n', outfile_det);
}
block_ct += ulii;
}
@@ -7468,7 +7484,7 @@ int32_t haploview_blocks(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uin
if (fclose_null(&outfile_det)) {
goto haploview_blocks_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
LOGPRINTFWW("--blocks: %u haploblock%s written to %s .\n", block_ct, (block_ct == 1)? "" : "s", outname);
LOGPRINTFWW("Extra block details written to %s.det .\n", outname);
if (block_ct) {
@@ -7514,7 +7530,7 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
uint32_t ujj;
uint32_t ukk;
uint32_t umm;
- fill_uint_zero(marg_b, 4);
+ fill_uint_zero(4, marg_b);
memset(spaces, 32, 7);
for (uii = 0; uii < 4; uii++) {
ukk = 0;
@@ -7534,21 +7550,21 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
fwrite(spaces, 1, 9 - 2 * alen10, outfile);
}
fputs(allele10, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele10, outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (alen10 + alen11 < 7) {
fwrite(spaces, 1, 9 - alen10 - alen11, outfile);
}
fputs(allele10, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele11, outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (alen11 < 4) {
fwrite(spaces, 1, 9 - 2 * alen11, outfile);
}
fputs(allele11, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele11, outfile);
fputs(" 0/0 */*\n", outfile);
@@ -7559,7 +7575,7 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
}
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(allele00, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele00, outfile);
bufptr = g_textbuf;
*bufptr++ = ' ';
@@ -7576,7 +7592,7 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
}
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(allele00, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele01, outfile);
bufptr = g_textbuf;
*bufptr++ = ' ';
@@ -7593,7 +7609,7 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
}
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(allele01, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele01, outfile);
bufptr = g_textbuf;
*bufptr++ = ' ';
@@ -7628,21 +7644,21 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fwrite(g_textbuf, 1, plink_maxsnp + 9, outfile);
fputs(allele10, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele10, outfile);
if (alen10 < 4) {
fwrite(spaces, 1, 9 - 2 * alen10, outfile);
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(allele10, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele11, outfile);
if (alen10 + alen11 < 7) {
fwrite(spaces, 1, 9 - alen10 - alen11, outfile);
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(allele11, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele11, outfile);
if (alen11 < 4) {
fwrite(spaces, 1, 9 - 2 * alen11, outfile);
@@ -7656,7 +7672,7 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
}
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(allele00, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele00, outfile);
bufptr = memseta(g_textbuf, 32, 2);
bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[0]) * tot_recip, bufptr);
@@ -7677,7 +7693,7 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
}
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(allele00, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele01, outfile);
bufptr = memseta(g_textbuf, 32, 2);
bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[8]) * tot_recip, bufptr);
@@ -7698,7 +7714,7 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
}
fwrite(g_textbuf, 1, bufptr - g_textbuf, outfile);
fputs(allele01, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fputs(allele01, outfile);
bufptr = memseta(g_textbuf, 32, 2);
bufptr = dtoa_f_w9p6_spaced(((int32_t)counts[12]) * tot_recip, bufptr);
@@ -7742,11 +7758,11 @@ void twolocus_write_table(FILE* outfile, uint32_t* counts, uint32_t plink_maxsnp
int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t pheno_ctrl_ct, uintptr_t* pheno_c, uintptr_t* sex_male, [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
char* mkr1 = outname? epi_ip->twolocus_mkr1 : epi_ip->ld_mkr1;
char* mkr2 = outname? epi_ip->twolocus_mkr2 : epi_ip->ld_mkr2;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t ulii = strlen(mkr1) + 1;
@@ -7857,12 +7873,12 @@ int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_
if (load_and_collapse(unfiltered_sample_ct, sample_ct, sample_exclude, final_mask, IS_SET(marker_reverse, marker_uidx), bedfile, loadbuf_raw, loadbufs[marker_idx])) {
goto twolocus_ret_READ_FAIL;
}
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_haploid[marker_idx] = IS_SET(chrom_info_ptr->haploid_mask, chrom_idx);
if (is_haploid[marker_idx]) {
- is_x[marker_idx] = (chrom_idx == (uint32_t)chrom_info_ptr->x_code);
- is_y = (chrom_idx == (uint32_t)chrom_info_ptr->y_code);
+ is_x[marker_idx] = (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]);
haploid_fix(hh_exists, sample_include2, sample_male_include2, sample_ct, is_x[marker_idx], is_y, (unsigned char*)(loadbufs[marker_idx]));
}
}
@@ -7873,11 +7889,11 @@ int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_
pheno_c = sex_male;
pheno_nm = sex_male;
} else {
- pheno_c = NULL;
+ pheno_c = nullptr;
}
}
- fill_uint_zero(counts_all, 16);
- fill_uint_zero(counts_cc, 32);
+ fill_uint_zero(16, counts_all);
+ fill_uint_zero(32, counts_cc);
loadbuf0_ptr = loadbufs[0];
loadbuf1_ptr = loadbufs[1];
loadbuf0_end = &(loadbuf0_ptr[sample_ct / BITCT2]);
@@ -7937,7 +7953,7 @@ int32_t twolocus(Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_
twolocus_write_table(outfile, counts_cc, plink_maxsnp, mkr1, mkr2, marker_allele_ptrs[2 * marker_uidxs[0]], marker_allele_ptrs[2 * marker_uidxs[0] + 1], marker_allele_ptrs[2 * marker_uidxs[1]], marker_allele_ptrs[2 * marker_uidxs[1] + 1], alen00, alen01, alen10, alen11);
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
if (fclose_null(&outfile)) {
goto twolocus_ret_WRITE_FAIL;
}
@@ -8233,7 +8249,7 @@ int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE*
// because we have 3 covariates instead of one, but two of them are still
// restricted to the values {0, 1, 2} and the last is the product of the
// first two. So we're able to use variations of the QT --assoc bit hacks.
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t pheno_nm_ctl2 = QUATERCT_TO_WORDCT(pheno_nm_ct);
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
@@ -8346,7 +8362,7 @@ int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE*
g_epi_geno1[block_idx1 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
}
if (is_triangular) {
- fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
}
ulii = pheno_nm_ctl2 * sizeof(intptr_t) + 2 * sizeof(int32_t) + sizeof(double) + max_thread_ct * (3 * sizeof(int32_t) + sizeof(double));
@@ -8434,7 +8450,7 @@ int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE*
g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
}
} else {
- fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
marker_uidx2 = marker_uidx_base;
marker_idx2 = 0;
}
@@ -8514,10 +8530,10 @@ int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE*
g_epi_idx2_block_size = cur_idx2_block_size;
g_epi_idx2_block_start = marker_idx2;
idx2_block_sizea16 = round_up_pow2(cur_idx2_block_size, 16);
- fill_uint_zero(g_epi_n_sig_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
- fill_uint_zero(g_epi_fail_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
- fill_uint_zero(g_epi_n_sig_ct2, idx2_block_sizea16 * max_thread_ct);
- fill_uint_zero(g_epi_fail_ct2, idx2_block_sizea16 * max_thread_ct);
+ fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_n_sig_ct1);
+ fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_fail_ct1);
+ fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_n_sig_ct2);
+ fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_fail_ct2);
for (tidx = 0; tidx < max_thread_ct; tidx++) {
ulii = g_epi_idx1_block_bounds[tidx];
uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
@@ -8608,18 +8624,18 @@ int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE*
marker_idx2 = 0;
dptr = &(g_epi_all_chisq[block_idx1 * 2 * marker_ct2]);
if (marker_uidx >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
}
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
*wptr_start++ = ' ';
wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
*wptr_start++ = ' ';
marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
- for (chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
+ for (chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
- chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+ chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
wptr_start2 = width_force(4, wptr_start, chrom_name_write(chrom_info_ptr, chrom_idx2, wptr_start));
*wptr_start2++ = ' ';
for (; marker_uidx2 < chrom_end2; next_unset_ul_ck(marker_exclude2, chrom_end2, &marker_uidx2), marker_idx2++, dptr = &(dptr[2])) {
@@ -8670,7 +8686,7 @@ int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE*
fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
if (tests_complete >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (tests_complete * 100LLU) / tests_expected;
if (pct < 100) {
@@ -8709,7 +8725,7 @@ int32_t epistasis_linear_regression(pthread_t* threads, Epi_info* epi_ip, FILE*
}
int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t unfiltered_marker_ct, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t marker_uidx_base, uintptr_t marker_ct1, uintptr_t* marker_exclude1, uintptr_t marker_idx1_start, uintptr_t marker_idx1_end, uintptr_t marker_ct2, uintptr_t* marker_exclude2, uint32_t is_triangular, uintptr_t job_ [...]
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t pheno_nm_cta4 = round_up_pow2(pheno_nm_ct, 4);
uintptr_t pheno_nm_ctl2 = QUATERCT_TO_WORDCT(pheno_nm_ct);
@@ -8818,7 +8834,7 @@ int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE
g_epi_geno1[block_idx1 * pheno_nm_ctl2 + pheno_nm_ctl2 - 1] = 0;
}
if (is_triangular) {
- fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
}
ulii = pheno_nm_ctl2 * sizeof(intptr_t) + max_thread_ct * (3 * sizeof(int32_t) + sizeof(double));
@@ -8904,7 +8920,7 @@ int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE
g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
}
} else {
- fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
marker_uidx2 = marker_uidx_base;
marker_idx2 = 0;
}
@@ -8991,10 +9007,10 @@ int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE
g_epi_idx2_block_size = cur_idx2_block_size;
g_epi_idx2_block_start = marker_idx2;
idx2_block_sizea16 = round_up_pow2(cur_idx2_block_size, 16);
- fill_uint_zero(g_epi_n_sig_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
- fill_uint_zero(g_epi_fail_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
- fill_uint_zero(g_epi_n_sig_ct2, idx2_block_sizea16 * max_thread_ct);
- fill_uint_zero(g_epi_fail_ct2, idx2_block_sizea16 * max_thread_ct);
+ fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_n_sig_ct1);
+ fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_fail_ct1);
+ fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_n_sig_ct2);
+ fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_fail_ct2);
for (tidx = 0; tidx < max_thread_ct; tidx++) {
ulii = g_epi_idx1_block_bounds[tidx];
uljj = g_epi_idx1_block_bounds[tidx + 1] - ulii;
@@ -9085,18 +9101,18 @@ int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE
marker_idx2 = 0;
fptr = &(g_epi_all_chisq_f[block_idx1 * 2 * marker_ct2]);
if (marker_uidx >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
}
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
*wptr_start++ = ' ';
wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
*wptr_start++ = ' ';
marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
- for (chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
+ for (chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
chrom_idx2 = chrom_info_ptr->chrom_file_order[chrom_fo_idx2];
- chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+ chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
wptr_start2 = width_force(4, wptr_start, chrom_name_write(chrom_info_ptr, chrom_idx2, wptr_start));
*wptr_start2++ = ' ';
for (; marker_uidx2 < chrom_end2; next_unset_ul_ck(marker_exclude2, chrom_end2, &marker_uidx2), marker_idx2++, fptr = &(fptr[2])) {
@@ -9148,7 +9164,7 @@ int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE
fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
if (tests_complete >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (tests_complete * 100LLU) / tests_expected;
if (pct < 100) {
@@ -9185,7 +9201,7 @@ int32_t epistasis_logistic_regression(pthread_t* threads, Epi_info* epi_ip, FILE
int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct2, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uint32_t ctrl_ct, uintptr_t* pheno_c, double* pheno_d, uint32_t parallel_idx, uint32_t pa [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(unfiltered_sample_ct);
uintptr_t final_mask = get_final_mask(pheno_nm_ct);
@@ -9229,10 +9245,10 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
uint32_t first_pos = 0;
uint32_t uii = 0;
int32_t retval = 0;
- uint32_t* gap_cts = NULL;
- uintptr_t* ctrlbuf = NULL;
- uintptr_t* marker_exclude1 = NULL;
- uintptr_t* ulptr = NULL;
+ uint32_t* gap_cts = nullptr;
+ uintptr_t* ctrlbuf = nullptr;
+ uintptr_t* marker_exclude1 = nullptr;
+ uintptr_t* ulptr = nullptr;
uintptr_t ularr[sizeof(double) / BYTECT];
uintptr_t* casebuf;
uintptr_t* loadbuf;
@@ -9363,10 +9379,10 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
fill_quatervec_55(ulii * BITCT2, ulptr);
}
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
if (is_set(chrom_info_ptr->haploid_mask, chrom_idx)) {
- uii = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
+ uii = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
fill_bits(uii, chrom_end - uii, marker_exclude2);
marker_uidx = chrom_end;
continue;
@@ -9651,7 +9667,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
g_epi_geno1[block_idx1 * tot_ctsplit + tot_ctsplit - 1] = 0;
}
if (is_triangular) {
- fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
}
// don't actually need best_chisq2, best_id2, n_sig_ct2, fail_ct2 if not
// triangular, but rather not complicate/duplicate the common case inner
@@ -9740,7 +9756,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
g_epi_geno1_offsets[2 * block_idx1 + 1] = ulii;
}
} else {
- fill_uint_zero(g_epi_geno1_offsets, 2 * idx1_block_size);
+ fill_uint_zero(2 * idx1_block_size, g_epi_geno1_offsets);
marker_uidx2 = marker_uidx_base;
marker_idx2 = 0;
}
@@ -9770,7 +9786,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
g_epi_idx1_block_bounds16[tidx] = g_epi_idx1_block_bounds16[tidx - 1] + round_up_pow2_ui(uii, 16);
}
g_epi_idx1_block_bounds[max_thread_ct] = idx1_block_size;
- fill_ulong_zero(g_epi_zmiss1, QUATERCT_TO_WORDCT(idx1_block_size));
+ fill_ulong_zero(QUATERCT_TO_WORDCT(idx1_block_size), g_epi_zmiss1);
chrom_end = 0;
for (block_idx1 = 0; block_idx1 < idx1_block_size; marker_uidx_tmp++, block_idx1++) {
if (IS_SET(marker_exclude1, marker_uidx_tmp)) {
@@ -9790,14 +9806,14 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
if (is_case_only_window) {
cur_window_end = marker_pos[marker_uidx_tmp] + case_only_gap;
if (marker_uidx_tmp >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx_tmp);
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx_tmp);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
if (is_triangular) {
marker_uidx2 = marker_uidx_tmp;
marker_idx2 = block_idx1 + marker_idx1;
last_pos = marker_pos[marker_uidx_tmp];
} else {
- uii = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
+ uii = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
if (marker_pos[marker_uidx_tmp] < case_only_gap) {
ujj = 0;
} else {
@@ -9883,7 +9899,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
if (cur_idx2_block_size > marker_ct2 - marker_idx2) {
cur_idx2_block_size = marker_ct2 - marker_idx2;
}
- fill_ulong_zero(g_epi_zmiss2, QUATERCT_TO_WORDCT(cur_idx2_block_size));
+ fill_ulong_zero(QUATERCT_TO_WORDCT(cur_idx2_block_size), g_epi_zmiss2);
for (block_idx2 = 0; block_idx2 < cur_idx2_block_size; marker_uidx2++, block_idx2++) {
if (IS_SET(marker_exclude2, marker_uidx2)) {
marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx2);
@@ -9915,10 +9931,10 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
g_epi_idx2_block_size = cur_idx2_block_size;
g_epi_idx2_block_start = marker_idx2;
idx2_block_sizea16 = round_up_pow2(cur_idx2_block_size, 16);
- fill_uint_zero(g_epi_n_sig_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
- fill_uint_zero(g_epi_fail_ct1, idx1_block_size + 15 * (max_thread_ct - 1));
- fill_uint_zero(g_epi_n_sig_ct2, idx2_block_sizea16 * max_thread_ct);
- fill_uint_zero(g_epi_fail_ct2, idx2_block_sizea16 * max_thread_ct);
+ fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_n_sig_ct1);
+ fill_uint_zero(idx1_block_size + 15 * (max_thread_ct - 1), g_epi_fail_ct1);
+ fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_n_sig_ct2);
+ fill_uint_zero(idx2_block_sizea16 * max_thread_ct, g_epi_fail_ct2);
for (tidx = 0; tidx < max_thread_ct; tidx++) {
ulii = g_epi_idx1_block_bounds[tidx];
uljj = g_epi_idx1_block_bounds[tidx + 1];
@@ -10003,17 +10019,17 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
marker_idx2 = 0;
dptr = &(g_epi_all_chisq[block_idx1 * marker_ct2]);
if (marker_uidx >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
}
wptr_start = width_force(4, g_textbuf, chrom_name_write(chrom_info_ptr, chrom_idx, g_textbuf));
*wptr_start++ = ' ';
wptr_start = fw_strcpy(plink_maxsnp, &(marker_ids[marker_uidx * max_marker_id_len]), wptr_start);
*wptr_start++ = ' ';
marker_uidx2 = next_unset_ul_unsafe(marker_exclude2, marker_uidx_base);
- for (chrom_fo_idx2 = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
- chrom_end2 = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx2 + 1];
+ for (chrom_fo_idx2 = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2); chrom_fo_idx2 < chrom_ct; chrom_fo_idx2++) {
+ chrom_end2 = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx2 + 1];
if (marker_uidx2 >= chrom_end2) {
continue;
}
@@ -10094,7 +10110,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
fputs("\b\b\b\b\b\b\b\b\b\b \b\b\b\b\b\b\b\b\b\b", stdout);
if (tests_complete >= pct_thresh) {
if (pct > 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (tests_complete * 100LLU) / tests_expected;
if (pct < 100) {
@@ -10139,7 +10155,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
marker_uidx = jump_forward_unset_unsafe(marker_exclude1, marker_uidx + 1, marker_idx1);
}
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
if (marker_uidx >= chrom_end) {
continue;
}
@@ -10185,7 +10201,7 @@ int32_t epistasis_report(pthread_t* threads, Epi_info* epi_ip, FILE* bedfile, ui
wptr = dtoa_g_wxp8x(best_chisq[marker_idx1], 12, ' ', wptr);
}
uii = marker_idx_to_uidx[best_ids[marker_idx1]];
- wptr = width_force(4, wptr, chrom_name_write(chrom_info_ptr, get_marker_chrom(chrom_info_ptr, uii), wptr));
+ wptr = width_force(4, wptr, chrom_name_write(chrom_info_ptr, get_variant_chrom(chrom_info_ptr, uii), wptr));
*wptr++ = ' ';
wptr = fw_strcpy(plink_maxsnp, &(marker_ids[uii * max_marker_id_len]), wptr);
} else {
@@ -10265,10 +10281,10 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
uintptr_t founder_ctsplit = 3 * founder_ctv3;
uintptr_t final_mask = get_final_mask(founder_ct);
uintptr_t window_max = 1;
- uintptr_t* founder_include2 = NULL;
- uintptr_t* founder_male_include2 = NULL;
- uintptr_t* sex_male_collapsed = NULL;
- uintptr_t* cur_geno1_male = NULL;
+ uintptr_t* founder_include2 = nullptr;
+ uintptr_t* founder_male_include2 = nullptr;
+ uintptr_t* sex_male_collapsed = nullptr;
+ uintptr_t* cur_geno1_male = nullptr;
double prune_ld_thresh = ldip->prune_last_param * (1 + SMALL_EPSILON);
uint32_t window_is_kb = (ldip->modifier / LD_PRUNE_KB_WINDOW) & 1;
uint32_t ld_window_size = ldip->prune_window_size;
@@ -10308,6 +10324,7 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
uint32_t window_unfiltered_end;
uint32_t cur_window_size;
uint32_t cur_chrom;
+ uint32_t chrom_start;
uint32_t chrom_end;
uint32_t is_haploid;
uint32_t is_x;
@@ -10345,7 +10362,7 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
if (window_is_kb) {
// determine maximum number of markers that may need to be loaded at once
for (cur_chrom = 1; cur_chrom < chrom_code_end; cur_chrom++) {
- if (chrom_exists(chrom_info_ptr, cur_chrom)) {
+ if (is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
window_max = chrom_window_max(marker_pos, marker_exclude, chrom_info_ptr, cur_chrom, 0x7fffffff, ld_window_size * 1000, window_max);
}
}
@@ -10382,7 +10399,7 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
geno[ulii * founder_ctsplit + 2 * founder_ctv3 - 1] = 0;
geno[ulii * founder_ctsplit + founder_ctsplit - 1] = 0;
}
- if ((chrom_info_ptr->x_code != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->x_code)) {
+ if ((chrom_info_ptr->xymt_codes[X_OFFSET] != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_info_ptr->xymt_codes[X_OFFSET])) {
if (bigstack_alloc_ul(founder_ctl, &sex_male_collapsed) ||
bigstack_alloc_ul(founder_ctsplit, &cur_geno1_male)) {
goto indep_pairphase_ret_NOMEM;
@@ -10393,7 +10410,7 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
prev_end = 0;
ld_prune_start_chrom(window_is_kb, &cur_chrom, &chrom_end, window_unfiltered_start, live_indices, start_arr, &window_unfiltered_end, ld_window_size, &cur_window_size, unfiltered_marker_ct, pruned_arr, chrom_info_ptr, marker_pos, &is_haploid, &is_x, &is_y);
cur_exclude_ct = 0;
- fill_ulong_zero(zmiss, window_maxl);
+ fill_ulong_zero(window_maxl, zmiss);
if (cur_window_size > 1) {
for (ulii = 0; ulii < (uintptr_t)cur_window_size; ulii++) {
uljj = live_indices[ulii];
@@ -10407,7 +10424,7 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
}
cur_geno1 = &(geno[ulii * founder_ctsplit]);
- load_and_split3(NULL, loadbuf, founder_ct, cur_geno1, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulkk);
+ load_and_split3(nullptr, loadbuf, founder_ct, cur_geno1, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulkk);
cur_tots[ulii * 3] = popcount_longs(cur_geno1, founder_ctv3);
cur_tots[ulii * 3 + 1] = popcount_longs(&(cur_geno1[founder_ctv3]), founder_ctv3);
cur_tots[ulii * 3 + 2] = popcount_longs(&(cur_geno1[2 * founder_ctv3]), founder_ctv3);
@@ -10420,7 +10437,8 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
}
}
pct = 1;
- pct_thresh = window_unfiltered_start + ((uint64_t)pct * (chrom_end - chrom_info_ptr->chrom_start[cur_chrom])) / 100;
+ chrom_start = get_chrom_start_vidx(chrom_info_ptr, cur_chrom);
+ pct_thresh = window_unfiltered_start + ((uint64_t)pct * (chrom_end - chrom_start)) / 100;
while ((window_unfiltered_start < chrom_end) || (cur_window_size > 1)) {
if (cur_window_size > 1) {
do {
@@ -10524,10 +10542,10 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
break;
}
if (window_unfiltered_start >= pct_thresh) {
- pct = ((window_unfiltered_start - chrom_info_ptr->chrom_start[cur_chrom]) * 100LLU) / (chrom_end - chrom_info_ptr->chrom_start[cur_chrom]);
+ pct = ((window_unfiltered_start - chrom_start) * 100LLU) / (chrom_end - chrom_start);
printf("\r%u%%", pct++);
fflush(stdout);
- pct_thresh = chrom_info_ptr->chrom_start[cur_chrom] + (((uint64_t)pct * (chrom_end - chrom_info_ptr->chrom_start[cur_chrom])) / 100);
+ pct_thresh = chrom_start + (((uint64_t)pct * (chrom_end - chrom_start)) / 100);
}
uljj = 0;
if (window_unfiltered_end < window_unfiltered_start) {
@@ -10590,7 +10608,7 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
haploid_fix(hh_exists, founder_include2, founder_male_include2, founder_ct, is_x, is_y, (unsigned char*)loadbuf);
}
cur_geno1 = &(geno[cur_window_size * founder_ctsplit]);
- load_and_split3(NULL, loadbuf, founder_ct, cur_geno1, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulkk);
+ load_and_split3(nullptr, loadbuf, founder_ct, cur_geno1, dummy_nm, dummy_nm, founder_ctv3, 0, 0, 1, &ulkk);
cur_tots[((uintptr_t)cur_window_size) * 3] = popcount_longs(cur_geno1, founder_ctv3);
cur_tots[((uintptr_t)cur_window_size) * 3 + 1] = popcount_longs(&(cur_geno1[founder_ctv3]), founder_ctv3);
cur_tots[((uintptr_t)cur_window_size) * 3 + 2] = popcount_longs(&(cur_geno1[2 * founder_ctv3]), founder_ctv3);
@@ -10608,9 +10626,8 @@ int32_t indep_pairphase(Ld_info* ldip, FILE* bedfile, uintptr_t bed_offset, uint
start_arr[cur_window_size] = window_unfiltered_end;
}
}
- uii = get_marker_chrom(chrom_info_ptr, window_unfiltered_start - 1);
- putchar('\r');
- LOGPRINTF("Pruned %" PRIuPTR " variant%s from chromosome %u, leaving %" PRIuPTR ".\n", cur_exclude_ct, (cur_exclude_ct == 1)? "" : "s", uii, chrom_info_ptr->chrom_end[uii] - chrom_info_ptr->chrom_start[uii] - popcount_bit_idx(marker_exclude, chrom_info_ptr->chrom_start[uii], chrom_info_ptr->chrom_end[uii]) - cur_exclude_ct);
+ putc_unlocked('\r', stdout);
+ LOGPRINTF("Pruned %" PRIuPTR " variant%s from chromosome %u, leaving %" PRIuPTR ".\n", cur_exclude_ct, (cur_exclude_ct == 1)? "" : "s", cur_chrom, chrom_end - chrom_start - popcount_bit_idx(marker_exclude, chrom_start, chrom_end) - cur_exclude_ct);
tot_exclude_ct += cur_exclude_ct;
// advance chromosomes as necessary
@@ -10654,10 +10671,10 @@ Ll_epi_summary* lle_alloc(char* chrom_id, uint32_t chrom_len, char* marker_id, u
uintptr_t alloc_size = (sizeof(Ll_epi_summary) + chrom_len + marker_id_len + sizeof(intptr_t)) & (~(sizeof(intptr_t) - ONELU));
Ll_epi_summary* newptr = (Ll_epi_summary*)g_bigstack_base;
if (bigstack_left() < alloc_size) {
- return NULL;
+ return nullptr;
}
g_bigstack_base = &(g_bigstack_base[alloc_size]);
- newptr->next = NULL;
+ newptr->next = nullptr;
newptr->best_chisq = chisq;
newptr->n_sig = nsig;
newptr->n_tot = ntot;
@@ -10720,15 +10737,15 @@ int32_t validate_epistasis_summary_header(char* bufptr) {
int32_t epi_summary_merge(Epi_info* epi_ip, char* outname, char* outname_end) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
- FILE* outfile = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
char* inprefix = epi_ip->summary_merge_prefix;
char* inprefix_end = (char*)memchr(inprefix, 0, FNAMESIZE);
- Ll_epi_summary* list_start = NULL;
+ Ll_epi_summary* list_start = nullptr;
// first .3 entry is later than first .2 entry, etc., so we can save
// ourselves some linked list traversal time by starting the first-entry scan
// after where the last one left off.
- Ll_epi_summary* last_start = NULL;
+ Ll_epi_summary* last_start = nullptr;
Ll_epi_summary** lle_pp = &list_start; // end-of-list pointer for first file
uint32_t file_ct = epi_ip->summary_merge_ct;
int32_t retval = 0;
@@ -11176,14 +11193,14 @@ void test_mishap_write_line(FILE* outfile, char* wptr, uint32_t prev_alen, uint3
int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, double output_min_p, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, char** marker_allele_ptrs, double min_maf, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t sample_ctl2 = QUATERCT_TO_WORDCT(sample_ct);
uintptr_t sample_ctv2 = QUATERCT_TO_ALIGNED_WORDCT(sample_ct);
uintptr_t final_mask = get_final_mask(sample_ct);
char* tbuf2 = &(g_textbuf[MAXLINELEN]);
- char* wptr2 = NULL;
+ char* wptr2 = nullptr;
uint32_t chrom_ct = chrom_info_ptr->chrom_ct;
uint32_t inspected_ct = 0;
uint32_t missing_ct_next = 0;
@@ -11272,8 +11289,8 @@ int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
if (is_set(chrom_info_ptr->haploid_mask, chrom_idx)) {
continue;
}
- marker_uidx_cur = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ marker_uidx_cur = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
marker_uidx_cur = next_unset_ul(marker_exclude, marker_uidx_cur, chrom_end);
if (marker_uidx_cur == chrom_end) {
continue;
@@ -11283,7 +11300,7 @@ int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
continue;
}
prevsnp_ptr = loadbuf;
- fill_ulong_zero(prevsnp_ptr, sample_ctl2);
+ fill_ulong_zero(sample_ctl2, prevsnp_ptr);
cursnp_ptr = &(loadbuf[sample_ctv2]);
if (fseeko(bedfile, bed_offset + marker_uidx_cur * ((uint64_t)unfiltered_sample_ct4), SEEK_SET)) {
goto test_mishap_ret_READ_FAIL;
@@ -11314,7 +11331,7 @@ int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
missing_ct_next = count_01(nextsnp_ptr, sample_ctl2);
} else {
test_mishap_last_chrom_snp:
- fill_ulong_zero(nextsnp_ptr, sample_ctl2);
+ fill_ulong_zero(sample_ctl2, nextsnp_ptr);
}
if (missing_ct_cur < 5) {
continue;
@@ -11452,10 +11469,10 @@ int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
}
tot_recip = 1.0 / tot_recip;
if (hap_ct_table[2] + hap_ct_table[3] >= dxx) {
- test_mishap_write_line(outfile, wptr, 0, next_a1len, NULL, marker_allele_ptrs[2 * marker_uidx_next], hap_ct_table, &(hap_ct_table[2]), tot_recip, output_min_p, tbuf2, flanklen);
+ test_mishap_write_line(outfile, wptr, 0, next_a1len, nullptr, marker_allele_ptrs[2 * marker_uidx_next], hap_ct_table, &(hap_ct_table[2]), tot_recip, output_min_p, tbuf2, flanklen);
}
if (hap_ct_table[4] + hap_ct_table[5] >= dxx) {
- test_mishap_write_line(outfile, wptr, 0, next_a2len, NULL, marker_allele_ptrs[2 * marker_uidx_next + 1], hap_ct_table, &(hap_ct_table[4]), tot_recip, output_min_p, tbuf2, flanklen);
+ test_mishap_write_line(outfile, wptr, 0, next_a2len, nullptr, marker_allele_ptrs[2 * marker_uidx_next + 1], hap_ct_table, &(hap_ct_table[4]), tot_recip, output_min_p, tbuf2, flanklen);
}
}
} else {
@@ -11485,17 +11502,17 @@ int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
}
tot_recip = 1.0 / tot_recip;
if (hap_ct_table[2] + hap_ct_table[3] >= dxx) {
- test_mishap_write_line(outfile, wptr, prev_a1len, 0, marker_allele_ptrs[2 * marker_uidx_prev], NULL, hap_ct_table, &(hap_ct_table[2]), tot_recip, output_min_p, tbuf2, flanklen);
+ test_mishap_write_line(outfile, wptr, prev_a1len, 0, marker_allele_ptrs[2 * marker_uidx_prev], nullptr, hap_ct_table, &(hap_ct_table[2]), tot_recip, output_min_p, tbuf2, flanklen);
}
if (hap_ct_table[4] + hap_ct_table[5] >= dxx) {
- test_mishap_write_line(outfile, wptr, prev_a2len, 0, marker_allele_ptrs[2 * marker_uidx_prev + 1], NULL, hap_ct_table, &(hap_ct_table[4]), tot_recip, output_min_p, tbuf2, flanklen);
+ test_mishap_write_line(outfile, wptr, prev_a2len, 0, marker_allele_ptrs[2 * marker_uidx_prev + 1], nullptr, hap_ct_table, &(hap_ct_table[4]), tot_recip, output_min_p, tbuf2, flanklen);
}
}
hap_ct_table[0] = orig_cmiss_tot * 0.5;
hap_ct_table[1] = orig_cnm_tot * 0.5;
hap_ct_table[2] = (int32_t)(counts[1] + counts[3] + counts[4] + counts[5] + counts[7]);
hap_ct_table[3] = (int32_t)(counts[10] + counts[12] + counts[13] + counts[14] + counts[16]);
- test_mishap_write_line(outfile, wptr, 6, 0, "HETERO", NULL, hap_ct_table, &(hap_ct_table[2]), 1.0 / (hap_ct_table[0] + hap_ct_table[1]), output_min_p, tbuf2, flanklen);
+ test_mishap_write_line(outfile, wptr, 6, 0, "HETERO", nullptr, hap_ct_table, &(hap_ct_table[2]), 1.0 / (hap_ct_table[0] + hap_ct_table[1]), output_min_p, tbuf2, flanklen);
inspected_ct++;
if (!(inspected_ct % 1000)) {
printf("\r--test-mishap: %uk loci checked.", inspected_ct / 1000);
@@ -11507,7 +11524,7 @@ int32_t test_mishap(FILE* bedfile, uintptr_t bed_offset, char* outname, char* ou
if (fclose_null(&outfile)) {
goto test_mishap_ret_WRITE_FAIL;
}
- putchar('\r');
+ putc_unlocked('\r', stdout);
if (inspected_ct < marker_ct) {
LOGPRINTF("--test-mishap: %u loc%s checked (%" PRIuPTR " skipped).\n", inspected_ct, (inspected_ct == 1)? "us" : "i", marker_ct - inspected_ct);
LOGPREPRINTFWW("Report written to %s .\n", outname);
@@ -11658,7 +11675,7 @@ int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
// prefix}.ldset.
// Note that, when very large set(s) are present, and there's a moderate
// amount of "random" long-range LD, the memory requirement may be huge.
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
unsigned char* bigstack_end_mark = g_bigstack_end;
uintptr_t marker_ctv = ((marker_ct + 127) / 128) * (128 / BITCT);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
@@ -11778,6 +11795,11 @@ int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
// bugfix: last word might not be initialized by unpack_set(). Also
// initialize second-to-last word to defend against an unpack_set()
// implementation change.
+#ifndef __LP64__
+ // oh, this also matters in 32-bit case
+ tmp_set_bitfield[marker_ctv - 4] = 0;
+ tmp_set_bitfield[marker_ctv - 3] = 0;
+#endif
tmp_set_bitfield[marker_ctv - 2] = 0;
tmp_set_bitfield[marker_ctv - 1] = 0;
g_ld_load2_bitfield = load2_bitfield;
@@ -11829,22 +11851,22 @@ int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
bigstack_end_alloc_ul(idx1_block_size * marker_ctv, &result_bitfield);
uljj = founder_trail_ct + 2;
for (ulii = 1; ulii <= idx1_block_size; ulii++) {
- fill_ulong_zero(&(geno1[ulii * founder_ct_192_long - uljj]), uljj);
- fill_ulong_zero(&(geno_masks1[ulii * founder_ct_192_long - uljj]), uljj);
+ fill_ulong_zero(uljj, &(geno1[ulii * founder_ct_192_long - uljj]));
+ fill_ulong_zero(uljj, &(geno_masks1[ulii * founder_ct_192_long - uljj]));
}
for (ulii = 1; ulii <= idx2_block_size; ulii++) {
- fill_ulong_zero(&(geno2[ulii * founder_ct_192_long - uljj]), uljj);
- fill_ulong_zero(&(geno_masks2[ulii * founder_ct_192_long - uljj]), uljj);
+ fill_ulong_zero(uljj, &(geno2[ulii * founder_ct_192_long - uljj]));
+ fill_ulong_zero(uljj, &(geno_masks2[ulii * founder_ct_192_long - uljj]));
}
- fill_ulong_zero(result_bitfield, idx1_block_size * marker_ctv);
+ fill_ulong_zero(idx1_block_size * marker_ctv, result_bitfield);
g_ld_geno1 = geno1;
g_ld_geno_masks1 = geno_masks1;
g_ld_geno2 = geno2;
g_ld_geno_masks2 = geno_masks2;
g_ld_result_bitfield = result_bitfield;
idx1_block_end = marker_idx + idx1_block_size;
- fill_ulong_zero(load2_bitfield, marker_ctv);
- fill_ulong_zero(result_bitfield, idx1_block_size * marker_ctv);
+ fill_ulong_zero(marker_ctv, load2_bitfield);
+ fill_ulong_zero(idx1_block_size * marker_ctv, result_bitfield);
for (set_idx = 0; set_idx < set_ct; set_idx++) {
cur_setdef = setdefs[set_idx];
setdef_iter_init(cur_setdef, marker_ct, marker_idx, &marker_idx2, &setdef_incr_aux);
@@ -11870,7 +11892,7 @@ int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
bitvec_or(&(tmp_set_bitfield[firstw]), wlen - firstw, &(load2_bitfield[firstw]));
}
}
- }
+ }
load_idx2_tot = popcount_longs(load2_bitfield, marker_ctv);
if (!load_idx2_tot) {
// no new r^2 computations to make at all!
@@ -11893,12 +11915,12 @@ int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
}
if (marker_uidx >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
ulii = block_idx1 * founder_ct_192_long;
loadbuf_ptr = &(geno1[ulii]);
@@ -11926,12 +11948,12 @@ int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
block_idx2 = 0;
while (1) {
if (marker_uidx2 >= chrom_end) {
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
+ chrom_fo_idx = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx2);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
- is_x = (((int32_t)chrom_idx) == chrom_info_ptr->x_code);
- is_y = (((int32_t)chrom_idx) == chrom_info_ptr->y_code);
+ is_x = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (((int32_t)chrom_idx) == chrom_info_ptr->xymt_codes[Y_OFFSET]);
}
ulii = block_idx2 * founder_ct_192_long;
loadbuf_ptr = &(geno2[ulii]);
@@ -12024,7 +12046,7 @@ int32_t construct_ld_map(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset
}
}
fputs(&(marker_ids[marker_idx_to_uidx[marker_idx2] * max_marker_id_len]), outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
marker_idx2++;
}
@@ -12283,7 +12305,7 @@ int32_t set_test_common_init(pthread_t* threads, FILE* bedfile, uintptr_t bed_of
for (set_idx = 0; set_idx < set_ct; set_idx++) {
// we're calling this again during final write anyway, so don't bother
// saving raw_sig_ct or final_sig_ct now
- set_test_score(marker_ct, chisq_threshold, sip->set_max, orig_chisq, *ld_map_ptr, setdefs[set_idx], *sorted_chisq_buf_ptr, *sorted_marker_idx_buf_ptr, *proxy_arr_ptr, NULL, NULL, &(orig_set_scores[set_idx]));
+ set_test_score(marker_ct, chisq_threshold, sip->set_max, orig_chisq, *ld_map_ptr, setdefs[set_idx], *sorted_chisq_buf_ptr, *sorted_marker_idx_buf_ptr, *proxy_arr_ptr, nullptr, nullptr, &(orig_set_scores[set_idx]));
}
// just treat --mperm as --perm with min_perms == max_perms, since this isn't
// a proper max(T) test
@@ -12327,7 +12349,7 @@ void compute_set_scores(uintptr_t marker_ct, uintptr_t perm_vec_ct, uintptr_t se
stat_high = orig_set_scores[set_idx] + EPSILON;
stat_low = orig_set_scores[set_idx] - EPSILON;
for (pidx = 0; pidx < perm_vec_ct;) {
- set_test_score(marker_ct, chisq_threshold, set_max, &(chisq_matrix[pidx * marker_ct]), ld_map, setdefs[set_idx], sorted_chisq_buf, sorted_marker_idx_buf, proxy_arr, NULL, NULL, &cur_score);
+ set_test_score(marker_ct, chisq_threshold, set_max, &(chisq_matrix[pidx * marker_ct]), ld_map, setdefs[set_idx], sorted_chisq_buf, sorted_marker_idx_buf, proxy_arr, nullptr, nullptr, &cur_score);
if (cur_score > stat_high) {
uii += 2;
} else if (cur_score > stat_low) {
@@ -12353,9 +12375,9 @@ void compute_set_scores(uintptr_t marker_ct, uintptr_t perm_vec_ct, uintptr_t se
int32_t write_set_test_results(char* outname, char* outname_end2, Set_info* sip, uint32_t** ld_map, uint32_t** setdefs, uintptr_t* set_incl, uintptr_t set_ct, uintptr_t marker_ct_orig, uintptr_t marker_ct, uint32_t* marker_idx_to_uidx, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* perm_2success_ct, uint32_t* perm_attempt_ct, uint32_t mtest_adjust, uint32_t perm_count, double pfilter, double output_min_p, double chisq_threshold, double* orig_stats, double* sorted_chisq_buf, uin [...]
// assumes caller will free memory from stack
- FILE* outfile = NULL;
- uintptr_t* nonempty_set_incl = NULL;
- double* empirical_pvals = NULL;
+ FILE* outfile = nullptr;
+ uintptr_t* nonempty_set_incl = nullptr;
+ double* empirical_pvals = nullptr;
uintptr_t raw_set_ct = sip->ct;
uintptr_t max_set_id_len = sip->max_name_len;
uint32_t nonempty_set_ct = 0;
@@ -12407,7 +12429,7 @@ int32_t write_set_test_results(char* outname, char* outname_end2, Set_info* sip,
}
fputs(&(marker_ids[marker_idx_to_uidx[proxy_arr[0]] * max_marker_id_len]), outfile);
for (uii = 1; uii < final_sig_ct; uii++) {
- putc('|', outfile);
+ putc_unlocked('|', outfile);
fputs(&(marker_ids[marker_idx_to_uidx[proxy_arr[uii]] * max_marker_id_len]), outfile);
}
if (putc_checked('\n', outfile)) {
@@ -12442,7 +12464,7 @@ int32_t write_set_test_results(char* outname, char* outname_end2, Set_info* sip,
fill_idx_to_uidx_incl(nonempty_set_incl, raw_set_ct, nonempty_set_ct, nonempty_set_idx_to_uidx);
// .qassoc.set.adjusted instead of .set.mperm.adjusted, etc.
*outname_end2 = '\0';
- retval = multcomp(outname, outname_end2, nonempty_set_idx_to_uidx, nonempty_set_ct, sip->names, max_set_id_len, 0, NULL, NULL, pfilter, output_min_p, mtest_adjust, 1, 0.0, NULL, empirical_pvals);
+ retval = multcomp(outname, outname_end2, nonempty_set_idx_to_uidx, nonempty_set_ct, sip->names, max_set_id_len, 0, nullptr, nullptr, pfilter, output_min_p, mtest_adjust, 1, 0.0, nullptr, empirical_pvals);
}
while (0) {
write_set_test_results_ret_NOMEM:
@@ -12497,10 +12519,10 @@ void update_clump_histo(double pval, uintptr_t* histo) {
int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t plink_maxsnp, uint32_t* marker_pos, char** marker_allele_ptrs, uintptr_t* marker_reverse, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, Clump_info* clump_ip, uintptr_t* sex_male, uint32_t hh_exists) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- gzFile gz_infile = NULL;
- FILE* outfile = NULL;
- FILE* outfile_ranges = NULL;
- FILE* outfile_best = NULL;
+ gzFile gz_infile = nullptr;
+ FILE* outfile = nullptr;
+ FILE* outfile_ranges = nullptr;
+ FILE* outfile_best = nullptr;
uintptr_t marker_ctl = BITCT_TO_WORDCT(marker_ct);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
@@ -12520,20 +12542,20 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
uintptr_t range_chrom_max = 0;
uintptr_t unmatched_group_ct = 0;
uintptr_t* haploid_mask = chrom_info_ptr->haploid_mask;
- char* range_group_names = NULL;
- char* fname_ptr = NULL;
+ char* range_group_names = nullptr;
+ char* fname_ptr = nullptr;
char* annot_flattened = clump_ip->annotate_flattened;
char* tbuf2 = &(g_textbuf[MAXLINELEN]);
- char* header2_ptr = NULL;
- char* annot_ptr = NULL;
- char* cur_rg_names = NULL;
- uintptr_t* founder_include2 = NULL;
- uintptr_t* founder_male_include2 = NULL;
- uintptr_t* rg_chrom_bounds = NULL;
- uint32_t** rg_setdefs = NULL;
- uint32_t** cur_rg_setdefs = NULL;
- Clump_missing_id* not_found_list = NULL;
- uintptr_t* rangematch_bitfield = NULL;
+ char* header2_ptr = nullptr;
+ char* annot_ptr = nullptr;
+ char* cur_rg_names = nullptr;
+ uintptr_t* founder_include2 = nullptr;
+ uintptr_t* founder_male_include2 = nullptr;
+ uintptr_t* rg_chrom_bounds = nullptr;
+ uint32_t** rg_setdefs = nullptr;
+ uint32_t** cur_rg_setdefs = nullptr;
+ Clump_missing_id* not_found_list = nullptr;
+ uintptr_t* rangematch_bitfield = nullptr;
double p1_thresh = clump_ip->p1;
double p2_thresh = clump_ip->p2;
double load_pthresh = 0.05;
@@ -12663,7 +12685,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
}
if (clump_ip->range_fname) {
// 1. load range file, sort, etc.
- retval = load_range_list_sortpos(clump_ip->range_fname, clump_ip->range_border, 0, NULL, 0, chrom_info_ptr, &range_group_ct, &range_group_names, &max_range_group_id_len, &rg_chrom_bounds, &rg_setdefs, &range_chrom_max, "--clump-range");
+ retval = load_range_list_sortpos(clump_ip->range_fname, clump_ip->range_border, 0, nullptr, 0, chrom_info_ptr, &range_group_ct, &range_group_names, &max_range_group_id_len, &rg_chrom_bounds, &rg_setdefs, &range_chrom_max, "--clump-range");
if (retval) {
goto clump_reports_ret_1;
}
@@ -12761,7 +12783,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
if (!clump_entries) {
goto clump_reports_ret_NOMEM;
}
- fill_ulong_zero((uintptr_t*)clump_entries, marker_ct);
+ fill_ulong_zero(marker_ct, (uintptr_t*)clump_entries);
// 3. load file(s) in sequence. start with array of null pointers, allocate
// from bottom of stack (possibly need to save p-val, file number,
// annotations, and/or pointer to next entry) while updating
@@ -12848,7 +12870,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
break;
}
}
- fill_ulong_zero(col_bitfield, annot_ct_p2_ctl);
+ fill_ulong_zero(annot_ct_p2_ctl, col_bitfield);
uii = 0; // current 0-based column number
// We don't know in advance when the highest-precedence SNP/p-val columns
// will appear, so we initially populate parse_table with
@@ -12928,7 +12950,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
if (is_eoln_kns(*bufptr)) {
continue;
}
- fill_uint_zero(cur_parse_info, annot_ct_p2 * 2);
+ fill_uint_zero(annot_ct_p2 * 2, cur_parse_info);
uii = 0;
ukk = annot_ct * 2; // annotation string length
for (; uii < cur_read_ct; uii++) {
@@ -13087,7 +13109,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
fill_idx_to_uidx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_idx_to_uidx);
loadbuf_raw[unfiltered_sample_ctl2 - 1] = 0;
// now this indicates whether a variant has previously been in a clump
- fill_ulong_zero(cur_bitfield, marker_ctl);
+ fill_ulong_zero(marker_ctl, cur_bitfield);
// 5. iterate through clumps, calculate r^2 and write output
memcpy(outname_end, ".clumped", 9);
if (fopen_checked(outname, "w", &outfile)) {
@@ -13151,16 +13173,16 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
}
ivar_uidx = marker_idx_to_uidx[ivar_idx];
cur_bp = marker_pos[ivar_uidx];
- uii = get_marker_chrom_fo_idx(chrom_info_ptr, ivar_uidx);
+ uii = get_variant_chrom_fo_idx(chrom_info_ptr, ivar_uidx);
clump_chrom_idx = chrom_info_ptr->chrom_file_order[uii];
- ujj = chrom_info_ptr->chrom_file_order_marker_idx[uii];
+ ujj = chrom_info_ptr->chrom_fo_vidx_start[uii];
if (cur_bp < bp_radius) {
clump_uidx_first = ujj;
} else {
clump_uidx_first = ujj + uint32arr_greater_than(&(marker_pos[ujj]), ivar_uidx + 1 - ujj, cur_bp - bp_radius);
}
next_unset_unsafe_ck(marker_exclude, &clump_uidx_first);
- clump_uidx_last = ivar_uidx + uint32arr_greater_than(&(marker_pos[ivar_uidx]), chrom_info_ptr->chrom_file_order_marker_idx[uii + 1] - ivar_uidx, cur_bp + bp_radius + 1);
+ clump_uidx_last = ivar_uidx + uint32arr_greater_than(&(marker_pos[ivar_uidx]), chrom_info_ptr->chrom_fo_vidx_start[uii + 1] - ivar_uidx, cur_bp + bp_radius + 1);
prev_unset_unsafe_ck(marker_exclude, &clump_uidx_last);
marker_uidx = clump_uidx_first;
marker_idx = ivar_idx + popcount_bit_idx(marker_exclude, clump_uidx_first, ivar_uidx) + clump_uidx_first - ivar_uidx;
@@ -13171,8 +13193,8 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
// 4. write main result
cur_window_size = 0;
is_haploid = is_set(haploid_mask, clump_chrom_idx);
- is_x = (clump_chrom_idx == (uint32_t)chrom_info_ptr->x_code);
- is_y = (clump_chrom_idx == (uint32_t)chrom_info_ptr->y_code);
+ is_x = (clump_chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (clump_chrom_idx == (uint32_t)chrom_info_ptr->xymt_codes[Y_OFFSET]);
window_data_ptr = window_data;
for (; marker_idx < ivar_idx; marker_uidx++, marker_idx++) {
next_unset_unsafe_ck(marker_exclude, &marker_uidx);
@@ -13230,8 +13252,8 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
marker_idx = ivar_idx + popcount_bit_idx(marker_exclude, clump_uidx_first, ivar_uidx) + clump_uidx_first - ivar_uidx;
max_r2 = -1;
max_r2_uidx = 0xffffffffU;
- fill_ulong_zero(histo, 5);
- best_entry_ptr = NULL;
+ fill_ulong_zero(5, histo);
+ best_entry_ptr = nullptr;
for (; marker_idx < ivar_idx; marker_uidx++, marker_idx++) {
marker_uidx = next_unset_unsafe(marker_exclude, marker_uidx);
clump_entry_ptr = clump_entries[marker_idx];
@@ -13483,7 +13505,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
}
fputs(cur_a1, outfile_best);
fputs(bufptr2, outfile_best);
- putc('/', outfile_best);
+ putc_unlocked('/', outfile_best);
fputs(cur_a2, outfile_best);
fputs(bufptr3, outfile_best);
g_textbuf[0] = ' ';
@@ -13494,7 +13516,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
if (annot_flattened) {
fputs(best_entry_ptr->annot, outfile_best);
}
- putc('\n', outfile_best);
+ putc_unlocked('\n', outfile_best);
} else {
bufptr = fw_strcpyn(plink_maxsnp, 2, "NA", bufptr);
bufptr = memcpya(bufptr, " NA NA NA NA NA \n", 45);
@@ -13599,7 +13621,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
for (ulii = 0; ulii < cur_rg_ct; ulii++) {
if (interval_in_setdef(cur_rg_setdefs[ulii], min_bp, max_bp)) {
if (uljj) {
- putc(',', outfile_ranges);
+ putc_unlocked(',', outfile_ranges);
} else {
uljj = 1;
}
@@ -13657,7 +13679,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
fputs("\n\n", outfile);
last_marker_idx = ~ZEROLU;
if (rg_setdefs) {
- fill_ulong_zero(rangematch_bitfield, BITCT_TO_WORDCT(cur_rg_ct));
+ fill_ulong_zero(BITCT_TO_WORDCT(cur_rg_ct), rangematch_bitfield);
unmatched_group_ct = cur_rg_ct;
}
for (ulii = 0; ulii < cur_window_size; ulii++) {
@@ -13718,7 +13740,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
}
fwrite(cur_a1, 1, a1_len, outfile);
fputs(bufptr2, outfile);
- putc('/', outfile);
+ putc_unlocked('/', outfile);
fwrite(cur_a2, 1, a2_len, outfile);
fputs(bufptr3, outfile);
g_textbuf[0] = ' ';
@@ -13738,7 +13760,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
}
fputs(bufptr2, outfile);
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
last_marker_idx = marker_idx;
}
bufptr = memcpya(g_textbuf, "\n RANGE: ", 18);
@@ -13773,15 +13795,15 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
break;
}
uljj++;
- putc(',', outfile);
+ putc_unlocked(',', outfile);
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
if (rg_setdefs) {
if (!cur_window_size) {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
fputs(" GENES: ", outfile);
uljj = 0;
@@ -13789,23 +13811,23 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
if (interval_in_setdef(cur_rg_setdefs[ulii], min_bp, max_bp)) {
if (uljj) {
if (uljj & 7) {
- putc(',', outfile);
+ putc_unlocked(',', outfile);
} else {
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
fputs(&(cur_rg_names[ulii * max_range_group_id_len]), outfile);
uljj++;
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
if (fwrite_checked("\n------------------------------------------------------------------\n\n", 69, outfile)) {
goto clump_reports_ret_WRITE_FAIL;
}
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
if (missing_variant_ct) {
// 1. sort by ID (could switch this to hash table-based too)
// 2. pick smallest pval when duplicates present
@@ -13883,7 +13905,7 @@ int32_t clump_reports(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
}
}
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
if (fclose_null(&outfile)) {
goto clump_reports_ret_WRITE_FAIL;
}
diff --git a/plink_matrix.c b/plink_matrix.c
index 9e2e167..76507cc 100644
--- a/plink_matrix.c
+++ b/plink_matrix.c
@@ -263,7 +263,7 @@ int32_t invert_matrix(int32_t dim, double* matrix, MATRIX_INVERT_BUF1_TYPE* dbl_
// [nxn].[t(v)]
for (i=0; i<dim; i++) {
- fill_double_zero(dbl_1d_buf, dim);
+ fill_double_zero(dim, dbl_1d_buf);
for (j=0; j<dim; j++) {
for (k=0; k<dim; k++) {
dbl_1d_buf[j] += matrix[i * dim + k] * dbl_2d_buf[j * dim + k];
diff --git a/plink_misc.c b/plink_misc.c
index 2d120cb..36ca272 100644
--- a/plink_misc.c
+++ b/plink_misc.c
@@ -6,9 +6,9 @@
#include "pigz.h"
void misc_init(Score_info* sc_ip) {
- sc_ip->fname = NULL;
- sc_ip->range_fname = NULL;
- sc_ip->data_fname = NULL;
+ sc_ip->fname = nullptr;
+ sc_ip->range_fname = nullptr;
+ sc_ip->data_fname = nullptr;
sc_ip->modifier = 0;
sc_ip->varid_col = 1;
sc_ip->allele_col = 2;
@@ -101,7 +101,7 @@ int32_t make_founders(uintptr_t unfiltered_sample_ct, uintptr_t sample_ct, char*
int32_t write_nosex(char* outname, char* outname_end, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t* sex_nm, uintptr_t gender_unk_ct, char* sample_ids, uintptr_t max_sample_id_len) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t sample_uidx = 0;
int32_t retval = 0;
@@ -119,7 +119,7 @@ int32_t write_nosex(char* outname, char* outname_end, uintptr_t unfiltered_sampl
for (sample_idx = 0; sample_idx < gender_unk_ct; sample_idx++, sample_uidx++) {
next_set_ul_unsafe_ck(sex_missing, &sample_uidx);
fputs(&(sample_ids[sample_uidx * max_sample_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
if (fclose_null(&outfile)) {
goto write_nosex_ret_WRITE_FAIL;
@@ -163,14 +163,14 @@ int32_t makepheno_load(FILE* phenofile, char* makepheno_str, uintptr_t unfiltere
goto makepheno_load_ret_NOMEM;
}
pheno_c = *pheno_c_ptr;
- fill_ulong_zero(pheno_c, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, pheno_c);
}
if (makepheno_all) {
fill_all_bits(unfiltered_sample_ct, pheno_nm);
}
// probably want to permit long lines here
g_textbuf[MAXLINELEN - 1] = ' ';
- while (fgets(g_textbuf, MAXLINELEN, phenofile) != NULL) {
+ while (fgets(g_textbuf, MAXLINELEN, phenofile) != nullptr) {
line_idx++;
if (!g_textbuf[MAXLINELEN - 1]) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --make-pheno file is pathologically long.\n", line_idx);
@@ -227,7 +227,7 @@ int32_t load_pheno(FILE* phenofile, uintptr_t unfiltered_sample_ct, uintptr_t sa
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t sample_ct = unfiltered_sample_ct - sample_exclude_ct;
uintptr_t line_idx = 0;
- uintptr_t* isz = NULL;
+ uintptr_t* isz = nullptr;
double pheno_ctrld = (double)((int32_t)(1 - affection_01));
double pheno_cased = pheno_ctrld + 1.0;
double missing_phenod = (double)missing_pheno;
@@ -253,7 +253,7 @@ int32_t load_pheno(FILE* phenofile, uintptr_t unfiltered_sample_ct, uintptr_t sa
goto load_pheno_ret_NOMEM;
}
pheno_c = *pheno_c_ptr;
- fill_ulong_zero(pheno_c, unfiltered_sample_ctl);
+ fill_ulong_zero(unfiltered_sample_ctl, pheno_c);
}
}
// ----- phenotype file load -----
@@ -266,7 +266,7 @@ int32_t load_pheno(FILE* phenofile, uintptr_t unfiltered_sample_ct, uintptr_t sa
}
loadbuf = (char*)g_bigstack_base;
loadbuf[loadbuf_size - 1] = ' ';
- while (fgets(loadbuf, loadbuf_size, phenofile) != NULL) {
+ while (fgets(loadbuf, loadbuf_size, phenofile) != nullptr) {
line_idx++;
if (!loadbuf[loadbuf_size - 1]) {
if (loadbuf_size == MAXLINEBUFLEN) {
@@ -287,7 +287,7 @@ int32_t load_pheno(FILE* phenofile, uintptr_t unfiltered_sample_ct, uintptr_t sa
goto load_pheno_ret_MISSING_TOKENS;
}
tmp_len2 = strlen_se(bufptr);
- if ((tmp_len == 3) && (tmp_len2 == 3) && (!memcmp("FID", bufptr0, 3)) && (!memcmp("IID", bufptr, 3))) {
+ if ((((tmp_len == 3) && (!memcmp("FID", bufptr0, 3))) || ((tmp_len == 4) && (!memcmp("#FID", bufptr0, 4)))) && (tmp_len2 == 3) && (!memcmp("IID", bufptr, 3))) {
if (phenoname_str) {
tmp_len = strlen(phenoname_str);
do {
@@ -428,7 +428,7 @@ int32_t convert_tail_pheno(uint32_t unfiltered_sample_ct, uintptr_t* pheno_nm, u
}
pheno_c = *pheno_c_ptr;
}
- fill_ulong_zero(pheno_c, sample_uidx);
+ fill_ulong_zero(sample_uidx, pheno_c);
sample_uidx = 0;
do {
sample_uidx = next_set(pheno_nm, sample_uidx, unfiltered_sample_ct);
@@ -445,16 +445,16 @@ int32_t convert_tail_pheno(uint32_t unfiltered_sample_ct, uintptr_t* pheno_nm, u
}
} while (sample_uidx_stop < unfiltered_sample_ct);
free(pheno_d);
- *pheno_d_ptr = NULL;
+ *pheno_d_ptr = nullptr;
sample_uidx = popcount_longs(pheno_nm, BITCT_TO_WORDCT(unfiltered_sample_ct));
LOGPRINTF("--tail-pheno: %u phenotype value%s remaining.\n", sample_uidx, (sample_uidx == 1)? "" : "s");
return 0;
}
int32_t apply_cm_map(char* cm_map_fname, char* cm_map_chrname, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uint32_t* marker_pos, double* marker_cms, Chrom_info* chrom_info_ptr) {
- FILE* shapeitfile = NULL;
- char* at_sign_ptr = NULL;
- char* fname_write = NULL;
+ FILE* shapeitfile = nullptr;
+ char* at_sign_ptr = nullptr;
+ char* fname_write = nullptr;
char* fname_buf = &(g_textbuf[MAXLINELEN]);
double cm_old = 0.0;
uint32_t autosome_ct = chrom_info_ptr->autosome_ct;
@@ -474,133 +474,137 @@ int32_t apply_cm_map(char* cm_map_fname, char* cm_map_chrname, uintptr_t unfilte
uint32_t uii;
int32_t bp_old;
int32_t bp_new;
- int32_t ii;
- if (!cm_map_chrname) {
- chrom_fo_idx = 0;
- chrom_ct = chrom_info_ptr->chrom_ct;
- at_sign_ptr = strchr(cm_map_fname, '@');
- fname_write = memcpya(fname_buf, cm_map_fname, (uintptr_t)(at_sign_ptr - cm_map_fname));
- at_sign_ptr++;
- post_at_sign_len = strlen(at_sign_ptr) + 1;
- } else {
- ii = get_chrom_code(chrom_info_ptr, cm_map_chrname);
- if (ii < 0) {
- LOGPREPRINTFWW("Error: --cm-map chromosome code '%s' not found in dataset.\n", cm_map_chrname);
- goto apply_cm_map_ret_INVALID_CMDLINE_2;
- }
- chrom_fo_idx = get_marker_chrom_fo_idx(chrom_info_ptr, chrom_info_ptr->chrom_start[(uint32_t)ii]);
- chrom_ct = chrom_fo_idx + 1;
- fname_buf = cm_map_fname;
- }
- g_textbuf[MAXLINELEN - 1] = ' ';
- for (; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
- if (marker_uidx == chrom_end) {
- continue;
- }
- uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ {
if (!cm_map_chrname) {
- if ((!uii) || (uii > autosome_ct)) {
- continue;
- }
- bufptr = uint32toa(uii, fname_write);
- memcpy(bufptr, at_sign_ptr, post_at_sign_len);
- if (fopen_checked(fname_buf, "r", &shapeitfile)) {
- LOGERRPRINTFWW("Warning: --cm-map failed to open %s.\n", fname_buf);
- continue;
- }
+ chrom_fo_idx = 0;
+ chrom_ct = chrom_info_ptr->chrom_ct;
+ at_sign_ptr = strchr(cm_map_fname, '@');
+ fname_write = memcpya(fname_buf, cm_map_fname, (uintptr_t)(at_sign_ptr - cm_map_fname));
+ at_sign_ptr++;
+ post_at_sign_len = strlen(at_sign_ptr) + 1;
} else {
- if (fopen_checked(cm_map_fname, "r", &shapeitfile)) {
- goto apply_cm_map_ret_OPEN_FAIL;
- }
- }
- updated_chrom_ct++;
- irreg_line_ct = 0;
- // First line is a header with three arbitrary fields.
- // All subsequent lines have three fields in the following order:
- // 1. bp position (increasing)
- // 2. cM/Mb recombination rate between current and previous bp positions
- // 3. current cM position
- // We mostly ignore field 2, since depending just on fields 1 and 3
- // maximizes accuracy. The one exception is the very first nonheader line.
- retval = load_to_first_token(shapeitfile, MAXLINELEN, '\0', "--cm-map file", g_textbuf, &bufptr, &line_idx);
- if (retval) {
- goto apply_cm_map_ret_1;
- }
- bufptr = next_token_mult(bufptr, 2);
- if (no_more_tokens_kns(bufptr)) {
- goto apply_cm_map_ret_MISSING_TOKENS;
- }
- bufptr = next_token(bufptr);
- if (!no_more_tokens_kns(bufptr)) {
- goto apply_cm_map_ret_MISSING_TOKENS;
+ const uint32_t chrom_name_slen = strlen(cm_map_chrname);
+ int32_t cur_chrom_code = get_chrom_code(cm_map_chrname, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
+ LOGPREPRINTFWW("Error: --cm-map chromosome code '%s' not found in dataset.\n", cm_map_chrname);
+ goto apply_cm_map_ret_INVALID_CMDLINE_2;
+ }
+ chrom_fo_idx = chrom_info_ptr->chrom_idx_to_foidx[(uint32_t)cur_chrom_code];
+ chrom_ct = chrom_fo_idx + 1;
+ fname_buf = cm_map_fname;
}
- bp_old = -1;
- while (fgets(g_textbuf, MAXLINELEN, shapeitfile)) {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --cm-map file is pathologically long.\n", line_idx);
- goto apply_cm_map_ret_INVALID_FORMAT_2;
+ g_textbuf[MAXLINELEN - 1] = ' ';
+ for (; chrom_fo_idx < chrom_ct; chrom_fo_idx++) {
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
+ if (marker_uidx == chrom_end) {
+ continue;
}
- bufptr = skip_initial_spaces(g_textbuf);
- if ((*bufptr < '+') || (*bufptr > '9')) {
- // warning instead of error if text line found, since as of 8 Jan 2014
- // the posted chromosome 19 map has such a line
- if (*bufptr > ' ') {
- irreg_line_ct++;
+ uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
+ if (!cm_map_chrname) {
+ if ((!uii) || (uii > autosome_ct)) {
+ continue;
}
- continue;
- }
- if (scan_uint_defcap(bufptr, (uint32_t*)&bp_new)) {
- sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of --cm-map file.\n", line_idx);
- goto apply_cm_map_ret_INVALID_FORMAT_2;
- }
- if (bp_new <= bp_old) {
- logerrprint("Error: bp coordinates in --cm-map file are not in increasing order.\n");
- goto apply_cm_map_ret_INVALID_FORMAT;
+ bufptr = uint32toa(uii, fname_write);
+ memcpy(bufptr, at_sign_ptr, post_at_sign_len);
+ if (fopen_checked(fname_buf, "r", &shapeitfile)) {
+ LOGERRPRINTFWW("Warning: --cm-map failed to open %s.\n", fname_buf);
+ continue;
+ }
+ } else {
+ if (fopen_checked(cm_map_fname, "r", &shapeitfile)) {
+ goto apply_cm_map_ret_OPEN_FAIL;
+ }
+ }
+ updated_chrom_ct++;
+ irreg_line_ct = 0;
+ // First line is a header with three arbitrary fields.
+ // All subsequent lines have three fields in the following order:
+ // 1. bp position (increasing)
+ // 2. cM/Mb recombination rate between current and previous bp
+ // positions
+ // 3. current cM position
+ // We mostly ignore field 2, since depending just on fields 1 and 3
+ // maximizes accuracy. The one exception is the very first nonheader
+ // line.
+ retval = load_to_first_token(shapeitfile, MAXLINELEN, '\0', "--cm-map file", g_textbuf, &bufptr, &line_idx);
+ if (retval) {
+ goto apply_cm_map_ret_1;
}
- bufptr2 = next_token_mult(bufptr, 2);
- if (no_more_tokens_kns(bufptr2)) {
+ bufptr = next_token_mult(bufptr, 2);
+ if (no_more_tokens_kns(bufptr)) {
goto apply_cm_map_ret_MISSING_TOKENS;
}
- if (scan_double(bufptr2, &cm_new)) {
- sprintf(g_logbuf, "Error: Invalid centimorgan position on line %" PRIuPTR " of --cm-map file.\n", line_idx);
- goto apply_cm_map_ret_INVALID_FORMAT_2;
+ bufptr = next_token(bufptr);
+ if (!no_more_tokens_kns(bufptr)) {
+ goto apply_cm_map_ret_MISSING_TOKENS;
}
- if (bp_old == -1) {
- // parse field 2 only in this case
- bufptr = next_token(bufptr);
- if (scan_double(bufptr, &dxx)) {
- sprintf(g_logbuf, "Error: Invalid recombination rate on line %" PRIuPTR " of --cm-map file.\n", line_idx);
+ bp_old = -1;
+ while (fgets(g_textbuf, MAXLINELEN, shapeitfile)) {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --cm-map file is pathologically long.\n", line_idx);
goto apply_cm_map_ret_INVALID_FORMAT_2;
}
- cm_old = cm_new - dxx * 0.000001 * ((double)(bp_new + 1));
- }
- dxx = (cm_new - cm_old) / ((double)(bp_new - bp_old));
- while (marker_pos[marker_uidx] <= ((uint32_t)bp_new)) {
- marker_cms[marker_uidx] = cm_new - ((int32_t)(((uint32_t)bp_new) - marker_pos[marker_uidx])) * dxx;
- marker_uidx++;
- next_unset_ck(marker_exclude, chrom_end, &marker_uidx);
- if (marker_uidx == chrom_end) {
- goto apply_cm_map_chrom_done;
+ bufptr = skip_initial_spaces(g_textbuf);
+ if ((*bufptr < '+') || (*bufptr > '9')) {
+ // warning instead of error if text line found, since as of 8 Jan 2014
+ // the posted chromosome 19 map has such a line
+ if (*bufptr > ' ') {
+ irreg_line_ct++;
+ }
+ continue;
+ }
+ if (scan_uint_defcap(bufptr, (uint32_t*)&bp_new)) {
+ sprintf(g_logbuf, "Error: Invalid bp coordinate on line %" PRIuPTR " of --cm-map file.\n", line_idx);
+ goto apply_cm_map_ret_INVALID_FORMAT_2;
+ }
+ if (bp_new <= bp_old) {
+ logerrprint("Error: bp coordinates in --cm-map file are not in increasing order.\n");
+ goto apply_cm_map_ret_INVALID_FORMAT;
+ }
+ bufptr2 = next_token_mult(bufptr, 2);
+ if (no_more_tokens_kns(bufptr2)) {
+ goto apply_cm_map_ret_MISSING_TOKENS;
+ }
+ if (scan_double(bufptr2, &cm_new)) {
+ sprintf(g_logbuf, "Error: Invalid centimorgan position on line %" PRIuPTR " of --cm-map file.\n", line_idx);
+ goto apply_cm_map_ret_INVALID_FORMAT_2;
+ }
+ if (bp_old == -1) {
+ // parse field 2 only in this case
+ bufptr = next_token(bufptr);
+ if (scan_double(bufptr, &dxx)) {
+ sprintf(g_logbuf, "Error: Invalid recombination rate on line %" PRIuPTR " of --cm-map file.\n", line_idx);
+ goto apply_cm_map_ret_INVALID_FORMAT_2;
+ }
+ cm_old = cm_new - dxx * 0.000001 * ((double)(bp_new + 1));
+ }
+ dxx = (cm_new - cm_old) / ((double)(bp_new - bp_old));
+ while (marker_pos[marker_uidx] <= ((uint32_t)bp_new)) {
+ marker_cms[marker_uidx] = cm_new - ((int32_t)(((uint32_t)bp_new) - marker_pos[marker_uidx])) * dxx;
+ marker_uidx++;
+ next_unset_ck(marker_exclude, chrom_end, &marker_uidx);
+ if (marker_uidx == chrom_end) {
+ goto apply_cm_map_chrom_done;
+ }
}
+ bp_old = bp_new;
+ cm_old = cm_new;
+ }
+ for (; marker_uidx < chrom_end; marker_uidx++) {
+ marker_cms[marker_uidx] = cm_old;
+ }
+ apply_cm_map_chrom_done:
+ if (fclose_null(&shapeitfile)) {
+ goto apply_cm_map_ret_READ_FAIL;
+ }
+ if (irreg_line_ct) {
+ LOGERRPRINTFWW("Warning: %" PRIuPTR " irregular line%s skipped in %s.\n", irreg_line_ct, (irreg_line_ct == 1)? "" : "s", fname_buf);
}
- bp_old = bp_new;
- cm_old = cm_new;
- }
- for (; marker_uidx < chrom_end; marker_uidx++) {
- marker_cms[marker_uidx] = cm_old;
- }
- apply_cm_map_chrom_done:
- if (fclose_null(&shapeitfile)) {
- goto apply_cm_map_ret_READ_FAIL;
- }
- if (irreg_line_ct) {
- LOGERRPRINTFWW("Warning: %" PRIuPTR " irregular line%s skipped in %s.\n", irreg_line_ct, (irreg_line_ct == 1)? "" : "s", fname_buf);
}
+ LOGPRINTF("--cm-map: %u chromosome%s updated.\n", updated_chrom_ct, (updated_chrom_ct == 1)? "" : "s");
}
- LOGPRINTF("--cm-map: %u chromosome%s updated.\n", updated_chrom_ct, (updated_chrom_ct == 1)? "" : "s");
while (0) {
apply_cm_map_ret_OPEN_FAIL:
retval = RET_OPEN_FAIL;
@@ -627,7 +631,7 @@ int32_t apply_cm_map(char* cm_map_fname, char* cm_map_chrname, uintptr_t unfilte
int32_t update_marker_cms(Two_col_params* update_cm, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, double* marker_cms) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
char skipchar = update_cm->skipchar;
uint32_t colid_first = (update_cm->colid < update_cm->colx);
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
@@ -746,7 +750,7 @@ int32_t update_marker_cms(Two_col_params* update_cm, uint32_t* marker_id_htable,
int32_t update_marker_pos(Two_col_params* update_map, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_exclude_ct_ptr, uint32_t* marker_pos, uint32_t* map_is_unsorted_ptr, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
char skipchar = update_map->skipchar;
uint32_t colid_first = (update_map->colid < update_map->colx);
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
@@ -864,7 +868,7 @@ int32_t update_marker_pos(Two_col_params* update_map, uint32_t* marker_id_htable
for (marker_uidx = 0, marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
next_unset_unsafe_ck(marker_exclude, &marker_uidx);
while (marker_uidx >= chrom_end) {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[++chrom_fo_idx_p1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[++chrom_fo_idx_p1];
last_pos = 0;
}
if (last_pos > marker_pos[marker_uidx]) {
@@ -905,7 +909,7 @@ int32_t update_marker_pos(Two_col_params* update_map, uint32_t* marker_id_htable
int32_t update_marker_names(Two_col_params* update_name, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
char skipchar = update_name->skipchar;
uint32_t colold_first = (update_name->colid < update_name->colx);
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
@@ -1010,8 +1014,8 @@ int32_t update_marker_names(Two_col_params* update_name, uint32_t* marker_id_hta
int32_t update_marker_alleles(char* update_alleles_fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, char** marker_allele_ptrs, uintptr_t* max_marker_allele_len_ptr, char* outname, char* outname_end) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
- FILE* errfile = NULL;
+ FILE* infile = nullptr;
+ FILE* errfile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t max_marker_allele_len = *max_marker_allele_len_ptr;
uintptr_t hit_ct = 0;
@@ -1111,6 +1115,11 @@ int32_t update_marker_alleles(char* update_alleles_fname, uint32_t* marker_id_ht
if ((len == len2) && (!memcmp(bufptr, bufptr2, len))) {
goto update_marker_alleles_ret_DUPLICATE_ALLELE_CODE;
}
+ if (memchr(bufptr, ',', len) || memchr(bufptr2, ',', len2)) {
+ // this breaks VCF and PLINK 2 binary
+ LOGPREPRINTFWW("Error: Comma-containing new allele code on line %" PRIuPTR " of --update-alleles file.\n", line_idx);
+ goto update_marker_alleles_ret_INVALID_FORMAT_2;
+ }
if (len >= max_marker_allele_len) {
max_marker_allele_len = len + 1;
}
@@ -1129,9 +1138,9 @@ int32_t update_marker_alleles(char* update_alleles_fname, uint32_t* marker_id_ht
}
*token_endnn(bufptr3) = '\0';
fputs(bufptr3, errfile);
- putc('\t', errfile);
+ putc_unlocked('\t', errfile);
fputs(bufptr2, errfile);
- putc('\t', errfile);
+ putc_unlocked('\t', errfile);
fputs(bufptr, errfile);
if (putc_checked('\n', errfile)) {
goto update_marker_alleles_ret_WRITE_FAIL;
@@ -1225,7 +1234,7 @@ uint32_t flip_process_token(char* tok_start, uint32_t* marker_id_htable, uint32_
int32_t flip_strand(char* flip_fname, uint32_t* marker_id_htable, uint32_t marker_id_htable_size, char* marker_ids, uintptr_t max_marker_id_len, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, char** marker_allele_ptrs) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* flipfile = NULL;
+ FILE* flipfile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t hit_ct = 0;
uintptr_t miss_ct = 0;
@@ -1282,7 +1291,7 @@ int32_t flip_strand(char* flip_fname, uint32_t* marker_id_htable, uint32_t marke
}
curtoklen = (uintptr_t)(bufptr2 - bufptr);
if (bufptr2 == &(g_textbuf[MAXLINELEN * 2])) {
- if (curtoklen > MAX_ID_LEN) {
+ if (curtoklen > MAX_ID_SLEN) {
logerrprint("Error: Excessively long ID in --flip file.\n");
goto flip_strand_ret_INVALID_FORMAT;
}
@@ -1332,7 +1341,7 @@ int32_t flip_strand(char* flip_fname, uint32_t* marker_id_htable, uint32_t marke
int32_t update_sample_ids(char* update_ids_fname, char* sorted_sample_ids, uintptr_t sample_ct, uintptr_t max_sample_id_len, uint32_t* sample_id_map, char* sample_ids) {
// file has been pre-scanned
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
int32_t retval = 0;
uintptr_t sample_ctl = BITCT_TO_WORDCT(sample_ct);
uintptr_t hit_ct = 0;
@@ -1423,7 +1432,7 @@ int32_t update_sample_ids(char* update_ids_fname, char* sorted_sample_ids, uintp
int32_t update_sample_parents(char* update_parents_fname, char* sorted_sample_ids, uintptr_t sample_ct, uintptr_t max_sample_id_len, uint32_t* sample_id_map, char* paternal_ids, uintptr_t max_paternal_id_len, char* maternal_ids, uintptr_t max_maternal_id_len, uintptr_t* founder_info) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
int32_t retval = 0;
uintptr_t sample_ctl = BITCT_TO_WORDCT(sample_ct);
uintptr_t hit_ct = 0;
@@ -1524,7 +1533,7 @@ int32_t update_sample_parents(char* update_parents_fname, char* sorted_sample_id
int32_t update_sample_sexes(char* update_sex_fname, uint32_t update_sex_col, char* sorted_sample_ids, uintptr_t sample_ct, uintptr_t max_sample_id_len, uint32_t* sample_id_map, uintptr_t* sex_nm, uintptr_t* sex_male) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
int32_t retval = 0;
uintptr_t sample_ctl = BITCT_TO_WORDCT(sample_ct);
uintptr_t hit_ct = 0;
@@ -1830,15 +1839,15 @@ uint32_t get_freq_file_type(char* bufptr) {
int32_t read_external_freqs(char* freqname, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, char** marker_allele_ptrs, double* set_allele_freqs, uint32_t* nchrobs, uint32_t maf_succ) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* freqfile = NULL;
+ FILE* freqfile = nullptr;
uintptr_t line_idx = 0;
uint32_t freq_counts = 0;
uint32_t alen1 = 0;
uint32_t alen2 = 0;
uint32_t double_missing_ct = 0;
uint32_t cur_nchrobs = 0;
- char* aptr1 = NULL;
- char* aptr2 = NULL;
+ char* aptr1 = nullptr;
+ char* aptr2 = nullptr;
int32_t retval = 0;
const char* missing_geno_ptr = g_missing_geno_ptr;
char missing_geno = *missing_geno_ptr;
@@ -1852,7 +1861,6 @@ int32_t read_external_freqs(char* freqname, uintptr_t unfiltered_marker_ct, uint
char* bufptr5;
double maf;
uintptr_t loadbuf_size;
- uint32_t chrom_idx;
uint32_t marker_uidx;
uint32_t uii;
int32_t c_hom_a1;
@@ -1861,264 +1869,277 @@ int32_t read_external_freqs(char* freqname, uintptr_t unfiltered_marker_ct, uint
int32_t c_hap_a1;
int32_t c_hap_a2;
int32_t ii;
- if (fopen_checked(freqname, "r", &freqfile)) {
- goto read_external_freqs_ret_OPEN_FAIL;
- }
- retval = sort_item_ids(unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, 0, 0, strcmp_deref, &sorted_ids, &id_map);
- if (retval) {
- goto read_external_freqs_ret_1;
- }
- loadbuf_size = bigstack_left();
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size <= MAXLINELEN) {
- goto read_external_freqs_ret_NOMEM;
- }
- loadbuf = (char*)g_bigstack_base;
- loadbuf[loadbuf_size - 1] = ' ';
- do {
- if (!fgets(loadbuf, loadbuf_size, freqfile)) {
- logerrprint("Error: Empty --read-freq file.\n");
- goto read_external_freqs_ret_INVALID_FORMAT;
+ {
+ if (fopen_checked(freqname, "r", &freqfile)) {
+ goto read_external_freqs_ret_OPEN_FAIL;
}
- line_idx++;
- if (!loadbuf[loadbuf_size - 1]) {
- goto read_external_freqs_ret_TOO_LONG_LINE;
+ retval = sort_item_ids(unfiltered_marker_ct, marker_exclude, marker_exclude_ct, marker_ids, max_marker_id_len, 0, 0, strcmp_deref, &sorted_ids, &id_map);
+ if (retval) {
+ goto read_external_freqs_ret_1;
}
- bufptr = skip_initial_spaces(loadbuf);
- } while (is_eoln_kns(*bufptr));
- uii = get_freq_file_type(bufptr);
- if (!uii) {
- logerrprint("Error: Invalid --read-freq file header.\n");
- goto read_external_freqs_ret_INVALID_FORMAT;
- }
- if (uii < 3) {
- if (uii == 2) {
- freq_counts = 1;
+ loadbuf_size = bigstack_left();
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size <= MAXLINELEN) {
+ goto read_external_freqs_ret_NOMEM;
}
- while (fgets(loadbuf, loadbuf_size, freqfile) != NULL) {
+ loadbuf = (char*)g_bigstack_base;
+ loadbuf[loadbuf_size - 1] = ' ';
+ do {
+ if (!fgets(loadbuf, loadbuf_size, freqfile)) {
+ logerrprint("Error: Empty --read-freq file.\n");
+ goto read_external_freqs_ret_INVALID_FORMAT;
+ }
line_idx++;
if (!loadbuf[loadbuf_size - 1]) {
goto read_external_freqs_ret_TOO_LONG_LINE;
}
bufptr = skip_initial_spaces(loadbuf);
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- goto read_external_freqs_ret_INVALID_CHROM;
+ } while (is_eoln_kns(*bufptr));
+ uii = get_freq_file_type(bufptr);
+ if (!uii) {
+ logerrprint("Error: Invalid --read-freq file header.\n");
+ goto read_external_freqs_ret_INVALID_FORMAT;
+ }
+ if (uii < 3) {
+ if (uii == 2) {
+ freq_counts = 1;
}
- chrom_idx = ii;
- bufptr = next_token(bufptr); // now at beginning of marker name
- bufptr2 = next_token(bufptr);
- if (!bufptr2) {
- goto read_external_freqs_ret_MISSING_TOKENS;
- }
- ii = bsearch_str(bufptr, strlen_se(bufptr), sorted_ids, max_marker_id_len, unfiltered_marker_ct - marker_exclude_ct);
- if (ii != -1) {
- // may want to check for duplicates...
- marker_uidx = id_map[(uint32_t)ii];
- if ((chrom_idx == get_marker_chrom(chrom_info_ptr, marker_uidx)) || (!chrom_idx) || (!get_marker_chrom(chrom_info_ptr, marker_uidx))) {
- if ((marker_allele_ptrs[marker_uidx * 2 + 1] == missing_geno_ptr) && (marker_allele_ptrs[marker_uidx * 2] == missing_geno_ptr)) {
- double_missing_ct++;
- continue;
- }
- alen1 = strlen_se(bufptr2);
- aptr1 = bufptr2;
- bufptr2 = next_token(bufptr2);
- if (no_more_tokens_kns(bufptr2)) {
- goto read_external_freqs_ret_MISSING_TOKENS;
- }
- alen2 = strlen_se(bufptr2);
- aptr2 = bufptr2;
- if ((alen1 == alen2) && (!memcmp(aptr1, aptr2, alen1))) {
- // permit A1='0', A2='0'
- if ((*aptr1 == missing_geno) && (alen1 == 1)) {
+ while (fgets(loadbuf, loadbuf_size, freqfile) != nullptr) {
+ line_idx++;
+ if (!loadbuf[loadbuf_size - 1]) {
+ goto read_external_freqs_ret_TOO_LONG_LINE;
+ }
+ char* loadbuf_first_token = skip_initial_spaces(loadbuf);
+ if (is_eoln_kns(*loadbuf_first_token)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(loadbuf_first_token);
+ bufptr = skip_initial_spaces(first_token_end); // marker name
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - loadbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code = get_chrom_code(loadbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
+ goto read_external_freqs_ret_INVALID_CHROM;
+ }
+ bufptr2 = next_token(bufptr);
+ if (!bufptr2) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ ii = bsearch_str(bufptr, strlen_se(bufptr), sorted_ids, max_marker_id_len, unfiltered_marker_ct - marker_exclude_ct);
+ if (ii != -1) {
+ // may want to check for duplicates...
+ marker_uidx = id_map[(uint32_t)ii];
+ if ((((uint32_t)cur_chrom_code) == get_variant_chrom(chrom_info_ptr, marker_uidx)) || (!cur_chrom_code) || (!get_variant_chrom(chrom_info_ptr, marker_uidx))) {
+ if ((marker_allele_ptrs[marker_uidx * 2 + 1] == missing_geno_ptr) && (marker_allele_ptrs[marker_uidx * 2] == missing_geno_ptr)) {
+ double_missing_ct++;
continue;
}
- goto read_external_freqs_ret_A1_A2_SAME;
- }
- bufptr = next_token(bufptr2);
- if (no_more_tokens_kns(bufptr)) {
- goto read_external_freqs_ret_MISSING_TOKENS;
+ alen1 = strlen_se(bufptr2);
+ aptr1 = bufptr2;
+ bufptr2 = next_token(bufptr2);
+ if (no_more_tokens_kns(bufptr2)) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ alen2 = strlen_se(bufptr2);
+ aptr2 = bufptr2;
+ if ((alen1 == alen2) && (!memcmp(aptr1, aptr2, alen1))) {
+ // permit A1='0', A2='0'
+ if ((*aptr1 == missing_geno) && (alen1 == 1)) {
+ continue;
+ }
+ goto read_external_freqs_ret_A1_A2_SAME;
+ }
+ bufptr = next_token(bufptr2);
+ if (no_more_tokens_kns(bufptr)) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ if (freq_counts) {
+ if (no_more_tokens_kns(next_token(bufptr))) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ if (scan_uint_icap(bufptr, (uint32_t*)&c_hom_a1)) {
+ goto read_external_freqs_ret_INVALID_HOM_A1;
+ }
+ if (scan_uint_icap(next_token(bufptr), (uint32_t*)&c_hom_a2)) {
+ goto read_external_freqs_ret_INVALID_HOM_A2;
+ }
+ cur_nchrobs = c_hom_a1 + c_hom_a2;
+ maf = ((double)c_hom_a1 + maf_succ) / ((double)(cur_nchrobs + 2 * maf_succ));
+ if (nchrobs) {
+ nchrobs[marker_uidx] = cur_nchrobs;
+ }
+ } else {
+ if (scan_double(bufptr, &maf)) {
+ goto read_external_freqs_ret_INVALID_MAF;
+ }
+ if (nchrobs) {
+ bufptr = next_token(bufptr);
+ if (no_more_tokens_kns(bufptr)) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ if (scan_uint_icap(bufptr, &cur_nchrobs)) {
+ goto read_external_freqs_ret_INVALID_NCHROBS;
+ }
+ nchrobs[marker_uidx] = cur_nchrobs;
+ }
+ }
+ retval = load_one_freq(alen1, aptr1, alen2, aptr2, maf, &(set_allele_freqs[marker_uidx]), &(marker_allele_ptrs[marker_uidx * 2]), missing_geno);
+ if (retval) {
+ goto read_external_freqs_ret_ALLELE_MISMATCH;
+ }
}
- if (freq_counts) {
- if (no_more_tokens_kns(next_token(bufptr))) {
+ }
+ }
+ if (freq_counts) {
+ logprint("--read-freq: .frq.count file loaded.\n");
+ } else {
+ logprint("--read-freq: .frq file loaded.\n");
+ }
+ } else if (uii == 3) {
+ // --freqx format
+ while (fgets(loadbuf, loadbuf_size, freqfile) != nullptr) {
+ line_idx++;
+ if (!loadbuf[loadbuf_size - 1]) {
+ goto read_external_freqs_ret_TOO_LONG_LINE;
+ }
+ char* loadbuf_first_token = skip_initial_spaces(loadbuf);
+ if (is_eoln_kns(*loadbuf_first_token)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(loadbuf_first_token);
+ bufptr = skip_initial_spaces(first_token_end);
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - loadbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code = get_chrom_code(loadbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
+ goto read_external_freqs_ret_INVALID_CHROM;
+ }
+ bufptr2 = next_token(bufptr);
+ if (!bufptr2) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ ii = bsearch_str(bufptr, strlen_se(bufptr), sorted_ids, max_marker_id_len, unfiltered_marker_ct - marker_exclude_ct);
+ if (ii != -1) {
+ marker_uidx = id_map[(uint32_t)ii];
+ if ((((uint32_t)cur_chrom_code) == get_variant_chrom(chrom_info_ptr, marker_uidx)) || (!cur_chrom_code) || (!get_variant_chrom(chrom_info_ptr, marker_uidx))) {
+ if ((marker_allele_ptrs[marker_uidx * 2 + 1] == missing_geno_ptr) && (marker_allele_ptrs[marker_uidx * 2] == missing_geno_ptr)) {
+ double_missing_ct++;
+ continue;
+ }
+ alen1 = strlen_se(bufptr2);
+ aptr1 = bufptr2;
+ bufptr2 = next_token(bufptr2);
+ if (no_more_tokens_kns(bufptr2)) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ alen2 = strlen_se(bufptr2);
+ aptr2 = bufptr2;
+ if ((alen1 == alen2) && (!memcmp(aptr1, aptr2, alen1))) {
+ if ((*aptr1 == missing_geno) && (alen1 == 1)) {
+ continue;
+ }
+ goto read_external_freqs_ret_A1_A2_SAME;
+ }
+ bufptr = next_token(bufptr2);
+ bufptr2 = next_token(bufptr);
+ bufptr3 = next_token(bufptr2);
+ bufptr4 = next_token(bufptr3);
+ bufptr5 = next_token(bufptr4);
+ if (no_more_tokens_kns(bufptr5)) {
goto read_external_freqs_ret_MISSING_TOKENS;
}
if (scan_uint_icap(bufptr, (uint32_t*)&c_hom_a1)) {
goto read_external_freqs_ret_INVALID_HOM_A1;
}
- if (scan_uint_icap(next_token(bufptr), (uint32_t*)&c_hom_a2)) {
+ if (scan_uint_icap(bufptr2, (uint32_t*)&c_het)) {
+ sprintf(g_logbuf, "Error: Invalid het count on line %" PRIuPTR " of --read-freq file.\n", line_idx);
+ goto read_external_freqs_ret_INVALID_FORMAT_2;
+ }
+ if (scan_uint_icap(bufptr3, (uint32_t*)&c_hom_a2)) {
goto read_external_freqs_ret_INVALID_HOM_A2;
}
- cur_nchrobs = c_hom_a1 + c_hom_a2;
- maf = ((double)c_hom_a1 + maf_succ) / ((double)(cur_nchrobs + 2 * maf_succ));
- if (nchrobs) {
- nchrobs[marker_uidx] = cur_nchrobs;
+ if (scan_uint_icap(bufptr4, (uint32_t*)&c_hap_a1)) {
+ sprintf(g_logbuf, "Error: Invalid hap. A1 count on line %" PRIuPTR " of --read-freq file.\n", line_idx);
+ goto read_external_freqs_ret_INVALID_FORMAT_2;
}
- } else {
- if (scan_double(bufptr, &maf)) {
- goto read_external_freqs_ret_INVALID_MAF;
+ if (scan_uint_icap(bufptr5, (uint32_t*)&c_hap_a2)) {
+ sprintf(g_logbuf, "Error: Invalid hap. A2 count on line %" PRIuPTR " of --read-freq file.\n", line_idx);
+ goto read_external_freqs_ret_INVALID_FORMAT_2;
}
+ cur_nchrobs = 2 * (c_hom_a1 + c_het + c_hom_a2 + maf_succ) + c_hap_a1 + c_hap_a2;
+ maf = ((double)(c_hom_a1 * 2 + c_het + c_hap_a1 + maf_succ)) / ((double)cur_nchrobs);
if (nchrobs) {
- bufptr = next_token(bufptr);
- if (no_more_tokens_kns(bufptr)) {
- goto read_external_freqs_ret_MISSING_TOKENS;
- }
- if (scan_uint_icap(bufptr, &cur_nchrobs)) {
- goto read_external_freqs_ret_INVALID_NCHROBS;
- }
nchrobs[marker_uidx] = cur_nchrobs;
}
+ retval = load_one_freq(alen1, aptr1, alen2, aptr2, maf, &(set_allele_freqs[marker_uidx]), &(marker_allele_ptrs[marker_uidx * 2]), missing_geno);
+ if (retval) {
+ goto read_external_freqs_ret_ALLELE_MISMATCH;
+ }
}
- retval = load_one_freq(alen1, aptr1, alen2, aptr2, maf, &(set_allele_freqs[marker_uidx]), &(marker_allele_ptrs[marker_uidx * 2]), missing_geno);
- if (retval) {
- goto read_external_freqs_ret_ALLELE_MISMATCH;
- }
- }
+ }
}
- }
- if (freq_counts) {
- logprint("--read-freq: .frq.count file loaded.\n");
+ logprint("--read-freq: .frqx file loaded.\n");
} else {
- logprint("--read-freq: .frq file loaded.\n");
- }
- } else if (uii == 3) {
- // --freqx format
- while (fgets(loadbuf, loadbuf_size, freqfile) != NULL) {
- line_idx++;
- if (!loadbuf[loadbuf_size - 1]) {
- goto read_external_freqs_ret_TOO_LONG_LINE;
- }
- ii = get_chrom_code(chrom_info_ptr, loadbuf);
- if (ii < 0) {
- goto read_external_freqs_ret_INVALID_CHROM;
- }
- chrom_idx = ii;
- bufptr = next_token(loadbuf); // now at beginning of marker name
- bufptr2 = next_token(bufptr);
- if (!bufptr2) {
- goto read_external_freqs_ret_MISSING_TOKENS;
+ // Also support GCTA-style frequency files:
+ // [marker ID]\t[reference allele]\t[frequency of reference allele]\n
+ if (nchrobs) {
+ logerrprint("Error: The current run requires an allele frequency file with observation\ncounts.\n");
+ goto read_external_freqs_ret_INVALID_FORMAT;
}
- ii = bsearch_str(bufptr, strlen_se(bufptr), sorted_ids, max_marker_id_len, unfiltered_marker_ct - marker_exclude_ct);
- if (ii != -1) {
- marker_uidx = id_map[(uint32_t)ii];
- if ((chrom_idx == get_marker_chrom(chrom_info_ptr, marker_uidx)) || (!chrom_idx) || (!get_marker_chrom(chrom_info_ptr, marker_uidx))) {
+
+ // no header line here
+ line_idx--;
+ do {
+ line_idx++;
+ if (!loadbuf[loadbuf_size - 1]) {
+ goto read_external_freqs_ret_TOO_LONG_LINE;
+ }
+ bufptr = skip_initial_spaces(loadbuf);
+ if (is_eoln_kns(*bufptr)) {
+ continue;
+ }
+ bufptr = next_token(bufptr);
+ if (!bufptr) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
+ }
+ ii = bsearch_str(loadbuf, strlen_se(loadbuf), sorted_ids, max_marker_id_len, unfiltered_marker_ct - marker_exclude_ct);
+ if (ii != -1) {
+ marker_uidx = id_map[(uint32_t)ii];
if ((marker_allele_ptrs[marker_uidx * 2 + 1] == missing_geno_ptr) && (marker_allele_ptrs[marker_uidx * 2] == missing_geno_ptr)) {
double_missing_ct++;
continue;
}
- alen1 = strlen_se(bufptr2);
- aptr1 = bufptr2;
- bufptr2 = next_token(bufptr2);
- if (no_more_tokens_kns(bufptr2)) {
- goto read_external_freqs_ret_MISSING_TOKENS;
- }
- alen2 = strlen_se(bufptr2);
- aptr2 = bufptr2;
- if ((alen1 == alen2) && (!memcmp(aptr1, aptr2, alen1))) {
- if ((*aptr1 == missing_geno) && (alen1 == 1)) {
- continue;
- }
- goto read_external_freqs_ret_A1_A2_SAME;
- }
- bufptr = next_token(bufptr2);
- bufptr2 = next_token(bufptr);
- bufptr3 = next_token(bufptr2);
- bufptr4 = next_token(bufptr3);
- bufptr5 = next_token(bufptr4);
- if (no_more_tokens_kns(bufptr5)) {
+ alen1 = strlen_se(bufptr);
+ aptr1 = bufptr;
+ bufptr = next_token(bufptr);
+ if (no_more_tokens_kns(bufptr)) {
goto read_external_freqs_ret_MISSING_TOKENS;
}
- if (scan_uint_icap(bufptr, (uint32_t*)&c_hom_a1)) {
- goto read_external_freqs_ret_INVALID_HOM_A1;
- }
- if (scan_uint_icap(bufptr2, (uint32_t*)&c_het)) {
- sprintf(g_logbuf, "Error: Invalid het count on line %" PRIuPTR " of --read-freq file.\n", line_idx);
- goto read_external_freqs_ret_INVALID_FORMAT_2;
+ if (scan_double(bufptr, &maf)) {
+ goto read_external_freqs_ret_INVALID_MAF;
}
- if (scan_uint_icap(bufptr3, (uint32_t*)&c_hom_a2)) {
- goto read_external_freqs_ret_INVALID_HOM_A2;
+ retval = load_one_freq(1, missing_geno_ptr, alen1, aptr1, maf, &(set_allele_freqs[marker_uidx]), &(marker_allele_ptrs[marker_uidx * 2]), missing_geno);
+ if (retval) {
+ goto read_external_freqs_ret_ALLELE_MISMATCH;
}
- if (scan_uint_icap(bufptr4, (uint32_t*)&c_hap_a1)) {
- sprintf(g_logbuf, "Error: Invalid hap. A1 count on line %" PRIuPTR " of --read-freq file.\n", line_idx);
- goto read_external_freqs_ret_INVALID_FORMAT_2;
+ } else {
+ // if there aren't exactly 3 columns, this isn't a GCTA .freq file
+ bufptr = next_token(bufptr);
+ if (no_more_tokens_kns(bufptr)) {
+ goto read_external_freqs_ret_MISSING_TOKENS;
}
- if (scan_uint_icap(bufptr5, (uint32_t*)&c_hap_a2)) {
- sprintf(g_logbuf, "Error: Invalid hap. A2 count on line %" PRIuPTR " of --read-freq file.\n", line_idx);
+ if (!no_more_tokens_kns(next_token(bufptr))) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --read-freq has more tokens than expected.\n", line_idx);
goto read_external_freqs_ret_INVALID_FORMAT_2;
}
- cur_nchrobs = 2 * (c_hom_a1 + c_het + c_hom_a2 + maf_succ) + c_hap_a1 + c_hap_a2;
- maf = ((double)(c_hom_a1 * 2 + c_het + c_hap_a1 + maf_succ)) / ((double)cur_nchrobs);
- if (nchrobs) {
- nchrobs[marker_uidx] = cur_nchrobs;
- }
- retval = load_one_freq(alen1, aptr1, alen2, aptr2, maf, &(set_allele_freqs[marker_uidx]), &(marker_allele_ptrs[marker_uidx * 2]), missing_geno);
- if (retval) {
- goto read_external_freqs_ret_ALLELE_MISMATCH;
- }
- }
- }
+ }
+ } while (fgets(loadbuf, loadbuf_size, freqfile));
+ logprint("--read-freq: GCTA-formatted .freq file loaded.\n");
}
- logprint("--read-freq: .frqx file loaded.\n");
- } else {
- // Also support GCTA-style frequency files:
- // [marker ID]\t[reference allele]\t[frequency of reference allele]\n
- if (nchrobs) {
- logerrprint("Error: The current run requires an allele frequency file with observation\ncounts.\n");
- goto read_external_freqs_ret_INVALID_FORMAT;
+ if (double_missing_ct) {
+ LOGPRINTF("%u variant%s skipped since both existing allele codes were missing.\n", double_missing_ct, (double_missing_ct == 1)? "" : "s");
}
-
- // no header line here
- line_idx--;
- do {
- line_idx++;
- if (!loadbuf[loadbuf_size - 1]) {
- goto read_external_freqs_ret_TOO_LONG_LINE;
- }
- bufptr = skip_initial_spaces(loadbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
- }
- bufptr = next_token(bufptr);
- if (!bufptr) {
- goto read_external_freqs_ret_MISSING_TOKENS;
- }
- ii = bsearch_str(loadbuf, strlen_se(loadbuf), sorted_ids, max_marker_id_len, unfiltered_marker_ct - marker_exclude_ct);
- if (ii != -1) {
- marker_uidx = id_map[(uint32_t)ii];
- if ((marker_allele_ptrs[marker_uidx * 2 + 1] == missing_geno_ptr) && (marker_allele_ptrs[marker_uidx * 2] == missing_geno_ptr)) {
- double_missing_ct++;
- continue;
- }
- alen1 = strlen_se(bufptr);
- aptr1 = bufptr;
- bufptr = next_token(bufptr);
- if (no_more_tokens_kns(bufptr)) {
- goto read_external_freqs_ret_MISSING_TOKENS;
- }
- if (scan_double(bufptr, &maf)) {
- goto read_external_freqs_ret_INVALID_MAF;
- }
- retval = load_one_freq(1, missing_geno_ptr, alen1, aptr1, maf, &(set_allele_freqs[marker_uidx]), &(marker_allele_ptrs[marker_uidx * 2]), missing_geno);
- if (retval) {
- goto read_external_freqs_ret_ALLELE_MISMATCH;
- }
- } else {
- // if there aren't exactly 3 columns, this isn't a GCTA .freq file
- bufptr = next_token(bufptr);
- if (no_more_tokens_kns(bufptr)) {
- goto read_external_freqs_ret_MISSING_TOKENS;
- }
- if (!no_more_tokens_kns(next_token(bufptr))) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of --read-freq has more tokens than expected.\n", line_idx);
- goto read_external_freqs_ret_INVALID_FORMAT_2;
- }
- }
- } while (fgets(loadbuf, loadbuf_size, freqfile));
- logprint("--read-freq: GCTA-formatted .freq file loaded.\n");
- }
- if (double_missing_ct) {
- LOGPRINTF("%u variant%s skipped since both existing allele codes were missing.\n", double_missing_ct, (double_missing_ct == 1)? "" : "s");
}
while (0) {
read_external_freqs_ret_TOO_LONG_LINE:
@@ -2180,7 +2201,7 @@ int32_t read_external_freqs(char* freqname, uintptr_t unfiltered_marker_ct, uint
int32_t load_ax_alleles(Two_col_params* axalleles, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_exclude_ct, char** marker_allele_ptrs, uintptr_t* max_marker_allele_len_ptr, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, double* set_allele_freqs, uint32_t is_a2) {
// note that swap_reversed_marker_alleles() has NOT been called yet
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
char skipchar = axalleles->skipchar;
const char* missing_geno_ptr = g_missing_geno_ptr;
uint32_t colid_first = (axalleles->colid < axalleles->colx);
@@ -2325,15 +2346,15 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
// unfiltered_sample_ct == 0 ok
unsigned char* bigstack_mark = g_bigstack_base;
char* writebuf = g_textbuf;
- char* pzwritep = NULL;
+ char* pzwritep = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uint32_t* cur_cluster_map = cluster_map;
uint32_t* cur_cluster_starts = cluster_starts;
- uint32_t* cluster_map_nonmale = NULL;
- uint32_t* cluster_starts_nonmale = NULL;
- uint32_t* cluster_map_male = NULL;
- uint32_t* cluster_starts_male = NULL;
+ uint32_t* cluster_map_nonmale = nullptr;
+ uint32_t* cluster_starts_nonmale = nullptr;
+ uint32_t* cluster_map_male = nullptr;
+ uint32_t* cluster_starts_male = nullptr;
int32_t chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
uint32_t cslen = 10;
int32_t retval = 0;
@@ -2390,7 +2411,7 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
cur_cluster_starts[clidx + 1] = clmpos;
}
}
- chrom_idx = chrom_info_ptr->x_code;
+ chrom_idx = chrom_info_ptr->xymt_codes[X_OFFSET];
if ((chrom_idx != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
if (bigstack_alloc_ui(cluster_ct + 1, &cluster_starts_nonmale) ||
bigstack_alloc_ui(sample_f_ct - sample_f_male_ct, &cluster_map_nonmale)) {
@@ -2410,7 +2431,7 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
cluster_starts_nonmale[clidx + 1] = clmpos;
}
}
- chrom_idx = chrom_info_ptr->y_code;
+ chrom_idx = chrom_info_ptr->xymt_codes[Y_OFFSET];
if (cluster_map_nonmale || ((chrom_idx != -1) && is_set(chrom_info_ptr->chrom_mask, chrom_idx))) {
if (bigstack_alloc_ui(cluster_ct + 1, &cluster_starts_male) ||
bigstack_alloc_ui(sample_f_male_ct, &cluster_map_male)) {
@@ -2442,14 +2463,14 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
}
memset(csptr, 32, 10);
for (chrom_idx = 0; chrom_idx < chrom_code_end; chrom_idx++) {
- if (!chrom_exists(chrom_info_ptr, chrom_idx)) {
+ if (!is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
continue;
}
- is_x = (chrom_idx == chrom_info_ptr->x_code);
- is_y = (chrom_idx == chrom_info_ptr->y_code);
+ is_x = (chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
- chrom_end = chrom_info_ptr->chrom_end[chrom_idx];
- marker_uidx = next_unset_ul(marker_exclude, chrom_info_ptr->chrom_start[chrom_idx], chrom_end);
+ chrom_end = get_chrom_end_vidx(chrom_info_ptr, chrom_idx);
+ marker_uidx = next_unset_ul(marker_exclude, get_chrom_start_vidx(chrom_info_ptr, chrom_idx), chrom_end);
if (marker_uidx >= chrom_end) {
continue;
}
@@ -2484,7 +2505,7 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
pzwritep = memcpya(pzwritep, writebuf, wptr_start - writebuf);
pzwritep = fw_strcpy(8, &(cluster_ids[clidx * max_cluster_id_len]), pzwritep);
pzwritep = memcpya(pzwritep, csptr, cslen);
- fill_uint_zero(cur_cts, 4);
+ fill_uint_zero(4, cur_cts);
uiptr3 = &(cluster_map_nonmale[cluster_starts_nonmale[clidx + 1]]);
while (uiptr < uiptr3) {
uii = *uiptr++;
@@ -2492,7 +2513,7 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
}
a1_obs = 2 * cur_cts[0] + cur_cts[2];
tot_obs = 2 * (cur_cts[0] + cur_cts[2] + cur_cts[3]);
- fill_uint_zero(cur_cts, 4);
+ fill_uint_zero(4, cur_cts);
uiptr3 = &(cluster_map_male[cluster_starts_male[clidx + 1]]);
while (uiptr2 < uiptr3) {
uii = *uiptr2++;
@@ -2518,7 +2539,7 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
pzwritep = memcpya(pzwritep, writebuf, wptr_start - writebuf);
pzwritep = fw_strcpy(8, &(cluster_ids[clidx * max_cluster_id_len]), pzwritep);
pzwritep = memcpya(pzwritep, csptr, cslen);
- fill_uint_zero(cur_cts, 4);
+ fill_uint_zero(4, cur_cts);
uiptr2 = &(cluster_map_male[cluster_starts_male[clidx + 1]]);
while (uiptr < uiptr2) {
uii = *uiptr++;
@@ -2549,7 +2570,7 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
pzwritep = memcpya(pzwritep, writebuf, wptr_start - writebuf);
pzwritep = fw_strcpy(8, &(cluster_ids[clidx * max_cluster_id_len]), pzwritep);
pzwritep = memcpya(pzwritep, csptr, cslen);
- fill_uint_zero(cur_cts, 4);
+ fill_uint_zero(4, cur_cts);
uiptr2 = &(cur_cluster_map[cur_cluster_starts[clidx + 1]]);
while (uiptr < uiptr2) {
uii = *uiptr++;
@@ -2610,14 +2631,14 @@ int32_t write_stratified_freqs(FILE* bedfile, uintptr_t bed_offset, char* outnam
int32_t write_cc_freqs(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t output_gz, uint32_t plink_maxsnp, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, Chrom_info* chrom_info_ptr, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, uintptr_t unfiltered_sample_ct, uintptr_t* founder_info, uint32_t nonfounders, uintptr_t* sex_male, uintptr_t* marker_reverse, uintptr_t* pheno_nm, uintptr_t* ph [...]
// unfiltered_sample_ct must be positive
unsigned char* bigstack_mark = g_bigstack_base;
- char* pzwritep = NULL;
+ char* pzwritep = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
- uintptr_t* loadbuf = NULL;
- uintptr_t* case_include2 = NULL;
- uintptr_t* ctrl_include2 = NULL;
- uintptr_t* male_vec = NULL;
- uintptr_t* nonmale_vec = NULL;
+ uintptr_t* loadbuf = nullptr;
+ uintptr_t* case_include2 = nullptr;
+ uintptr_t* ctrl_include2 = nullptr;
+ uintptr_t* male_vec = nullptr;
+ uintptr_t* nonmale_vec = nullptr;
int32_t chrom_code_end = chrom_info_ptr->max_code + 1 + chrom_info_ptr->name_ct;
int32_t retval = 0;
Pigz_state ps;
@@ -2689,14 +2710,14 @@ int32_t write_cc_freqs(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
pzwritep = strcpya(pzwritep, "SNP A1 A2 MAF_A MAF_U NCHROBS_A NCHROBS_U" EOLN_STR);
for (chrom_idx = 0; chrom_idx < chrom_code_end; chrom_idx++) {
- if (!chrom_exists(chrom_info_ptr, chrom_idx)) {
+ if (!is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
continue;
}
- is_x = (chrom_idx == chrom_info_ptr->x_code);
- is_y = (chrom_idx == chrom_info_ptr->y_code);
+ is_x = (chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
- chrom_end = chrom_info_ptr->chrom_end[chrom_idx];
- marker_uidx = next_unset_ul(marker_exclude, chrom_info_ptr->chrom_start[chrom_idx], chrom_end);
+ chrom_end = get_chrom_end_vidx(chrom_info_ptr, chrom_idx);
+ marker_uidx = next_unset_ul(marker_exclude, get_chrom_start_vidx(chrom_info_ptr, chrom_idx), chrom_end);
if (marker_uidx >= chrom_end) {
continue;
}
@@ -2794,7 +2815,7 @@ int32_t write_cc_freqs(FILE* bedfile, uintptr_t bed_offset, char* outname, char*
int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, double* set_allele_freqs, Chrom_info* chrom_info_ptr, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uintptr_t max_marker_allele_len, int32_t* ll_cts, int32_t* lh_cts, int32_t* hh_cts, int32_t* hapl_cts, int32_t* haph_cts, uint32_t sample_f_ct, uint32_t sample_f_male_ct, uint32_t nonfounders, uint64_t misc_flags, uintptr_t* mar [...]
// unfiltered_sample_ct == 0 ok
unsigned char* bigstack_mark = g_bigstack_base;
- char* pzwritep = NULL;
+ char* pzwritep = nullptr;
uint32_t reverse = 0;
uint32_t freq_counts = (misc_flags / MISC_FREQ_COUNTS) & 1;
uint32_t freqx = (misc_flags / MISC_FREQX) & 1;
@@ -2849,14 +2870,14 @@ int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uin
}
}
for (chrom_idx = 0; chrom_idx < chrom_code_end; chrom_idx++) {
- if (!chrom_exists(chrom_info_ptr, chrom_idx)) {
+ if (!is_set(chrom_info_ptr->chrom_mask, chrom_idx)) {
continue;
}
- is_x = (chrom_idx == chrom_info_ptr->x_code);
- is_y = (chrom_idx == chrom_info_ptr->y_code);
+ is_x = (chrom_idx == chrom_info_ptr->xymt_codes[X_OFFSET]);
+ is_y = (chrom_idx == chrom_info_ptr->xymt_codes[Y_OFFSET]);
is_haploid = is_set(chrom_info_ptr->haploid_mask, chrom_idx);
- chrom_end = chrom_info_ptr->chrom_end[chrom_idx];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_start[chrom_idx], chrom_end);
+ chrom_end = get_chrom_end_vidx(chrom_info_ptr, chrom_idx);
+ marker_uidx = next_unset(marker_exclude, get_chrom_start_vidx(chrom_info_ptr, chrom_idx), chrom_end);
while (marker_uidx < chrom_end) {
reverse = IS_SET(marker_reverse, marker_uidx);
major_ptr = marker_allele_ptrs[marker_uidx * 2 + 1];
@@ -2945,11 +2966,11 @@ int32_t write_freqs(char* outname, char* outname_end, uint32_t plink_maxsnp, uin
int32_t sexcheck(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uint32_t plink_maxfid, uint32_t plink_maxiid, uintptr_t max_sample_id_len, uintptr_t* sex_nm, uintptr_t* sex_male, uint64_t misc_flags, double check_sex_fthresh, double check_sex_mthresh, uint32_t max_f_yobs, uint32_t min_m_yobs, Chrom_info* chrom [...]
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- uint32_t* het_cts = NULL;
- uint32_t* missing_cts = NULL;
- double* nei_offsets = NULL;
- uint32_t* ymiss_cts = NULL;
+ FILE* outfile = nullptr;
+ uint32_t* het_cts = nullptr;
+ uint32_t* missing_cts = nullptr;
+ double* nei_offsets = nullptr;
+ uint32_t* ymiss_cts = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
@@ -2963,8 +2984,8 @@ int32_t sexcheck(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
uint32_t yonly = (misc_flags / MISC_SEXCHECK_YONLY) & 1;
uint32_t gender_unk_ct = 0;
uint32_t problem_ct = 0;
- int32_t x_code = chrom_info_ptr->x_code;
- int32_t y_code = chrom_info_ptr->y_code;
+ int32_t x_code = chrom_info_ptr->xymt_codes[X_OFFSET];
+ int32_t y_code = chrom_info_ptr->xymt_codes[Y_OFFSET];
int32_t retval = 0;
uintptr_t* loadbuf_raw;
uintptr_t* loadbuf;
@@ -3013,8 +3034,8 @@ int32_t sexcheck(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
if ((x_code == -1) || (!is_set(chrom_info_ptr->chrom_mask, (uint32_t)x_code))) {
goto sexcheck_ret_NO_X_VAR;
}
- marker_uidx_end = chrom_info_ptr->chrom_end[(uint32_t)x_code];
- marker_uidx = next_unset_ul(marker_exclude, chrom_info_ptr->chrom_start[(uint32_t)x_code], marker_uidx_end);
+ marker_uidx_end = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)x_code);
+ marker_uidx = next_unset_ul(marker_exclude, get_chrom_start_vidx(chrom_info_ptr, (uint32_t)x_code), marker_uidx_end);
if (marker_uidx == marker_uidx_end) {
goto sexcheck_ret_NO_X_VAR;
}
@@ -3078,8 +3099,8 @@ int32_t sexcheck(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outna
}
if (check_y) {
if ((y_code != -1) && is_set(chrom_info_ptr->chrom_mask, (uint32_t)y_code)) {
- marker_uidx_end = chrom_info_ptr->chrom_end[(uint32_t)y_code];
- marker_uidx = next_unset_ul(marker_exclude, chrom_info_ptr->chrom_start[(uint32_t)y_code], marker_uidx_end);
+ marker_uidx_end = get_chrom_end_vidx(chrom_info_ptr, (uint32_t)y_code);
+ marker_uidx = next_unset_ul(marker_exclude, get_chrom_start_vidx(chrom_info_ptr, (uint32_t)y_code), marker_uidx_end);
ytotal = marker_uidx_end - marker_uidx - popcount_bit_idx(marker_exclude, marker_uidx, marker_uidx_end);
}
if (ytotal) {
@@ -3318,7 +3339,7 @@ int32_t write_snplist(char* outname, char* outname_end, uintptr_t unfiltered_mar
}
int32_t write_var_ranges(char* outname, char* outname_end, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t write_var_range_ct) {
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t marker_uidx = 0;
uintptr_t marker_idx = 0;
int32_t retval = 0;
@@ -3338,13 +3359,13 @@ int32_t write_var_ranges(char* outname, char* outname_end, uintptr_t unfiltered_
for (block_idx = 1; block_idx <= write_var_range_ct; block_idx++) {
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
new_marker_idx = (block_idx * ((uint64_t)marker_ct)) / write_var_range_ct;
if (new_marker_idx > marker_idx + 1) {
marker_uidx = jump_forward_unset_unsafe(marker_exclude, marker_uidx + 1, new_marker_idx - marker_idx - 1);
}
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
marker_uidx++;
marker_idx = new_marker_idx;
}
@@ -3369,7 +3390,7 @@ int32_t write_var_ranges(char* outname, char* outname_end, uintptr_t unfiltered_
int32_t list_duplicate_vars(char* outname, char* outname_end, uint32_t dupvar_modifier, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, Chrom_info* chrom_info_ptr, char** marker_allele_ptrs) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uint32_t* uidx_list_end = (uint32_t*)g_bigstack_end;
uint32_t* group_list_start = (uint32_t*)g_bigstack_base;
@@ -3422,8 +3443,8 @@ int32_t list_duplicate_vars(char* outname, char* outname_end, uint32_t dupvar_mo
}
max_batch_size = bigstack_left() / (5 * sizeof(int32_t));
for (chrom_fo_idx = 0; chrom_fo_idx < chrom_info_ptr->chrom_ct; chrom_fo_idx++) {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
if (marker_uidx == chrom_end) {
continue;
}
@@ -3549,26 +3570,26 @@ int32_t list_duplicate_vars(char* outname, char* outname_end, uint32_t dupvar_mo
// use ASCII order
if (strcmp(a1ptr, a2ptr) < 0) {
fputs(a1ptr, outfile);
- putc(',', outfile);
+ putc_unlocked(',', outfile);
fputs(a2ptr, outfile);
} else {
fputs(a2ptr, outfile);
- putc(',', outfile);
+ putc_unlocked(',', outfile);
fputs(a1ptr, outfile);
}
} else {
fputs(a2ptr, outfile);
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
fputs(a1ptr, outfile);
}
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
while (1) {
fputs(&(marker_ids[ujj * max_marker_id_len]), outfile);
read_uiptr++;
if (uii & 0x80000000U) {
break;
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
uii = *read_uiptr;
ujj = uii & 0x7fffffff;
}
@@ -3623,7 +3644,7 @@ int32_t list_duplicate_vars(char* outname, char* outname_end, uint32_t dupvar_mo
goto list_duplicate_vars_ret_WRITE_FAIL;
}
} else {
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
}
} while (read_uiptr != group_write);
}
@@ -3654,9 +3675,9 @@ int32_t list_duplicate_vars(char* outname, char* outname_end, uint32_t dupvar_mo
int32_t het_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* outname_end, uint32_t output_gz, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t marker_ct, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uint32_t plink_maxfid, uint32_t plink_maxiid, uintptr_t max_sample_id_len, uintptr_t* founder_info, Chrom_info* chrom_info_ptr, double* set_allele_freqs) {
// Same F coefficient computation as sexcheck().
unsigned char* bigstack_mark = g_bigstack_base;
- uintptr_t* loadbuf_f = NULL;
- uintptr_t* founder_vec11 = NULL;
- char* pzwritep = NULL;
+ uintptr_t* loadbuf_f = nullptr;
+ uintptr_t* founder_vec11 = nullptr;
+ char* pzwritep = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
@@ -3745,8 +3766,8 @@ int32_t het_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
do {
chrom_fo_idx++;
} while (is_set(chrom_info_ptr->haploid_mask, chrom_info_ptr->chrom_file_order[chrom_fo_idx]));
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx + 1];
- marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_file_order_marker_idx[chrom_fo_idx], chrom_end);
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx + 1];
+ marker_uidx = next_unset(marker_exclude, chrom_info_ptr->chrom_fo_vidx_start[chrom_fo_idx], chrom_end);
} while (marker_uidx >= chrom_end);
if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
goto het_report_ret_READ_FAIL;
@@ -3878,8 +3899,8 @@ int32_t fst_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
// Math based on VCFtools variant_file::output_weir_and_cockerham_fst();
// frequency counting logic similar to cmh_assoc().
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
- char* wptr_start = NULL;
+ FILE* outfile = nullptr;
+ char* wptr_start = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
@@ -3891,7 +3912,7 @@ int32_t fst_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
uint32_t chrom_idx = 0;
uint32_t skipped_marker_ct = 0;
uint32_t pct = 0;
- int32_t mt_code = chrom_info_ptr->mt_code;
+ int32_t mt_code = chrom_info_ptr->xymt_codes[MT_OFFSET];
int32_t retval = 0;
uintptr_t* loadbuf;
uintptr_t* cluster_mask;
@@ -4013,7 +4034,7 @@ int32_t fst_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
if (marker_uidx >= chrom_end) {
while (1) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1U];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1U];
} while (marker_uidx >= chrom_end);
chrom_idx = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
if ((!IS_SET(chrom_info_ptr->haploid_mask, chrom_idx)) && (chrom_idx != (uint32_t)mt_code)) {
@@ -4034,7 +4055,7 @@ int32_t fst_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
if (load_raw(unfiltered_sample_ct4, bedfile, loadbuf)) {
goto fst_report_ret_READ_FAIL;
}
- fill_uint_zero(cluster_geno_cts, cluster_ct * 3);
+ fill_uint_zero(cluster_ct * 3, cluster_geno_cts);
ulptr = loadbuf;
ulptr2 = cluster_mask;
for (sample_uidx_base = 0; sample_uidx_base < unfiltered_sample_ct; sample_uidx_base += BITCT2) {
@@ -4101,7 +4122,7 @@ int32_t fst_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
if (marker_idx >= loop_end) {
if (marker_idx < marker_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx * 100LLU) / marker_ct;
printf("\b\b%u%%", pct);
@@ -4114,7 +4135,7 @@ int32_t fst_report(FILE* bedfile, uintptr_t bed_offset, char* outname, char* out
goto fst_report_ret_WRITE_FAIL;
}
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
@@ -4154,8 +4175,8 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
// plink_dosage.c.
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
- FILE* outfile = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = QUATERCT_TO_WORDCT(unfiltered_sample_ct);
@@ -4168,11 +4189,11 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
uintptr_t range_skip = 0;
uintptr_t ulii = 0;
char* tbuf2 = &(g_textbuf[MAXLINELEN]);
- uintptr_t* marker_exclude_main = NULL;
- uintptr_t* sample_include2 = NULL;
- uintptr_t* sample_male_include2 = NULL;
- double* qrange_keys = NULL;
- double* effect_sizes_cur = NULL;
+ uintptr_t* marker_exclude_main = nullptr;
+ uintptr_t* sample_include2 = nullptr;
+ uintptr_t* sample_male_include2 = nullptr;
+ double* qrange_keys = nullptr;
+ double* effect_sizes_cur = nullptr;
double ploidy_d = 0.0;
double lbound = 0.0;
double ubound = 0.0;
@@ -4336,7 +4357,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
if (fwrite_checked(bufptr_arr[varid_idx], strlen_se(bufptr_arr[varid_idx]), outfile)) {
goto score_report_ret_WRITE_FAIL;
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
miss_ct++;
} else {
if (!IS_SET(marker_exclude, marker_uidx)) {
@@ -4359,13 +4380,13 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
if (fwrite_checked(bufptr_arr[varid_idx], strlen_se(bufptr_arr[varid_idx]), outfile)) {
goto score_report_ret_WRITE_FAIL;
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(bufptr_arr[allele_idx], outfile);
fputs(" vs ", outfile);
fputs(marker_allele_ptrs[2 * marker_uidx], outfile);
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(marker_allele_ptrs[2 * marker_uidx + 1], outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
miss_ct++;
miss_allele_ct++;
}
@@ -4379,7 +4400,7 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
if (fwrite_checked(bufptr_arr[varid_idx], strlen_se(bufptr_arr[varid_idx]), outfile)) {
goto score_report_ret_WRITE_FAIL;
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
miss_ct++;
miss_varid_ct++;
}
@@ -4616,9 +4637,9 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
if (fseeko(bedfile, bed_offset + ((uint64_t)marker_uidx) * unfiltered_sample_ct4, SEEK_SET)) {
goto score_report_ret_READ_FAIL;
}
- fill_double_zero(score_deltas, sample_ct);
- fill_uint_zero(miss_cts, sample_ct);
- fill_int_zero(named_allele_ct_deltas, sample_ct);
+ fill_double_zero(sample_ct, score_deltas);
+ fill_uint_zero(sample_ct, miss_cts);
+ fill_int_zero(sample_ct, named_allele_ct_deltas);
score_base = 0.0;
female_y_offset = 0.0;
obs_expected = 0;
@@ -4636,12 +4657,12 @@ int32_t score_report(Score_info* sc_ip, FILE* bedfile, uintptr_t bed_offset, uin
}
if (marker_uidx >= chrom_end) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
is_haploid = IS_SET(chrom_info_ptr->haploid_mask, uii);
- is_x = ((int32_t)uii == chrom_info_ptr->x_code)? 1 : 0;
- is_y = ((int32_t)uii == chrom_info_ptr->y_code)? 1 : 0;
+ is_x = ((int32_t)uii == chrom_info_ptr->xymt_codes[X_OFFSET])? 1 : 0;
+ is_y = ((int32_t)uii == chrom_info_ptr->xymt_codes[Y_OFFSET])? 1 : 0;
ploidy = 2 - is_haploid;
ploidy_d = (double)((int32_t)ploidy);
}
@@ -4915,7 +4936,7 @@ int32_t meta_analysis_open_and_read_header(const char* fname, char* loadbuf, uin
line_idx++;
}
}
- fill_uint_one(parse_table, token_ct);
+ fill_uint_one(token_ct, parse_table);
do {
slen = strlen_se(bufptr);
ii = bsearch_str(bufptr, slen, sorted_header_dict, max_header_len, header_dict_ct);
@@ -5095,13 +5116,13 @@ static inline uint32_t uint32_decode_5_hi_uchar(const char* start) {
int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1field_search_order, char* a2field_search_order, char* pfield_search_order, char* essfield_search_order, uint32_t flags, char* extractname, char* outname, char* outname_end, double output_min_p, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- gzFile gz_infile = NULL;
- FILE* infile = NULL;
- FILE* outfile = NULL;
- char* cur_window_marker_ids = NULL;
- char* sorted_extract_ids = NULL;
- uintptr_t* duplicate_id_bitfield = NULL;
- Ll_str** duplicate_id_htable = NULL;
+ gzFile gz_infile = nullptr;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
+ char* cur_window_marker_ids = nullptr;
+ char* sorted_extract_ids = nullptr;
+ uintptr_t* duplicate_id_bitfield = nullptr;
+ Ll_str** duplicate_id_htable = nullptr;
uintptr_t header_dict_ct = 2; // 'SE', BETA/OR
uintptr_t max_header_len = 3;
uintptr_t extract_ct = 0;
@@ -5220,97 +5241,62 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
uint32_t problem_mask;
uint32_t uii;
int32_t ii;
- // 1. Construct header search dictionary. Similar to clump_reports().
- if (snpfield_search_order) {
- header_dict_ct = count_and_measure_multistr(snpfield_search_order, &max_header_len);
- } else {
- max_header_len = 4; // 'SNP' + null terminator
- header_dict_ct++;
- }
- if (input_beta && (max_header_len < 5)) {
- max_header_len = 5;
- }
- if (weighted_z) {
- parse_max = 5;
- if (pfield_search_order) {
- header_dict_ct += count_and_measure_multistr(pfield_search_order, &max_header_len);
- } else {
- header_dict_ct++;
- }
- if (essfield_search_order) {
- header_dict_ct += count_and_measure_multistr(essfield_search_order, &max_header_len);
+ {
+ // 1. Construct header search dictionary. Similar to clump_reports().
+ if (snpfield_search_order) {
+ header_dict_ct = count_and_measure_multistr(snpfield_search_order, &max_header_len);
} else {
+ max_header_len = 4; // 'SNP' + null terminator
header_dict_ct++;
- if (max_header_len < 6) {
- max_header_len = 6;
- }
}
- }
- if (use_map) {
- if (max_header_len < 4) {
- max_header_len = 4;
+ if (input_beta && (max_header_len < 5)) {
+ max_header_len = 5;
}
- header_dict_ct += 2;
- if (!no_allele) {
- if (a1field_search_order) {
- header_dict_ct += count_and_measure_multistr(a1field_search_order, &max_header_len);
+ if (weighted_z) {
+ parse_max = 5;
+ if (pfield_search_order) {
+ header_dict_ct += count_and_measure_multistr(pfield_search_order, &max_header_len);
} else {
header_dict_ct++;
}
- if (a2field_search_order) {
- header_dict_ct += count_and_measure_multistr(a2field_search_order, &max_header_len);
+ if (essfield_search_order) {
+ header_dict_ct += count_and_measure_multistr(essfield_search_order, &max_header_len);
} else {
header_dict_ct++;
+ if (max_header_len < 6) {
+ max_header_len = 6;
+ }
}
- parse_max = 9;
- } else {
- parse_max = 7;
}
- }
- if (bigstack_alloc_c(header_dict_ct * max_header_len, &sorted_header_dict) ||
- bigstack_alloc_ui(header_dict_ct, &header_id_map)) {
- goto meta_analysis_ret_NOMEM;
- }
- ulii = 0; // write position
- if (snpfield_search_order) {
- bufptr = snpfield_search_order;
- uii = 0x10000000;
- do {
- slen = strlen(bufptr) + 1;
- memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, slen);
- header_id_map[ulii++] = uii++;
- bufptr = &(bufptr[slen]);
- } while (*bufptr);
- } else {
- memcpy(sorted_header_dict, "SNP", 4);
- header_id_map[0] = 0x10000000;
- ulii++;
- }
- if (!input_beta) {
- memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "OR");
- } else {
- memcpy(&(sorted_header_dict[ulii * max_header_len]), "BETA", 5);
- }
- header_id_map[ulii++] = 1;
- memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "SE");
- header_id_map[ulii++] = 2;
- if (weighted_z) {
- if (pfield_search_order) {
- bufptr = pfield_search_order;
- uii = 0x20000000;
- do {
- slen = strlen(bufptr) + 1;
- memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, slen);
- header_id_map[ulii++] = uii++;
- bufptr = &(bufptr[slen]);
- } while (*bufptr);
- } else {
- memcpy(&(sorted_header_dict[ulii * max_header_len]), "P", 2);
- header_id_map[ulii++] = 0x20000000;
+ if (use_map) {
+ if (max_header_len < 4) {
+ max_header_len = 4;
+ }
+ header_dict_ct += 2;
+ if (!no_allele) {
+ if (a1field_search_order) {
+ header_dict_ct += count_and_measure_multistr(a1field_search_order, &max_header_len);
+ } else {
+ header_dict_ct++;
+ }
+ if (a2field_search_order) {
+ header_dict_ct += count_and_measure_multistr(a2field_search_order, &max_header_len);
+ } else {
+ header_dict_ct++;
+ }
+ parse_max = 9;
+ } else {
+ parse_max = 7;
+ }
+ }
+ if (bigstack_alloc_c(header_dict_ct * max_header_len, &sorted_header_dict) ||
+ bigstack_alloc_ui(header_dict_ct, &header_id_map)) {
+ goto meta_analysis_ret_NOMEM;
}
- if (essfield_search_order) {
- bufptr = pfield_search_order;
- uii = 0x30000000;
+ ulii = 0; // write position
+ if (snpfield_search_order) {
+ bufptr = snpfield_search_order;
+ uii = 0x10000000;
do {
slen = strlen(bufptr) + 1;
memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, slen);
@@ -5318,19 +5304,22 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
bufptr = &(bufptr[slen]);
} while (*bufptr);
} else {
- memcpy(&(sorted_header_dict[ulii * max_header_len]), "NMISS", 6);
- header_id_map[ulii++] = 0x30000000;
+ memcpy(sorted_header_dict, "SNP", 4);
+ header_id_map[0] = 0x10000000;
+ ulii++;
}
- }
- if (use_map) {
- memcpy(&(sorted_header_dict[ulii * max_header_len]), "CHR", 4);
- header_id_map[ulii++] = 5;
- memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "BP");
- header_id_map[ulii++] = 6;
- if (!no_allele) {
- if (a1field_search_order) {
- bufptr = a1field_search_order;
- uii = 0x40000000;
+ if (!input_beta) {
+ memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "OR");
+ } else {
+ memcpy(&(sorted_header_dict[ulii * max_header_len]), "BETA", 5);
+ }
+ header_id_map[ulii++] = 1;
+ memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "SE");
+ header_id_map[ulii++] = 2;
+ if (weighted_z) {
+ if (pfield_search_order) {
+ bufptr = pfield_search_order;
+ uii = 0x20000000;
do {
slen = strlen(bufptr) + 1;
memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, slen);
@@ -5338,12 +5327,12 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
bufptr = &(bufptr[slen]);
} while (*bufptr);
} else {
- memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "A1");
- header_id_map[ulii++] = 0x40000000;
+ memcpy(&(sorted_header_dict[ulii * max_header_len]), "P", 2);
+ header_id_map[ulii++] = 0x20000000;
}
- if (a2field_search_order) {
- bufptr = a2field_search_order;
- uii = 0x50000000;
+ if (essfield_search_order) {
+ bufptr = essfield_search_order; // iterate the ESS-field names here; the dictionary was sized from this list, not the P-field list
+ uii = 0x30000000;
do {
slen = strlen(bufptr) + 1;
memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, slen);
@@ -5351,620 +5340,226 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
bufptr = &(bufptr[slen]);
} while (*bufptr);
} else {
- memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "A2");
- header_id_map[ulii] = 0x50000000;
+ memcpy(&(sorted_header_dict[ulii * max_header_len]), "NMISS", 6);
+ header_id_map[ulii++] = 0x30000000;
}
}
- }
- if (qsort_ext(sorted_header_dict, header_dict_ct, max_header_len, strcmp_deref, (char*)header_id_map, sizeof(int32_t))) {
- goto meta_analysis_ret_NOMEM;
- }
- if (scan_for_duplicate_ids(sorted_header_dict, header_dict_ct, max_header_len)) {
- logerrprint("Error: Duplicate/invalid --meta-analysis-...-field field name.\n");
- goto meta_analysis_ret_INVALID_CMDLINE;
- }
-
- // 2. If --extract specified, load and sort permitted variant list.
- if (extractname) {
- if (fopen_checked(extractname, FOPEN_RB, &infile)) {
- goto meta_analysis_ret_OPEN_FAIL;
- }
- retval = scan_token_ct_len(MAXLINELEN, infile, g_textbuf, &extract_ct, &max_extract_id_len);
- if (retval) {
- goto meta_analysis_ret_1;
- }
- if (!extract_ct) {
- logerrprint("Error: Empty --extract file.\n");
- goto meta_analysis_ret_INVALID_FORMAT;
- }
- if (max_extract_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: --extract IDs are limited to " MAX_ID_LEN_STR " characters.\n");
- goto meta_analysis_ret_INVALID_FORMAT;
- }
- if (bigstack_alloc_c(extract_ct * max_extract_id_len, &sorted_extract_ids)) {
- goto meta_analysis_ret_NOMEM;
- }
- rewind(infile);
- // Considered switching to a hash table, but decided against it for now
- // since it's less memory-efficient (in the usual case of similar-length
- // IDs), especially when lots of duplicate IDs are present. Might be worth
- // revisiting this decision in the future, though, since there are
- // reasonable use cases involving 40-80 million line --extract files, and
- // skipping the sort step there is a big win.
- retval = read_tokens(MAXLINELEN, extract_ct, max_extract_id_len, infile, g_textbuf, sorted_extract_ids);
- if (retval) {
- goto meta_analysis_ret_1;
- }
- if (fclose_null(&infile)) {
- goto meta_analysis_ret_READ_FAIL;
- }
- qsort(sorted_extract_ids, extract_ct, max_extract_id_len, strcmp_casted);
- ulii = collapse_duplicate_ids(sorted_extract_ids, extract_ct, max_extract_id_len, NULL);
- if (ulii < extract_ct) {
- extract_ct = ulii;
- bigstack_shrink_top(sorted_extract_ids, extract_ct * max_extract_id_len);
- }
- extract_ctl = BITCT_TO_WORDCT(extract_ct);
- if (bigstack_alloc_ul(extract_ctl, &duplicate_id_bitfield)) {
- goto meta_analysis_ret_NOMEM;
- }
- } else {
- duplicate_id_htable = (Ll_str**)bigstack_alloc(HASHMEM);
- }
-
- // 3. Allocate space for initial hash table.
- // Saving memory is pretty important here, so we use the following packing in
- // the ss field (W = byte width required to save numbers up to file_ct, and
- // M = 1 iff 'no-map' was not specified):
- // [W]: number of files this variant appears in minus 1, little-endian
- // [W+1]..[W+5], if M==1: chromosome byte followed by bp coordinate int; may
- // need to widen chromosome byte later
- // [W+5M+1]: null-terminated variant ID. Followed by null-terminated A1/A2
- // if 'no-allele' not specified
- bigstack_mark2 = g_bigstack_base;
- htable = (Ll_str**)bigstack_alloc(HASHMEM);
- if (!htable) {
- goto meta_analysis_ret_NOMEM;
- }
- for (uii = 0; uii < HASHSIZE; uii++) {
- htable[uii] = NULL;
- }
-
- // 4. Initial scan: save all potentially valid variant IDs (and accompanying
- // allele codes/chr/pos, if present) in the hash table, and produce .prob
- // file. Also determine maximum line length, for use in later passes.
- fname_ptr = input_fnames;
- do {
- fname_ptr = strchr(fname_ptr, '\0');
- fname_ptr++;
- file_ct++;
- } while (*fname_ptr);
- file_ct_byte_width = __builtin_clz(file_ct) / 8;
- file_ct_mask = 0xffffffffU >> (8 * file_ct_byte_width);
- file_ct_byte_width = 4 - file_ct_byte_width;
- file_ct64 = (file_ct + 63) / 64;
-
- slen_base = file_ct_byte_width;
- if (use_map) {
- slen_base += 5;
- }
- fname_ptr = input_fnames;
- htable_write = (Ll_str*)g_bigstack_base;
- bigstack_end_mark[-1] = ' ';
- for (file_idx = 0; file_idx < file_ct; file_idx++) {
- if (sorted_extract_ids) {
- fill_ulong_zero(duplicate_id_bitfield, extract_ctl);
- } else {
- for (uii = 0; uii < HASHSIZE; uii++) {
- duplicate_id_htable[uii] = NULL;
- }
- }
- fname_len = strlen(fname_ptr);
- // prevent overlap between loadbuf and new hash table entries.
- loadbuf_size = (((uintptr_t)(bigstack_end_mark - ((unsigned char*)htable_write))) / 4);
- if (loadbuf_size > MAXLINEBUFLEN) {
- loadbuf_size = MAXLINEBUFLEN;
- } else if (loadbuf_size <= MAXLINELEN) {
- goto meta_analysis_ret_NOMEM;
- }
- loadbuf = (char*)(&(bigstack_end_mark[-((intptr_t)loadbuf_size)]));
- duplicate_id_htable_write = (Ll_str*)loadbuf;
- htable_write_limit = ((uintptr_t)loadbuf) - loadbuf_size - 16;
- token_ct = parse_max;
- retval = meta_analysis_open_and_read_header(fname_ptr, loadbuf, loadbuf_size, sorted_header_dict, header_id_map, header_dict_ct, max_header_len, weighted_z, &token_ct, &gz_infile, col_skips, col_sequence, &line_idx, &line_max);
- if (retval) {
- goto meta_analysis_ret_1;
- }
- while (1) {
- line_idx++;
- if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
- if (!gzeof(gz_infile)) {
- goto meta_analysis_ret_READ_FAIL;
- }
- break;
- }
- if (!loadbuf[loadbuf_size - 1]) {
- if (loadbuf_size == MAXLINEBUFLEN) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, fname_ptr);
- goto meta_analysis_ret_INVALID_FORMAT_WW;
- }
- goto meta_analysis_ret_NOMEM;
- }
- bufptr = skip_initial_spaces(loadbuf);
- if (is_eoln_kns(*bufptr)) {
- slen = strlen(bufptr) + ((uintptr_t)(bufptr - loadbuf));
- if (slen >= line_max) {
- line_max = slen + 1;
- }
- continue;
- }
- bufptr = next_token_multz(bufptr, col_skips[0]);
- token_ptrs[col_sequence[0]] = bufptr;
- for (seq_idx = 1; seq_idx < token_ct; seq_idx++) {
- bufptr = next_token_mult(bufptr, col_skips[seq_idx]);
- token_ptrs[col_sequence[seq_idx]] = bufptr;
- }
- if (!bufptr) {
- // PLINK 1.07 doesn't error out here, or even count the number of
- // instances
- slen = strlen(loadbuf);
- if (slen >= line_max) {
- line_max = slen + 1;
- }
- continue;
- }
- slen = strlen(bufptr) + ((uintptr_t)(bufptr - loadbuf));
- if (slen >= line_max) {
- line_max = slen + 1;
- }
- bufptr = token_ptrs[0];
- var_id_len = strlen_se(bufptr);
- if (var_id_len > MAX_ID_LEN) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s has an excessively long variant ID.\n", line_idx, fname_ptr);
- goto meta_analysis_ret_INVALID_FORMAT_WW;
- }
- bufptr[var_id_len] = '\0';
- uii = hashval2(bufptr, var_id_len++);
- // var_id_len now includes null-terminator
- if (sorted_extract_ids) {
- ii = bsearch_str(bufptr, var_id_len - 1, sorted_extract_ids, max_extract_id_len, extract_ct);
- if (ii == -1) {
- continue;
- }
- if (is_set(duplicate_id_bitfield, ii)) {
- problem_mask = 0x200;
- goto meta_analysis_report_error;
- }
- set_bit(ii, duplicate_id_bitfield);
- } else {
- ll_pptr = &(duplicate_id_htable[uii]);
- while (1) {
- ll_ptr = *ll_pptr;
- if ((!ll_ptr) || (!strcmp(bufptr, ll_ptr->ss))) {
- break;
- }
- ll_pptr = &(ll_ptr->next);
- }
- if (ll_ptr) {
- problem_mask = 0x200;
- goto meta_analysis_report_error;
- }
- // word-align for now
- // note that it is NOT safe to use uii here.
- ulii = sizeof(intptr_t) + round_up_pow2(var_id_len, BYTECT);
- if (((uintptr_t)htable_write) + ulii > ((uintptr_t)duplicate_id_htable_write)) {
- goto meta_analysis_ret_NOMEM;
- }
- duplicate_id_htable_write = (Ll_str*)(((uintptr_t)duplicate_id_htable_write) - ulii);
- *ll_pptr = duplicate_id_htable_write;
- duplicate_id_htable_write->next = NULL;
- memcpy(duplicate_id_htable_write->ss, bufptr, var_id_len);
- }
- ll_pptr = &(htable[uii]);
-
- // validate
- problem_mask = 0;
- if (use_map) {
- ii = get_chrom_code(chrom_info_ptr, token_ptrs[5]);
- if (ii < 0) {
- problem_mask = 1;
+ if (use_map) {
+ memcpy(&(sorted_header_dict[ulii * max_header_len]), "CHR", 4);
+ header_id_map[ulii++] = 5;
+ memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "BP");
+ header_id_map[ulii++] = 6;
+ if (!no_allele) {
+ if (a1field_search_order) {
+ bufptr = a1field_search_order;
+ uii = 0x40000000;
+ do {
+ slen = strlen(bufptr) + 1;
+ memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, slen);
+ header_id_map[ulii++] = uii++;
+ bufptr = &(bufptr[slen]);
+ } while (*bufptr);
} else {
- cur_chrom = (uint32_t)ii;
- if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
- continue;
- }
- }
- if (scan_uint_defcap(token_ptrs[6], &cur_bp)) {
- problem_mask |= 2;
- }
- if (!no_allele) {
- bufptr = token_ptrs[7];
- a1lenp1 = strlen_se(bufptr); // not +1 yet
- if ((*bufptr == missing_geno) && (a1lenp1 == 1)) {
- problem_mask |= 4;
- }
- bufptr[a1lenp1++] = '\0';
- // A2 allele present if token_ct == 7 or == 9
- // if we make further extensions to this function, we should replace
- // "token_ct & 1" with an a2_present boolean
- if (token_ct & 1) {
- bufptr = token_ptrs[8];
- a2lenp1 = strlen_se(bufptr);
- if ((*bufptr == missing_geno) && (a2lenp1 == 1)) {
- problem_mask |= 8;
- }
- bufptr[a2lenp1++] = '\0';
- }
- }
- }
- if (scan_double(token_ptrs[1], &cur_beta) || (cur_beta == INFINITY) || ((!input_beta) && (!(cur_beta >= 0))) || (input_beta && ((cur_beta != cur_beta) || (cur_beta == -INFINITY)))) {
- problem_mask |= 0x10;
- }
- if (scan_double(token_ptrs[2], &cur_se) || (!(cur_se >= 0.0)) || (cur_se == INFINITY)) {
- problem_mask |= 0x20;
- }
- if (weighted_z) {
- if (scan_double(token_ptrs[3], &cur_p) || (!(cur_p >= 0.0)) || (cur_p > 1.0)) {
- problem_mask |= 0x80;
- }
- if (scan_double(token_ptrs[4], &cur_ess) || (!(cur_ess > 0.0)) || (cur_ess == INFINITY)) {
- problem_mask |= 0x100;
- }
- }
- // check main hash table
- bufptr = token_ptrs[0];
- while (1) {
- ll_ptr = *ll_pptr;
- if ((!ll_ptr) || (!strcmp(bufptr, &(ll_ptr->ss[slen_base])))) {
- break;
- }
- ll_pptr = &(ll_ptr->next);
- }
-
- if (!ll_ptr) {
- if (problem_mask) {
- goto meta_analysis_report_error;
- }
- // new hash table entry; word-align the allocation for now
- ll_ptr = htable_write;
- *ll_pptr = ll_ptr;
- ll_ptr->next = NULL;
- wptr = memseta(ll_ptr->ss, 0, file_ct_byte_width);
- if (use_map) {
- *wptr++ = cur_chrom;
- wptr = memcpya(wptr, &cur_bp, 4);
- }
- wptr = memcpya(wptr, bufptr, var_id_len);
- if (var_id_len > max_var_id_len_p1) {
- max_var_id_len_p1 = var_id_len;
- }
- if (!no_allele) {
- bufptr = wptr;
- wptr = memcpya(wptr, token_ptrs[7], a1lenp1);
- if (token_ct & 1) {
- wptr = memcpya(wptr, token_ptrs[8], a2lenp1);
- } else {
- *wptr++ = '\0';
- }
- uii = (uintptr_t)(wptr - bufptr);
- if (uii > max_combined_allele_len) {
- max_combined_allele_len = uii;
- }
+ memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "A1");
+ header_id_map[ulii++] = 0x40000000;
}
- if (report_all) {
- final_variant_ct++;
- }
- htable_write = (Ll_str*)round_up_pow2((uintptr_t)wptr, sizeof(uintptr_t));
- if ((((uintptr_t)htable_write) > ((uintptr_t)duplicate_id_htable_write)) || (((uintptr_t)htable_write) > htable_write_limit)) {
- goto meta_analysis_ret_NOMEM;
- }
- } else {
- if ((token_ct - 2 * weighted_z < 6) || meta_analysis_allelic_match(&(ll_ptr->ss[slen_base + var_id_len]), token_ptrs, token_ct, a1lenp1, a2lenp1)) {
- if (problem_mask) {
- goto meta_analysis_report_error;
- }
- // increment file count. Assume little-endian machine
- uiptr = (uint32_t*)ll_ptr->ss;
- uii = (*uiptr) & file_ct_mask;
- if ((!report_all) && (!uii)) {
- final_variant_ct++;
- }
- uii++;
- memcpy(ll_ptr->ss, &uii, file_ct_byte_width);
- } else {
- problem_mask |= 0x40;
- meta_analysis_report_error:
- if ((problem_mask == 0x200) && (!report_dups)) {
- continue;
- }
- if (!outfile) {
- memcpy(outname_end, ".prob", 6);
- if (fopen_checked(outname, "w", &outfile)) {
- goto meta_analysis_ret_OPEN_FAIL;
- }
- }
- bufptr = memcpyax(g_textbuf, fname_ptr, fname_len, '\t');
- bufptr = memcpyax(bufptr, token_ptrs[0], var_id_len - 1, '\t');
+ if (a2field_search_order) {
+ bufptr = a2field_search_order;
+ uii = 0x50000000;
do {
- wptr = strcpyax(bufptr, problem_strings[__builtin_ctz(problem_mask)], '\n');
- if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
- goto meta_analysis_ret_WRITE_FAIL;
- }
- problem_mask &= problem_mask - 1;
- } while (problem_mask);
- rejected_ct++;
+ slen = strlen(bufptr) + 1;
+ memcpy(&(sorted_header_dict[ulii * max_header_len]), bufptr, slen);
+ header_id_map[ulii++] = uii++;
+ bufptr = &(bufptr[slen]);
+ } while (*bufptr);
+ } else {
+ memcpyl3(&(sorted_header_dict[ulii * max_header_len]), "A2");
+ header_id_map[ulii] = 0x50000000;
}
}
}
- if (gzclose(gz_infile) != Z_OK) {
- gz_infile = NULL;
- goto meta_analysis_ret_READ_FAIL;
- }
- gz_infile = NULL;
- if (!sorted_extract_ids) {
- ulii = ((uintptr_t)loadbuf) - ((uintptr_t)duplicate_id_htable_write);
- if (ulii > duplicate_id_htable_max_alloc) {
- duplicate_id_htable_max_alloc = ulii;
- }
+ if (qsort_ext(sorted_header_dict, header_dict_ct, max_header_len, strcmp_deref, (char*)header_id_map, sizeof(int32_t))) {
+ goto meta_analysis_ret_NOMEM;
}
- fname_ptr = &(fname_ptr[fname_len + 1]);
- }
- if (outfile) {
- if (fclose_null(&outfile)) {
- goto meta_analysis_ret_WRITE_FAIL;
+ if (scan_for_duplicate_ids(sorted_header_dict, header_dict_ct, max_header_len)) {
+ logerrprint("Error: Duplicate/invalid --meta-analysis-...-field field name.\n");
+ goto meta_analysis_ret_INVALID_CMDLINE;
}
- LOGPRINTFWW("--meta-analysis: %" PRIu64 " problematic line%s; see %s .\n", rejected_ct, (rejected_ct == 1)? "" : "s", outname);
- }
- // 5. Determine final set of variants, and sort them (by chromosome, then
- // position, then variant ID in natural order). file_ct and (usually)
- // [A1+A2 len] are also included past the end of each entry, to remove the
- // need for an auxiliary index and let us free the hash table.
- if (!final_variant_ct) {
- logerrprint("Error: No --meta-analysis variants.\n");
- goto meta_analysis_ret_INVALID_CMDLINE;
-#ifdef __LP64__
- } else if (final_variant_ct > 0x7fffffff) {
- logerrprint("Error: Too many distinct --meta-analysis variants (max 2^31 - 1).\n");
-#endif
- }
- if (!no_allele) {
- combined_allele_len_byte_width = 4 - (__builtin_clz(max_combined_allele_len) / 8);
- }
- // bp coordinate, if present, expands from 4 to 5 bytes
- master_var_entry_len = slen_base + use_map + max_var_id_len_p1 + combined_allele_len_byte_width;
- loadbuf_size = round_up_pow2(line_max, END_ALLOC_CHUNK);
- loadbuf = (char*)bigstack_end_alloc_presized(loadbuf_size);
- if ((!loadbuf) ||
- bigstack_end_alloc_c(final_variant_ct * master_var_entry_len, &master_var_list) ||
- (((uintptr_t)htable_write) > ((uintptr_t)master_var_list))) {
- goto meta_analysis_ret_NOMEM;
- }
- // instead of following hash table pointers, we just plow through the table
- // entries in the order they were allocated in; this lets us access memory
- // sequentially
- ll_ptr = (Ll_str*)g_bigstack_base;
- for (master_var_idx = 0; master_var_idx < final_variant_ct;) {
- cur_file_ct_m1 = 0; // clear high bits
- memcpy(&cur_file_ct_m1, ll_ptr->ss, file_ct_byte_width);
- if (report_all || cur_file_ct_m1) {
- wptr = &(master_var_list[master_var_idx * master_var_entry_len]);
- master_var_idx++;
- if (use_map) {
- *wptr++ = ll_ptr->ss[file_ct_byte_width];
- memcpy(&uii, &(ll_ptr->ss[file_ct_byte_width + 1]), 4);
- wptr = uint32_encode_5_hi_uchar(uii, wptr);
- }
- bufptr = &(ll_ptr->ss[slen_base]);
- slen = strlen(bufptr) + 1;
- wptr = memcpya(wptr, bufptr, slen);
- wptr = memcpya(wptr, &cur_file_ct_m1, file_ct_byte_width);
- bufptr = &(bufptr[slen]);
- if (!no_allele) {
- // only save allele length sum, including null terminators
- slen = strlen(bufptr) + 1;
- slen += strlen(&(bufptr[slen])) + 1;
- memcpy(wptr, &slen, combined_allele_len_byte_width);
- bufptr = &(bufptr[slen]);
+ // 2. If --extract specified, load and sort permitted variant list.
+ if (extractname) {
+ if (fopen_checked(extractname, FOPEN_RB, &infile)) {
+ goto meta_analysis_ret_OPEN_FAIL;
}
- } else {
- bufptr = (char*)memchr(&(ll_ptr->ss[slen_base]), 0, max_var_id_len_p1);
- if (!no_allele) {
- bufptr = (char*)memchr(&(bufptr[1]), 0, max_combined_allele_len);
- bufptr = (char*)memchr(&(bufptr[1]), 0, max_combined_allele_len);
+ retval = scan_token_ct_len(MAXLINELEN, infile, g_textbuf, &extract_ct, &max_extract_id_len);
+ if (retval) {
+ goto meta_analysis_ret_1;
}
- bufptr++;
- }
- // now bufptr points to the byte past the end of the hash table entry
- // allocation, and we know the next allocation starts at [this byte,
- // rounded up to nearest word boundary]
- ll_ptr = (Ll_str*)round_up_pow2((uintptr_t)bufptr, sizeof(intptr_t));
- }
- qsort(master_var_list, final_variant_ct, master_var_entry_len, strcmp_natural);
- // don't need htable anymore
- bigstack_reset(bigstack_mark2);
- if (!sorted_extract_ids) {
- bigstack_alloc(duplicate_id_htable_max_alloc);
- }
- total_data_slots = bigstack_left() / sizeof(uintptr_t);
-
- // 6. Remaining load passes: determine how many remaining variants' worth of
- // effect sizes/SEs/Ps/ESSes fit in memory, load and meta-analyze just
- // those variants, rinse and repeat.
- memcpy(outname_end, ".meta", 6);
- if (fopen_checked(outname, "w", &outfile)) {
- goto meta_analysis_ret_OPEN_FAIL;
- }
- if (use_map) {
- fputs(" CHR BP", outfile);
- }
- fputs(" SNP", outfile);
- if (!no_allele) {
- fputs(" A1 A2", outfile);
- }
- fputs(output_beta? " N P P(R) BETA BETA(R) Q I" : " N P P(R) OR OR(R) Q I", outfile);
- if (weighted_z) {
- fputs(" WEIGHTED_Z P(WZ)", outfile);
- }
- if (report_study_specific) {
- for (file_idx = 0; file_idx < file_ct; file_idx++) {
- g_textbuf[0] = ' ';
- g_textbuf[1] = 'F';
- wptr = uint32toa(file_idx, &(g_textbuf[2]));
- wptr = width_force(8, g_textbuf, wptr);
- if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
- goto meta_analysis_ret_WRITE_FAIL;
+ if (!extract_ct) {
+ logerrprint("Error: Empty --extract file.\n");
+ goto meta_analysis_ret_INVALID_FORMAT;
}
- }
- }
- putc('\n', outfile);
-
- cur_data_index = (uintptr_t*)g_bigstack_base;
- if (use_map) {
- // chr/bp values can be discordant; when they are, we can't directly search
- // master_var_list for variant IDs. Instead, we populate
- // cur_window_marker_ids with an ASCII-sorted list (marker ID, cur_var_idx)
- // tuples.
- window_entry_base_cost += (max_var_id_len_p1 + sizeof(int32_t) + sizeof(intptr_t) - 1) / sizeof(intptr_t);
- }
- max_var_id_len_p5 = max_var_id_len_p1 + 4;
- while (1) {
- first_var_idx = last_var_idx;
- // memory requrirements per current-window variant:
- // - 2 * sizeof(intptr_t) for cur_data pointer and current file write
- // index; this grows from bottom of stack, while pointed-to stuff is
- // allocated from top
- // (technically could update the file write indexes in-place, but this
- // part is not memory-critical so I doubt it's worth it.)
- // - 2 * sizeof(double) * file_ct for effect sizes and SEs; filled from
- // back to front
- // sometimes, numerator and squared denominator of weighted Z-score
- // sometimes, bitfield describing which files are involved
- // sometimes, combined_allele_len for A1/A2, sizeof(double)-aligned
- // - sometimes, (max_var_id_len_p1 + sizeof(int32_t)) rounded up, for
- // cur_window_marker_ids.
- cur_entry_list_window = &(master_var_list[last_var_idx * master_var_entry_len]);
- bufptr = cur_entry_list_window;
- variants_remaining = final_variant_ct - last_var_idx;
- if (use_map) {
- bufptr = &(bufptr[6]); // ignore chromosome/position here
- }
- cur_data = (double*)(&(cur_data_index[total_data_slots]));
- ulii = 0;
- for (cur_variant_ct = 0; cur_variant_ct < variants_remaining; cur_variant_ct++) {
- bufptr2 = &(bufptr[cur_variant_ct * master_var_entry_len]);
- bufptr2 = (char*)memchr(bufptr2, 0, master_var_entry_len);
- bufptr2++;
- cur_file_ct_m1 = 0;
- memcpy(&cur_file_ct_m1, bufptr2, file_ct_byte_width);
- cur_data_slots = 0;
- if (report_study_specific) {
-#ifdef __LP64__
- cur_data_slots += file_ct64;
-#else
- cur_data_slots += 2 * file_ct64;
-#endif
+ if (max_extract_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: --extract IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
+ goto meta_analysis_ret_INVALID_FORMAT;
}
- if (!no_allele) {
- cur_combined_allele_len = 0;
- memcpy(&cur_combined_allele_len, &(bufptr2[file_ct_byte_width]), combined_allele_len_byte_width);
- cur_data_slots += (8 / BYTECT) * ((cur_combined_allele_len + 7) / 8);
- }
- cur_data_ptr = &(cur_data[-((intptr_t)cur_data_slots)]);
- cur_data_slots += window_entry_base_cost + (16 / BYTECT) * (cur_file_ct_m1 + 1 + weighted_z);
- ulii += cur_data_slots;
- if (ulii > total_data_slots) {
- break;
+ if (bigstack_alloc_c(extract_ct * max_extract_id_len, &sorted_extract_ids)) {
+ goto meta_analysis_ret_NOMEM;
+ }
+ rewind(infile);
+ // Considered switching to a hash table, but decided against it for now
+ // since it's less memory-efficient (in the usual case of similar-length
+ // IDs), especially when lots of duplicate IDs are present. Might be
+ // worth revisiting this decision in the future, though, since there are
+ // reasonable use cases involving 40-80 million line --extract files, and
+ // skipping the sort step there is a big win.
+ retval = read_tokens(MAXLINELEN, extract_ct, max_extract_id_len, infile, g_textbuf, sorted_extract_ids);
+ if (retval) {
+ goto meta_analysis_ret_1;
+ }
+ if (fclose_null(&infile)) {
+ goto meta_analysis_ret_READ_FAIL;
}
- if (report_study_specific) {
- fill_ulong_zero((uintptr_t*)cur_data_ptr, file_ct64 * (8 / BYTECT));
+ qsort(sorted_extract_ids, extract_ct, max_extract_id_len, strcmp_casted);
+ ulii = collapse_duplicate_ids(sorted_extract_ids, extract_ct, max_extract_id_len, nullptr);
+ if (ulii < extract_ct) {
+ extract_ct = ulii;
+ bigstack_shrink_top(sorted_extract_ids, extract_ct * max_extract_id_len);
}
- if (weighted_z) {
- cur_data[-2] = 0.0;
- cur_data[-1] = 0.0;
+ extract_ctl = BITCT_TO_WORDCT(extract_ct);
+ if (bigstack_alloc_ul(extract_ctl, &duplicate_id_bitfield)) {
+ goto meta_analysis_ret_NOMEM;
}
- cur_data = &(cur_data[-((intptr_t)(cur_data_slots - window_entry_base_cost))]);
- // [effect sizes/SEs, reverse order] {WZ} {file idx bitfield} {A1/A2}
- // ^
- // |
- // cur_data_ptr
- //
- // cur_data_index[2 * var_idx + 1] = # of effect sizes/etc. saved so far
- cur_data_index[2 * cur_variant_ct] = (uintptr_t)cur_data_ptr;
- cur_data_index[2 * cur_variant_ct + 1] = 0;
+ } else {
+ duplicate_id_htable = (Ll_str**)bigstack_alloc(HASHMEM);
}
- if (!cur_variant_ct) {
+
+ // 3. Allocate space for initial hash table.
+ // Saving memory is pretty important here, so we use the following packing
+ // in the ss field (W = byte width required to save numbers up to file_ct,
+ // and M = 1 iff 'no-map' was not specified):
+ // [W]: number of files this variant appears in minus 1, little-endian
+ // [W+1]..[W+5], if M==1: chromosome byte followed by bp coordinate int;
+ // may need to widen chromosome byte later
+ // [W+5M+1]: null-terminated variant ID. Followed by null-terminated A1/A2
+ // if 'no-allele' not specified
+ bigstack_mark2 = g_bigstack_base;
+ htable = (Ll_str**)bigstack_alloc(HASHMEM);
+ if (!htable) {
goto meta_analysis_ret_NOMEM;
}
- last_var_idx += cur_variant_ct;
+ for (uii = 0; uii < HASHSIZE; uii++) {
+ htable[uii] = nullptr;
+ }
+
+ // 4. Initial scan: save all potentially valid variant IDs (and
+ // accompanying allele codes/chr/pos, if present) in the hash table, and
+ // produce .prob file. Also determine maximum line length, for use in
+ // later passes.
+ fname_ptr = input_fnames;
+ do {
+ fname_ptr = strchr(fname_ptr, '\0');
+ fname_ptr++;
+ file_ct++;
+ } while (*fname_ptr);
+ file_ct_byte_width = __builtin_clz(file_ct) / 8;
+ file_ct_mask = 0xffffffffU >> (8 * file_ct_byte_width);
+ file_ct_byte_width = 4 - file_ct_byte_width;
+ file_ct64 = (file_ct + 63) / 64;
+
+ slen_base = file_ct_byte_width;
if (use_map) {
- // position cur_window_marker_ids on top of cur_data_index
- cur_window_marker_ids = (char*)(&(cur_data_index[2 * cur_variant_ct]));
- // note that bufptr is positioned properly for reading variant IDs,
- // though it won't be after this loop
- bufptr2 = cur_window_marker_ids;
- for (uii = 0; uii < cur_variant_ct; uii++) {
- strcpy(bufptr2, bufptr);
- memcpy(&(bufptr2[max_var_id_len_p1]), &uii, 4);
- bufptr = &(bufptr[master_var_entry_len]);
- bufptr2 = &(bufptr2[max_var_id_len_p5]);
- }
- qsort(cur_window_marker_ids, cur_variant_ct, max_var_id_len_p5, strcmp_casted);
+ slen_base += 5;
}
fname_ptr = input_fnames;
+ htable_write = (Ll_str*)g_bigstack_base;
+ bigstack_end_mark[-1] = ' ';
for (file_idx = 0; file_idx < file_ct; file_idx++) {
if (sorted_extract_ids) {
- fill_ulong_zero(duplicate_id_bitfield, extract_ctl);
+ fill_ulong_zero(extract_ctl, duplicate_id_bitfield);
} else {
for (uii = 0; uii < HASHSIZE; uii++) {
- duplicate_id_htable[uii] = NULL;
+ duplicate_id_htable[uii] = nullptr;
}
}
- duplicate_id_htable_write = (Ll_str*)bigstack_mark2;
fname_len = strlen(fname_ptr);
+ // prevent overlap between loadbuf and new hash table entries.
+ loadbuf_size = (((uintptr_t)(bigstack_end_mark - ((unsigned char*)htable_write))) / 4);
+ if (loadbuf_size > MAXLINEBUFLEN) {
+ loadbuf_size = MAXLINEBUFLEN;
+ } else if (loadbuf_size <= MAXLINELEN) {
+ goto meta_analysis_ret_NOMEM;
+ }
+ loadbuf = (char*)(&(bigstack_end_mark[-((intptr_t)loadbuf_size)]));
+ duplicate_id_htable_write = (Ll_str*)loadbuf;
+ htable_write_limit = ((uintptr_t)loadbuf) - loadbuf_size - 16;
token_ct = parse_max;
- retval = meta_analysis_open_and_read_header(fname_ptr, loadbuf, loadbuf_size, sorted_header_dict, header_id_map, header_dict_ct, max_header_len, weighted_z, &token_ct, &gz_infile, col_skips, col_sequence, NULL, NULL);
+ retval = meta_analysis_open_and_read_header(fname_ptr, loadbuf, loadbuf_size, sorted_header_dict, header_id_map, header_dict_ct, max_header_len, weighted_z, &token_ct, &gz_infile, col_skips, col_sequence, &line_idx, &line_max);
if (retval) {
goto meta_analysis_ret_1;
}
while (1) {
- // yeah, this is repetitive
+ line_idx++;
if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
if (!gzeof(gz_infile)) {
goto meta_analysis_ret_READ_FAIL;
}
break;
}
- bufptr = skip_initial_spaces(loadbuf);
+ if (!loadbuf[loadbuf_size - 1]) {
+ if (loadbuf_size == MAXLINEBUFLEN) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s is pathologically long.\n", line_idx, fname_ptr);
+ goto meta_analysis_ret_INVALID_FORMAT_WW;
+ }
+ goto meta_analysis_ret_NOMEM;
+ }
+ bufptr = skip_initial_spaces(loadbuf);
if (is_eoln_kns(*bufptr)) {
+ slen = strlen(bufptr) + ((uintptr_t)(bufptr - loadbuf));
+ if (slen >= line_max) {
+ line_max = slen + 1;
+ }
continue;
}
- bufptr = next_token_multz(bufptr, col_skips[0]);
+ bufptr = next_token_multz(bufptr, col_skips[0]);
token_ptrs[col_sequence[0]] = bufptr;
for (seq_idx = 1; seq_idx < token_ct; seq_idx++) {
bufptr = next_token_mult(bufptr, col_skips[seq_idx]);
token_ptrs[col_sequence[seq_idx]] = bufptr;
}
- if (!bufptr) {
- continue;
+ if (!bufptr) {
+ // PLINK 1.07 doesn't error out here, or even count the number of
+ // instances
+ slen = strlen(loadbuf);
+ if (slen >= line_max) {
+ line_max = slen + 1;
+ }
+ continue;
+ }
+ slen = strlen(bufptr) + ((uintptr_t)(bufptr - loadbuf));
+ if (slen >= line_max) {
+ line_max = slen + 1;
}
- bufptr = token_ptrs[0];
+ bufptr = token_ptrs[0];
var_id_len = strlen_se(bufptr);
- if (var_id_len >= max_var_id_len_p1) {
- continue;
+ if (var_id_len > MAX_ID_SLEN) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s has an excessively long variant ID.\n", line_idx, fname_ptr);
+ goto meta_analysis_ret_INVALID_FORMAT_WW;
}
bufptr[var_id_len] = '\0';
- if (sorted_extract_ids) {
- ii = bsearch_str(bufptr, var_id_len, sorted_extract_ids, max_extract_id_len, extract_ct);
+ uii = hashval2(bufptr, var_id_len++);
+ // var_id_len now includes null-terminator
+ if (sorted_extract_ids) {
+ ii = bsearch_str(bufptr, var_id_len - 1, sorted_extract_ids, max_extract_id_len, extract_ct);
if (ii == -1) {
continue;
}
if (is_set(duplicate_id_bitfield, ii)) {
- continue;
+ problem_mask = 0x200;
+ goto meta_analysis_report_error;
}
set_bit(ii, duplicate_id_bitfield);
} else {
- uii = hashval2(bufptr, var_id_len);
ll_pptr = &(duplicate_id_htable[uii]);
while (1) {
ll_ptr = *ll_pptr;
@@ -5974,316 +5569,746 @@ int32_t meta_analysis(char* input_fnames, char* snpfield_search_order, char* a1f
ll_pptr = &(ll_ptr->next);
}
if (ll_ptr) {
- continue;
+ problem_mask = 0x200;
+ goto meta_analysis_report_error;
}
+ // word-align for now
+ // note that it is NOT safe to use uii here.
+ ulii = sizeof(intptr_t) + round_up_pow2(var_id_len, BYTECT);
+ if (((uintptr_t)htable_write) + ulii > ((uintptr_t)duplicate_id_htable_write)) {
+ goto meta_analysis_ret_NOMEM;
+ }
+ duplicate_id_htable_write = (Ll_str*)(((uintptr_t)duplicate_id_htable_write) - ulii);
*ll_pptr = duplicate_id_htable_write;
- duplicate_id_htable_write->next = NULL;
- memcpy(duplicate_id_htable_write->ss, bufptr, var_id_len + 1);
- ulii = sizeof(intptr_t) + ((var_id_len + BYTECT) & (~(BYTECT - 1)));
- duplicate_id_htable_write = (Ll_str*)(((uintptr_t)duplicate_id_htable_write) + ulii);
+ duplicate_id_htable_write->next = nullptr;
+ memcpy(duplicate_id_htable_write->ss, bufptr, var_id_len);
}
+ ll_pptr = &(htable[uii]);
+
+ // validate
+ problem_mask = 0;
if (use_map) {
- ii = get_chrom_code(chrom_info_ptr, token_ptrs[5]);
+ ii = get_chrom_code_destructive(chrom_info_ptr, token_ptrs[5]);
if (ii < 0) {
- continue;
- }
- cur_chrom = (uint32_t)ii;
- if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
- continue;
+ problem_mask = 1;
+ } else {
+ cur_chrom = (uint32_t)ii;
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
+ continue;
+ }
}
- if (scan_uint_defcap(token_ptrs[6], &cur_bp)) {
- continue;
+ if (scan_uint_defcap(token_ptrs[6], &cur_bp)) {
+ problem_mask |= 2;
}
- if (!no_allele) {
+ if (!no_allele) {
bufptr = token_ptrs[7];
- a1lenp1 = strlen_se(bufptr);
+ a1lenp1 = strlen_se(bufptr); // not +1 yet
if ((*bufptr == missing_geno) && (a1lenp1 == 1)) {
- continue;
+ problem_mask |= 4;
}
bufptr[a1lenp1++] = '\0';
- if (token_ct & 1) {
+ // A2 allele present if token_ct == 7 or == 9
+ // if we make further extensions to this function, we should
+ // replace "token_ct & 1" with an a2_present boolean
+ if (token_ct & 1) {
bufptr = token_ptrs[8];
a2lenp1 = strlen_se(bufptr);
if ((*bufptr == missing_geno) && (a2lenp1 == 1)) {
- continue;
+ problem_mask |= 8;
}
bufptr[a2lenp1++] = '\0';
}
}
}
- if (scan_double(token_ptrs[1], &cur_beta)) {
- continue;
+ if (scan_double(token_ptrs[1], &cur_beta) || (cur_beta == INFINITY) || ((!input_beta) && (!(cur_beta >= 0))) || (input_beta && ((cur_beta != cur_beta) || (cur_beta == -INFINITY)))) {
+ problem_mask |= 0x10;
}
- if (!input_beta) {
- cur_beta = log(cur_beta);
- }
- if (!realnum(cur_beta)) {
- continue;
- }
- if (scan_double(token_ptrs[2], &cur_se) || (!(cur_se >= 0.0)) || (cur_se == INFINITY)) {
- continue;
+ if (scan_double(token_ptrs[2], &cur_se) || (!(cur_se >= 0.0)) || (cur_se == INFINITY)) {
+ problem_mask |= 0x20;
}
if (weighted_z) {
if (scan_double(token_ptrs[3], &cur_p) || (!(cur_p >= 0.0)) || (cur_p > 1.0)) {
- continue;
+ problem_mask |= 0x80;
}
if (scan_double(token_ptrs[4], &cur_ess) || (!(cur_ess > 0.0)) || (cur_ess == INFINITY)) {
- continue;
+ problem_mask |= 0x100;
}
}
+ // check main hash table
bufptr = token_ptrs[0];
- if (use_map) {
- ii = bsearch_str(bufptr, var_id_len, cur_window_marker_ids, max_var_id_len_p5, cur_variant_ct);
- if (ii == -1) {
- continue;
- }
-#ifdef __LP64__
- cur_var_idx = 0; // clear high 32 bits
-#endif
- memcpy(&cur_var_idx, &(cur_window_marker_ids[(((uint32_t)ii) * max_var_id_len_p5) + max_var_id_len_p1]), 4);
- } else {
- bufptr[var_id_len] = '\0';
- ulii = (uint32_t)bsearch_str_natural(bufptr, cur_entry_list_window, master_var_entry_len, cur_variant_ct);
- // this comparison catches -1 return value
- if ((ulii >= last_var_idx) || (ulii < first_var_idx)) {
- continue;
+ while (1) {
+ ll_ptr = *ll_pptr;
+ if ((!ll_ptr) || (!strcmp(bufptr, &(ll_ptr->ss[slen_base])))) {
+ break;
}
- cur_var_idx = ulii - first_var_idx;
+ ll_pptr = &(ll_ptr->next);
}
- cur_data_ptr = (double*)cur_data_index[2 * cur_var_idx];
- cur_file_ct_m1 = cur_data_index[2 * cur_var_idx + 1];
- if (!no_allele) {
- if (!report_study_specific) {
- bufptr2 = (char*)cur_data_ptr;
- } else {
- bufptr2 = (char*)(&(cur_data_ptr[file_ct64]));
+
+ if (!ll_ptr) {
+ if (problem_mask) {
+ goto meta_analysis_report_error;
+ }
+ // new hash table entry; word-align the allocation for now
+ ll_ptr = htable_write;
+ *ll_pptr = ll_ptr;
+ ll_ptr->next = nullptr;
+ wptr = memseta(ll_ptr->ss, 0, file_ct_byte_width);
+ if (use_map) {
+ *wptr++ = cur_chrom;
+ wptr = memcpya(wptr, &cur_bp, 4);
+ }
+ wptr = memcpya(wptr, bufptr, var_id_len);
+ if (var_id_len > max_var_id_len_p1) {
+ max_var_id_len_p1 = var_id_len;
}
- if (!cur_file_ct_m1) {
- // save allele codes
- bufptr2 = memcpya(bufptr2, token_ptrs[7], a1lenp1);
+ if (!no_allele) {
+ bufptr = wptr;
+ wptr = memcpya(wptr, token_ptrs[7], a1lenp1);
if (token_ct & 1) {
- bufptr2 = memcpya(bufptr2, token_ptrs[8], a2lenp1);
+ wptr = memcpya(wptr, token_ptrs[8], a2lenp1);
} else {
- *bufptr2++ = '\0';
+ *wptr++ = '\0';
}
- } else {
- // compare them
- uii = meta_analysis_allelic_match(bufptr2, token_ptrs, token_ct, a1lenp1, a2lenp1);
- if (!uii) {
- continue;
- } else if (uii == 2) {
- cur_beta = -cur_beta;
+ uii = (uintptr_t)(wptr - bufptr);
+ if (uii > max_combined_allele_len) {
+ max_combined_allele_len = uii;
}
}
- }
- if (report_study_specific) {
- set_bit(file_idx, (uintptr_t*)cur_data_ptr);
- }
- if (weighted_z) {
- dxx = ltqnorm(1.0 - cur_p * 0.5) * sqrt(cur_ess);
- if (cur_beta > 0.0) {
- cur_data_ptr[-2] += dxx;
+ if (report_all) {
+ final_variant_ct++;
+ }
+ htable_write = (Ll_str*)round_up_pow2((uintptr_t)wptr, sizeof(uintptr_t));
+ if ((((uintptr_t)htable_write) > ((uintptr_t)duplicate_id_htable_write)) || (((uintptr_t)htable_write) > htable_write_limit)) {
+ goto meta_analysis_ret_NOMEM;
+ }
+ } else {
+ if ((token_ct - 2 * weighted_z < 6) || meta_analysis_allelic_match(&(ll_ptr->ss[slen_base + var_id_len]), token_ptrs, token_ct, a1lenp1, a2lenp1)) {
+ if (problem_mask) {
+ goto meta_analysis_report_error;
+ }
+ // increment file count. Assume little-endian machine
+ uiptr = (uint32_t*)ll_ptr->ss;
+ uii = (*uiptr) & file_ct_mask;
+ if ((!report_all) && (!uii)) {
+ final_variant_ct++;
+ }
+ uii++;
+ memcpy(ll_ptr->ss, &uii, file_ct_byte_width);
} else {
- cur_data_ptr[-2] -= dxx;
+ problem_mask |= 0x40;
+ meta_analysis_report_error:
+ if ((problem_mask == 0x200) && (!report_dups)) {
+ continue;
+ }
+ if (!outfile) {
+ memcpy(outname_end, ".prob", 6);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto meta_analysis_ret_OPEN_FAIL;
+ }
+ }
+ bufptr = memcpyax(g_textbuf, fname_ptr, fname_len, '\t');
+ bufptr = memcpyax(bufptr, token_ptrs[0], var_id_len - 1, '\t');
+ do {
+ wptr = strcpyax(bufptr, problem_strings[__builtin_ctz(problem_mask)], '\n');
+ if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
+ goto meta_analysis_ret_WRITE_FAIL;
+ }
+ problem_mask &= problem_mask - 1;
+ } while (problem_mask);
+ rejected_ct++;
}
- cur_data_ptr[-1] += cur_ess;
}
- cur_data_ptr = &(cur_data_ptr[-2 * ((int32_t)(cur_file_ct_m1 + weighted_z))]);
- cur_data_ptr[-2] = cur_beta;
- cur_data_ptr[-1] = cur_se;
- cur_data_index[2 * cur_var_idx + 1] += 1;
}
if (gzclose(gz_infile) != Z_OK) {
- gz_infile = NULL;
- goto meta_analysis_ret_READ_FAIL;
+ gz_infile = nullptr;
+ goto meta_analysis_ret_READ_FAIL;
+ }
+ gz_infile = nullptr;
+ if (!sorted_extract_ids) {
+ ulii = ((uintptr_t)loadbuf) - ((uintptr_t)duplicate_id_htable_write);
+ if (ulii > duplicate_id_htable_max_alloc) {
+ duplicate_id_htable_max_alloc = ulii;
+ }
}
- gz_infile = NULL;
fname_ptr = &(fname_ptr[fname_len + 1]);
}
- for (cur_var_idx = 0; cur_var_idx < cur_variant_ct; cur_var_idx++) {
- cur_data_ptr = (double*)cur_data_index[2 * cur_var_idx];
- cur_file_ct = cur_data_index[2 * cur_var_idx + 1];
- bufptr = &(cur_entry_list_window[cur_var_idx * master_var_entry_len]);
- wptr = g_textbuf;
- if (use_map) {
- cur_chrom = (uint32_t)((unsigned char)(*bufptr++));
- wptr = width_force(4, wptr, chrom_name_write(chrom_info_ptr, cur_chrom, wptr));
- wptr = memseta(wptr, 32, 2);
- cur_bp = uint32_decode_5_hi_uchar(bufptr);
- bufptr = &(bufptr[5]);
- wptr = uint32toa_w10(cur_bp, wptr);
- }
- *wptr++ = ' ';
- var_id_len = strlen(bufptr);
- // bleah, this column width was not adaptive
- wptr = fw_strcpyn(14, var_id_len, bufptr, wptr);
- if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
+ if (outfile) {
+ if (fclose_null(&outfile)) {
goto meta_analysis_ret_WRITE_FAIL;
}
- if (!no_allele) {
- if (!report_study_specific) {
- bufptr = (char*)cur_data_ptr;
- } else {
- bufptr = (char*)(&(cur_data_ptr[file_ct64]));
- }
- slen = strlen(bufptr);
- putc(' ', outfile);
- if (slen == 1) {
- putc(' ', outfile);
- putc(' ', outfile);
- } else if (slen == 2) {
- putc(' ', outfile);
- }
- bufptr2 = &(bufptr[slen]);
- if (fwrite_checked(bufptr, bufptr2 - bufptr, outfile)) {
+ LOGPRINTFWW("--meta-analysis: %" PRIu64 " problematic line%s; see %s .\n", rejected_ct, (rejected_ct == 1)? "" : "s", outname);
+ }
+
+ // 5. Determine final set of variants, and sort them (by chromosome, then
+ // position, then variant ID in natural order). file_ct and (usually)
+ // [A1+A2 len] are also included past the end of each entry, to remove
+ // the need for an auxiliary index and let us free the hash table.
+ if (!final_variant_ct) {
+ logerrprint("Error: No --meta-analysis variants.\n");
+ goto meta_analysis_ret_INVALID_CMDLINE;
+ #ifdef __LP64__
+ } else if (final_variant_ct > 0x7fffffff) {
+ logerrprint("Error: Too many distinct --meta-analysis variants (max 2^31 - 1).\n");
+ #endif
+ }
+ if (!no_allele) {
+ combined_allele_len_byte_width = 4 - (__builtin_clz(max_combined_allele_len) / 8);
+ }
+ // bp coordinate, if present, expands from 4 to 5 bytes
+ master_var_entry_len = slen_base + use_map + max_var_id_len_p1 + combined_allele_len_byte_width;
+ loadbuf_size = round_up_pow2(line_max, END_ALLOC_CHUNK);
+ loadbuf = (char*)bigstack_end_alloc_presized(loadbuf_size);
+ if ((!loadbuf) ||
+ bigstack_end_alloc_c(final_variant_ct * master_var_entry_len, &master_var_list) ||
+ (((uintptr_t)htable_write) > ((uintptr_t)master_var_list))) {
+ goto meta_analysis_ret_NOMEM;
+ }
+ // instead of following hash table pointers, we just plow through the table
+ // entries in the order they were allocated in; this lets us access memory
+ // sequentially
+ ll_ptr = (Ll_str*)g_bigstack_base;
+ for (master_var_idx = 0; master_var_idx < final_variant_ct;) {
+ cur_file_ct_m1 = 0; // clear high bits
+ memcpy(&cur_file_ct_m1, ll_ptr->ss, file_ct_byte_width);
+ if (report_all || cur_file_ct_m1) {
+ wptr = &(master_var_list[master_var_idx * master_var_entry_len]);
+ master_var_idx++;
+ if (use_map) {
+ *wptr++ = ll_ptr->ss[file_ct_byte_width];
+ memcpy(&uii, &(ll_ptr->ss[file_ct_byte_width + 1]), 4);
+ wptr = uint32_encode_5_hi_uchar(uii, wptr);
+ }
+ bufptr = &(ll_ptr->ss[slen_base]);
+ slen = strlen(bufptr) + 1;
+ wptr = memcpya(wptr, bufptr, slen);
+ wptr = memcpya(wptr, &cur_file_ct_m1, file_ct_byte_width);
+ bufptr = &(bufptr[slen]);
+ if (!no_allele) {
+ // only save allele length sum, including null terminators
+ slen = strlen(bufptr) + 1;
+ slen += strlen(&(bufptr[slen])) + 1;
+ memcpy(wptr, &slen, combined_allele_len_byte_width);
+ bufptr = &(bufptr[slen]);
+ }
+ } else {
+ bufptr = (char*)memchr(&(ll_ptr->ss[slen_base]), 0, max_var_id_len_p1);
+ if (!no_allele) {
+ bufptr = (char*)memchr(&(bufptr[1]), 0, max_combined_allele_len);
+ bufptr = (char*)memchr(&(bufptr[1]), 0, max_combined_allele_len);
+ }
+ bufptr++;
+ }
+ // now bufptr points to the byte past the end of the hash table entry
+ // allocation, and we know the next allocation starts at [this byte,
+ // rounded up to nearest word boundary]
+ ll_ptr = (Ll_str*)round_up_pow2((uintptr_t)bufptr, sizeof(intptr_t));
+ }
+ qsort(master_var_list, final_variant_ct, master_var_entry_len, strcmp_natural);
+ // don't need htable anymore
+ bigstack_reset(bigstack_mark2);
+ if (!sorted_extract_ids) {
+ bigstack_alloc(duplicate_id_htable_max_alloc);
+ }
+ total_data_slots = bigstack_left() / sizeof(uintptr_t);
+
+ // 6. Remaining load passes: determine how many remaining variants' worth
+ // of effect sizes/SEs/Ps/ESSes fit in memory, load and meta-analyze
+ // just those variants, rinse and repeat.
+ memcpy(outname_end, ".meta", 6);
+ if (fopen_checked(outname, "w", &outfile)) {
+ goto meta_analysis_ret_OPEN_FAIL;
+ }
+ if (use_map) {
+ fputs(" CHR BP", outfile);
+ }
+ fputs(" SNP", outfile);
+ if (!no_allele) {
+ fputs(" A1 A2", outfile);
+ }
+ fputs(output_beta? " N P P(R) BETA BETA(R) Q I" : " N P P(R) OR OR(R) Q I", outfile);
+ if (weighted_z) {
+ fputs(" WEIGHTED_Z P(WZ)", outfile);
+ }
+ if (report_study_specific) {
+ for (file_idx = 0; file_idx < file_ct; file_idx++) {
+ g_textbuf[0] = ' ';
+ g_textbuf[1] = 'F';
+ wptr = uint32toa(file_idx, &(g_textbuf[2]));
+ wptr = width_force(8, g_textbuf, wptr);
+ if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
goto meta_analysis_ret_WRITE_FAIL;
}
+ }
+ }
+ putc_unlocked('\n', outfile);
+
+ cur_data_index = (uintptr_t*)g_bigstack_base;
+ if (use_map) {
+ // chr/bp values can be discordant; when they are, we can't directly
+ // search master_var_list for variant IDs. Instead, we populate
+ // cur_window_marker_ids with an ASCII-sorted list (marker ID,
+ // cur_var_idx) tuples.
+ window_entry_base_cost += (max_var_id_len_p1 + sizeof(int32_t) + sizeof(intptr_t) - 1) / sizeof(intptr_t);
+ }
+ max_var_id_len_p5 = max_var_id_len_p1 + 4;
+ while (1) {
+ first_var_idx = last_var_idx;
+ // memory requirements per current-window variant:
+ // - 2 * sizeof(intptr_t) for cur_data pointer and current file write
+ // index; this grows from bottom of stack, while pointed-to stuff is
+ // allocated from top
+ // (technically could update the file write indexes in-place, but this
+ // part is not memory-critical so I doubt it's worth it.)
+ // - 2 * sizeof(double) * file_ct for effect sizes and SEs; filled from
+ // back to front
+ // sometimes, numerator and squared denominator of weighted Z-score
+ // sometimes, bitfield describing which files are involved
+ // sometimes, combined_allele_len for A1/A2, sizeof(double)-aligned
+ // - sometimes, (max_var_id_len_p1 + sizeof(int32_t)) rounded up, for
+ // cur_window_marker_ids.
+ cur_entry_list_window = &(master_var_list[last_var_idx * master_var_entry_len]);
+ bufptr = cur_entry_list_window;
+ variants_remaining = final_variant_ct - last_var_idx;
+ if (use_map) {
+ bufptr = &(bufptr[6]); // ignore chromosome/position here
+ }
+ cur_data = (double*)(&(cur_data_index[total_data_slots]));
+ ulii = 0;
+ for (cur_variant_ct = 0; cur_variant_ct < variants_remaining; cur_variant_ct++) {
+ bufptr2 = &(bufptr[cur_variant_ct * master_var_entry_len]);
+ bufptr2 = (char*)memchr(bufptr2, 0, master_var_entry_len);
bufptr2++;
- if (*bufptr2) {
- // bugfix: fputs_w4 does the wrong thing for 4+ character alleles.
- // instead, we want a leading space, then fputs_w3.
- if (!bufptr2[1]) {
- fputs(" ", outfile);
- putc(bufptr2[0], outfile);
- } else if (!bufptr2[2]) {
- fputs(" ", outfile);
- putc(bufptr2[0], outfile);
- putc(bufptr2[1], outfile);
- } else {
- putc(' ', outfile);
- fputs(bufptr2, outfile);
- }
- } else {
- fputs(" ?", outfile);
- }
- }
- g_textbuf[0] = ' ';
- wptr = &(g_textbuf[1]);
- wptr = width_force(3, wptr, uint32toa(cur_file_ct, wptr));
- if (cur_file_ct >= 2) {
- // and here's the actual computation.
- numer = 0.0;
- denom = 0.0;
- denom2 = 0.0;
- for (file_idx = 1; file_idx <= cur_file_ct; file_idx++) {
- ii = ((int32_t)(file_idx + weighted_z)) * (-2);
- cur_beta = cur_data_ptr[ii];
- cur_se = cur_data_ptr[ii + 1];
- cur_inv_var = 1.0 / (cur_se * cur_se);
- numer += cur_inv_var * cur_beta;
- denom += cur_inv_var;
- denom2 += cur_inv_var * cur_inv_var;
- }
- varsum = 1.0 / denom;
- summ = numer * varsum;
- meta_q = 0.0;
- for (file_idx = 1; file_idx <= cur_file_ct; file_idx++) {
- ii = ((int32_t)(file_idx + weighted_z)) * (-2);
- cur_beta = cur_data_ptr[ii];
- cur_se = cur_data_ptr[ii + 1];
- dxx = (cur_beta - summ) / cur_se;
- meta_q += dxx * dxx;
- }
- dxx = (double)((int32_t)(cur_file_ct - 1));
- tau2 = (meta_q - dxx) / (denom - denom2 / denom);
- if (tau2 < 0.0) {
- tau2 = 0.0;
- }
- numer_random = 0.0;
- denom_random = 0.0;
- for (file_idx = 1; file_idx <= cur_file_ct; file_idx++) {
- ii = ((int32_t)(file_idx + weighted_z)) * (-2);
- cur_beta = cur_data_ptr[ii];
- cur_se = cur_data_ptr[ii + 1];
- cur_inv_var = 1.0 / (cur_se * cur_se + tau2);
- numer_random += cur_inv_var * cur_beta;
- denom_random += cur_inv_var;
- }
- varsum_random = 1.0 / denom_random;
- summ_random = numer_random * varsum_random;
- summtest = summ / sqrt(varsum);
- summtest_random = summ_random / sqrt(varsum_random);
- p1 = chiprob_p(summtest * summtest, 1);
- pr = chiprob_p(summtest_random * summtest_random, 1);
- pq = chiprob_p(meta_q, dxx);
- meta_i = 100 * ((meta_q - dxx) / meta_q);
- if (meta_i < 0.0) {
- meta_i = 0.0;
- } else if (meta_i > 100) {
- meta_i = 100;
- }
- if (!output_beta) {
- summ = exp(summ);
- summ_random = exp(summ_random);
+ cur_file_ct_m1 = 0;
+ memcpy(&cur_file_ct_m1, bufptr2, file_ct_byte_width);
+ cur_data_slots = 0;
+ if (report_study_specific) {
+ #ifdef __LP64__
+ cur_data_slots += file_ct64;
+ #else
+ cur_data_slots += 2 * file_ct64;
+ #endif
}
- *wptr++ = ' ';
- if (p1 >= 0.0) {
- wptr = dtoa_g_wxp4x(MAXV(p1, output_min_p), 11, ' ', wptr);
- } else {
- wptr = memcpya(wptr, " NA ", 12);
+ if (!no_allele) {
+ cur_combined_allele_len = 0;
+ memcpy(&cur_combined_allele_len, &(bufptr2[file_ct_byte_width]), combined_allele_len_byte_width);
+ cur_data_slots += (8 / BYTECT) * ((cur_combined_allele_len + 7) / 8);
+ }
+ cur_data_ptr = &(cur_data[-((intptr_t)cur_data_slots)]);
+ cur_data_slots += window_entry_base_cost + (16 / BYTECT) * (cur_file_ct_m1 + 1 + weighted_z);
+ ulii += cur_data_slots;
+ if (ulii > total_data_slots) {
+ break;
}
- if (pr >= 0.0) {
- wptr = dtoa_g_wxp4x(MAXV(pr, output_min_p), 11, ' ', wptr);
- } else {
- wptr = memcpya(wptr, " NA ", 12);
+ if (report_study_specific) {
+ fill_ulong_zero(file_ct64 * (8 / BYTECT), (uintptr_t*)cur_data_ptr);
}
- wptr = dtoa_f_w7p4x(summ, ' ', wptr);
- wptr = dtoa_f_w7p4x(summ_random, ' ', wptr);
- if (pq >= 0.0) {
- wptr = dtoa_f_w7p4x(MAXV(pq, output_min_p), ' ', wptr);
+ if (weighted_z) {
+ cur_data[-2] = 0.0;
+ cur_data[-1] = 0.0;
+ }
+ cur_data = &(cur_data[-((intptr_t)(cur_data_slots - window_entry_base_cost))]);
+ // [effect sizes/SEs, reverse order] {WZ} {file idx bitfield} {A1/A2}
+ // ^
+ // |
+ // cur_data_ptr
+ //
+ // cur_data_index[2 * var_idx + 1] = # of effect sizes/etc. saved so
+ // far
+ cur_data_index[2 * cur_variant_ct] = (uintptr_t)cur_data_ptr;
+ cur_data_index[2 * cur_variant_ct + 1] = 0;
+ }
+ if (!cur_variant_ct) {
+ goto meta_analysis_ret_NOMEM;
+ }
+ last_var_idx += cur_variant_ct;
+ if (use_map) {
+ // position cur_window_marker_ids on top of cur_data_index
+ cur_window_marker_ids = (char*)(&(cur_data_index[2 * cur_variant_ct]));
+ // note that bufptr is positioned properly for reading variant IDs,
+ // though it won't be after this loop
+ bufptr2 = cur_window_marker_ids;
+ for (uii = 0; uii < cur_variant_ct; uii++) {
+ strcpy(bufptr2, bufptr);
+ memcpy(&(bufptr2[max_var_id_len_p1]), &uii, 4);
+ bufptr = &(bufptr[master_var_entry_len]);
+ bufptr2 = &(bufptr2[max_var_id_len_p5]);
+ }
+ qsort(cur_window_marker_ids, cur_variant_ct, max_var_id_len_p5, strcmp_casted);
+ }
+ fname_ptr = input_fnames;
+ for (file_idx = 0; file_idx < file_ct; file_idx++) {
+ if (sorted_extract_ids) {
+ fill_ulong_zero(extract_ctl, duplicate_id_bitfield);
} else {
- wptr = memcpya(wptr, " NA ", 8);
+ for (uii = 0; uii < HASHSIZE; uii++) {
+ duplicate_id_htable[uii] = nullptr;
+ }
}
- wptr = width_force(7, wptr, dtoa_f_p2(meta_i, wptr));
- if (weighted_z) {
- numer = cur_data_ptr[-2];
- denom2 = cur_data_ptr[-1];
- dxx = numer / sqrt(denom2);
- *wptr++ = ' ';
- wptr = dtoa_g_wxp4x(dxx, 11, ' ', wptr);
- dxx = 1.0 - 2 * fabs(normdist(fabs(dxx)) - 0.5);
- wptr = dtoa_g_wxp4(MAXV(dxx, output_min_p), 11, wptr);
+ duplicate_id_htable_write = (Ll_str*)bigstack_mark2;
+ fname_len = strlen(fname_ptr);
+ token_ct = parse_max;
+ retval = meta_analysis_open_and_read_header(fname_ptr, loadbuf, loadbuf_size, sorted_header_dict, header_id_map, header_dict_ct, max_header_len, weighted_z, &token_ct, &gz_infile, col_skips, col_sequence, nullptr, nullptr);
+ if (retval) {
+ goto meta_analysis_ret_1;
}
- } else {
- wptr = memcpya(wptr, " NA NA NA NA NA NA", 56);
- if (weighted_z) {
- wptr = memcpya(wptr, " NA NA", 24);
+ while (1) {
+ // yeah, this is repetitive
+ if (!gzgets(gz_infile, loadbuf, loadbuf_size)) {
+ if (!gzeof(gz_infile)) {
+ goto meta_analysis_ret_READ_FAIL;
+ }
+ break;
+ }
+ bufptr = skip_initial_spaces(loadbuf);
+ if (is_eoln_kns(*bufptr)) {
+ continue;
+ }
+ bufptr = next_token_multz(bufptr, col_skips[0]);
+ token_ptrs[col_sequence[0]] = bufptr;
+ for (seq_idx = 1; seq_idx < token_ct; seq_idx++) {
+ bufptr = next_token_mult(bufptr, col_skips[seq_idx]);
+ token_ptrs[col_sequence[seq_idx]] = bufptr;
+ }
+ if (!bufptr) {
+ continue;
+ }
+ bufptr = token_ptrs[0];
+ var_id_len = strlen_se(bufptr);
+ if (var_id_len >= max_var_id_len_p1) {
+ continue;
+ }
+ bufptr[var_id_len] = '\0';
+ if (sorted_extract_ids) {
+ ii = bsearch_str(bufptr, var_id_len, sorted_extract_ids, max_extract_id_len, extract_ct);
+ if (ii == -1) {
+ continue;
+ }
+ if (is_set(duplicate_id_bitfield, ii)) {
+ continue;
+ }
+ set_bit(ii, duplicate_id_bitfield);
+ } else {
+ uii = hashval2(bufptr, var_id_len);
+ ll_pptr = &(duplicate_id_htable[uii]);
+ while (1) {
+ ll_ptr = *ll_pptr;
+ if ((!ll_ptr) || (!strcmp(bufptr, ll_ptr->ss))) {
+ break;
+ }
+ ll_pptr = &(ll_ptr->next);
+ }
+ if (ll_ptr) {
+ continue;
+ }
+ *ll_pptr = duplicate_id_htable_write;
+ duplicate_id_htable_write->next = nullptr;
+ memcpy(duplicate_id_htable_write->ss, bufptr, var_id_len + 1);
+ ulii = sizeof(intptr_t) + ((var_id_len + BYTECT) & (~(BYTECT - 1)));
+ duplicate_id_htable_write = (Ll_str*)(((uintptr_t)duplicate_id_htable_write) + ulii);
+ }
+ if (use_map) {
+ ii = get_chrom_code_destructive(chrom_info_ptr, token_ptrs[5]);
+ if (ii < 0) {
+ continue;
+ }
+ cur_chrom = (uint32_t)ii;
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom)) {
+ continue;
+ }
+ if (scan_uint_defcap(token_ptrs[6], &cur_bp)) {
+ continue;
+ }
+ if (!no_allele) {
+ bufptr = token_ptrs[7];
+ a1lenp1 = strlen_se(bufptr);
+ if ((*bufptr == missing_geno) && (a1lenp1 == 1)) {
+ continue;
+ }
+ bufptr[a1lenp1++] = '\0';
+ if (token_ct & 1) {
+ bufptr = token_ptrs[8];
+ a2lenp1 = strlen_se(bufptr);
+ if ((*bufptr == missing_geno) && (a2lenp1 == 1)) {
+ continue;
+ }
+ bufptr[a2lenp1++] = '\0';
+ }
+ }
+ }
+ if (scan_double(token_ptrs[1], &cur_beta)) {
+ continue;
+ }
+ if (!input_beta) {
+ cur_beta = log(cur_beta);
+ }
+ if (!realnum(cur_beta)) {
+ continue;
+ }
+ if (scan_double(token_ptrs[2], &cur_se) || (!(cur_se >= 0.0)) || (cur_se == INFINITY)) {
+ continue;
+ }
+ if (weighted_z) {
+ if (scan_double(token_ptrs[3], &cur_p) || (!(cur_p >= 0.0)) || (cur_p > 1.0)) {
+ continue;
+ }
+ if (scan_double(token_ptrs[4], &cur_ess) || (!(cur_ess > 0.0)) || (cur_ess == INFINITY)) {
+ continue;
+ }
+ }
+ bufptr = token_ptrs[0];
+ if (use_map) {
+ ii = bsearch_str(bufptr, var_id_len, cur_window_marker_ids, max_var_id_len_p5, cur_variant_ct);
+ if (ii == -1) {
+ continue;
+ }
+ #ifdef __LP64__
+ cur_var_idx = 0; // clear high 32 bits
+ #endif
+ memcpy(&cur_var_idx, &(cur_window_marker_ids[(((uint32_t)ii) * max_var_id_len_p5) + max_var_id_len_p1]), 4);
+ } else {
+ bufptr[var_id_len] = '\0';
+ ulii = (uint32_t)bsearch_str_natural(bufptr, cur_entry_list_window, master_var_entry_len, cur_variant_ct);
+ // this comparison catches -1 return value
+ if ((ulii >= last_var_idx) || (ulii < first_var_idx)) {
+ continue;
+ }
+ cur_var_idx = ulii - first_var_idx;
+ }
+ cur_data_ptr = (double*)cur_data_index[2 * cur_var_idx];
+ cur_file_ct_m1 = cur_data_index[2 * cur_var_idx + 1];
+ if (!no_allele) {
+ if (!report_study_specific) {
+ bufptr2 = (char*)cur_data_ptr;
+ } else {
+ bufptr2 = (char*)(&(cur_data_ptr[file_ct64]));
+ }
+ if (!cur_file_ct_m1) {
+ // save allele codes
+ bufptr2 = memcpya(bufptr2, token_ptrs[7], a1lenp1);
+ if (token_ct & 1) {
+ bufptr2 = memcpya(bufptr2, token_ptrs[8], a2lenp1);
+ } else {
+ *bufptr2++ = '\0';
+ }
+ } else {
+ // compare them
+ uii = meta_analysis_allelic_match(bufptr2, token_ptrs, token_ct, a1lenp1, a2lenp1);
+ if (!uii) {
+ continue;
+ } else if (uii == 2) {
+ cur_beta = -cur_beta;
+ }
+ }
+ }
+ if (report_study_specific) {
+ set_bit(file_idx, (uintptr_t*)cur_data_ptr);
+ }
+ if (weighted_z) {
+ dxx = ltqnorm(1.0 - cur_p * 0.5) * sqrt(cur_ess);
+ if (cur_beta > 0.0) {
+ cur_data_ptr[-2] += dxx;
+ } else {
+ cur_data_ptr[-2] -= dxx;
+ }
+ cur_data_ptr[-1] += cur_ess;
+ }
+ cur_data_ptr = &(cur_data_ptr[-2 * ((int32_t)(cur_file_ct_m1 + weighted_z))]);
+ cur_data_ptr[-2] = cur_beta;
+ cur_data_ptr[-1] = cur_se;
+ cur_data_index[2 * cur_var_idx + 1] += 1;
}
+ if (gzclose(gz_infile) != Z_OK) {
+ gz_infile = nullptr;
+ goto meta_analysis_ret_READ_FAIL;
+ }
+ gz_infile = nullptr;
+ fname_ptr = &(fname_ptr[fname_len + 1]);
}
- if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
- goto meta_analysis_ret_WRITE_FAIL;
- }
- if (report_study_specific) {
- uii = 0;
- ulptr = (uintptr_t*)cur_data_ptr;
- for (file_idx = 0; file_idx < file_ct; file_idx++) {
- if (is_set(ulptr, file_idx)) {
- uii++;
- dxx = cur_data_ptr[((int32_t)(uii + weighted_z)) * (-2)];
- if (!output_beta) {
- // finish fixing PLINK 1.07 bug
- dxx = exp(dxx);
+ for (cur_var_idx = 0; cur_var_idx < cur_variant_ct; cur_var_idx++) {
+ cur_data_ptr = (double*)cur_data_index[2 * cur_var_idx];
+ cur_file_ct = cur_data_index[2 * cur_var_idx + 1];
+ bufptr = &(cur_entry_list_window[cur_var_idx * master_var_entry_len]);
+ wptr = g_textbuf;
+ if (use_map) {
+ cur_chrom = (uint32_t)((unsigned char)(*bufptr++));
+ wptr = width_force(4, wptr, chrom_name_write(chrom_info_ptr, cur_chrom, wptr));
+ wptr = memseta(wptr, 32, 2);
+ cur_bp = uint32_decode_5_hi_uchar(bufptr);
+ bufptr = &(bufptr[5]);
+ wptr = uint32toa_w10(cur_bp, wptr);
+ }
+ *wptr++ = ' ';
+ var_id_len = strlen(bufptr);
+ // bleah, this column width was not adaptive
+ wptr = fw_strcpyn(14, var_id_len, bufptr, wptr);
+ if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
+ goto meta_analysis_ret_WRITE_FAIL;
+ }
+ if (!no_allele) {
+ if (!report_study_specific) {
+ bufptr = (char*)cur_data_ptr;
+ } else {
+ bufptr = (char*)(&(cur_data_ptr[file_ct64]));
+ }
+ slen = strlen(bufptr);
+ putc_unlocked(' ', outfile);
+ if (slen == 1) {
+ putc_unlocked(' ', outfile);
+ putc_unlocked(' ', outfile);
+ } else if (slen == 2) {
+ putc_unlocked(' ', outfile);
+ }
+ bufptr2 = &(bufptr[slen]);
+ if (fwrite_checked(bufptr, bufptr2 - bufptr, outfile)) {
+ goto meta_analysis_ret_WRITE_FAIL;
+ }
+ bufptr2++;
+ if (*bufptr2) {
+ // bugfix: fputs_w4 does the wrong thing for 4+ character alleles.
+ // instead, we want a leading space, then fputs_w3.
+ if (!bufptr2[1]) {
+ fputs(" ", outfile);
+ putc_unlocked(bufptr2[0], outfile);
+ } else if (!bufptr2[2]) {
+ fputs(" ", outfile);
+ putc_unlocked(bufptr2[0], outfile);
+ putc_unlocked(bufptr2[1], outfile);
+ } else {
+ putc_unlocked(' ', outfile);
+ fputs(bufptr2, outfile);
}
- dtoa_f_w7p4x(dxx, '\0', &(g_textbuf[1]));
- fputs(g_textbuf, outfile);
} else {
- fputs(" NA", outfile);
+ fputs(" ?", outfile);
}
}
+ g_textbuf[0] = ' ';
+ wptr = &(g_textbuf[1]);
+ wptr = width_force(3, wptr, uint32toa(cur_file_ct, wptr));
+ if (cur_file_ct >= 2) {
+ // and here's the actual computation.
+ numer = 0.0;
+ denom = 0.0;
+ denom2 = 0.0;
+ for (file_idx = 1; file_idx <= cur_file_ct; file_idx++) {
+ ii = ((int32_t)(file_idx + weighted_z)) * (-2);
+ cur_beta = cur_data_ptr[ii];
+ cur_se = cur_data_ptr[ii + 1];
+ cur_inv_var = 1.0 / (cur_se * cur_se);
+ numer += cur_inv_var * cur_beta;
+ denom += cur_inv_var;
+ denom2 += cur_inv_var * cur_inv_var;
+ }
+ varsum = 1.0 / denom;
+ summ = numer * varsum;
+ meta_q = 0.0;
+ for (file_idx = 1; file_idx <= cur_file_ct; file_idx++) {
+ ii = ((int32_t)(file_idx + weighted_z)) * (-2);
+ cur_beta = cur_data_ptr[ii];
+ cur_se = cur_data_ptr[ii + 1];
+ dxx = (cur_beta - summ) / cur_se;
+ meta_q += dxx * dxx;
+ }
+ dxx = (double)((int32_t)(cur_file_ct - 1));
+ tau2 = (meta_q - dxx) / (denom - denom2 / denom);
+ if (tau2 < 0.0) {
+ tau2 = 0.0;
+ }
+ numer_random = 0.0;
+ denom_random = 0.0;
+ for (file_idx = 1; file_idx <= cur_file_ct; file_idx++) {
+ ii = ((int32_t)(file_idx + weighted_z)) * (-2);
+ cur_beta = cur_data_ptr[ii];
+ cur_se = cur_data_ptr[ii + 1];
+ cur_inv_var = 1.0 / (cur_se * cur_se + tau2);
+ numer_random += cur_inv_var * cur_beta;
+ denom_random += cur_inv_var;
+ }
+ varsum_random = 1.0 / denom_random;
+ summ_random = numer_random * varsum_random;
+ summtest = summ / sqrt(varsum);
+ summtest_random = summ_random / sqrt(varsum_random);
+ p1 = chiprob_p(summtest * summtest, 1);
+ pr = chiprob_p(summtest_random * summtest_random, 1);
+ pq = chiprob_p(meta_q, dxx);
+ meta_i = 100 * ((meta_q - dxx) / meta_q);
+ if (meta_i < 0.0) {
+ meta_i = 0.0;
+ } else if (meta_i > 100) {
+ meta_i = 100;
+ }
+ if (!output_beta) {
+ summ = exp(summ);
+ summ_random = exp(summ_random);
+ }
+ *wptr++ = ' ';
+ if (p1 >= 0.0) {
+ wptr = dtoa_g_wxp4x(MAXV(p1, output_min_p), 11, ' ', wptr);
+ } else {
+ wptr = memcpya(wptr, " NA ", 12);
+ }
+ if (pr >= 0.0) {
+ wptr = dtoa_g_wxp4x(MAXV(pr, output_min_p), 11, ' ', wptr);
+ } else {
+ wptr = memcpya(wptr, " NA ", 12);
+ }
+ wptr = dtoa_f_w7p4x(summ, ' ', wptr);
+ wptr = dtoa_f_w7p4x(summ_random, ' ', wptr);
+ if (pq >= 0.0) {
+ wptr = dtoa_f_w7p4x(MAXV(pq, output_min_p), ' ', wptr);
+ } else {
+ wptr = memcpya(wptr, " NA ", 8);
+ }
+ wptr = width_force(7, wptr, dtoa_f_p2(meta_i, wptr));
+ if (weighted_z) {
+ numer = cur_data_ptr[-2];
+ denom2 = cur_data_ptr[-1];
+ dxx = numer / sqrt(denom2);
+ *wptr++ = ' ';
+ wptr = dtoa_g_wxp4x(dxx, 11, ' ', wptr);
+ dxx = 1.0 - 2 * fabs(normdist(fabs(dxx)) - 0.5);
+ wptr = dtoa_g_wxp4(MAXV(dxx, output_min_p), 11, wptr);
+ }
+ } else {
+ wptr = memcpya(wptr, " NA NA NA NA NA NA", 56);
+ if (weighted_z) {
+ wptr = memcpya(wptr, " NA NA", 24);
+ }
+ }
+ if (fwrite_checked(g_textbuf, wptr - g_textbuf, outfile)) {
+ goto meta_analysis_ret_WRITE_FAIL;
+ }
+ if (report_study_specific) {
+ uii = 0;
+ ulptr = (uintptr_t*)cur_data_ptr;
+ for (file_idx = 0; file_idx < file_ct; file_idx++) {
+ if (is_set(ulptr, file_idx)) {
+ uii++;
+ dxx = cur_data_ptr[((int32_t)(uii + weighted_z)) * (-2)];
+ if (!output_beta) {
+ // finish fixing PLINK 1.07 bug
+ dxx = exp(dxx);
+ }
+ dtoa_f_w7p4x(dxx, '\0', &(g_textbuf[1]));
+ fputs(g_textbuf, outfile);
+ } else {
+ fputs(" NA", outfile);
+ }
+ }
+ }
+ putc_unlocked('\n', outfile);
+ }
+ if (last_var_idx == final_variant_ct) {
+ break;
}
- putc('\n', outfile);
+ pass_idx++;
+ printf("\r--meta-analysis: Pass %u complete (%" PRIu64 "%%).", pass_idx, (last_var_idx * ((uint64_t)100)) / final_variant_ct);
+ fflush(stdout);
}
- if (last_var_idx == final_variant_ct) {
- break;
+ if (pass_idx) {
+ putc_unlocked('\r', stdout);
}
- pass_idx++;
- printf("\r--meta-analysis: Pass %u complete (%" PRIu64 "%%).", pass_idx, (last_var_idx * 100LLU) / final_variant_ct);
- fflush(stdout);
- }
- if (pass_idx) {
- putchar('\r');
+ LOGPRINTFWW("--meta-analysis: %" PRIuPTR " variant%s processed; results written to %s .\n", final_variant_ct, (final_variant_ct == 1)? "" : "s", outname);
}
- LOGPRINTFWW("--meta-analysis: %" PRIuPTR " variant%s processed; results written to %s .\n", final_variant_ct, (final_variant_ct == 1)? "" : "s", outname);
while (0) {
meta_analysis_ret_NOMEM:
diff --git a/plink_perm.c b/plink_perm.c
index aaad0b4..3605f65 100644
--- a/plink_perm.c
+++ b/plink_perm.c
@@ -50,7 +50,7 @@ void generate_cc_perm_vec(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotien
uint32_t urand;
uint32_t uii;
if (set_ct * 2 < tot_ct) {
- fill_ulong_zero(perm_vec, QUATERCT_TO_ALIGNED_WORDCT(tot_ct));
+ fill_ulong_zero(QUATERCT_TO_ALIGNED_WORDCT(tot_ct), perm_vec);
for (; num_set < set_ct; num_set++) {
do {
do {
@@ -91,7 +91,7 @@ void generate_cc_perm1(uint32_t tot_ct, uint32_t set_ct, uint32_t tot_quotient,
uint32_t urand;
uint32_t uii;
if (set_ct * 2 < tot_ct) {
- fill_ulong_zero(perm_vec, BITCT_TO_WORDCT(tot_ct));
+ fill_ulong_zero(BITCT_TO_WORDCT(tot_ct), perm_vec);
for (; num_set < set_ct; num_set++) {
do {
do {
@@ -419,7 +419,7 @@ THREAD_RET_TYPE generate_qt_cluster_perms_smajor_thread(void* arg) {
pmax = g_perm_vec_ct;
}
pdiff = pmax - pmin;
- fill_uint_zero(in_cluster_positions, cluster_ct);
+ fill_uint_zero(cluster_ct, in_cluster_positions);
for (; sample_idx < pheno_nm_ct; sample_idx++) {
cur_source = *pheno_d2++;
cluster_idx = sample_to_cluster[sample_idx];
@@ -536,7 +536,7 @@ THREAD_RET_TYPE generate_qt_cluster_perms_pmajor_thread(void* arg) {
}
pdiff = pmax - pmin;
for (poffset = 0; poffset < pdiff; poffset++) {
- fill_uint_zero(in_cluster_positions, cluster_ct);
+ fill_uint_zero(cluster_ct, in_cluster_positions);
pheno_ptr = pheno_d2;
for (sample_idx = 0; sample_idx < sample_valid_ct; sample_idx++) {
cluster_idx = sample_to_cluster[sample_idx];
@@ -606,7 +606,7 @@ void transpose_perms(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno_
memcpy(perm_vecst, wbuf, 16);
perm_vecst = &(perm_vecst[4]);
transpose_perms_loop_start:
- fill_uint_zero(wbuf, 4);
+ fill_uint_zero(4, wbuf);
wshift = 0;
}
wbptr = wbuf;
@@ -660,7 +660,7 @@ void transpose_perm1s(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno
memcpy(perm_vecst, wbuf, 16);
perm_vecst = &(perm_vecst[4]);
transpose_perm1s_loop_start:
- fill_uint_zero(wbuf, 2);
+ fill_uint_zero(2, wbuf);
wshift = 0;
}
wbptr = wbuf;
@@ -690,12 +690,12 @@ void transpose_perm1s(uintptr_t* perm_vecs, uint32_t perm_vec_ct, uint32_t pheno
int32_t make_perm_pheno(pthread_t* threads, char* outname, char* outname_end, uintptr_t unfiltered_sample_ct, uintptr_t* sample_exclude, uintptr_t sample_ct, char* sample_ids, uintptr_t max_sample_id_len, uint32_t cluster_ct, uint32_t* cluster_map, uint32_t* cluster_starts, uint32_t pheno_nm_ct, uintptr_t* pheno_nm, uintptr_t* pheno_c, double* pheno_d, char* output_missing_pheno, uint32_t permphe_ct) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t unfiltered_sample_ctl = BITCT_TO_WORDCT(unfiltered_sample_ct);
uintptr_t pheno_nm_ctl = BITCT_TO_WORDCT(pheno_nm_ct);
uintptr_t pheno_nm_ctv = round_up_pow2(pheno_nm_ctl, VEC_WORDS);
uintptr_t perm_vec_ctcl8m = 0;
- char* writebuf = NULL;
+ char* writebuf = nullptr;
int32_t retval = 0;
uintptr_t* ulptr;
double* dptr;
@@ -747,7 +747,7 @@ int32_t make_perm_pheno(pthread_t* threads, char* outname, char* outname_end, ui
}
generate_cc_cluster_perms_thread((void*)ulii);
} else {
- g_perm_cluster_starts = NULL;
+ g_perm_cluster_starts = nullptr;
g_perm_tot_quotient = 0x100000000LLU / pheno_nm_ct;
magic_num(g_perm_tot_quotient, &g_perm_totq_magic, &g_perm_totq_preshift, &g_perm_totq_postshift, &g_perm_totq_incr);
if (spawn_threads(threads, &generate_cc_perms_thread, g_perm_generation_thread_ct)) {
@@ -765,7 +765,7 @@ int32_t make_perm_pheno(pthread_t* threads, char* outname, char* outname_end, ui
goto make_perm_pheno_ret_NOMEM;
}
if (cluster_starts) {
- retval = cluster_include_and_reindex(unfiltered_sample_ct, pheno_nm, 1, NULL, pheno_nm_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, NULL, NULL);
+ retval = cluster_include_and_reindex(unfiltered_sample_ct, pheno_nm, 1, nullptr, pheno_nm_ct, 0, cluster_ct, cluster_map, cluster_starts, &g_perm_cluster_ct, &g_perm_cluster_map, &g_perm_cluster_starts, nullptr, nullptr);
if (retval) {
goto make_perm_pheno_ret_1;
}
@@ -803,15 +803,15 @@ int32_t make_perm_pheno(pthread_t* threads, char* outname, char* outname_end, ui
fputs(&(sample_ids[sample_uidx * max_sample_id_len]), outfile);
if (!IS_SET(pheno_nm, sample_uidx)) {
for (perm_idx = 0; perm_idx < permphe_ct; perm_idx++) {
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
fputs(output_missing_pheno, outfile);
}
} else if (pheno_c) {
ulptr = &(g_perm_vecs[sample_nmidx / BITCT]);
rshift = sample_nmidx % BITCT;
for (perm_idx = 0; perm_idx < permphe_ct; perm_idx++) {
- putc('\t', outfile);
- putc('1' + ((ulptr[perm_idx * pheno_nm_ctv] >> rshift) & 1), outfile);
+ putc_unlocked('\t', outfile);
+ putc_unlocked('1' + ((ulptr[perm_idx * pheno_nm_ctv] >> rshift) & 1), outfile);
}
sample_nmidx++;
} else {
diff --git a/plink_rserve.c b/plink_rserve.c
index 277d129..29a0dfc 100644
--- a/plink_rserve.c
+++ b/plink_rserve.c
@@ -16,15 +16,15 @@
int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin_debug, FILE* bedfile, uintptr_t bed_offset, uintptr_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uintptr_t* marker_reverse, char* marker_ids, uintptr_t max_marker_id_len, char** marker_allele_ptrs, uint32_t* marker_pos, uint32_t plink_maxsnp, Chrom_info* chrom_info_ptr, uintptr_t unfiltered_sample_ct, uintptr_t* pheno_nm, uint32_t pheno_nm_ct, uintptr_t* pheno_c, double* pheno_d, u [...]
// See PLINK 1.07 r.cpp.
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* infile = NULL;
- FILE* outfile = NULL;
- int32_t* geno_int_buf = NULL;
- Rinteger* r_n = NULL;
- Rinteger* r_s = NULL;
- Rdouble* r_p = NULL;
- Rdouble* r_cov = NULL;
- Rconnection* rc = NULL;
- char* chrom_name_ptr = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
+ int32_t* geno_int_buf = nullptr;
+ Rinteger* r_n = nullptr;
+ Rinteger* r_s = nullptr;
+ Rdouble* r_p = nullptr;
+ Rdouble* r_cov = nullptr;
+ Rconnection* rc = nullptr;
+ char* chrom_name_ptr = nullptr;
uintptr_t unfiltered_sample_ct4 = (unfiltered_sample_ct + 3) / 4;
uintptr_t unfiltered_sample_ctl2 = (unfiltered_sample_ct + (BITCT2 - 1)) / BITCT2;
uintptr_t pheno_nm_ctl2 = (pheno_nm_ct + (BITCT2 - 1)) / BITCT2;
@@ -35,7 +35,7 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
uint32_t chrom_name_len = 0;
uint32_t pct = 0;
int32_t retval = 0;
- char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_LEN];
+ char chrom_name_buf[3 + MAX_CHROM_TEXTNUM_SLEN];
uintptr_t marker_uidx;
uintptr_t marker_uidx_base;
uintptr_t ulii;
@@ -136,7 +136,7 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
if (bigstack_alloc_i(unfiltered_sample_ct, &sample_to_cluster)) {
goto rserve_call_ret_NOMEM;
}
- fill_int_one(sample_to_cluster, pheno_nm_ct);
+ fill_int_one(pheno_nm_ct, sample_to_cluster);
fill_unfiltered_sample_to_cluster(unfiltered_sample_ct, cluster_ct, cluster_map, cluster_starts, (uint32_t*)sample_to_cluster);
inplace_collapse_uint32_incl((uint32_t*)sample_to_cluster, unfiltered_sample_ct, pheno_nm, pheno_nm_ct);
bigstack_shrink_top(sample_to_cluster, pheno_nm_ct * sizeof(int32_t));
@@ -224,8 +224,7 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
fwrite(g_textbuf, 1, (uintptr_t)(bufptr - g_textbuf), outfile);
}
bufptr = int32toa(sample_to_cluster[sample_idx], g_textbuf);
- bufptr = memcpya(bufptr, " ) \n", 4);
- fputs("CLUSTER[CLUSTER==-1] <- NA\n", outfile);
+ bufptr = memcpya(bufptr, " ) \nCLUSTER[CLUSTER==-1] <- NA\n", 31);
if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
goto rserve_call_ret_WRITE_FAIL;
}
@@ -293,7 +292,7 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
if (marker_uidx >= chrom_end) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
chrom_name_ptr = chrom_name_buf5w4write(chrom_info_ptr, uii, &chrom_name_len, chrom_name_buf);
@@ -335,7 +334,7 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
if (marker_uidx >= chrom_end) {
do {
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[(++chrom_fo_idx) + 1];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[(++chrom_fo_idx) + 1];
} while (marker_uidx >= chrom_end);
uii = chrom_info_ptr->chrom_file_order[chrom_fo_idx];
chrom_name_ptr = chrom_name_buf5w4write(chrom_info_ptr, uii, &chrom_name_len, chrom_name_buf);
@@ -358,7 +357,6 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
if (fwrite_checked(g_textbuf, bufptr - g_textbuf, outfile)) {
goto rserve_call_ret_WRITE_FAIL;
}
- block_offset = 0;
sample_idx = 0;
while (1) {
bufptr = g_textbuf;
@@ -388,13 +386,13 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
if (fwrite_checked(inbuf_start, inbuf_end - inbuf_start, outfile)) {
goto rserve_call_ret_WRITE_FAIL;
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
marker_idx_base += block_size;
if (marker_idx_base >= loop_end) {
if (marker_idx_base < marker_ct) {
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
pct = (marker_idx_base * 100LLU) / marker_ct;
printf("\b\b%u%%", pct);
@@ -405,7 +403,7 @@ int32_t rserve_call(char* rplugin_fname, uint32_t rplugin_port, uint32_t rplugin
}
if (pct >= 10) {
- putchar('\b');
+ putc_unlocked('\b', stdout);
}
fputs("\b\b", stdout);
logprint("done.\n");
diff --git a/plink_set.c b/plink_set.c
index ea6c551..d7f9818 100644
--- a/plink_set.c
+++ b/plink_set.c
@@ -3,24 +3,24 @@
#include "plink_set.h"
void set_init(Set_info* sip, Annot_info* aip) {
- sip->fname = NULL;
- sip->setnames_flattened = NULL;
- sip->subset_fname = NULL;
- sip->merged_set_name = NULL;
- sip->genekeep_flattened = NULL;
+ sip->fname = nullptr;
+ sip->setnames_flattened = nullptr;
+ sip->subset_fname = nullptr;
+ sip->merged_set_name = nullptr;
+ sip->genekeep_flattened = nullptr;
sip->ct = 0;
sip->modifier = 0;
sip->set_r2 = 0.5;
sip->set_p = 0.05;
sip->set_test_lambda = 0.0;
sip->set_max = 5;
- aip->fname = NULL;
- aip->attrib_fname = NULL;
- aip->ranges_fname = NULL;
- aip->filter_fname = NULL;
- aip->snps_fname = NULL;
- aip->subset_fname = NULL;
- aip->snpfield = NULL;
+ aip->fname = nullptr;
+ aip->attrib_fname = nullptr;
+ aip->ranges_fname = nullptr;
+ aip->filter_fname = nullptr;
+ aip->snps_fname = nullptr;
+ aip->subset_fname = nullptr;
+ aip->snpfield = nullptr;
aip->modifier = 0;
aip->border = 0;
}
@@ -253,8 +253,8 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
// Called directly by extract_exclude_range(), define_sets(), and indirectly
// by annotate(), gene_report(), and clump_reports().
// Assumes caller will reset g_bigstack_end later.
- Ll_str* make_set_ll = NULL;
- char* set_names = NULL;
+ Ll_str* make_set_ll = nullptr;
+ char* set_names = nullptr;
uintptr_t set_ct = 0;
uintptr_t max_set_id_len = 0;
uintptr_t line_idx = 0;
@@ -274,21 +274,151 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
uint32_t range_last;
uint32_t uii;
uint32_t ujj;
- int32_t ii;
- g_textbuf[MAXLINELEN - 1] = ' ';
- // if we need to track set names, put together a sorted list
- if (track_set_names) {
+ {
+ g_textbuf[MAXLINELEN - 1] = ' ';
+ // if we need to track set names, put together a sorted list
+ if (track_set_names) {
+ while (fgets(g_textbuf, MAXLINELEN, infile)) {
+ line_idx++;
+ if (!g_textbuf[MAXLINELEN - 1]) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s file is pathologically long.\n", line_idx, file_descrip);
+ goto load_range_list_ret_INVALID_FORMAT_2;
+ }
+ char* textbuf_first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*textbuf_first_token)) {
+ continue;
+ }
+ char* first_token_end = token_endnn(textbuf_first_token);
+ bufptr2 = next_token_mult(first_token_end, 3);
+ if (!collapse_group) {
+ bufptr3 = bufptr2;
+ } else {
+ bufptr3 = next_token(bufptr2);
+ }
+ if (no_more_tokens_kns(bufptr3)) {
+ sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s file has fewer tokens than expected.\n", line_idx, file_descrip);
+ goto load_range_list_ret_INVALID_FORMAT_2;
+ }
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - textbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code = get_chrom_code(textbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
+ sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
+ goto load_range_list_ret_INVALID_FORMAT_2;
+ }
+ // chrom_mask check removed, we want to track empty sets
+ uii = strlen_se(bufptr2);
+ bufptr2[uii] = '\0';
+ if (subset_ct) {
+ if (bsearch_str(bufptr2, uii, sorted_subset_ids, max_subset_id_len, subset_ct) == -1) {
+ continue;
+ }
+ }
+ if (collapse_group) {
+ uii = strlen_se(bufptr3);
+ bufptr3[uii] = '\0';
+ }
+ // when there are repeats, they are likely to be next to each other
+ if (make_set_ll && (!strcmp(make_set_ll->ss, bufptr3))) {
+ continue;
+ }
+ uii++;
+ // argh, --clump counts positional overlaps which don't include any
+ // variants in the dataset. So we prefix set IDs with a chromosome
+ // index in that case (with leading zeroes) and treat cross-chromosome
+ // sets as distinct.
+ if (!marker_pos) {
+ uii += 4;
+ }
+ if (uii > max_set_id_len) {
+ max_set_id_len = uii;
+ }
+ if (bigstack_end_alloc_llstr(uii, &ll_tmp)) {
+ goto load_range_list_ret_NOMEM;
+ }
+ ll_tmp->next = make_set_ll;
+ if (marker_pos) {
+ memcpy(ll_tmp->ss, bufptr3, uii);
+ } else {
+ uitoa_z4((uint32_t)cur_chrom_code, ll_tmp->ss);
+ // if first character of gene name is a digit, natural sort has strange
+ // effects unless we force [3] to be nonnumeric...
+ ll_tmp->ss[3] -= 15;
+ memcpy(&(ll_tmp->ss[4]), bufptr3, uii - 4);
+ }
+ make_set_ll = ll_tmp;
+ set_ct++;
+ }
+ if (!set_ct) {
+ if (fail_on_no_sets) {
+ if (marker_pos) {
+ if (!allow_no_variants) {
+ // okay, this is a kludge
+ logerrprint("Error: All variants excluded by --gene{-all}, since no sets were defined from\n--make-set file.\n");
+ retval = RET_ALL_MARKERS_EXCLUDED;
+ goto load_range_list_ret_1;
+ }
+ } else {
+ if (subset_ct) {
+ logerrprint("Error: No --gene-subset genes present in --gene-report file.\n");
+ } else {
+ logerrprint("Error: Empty --gene-report file.\n");
+ }
+ retval = RET_INVALID_FORMAT;
+ goto load_range_list_ret_1;
+ }
+ }
+ LOGERRPRINTF("Warning: No valid ranges in %s file.\n", file_descrip);
+ goto load_range_list_ret_1;
+ }
+ max_set_id_len += c_prefix;
+ if (max_set_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: Set IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
+ goto load_range_list_ret_INVALID_FORMAT;
+ }
+ if (bigstack_alloc_c(set_ct * max_set_id_len, set_names_ptr)) {
+ goto load_range_list_ret_NOMEM;
+ }
+ set_names = *set_names_ptr;
+ if (!c_prefix) {
+ for (ulii = 0; ulii < set_ct; ulii++) {
+ strcpy(&(set_names[ulii * max_set_id_len]), make_set_ll->ss);
+ make_set_ll = make_set_ll->next;
+ }
+ } else {
+ for (ulii = 0; ulii < set_ct; ulii++) {
+ memcpy(&(set_names[ulii * max_set_id_len]), "C_", 2);
+ strcpy(&(set_names[ulii * max_set_id_len + 2]), make_set_ll->ss);
+ make_set_ll = make_set_ll->next;
+ }
+ }
+ qsort(set_names, set_ct, max_set_id_len, strcmp_natural);
+ set_ct = collapse_duplicate_ids(set_names, set_ct, max_set_id_len, nullptr);
+ bigstack_shrink_top(set_names, set_ct * max_set_id_len);
+ rewind(infile);
+ } else {
+ set_ct = 1;
+ }
+ make_set_range_arr = (Make_set_range**)bigstack_end_alloc(set_ct * sizeof(intptr_t));
+ if (!make_set_range_arr) {
+ goto load_range_list_ret_NOMEM;
+ }
+ for (set_idx = 0; set_idx < set_ct; set_idx++) {
+ make_set_range_arr[set_idx] = nullptr;
+ }
+ line_idx = 0;
while (fgets(g_textbuf, MAXLINELEN, infile)) {
line_idx++;
if (!g_textbuf[MAXLINELEN - 1]) {
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s file is pathologically long.\n", line_idx, file_descrip);
goto load_range_list_ret_INVALID_FORMAT_2;
}
- bufptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_kns(*bufptr)) {
+ char* textbuf_first_token = skip_initial_spaces(g_textbuf);
+ if (is_eoln_kns(*textbuf_first_token)) {
continue;
}
- bufptr2 = next_token_mult(bufptr, 3);
+ char* first_token_end = token_endnn(textbuf_first_token);
+ bufptr2 = next_token_mult(first_token_end, 3);
if (!collapse_group) {
bufptr3 = bufptr2;
} else {
@@ -298,235 +428,112 @@ int32_t load_range_list(FILE* infile, uint32_t track_set_names, uint32_t border_
sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s file has fewer tokens than expected.\n", line_idx, file_descrip);
goto load_range_list_ret_INVALID_FORMAT_2;
}
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
+ const uint32_t chrom_name_slen = (uintptr_t)(first_token_end - textbuf_first_token);
+ *first_token_end = '\0';
+ int32_t cur_chrom_code = get_chrom_code(textbuf_first_token, chrom_info_ptr, chrom_name_slen);
+ if (cur_chrom_code < 0) {
sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
goto load_range_list_ret_INVALID_FORMAT_2;
}
- // chrom_mask check removed, we want to track empty sets
- uii = strlen_se(bufptr2);
- bufptr2[uii] = '\0';
- if (subset_ct) {
- if (bsearch_str(bufptr2, uii, sorted_subset_ids, max_subset_id_len, subset_ct) == -1) {
+ if (!is_set(chrom_info_ptr->chrom_mask, cur_chrom_code)) {
+ continue;
+ }
+ chrom_idx = cur_chrom_code;
+ if (marker_pos) {
+ chrom_start = get_chrom_start_vidx(chrom_info_ptr, chrom_idx);
+ chrom_end = get_chrom_end_vidx(chrom_info_ptr, chrom_idx);
+ if (chrom_end == chrom_start) {
+ continue;
+ }
+ // might need to move this outside the if-statement later
+ if (subset_ct && (bsearch_str(bufptr2, strlen_se(bufptr2), sorted_subset_ids, max_subset_id_len, subset_ct) == -1)) {
continue;
}
}
- if (collapse_group) {
- uii = strlen_se(bufptr3);
- bufptr3[uii] = '\0';
- }
- // when there are repeats, they are likely to be next to each other
- if (make_set_ll && (!strcmp(make_set_ll->ss, bufptr3))) {
- continue;
+ bufptr = skip_initial_spaces(&(first_token_end[1]));
+ if (scan_uint_defcap(bufptr, &range_first)) {
+ sprintf(g_logbuf, "Error: Invalid range start position on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
+ goto load_range_list_ret_INVALID_FORMAT_2;
}
- uii++;
- // argh, --clump counts positional overlaps which don't include any
- // variants in the dataset. So we prefix set IDs with a chromosome index
- // in that case (with leading zeroes) and treat cross-chromosome sets as
- // distinct.
- if (!marker_pos) {
- uii += 4;
+ bufptr = next_token(bufptr);
+ if (scan_uint_defcap(bufptr, &range_last)) {
+ sprintf(g_logbuf, "Error: Invalid range end position on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
+ goto load_range_list_ret_INVALID_FORMAT_2;
}
- if (uii > max_set_id_len) {
- max_set_id_len = uii;
+ if (range_last < range_first) {
+ sprintf(g_logbuf, "Error: Range end position smaller than range start on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
+ wordwrapb(0);
+ goto load_range_list_ret_INVALID_FORMAT_2;
}
- if (bigstack_end_alloc_llstr(uii, &ll_tmp)) {
- goto load_range_list_ret_NOMEM;
+ if (border_extend > range_first) {
+ range_first = 0;
+ } else {
+ range_first -= border_extend;
}
- ll_tmp->next = make_set_ll;
- if (marker_pos) {
- memcpy(ll_tmp->ss, bufptr3, uii);
+ range_last += border_extend;
+ if (set_ct > 1) {
+ // bugfix: bsearch_str_natural requires null-terminated string
+ uii = strlen_se(bufptr3);
+ bufptr3[uii] = '\0';
+ if (c_prefix) {
+ bufptr3 = &(bufptr3[-2]);
+ memcpy(bufptr3, "C_", 2);
+ } else if (!marker_pos) {
+ bufptr3 = &(bufptr3[-4]);
+ uitoa_z4(chrom_idx, bufptr3);
+ bufptr3[3] -= 15;
+ }
+ // this should never fail
+ set_idx = (uint32_t)bsearch_str_natural(bufptr3, set_names, max_set_id_len, set_ct);
} else {
- uitoa_z4((uint32_t)ii, ll_tmp->ss);
- // if first character of gene name is a digit, natural sort has strange
- // effects unless we force [3] to be nonnumeric...
- ll_tmp->ss[3] -= 15;
- memcpy(&(ll_tmp->ss[4]), bufptr3, uii - 4);
+ set_idx = 0;
}
- make_set_ll = ll_tmp;
- set_ct++;
- }
- if (!set_ct) {
- if (fail_on_no_sets) {
- if (marker_pos) {
- if (!allow_no_variants) {
- // okay, this is a kludge
- logerrprint("Error: All variants excluded by --gene{-all}, since no sets were defined from\n--make-set file.\n");
- retval = RET_ALL_MARKERS_EXCLUDED;
- goto load_range_list_ret_1;
- }
- } else {
- if (subset_ct) {
- logerrprint("Error: No --gene-subset genes present in --gene-report file.\n");
- } else {
- logerrprint("Error: Empty --gene-report file.\n");
- }
- retval = RET_INVALID_FORMAT;
- goto load_range_list_ret_1;
+ if (marker_pos) {
+ // translate to within-chromosome uidx
+ range_first = uint32arr_greater_than(&(marker_pos[chrom_start]), chrom_end - chrom_start, range_first);
+ range_last = uint32arr_greater_than(&(marker_pos[chrom_start]), chrom_end - chrom_start, range_last + 1);
+ if (range_last > range_first) {
+ msr_tmp = (Make_set_range*)bigstack_end_alloc(sizeof(Make_set_range));
+ msr_tmp->next = make_set_range_arr[set_idx];
+ // normally, I'd keep chrom_idx here since that enables by-chromosome
+ // sorting, but that's probably not worth bloating Make_set_range from
+ // 16 to 32 bytes
+ msr_tmp->uidx_start = chrom_start + range_first;
+ msr_tmp->uidx_end = chrom_start + range_last;
+ make_set_range_arr[set_idx] = msr_tmp;
}
+ } else {
+ msr_tmp = (Make_set_range*)bigstack_end_alloc(sizeof(Make_set_range));
+ msr_tmp->next = make_set_range_arr[set_idx];
+ msr_tmp->uidx_start = range_first;
+ msr_tmp->uidx_end = range_last + 1;
+ make_set_range_arr[set_idx] = msr_tmp;
}
- LOGERRPRINTF("Warning: No valid ranges in %s file.\n", file_descrip);
- goto load_range_list_ret_1;
- }
- max_set_id_len += c_prefix;
- if (max_set_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: Set IDs are limited to " MAX_ID_LEN_STR " characters.\n");
- goto load_range_list_ret_INVALID_FORMAT;
- }
- if (bigstack_alloc_c(set_ct * max_set_id_len, set_names_ptr)) {
- goto load_range_list_ret_NOMEM;
- }
- set_names = *set_names_ptr;
- if (!c_prefix) {
- for (ulii = 0; ulii < set_ct; ulii++) {
- strcpy(&(set_names[ulii * max_set_id_len]), make_set_ll->ss);
- make_set_ll = make_set_ll->next;
- }
- } else {
- for (ulii = 0; ulii < set_ct; ulii++) {
- memcpy(&(set_names[ulii * max_set_id_len]), "C_", 2);
- strcpy(&(set_names[ulii * max_set_id_len + 2]), make_set_ll->ss);
- make_set_ll = make_set_ll->next;
- }
- }
- qsort(set_names, set_ct, max_set_id_len, strcmp_natural);
- set_ct = collapse_duplicate_ids(set_names, set_ct, max_set_id_len, NULL);
- bigstack_shrink_top(set_names, set_ct * max_set_id_len);
- rewind(infile);
- } else {
- set_ct = 1;
- }
- make_set_range_arr = (Make_set_range**)bigstack_end_alloc(set_ct * sizeof(intptr_t));
- if (!make_set_range_arr) {
- goto load_range_list_ret_NOMEM;
- }
- for (set_idx = 0; set_idx < set_ct; set_idx++) {
- make_set_range_arr[set_idx] = NULL;
- }
- line_idx = 0;
- while (fgets(g_textbuf, MAXLINELEN, infile)) {
- line_idx++;
- if (!g_textbuf[MAXLINELEN - 1]) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s file is pathologically long.\n", line_idx, file_descrip);
- goto load_range_list_ret_INVALID_FORMAT_2;
- }
- bufptr = skip_initial_spaces(g_textbuf);
- if (is_eoln_kns(*bufptr)) {
- continue;
- }
- bufptr2 = next_token_mult(bufptr, 3);
- if (!collapse_group) {
- bufptr3 = bufptr2;
- } else {
- bufptr3 = next_token(bufptr2);
- }
- if (no_more_tokens_kns(bufptr3)) {
- sprintf(g_logbuf, "Error: Line %" PRIuPTR " of %s file has fewer tokens than expected.\n", line_idx, file_descrip);
- goto load_range_list_ret_INVALID_FORMAT_2;
- }
- ii = get_chrom_code(chrom_info_ptr, bufptr);
- if (ii < 0) {
- sprintf(g_logbuf, "Error: Invalid chromosome code on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
- goto load_range_list_ret_INVALID_FORMAT_2;
- }
- if (!is_set(chrom_info_ptr->chrom_mask, ii)) {
- continue;
}
- chrom_idx = ii;
- if (marker_pos) {
- chrom_start = chrom_info_ptr->chrom_start[chrom_idx];
- chrom_end = chrom_info_ptr->chrom_end[chrom_idx];
- if (chrom_end == chrom_start) {
- continue;
+ // allocate buffer for sorting ranges later
+ uii = 0;
+ for (set_idx = 0; set_idx < set_ct; set_idx++) {
+ ujj = 0;
+ msr_tmp = make_set_range_arr[set_idx];
+ while (msr_tmp) {
+ ujj++;
+ msr_tmp = msr_tmp->next;
}
- // might need to move this outside the if-statement later
- if (subset_ct && (bsearch_str(bufptr2, strlen_se(bufptr2), sorted_subset_ids, max_subset_id_len, subset_ct) == -1)) {
- continue;
+ if (ujj > uii) {
+ uii = ujj;
}
}
- bufptr = next_token(bufptr);
- if (scan_uint_defcap(bufptr, &range_first)) {
- sprintf(g_logbuf, "Error: Invalid range start position on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
- goto load_range_list_ret_INVALID_FORMAT_2;
- }
- bufptr = next_token(bufptr);
- if (scan_uint_defcap(bufptr, &range_last)) {
- sprintf(g_logbuf, "Error: Invalid range end position on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
- goto load_range_list_ret_INVALID_FORMAT_2;
- }
- if (range_last < range_first) {
- sprintf(g_logbuf, "Error: Range end position smaller than range start on line %" PRIuPTR " of %s file.\n", line_idx, file_descrip);
- wordwrapb(0);
- goto load_range_list_ret_INVALID_FORMAT_2;
- }
- if (border_extend > range_first) {
- range_first = 0;
- } else {
- range_first -= border_extend;
- }
- range_last += border_extend;
- if (set_ct > 1) {
- // bugfix: bsearch_str_natural requires null-terminated string
- uii = strlen_se(bufptr3);
- bufptr3[uii] = '\0';
- if (c_prefix) {
- bufptr3 = &(bufptr3[-2]);
- memcpy(bufptr3, "C_", 2);
- } else if (!marker_pos) {
- bufptr3 = &(bufptr3[-4]);
- uitoa_z4(chrom_idx, bufptr3);
- bufptr3[3] -= 15;
- }
- // this should never fail
- set_idx = (uint32_t)bsearch_str_natural(bufptr3, set_names, max_set_id_len, set_ct);
- } else {
- set_idx = 0;
- }
- if (marker_pos) {
- // translate to within-chromosome uidx
- range_first = uint32arr_greater_than(&(marker_pos[chrom_start]), chrom_end - chrom_start, range_first);
- range_last = uint32arr_greater_than(&(marker_pos[chrom_start]), chrom_end - chrom_start, range_last + 1);
- if (range_last > range_first) {
- msr_tmp = (Make_set_range*)bigstack_end_alloc(sizeof(Make_set_range));
- msr_tmp->next = make_set_range_arr[set_idx];
- // normally, I'd keep chrom_idx here since that enables by-chromosome
- // sorting, but that's probably not worth bloating Make_set_range from
- // 16 to 32 bytes
- msr_tmp->uidx_start = chrom_start + range_first;
- msr_tmp->uidx_end = chrom_start + range_last;
- make_set_range_arr[set_idx] = msr_tmp;
- }
- } else {
- msr_tmp = (Make_set_range*)bigstack_end_alloc(sizeof(Make_set_range));
- msr_tmp->next = make_set_range_arr[set_idx];
- msr_tmp->uidx_start = range_first;
- msr_tmp->uidx_end = range_last + 1;
- make_set_range_arr[set_idx] = msr_tmp;
+ if (range_sort_buf_ptr) {
+ bigstack_end_alloc_ull(uii, range_sort_buf_ptr);
}
- }
- // allocate buffer for sorting ranges later
- uii = 0;
- for (set_idx = 0; set_idx < set_ct; set_idx++) {
- ujj = 0;
- msr_tmp = make_set_range_arr[set_idx];
- while (msr_tmp) {
- ujj++;
- msr_tmp = msr_tmp->next;
+ if (set_ct_ptr) {
+ *set_ct_ptr = set_ct;
}
- if (ujj > uii) {
- uii = ujj;
+ if (max_set_id_len_ptr) {
+ *max_set_id_len_ptr = max_set_id_len;
}
+ *make_set_range_arr_ptr = make_set_range_arr;
}
- if (range_sort_buf_ptr) {
- bigstack_end_alloc_ull(uii, range_sort_buf_ptr);
- }
- if (set_ct_ptr) {
- *set_ct_ptr = set_ct;
- }
- if (max_set_id_len_ptr) {
- *max_set_id_len_ptr = max_set_id_len;
- }
- *make_set_range_arr_ptr = make_set_range_arr;
while (0) {
load_range_list_ret_NOMEM:
retval = RET_NOMEM;
@@ -548,16 +555,16 @@ int32_t extract_exclude_range(char* fname, uint32_t* marker_pos, uintptr_t unfil
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t orig_marker_exclude_ct = *marker_exclude_ct_ptr;
- Make_set_range** range_arr = NULL;
+ Make_set_range** range_arr = nullptr;
int32_t retval = 0;
Make_set_range* msr_tmp;
uintptr_t* marker_exclude_new;
if (fopen_checked(fname, "r", &infile)) {
goto extract_exclude_range_ret_OPEN_FAIL;
}
- retval = load_range_list(infile, 0, 0, 0, 0, 0, allow_no_variants, 0, NULL, 0, marker_pos, chrom_info_ptr, NULL, NULL, NULL, &range_arr, NULL, is_exclude? "--exclude range" : "--extract range");
+ retval = load_range_list(infile, 0, 0, 0, 0, 0, allow_no_variants, 0, nullptr, 0, marker_pos, chrom_info_ptr, nullptr, nullptr, nullptr, &range_arr, nullptr, is_exclude? "--exclude range" : "--extract range");
if (retval) {
goto extract_exclude_range_ret_1;
}
@@ -833,7 +840,7 @@ uint32_t save_set_range(uint64_t* range_sort_buf, uint32_t marker_ct, uint32_t r
ulii = ((rsb_last_idx + 1) / 2) + 1;
ulii *= 16;
if (ulii > mem_req) {
- fill_ulong_zero(bitfield_ptr, (bound_top_d128 - bound_bottom_d128) * (128 / BITCT));
+ fill_ulong_zero((bound_top_d128 - bound_bottom_d128) * (128 / BITCT), bitfield_ptr);
range_start = bound_bottom_d128 * 128;
if (do_flip) {
rsb_last_idx--;
@@ -943,9 +950,9 @@ uint32_t save_set_range(uint64_t* range_sort_buf, uint32_t marker_ct, uint32_t r
int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, uint32_t* marker_pos, uintptr_t* marker_exclude_ct_ptr, char* marker_ids, uintptr_t max_marker_id_len, Chrom_info* chrom_info_ptr, uint32_t allow_no_variants) {
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
- char* sorted_marker_ids = NULL;
- char* sorted_genekeep_ids = NULL;
+ FILE* infile = nullptr;
+ char* sorted_marker_ids = nullptr;
+ char* sorted_genekeep_ids = nullptr;
uintptr_t unfiltered_marker_ctl = BITCT_TO_WORDCT(unfiltered_marker_ct);
uintptr_t marker_exclude_ct = *marker_exclude_ct_ptr;
uintptr_t marker_ct = unfiltered_marker_ct - marker_exclude_ct;
@@ -962,12 +969,12 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
uintptr_t genekeep_ct = 0;
uintptr_t max_genekeep_len = 0;
uintptr_t max_set_id_len = 0;
- Make_set_range** make_set_range_arr = NULL;
+ Make_set_range** make_set_range_arr = nullptr;
char* midbuf = &(g_textbuf[MAXLINELEN]);
- char* sorted_subset_ids = NULL;
- char* set_names = NULL;
- char* bufptr = NULL;
- uint64_t* range_sort_buf = NULL;
+ char* sorted_subset_ids = nullptr;
+ char* set_names = nullptr;
+ char* bufptr = nullptr;
+ uint64_t* range_sort_buf = nullptr;
char* bufptr2;
char* bufptr3;
char* buf_end;
@@ -1013,7 +1020,7 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
}
}
free(sip->genekeep_flattened);
- sip->genekeep_flattened = NULL;
+ sip->genekeep_flattened = nullptr;
gene_all = 1;
} else {
do {
@@ -1092,8 +1099,8 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
goto define_sets_ret_1;
}
}
- if (max_subset_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: Subset IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_subset_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: Subset IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto define_sets_ret_INVALID_FORMAT;
}
if (bigstack_end_alloc_c(subset_ct * max_subset_id_len, &sorted_subset_ids)) {
@@ -1121,7 +1128,7 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
}
}
qsort(sorted_subset_ids, subset_ct, max_subset_id_len, strcmp_casted);
- subset_ct = collapse_duplicate_ids(sorted_subset_ids, subset_ct, max_subset_id_len, NULL);
+ subset_ct = collapse_duplicate_ids(sorted_subset_ids, subset_ct, max_subset_id_len, nullptr);
}
if (fopen_checked(sip->fname, make_set? "r" : FOPEN_RB, &infile)) {
goto define_sets_ret_OPEN_FAIL;
@@ -1155,7 +1162,7 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
}
if (complement_sets) {
bitvec_and(marker_bitfield_tmp, unfiltered_marker_ctl, marker_exclude_new);
- fill_ulong_zero(marker_bitfield_tmp, unfiltered_marker_ctl);
+ fill_ulong_zero(unfiltered_marker_ctl, marker_bitfield_tmp);
}
}
} else {
@@ -1223,11 +1230,14 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
}
if (complement_sets) {
bitvec_and(marker_bitfield_tmp, unfiltered_marker_ctl, marker_exclude_new);
- fill_ulong_zero(marker_bitfield_tmp, unfiltered_marker_ctl);
+ fill_ulong_zero(unfiltered_marker_ctl, marker_bitfield_tmp);
}
in_set = 0;
} else if (!in_set) {
- if (subset_ct && (bsearch_str(bufptr, (uintptr_t)(bufptr2 - bufptr), sorted_subset_ids, max_subset_id_len, subset_ct) == -1)) {
+ // bugfix: forgot to apply --gene here
+ // if (gene_all || (bsearch_str_nl(&(set_names[set_idx * max_set_id_len]), sorted_genekeep_ids, max_genekeep_len, genekeep_ct) != -1)) {
+
+ if ((subset_ct && (bsearch_str(bufptr, (uintptr_t)(bufptr2 - bufptr), sorted_subset_ids, max_subset_id_len, subset_ct) == -1)) || (sorted_genekeep_ids && (bsearch_str(bufptr, (uintptr_t)(bufptr2 - bufptr), sorted_genekeep_ids, max_genekeep_len, genekeep_ct) == -1))) {
in_set = 2; // ignore this set
bufptr = &(bufptr2[1]);
continue;
@@ -1361,8 +1371,8 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
if (sip->merged_set_name) {
set_ct = 1;
max_set_id_len = strlen(sip->merged_set_name) + 1;
- if (max_set_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: Set IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_set_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: Set IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto define_sets_ret_INVALID_FORMAT;
}
if (bigstack_alloc_c(max_set_id_len, &set_names)) {
@@ -1370,8 +1380,8 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
}
memcpy(set_names, sip->merged_set_name, max_set_id_len);
} else {
- if (max_set_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: Set IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_set_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: Set IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto define_sets_ret_INVALID_FORMAT;
}
if (bigstack_alloc_c(set_ct * max_set_id_len, &set_names)) {
@@ -1476,9 +1486,9 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
goto define_sets_ret_NOMEM;
}
#ifdef __LP64__
- fill_ulong_zero(marker_bitfield_tmp, round_up_pow2(marker_ctp2l, 2));
+ fill_ulong_zero(round_up_pow2(marker_ctp2l, 2), marker_bitfield_tmp);
#else
- fill_ulong_zero(marker_bitfield_tmp, round_up_pow2(marker_ctp2l, 4));
+ fill_ulong_zero(round_up_pow2(marker_ctp2l, 4), marker_bitfield_tmp);
#endif
while (1) {
if (fread_checked(midbuf, MAXLINELEN, infile, &bufsize)) {
@@ -1533,7 +1543,7 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
goto define_sets_ret_NOMEM;
}
set_idx++;
- fill_ulong_zero(marker_bitfield_tmp, marker_ctp2l);
+ fill_ulong_zero(marker_ctp2l, marker_bitfield_tmp);
range_first = marker_ct;
range_last = 0;
}
@@ -1638,7 +1648,7 @@ int32_t define_sets(Set_info* sip, uintptr_t unfiltered_marker_ct, uintptr_t* ma
int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t marker_ct, uintptr_t unfiltered_marker_ct, uintptr_t* marker_exclude, char* marker_ids, uintptr_t max_marker_id_len, uint32_t* marker_pos, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
- FILE* outfile = NULL;
+ FILE* outfile = nullptr;
uintptr_t set_ct = sip->ct;
uintptr_t max_set_name_len = sip->max_name_len;
uintptr_t set_idx = 0;
@@ -1668,7 +1678,7 @@ int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t mark
}
fputs("SNP\tCHR\tBP", outfile);
for (set_idx = 0; set_idx < set_ct; set_idx++) {
- putc('\t', outfile);
+ putc_unlocked('\t', outfile);
fputs(&(sip->names[set_idx * max_set_name_len]), outfile);
}
if (putc_checked('\n', outfile)) {
@@ -1690,9 +1700,9 @@ int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t mark
for (marker_idx = 0; marker_idx < marker_ct; marker_uidx++, marker_idx++) {
next_unset_ul_unsafe_ck(marker_exclude, &marker_uidx);
if (marker_uidx >= chrom_end) {
- uii = get_marker_chrom_fo_idx(chrom_info_ptr, marker_uidx);
+ uii = get_variant_chrom_fo_idx(chrom_info_ptr, marker_uidx);
chrom_idx = chrom_info_ptr->chrom_file_order[uii];
- chrom_end = chrom_info_ptr->chrom_file_order_marker_idx[uii];
+ chrom_end = chrom_info_ptr->chrom_fo_vidx_start[uii];
}
fputs(&(marker_ids[marker_uidx * max_marker_id_len]), outfile);
bufptr = chrom_name_write(chrom_info_ptr, chrom_idx, &(g_textbuf[1]));
@@ -1773,7 +1783,7 @@ int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t mark
fill_idx_to_uidx(marker_exclude, unfiltered_marker_ct, marker_ct, marker_idx_to_uidx);
for (set_idx = 0; set_idx < set_ct; set_idx++) {
fputs(&(sip->names[set_idx * max_set_name_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
cur_set_ptr = sip->setdefs[set_idx];
range_ct = cur_set_ptr[0];
if (range_ct != 0xffffffffU) {
@@ -1781,7 +1791,7 @@ int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t mark
ujj = cur_set_ptr[uii * 2 + 2];
for (marker_idx = cur_set_ptr[uii * 2 + 1]; marker_idx < ujj; marker_idx++) {
fputs(&(marker_ids[marker_idx_to_uidx[marker_idx] * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
} else {
@@ -1791,7 +1801,7 @@ int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t mark
if (cur_set_ptr[3]) {
for (marker_idx = 0; marker_idx < range_start; marker_idx++) {
fputs(&(marker_ids[marker_idx_to_uidx[marker_idx] * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
marker_idx = 0;
@@ -1801,13 +1811,13 @@ int32_t write_set(Set_info* sip, char* outname, char* outname_end, uint32_t mark
break;
}
fputs(&(marker_ids[marker_idx_to_uidx[marker_idx] * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
marker_idx++;
}
if ((range_start + uii < marker_ct) && cur_set_ptr[3]) {
for (marker_idx = range_start + uii; marker_idx < marker_ct; marker_idx++) {
fputs(&(marker_ids[marker_idx_to_uidx[marker_idx] * max_marker_id_len]), outfile);
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
}
}
}
@@ -1848,9 +1858,9 @@ void unpack_set(uintptr_t marker_ct, uint32_t* setdef, uintptr_t* include_bitfie
keep_outer = setdef[3];
if (range_start) {
if (keep_outer) {
- fill_ulong_one(include_bitfield, range_start / BITCT);
+ fill_ulong_one(range_start / BITCT, include_bitfield);
} else {
- fill_ulong_zero(include_bitfield, range_start / BITCT);
+ fill_ulong_zero(range_start / BITCT, include_bitfield);
}
}
memcpy(&(include_bitfield[range_start / BITCT]), (uintptr_t*)(&(setdef[4])), ((range_ct + 127) / 128) * 16);
@@ -1859,11 +1869,11 @@ void unpack_set(uintptr_t marker_ct, uint32_t* setdef, uintptr_t* include_bitfie
if (keep_outer) {
fill_bits(uii, marker_ct - uii, include_bitfield);
} else {
- fill_ulong_zero(&(include_bitfield[uii / BITCT]), marker_ctl - uii / BITCT);
+ fill_ulong_zero(marker_ctl - uii / BITCT, &(include_bitfield[uii / BITCT]));
}
}
} else {
- fill_ulong_zero(include_bitfield, marker_ctl);
+ fill_ulong_zero(marker_ctl, include_bitfield);
for (uii = 0; uii < range_ct; uii++) {
range_start = setdef[uii * 2 + 1];
fill_bits(range_start, setdef[uii * 2 + 2] - range_start, include_bitfield);
@@ -1948,7 +1958,7 @@ uint32_t extract_set_union(uint32_t** setdefs, uintptr_t set_ct, uintptr_t* set_
uint32_t range_end;
uint32_t keep_outer;
uint32_t read_offset;
- fill_ulong_zero(filtered_union, marker_ctl);
+ fill_ulong_zero(marker_ctl, filtered_union);
for (set_idx = 0; set_idx < set_ct; set_idx++) {
if (set_incl && (!IS_SET(set_incl, set_idx))) {
continue;
@@ -1963,7 +1973,7 @@ uint32_t extract_set_union(uint32_t** setdefs, uintptr_t set_ct, uintptr_t* set_
read_offset = 0;
if (range_start > unset_startw) {
if (keep_outer) {
- fill_ulong_one(filtered_union, range_start);
+ fill_ulong_one(range_start, filtered_union);
unset_startw = range_start;
}
} else {
@@ -1979,7 +1989,7 @@ uint32_t extract_set_union(uint32_t** setdefs, uintptr_t set_ct, uintptr_t* set_
}
if (keep_outer && (range_end < unset_endw)) {
// may overfill end
- fill_ulong_one(&(filtered_union[range_end]), unset_endw - range_end);
+ fill_ulong_one(unset_endw - range_end, &(filtered_union[range_end]));
unset_endw = range_end;
}
} else if (range_ct) {
@@ -2101,7 +2111,7 @@ uint32_t setdefs_compress(Set_info* sip, uintptr_t* set_incl, uintptr_t set_ct,
next_set_unsafe_ck(set_incl, &set_uidx);
}
cur_setdef = sip->setdefs[set_uidx];
- fill_ulong_zero(cur_bitfield, marker_ctlv);
+ fill_ulong_zero(marker_ctlv, cur_bitfield);
range_ct = cur_setdef[0];
range_start = marker_ct;
range_end = 0;
@@ -2121,7 +2131,7 @@ uint32_t setdefs_compress(Set_info* sip, uintptr_t* set_incl, uintptr_t set_ct,
include_out_of_bounds = cur_setdef[3];
read_bitfield = (uintptr_t*)(&(cur_setdef[4]));
if (include_out_of_bounds && range_offset) {
- fill_ulong_one(cur_bitfield, range_offset / BITCT);
+ fill_ulong_one(range_offset / BITCT, cur_bitfield);
range_start = 0;
}
for (marker_midx = 0; marker_midx < range_stop; marker_midx++) {
@@ -2152,7 +2162,7 @@ uint32_t setdefs_compress(Set_info* sip, uintptr_t* set_incl, uintptr_t set_ct,
int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t subset_ct, char* sorted_subset_ids, uintptr_t max_subset_id_len, Chrom_info* chrom_info_ptr, uintptr_t* gene_ct_ptr, char** gene_names_ptr, uintptr_t* max_gene_id_len_ptr, uintptr_t** chrom_bounds_ptr, uint32_t*** genedefs_ptr, uintptr_t* chrom_max_gene_ct_ptr, const char* file_descrip) {
// --annotate, --clump-range, --gene-report
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
+ FILE* infile = nullptr;
uintptr_t gene_ct = 0;
uintptr_t max_gene_id_len = 0;
uintptr_t chrom_max_gene_ct = 0;
@@ -2179,7 +2189,7 @@ int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t s
if (fopen_checked(fname, "r", &infile)) {
goto load_range_list_sortpos_ret_OPEN_FAIL;
}
- retval = load_range_list(infile, 1, border_extend, 0, 0, 0, 0, subset_ct, sorted_subset_ids, 0, NULL, chrom_info_ptr, &gene_ct, gene_names_ptr, &max_gene_id_len, &gene_arr, &range_sort_buf, file_descrip);
+ retval = load_range_list(infile, 1, border_extend, 0, 0, 0, 0, subset_ct, sorted_subset_ids, 0, nullptr, chrom_info_ptr, &gene_ct, gene_names_ptr, &max_gene_id_len, &gene_arr, &range_sort_buf, file_descrip);
if (retval) {
goto load_range_list_sortpos_ret_1;
}
@@ -2299,25 +2309,25 @@ int32_t load_range_list_sortpos(char* fname, uint32_t border_extend, uintptr_t s
int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilter, Chrom_info* chrom_info_ptr) {
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- gzFile gz_attribfile = NULL;
- FILE* infile = NULL;
- FILE* outfile = NULL;
- char* sorted_snplist = NULL;
- char* sorted_attr_ids = NULL; // natural-sorted
- char* sorted_snplist_attr_ids = NULL;
- char* sorted_subset_ids = NULL;
- char* range_names = NULL;
- char* filter_range_names = NULL;
- char* wptr = NULL;
- const char* snp_field = NULL;
- uintptr_t* attr_bitfields = NULL;
- uintptr_t* chrom_bounds = NULL;
- uintptr_t* chrom_filter_bounds = NULL;
- uint32_t** rangedefs = NULL;
- uint32_t** filter_rangedefs = NULL;
- uint32_t* range_idx_lookup = NULL;
- uint32_t* attr_id_remap = NULL;
- uint32_t* merged_attr_idx_buf = NULL;
+ gzFile gz_attribfile = nullptr;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
+ char* sorted_snplist = nullptr;
+ char* sorted_attr_ids = nullptr; // natural-sorted
+ char* sorted_snplist_attr_ids = nullptr;
+ char* sorted_subset_ids = nullptr;
+ char* range_names = nullptr;
+ char* filter_range_names = nullptr;
+ char* wptr = nullptr;
+ const char* snp_field = nullptr;
+ uintptr_t* attr_bitfields = nullptr;
+ uintptr_t* chrom_bounds = nullptr;
+ uintptr_t* chrom_filter_bounds = nullptr;
+ uint32_t** rangedefs = nullptr;
+ uint32_t** filter_rangedefs = nullptr;
+ uint32_t* range_idx_lookup = nullptr;
+ uint32_t* attr_id_remap = nullptr;
+ uint32_t* merged_attr_idx_buf = nullptr;
const char constsnpstr[] = "SNP";
const char constdotstr[] = ".";
const char constnastr[] = "NA";
@@ -2434,7 +2444,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
goto annotate_ret_READ_FAIL;
}
qsort(sorted_snplist, snplist_ct, max_snplist_id_len, strcmp_casted);
- ulii = collapse_duplicate_ids(sorted_snplist, snplist_ct, max_snplist_id_len, NULL);
+ ulii = collapse_duplicate_ids(sorted_snplist, snplist_ct, max_snplist_id_len, nullptr);
if (ulii < snplist_ct) {
snplist_ct = ulii;
bigstack_shrink_top(sorted_snplist, snplist_ct * max_snplist_id_len);
@@ -2458,7 +2468,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
goto annotate_ret_NOMEM;
}
for (uii = 0; uii < HASHSIZE; uii++) {
- attr_id_htable[uii] = NULL;
+ attr_id_htable[uii] = nullptr;
}
while (1) {
line_idx++;
@@ -2509,7 +2519,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
if (bigstack_end_alloc_llstr(slen, &ll_ptr)) {
goto annotate_ret_NOMEM;
}
- ll_ptr->next = NULL;
+ ll_ptr->next = nullptr;
memcpy(ll_ptr->ss, bufptr2, slen);
if (slen > max_attr_id_len) {
max_attr_id_len = slen;
@@ -2585,7 +2595,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
} while (!is_eoln_kns(*bufptr2));
}
gzclose(gz_attribfile);
- gz_attribfile = NULL;
+ gz_attribfile = nullptr;
if (qsort_ext(sorted_snplist_attr_ids, snplist_attr_ct, max_snplist_attr_id_len, strcmp_deref, (char*)attr_bitfields, attr_id_ctl * sizeof(intptr_t))) {
goto annotate_ret_NOMEM;
}
@@ -2609,8 +2619,8 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
logerrprint("Error: --annotate subset file is empty.\n");
goto annotate_ret_INVALID_FORMAT;
}
- if (max_subset_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: --annotate subset IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_subset_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: --annotate subset IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto annotate_ret_INVALID_FORMAT;
}
if (bigstack_end_alloc_c(subset_ct * max_subset_id_len, &sorted_subset_ids)) {
@@ -2625,7 +2635,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
goto annotate_ret_READ_FAIL;
}
qsort(sorted_subset_ids, subset_ct, max_subset_id_len, strcmp_casted);
- subset_ct = collapse_duplicate_ids(sorted_subset_ids, subset_ct, max_subset_id_len, NULL);
+ subset_ct = collapse_duplicate_ids(sorted_subset_ids, subset_ct, max_subset_id_len, nullptr);
}
// normally can't use border here because we need nearest distance
retval = load_range_list_sortpos(aip->ranges_fname, (block01 && (!track_distance))? border : 0, subset_ct, sorted_subset_ids, max_subset_id_len, chrom_info_ptr, &range_ct, &range_names, &max_range_name_len, &chrom_bounds, &rangedefs, &chrom_max_range_ct, "--annotate ranges");
@@ -2646,7 +2656,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
}
bigstack_end_reset(bigstack_end_mark);
if (aip->filter_fname) {
- retval = load_range_list_sortpos(aip->filter_fname, border, 0, NULL, 0, chrom_info_ptr, &filter_range_ct, &filter_range_names, &max_filter_range_name_len, &chrom_filter_bounds, &filter_rangedefs, &chrom_max_filter_range_ct, "--annotate filter");
+ retval = load_range_list_sortpos(aip->filter_fname, border, 0, nullptr, 0, chrom_info_ptr, &filter_range_ct, &filter_range_names, &max_filter_range_name_len, &chrom_filter_bounds, &filter_rangedefs, &chrom_max_filter_range_ct, "--annotate filter");
if (retval) {
goto annotate_ret_1;
}
@@ -2787,11 +2797,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
goto annotate_ret_INVALID_FORMAT_WW;
}
ujj |= 1 << uii;
- if (!seq_idx) {
- col_skips[0] = col_idx;
- } else {
- col_skips[seq_idx] = col_idx - col_skips[seq_idx - 1];
- }
+ col_skips[seq_idx] = col_idx;
col_sequence[seq_idx++] = uii;
}
}
@@ -2802,6 +2808,10 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
sprintf(g_logbuf, "Error: Missing column header%s in %s.\n", (seq_idx + 1 == token_ct)? "" : "s", aip->fname);
goto annotate_ret_INVALID_FORMAT_WW;
}
+ // bugfix: must go backwards
+ for (ujj = seq_idx - 1; ujj; --ujj) {
+ col_skips[ujj] -= col_skips[ujj - 1];
+ }
memcpy(outname_end, ".annot", 7);
if (fopen_checked(outname, "w", &outfile)) {
goto annotate_ret_OPEN_FAIL;
@@ -2815,12 +2825,12 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
if (block01) {
if (!range_ct) {
for (ulii = 0; ulii < attr_id_ct; ulii++) {
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
fputs(&(sorted_attr_ids[ulii * max_attr_id_len]), outfile);
}
} else {
for (uii = 0; uii < unique_annot_ct; uii++) {
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
ujj = merged_attr_idx_buf[uii];
if (ujj < 0x80000000U) {
fputs(&(range_names[ujj * max_range_name_len + 4]), outfile);
@@ -2838,7 +2848,7 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
} else {
fputs(" ANNOT", outfile);
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
while (fgets(loadbuf, loadbuf_size, infile)) {
line_idx++;
if (!loadbuf[loadbuf_size - 1]) {
@@ -2862,9 +2872,12 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
if (!bufptr) {
continue;
}
+
if (need_pos) {
// CHR
- chrom_idx = get_chrom_code(chrom_info_ptr, token_ptrs[0]);
+ // can't use get_chrom_code_destructive() due to later
+ // strchr(bufptr, '\0') call
+ chrom_idx = get_chrom_code_counted(chrom_info_ptr, strlen_se(token_ptrs[0]), token_ptrs[0]);
if (chrom_idx < 0) {
continue;
}
@@ -3062,12 +3075,12 @@ int32_t annotate(Annot_info* aip, char* outname, char* outname_end, double pfilt
goto annotate_ret_WRITE_FAIL;
}
}
- putc(' ', outfile);
+ putc_unlocked(' ', outfile);
if (fwrite_checked(writebuf, wptr - writebuf, outfile)) {
goto annotate_ret_WRITE_FAIL;
}
- putc('\n', outfile);
+ putc_unlocked('\n', outfile);
if (block01 && at_least_one_annot) {
// reinitialize
ulptr = (uintptr_t*)writebuf;
@@ -3123,20 +3136,20 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
// similar to define_sets() and --clump
unsigned char* bigstack_mark = g_bigstack_base;
unsigned char* bigstack_end_mark = g_bigstack_end;
- FILE* infile = NULL;
- FILE* outfile = NULL;
+ FILE* infile = nullptr;
+ FILE* outfile = nullptr;
uintptr_t subset_ct = 0;
uintptr_t max_subset_id_len = 0;
uintptr_t extract_ct = 0;
uintptr_t max_extract_id_len = 0;
const char constsnpstr[] = "SNP";
- char* sorted_subset_ids = NULL;
- char* sorted_extract_ids = NULL;
- uintptr_t* chrom_bounds = NULL;
- uint32_t** genedefs = NULL;
+ char* sorted_subset_ids = nullptr;
+ char* sorted_extract_ids = nullptr;
+ uintptr_t* chrom_bounds = nullptr;
+ uint32_t** genedefs = nullptr;
uint64_t saved_line_ct = 0;
uint32_t do_pfilter = (pfilter != 2.0);
- uint32_t token_ct = 2 + (extractname != NULL) + do_pfilter;
+ uint32_t token_ct = 2 + (extractname != nullptr) + do_pfilter;
uint32_t snp_field_len = 0;
uint32_t col_idx = 0;
uint32_t seq_idx = 0;
@@ -3194,8 +3207,8 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
logerrprint("Error: --gene-subset file is empty.\n");
goto gene_report_ret_INVALID_FORMAT;
}
- if (max_subset_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: --gene-subset IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_subset_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: --gene-subset IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto gene_report_ret_INVALID_FORMAT;
}
if (bigstack_end_alloc_c(subset_ct * max_subset_id_len, &sorted_subset_ids)) {
@@ -3210,7 +3223,7 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
goto gene_report_ret_READ_FAIL;
}
qsort(sorted_subset_ids, subset_ct, max_subset_id_len, strcmp_casted);
- subset_ct = collapse_duplicate_ids(sorted_subset_ids, subset_ct, max_subset_id_len, NULL);
+ subset_ct = collapse_duplicate_ids(sorted_subset_ids, subset_ct, max_subset_id_len, nullptr);
}
if (extractname) {
if (fopen_checked(extractname, FOPEN_RB, &infile)) {
@@ -3224,8 +3237,8 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
logerrprint("Error: Empty --extract file.\n");
goto gene_report_ret_INVALID_FORMAT;
}
- if (max_extract_id_len > MAX_ID_LEN_P1) {
- logerrprint("Error: --extract IDs are limited to " MAX_ID_LEN_STR " characters.\n");
+ if (max_extract_id_len > MAX_ID_BLEN) {
+ logerrprint("Error: --extract IDs are limited to " MAX_ID_SLEN_STR " characters.\n");
goto gene_report_ret_INVALID_FORMAT;
}
if (bigstack_alloc_c(extract_ct * max_extract_id_len, &sorted_extract_ids)) {
@@ -3241,7 +3254,7 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
goto gene_report_ret_READ_FAIL;
}
qsort(sorted_extract_ids, extract_ct, max_extract_id_len, strcmp_casted);
- ulii = collapse_duplicate_ids(sorted_extract_ids, extract_ct, max_extract_id_len, NULL);
+ ulii = collapse_duplicate_ids(sorted_extract_ids, extract_ct, max_extract_id_len, nullptr);
if (ulii < extract_ct) {
extract_ct = ulii;
bigstack_shrink_top(sorted_extract_ids, extract_ct * max_extract_id_len);
@@ -3263,7 +3276,7 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
// gene name. Final output will be the other way around, so we need a
// remapping table.
// This logic needs to change a bit if support for unplaced contigs is added
- // or MAX_CHROM_TEXTNUM_LEN changes.
+ // or MAX_CHROM_TEXTNUM_SLEN changes.
if (bigstack_alloc_ui(gene_ct, &gene_chridx_to_nameidx) ||
bigstack_alloc_ui(gene_ct, &gene_nameidx_to_chridx) ||
bigstack_alloc_c(gene_ct * max_gene_name_len, &loadbuf)) {
@@ -3349,11 +3362,7 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
goto gene_report_ret_INVALID_FORMAT_WW;
}
found_header_bitfield |= 1 << ujj;
- if (!seq_idx) {
- col_skips[0] = col_idx;
- } else {
- col_skips[seq_idx] = col_idx - col_skips[seq_idx - 1];
- }
+ col_skips[seq_idx] = col_idx;
col_sequence[seq_idx++] = ujj;
}
}
@@ -3364,6 +3373,10 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
sprintf(g_logbuf, "Error: Missing column header%s in %s.\n", (seq_idx + 1 == token_ct)? "" : "s", fname);
goto gene_report_ret_INVALID_FORMAT_WW;
}
+ // bugfix
+ for (uii = seq_idx - 1; uii; --uii) {
+ col_skips[uii] -= col_skips[uii - 1];
+ }
// assume *bufptr is now \n (if it isn't, header line is never written to
// output anyway)
header_len = 1 + (uintptr_t)(bufptr - header_ptr);
@@ -3407,8 +3420,9 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
if (!bufptr) {
goto gene_report_load_loop;
}
+
// CHR
- chrom_idx = get_chrom_code(chrom_info_ptr, token_ptrs[0]);
+ chrom_idx = get_chrom_code_counted(chrom_info_ptr, strlen_se(token_ptrs[0]), token_ptrs[0]);
if (chrom_idx < 0) {
// todo: log warning?
goto gene_report_load_loop;
@@ -3507,10 +3521,10 @@ int32_t gene_report(char* fname, char* glist, char* subset_fname, uint32_t borde
fputs(&(bufptr[4]), outfile);
fputs(" -- chr", outfile);
if (bufptr[2] != '0') {
- putc(bufptr[2], outfile);
+ putc_unlocked(bufptr[2], outfile);
}
- putc(bufptr[3] + 15, outfile);
- putc(':', outfile);
+ putc_unlocked(bufptr[3] + 15, outfile);
+ putc_unlocked(':', outfile);
uiptr = genedefs[gene_idx];
range_ct = *uiptr++;
ujj = 0; // gene length
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/plink1.9.git
More information about the debian-med-commit mailing list