[med-svn] [Git][med-team/plink1-9][master] 3 commits: New upstream version 1.90~b6.1-180528
Dylan Aïssi
gitlab at salsa.debian.org
Mon Jun 4 07:05:49 BST 2018
Dylan Aïssi pushed to branch master at Debian Med / plink1.9
Commits:
ece3ea42 by Dylan Aïssi at 2018-06-04T07:58:42+02:00
New upstream version 1.90~b6.1-180528
- - - - -
c5cc8b2b by Dylan Aïssi at 2018-06-04T07:58:45+02:00
Merge tag 'upstream/1.90_b6.1-180528'
Upstream version 1.90~b6.1-180528
- - - - -
644b614f by Dylan Aïssi at 2018-06-04T08:01:32+02:00
Update changelogs
- - - - -
7 changed files:
- debian/changelog
- debian/upstream.docs/upstream.changelog
- plink.c
- plink_assoc.c
- plink_common.c
- plink_common.h
- plink_data.c
Changes:
=====================================
debian/changelog
=====================================
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+plink1.9 (1.90~b6.1-180528-1) UNRELEASED; urgency=medium
+
+ * New upstream release.
+
+ -- Dylan Aïssi <bob.dybian at gmail.com> Mon, 04 Jun 2018 07:59:00 +0200
+
plink1.9 (1.90~b5.4-180410-1) unstable; urgency=medium
* New upstream release.
@@ -251,4 +257,3 @@ plink (1.03p1-1) unstable; urgency=low
* Initial release (Closes: #490832).
-- Steffen Moeller <moeller at debian.org> Sun, 24 Aug 2008 17:10:55 +0200
-
=====================================
debian/upstream.docs/upstream.changelog
=====================================
--- a/debian/upstream.docs/upstream.changelog
+++ b/debian/upstream.docs/upstream.changelog
@@ -1,6 +1,10 @@
# Copy/Paste from https://www.cog-genomics.org/plink/1.9/
-10 Apr 2018: --zero-cluster command-line parsing bugfix. --mendel-multigen bugfix.
+28 May 2018: Fixed --file triallelic-variant handling bug which occurred when the .map was unsorted.
+
+26 May (beta 6): --merge-equal-pos bugfixes. If you used --merge-equal-pos with .bed+.bim+.fam filesets where allele order may have differed between .bim files, or with any .ped+.map fileset at all, we recommend redoing that run.
+
+10 Apr: --zero-cluster command-line parsing bugfix. --mendel-multigen bugfix.
21 Feb: "--indiv-sort file" no longer scrambles the phenotypes when used with --{b}merge or --merge-list.
=====================================
plink.c
=====================================
--- a/plink.c
+++ b/plink.c
@@ -93,7 +93,7 @@
static const char ver_str[] =
#ifdef STABLE_BUILD
- "PLINK v1.90b5.4"
+ "PLINK v1.90b6.1"
#else
"PLINK v1.90p"
#endif
@@ -105,7 +105,7 @@ static const char ver_str[] =
#else
" 32-bit"
#endif
- " (10 Apr 2018)";
+ " (28 May 2018)";
static const char ver_str2[] =
// include leading space if day < 10, so character length stays the same
""
=====================================
plink_assoc.c
=====================================
--- a/plink_assoc.c
+++ b/plink_assoc.c
@@ -11236,6 +11236,11 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
c_star = a1_ctd - a_star;
d_star = ctrl_ctd - a1_ctd + a_star;
+ // concordance fix (25 May 2018): print NA,NA instead of inf,0
+ if ((a_star == 0.0) || (b_star == 0.0) || (c_star == 0.0) || (d_star == 0.0)) {
+ goto cmh_assoc_bd_fail;
+ }
+
// inverse variance
dxx = 1.0 / a_star + 1.0 / b_star + 1.0 / c_star + 1.0 / d_star;
=====================================
plink_common.c
=====================================
--- a/plink_common.c
+++ b/plink_common.c
@@ -4902,6 +4902,25 @@ void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_e
}
}
+void cleanup_allele_storage2(uintptr_t allele_storage_entry_ct, char** allele_storage) {
+ if (allele_storage) {
+ const uintptr_t one_char_strs_addr = (uintptr_t)g_one_char_strs;
+ for (uintptr_t idx = 0; idx < allele_storage_entry_ct;) {
+ char* cur_entry = allele_storage[idx];
+ if (!cur_entry) {
+ // --merge-equal-pos hacked entry
+ idx += 2;
+ continue;
+ }
+ // take advantage of unsigned wraparound
+ if ((((uintptr_t)cur_entry) - one_char_strs_addr) >= 512) {
+ free(cur_entry);
+ }
+ ++idx;
+ }
+ }
+}
+
void refresh_chrom_info(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx, uint32_t* __restrict chrom_end_ptr, uint32_t* __restrict chrom_fo_idx_ptr, uint32_t* __restrict is_x_ptr, uint32_t* __restrict is_y_ptr, uint32_t* __restrict is_mt_ptr, uint32_t* __restrict is_haploid_ptr) {
// assumes we are at the end of the chromosome denoted by chrom_fo_idx. Ok
// for chrom_fo_idx == 0xffffffffU.
=====================================
plink_common.h
=====================================
--- a/plink_common.h
+++ b/plink_common.h
@@ -2266,6 +2266,10 @@ uint32_t allele_reset(const char* newval, uint32_t allele_slen, char** allele_pt
void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_entry_ct, char** allele_storage);
+// needed by fixed --merge-equal-pos implementation, which takes more liberties
+// with allele_storage[]
+void cleanup_allele_storage2(uintptr_t allele_storage_entry_ct, char** allele_storage);
+
// no need for this; code is simpler if we just create a copy of marker_exclude
// with all non-autosomal loci removed
/*
=====================================
plink_data.c
=====================================
--- a/plink_data.c
+++ b/plink_data.c
@@ -5116,6 +5116,8 @@ int32_t incr_text_allele_str(char* allele_name, uint32_t an_len, Ll_str* allele_
// Ll_str.next is a pointer to a linked list entry storing the next 1+ allele
// names. Worst case, the linked list is of length 4 (beyond that we error
// out). Most of the time, it'll be length 0.
+ // Allele names are separated by tabs; \0 denotes the end of the allele name
+ // list.
// This type of function will become more important when it's time to parse
// .vcf files, etc.
uint32_t allele_num = 0;
@@ -5145,30 +5147,29 @@ int32_t incr_text_allele_str(char* allele_name, uint32_t an_len, Ll_str* allele_
if ((slen == an_len) && (!memcmp(cur_allele_name_start, allele_name, an_len))) {
marker_allele_cts[allele_num] += 1;
return 0;
+ }
+ allele_num++;
+ if (cur_allele_name_start[slen] == '\t') {
+ cur_allele_name_start = &(cur_allele_name_start[slen + 1]);
+ } else if (allele_list_start->next) {
+ allele_list_start = allele_list_start->next;
+ cur_allele_name_start = allele_list_start->ss;
} else {
- allele_num++;
- if (cur_allele_name_start[slen] == '\t') {
- cur_allele_name_start = &(cur_allele_name_start[slen + 1]);
- } else if (allele_list_start->next) {
- allele_list_start = allele_list_start->next;
- cur_allele_name_start = allele_list_start->ss;
+ chars_left = ((0x7ffffff0 - sizeof(intptr_t)) - ((uintptr_t)(&(cur_allele_name_start[slen + 1]) - allele_list_start->ss))) & 15;
+ if (chars_left > an_len) {
+ cur_allele_name_start[slen] = '\t';
+ memcpyx(&(cur_allele_name_start[slen + 1]), allele_name, an_len, '\0');
} else {
- chars_left = ((0x7ffffff0 - sizeof(intptr_t)) - ((uintptr_t)(&(cur_allele_name_start[slen + 1]) - allele_list_start->ss))) & 15;
- if (chars_left > an_len) {
- cur_allele_name_start[slen] = '\t';
- memcpyx(&(cur_allele_name_start[slen + 1]), allele_name, an_len, '\0');
- } else {
- if (bigstack_end_alloc_llstr(an_len + 1, &ll_ptr)) {
- return RET_NOMEM;
- }
- allele_list_start->next = ll_ptr;
- ll_ptr->next = nullptr;
- cur_allele_name_start = ll_ptr->ss;
- memcpyx(cur_allele_name_start, allele_name, an_len, '\0');
- }
- marker_allele_cts[allele_num] = 1;
- return 0;
+ if (bigstack_end_alloc_llstr(an_len + 1, &ll_ptr)) {
+ return RET_NOMEM;
+ }
+ allele_list_start->next = ll_ptr;
+ ll_ptr->next = nullptr;
+ cur_allele_name_start = ll_ptr->ss;
+ memcpyx(cur_allele_name_start, allele_name, an_len, '\0');
}
+ marker_allele_cts[allele_num] = 1;
+ return 0;
}
}
return RET_INVALID_FORMAT;
@@ -5443,22 +5444,17 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
}
if (marker_allele_cts[4 * marker_idx + 2]) {
uii = marker_allele_cts[4 * marker_idx + 3];
- if (map_is_unsorted) {
- sprintf(g_logbuf, "Warning: Variant %u (post-sort/filter) %sallelic; setting rarest missing.\n", map_reverse[marker_idx] + 1, (uii? "quad" : "tri"));
- } else {
- sprintf(g_logbuf, "Warning: Variant %" PRIuPTR " %sallelic; setting rarest alleles missing.\n", marker_idx + 1, (uii? "quad" : "tri"));
- }
+ // bugfix (28 May 2018): do NOT look up map_reverse[] in unsorted case
+ sprintf(g_logbuf, "Warning: Variant %" PRIuPTR "%s %sallelic; setting rarest missing.\n", marker_idx + 1, map_is_unsorted? " (post-sort)" : "", (uii? "quad" : "tri"));
logerrprintb();
get_top_two_ui(&(marker_allele_cts[4 * marker_idx]), uii? 4 : 3, &ulii, &uljj);
- uii = map_is_unsorted? map_reverse[marker_idx] : marker_idx;
} else {
ulii = (marker_allele_cts[4 * marker_idx] < marker_allele_cts[4 * marker_idx + 1])? 1 : 0;
uljj = ulii ^ 1;
- uii = marker_idx;
}
- aptr1 = get_llstr((Ll_str*)(&(marker_alleles_tmp[uii])), uljj);
- aptr2 = get_llstr((Ll_str*)(&(marker_alleles_tmp[uii])), ulii);
+ aptr1 = get_llstr((Ll_str*)(&(marker_alleles_tmp[marker_idx])), uljj);
+ aptr2 = get_llstr((Ll_str*)(&(marker_alleles_tmp[marker_idx])), ulii);
if (!aptr1) {
aptr1 = missing_geno_ptr;
} else {
@@ -5708,7 +5704,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
g_bigstack_base -= cur_slen_rdup;
logprint("\n");
if (retval != RET_NOMEM) {
- LOGERRPRINTF("Error: More than 4 different alleles at variant %u%s.\n", uii + 1, map_is_unsorted? " (post-sort/filter)" : "");
+ LOGERRPRINTF("Error: More than 4 different alleles at variant %u%s.\n", uii + 1, map_is_unsorted? " (post-sort)" : "");
}
break;
ped_to_bed_multichar_allele_ret_INVALID_FORMAT_4:
@@ -6107,11 +6103,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
}
if (marker_alleles[marker_idx * 4 + 2]) {
cc = marker_alleles[marker_idx * 4 + 3];
- if (map_is_unsorted) {
- sprintf(g_logbuf, "Warning: Variant %u (post-sort/filter) %sallelic; setting rarest missing.\n", map_reverse[marker_idx] + 1, (cc? "quad" : "tri"));
- } else {
- sprintf(g_logbuf, "Warning: Variant %" PRIuPTR " %sallelic; setting rarest alleles missing.\n", marker_idx + 1, (cc? "quad" : "tri"));
- }
+ sprintf(g_logbuf, "Warning: Variant %" PRIuPTR "%s %sallelic; setting rarest alleles missing.\n", marker_idx + 1, map_is_unsorted? " (post-sort)" : "", (cc? "quad" : "tri"));
logerrprintb();
ujj = (cc? 4 : 3);
// insertion sort
@@ -15021,8 +15013,8 @@ int32_t report_non_biallelics(char* outname, char* outname_end, Ll_str* non_bial
return retval;
}
-void merge_alleles_update_str(char* marker_allele_ptr, char** allele_ptrs, uint32_t* distinct_allele_ct_ptr) {
- uint32_t distinct_allele_ct = *distinct_allele_ct_ptr;
+void merge_alleles_update_str(char* marker_allele_ptr, char** allele_ptrs, uint32_t* distinct_allele_ctp) {
+ uint32_t distinct_allele_ct = *distinct_allele_ctp;
uint32_t allele_idx = 0;
if ((marker_allele_ptr == g_missing_geno_ptr) || (distinct_allele_ct == 3)) {
return;
@@ -15033,33 +15025,52 @@ void merge_alleles_update_str(char* marker_allele_ptr, char** allele_ptrs, uint3
}
allele_idx++;
}
- *distinct_allele_ct_ptr = distinct_allele_ct + 1;
+ *distinct_allele_ctp = distinct_allele_ct + 1;
if (distinct_allele_ct == 2) {
return;
}
allele_ptrs[distinct_allele_ct] = marker_allele_ptr;
}
-uint32_t merge_alleles(char** marker_allele_ptrs, uint32_t marker_uidx, uint32_t marker_uidx2) {
- uint32_t distinct_allele_ct = 0;
- char* allele_ptrs[2];
- allele_ptrs[0] = nullptr;
- allele_ptrs[1] = nullptr;
+uint32_t merge_equal_pos_alleles(char** marker_allele_ptrs, uint32_t marker_uidx, uint32_t* distinct_allele_ctp, char** allele_ptrs) {
// reverse order so --keep-allele-order works
- merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx + 1], allele_ptrs, &distinct_allele_ct);
- merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx], allele_ptrs, &distinct_allele_ct);
- merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx2 + 1], allele_ptrs, &distinct_allele_ct);
- merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx2], allele_ptrs, &distinct_allele_ct);
- if (distinct_allele_ct > 2) {
+ merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx + 1], allele_ptrs, distinct_allele_ctp);
+ merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx], allele_ptrs, distinct_allele_ctp);
+ if (*distinct_allele_ctp > 2) {
+ return 1;
+ }
+ return 0;
+}
+
+uint32_t save_equal_pos_alleles(const int64_t* ll_buf, uint32_t read_pos, uint32_t read_pos_stop, char** equal_pos_allele_ptrs, char** marker_allele_ptrs) {
+ const uint32_t canonical_marker_uidx = (uint32_t)ll_buf[read_pos];
+ // (do we really need the reversal in merge_equal_pos_alleles()?)
+ // bugfix (26 May 2018): forgot about plink 1.9's allele memory management
+ // strategy.
+ const char* new_allele = equal_pos_allele_ptrs[1]? equal_pos_allele_ptrs[1] : g_missing_geno_ptr;
+ if (allele_reset(new_allele, strlen(new_allele), &(marker_allele_ptrs[canonical_marker_uidx * 2]))) {
+ return 1;
+ }
+ new_allele = equal_pos_allele_ptrs[0]? equal_pos_allele_ptrs[0] : g_missing_geno_ptr;
+ if (allele_reset(new_allele, strlen(new_allele), &(marker_allele_ptrs[canonical_marker_uidx * 2 + 1]))) {
return 1;
}
- if (allele_ptrs[0]) {
- // todo: test if this write is inefficient enough that we want to guard it
- // with an if statement
- marker_allele_ptrs[2 * marker_uidx + 1] = allele_ptrs[0];
- if (allele_ptrs[1]) {
- marker_allele_ptrs[2 * marker_uidx] = allele_ptrs[1];
+ ++read_pos;
+ for (; read_pos < read_pos_stop; ++read_pos) {
+ const uint32_t cur_marker_uidx = (uint32_t)ll_buf[read_pos];
+ // DIRTY HACK (25 May 2018): save (nullptr, canonical_marker_uidx) tuple,
+ // and have merge_main() look up the canonical marker_uidx in this case.
+ // something like this is necessary for mixed .bed/.ped merge +
+ // --merge-equal-pos to work properly.
+ // bugfix (26 May 2018): need to free entries here by hand.
+ if (marker_allele_ptrs[cur_marker_uidx * 2][1]) {
+ free(marker_allele_ptrs[cur_marker_uidx * 2]);
}
+ marker_allele_ptrs[cur_marker_uidx * 2] = nullptr;
+ if (marker_allele_ptrs[cur_marker_uidx * 2 + 1][1]) {
+ free(marker_allele_ptrs[cur_marker_uidx * 2 + 1]);
+ }
+ marker_allele_ptrs[cur_marker_uidx * 2 + 1] = (char*)((uintptr_t)canonical_marker_uidx);
}
return 0;
}
@@ -15081,6 +15092,10 @@ static inline uint32_t merge_post_msort_update_maps(char* marker_ids, uintptr_t
uint32_t read_pos = 0;
uint32_t write_pos = 0; // may be lower than read_pos due to dups
uint32_t position_warning_ct = 0;
+ char* equal_pos_allele_ptrs[2];
+ equal_pos_allele_ptrs[0] = nullptr;
+ equal_pos_allele_ptrs[1] = nullptr;
+ uint32_t equal_pos_allele_ct = 0;
uint32_t chrom_idx;
uint32_t chrom_read_end_idx;
int64_t llxx;
@@ -15096,6 +15111,7 @@ static inline uint32_t merge_post_msort_update_maps(char* marker_ids, uintptr_t
continue;
}
unplaced = (unplaced == 0) || (chrom_info_ptr->zero_extra_chroms && (unplaced > chrom_info_ptr->max_code));
+ const uint32_t cur_merge_equal_pos = merge_equal_pos && (!unplaced);
chrom_read_end_idx = chrom_start[chrom_idx + 1];
// ll_buf has base-pair positions in high 32 bits, and pre-sort indices in
// low 32 bits.
@@ -15104,34 +15120,63 @@ static inline uint32_t merge_post_msort_update_maps(char* marker_ids, uintptr_t
prev_bp = (uint32_t)(((uint64_t)llxx) >> 32);
pos_buf[write_pos] = prev_bp;
marker_map[(uint32_t)llxx] = write_pos++;
+ uint32_t equal_pos_bp = 0xffffffffU; // force initial mismatch
+ uint32_t equal_pos_read_start = 0;
for (; read_pos < chrom_read_end_idx; read_pos++) {
llxx = ll_buf[read_pos];
presort_idx = (uint32_t)llxx;
cur_bp = (uint32_t)(llxx >> 32);
- // do not merge chr 0 (unplaced).
- if ((prev_bp == cur_bp) && (!unplaced)) {
- if (merge_equal_pos && merge_alleles(marker_allele_ptrs, ((uint32_t)ll_buf[read_pos - 1]), presort_idx)) {
- LOGERRPRINTFWW("Error: --merge-equal-pos failure. Variants '%s' and '%s' have the same position, but do not share the same alleles.\n", &(marker_ids[max_marker_id_len * presort_idx]), &(marker_ids[max_marker_id_len * ((uint32_t)ll_buf[read_pos - 1])]));
- return 1;
- }
- LOGPREPRINTFWW("Warning: Variants '%s' and '%s' have the same position.\n", &(marker_ids[max_marker_id_len * presort_idx]), &(marker_ids[max_marker_id_len * ((uint32_t)ll_buf[read_pos - 1])]));
- if (position_warning_ct < 3) {
- logerrprintb();
- } else {
- logstr(g_logbuf);
- }
- position_warning_ct++;
- if (merge_equal_pos) {
+ // don't care about position conflicts on chr 0 (unplaced).
+ if (cur_merge_equal_pos) {
+ // bugfix (25 May 2018): previous merge_alleles() usage was broken for
+ // multiple reasons
+ if (prev_bp == cur_bp) {
+ if (equal_pos_bp != prev_bp) {
+ equal_pos_read_start = read_pos - 1;
+ equal_pos_bp = prev_bp;
+ equal_pos_allele_ptrs[0] = nullptr;
+ equal_pos_allele_ptrs[1] = nullptr;
+ equal_pos_allele_ct = 0;
+ if (merge_equal_pos_alleles(marker_allele_ptrs, (uint32_t)ll_buf[equal_pos_read_start], &equal_pos_allele_ct, equal_pos_allele_ptrs)) {
+ LOGERRPRINTFWW("Error: --merge-equal-pos failure: inconsistent alleles at %s:%u.\n", "?", cur_bp);
+ return 1;
+ }
+ }
+ if (merge_equal_pos_alleles(marker_allele_ptrs, presort_idx, &equal_pos_allele_ct, equal_pos_allele_ptrs)) {
+ LOGERRPRINTFWW("Error: --merge-equal-pos failure: inconsistent alleles at %s:%u.\n", "?", cur_bp);
+ return 1;
+ }
marker_map[presort_idx] = write_pos - 1;
continue;
- }
+ } else {
+ if (equal_pos_bp == prev_bp) {
+ if (save_equal_pos_alleles(ll_buf, equal_pos_read_start, read_pos, equal_pos_allele_ptrs, marker_allele_ptrs)) {
+ return 1;
+ }
+ }
+ prev_bp = cur_bp;
+ }
+ } else if ((prev_bp == cur_bp) && (!unplaced)) {
+ // shouldn't print warning in --merge-equal-pos case
+ LOGPREPRINTFWW("Warning: Variants '%s' and '%s' have the same position.\n", &(marker_ids[max_marker_id_len * presort_idx]), &(marker_ids[max_marker_id_len * ((uint32_t)ll_buf[read_pos - 1])]));
+ if (position_warning_ct < 3) {
+ logerrprintb();
+ } else {
+ logstr(g_logbuf);
+ }
+ position_warning_ct++;
} else {
- prev_bp = cur_bp;
+ prev_bp = cur_bp;
}
marker_map[presort_idx] = write_pos;
marker_cms[write_pos] = marker_cms_tmp[presort_idx];
pos_buf[write_pos++] = cur_bp;
}
+ if (equal_pos_bp == prev_bp) {
+ if (save_equal_pos_alleles(ll_buf, equal_pos_read_start, read_pos, equal_pos_allele_ptrs, marker_allele_ptrs)) {
+ return 1;
+ }
+ }
read_pos = chrom_start[chrom_idx + 1];
chrom_start[chrom_idx + 1] = write_pos;
}
@@ -15367,8 +15412,16 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
bufptr2[alen1] = '\0';
alen2 = strlen_se(bufptr3);
bufptr3[alen2] = '\0';
- bufptr4 = marker_allele_ptrs[((uint32_t)ii) * 2];
- bufptr5 = marker_allele_ptrs[((uint32_t)ii) * 2 + 1];
+ uint32_t canonical_ascii_uidx = (uint32_t)ii;
+ bufptr4 = marker_allele_ptrs[canonical_ascii_uidx * 2];
+ bufptr5 = marker_allele_ptrs[canonical_ascii_uidx * 2 + 1];
+ if (!bufptr4) {
+ // --merge-equal-pos non-canonical index. Retrieve the canonical
+ // index.
+ canonical_ascii_uidx = (uintptr_t)bufptr5;
+ bufptr4 = marker_allele_ptrs[canonical_ascii_uidx * 2];
+ bufptr5 = marker_allele_ptrs[canonical_ascii_uidx * 2 + 1];
+ }
last_marker_in_idx = marker_in_idx;
if (!cur_sample_ct) {
@@ -15531,7 +15584,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
if (!umm) {
diff_discordant++;
}
- if (merge_diff_print(outfile, idbuf, bufptr, &(sample_ids[ujj * max_sample_id_len]), ucc, ucc3, &(marker_allele_ptrs[((uint32_t)ii) * 2]))) {
+ if (merge_diff_print(outfile, idbuf, bufptr, &(sample_ids[ujj * max_sample_id_len]), ucc, ucc3, &(marker_allele_ptrs[canonical_ascii_uidx * 2]))) {
goto merge_main_ret_WRITE_FAIL;
}
}
@@ -15669,6 +15722,15 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
}
// actual relative position
marker_out_idx = marker_map[uii] - start_marker_idx;
+ // bugfix (25 May 2018): in --merge-equal-pos case, we need to update a
+ // single canonical marker_allele_ptrs[]. We now handle this by
+ // storing nullptr in [0] and the canonical index in [1] when the
+ // current index is non-canonical.
+ char** canonical_marker_allele_ptrs = &(marker_allele_ptrs[uii * 2]);
+ if (!canonical_marker_allele_ptrs[0]) {
+ const uint32_t canonical_ascii_uidx = (uintptr_t)canonical_marker_allele_ptrs[1];
+ canonical_marker_allele_ptrs = &(marker_allele_ptrs[canonical_ascii_uidx * 2]);
+ }
if ((*aptr1 == '0') && (alen1 == 1)) {
if ((*aptr2 != '0') || (alen2 != 1)) {
@@ -15679,39 +15741,39 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
goto merge_main_ret_HALF_MISSING;
} else {
ucc2 = 0; // A2 count
- if (!strcmp(aptr1, marker_allele_ptrs[uii * 2 + 1])) {
+ if (!strcmp(aptr1, canonical_marker_allele_ptrs[1])) {
ucc2++;
- } else if (strcmp(aptr1, marker_allele_ptrs[uii * 2])) {
+ } else if (strcmp(aptr1, canonical_marker_allele_ptrs[0])) {
// new allele code
- if (marker_allele_ptrs[uii * 2 + 1] == missing_geno_ptr) {
+ if (canonical_marker_allele_ptrs[1] == missing_geno_ptr) {
// fill A2 first
ucc2++;
- ukk = uii * 2 + 1;
- } else if (marker_allele_ptrs[uii * 2] == missing_geno_ptr) {
- ukk = uii * 2;
+ ukk = 1;
+ } else if (canonical_marker_allele_ptrs[0] == missing_geno_ptr) {
+ ukk = 0;
} else {
goto merge_main_ret_NOT_BIALLELIC;
}
- if (allele_set(aptr1, alen1, &(marker_allele_ptrs[ukk]))) {
+ if (allele_set(aptr1, alen1, &(canonical_marker_allele_ptrs[ukk]))) {
goto merge_main_ret_NOMEM;
}
}
- if (!strcmp(aptr2, marker_allele_ptrs[uii * 2 + 1])) {
+ if (!strcmp(aptr2, canonical_marker_allele_ptrs[1])) {
ucc2++;
- } else if (strcmp(aptr2, marker_allele_ptrs[uii * 2])) {
+ } else if (strcmp(aptr2, canonical_marker_allele_ptrs[0])) {
// put A2 check second since the only way A2 will be unset when A1
// is set at this point is if it was specified that way in an
// earlier binary file.
- if (marker_allele_ptrs[uii * 2] == missing_geno_ptr) {
- ukk = uii * 2;
- } else if (marker_allele_ptrs[uii * 2 + 1] == missing_geno_ptr) {
- ukk = uii * 2 + 1;
+ if (canonical_marker_allele_ptrs[0] == missing_geno_ptr) {
+ ukk = 0;
+ } else if (canonical_marker_allele_ptrs[1] == missing_geno_ptr) {
+ ukk = 1;
// bugfix (14 Nov 2017): forgot to increment the A2 allele count!
++ucc2;
} else {
goto merge_main_ret_NOT_BIALLELIC;
}
- if (allele_set(aptr2, alen2, &(marker_allele_ptrs[ukk]))) {
+ if (allele_set(aptr2, alen2, &(canonical_marker_allele_ptrs[ukk]))) {
goto merge_main_ret_NOMEM;
}
}
@@ -15772,7 +15834,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
if (!umm) {
diff_discordant++;
}
- if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, &(marker_allele_ptrs[uii * 2]))) {
+ if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, canonical_marker_allele_ptrs)) {
goto merge_main_ret_WRITE_FAIL;
}
}
@@ -15786,7 +15848,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
diff_not_both_genotyped++;
} else if (ucc2 != ucc3) {
diff_discordant++;
- if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, &(marker_allele_ptrs[uii * 2]))) {
+ if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, canonical_marker_allele_ptrs)) {
goto merge_main_ret_WRITE_FAIL;
}
}
@@ -16648,7 +16710,11 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
break;
}
merge_datasets_ret_1:
- cleanup_allele_storage(2, tot_marker_ct * 2, marker_allele_ptrs);
+ if (merge_equal_pos) {
+ cleanup_allele_storage2(tot_marker_ct * 2, marker_allele_ptrs);
+ } else {
+ cleanup_allele_storage(2, tot_marker_ct * 2, marker_allele_ptrs);
+ }
fclose_cond(mergelistfile);
fclose_cond(outfile);
bigstack_double_reset(bigstack_mark, bigstack_end_mark);
View it on GitLab: https://salsa.debian.org/med-team/plink1-9/compare/7c1a905bdc915410076bf26808bc7a627e194c3f...644b614f638d080e6bd004cc33b968f6e276ebfa
--
View it on GitLab: https://salsa.debian.org/med-team/plink1-9/compare/7c1a905bdc915410076bf26808bc7a627e194c3f...644b614f638d080e6bd004cc33b968f6e276ebfa
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180604/876ca615/attachment-0001.html>
More information about the debian-med-commit
mailing list