[med-svn] [Git][med-team/plink1-9][master] 3 commits: New upstream version 1.90~b6.1-180528

Dylan Aïssi gitlab at salsa.debian.org
Mon Jun 4 07:05:49 BST 2018


Dylan Aïssi pushed to branch master at Debian Med / plink1.9


Commits:
ece3ea42 by Dylan Aïssi at 2018-06-04T07:58:42+02:00
New upstream version 1.90~b6.1-180528
- - - - -
c5cc8b2b by Dylan Aïssi at 2018-06-04T07:58:45+02:00
Merge tag 'upstream/1.90_b6.1-180528'

Upstream version 1.90~b6.1-180528

- - - - -
644b614f by Dylan Aïssi at 2018-06-04T08:01:32+02:00
Update changelogs

- - - - -


7 changed files:

- debian/changelog
- debian/upstream.docs/upstream.changelog
- plink.c
- plink_assoc.c
- plink_common.c
- plink_common.h
- plink_data.c


Changes:

=====================================
debian/changelog
=====================================
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+plink1.9 (1.90~b6.1-180528-1) UNRELEASED; urgency=medium
+
+  * New upstream release.
+
+ -- Dylan Aïssi <bob.dybian at gmail.com>  Mon, 04 Jun 2018 07:59:00 +0200
+
 plink1.9 (1.90~b5.4-180410-1) unstable; urgency=medium
 
   * New upstream release.
@@ -251,4 +257,3 @@ plink (1.03p1-1) unstable; urgency=low
   * Initial release (Closes: #490832).
 
  -- Steffen Moeller <moeller at debian.org>  Sun, 24 Aug 2008 17:10:55 +0200
-


=====================================
debian/upstream.docs/upstream.changelog
=====================================
--- a/debian/upstream.docs/upstream.changelog
+++ b/debian/upstream.docs/upstream.changelog
@@ -1,6 +1,10 @@
 # Copy/Paste from https://www.cog-genomics.org/plink/1.9/
 
-10 Apr 2018: --zero-cluster command-line parsing bugfix. --mendel-multigen bugfix.
+28 May 2018: Fixed --file triallelic-variant handling bug which occurred when the .map was unsorted.
+
+26 May (beta 6): --merge-equal-pos bugfixes. If you used --merge-equal-pos with .bed+.bim+.fam filesets where allele order may have differed between .bim files, or with any .ped+.map fileset at all, we recommend redoing that run.
+
+10 Apr: --zero-cluster command-line parsing bugfix. --mendel-multigen bugfix.
 
 21 Feb: "--indiv-sort file" no longer scrambles the phenotypes when used with --{b}merge or --merge-list.
 


=====================================
plink.c
=====================================
--- a/plink.c
+++ b/plink.c
@@ -93,7 +93,7 @@
 
 static const char ver_str[] =
 #ifdef STABLE_BUILD
-  "PLINK v1.90b5.4"
+  "PLINK v1.90b6.1"
 #else
   "PLINK v1.90p"
 #endif
@@ -105,7 +105,7 @@ static const char ver_str[] =
 #else
   " 32-bit"
 #endif
-  " (10 Apr 2018)";
+  " (28 May 2018)";
 static const char ver_str2[] =
   // include leading space if day < 10, so character length stays the same
   ""


=====================================
plink_assoc.c
=====================================
--- a/plink_assoc.c
+++ b/plink_assoc.c
@@ -11236,6 +11236,11 @@ int32_t cmh_assoc(pthread_t* threads, FILE* bedfile, uintptr_t bed_offset, char*
               c_star = a1_ctd - a_star;
               d_star = ctrl_ctd - a1_ctd + a_star;
 
+              // concordance fix (25 May 2018): print NA,NA instead of inf,0
+              if ((a_star == 0.0) || (b_star == 0.0) || (c_star == 0.0) || (d_star == 0.0)) {
+                goto cmh_assoc_bd_fail;
+              }
+
 	      // inverse variance
               dxx = 1.0 / a_star + 1.0 / b_star + 1.0 / c_star + 1.0 / d_star;
 


=====================================
plink_common.c
=====================================
--- a/plink_common.c
+++ b/plink_common.c
@@ -4902,6 +4902,25 @@ void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_e
   }
 }
 
+void cleanup_allele_storage2(uintptr_t allele_storage_entry_ct, char** allele_storage) {
+  if (allele_storage) {
+    const uintptr_t one_char_strs_addr = (uintptr_t)g_one_char_strs;
+    for (uintptr_t idx = 0; idx < allele_storage_entry_ct;) {
+      char* cur_entry = allele_storage[idx];
+      if (!cur_entry) {
+        // --merge-equal-pos hacked entry
+        idx += 2;
+        continue;
+      }
+      // take advantage of unsigned wraparound
+      if ((((uintptr_t)cur_entry) - one_char_strs_addr) >= 512) {
+	free(cur_entry);
+      }
+      ++idx;
+    }
+  }
+}
+
 void refresh_chrom_info(const Chrom_info* chrom_info_ptr, uintptr_t marker_uidx, uint32_t* __restrict chrom_end_ptr, uint32_t* __restrict chrom_fo_idx_ptr, uint32_t* __restrict is_x_ptr, uint32_t* __restrict is_y_ptr, uint32_t* __restrict is_mt_ptr, uint32_t* __restrict is_haploid_ptr) {
   // assumes we are at the end of the chromosome denoted by chrom_fo_idx.  Ok
   // for chrom_fo_idx == 0xffffffffU.


=====================================
plink_common.h
=====================================
--- a/plink_common.h
+++ b/plink_common.h
@@ -2266,6 +2266,10 @@ uint32_t allele_reset(const char* newval, uint32_t allele_slen, char** allele_pt
 
 void cleanup_allele_storage(uint32_t max_allele_slen, uintptr_t allele_storage_entry_ct, char** allele_storage);
 
+// needed by fixed --merge-equal-pos implementation, which takes more liberties
+// with allele_storage[]
+void cleanup_allele_storage2(uintptr_t allele_storage_entry_ct, char** allele_storage);
+
 // no need for this; code is simpler if we just create a copy of marker_exclude
 // with all non-autosomal loci removed
 /*


=====================================
plink_data.c
=====================================
--- a/plink_data.c
+++ b/plink_data.c
@@ -5116,6 +5116,8 @@ int32_t incr_text_allele_str(char* allele_name, uint32_t an_len, Ll_str* allele_
   // Ll_str.next is a pointer to a linked list entry storing the next 1+ allele
   // names.  Worst case, the linked list is of length 4 (beyond that we error
   // out).  Most of the time, it'll be length 0.
+  // Allele names are separated by tabs; \0 denotes the end of the allele name
+  // list.
   // This type of function will become more important when it's time to parse
   // .vcf files, etc.
   uint32_t allele_num = 0;
@@ -5145,30 +5147,29 @@ int32_t incr_text_allele_str(char* allele_name, uint32_t an_len, Ll_str* allele_
     if ((slen == an_len) && (!memcmp(cur_allele_name_start, allele_name, an_len))) {
       marker_allele_cts[allele_num] += 1;
       return 0;
+    }
+    allele_num++;
+    if (cur_allele_name_start[slen] == '\t') {
+      cur_allele_name_start = &(cur_allele_name_start[slen + 1]);
+    } else if (allele_list_start->next) {
+      allele_list_start = allele_list_start->next;
+      cur_allele_name_start = allele_list_start->ss;
     } else {
-      allele_num++;
-      if (cur_allele_name_start[slen] == '\t') {
-	cur_allele_name_start = &(cur_allele_name_start[slen + 1]);
-      } else if (allele_list_start->next) {
-	allele_list_start = allele_list_start->next;
-	cur_allele_name_start = allele_list_start->ss;
+      chars_left = ((0x7ffffff0 - sizeof(intptr_t)) - ((uintptr_t)(&(cur_allele_name_start[slen + 1]) - allele_list_start->ss))) & 15;
+      if (chars_left > an_len) {
+        cur_allele_name_start[slen] = '\t';
+        memcpyx(&(cur_allele_name_start[slen + 1]), allele_name, an_len, '\0');
       } else {
-	chars_left = ((0x7ffffff0 - sizeof(intptr_t)) - ((uintptr_t)(&(cur_allele_name_start[slen + 1]) - allele_list_start->ss))) & 15;
-	if (chars_left > an_len) {
-	  cur_allele_name_start[slen] = '\t';
-	  memcpyx(&(cur_allele_name_start[slen + 1]), allele_name, an_len, '\0');
-	} else {
-	  if (bigstack_end_alloc_llstr(an_len + 1, &ll_ptr)) {
-	    return RET_NOMEM;
-	  }
-	  allele_list_start->next = ll_ptr;
-	  ll_ptr->next = nullptr;
-	  cur_allele_name_start = ll_ptr->ss;
-	  memcpyx(cur_allele_name_start, allele_name, an_len, '\0');
-	}
-	marker_allele_cts[allele_num] = 1;
-	return 0;
+        if (bigstack_end_alloc_llstr(an_len + 1, &ll_ptr)) {
+          return RET_NOMEM;
+        }
+        allele_list_start->next = ll_ptr;
+        ll_ptr->next = nullptr;
+        cur_allele_name_start = ll_ptr->ss;
+        memcpyx(cur_allele_name_start, allele_name, an_len, '\0');
       }
+      marker_allele_cts[allele_num] = 1;
+      return 0;
     }
   }
   return RET_INVALID_FORMAT;
@@ -5443,22 +5444,17 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
     }
     if (marker_allele_cts[4 * marker_idx + 2]) {
       uii = marker_allele_cts[4 * marker_idx + 3];
-      if (map_is_unsorted) {
-        sprintf(g_logbuf, "Warning: Variant %u (post-sort/filter) %sallelic; setting rarest missing.\n", map_reverse[marker_idx] + 1, (uii? "quad" : "tri"));
-      } else {
-        sprintf(g_logbuf, "Warning: Variant %" PRIuPTR " %sallelic; setting rarest alleles missing.\n", marker_idx + 1, (uii? "quad" : "tri"));
-      }
+      // bugfix (28 May 2018): do NOT look up map_reverse[] in unsorted case
+      sprintf(g_logbuf, "Warning: Variant %" PRIuPTR "%s %sallelic; setting rarest missing.\n", marker_idx + 1, map_is_unsorted? " (post-sort)" : "", (uii? "quad" : "tri"));
       logerrprintb();
       get_top_two_ui(&(marker_allele_cts[4 * marker_idx]), uii? 4 : 3, &ulii, &uljj);
-      uii = map_is_unsorted? map_reverse[marker_idx] : marker_idx;
     } else {
       ulii = (marker_allele_cts[4 * marker_idx] < marker_allele_cts[4 * marker_idx + 1])? 1 : 0;
       uljj = ulii ^ 1;
-      uii = marker_idx;
     }
 
-    aptr1 = get_llstr((Ll_str*)(&(marker_alleles_tmp[uii])), uljj);
-    aptr2 = get_llstr((Ll_str*)(&(marker_alleles_tmp[uii])), ulii);
+    aptr1 = get_llstr((Ll_str*)(&(marker_alleles_tmp[marker_idx])), uljj);
+    aptr2 = get_llstr((Ll_str*)(&(marker_alleles_tmp[marker_idx])), ulii);
     if (!aptr1) {
       aptr1 = missing_geno_ptr;
     } else {
@@ -5708,7 +5704,7 @@ int32_t ped_to_bed_multichar_allele(FILE** pedfile_ptr, FILE** outfile_ptr, char
     g_bigstack_base -= cur_slen_rdup;
     logprint("\n");
     if (retval != RET_NOMEM) {
-      LOGERRPRINTF("Error: More than 4 different alleles at variant %u%s.\n", uii + 1, map_is_unsorted? " (post-sort/filter)" : "");
+      LOGERRPRINTF("Error: More than 4 different alleles at variant %u%s.\n", uii + 1, map_is_unsorted? " (post-sort)" : "");
     }
     break;
   ped_to_bed_multichar_allele_ret_INVALID_FORMAT_4:
@@ -6107,11 +6103,7 @@ int32_t ped_to_bed(char* pedname, char* mapname, char* outname, char* outname_en
 	}
 	if (marker_alleles[marker_idx * 4 + 2]) {
 	  cc = marker_alleles[marker_idx * 4 + 3];
-	  if (map_is_unsorted) {
-	    sprintf(g_logbuf, "Warning: Variant %u (post-sort/filter) %sallelic; setting rarest missing.\n", map_reverse[marker_idx] + 1, (cc? "quad" : "tri"));
-	  } else {
-	    sprintf(g_logbuf, "Warning: Variant %" PRIuPTR " %sallelic; setting rarest alleles missing.\n", marker_idx + 1, (cc? "quad" : "tri"));
-	  }
+          sprintf(g_logbuf, "Warning: Variant %" PRIuPTR "%s %sallelic; setting rarest alleles missing.\n", marker_idx + 1, map_is_unsorted? " (post-sort)" : "", (cc? "quad" : "tri"));
 	  logerrprintb();
 	  ujj = (cc? 4 : 3);
 	  // insertion sort
@@ -15021,8 +15013,8 @@ int32_t report_non_biallelics(char* outname, char* outname_end, Ll_str* non_bial
   return retval;
 }
 
-void merge_alleles_update_str(char* marker_allele_ptr, char** allele_ptrs, uint32_t* distinct_allele_ct_ptr) {
-  uint32_t distinct_allele_ct = *distinct_allele_ct_ptr;
+void merge_alleles_update_str(char* marker_allele_ptr, char** allele_ptrs, uint32_t* distinct_allele_ctp) {
+  uint32_t distinct_allele_ct = *distinct_allele_ctp;
   uint32_t allele_idx = 0;
   if ((marker_allele_ptr == g_missing_geno_ptr) || (distinct_allele_ct == 3)) {
     return;
@@ -15033,33 +15025,52 @@ void merge_alleles_update_str(char* marker_allele_ptr, char** allele_ptrs, uint3
     }
     allele_idx++;
   }
-  *distinct_allele_ct_ptr = distinct_allele_ct + 1;
+  *distinct_allele_ctp = distinct_allele_ct + 1;
   if (distinct_allele_ct == 2) {
     return;
   }
   allele_ptrs[distinct_allele_ct] = marker_allele_ptr;
 }
 
-uint32_t merge_alleles(char** marker_allele_ptrs, uint32_t marker_uidx, uint32_t marker_uidx2) {
-  uint32_t distinct_allele_ct = 0;
-  char* allele_ptrs[2];
-  allele_ptrs[0] = nullptr;
-  allele_ptrs[1] = nullptr;
+uint32_t merge_equal_pos_alleles(char** marker_allele_ptrs, uint32_t marker_uidx, uint32_t* distinct_allele_ctp, char** allele_ptrs) {
   // reverse order so --keep-allele-order works
-  merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx + 1], allele_ptrs, &distinct_allele_ct);
-  merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx], allele_ptrs, &distinct_allele_ct);
-  merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx2 + 1], allele_ptrs, &distinct_allele_ct);
-  merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx2], allele_ptrs, &distinct_allele_ct);
-  if (distinct_allele_ct > 2) {
+  merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx + 1], allele_ptrs, distinct_allele_ctp);
+  merge_alleles_update_str(marker_allele_ptrs[2 * marker_uidx], allele_ptrs, distinct_allele_ctp);
+  if (*distinct_allele_ctp > 2) {
+    return 1;
+  }
+  return 0;
+}
+
+uint32_t save_equal_pos_alleles(const int64_t* ll_buf, uint32_t read_pos, uint32_t read_pos_stop, char** equal_pos_allele_ptrs, char** marker_allele_ptrs) {
+  const uint32_t canonical_marker_uidx = (uint32_t)ll_buf[read_pos];
+  // (do we really need the reversal in merge_equal_pos_alleles()?)
+  // bugfix (26 May 2018): forgot about plink 1.9's allele memory management
+  // strategy.
+  const char* new_allele = equal_pos_allele_ptrs[1]? equal_pos_allele_ptrs[1] : g_missing_geno_ptr;
+  if (allele_reset(new_allele, strlen(new_allele), &(marker_allele_ptrs[canonical_marker_uidx * 2]))) {
+    return 1;
+  }
+  new_allele = equal_pos_allele_ptrs[0]? equal_pos_allele_ptrs[0] : g_missing_geno_ptr;
+  if (allele_reset(new_allele, strlen(new_allele), &(marker_allele_ptrs[canonical_marker_uidx * 2 + 1]))) {
     return 1;
   }
-  if (allele_ptrs[0]) {
-    // todo: test if this write is inefficient enough that we want to guard it
-    // with an if statement
-    marker_allele_ptrs[2 * marker_uidx + 1] = allele_ptrs[0];
-    if (allele_ptrs[1]) {
-      marker_allele_ptrs[2 * marker_uidx] = allele_ptrs[1];
+  ++read_pos;
+  for (; read_pos < read_pos_stop; ++read_pos) {
+    const uint32_t cur_marker_uidx = (uint32_t)ll_buf[read_pos];
+    // DIRTY HACK (25 May 2018): save (nullptr, canonical_marker_uidx) tuple,
+    // and have merge_main() look up the canonical marker_uidx in this case.
+    // something like this is necessary for mixed .bed/.ped merge +
+    // --merge-equal-pos to work properly.
+    // bugfix (26 May 2018): need to free entries here by hand.
+    if (marker_allele_ptrs[cur_marker_uidx * 2][1]) {
+      free(marker_allele_ptrs[cur_marker_uidx * 2]);
     }
+    marker_allele_ptrs[cur_marker_uidx * 2] = nullptr;
+    if (marker_allele_ptrs[cur_marker_uidx * 2 + 1][1]) {
+      free(marker_allele_ptrs[cur_marker_uidx * 2 + 1]);
+    }
+    marker_allele_ptrs[cur_marker_uidx * 2 + 1] = (char*)((uintptr_t)canonical_marker_uidx);
   }
   return 0;
 }
@@ -15081,6 +15092,10 @@ static inline uint32_t merge_post_msort_update_maps(char* marker_ids, uintptr_t 
   uint32_t read_pos = 0;
   uint32_t write_pos = 0; // may be lower than read_pos due to dups
   uint32_t position_warning_ct = 0;
+  char* equal_pos_allele_ptrs[2];
+  equal_pos_allele_ptrs[0] = nullptr;
+  equal_pos_allele_ptrs[1] = nullptr;
+  uint32_t equal_pos_allele_ct = 0;
   uint32_t chrom_idx;
   uint32_t chrom_read_end_idx;
   int64_t llxx;
@@ -15096,6 +15111,7 @@ static inline uint32_t merge_post_msort_update_maps(char* marker_ids, uintptr_t 
       continue;
     }
     unplaced = (unplaced == 0) || (chrom_info_ptr->zero_extra_chroms && (unplaced > chrom_info_ptr->max_code));
+    const uint32_t cur_merge_equal_pos = merge_equal_pos && (!unplaced);
     chrom_read_end_idx = chrom_start[chrom_idx + 1];
     // ll_buf has base-pair positions in high 32 bits, and pre-sort indices in
     // low 32 bits.
@@ -15104,34 +15120,63 @@ static inline uint32_t merge_post_msort_update_maps(char* marker_ids, uintptr_t 
     prev_bp = (uint32_t)(((uint64_t)llxx) >> 32);
     pos_buf[write_pos] = prev_bp;
     marker_map[(uint32_t)llxx] = write_pos++;
+    uint32_t equal_pos_bp = 0xffffffffU;  // force initial mismatch
+    uint32_t equal_pos_read_start = 0;
     for (; read_pos < chrom_read_end_idx; read_pos++) {
       llxx = ll_buf[read_pos];
       presort_idx = (uint32_t)llxx;
       cur_bp = (uint32_t)(llxx >> 32);
-      // do not merge chr 0 (unplaced).
-      if ((prev_bp == cur_bp) && (!unplaced)) {
-	if (merge_equal_pos && merge_alleles(marker_allele_ptrs, ((uint32_t)ll_buf[read_pos - 1]), presort_idx)) {
-	  LOGERRPRINTFWW("Error: --merge-equal-pos failure.  Variants '%s' and '%s' have the same position, but do not share the same alleles.\n", &(marker_ids[max_marker_id_len * presort_idx]), &(marker_ids[max_marker_id_len * ((uint32_t)ll_buf[read_pos - 1])]));
-	  return 1;
-	}
-	LOGPREPRINTFWW("Warning: Variants '%s' and '%s' have the same position.\n", &(marker_ids[max_marker_id_len * presort_idx]), &(marker_ids[max_marker_id_len * ((uint32_t)ll_buf[read_pos - 1])]));
-	if (position_warning_ct < 3) {
-	  logerrprintb();
-	} else {
-	  logstr(g_logbuf);
-	}
-	position_warning_ct++;
-	if (merge_equal_pos) {
+      // don't care about position conflicts on chr 0 (unplaced).
+      if (cur_merge_equal_pos) {
+        // bugfix (25 May 2018): previous merge_alleles() usage was broken for
+        // multiple reasons
+        if (prev_bp == cur_bp) {
+          if (equal_pos_bp != prev_bp) {
+            equal_pos_read_start = read_pos - 1;
+            equal_pos_bp = prev_bp;
+            equal_pos_allele_ptrs[0] = nullptr;
+            equal_pos_allele_ptrs[1] = nullptr;
+            equal_pos_allele_ct = 0;
+            if (merge_equal_pos_alleles(marker_allele_ptrs, (uint32_t)ll_buf[equal_pos_read_start], &equal_pos_allele_ct, equal_pos_allele_ptrs)) {
+              LOGERRPRINTFWW("Error: --merge-equal-pos failure: inconsistent alleles at %s:%u.\n", "?", cur_bp);
+              return 1;
+            }
+          }
+          if (merge_equal_pos_alleles(marker_allele_ptrs, presort_idx, &equal_pos_allele_ct, equal_pos_allele_ptrs)) {
+            LOGERRPRINTFWW("Error: --merge-equal-pos failure: inconsistent alleles at %s:%u.\n", "?", cur_bp);
+            return 1;
+          }
 	  marker_map[presort_idx] = write_pos - 1;
 	  continue;
-	}
+        } else {
+          if (equal_pos_bp == prev_bp) {
+            if (save_equal_pos_alleles(ll_buf, equal_pos_read_start, read_pos, equal_pos_allele_ptrs, marker_allele_ptrs)) {
+              return 1;
+            }
+          }
+          prev_bp = cur_bp;
+        }
+      } else if ((prev_bp == cur_bp) && (!unplaced)) {
+        // shouldn't print warning in --merge-equal-pos case
+        LOGPREPRINTFWW("Warning: Variants '%s' and '%s' have the same position.\n", &(marker_ids[max_marker_id_len * presort_idx]), &(marker_ids[max_marker_id_len * ((uint32_t)ll_buf[read_pos - 1])]));
+        if (position_warning_ct < 3) {
+          logerrprintb();
+        } else {
+          logstr(g_logbuf);
+        }
+        position_warning_ct++;
       } else {
-	prev_bp = cur_bp;
+        prev_bp = cur_bp;
       }
       marker_map[presort_idx] = write_pos;
       marker_cms[write_pos] = marker_cms_tmp[presort_idx];
       pos_buf[write_pos++] = cur_bp;
     }
+    if (equal_pos_bp == prev_bp) {
+      if (save_equal_pos_alleles(ll_buf, equal_pos_read_start, read_pos, equal_pos_allele_ptrs, marker_allele_ptrs)) {
+        return 1;
+      }
+    }
     read_pos = chrom_start[chrom_idx + 1];
     chrom_start[chrom_idx + 1] = write_pos;
   }
@@ -15367,8 +15412,16 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
       bufptr2[alen1] = '\0';
       alen2 = strlen_se(bufptr3);
       bufptr3[alen2] = '\0';
-      bufptr4 = marker_allele_ptrs[((uint32_t)ii) * 2];
-      bufptr5 = marker_allele_ptrs[((uint32_t)ii) * 2 + 1];
+      uint32_t canonical_ascii_uidx = (uint32_t)ii;
+      bufptr4 = marker_allele_ptrs[canonical_ascii_uidx * 2];
+      bufptr5 = marker_allele_ptrs[canonical_ascii_uidx * 2 + 1];
+      if (!bufptr4) {
+        // --merge-equal-pos non-canonical index.  Retrieve the canonical
+        // index.
+        canonical_ascii_uidx = (uintptr_t)bufptr5;
+        bufptr4 = marker_allele_ptrs[canonical_ascii_uidx * 2];
+        bufptr5 = marker_allele_ptrs[canonical_ascii_uidx * 2 + 1];
+      }
 
       last_marker_in_idx = marker_in_idx;
       if (!cur_sample_ct) {
@@ -15531,7 +15584,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
 		if (!umm) {
 		  diff_discordant++;
 		}
-		if (merge_diff_print(outfile, idbuf, bufptr, &(sample_ids[ujj * max_sample_id_len]), ucc, ucc3, &(marker_allele_ptrs[((uint32_t)ii) * 2]))) {
+		if (merge_diff_print(outfile, idbuf, bufptr, &(sample_ids[ujj * max_sample_id_len]), ucc, ucc3, &(marker_allele_ptrs[canonical_ascii_uidx * 2]))) {
 		  goto merge_main_ret_WRITE_FAIL;
 		}
 	      }
@@ -15669,6 +15722,15 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
 	}
 	// actual relative position
 	marker_out_idx = marker_map[uii] - start_marker_idx;
+        // bugfix (25 May 2018): in --merge-equal-pos case, we need to update a
+        // single canonical marker_allele_ptrs[].  We now handle this by
+        // storing nullptr in [0] and the canonical index in [1] when the
+        // current index is non-canonical.
+        char** canonical_marker_allele_ptrs = &(marker_allele_ptrs[uii * 2]);
+        if (!canonical_marker_allele_ptrs[0]) {
+          const uint32_t canonical_ascii_uidx = (uintptr_t)canonical_marker_allele_ptrs[1];
+          canonical_marker_allele_ptrs = &(marker_allele_ptrs[canonical_ascii_uidx * 2]);
+        }
 
 	if ((*aptr1 == '0') && (alen1 == 1)) {
           if ((*aptr2 != '0') || (alen2 != 1)) {
@@ -15679,39 +15741,39 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
 	  goto merge_main_ret_HALF_MISSING;
 	} else {
 	  ucc2 = 0; // A2 count
-	  if (!strcmp(aptr1, marker_allele_ptrs[uii * 2 + 1])) {
+	  if (!strcmp(aptr1, canonical_marker_allele_ptrs[1])) {
 	    ucc2++;
-	  } else if (strcmp(aptr1, marker_allele_ptrs[uii * 2])) {
+	  } else if (strcmp(aptr1, canonical_marker_allele_ptrs[0])) {
 	    // new allele code
-	    if (marker_allele_ptrs[uii * 2 + 1] == missing_geno_ptr) {
+	    if (canonical_marker_allele_ptrs[1] == missing_geno_ptr) {
 	      // fill A2 first
 	      ucc2++;
-	      ukk = uii * 2 + 1;
-	    } else if (marker_allele_ptrs[uii * 2] == missing_geno_ptr) {
-	      ukk = uii * 2;
+	      ukk = 1;
+	    } else if (canonical_marker_allele_ptrs[0] == missing_geno_ptr) {
+	      ukk = 0;
 	    } else {
 	      goto merge_main_ret_NOT_BIALLELIC;
 	    }
-	    if (allele_set(aptr1, alen1, &(marker_allele_ptrs[ukk]))) {
+	    if (allele_set(aptr1, alen1, &(canonical_marker_allele_ptrs[ukk]))) {
 	      goto merge_main_ret_NOMEM;
 	    }
 	  }
-	  if (!strcmp(aptr2, marker_allele_ptrs[uii * 2 + 1])) {
+	  if (!strcmp(aptr2, canonical_marker_allele_ptrs[1])) {
 	    ucc2++;
-	  } else if (strcmp(aptr2, marker_allele_ptrs[uii * 2])) {
+	  } else if (strcmp(aptr2, canonical_marker_allele_ptrs[0])) {
 	    // put A2 check second since the only way A2 will be unset when A1
 	    // is set at this point is if it was specified that way in an
 	    // earlier binary file.
-	    if (marker_allele_ptrs[uii * 2] == missing_geno_ptr) {
-	      ukk = uii * 2;
-	    } else if (marker_allele_ptrs[uii * 2 + 1] == missing_geno_ptr) {
-              ukk = uii * 2 + 1;
+	    if (canonical_marker_allele_ptrs[0] == missing_geno_ptr) {
+	      ukk = 0;
+	    } else if (canonical_marker_allele_ptrs[1] == missing_geno_ptr) {
+              ukk = 1;
               // bugfix (14 Nov 2017): forgot to increment the A2 allele count!
               ++ucc2;
 	    } else {
 	      goto merge_main_ret_NOT_BIALLELIC;
 	    }
-	    if (allele_set(aptr2, alen2, &(marker_allele_ptrs[ukk]))) {
+	    if (allele_set(aptr2, alen2, &(canonical_marker_allele_ptrs[ukk]))) {
 	      goto merge_main_ret_NOMEM;
 	    }
 	  }
@@ -15772,7 +15834,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
 	      if (!umm) {
 		diff_discordant++;
 	      }
-	      if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, &(marker_allele_ptrs[uii * 2]))) {
+	      if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, canonical_marker_allele_ptrs)) {
 		goto merge_main_ret_WRITE_FAIL;
 	      }
 	    }
@@ -15786,7 +15848,7 @@ int32_t merge_main(char* bedname, char* bimname, char* famname, char* bim_loadbu
 	      diff_not_both_genotyped++;
 	    } else if (ucc2 != ucc3) {
 	      diff_discordant++;
-	      if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, &(marker_allele_ptrs[uii * 2]))) {
+	      if (merge_diff_print(outfile, idbuf, &(marker_ids[uii * max_marker_id_len]), &(sample_ids[((uint32_t)ii) * max_sample_id_len]), ucc2, ucc3, canonical_marker_allele_ptrs)) {
 		goto merge_main_ret_WRITE_FAIL;
 	      }
 	    }
@@ -16648,7 +16710,11 @@ int32_t merge_datasets(char* bedname, char* bimname, char* famname, char* outnam
     break;
   }
  merge_datasets_ret_1:
-  cleanup_allele_storage(2, tot_marker_ct * 2, marker_allele_ptrs);
+  if (merge_equal_pos) {
+    cleanup_allele_storage2(tot_marker_ct * 2, marker_allele_ptrs);
+  } else {
+    cleanup_allele_storage(2, tot_marker_ct * 2, marker_allele_ptrs);
+  }
   fclose_cond(mergelistfile);
   fclose_cond(outfile);
   bigstack_double_reset(bigstack_mark, bigstack_end_mark);



View it on GitLab: https://salsa.debian.org/med-team/plink1-9/compare/7c1a905bdc915410076bf26808bc7a627e194c3f...644b614f638d080e6bd004cc33b968f6e276ebfa

-- 
View it on GitLab: https://salsa.debian.org/med-team/plink1-9/compare/7c1a905bdc915410076bf26808bc7a627e194c3f...644b614f638d080e6bd004cc33b968f6e276ebfa
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180604/876ca615/attachment-0001.html>


More information about the debian-med-commit mailing list