[med-svn] [Git][med-team/beagle][master] 5 commits: New upstream version 5.1-200324+dfsg

Dylan Aïssi gitlab at salsa.debian.org
Fri Mar 27 09:06:24 GMT 2020



Dylan Aïssi pushed to branch master at Debian Med / beagle


Commits:
19dc4045 by Dylan Aïssi at 2020-03-27T09:31:35+01:00
New upstream version 5.1-200324+dfsg
- - - - -
eb519b43 by Dylan Aïssi at 2020-03-27T09:31:35+01:00
Update upstream source from tag 'upstream/5.1-200324+dfsg'

Update to upstream version '5.1-200324+dfsg'
with Debian dir bcaea9ddd96b17a288445fc927275586c5ab1436
- - - - -
4d7fda49 by Dylan Aïssi at 2020-03-27T09:47:23+01:00
Update changelogs

- - - - -
00d41b26 by Dylan Aïssi at 2020-03-27T09:59:04+01:00
Standards-Version: 4.5.0 (no changes needed)

- - - - -
a17476f0 by Dylan Aïssi at 2020-03-27T10:06:02+01:00
Upload to unstable

- - - - -


13 changed files:

- debian/changelog
- debian/control
- debian/upstream.docs/release_notes
- debian/upstream.docs/run.beagle.example
- main/Main.java
- phase/HapImputer.java
- phase/HmmUpdater.java
- phase/Ibs2.java
- phase/ImputeBaum.java
- phase/PhaseLS.java
- phase/RecombRegress.java
- vcf/PlinkGenMap.java
- vcf/RefIt.java


Changes:

=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+beagle (5.1-200324+dfsg-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Standards-Version: 4.5.0 (no changes needed).
+
+ -- Dylan Aïssi <daissi at debian.org>  Fri, 27 Mar 2020 10:05:43 +0100
+
 beagle (5.1-191125+dfsg-1) unstable; urgency=medium
 
   * New upstream release.


=====================================
debian/control
=====================================
@@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 12),
                javahelper
 Build-Depends-Indep: default-jdk,
                      libhtsjdk-java
-Standards-Version: 4.4.1
+Standards-Version: 4.5.0
 Vcs-Browser: https://salsa.debian.org/med-team/beagle
 Vcs-Git: https://salsa.debian.org/med-team/beagle.git
 Homepage: https://faculty.washington.edu/browning/beagle/beagle.html


=====================================
debian/upstream.docs/release_notes
=====================================
@@ -89,3 +89,16 @@ Beagle 5.1 (20Nov19.573) release notes
 Beagle 5.1 (25Nov19.28d) release notes
 ============================================
 * Increased robustness to genetic maps with small inter-marker distances
+
+Beagle 5.1 (13Mar20.38e) release notes
+============================================
+* Improve robustness to rounding error
+* Fix a concurrency bug
+
+Beagle 5.1 (21Mar20.d65) release notes
+============================================
+* Fix a bug in reference-based phasing
+
+Beagle 5.1 (24Mar20.5f5) release notes
+============================================
+* Improve robustness when some samples have high rates of missing genotypes


=====================================
debian/upstream.docs/run.beagle.example
=====================================
@@ -1,47 +1,47 @@
 #!/bin/bash
-if [ ! -f beagle.20Nov19.573.jar ]; then
+if [ ! -f beagle.24Mar20.5f5.jar ]; then
   echo
-  echo "Downloading beagle.20Nov19.573.jar"
-  wget http://faculty.washington.edu/browning/beagle/beagle.20Nov19.573.jar
+  echo "Downloading beagle.24Mar20.5f5.jar"
+  wget http://faculty.washington.edu/browning/beagle/beagle.24Mar20.5f5.jar
 fi
 
-if [ ! -f bref3.20Nov19.573.jar ]; then
+if [ ! -f bref3.24Mar20.5f5.jar ]; then
   echo
-  echo "Downloading bref3.20Nov19.573.jar"
-  wget http://faculty.washington.edu/browning/beagle/bref3.20Nov19.573.jar
+  echo "Downloading bref3.24Mar20.5f5.jar"
+  wget http://faculty.washington.edu/browning/beagle/bref3.24Mar20.5f5.jar
 fi
 
 echo
 
-if [ ! -f test.20Nov19.573.vcf.gz ]; then
+if [ ! -f test.24Mar20.5f5.vcf.gz ]; then
     echo
-    echo "*** Downloading some 1000 Genomes Project data to file: test.20Nov19.573.vcf.gz ***"
-    wget http://faculty.washington.edu/browning/beagle/test.20Nov19.573.vcf.gz
+    echo "*** Downloading some 1000 Genomes Project data to file: test.24Mar20.5f5.vcf.gz ***"
+    wget http://faculty.washington.edu/browning/beagle/test.24Mar20.5f5.vcf.gz
 fi
 
 echo
-echo "*** Creating test files: ref.20Nov19.573.vcf.gz target.20Nov19.573.vcf.gz ***"
+echo "*** Creating test files: ref.24Mar20.5f5.vcf.gz target.24Mar20.5f5.vcf.gz ***"
 echo
-zcat test.20Nov19.573.vcf.gz | cut -f1-190 | tr '/' '|' | gzip > ref.20Nov19.573.vcf.gz
-zcat test.20Nov19.573.vcf.gz | cut -f1-9,191-200 | gzip > target.20Nov19.573.vcf.gz
+zcat test.24Mar20.5f5.vcf.gz | cut -f1-190 | tr '/' '|' | gzip > ref.24Mar20.5f5.vcf.gz
+zcat test.24Mar20.5f5.vcf.gz | cut -f1-9,191-200 | gzip > target.24Mar20.5f5.vcf.gz
 
 echo
 echo "*** Running test analysis with \"gt=\" argument ***"
 echo
-java -jar beagle.20Nov19.573.jar gt=test.20Nov19.573.vcf.gz out=out.gt
+java -jar beagle.24Mar20.5f5.jar gt=test.24Mar20.5f5.vcf.gz out=out.gt
 
 echo
 echo "*** Running test analysis with \"ref=\" and \"gt=\" arguments ***"
 echo
-java -jar beagle.20Nov19.573.jar ref=ref.20Nov19.573.vcf.gz gt=target.20Nov19.573.vcf.gz out=out.ref
+java -jar beagle.24Mar20.5f5.jar ref=ref.24Mar20.5f5.vcf.gz gt=target.24Mar20.5f5.vcf.gz out=out.ref
 
 echo
 echo "*** Making \"bref3\" file ***"
 echo
-java -jar bref3.20Nov19.573.jar ref.20Nov19.573.vcf.gz > ref.20Nov19.573.bref3
+java -jar bref3.24Mar20.5f5.jar ref.24Mar20.5f5.vcf.gz > ref.24Mar20.5f5.bref3
 
 echo
 echo "*** Running test analysis with \"bref3\" file ***"
 echo
-java -jar beagle.20Nov19.573.jar ref=ref.20Nov19.573.bref3 gt=target.20Nov19.573.vcf.gz out=out.bref3
+java -jar beagle.24Mar20.5f5.jar ref=ref.24Mar20.5f5.bref3 gt=target.24Mar20.5f5.vcf.gz out=out.bref3
 


=====================================
main/Main.java
=====================================
@@ -65,8 +65,8 @@ public class Main {
      * The program name and version.
      */
     public static final String VERSION = "(version 5.1)";
-    public static final String PROGRAM = "beagle.25Nov19.28d.jar";
-    public static final String COMMAND = "java -jar beagle.25Nov19.28d.jar";
+    public static final String PROGRAM = "beagle.24Mar20.5f5.jar";
+    public static final String COMMAND = "java -jar beagle.24Mar20.5f5.jar";
 
     /**
      * The copyright string.
@@ -78,7 +78,7 @@ public class Main {
      */
     public static final String SHORT_HELP = Main.PROGRAM + " " + VERSION
             + Const.nl + Main.COPYRIGHT
-            + Const.nl + "Enter \"java -jar beagle.25Nov19.28d.jar\" to "
+            + Const.nl + "Enter \"java -jar beagle.24Mar20.5f5.jar\" to "
             + "list command line argument";
 
     private final Par par;


=====================================
phase/HapImputer.java
=====================================
@@ -42,9 +42,9 @@ import vcf.Samples;
 public class HapImputer {
 
     private final Markers markers;
-    private final Samples samples;
-    private final int nHaps;
-    private final AtomicReferenceArray<LongArray> haps;
+    private final Samples targSamples;
+    private final int nTargHaps;
+    private final AtomicReferenceArray<LongArray> targHaps;
     private final AtomicReferenceArray<List<PartlyImputedAllele>> missing;
 
     /**
@@ -56,11 +56,11 @@ public class HapImputer {
      */
     public HapImputer(Markers markers, Samples samples) {
         this.markers = markers;
-        this.samples = samples;
-        this.nHaps = 2*samples.nSamples();
-        this.haps = new AtomicReferenceArray<>(nHaps);
-        this.missing = new AtomicReferenceArray<>(nHaps);
-        for (int j=0; j<nHaps; ++j) {
+        this.targSamples = samples;
+        this.nTargHaps = 2*samples.nSamples();
+        this.targHaps = new AtomicReferenceArray<>(nTargHaps);
+        this.missing = new AtomicReferenceArray<>(nTargHaps);
+        for (int j=0; j<nTargHaps; ++j) {
             missing.set(j, new ArrayList<>(4));
         }
     }
@@ -70,7 +70,7 @@ public class HapImputer {
      * @return the list of target samples
      */
     public Samples targSamples() {
-        return samples;
+        return targSamples;
     }
 
     /**
@@ -92,7 +92,7 @@ public class HapImputer {
      * @throws NullPointerException if {@code alleles == null}
      */
     public void setHap(int hap, int[] alleles) {
-        haps.set(hap, markers.allelesToBits(alleles));
+        targHaps.set(hap, markers.allelesToBits(alleles));
     }
 
     /**
@@ -137,16 +137,16 @@ public class HapImputer {
      * {@code (0 <= h && h <= 2 * this.targSamples().nSamples())}
      */
     public GT imputedHaps() {
-        GT oldGT = new HapsGT(markers, samples, haps);
-        LongArray[] newHaps = IntStream.range(0, nHaps)
+        GT oldGT = new HapsGT(markers, targSamples, targHaps);
+        LongArray[] newHaps = IntStream.range(0, nTargHaps)
                 .parallel()
                 .mapToObj(h -> imputeHap(h, oldGT))
                 .toArray(LongArray[]::new);
-        return new HapsGT(markers, samples, newHaps);
+        return new HapsGT(markers, targSamples, newHaps);
     }
 
     private LongArray imputeHap(int h, GT oldGT) {
-        LongArray oldHap = haps.get(h);
+        LongArray oldHap = targHaps.get(h);
         List<PartlyImputedAllele> partlyImputedAlleles = missing.get(h);
         if (partlyImputedAlleles.isEmpty()) {
             return oldHap;
@@ -165,15 +165,15 @@ public class HapImputer {
 
         private final int marker;
         private final float[] alProbs;
-        private final int[] refHaps;
+        private final int[] targHaps;
         private final float[] stateProbs;
 
         private PartlyImputedAllele(Markers markers, int marker,
-                float[] alProbs, IntList refHaps, FloatList stateProbs) {
+                float[] alProbs, IntList targHaps, FloatList stateProbs) {
             int nAlleles =  markers.marker(marker).nAlleles();
             this.marker = marker;
             this.alProbs = Arrays.copyOf(alProbs, nAlleles);
-            this.refHaps = refHaps.toArray();
+            this.targHaps = targHaps.toArray();
             this.stateProbs = stateProbs.toArray();
         }
 
@@ -184,8 +184,8 @@ public class HapImputer {
         private int imputeAllele(GT phasedGT) {
             assert phasedGT.isPhased();
             float[] updatedProbs = alProbs.clone();
-            for (int j=0; j<refHaps.length; ++j) {
-                int allele = phasedGT.allele(marker, refHaps[j]);
+            for (int j=0; j<targHaps.length; ++j) {
+                int allele = phasedGT.allele(marker, targHaps[j]);
                 updatedProbs[allele] += stateProbs[j];
             }
             int imputedAllele = 0;


=====================================
phase/HmmUpdater.java
=====================================
@@ -90,4 +90,57 @@ public class HmmUpdater {
             bwd[k] = scale*bwd[k] + shift;
         }
     }
+
+    /**
+     * Updates the forward values and returns the sum of the updated forward
+     * values.
+     * @param fwd the array of forward values that will be updated
+     * @param pSwitch the probability of jumping to a random state
+     * @param sum the sum of forward values in the specified array
+     * @param mismatch the number of allele mismatches (0 or 1) for
+     * each HMM state
+     * @param nStates the number of states
+     * @return the sum of the updated forward values
+     * @throws IndexOutOfBoundsException if
+     * {@code fwd.length < nStates || mismatch.length < nStates}
+     * @throws NullPointerException if
+     * {@code fwd == null || mismatch == null}
+     */
+    public double fwdUpdate(double[] fwd, double sum, double pSwitch,
+            byte[] mismatch, int nStates) {
+        double shift = pSwitch/nStates;
+        double scale = (1.0f - pSwitch)/sum;
+        sum = 0.0f;
+        for (int k=0; k<nStates; ++k) {
+            fwd[k] = pMismatch[mismatch[k]]*(scale*fwd[k] + shift);
+            sum += fwd[k];
+        }
+        return sum;
+    }
+
+    /**
+     * Updates the backward values.
+     * @param bwd the array of backward values that will be updated
+     * @param pSwitch the probability of jumping to a random state
+     * @param mismatch the number of allele mismatches (0 or 1) for
+     * each HMM state
+     * @param nStates the number of states
+     * @throws IndexOutOfBoundsException if
+     * {@code bwd.length < nStates || mismatch.length < nStates}
+     * @throws NullPointerException if
+     * {@code bwd == null || mismatch == null}
+     */
+    public void bwdUpdate(double[] bwd, double pSwitch, byte[] mismatch,
+            int nStates) {
+        double sum = 0.0f;
+        for (int k=0; k<nStates; ++k) {
+            bwd[k] *= pMismatch[mismatch[k]];
+            sum += bwd[k];
+        }
+        double shift = pSwitch/nStates;
+        double scale = (1.0f - pSwitch)/sum;
+        for (int k=0; k<nStates; ++k) {
+            bwd[k] = scale*bwd[k] + shift;
+        }
+    }
 }


=====================================
phase/Ibs2.java
=====================================
@@ -44,7 +44,8 @@ import vcf.RefGT;
 public final class Ibs2 {
 
     private static final int MIN_STEP_MARKERS = 100;
-    private static final int MAX_STEP_MARKERS = 1500; // value for high-frequency markers
+    private static final int MAX_STEP_MARKERS = 1500; // high-frequency markers
+    private static final float MAX_MISS_PROP = 0.1f;
     private static final double MAX_IBD_GAP_CM = 4.0;
 
     private final GT targGT;
@@ -163,9 +164,8 @@ public final class Ibs2 {
         int wP1 = w + 1;
         int start = wStarts.get(w);
         int end = (wP1 < wStarts.size()) ? wStarts.get(wP1) : targGT.nMarkers();
-        int nSamples = targGT.nSamples() + (refGT!=null ? refGT.nSamples() : 0);
         List<SampClust> equivLists = new ArrayList<>(1);
-        equivLists.add(new SampClust(nSamples));
+        equivLists.add(initCluster(targGT, refGT, start, end));
         for (int m=start; m<end; ++m) {
             int mm = m;
             equivLists = equivLists.stream()
@@ -175,10 +175,29 @@ public final class Ibs2 {
         return results(equivLists, targGT.nSamples());
     }
 
+    private static SampClust initCluster(GT targGT, RefGT refGT, int start,
+            int end) {
+        int nTargSamples = targGT.nSamples();
+        int nSamples = targGT.nSamples() + (refGT!=null ? refGT.nSamples() : 0);
+        int maxMiss = (int) Math.ceil(MAX_MISS_PROP*(end - start));
+        int[] missCnt = new int[nSamples];
+        for (int m=start; m<end; ++m) {
+            for (int s=0; s<nTargSamples; ++s) {
+                if (targGT.allele1(m, s)==-1 || targGT.allele2(m, s)==-1) {
+                    ++missCnt[s];
+                }
+            }
+        }
+        int[] initCluster = IntStream.range(0, nSamples)
+                .filter(s -> s>=nTargSamples || missCnt[s]<=maxMiss)
+                .toArray();
+        return new SampClust(initCluster, true);
+    }
+
     private static Stream<SampClust> partition(GT targGT, RefGT refGT,
             SampClust parent, int m) {
         // this method assumes int[] parent.samples is sorted in increasing order
-        int[] a = new int[2];   // alleles
+        int[] gt = new int[2];
         int nAlleles = targGT.marker(m).nAlleles();
         int nTargSamples = targGT.nSamples();
         IntList[] gtToList = new IntList[nAlleles*nAlleles];
@@ -186,8 +205,8 @@ public final class Ibs2 {
         IntList gtIndices = new IntList(16);
         IntList missingTargSamples = new IntList(32);
         for (int s : parent.samples) {
-            setAlleles(m, s, targGT, refGT, a);
-            if (a[0]<0 || a[1]<0) {
+            setAlleles(m, s, targGT, refGT, gt);
+            if (gt[0]<0 || gt[1]<0) {
                 assert s<nTargSamples;
                 missingTargSamples.add(s);
                 for (int k=0, n=gtIndices.size(); k<n; ++k) {
@@ -195,13 +214,13 @@ public final class Ibs2 {
                 }
             }
             else {
-                int gtIndex = a[0]<=a[1] ? a[0]*nAlleles + a[1] : a[1]*nAlleles + a[0];
+                int gtIndex = gt[0]<=gt[1] ? gt[0]*nAlleles + gt[1] : gt[1]*nAlleles + gt[0];
                 if (gtToList[gtIndex]!=null) {
                     gtToList[gtIndex].add(s);
                 }
                 else if (s<nTargSamples || missingTargSamples.size()>0) { // assumes int[] parent.samples is increasing
                     gtIndices.add(gtIndex);
-                    areHom[gtIndex] = parent.areHomozygous && (a[0]==a[1]);
+                    areHom[gtIndex] = parent.areHomozygous && (gt[0]==gt[1]);
                     gtToList[gtIndex] = new IntList();
                     for (int k=0, n=missingTargSamples.size(); k<n; ++k) {
                         // if s>=nTargSamples, put s in list w/ missTargSamples
@@ -212,8 +231,8 @@ public final class Ibs2 {
             }
         }
         return gtIndices.stream()
-                .filter(gt -> gtToList[gt].size()>1)
-                .mapToObj(gt -> new SampClust(gtToList[gt].toArray(), areHom[gt]));
+                .filter(i -> gtToList[i].size()>1)
+                .mapToObj(i -> new SampClust(gtToList[i].toArray(), areHom[i]));
     }
 
     private static void setAlleles(int m, int s, GT targGT, RefGT refGT,


=====================================
phase/ImputeBaum.java
=====================================
@@ -197,7 +197,7 @@ public class ImputeBaum {
             int b1 = allele(m, hap);
             int b2 = allele(m, hap ^ 0b1);
             if (b1>=0 && b2>=0) {
-                if (b1==b2) {
+                if (b1==b2 || hap>=nTargHaps) {
                     alProbs[b1] += prob;
                 }
                 else {


=====================================
phase/PhaseLS.java
=====================================
@@ -67,7 +67,9 @@ public class PhaseLS {
     private static void updateRecombFactor(ExecutorService es,
             PhaseData phaseData, PhaseIbs phaseIbs) {
         int nThreads = phaseData.par().nthreads();
-        double maxSumY = Math.max(5000.0/nThreads, 200.0);
+        double maxSumY = 5000.0;
+        double maxThreadSumY = Math.max(maxSumY/nThreads, 200.0);
+        int maxUpdates = 20000/nThreads + 1;
         Regress regress = new Regress();
         int nSamples = phaseData.targGT().nSamples();
         CountDownLatch latch = new CountDownLatch(nThreads);
@@ -75,8 +77,9 @@ public class PhaseLS {
             Random rand = new Random(phaseData.seed() +  j);
             es.submit(() -> {
                 try {
+                    int nUpdates = 0;
                     RecombRegress rr = new RecombRegress(phaseIbs, regress);
-                    while (rr.sumY()<maxSumY) { // thread-confined for reproducability
+                    while (rr.sumY()<maxThreadSumY && ++nUpdates <= maxUpdates) { // thread-confined for reproducability
                         rr.update(rand.nextInt(nSamples));
                     }
                     latch.countDown();
@@ -88,12 +91,12 @@ public class PhaseLS {
         }
         MultiThreadUtils.await(latch);
         float recombFactor = (float) regress.beta();
-        if (recombFactor>0.0 && Float.isFinite(recombFactor)) {
+        if (regress.sumY()>=maxSumY && recombFactor>0.0
+                && Float.isFinite(recombFactor)) {
             phaseData.setRecombFactor(recombFactor);
         }
         else {
-            System.out.println(Const.nl + "WARNING: no recombFactor update: "
-                    + recombFactor + Const.nl);
+            System.out.println(Const.nl + "WARNING: no recombFactor update");
         }
     }
 


=====================================
phase/RecombRegress.java
=====================================
@@ -46,11 +46,11 @@ public class RecombRegress {
 
     private final int[] hap1;
     private final int[] hap2;
-    private final float[] fwd;
+    private final double[] fwd;
+    private final double[] fwdM1;
     private final float[] bwd;
-    private final float[] fwdM1;
     private final float[][] savedBwd;
-    private final float[] pMismatch;
+    private final double[] pMismatch;
     private final HmmUpdater hmmUpdater;
     private final Regress regress;
 
@@ -84,13 +84,13 @@ public class RecombRegress {
 
         this.hap1 = new int[nMarkers];
         this.hap2 = new int[nMarkers];
-        this.savedBwd = new float[nMarkers][maxStates];
-        this.fwd = new float[maxStates];
         this.bwd = new float[maxStates];
-        this.fwdM1 = new float[maxStates];
-        float pErr = phaseData.err();
-        this.pMismatch = new float[] {1.0f - pErr, pErr};
-        this.hmmUpdater = new HmmUpdater(pErr);
+        this.savedBwd = new float[nMarkers][maxStates];
+        this.fwd = new double[maxStates];
+        this.fwdM1 = new double[maxStates];
+        double pErr = phaseData.err();
+        this.pMismatch = new double[] {1.0f - pErr, pErr};
+        this.hmmUpdater = new HmmUpdater((float) pErr);
         this.regress = regress;
     }
 
@@ -126,31 +126,36 @@ public class RecombRegress {
             hmmUpdater.bwdUpdate(bwd, pRecomb.get(mP1), alMatch[mP1], nStates);
             System.arraycopy(bwd, 0, savedBwd[m], 0, nStates);
         }
-        float hFactor = (float) (nStates / (nStates - 1.0));
+        double hFactor = nStates / (nStates - 1.0);
         Arrays.fill(fwd, 0, nStates, 1.0f/nStates);
-        float sum = 1.0f;
+        double sum = 1.0f;
         sum = hmmUpdater.fwdUpdate(fwd, sum, pRecomb.get(0), alMatch[0], nStates);
         for (int m=1; m<nMarkers; ++m) {
             sum = fwdUpdate(m, alMatch[m], sum, hFactor);
         }
     }
 
-    private float fwdUpdate(int m, byte[] alMatch, float lastSum, float hFactor) {
-        float pSwitch = pRecomb.get(m);
-        float f = ((1.0f - pSwitch) + (pSwitch/nStates))/lastSum;
-        float partNumer = 0.0f;
-        float denom = 0.0f;
+    private double fwdUpdate(int m, byte[] alMatch, double lastSum, double hFactor) {
+        double pSwitch = pRecomb.get(m);
+        double shift = pSwitch/nStates;
+        double scale = (1.0f - pSwitch)/lastSum;
+        double partScale = ((1.0f - pSwitch) + (pSwitch/nStates))/lastSum;
+
+        double numer = 0.0f;
+        double denom = 0.0f;
 
         float[] storedBwd = savedBwd[m];
         System.arraycopy(fwd, 0, fwdM1, 0, nStates);
-        float sum = hmmUpdater.fwdUpdate(fwd, lastSum, pRecomb.get(m), alMatch, nStates);
+        double sum = 0.0f;
         for (int k=0; k<nStates; ++k) {
-            partNumer += pMismatch[alMatch[k]]*f*fwdM1[k]*storedBwd[k];
+            fwd[k] = pMismatch[alMatch[k]]*(scale*fwdM1[k] + shift);
+            double partFwd =  pMismatch[alMatch[k]]*(partScale*fwdM1[k]);
+            numer += fwd[k]<partFwd ? 0.0 : (fwd[k] - partFwd)*storedBwd[k];
             denom += fwd[k]*storedBwd[k];
+            sum += fwd[k];
         }
-        float num = denom - partNumer;
-        float xVal = genDist.get(m);
-        float yVal = (hFactor*num/denom);
+        double xVal = genDist.get(m);
+        double yVal = (hFactor*numer/denom);
         regress.add(xVal, yVal);
         sumY += yVal;
         return sum;


=====================================
vcf/PlinkGenMap.java
=====================================
@@ -349,7 +349,7 @@ public final class PlinkGenMap implements GeneticMap {
     @Override
     public int basePos(int chrom, double geneticPosition) {
         checkChromIndex(chrom);
-        int minEndBasepairDist = 5_000_000;
+        double minEndCmDist = 5.0;
         int mapSizeM1 = genPos[chrom].length - 1;
         assert mapSizeM1>0;
         assert basePos[chrom].length==genPos[chrom].length;
@@ -361,42 +361,36 @@ public final class PlinkGenMap implements GeneticMap {
             int aIndex = insPt - 1;
             int bIndex = insPt;
             if (aIndex==mapSizeM1) {
-                insPt = Arrays.binarySearch(basePos[chrom], basePos[chrom][mapSizeM1] - minEndBasepairDist);
+                insPt = Arrays.binarySearch(genPos[chrom], genPos[chrom][mapSizeM1] - minEndCmDist);
                 if (insPt<0) {
                     insPt = -insPt - 2;
                 }
                 assert insPt<mapSizeM1;
                 aIndex = Math.max(insPt, 0);
                 bIndex = mapSizeM1;
-                while (genPos[chrom][aIndex]==genPos[chrom][bIndex] && aIndex>0) {
-                    --aIndex;
-                }
             }
             else if (bIndex==0) {
-                insPt = Arrays.binarySearch(basePos[chrom], basePos[chrom][0] + minEndBasepairDist);
+                insPt = Arrays.binarySearch(genPos[chrom], genPos[chrom][0] + minEndCmDist);
                 if (insPt<0) {
                     insPt = -insPt - 1;
                 }
                 assert insPt>0;
                 aIndex = 0;
                 bIndex = Math.min(insPt, mapSizeM1);
-                while (genPos[chrom][aIndex]==genPos[chrom][bIndex] && bIndex<mapSizeM1) {
-                    ++bIndex;
-                }
             }
             double x = geneticPosition;
             double a = genPos[chrom][aIndex];
             double b = genPos[chrom][bIndex];
             int fa = basePos[chrom][aIndex];
             int fb = basePos[chrom][bIndex];
-            double interp = ((x-a)/(b-a)) * (fb-fa);
+            double interp = fa + ((x-a)/(b-a)) * (fb-fa);
             if (interp>=Integer.MAX_VALUE) {
                 String s = "Base position exceeds Integer.MAX_VALUE"
                         + blbutil.Const.nl
                         + "Are all genetic distances in cM units (in command line and in genetic map)?";
                 throw new IllegalArgumentException(s);
             }
-            return fa + (int) Math.round(interp);
+            return (int) Math.round(interp);
         }
     }
 


=====================================
vcf/RefIt.java
=====================================
@@ -55,18 +55,18 @@ public class RefIt implements SampleFileIt<RefGTRec> {
      * The default number of {@code GTRec} objects that are
      * stored in a buffer.
      */
-    public static final int DEFAULT_BUFFER_SIZE = 1<<10;
+    private static final int DEFAULT_BUFFER_SIZE = 1<<10;
+    private static final String[] SENTINAL = new String[0];
+
+    private static volatile boolean stopFileReading = false;
 
     private final VcfHeader vcfHeader;
     private final FileIt<String> it;
     private final Function<String, RefGTRec> mapper;
     private final Filter<Marker> markerFilter;
 
-    private int lastChrom;
-    private boolean eof = false;
-
     private final ArrayBlockingQueue<String[]> stringBuffer;
-    private final List<RefGTRec> midBuffer;
+    private final List<RefGTRec> lowFreqBuffer;
     private final Deque<RefGTRec> recBuffer;
     private final SeqCoder3 seqCoder;
     private final int maxSeqCodedAlleles;
@@ -74,6 +74,8 @@ public class RefIt implements SampleFileIt<RefGTRec> {
 
     private final ExecutorService es;
 
+    private int lastChrom = -1;
+
     /**
      * Create and returns a new {@code RefIt} instance from the specified
      * iterator.
@@ -146,9 +148,8 @@ public class RefIt implements SampleFileIt<RefGTRec> {
         this.maxSeqCodedAlleles = Math.min(seqCoder.maxNSeq(), SeqCoder3.MAX_NALLELES);
         this.maxSeqCodingMajorCnt = maxSeqCodingMajorCnt(vcfHeader.samples());
 
-        this.lastChrom = -1;
         this.stringBuffer = new ArrayBlockingQueue<>(1);
-        this.midBuffer = new ArrayList<>();
+        this.lowFreqBuffer = new ArrayList<>();
         this.recBuffer = new ArrayDeque<>(bufferSize);
 
         this.es = Executors.newSingleThreadExecutor();
@@ -165,35 +166,45 @@ public class RefIt implements SampleFileIt<RefGTRec> {
             ArrayBlockingQueue<String[]> q, String firstRec, FileIt<String> it,
             int bufferSize) {
         es.submit(() -> {
-            List<String> recs = new ArrayList<>(bufferSize);
-            recs.add(firstRec);
-            while (it.hasNext()) {
-                if (recs.size()==bufferSize) {
-                    String[] sa = recs.toArray(new String[0]);
-                    MultiThreadUtils.putInBlockingQ(q, sa);
-                    recs.clear();
+            try {
+                List<String> recs = new ArrayList<>(bufferSize);
+                recs.add(firstRec);
+                while (it.hasNext()) {
+                    if (recs.size()==bufferSize) {
+                        if (stopFileReading) {
+                            recs.clear();
+                            break;
+                        }
+                        String[] sa = recs.toArray(new String[0]);
+                        MultiThreadUtils.putInBlockingQ(q, sa);
+                        recs.clear();
+                    }
+                    recs.add(it.next());
+                }
+                try {
+                    if (recs.size()>0) {
+                        q.put(recs.toArray(new String[0]));
+                        recs.clear();
+                    }
+                    q.put(SENTINAL);
+                } catch (InterruptedException ex) {
+                    Utilities.exit("ERROR: " + it.file(), ex);
                 }
-                recs.add(it.next());
             }
-            try {
-                assert recs.size()>0;
-                q.put(recs.toArray(new String[0]));
-                q.put(new String[0]);
-                recs.clear();
-            } catch (InterruptedException ex) {
-                Utilities.exit("ERROR: " + it.file(), ex);
+            catch (Throwable t) {
+                Utilities.exit(t);
             }
         });
     }
 
     private void fillEmissionBuffer() {
         assert recBuffer.isEmpty();
-        boolean finished = false;
-        while (recBuffer.isEmpty() && finished==false) {
+        while (recBuffer.isEmpty()) {
             String[] sa = MultiThreadUtils.takeFromBlockingQ(stringBuffer);
             if (sa.length==0) {
-                finished = true;
                 stringBuffer.add(sa); // put sentinal back in qeueue
+                flushToRecBuffer();
+                return;
             }
             else {
                 RefGTRec[] recs = parseLines(sa);
@@ -203,29 +214,25 @@ public class RefIt implements SampleFileIt<RefGTRec> {
                     if (lastChrom == -1) {
                         lastChrom = chrom;
                     }
-                    if (chrom!=lastChrom || midBuffer.size()==Integer.MAX_VALUE) {
-                        flushMidBufferToRecBuffer();
+                    if (chrom!=lastChrom || lowFreqBuffer.size()==Integer.MAX_VALUE) {
+                        flushToRecBuffer();
                         lastChrom = chrom;
                     }
                     if (applySeqCoding(rec)==false) {
-                        midBuffer.add(rec);
+                        lowFreqBuffer.add(rec);
                     }
                     else {
                         boolean success = seqCoder.add(rec);
                         if (success == false) {
-                            flushMidBufferToRecBuffer();
+                            flushToRecBuffer();
                             success = seqCoder.add(rec);
                             assert success;
                         }
-                        midBuffer.add(null);
+                        lowFreqBuffer.add(null);
                     }
                 }
             }
         }
-        if (finished) {
-            flushMidBufferToRecBuffer();
-            eof = true;
-        }
     }
 
     private RefGTRec[] parseLines(String[] lines) {
@@ -236,29 +243,30 @@ public class RefIt implements SampleFileIt<RefGTRec> {
                 .toArray(RefGTRec[]::new);
     }
 
-    private void flushMidBufferToRecBuffer() {
+    private void flushToRecBuffer() {
         List<RefGTRec> list = seqCoder.getCompressedList();
         int index = 0;
-        for (int j=0, n=midBuffer.size(); j<n; ++j) {
-            GTRec ve = midBuffer.get(j);
+        for (int j=0, n=lowFreqBuffer.size(); j<n; ++j) {
+            GTRec ve = lowFreqBuffer.get(j);
             if (ve==null) {
-                midBuffer.set(j, list.get(index++));
+                lowFreqBuffer.set(j, list.get(index++));
             }
         }
-        recBuffer.addAll(midBuffer);
-        midBuffer.clear();
+        recBuffer.addAll(lowFreqBuffer);
+        lowFreqBuffer.clear();
     }
 
     @Override
     public void close() {
-        it.close();
-        recBuffer.clear();
-        midBuffer.clear();
-        // empty string buffer in order to stop ExecutorService
+        stopFileReading = true;
+        // empty string buffer
         String[] sa = MultiThreadUtils.takeFromBlockingQ(stringBuffer);
         while (sa.length>0) {
             sa = MultiThreadUtils.takeFromBlockingQ(stringBuffer);
         }
+        recBuffer.clear();
+        lowFreqBuffer.clear();
+        it.close();
         MultiThreadUtils.shutdownExecService(es);
     }
 
@@ -283,7 +291,7 @@ public class RefIt implements SampleFileIt<RefGTRec> {
             throw new NoSuchElementException();
         }
         RefGTRec first = recBuffer.removeFirst();
-        if (recBuffer.isEmpty() && eof==false) {
+        if (recBuffer.isEmpty()) {
             fillEmissionBuffer();
         }
         return first;



View it on GitLab: https://salsa.debian.org/med-team/beagle/-/compare/c613446d5176789855d71b6a0dde97d0781cd982...a17476f0c40bdec4d629c3c9fc2c09a14a6f9331

-- 
View it on GitLab: https://salsa.debian.org/med-team/beagle/-/compare/c613446d5176789855d71b6a0dde97d0781cd982...a17476f0c40bdec4d629c3c9fc2c09a14a6f9331
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200327/84a530b9/attachment-0001.html>


More information about the debian-med-commit mailing list