[med-svn] [Git][med-team/beagle][upstream] New upstream version 5.1-200427+dfsg

Dylan Aïssi gitlab at salsa.debian.org
Thu Apr 30 07:23:33 BST 2020



Dylan Aïssi pushed to branch upstream at Debian Med / beagle


Commits:
dcb39a51 by Dylan Aïssi at 2020-04-30T08:23:15+02:00
New upstream version 5.1-200427+dfsg
- - - - -


3 changed files:

- main/Main.java
- phase/FixedPhaseData.java
- phase/Ibs2.java


Changes:

=====================================
main/Main.java
=====================================
@@ -65,8 +65,8 @@ public class Main {
      * The program name and version.
      */
     public static final String VERSION = "(version 5.1)";
-    public static final String PROGRAM = "beagle.24Mar20.5f5.jar";
-    public static final String COMMAND = "java -jar beagle.24Mar20.5f5.jar";
+    public static final String PROGRAM = "beagle.27Apr20.b81.jar";
+    public static final String COMMAND = "java -jar beagle.27Apr20.b81.jar";
 
     /**
      * The copyright string.
@@ -78,7 +78,7 @@ public class Main {
      */
     public static final String SHORT_HELP = Main.PROGRAM + " " + VERSION
             + Const.nl + Main.COPYRIGHT
-            + Const.nl + "Enter \"java -jar beagle.24Mar20.5f5.jar\" to "
+            + Const.nl + "Enter \"java -jar beagle.27Apr20.b81.jar\" to "
             + "list command line argument";
 
     private final Par par;


=====================================
phase/FixedPhaseData.java
=====================================
@@ -46,7 +46,6 @@ import vcf.SplicedGT;
 public class FixedPhaseData {
 
     private static final float MAX_HIFREQ_PROP = 0.9f;
-    private static final float MIN_IBS2_CM = 2.0f;
 
     private final Par par;
     private final float err;
@@ -145,7 +144,7 @@ public class FixedPhaseData {
             this.prevHiFreqMarker = prevHiFreqMarker(targGT.nMarkers(), hiFreqIndices);
             this.prevWt = prevWt(map, hiFreqIndices);
         }
-        this.ibs2 = new Ibs2(hiFreqTargGT, hiFreqRefGT, hiFreqMap, MIN_IBS2_CM);
+        this.ibs2 = new Ibs2(hiFreqTargGT, hiFreqRefGT, hiFreqMap);
     }
 
     private static void ignoreLowFreqCarriers(IntArray[][] carriers) {


=====================================
phase/Ibs2.java
=====================================
@@ -25,6 +25,7 @@ import ints.WrappedIntArray;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Random;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
@@ -43,15 +44,17 @@ import vcf.RefGT;
  */
 public final class Ibs2 {
 
-    private static final int MIN_STEP_MARKERS = 100;
-    private static final int MAX_STEP_MARKERS = 1500; // high-frequency markers
-    private static final float MAX_MISS_PROP = 0.1f;
+    private static final float MIN_IBS2_CM = 2.0f;
     private static final double MAX_IBD_GAP_CM = 4.0;
+    private static final float MAX_MISS_FREQ = 0.1f;
+    private static final float MIN_MINOR_FREQ = 0.02f;
+    private static final float MIN_STEP_CM = 0.5f*MIN_IBS2_CM;
+    private static final int MIN_STEP_MARKERS = 100;
+    private static final int MAX_STEP_MARKERS = 1000;
 
     private final GT targGT;
     private final RefGT refGT;
     private final DoubleArray genPos;
-    private final double minCmIbs2;
     private final SampleSeg[][] sampleSegs; // [targ sample][segment]
 
     /**
@@ -60,7 +63,6 @@ public final class Ibs2 {
      * @param refGT the reference genotype data or {@code null} if there are
      * no reference data
      * @param map an array with the genetic map positions of each marker
-     * @param minCmIbs2 the minimum cM length of a stored IBS2 segment
      * @throws NullPointerException if {@code (targGT == null || map == null)}
      * @throws IllegalArgumentException if
      * {@code targGT.nMarkers() != map.genPos().size()}
@@ -69,22 +71,23 @@ public final class Ibs2 {
      * @throws IllegalArgumentException if
      * {@code (minCmIbs2 <= 0 || Double.isFinite(minCmIbs2) == false)}
      */
-    public Ibs2(GT targGT, RefGT refGT, MarkerMap map, double minCmIbs2) {
-        checkArgs(targGT, refGT, map, minCmIbs2);
+    public Ibs2(GT targGT, RefGT refGT, MarkerMap map) {
+        checkArgs(targGT, refGT, map);
         this.targGT = targGT;
         this.refGT = refGT;
         this.genPos = map.genPos();
-        this.minCmIbs2 = minCmIbs2;
-        IntArray windowStarts = stepStarts(map, 0.5*minCmIbs2);
+        Boolean[] useMarker = useMarker(refGT, targGT);
+
+        IntArray windowStarts = stepStarts(useMarker, map);
 
         int[][][] idSets = IntStream.range(0, windowStarts.size())
                 .parallel()
-                .mapToObj(w -> ibsSamples(targGT, refGT, windowStarts, w))
+                .mapToObj(w -> ibsSamples(targGT, refGT, useMarker, windowStarts, w))
                 .toArray(int[][][]::new); //[window][targ_sample][ibs2_samples]
 
         int nMarkersM1 = targGT.nMarkers() - 1;
         Predicate<SampleSeg> predicate =
-                ss -> (genPos.get(ss.inclEnd()) - genPos.get(ss.start())) >= minCmIbs2;
+                ss -> (genPos.get(ss.inclEnd()) - genPos.get(ss.start())) >= MIN_IBS2_CM;
         this.sampleSegs = IntStream.range(0, targGT.nSamples())
                 .parallel()
                 .mapToObj(s -> {
@@ -99,95 +102,98 @@ public final class Ibs2 {
                 .toArray(SampleSeg[][]::new);
     }
 
-    private static void checkArgs(GT targGT, RefGT refGT, MarkerMap map,
-            double minIbs2Cm) {
+    private static void checkArgs(GT targGT, RefGT refGT, MarkerMap map) {
         if (targGT.nMarkers()!=map.genPos().size()) {
             throw new IllegalArgumentException(String.valueOf(map.genPos().size()));
         }
         if (refGT!=null && refGT.markers().equals(targGT.markers())==false) {
             throw new IllegalArgumentException("inconsistent markers");
         }
-        if (minIbs2Cm <= 0 || Double.isFinite(minIbs2Cm)==false) {
-            throw new IllegalArgumentException(String.valueOf(minIbs2Cm));
-        }
     }
 
-    private static IntArray stepStarts(MarkerMap map, double minCM) {
-        double[] genPos = map.genPos().toArray();
-        IntList indices = new IntList(genPos.length/100);
+    private static IntArray stepStarts(Boolean[] useMarker, MarkerMap map) {
+        DoubleArray genPos = map.genPos();
+        IntList indices = new IntList(genPos.size()/100);
 
         int nextStart = 0;
-        while (nextStart<genPos.length) {
+        while (nextStart<genPos.size()) {
             indices.add(nextStart);
-            nextStart = nextStart(genPos, nextStart, minCM);
+            nextStart = nextStart(genPos, useMarker, nextStart, MIN_STEP_CM);
         }
-        if (popLastInterval(indices, genPos, minCM)) {
+        if (nextStart==Integer.MAX_VALUE) {
+            // last interval is too short and should be removed
             indices.pop();
         }
-        return new WrappedIntArray(indices.toArray());
+        return new WrappedIntArray(indices);
     }
 
-    private static int nextStart(double[] genPos, int start, double minCM) {
-        double nextGenPos = genPos[start] + minCM;
-        int nextIndex = Arrays.binarySearch(genPos, start, genPos.length,
-                nextGenPos);
-        if (nextIndex<0) {
-            nextIndex = -nextIndex-1;
+    private static int nextStart(DoubleArray genPos, Boolean[] useMarker,
+            int start, double minCM) {
+        int nextStart = start + 1;
+        int markerCnt = 0;
+        while (nextStart<useMarker.length && markerCnt<MIN_STEP_MARKERS) {
+            if (useMarker[nextStart]) {
+                ++markerCnt;
+            }
+            ++nextStart;
         }
-        int nextStart = nextIndex;
-
-        int minNextStart = start + MIN_STEP_MARKERS;
-        int maxNextStart = start + MAX_STEP_MARKERS;
-        if (nextStart < minNextStart) {
-            nextStart = minNextStart;
+        double targCM = genPos.get(start) + minCM;
+        while (nextStart<useMarker.length && genPos.get(nextStart)<targCM
+                && markerCnt<MAX_STEP_MARKERS) {
+            if (useMarker[nextStart]) {
+                ++markerCnt;
+            }
+            ++nextStart;
         }
-        else if (nextStart > maxNextStart) {
-            nextStart = maxNextStart;
+        if (nextStart==useMarker.length &&
+                (markerCnt<MIN_STEP_MARKERS || genPos.get(nextStart-1)<targCM)) {
+            // signal that last interval is too short and should be removed
+            nextStart = Integer.MAX_VALUE;
         }
         return nextStart;
     }
 
-    private static boolean popLastInterval(IntList indices, double[] pos,
-            double minCM) {
-        if (indices.size()==1) {
-            return false;
-        }
-        else {
-            int lastStart = indices.get(indices.size()-1);
-            return (pos.length - lastStart) < (MIN_STEP_MARKERS>>1)
-                    || (pos[pos.length-1] - pos[lastStart]) < (0.5*minCM);
-        }
-    }
-
-    private static int[][] ibsSamples(GT targGT, RefGT refGT, IntArray wStarts,
-            int w) {
+    // To Do: ibsSamples need to be updated to incorporate useMarker
+    private static int[][] ibsSamples(GT targGT, RefGT refGT, Boolean[] useMarker,
+            IntArray wStarts, int w) {
         int wP1 = w + 1;
         int start = wStarts.get(w);
         int end = (wP1 < wStarts.size()) ? wStarts.get(wP1) : targGT.nMarkers();
+        int[] markers = markers(useMarker, start, end);
         List<SampClust> equivLists = new ArrayList<>(1);
-        equivLists.add(initCluster(targGT, refGT, start, end));
-        for (int m=start; m<end; ++m) {
+        equivLists.add(initCluster(targGT, refGT, markers));
+        for (int m : markers) {
             int mm = m;
             equivLists = equivLists.stream()
-                    .flatMap(ia -> partition(targGT, refGT, ia, mm))
+                    .flatMap(list -> partition(targGT, refGT, list, mm))
                     .collect(Collectors.toCollection(ArrayList::new));
         }
         return results(equivLists, targGT.nSamples());
     }
 
-    private static SampClust initCluster(GT targGT, RefGT refGT, int start,
-            int end) {
+    private static int[] markers(Boolean[] useMarker, int start, int end) {
+        IntList markers = new IntList(MIN_STEP_MARKERS);
+        for (int m=start; m<end; ++m) {
+            if (useMarker[m]) {
+                markers.add(m);
+            }
+        }
+        return markers.toArray();
+    }
+
+    private static SampClust initCluster(GT targGT, RefGT refGT,
+            int[] markers) {
         int nTargSamples = targGT.nSamples();
         int nSamples = targGT.nSamples() + (refGT!=null ? refGT.nSamples() : 0);
-        int maxMiss = (int) Math.ceil(MAX_MISS_PROP*(end - start));
         int[] missCnt = new int[nSamples];
-        for (int m=start; m<end; ++m) {
+        for (int m : markers) {
             for (int s=0; s<nTargSamples; ++s) {
                 if (targGT.allele1(m, s)==-1 || targGT.allele2(m, s)==-1) {
                     ++missCnt[s];
                 }
             }
         }
+        int maxMiss = (int) Math.ceil(MAX_MISS_FREQ*markers.length);
         int[] initCluster = IntStream.range(0, nSamples)
                 .filter(s -> s>=nTargSamples || missCnt[s]<=maxMiss)
                 .toArray();
@@ -221,9 +227,10 @@ public final class Ibs2 {
                 else if (s<nTargSamples || missingTargSamples.size()>0) { // assumes int[] parent.samples is increasing
                     gtIndices.add(gtIndex);
                     areHom[gtIndex] = parent.areHomozygous && (gt[0]==gt[1]);
+                    // put s and missTargSamples in new list
+                    // s MUST come after missing target samples for list to be increasing
                     gtToList[gtIndex] = new IntList();
                     for (int k=0, n=missingTargSamples.size(); k<n; ++k) {
-                        // if s>=nTargSamples, put s in list w/ missTargSamples
                         gtToList[gtIndex].add(missingTargSamples.get(k));
                     }
                     gtToList[gtIndex].add(s);
@@ -403,14 +410,6 @@ public final class Ibs2 {
         return targGT.nSamples() + (refGT!=null ? refGT.nSamples() : 0);
     }
 
-    /**
-     * Returns the minimum cM length of a stored IBS2 segment.
-     * @return the minimum cM length of a stored IBS2 segment
-     */
-    public double minCmIbs2() {
-        return minCmIbs2;
-    }
-
     /**
      * Returns {@code true} if the specified samples are estimated
      * to be IBS2 at the specified marker, and the IBS2 interval
@@ -463,4 +462,58 @@ public final class Ibs2 {
             this.areHomozygous = true;
         }
     }
+
+    private static Boolean[] useMarker(RefGT refGT, GT targGT) {
+        int minTotalHaps = 200;
+        int[] refHaps = refHaps(refGT, targGT, minTotalHaps);
+        int nTargHaps = targGT.nHaps();
+        int maxMiss = Math.round(MAX_MISS_FREQ*nTargHaps);
+        int minMinor = Math.round(MIN_MINOR_FREQ*(nTargHaps + refHaps.length));
+        return IntStream.range(0, targGT.nMarkers())
+                .parallel()
+                .mapToObj(m -> accept(refGT, refHaps, targGT, m, maxMiss, minMinor))
+                .toArray(Boolean[]::new);
+    }
+
+    private static int[] refHaps(RefGT refGT, GT targGT, int minTotalHaps) {
+        int nTargHaps = targGT.nHaps();
+        if (refGT==null || minTotalHaps<=nTargHaps) {
+            return new int[0];
+        }
+        int xtraRefHaps = Math.min(minTotalHaps - nTargHaps, refGT.nHaps());
+        if (xtraRefHaps < refGT.nHaps()) {
+            Random rand = new Random(xtraRefHaps);
+            return rand.ints(xtraRefHaps, 0, refGT.nHaps()).sorted().toArray();
+        }
+        else {
+            return IntStream.range(0, refGT.nHaps()).toArray();
+        }
+    }
+
+    private static Boolean accept(RefGT refGT, int[] refHaps, GT targGT,
+            int marker, int maxMiss, int minMinor) {
+        int[] cnts = missAndAlleleCnts(refGT, refHaps, targGT, marker);
+        if (cnts[0]>maxMiss || cnts.length<3) {
+            return Boolean.FALSE;
+        }
+        Arrays.sort(cnts, 1, cnts.length);
+        return Boolean.valueOf(cnts[cnts.length-2] >= minMinor);
+    }
+
+    private static int[] missAndAlleleCnts(RefGT refGT, int[] refHaps, GT targGT,
+            int marker) {
+        assert refGT==null || targGT.marker(marker).equals(refGT.marker(marker));
+        int nAllelesP1 = targGT.marker(marker).nAlleles() + 1;
+        // missing allele cnt is in element [0], ref allele cnt is in element [1]
+        int[] cnts = new int[nAllelesP1];
+        if (refGT!=null) {
+            for (int refHap : refHaps) {
+                ++cnts[refGT.allele(marker, refHap)+1];
+            }
+        }
+        for (int h=0, n=targGT.nHaps(); h<n; ++h) {
+            ++cnts[targGT.allele(marker, h)+1];
+        }
+        return cnts;
+    }
 }



View it on GitLab: https://salsa.debian.org/med-team/beagle/-/commit/dcb39a5143ebd41a68edcb1a51902776b6b28079

-- 
View it on GitLab: https://salsa.debian.org/med-team/beagle/-/commit/dcb39a5143ebd41a68edcb1a51902776b6b28079
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200430/ac3cf6fe/attachment-0001.html>


More information about the debian-med-commit mailing list