[med-svn] [Git][med-team/beagle][upstream] New upstream version 5.1-200427+dfsg
Dylan Aïssi
gitlab at salsa.debian.org
Thu Apr 30 07:23:33 BST 2020
Dylan Aïssi pushed to branch upstream at Debian Med / beagle
Commits:
dcb39a51 by Dylan Aïssi at 2020-04-30T08:23:15+02:00
New upstream version 5.1-200427+dfsg
- - - - -
3 changed files:
- main/Main.java
- phase/FixedPhaseData.java
- phase/Ibs2.java
Changes:
=====================================
main/Main.java
=====================================
@@ -65,8 +65,8 @@ public class Main {
* The program name and version.
*/
public static final String VERSION = "(version 5.1)";
- public static final String PROGRAM = "beagle.24Mar20.5f5.jar";
- public static final String COMMAND = "java -jar beagle.24Mar20.5f5.jar";
+ public static final String PROGRAM = "beagle.27Apr20.b81.jar";
+ public static final String COMMAND = "java -jar beagle.27Apr20.b81.jar";
/**
* The copyright string.
@@ -78,7 +78,7 @@ public class Main {
*/
public static final String SHORT_HELP = Main.PROGRAM + " " + VERSION
+ Const.nl + Main.COPYRIGHT
- + Const.nl + "Enter \"java -jar beagle.24Mar20.5f5.jar\" to "
+ + Const.nl + "Enter \"java -jar beagle.27Apr20.b81.jar\" to "
+ "list command line argument";
private final Par par;
=====================================
phase/FixedPhaseData.java
=====================================
@@ -46,7 +46,6 @@ import vcf.SplicedGT;
public class FixedPhaseData {
private static final float MAX_HIFREQ_PROP = 0.9f;
- private static final float MIN_IBS2_CM = 2.0f;
private final Par par;
private final float err;
@@ -145,7 +144,7 @@ public class FixedPhaseData {
this.prevHiFreqMarker = prevHiFreqMarker(targGT.nMarkers(), hiFreqIndices);
this.prevWt = prevWt(map, hiFreqIndices);
}
- this.ibs2 = new Ibs2(hiFreqTargGT, hiFreqRefGT, hiFreqMap, MIN_IBS2_CM);
+ this.ibs2 = new Ibs2(hiFreqTargGT, hiFreqRefGT, hiFreqMap);
}
private static void ignoreLowFreqCarriers(IntArray[][] carriers) {
=====================================
phase/Ibs2.java
=====================================
@@ -25,6 +25,7 @@ import ints.WrappedIntArray;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
+import java.util.Random;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
@@ -43,15 +44,17 @@ import vcf.RefGT;
*/
public final class Ibs2 {
- private static final int MIN_STEP_MARKERS = 100;
- private static final int MAX_STEP_MARKERS = 1500; // high-frequency markers
- private static final float MAX_MISS_PROP = 0.1f;
+ private static final float MIN_IBS2_CM = 2.0f;
private static final double MAX_IBD_GAP_CM = 4.0;
+ private static final float MAX_MISS_FREQ = 0.1f;
+ private static final float MIN_MINOR_FREQ = 0.02f;
+ private static final float MIN_STEP_CM = 0.5f*MIN_IBS2_CM;
+ private static final int MIN_STEP_MARKERS = 100;
+ private static final int MAX_STEP_MARKERS = 1000;
private final GT targGT;
private final RefGT refGT;
private final DoubleArray genPos;
- private final double minCmIbs2;
private final SampleSeg[][] sampleSegs; // [targ sample][segment]
/**
@@ -60,7 +63,6 @@ public final class Ibs2 {
* @param refGT the reference genotype data or {@code null} if there are
* no reference data
* @param map an array with the genetic map positions of each marker
- * @param minCmIbs2 the minimum cM length of a stored IBS2 segment
* @throws NullPointerException if {@code (targGT == null || map == null)}
* @throws IllegalArgumentException if
* {@code targGT.nMarkers() != map.genPos().size()}
@@ -69,22 +71,23 @@ public final class Ibs2 {
* @throws IllegalArgumentException if
* {@code (minCmIbs2 <= 0 || Double.isFinite(minCmIbs2) == false)}
*/
- public Ibs2(GT targGT, RefGT refGT, MarkerMap map, double minCmIbs2) {
- checkArgs(targGT, refGT, map, minCmIbs2);
+ public Ibs2(GT targGT, RefGT refGT, MarkerMap map) {
+ checkArgs(targGT, refGT, map);
this.targGT = targGT;
this.refGT = refGT;
this.genPos = map.genPos();
- this.minCmIbs2 = minCmIbs2;
- IntArray windowStarts = stepStarts(map, 0.5*minCmIbs2);
+ Boolean[] useMarker = useMarker(refGT, targGT);
+
+ IntArray windowStarts = stepStarts(useMarker, map);
int[][][] idSets = IntStream.range(0, windowStarts.size())
.parallel()
- .mapToObj(w -> ibsSamples(targGT, refGT, windowStarts, w))
+ .mapToObj(w -> ibsSamples(targGT, refGT, useMarker, windowStarts, w))
.toArray(int[][][]::new); //[window][targ_sample][ibs2_samples]
int nMarkersM1 = targGT.nMarkers() - 1;
Predicate<SampleSeg> predicate =
- ss -> (genPos.get(ss.inclEnd()) - genPos.get(ss.start())) >= minCmIbs2;
+ ss -> (genPos.get(ss.inclEnd()) - genPos.get(ss.start())) >= MIN_IBS2_CM;
this.sampleSegs = IntStream.range(0, targGT.nSamples())
.parallel()
.mapToObj(s -> {
@@ -99,95 +102,98 @@ public final class Ibs2 {
.toArray(SampleSeg[][]::new);
}
- private static void checkArgs(GT targGT, RefGT refGT, MarkerMap map,
- double minIbs2Cm) {
+ private static void checkArgs(GT targGT, RefGT refGT, MarkerMap map) {
if (targGT.nMarkers()!=map.genPos().size()) {
throw new IllegalArgumentException(String.valueOf(map.genPos().size()));
}
if (refGT!=null && refGT.markers().equals(targGT.markers())==false) {
throw new IllegalArgumentException("inconsistent markers");
}
- if (minIbs2Cm <= 0 || Double.isFinite(minIbs2Cm)==false) {
- throw new IllegalArgumentException(String.valueOf(minIbs2Cm));
- }
}
- private static IntArray stepStarts(MarkerMap map, double minCM) {
- double[] genPos = map.genPos().toArray();
- IntList indices = new IntList(genPos.length/100);
+ private static IntArray stepStarts(Boolean[] useMarker, MarkerMap map) {
+ DoubleArray genPos = map.genPos();
+ IntList indices = new IntList(genPos.size()/100);
int nextStart = 0;
- while (nextStart<genPos.length) {
+ while (nextStart<genPos.size()) {
indices.add(nextStart);
- nextStart = nextStart(genPos, nextStart, minCM);
+ nextStart = nextStart(genPos, useMarker, nextStart, MIN_STEP_CM);
}
- if (popLastInterval(indices, genPos, minCM)) {
+ if (nextStart==Integer.MAX_VALUE) {
+ // last interval is too short and should be removed
indices.pop();
}
- return new WrappedIntArray(indices.toArray());
+ return new WrappedIntArray(indices);
}
- private static int nextStart(double[] genPos, int start, double minCM) {
- double nextGenPos = genPos[start] + minCM;
- int nextIndex = Arrays.binarySearch(genPos, start, genPos.length,
- nextGenPos);
- if (nextIndex<0) {
- nextIndex = -nextIndex-1;
+ private static int nextStart(DoubleArray genPos, Boolean[] useMarker,
+ int start, double minCM) {
+ int nextStart = start + 1;
+ int markerCnt = 0;
+ while (nextStart<useMarker.length && markerCnt<MIN_STEP_MARKERS) {
+ if (useMarker[nextStart]) {
+ ++markerCnt;
+ }
+ ++nextStart;
}
- int nextStart = nextIndex;
-
- int minNextStart = start + MIN_STEP_MARKERS;
- int maxNextStart = start + MAX_STEP_MARKERS;
- if (nextStart < minNextStart) {
- nextStart = minNextStart;
+ double targCM = genPos.get(start) + minCM;
+ while (nextStart<useMarker.length && genPos.get(nextStart)<targCM
+ && markerCnt<MAX_STEP_MARKERS) {
+ if (useMarker[nextStart]) {
+ ++markerCnt;
+ }
+ ++nextStart;
}
- else if (nextStart > maxNextStart) {
- nextStart = maxNextStart;
+ if (nextStart==useMarker.length &&
+ (markerCnt<MIN_STEP_MARKERS || genPos.get(nextStart-1)<targCM)) {
+ // signal that last interval is too short and should be removed
+ nextStart = Integer.MAX_VALUE;
}
return nextStart;
}
- private static boolean popLastInterval(IntList indices, double[] pos,
- double minCM) {
- if (indices.size()==1) {
- return false;
- }
- else {
- int lastStart = indices.get(indices.size()-1);
- return (pos.length - lastStart) < (MIN_STEP_MARKERS>>1)
- || (pos[pos.length-1] - pos[lastStart]) < (0.5*minCM);
- }
- }
-
- private static int[][] ibsSamples(GT targGT, RefGT refGT, IntArray wStarts,
- int w) {
+ // To Do: ibsSamples need to be updated to incorporate useMarker
+ private static int[][] ibsSamples(GT targGT, RefGT refGT, Boolean[] useMarker,
+ IntArray wStarts, int w) {
int wP1 = w + 1;
int start = wStarts.get(w);
int end = (wP1 < wStarts.size()) ? wStarts.get(wP1) : targGT.nMarkers();
+ int[] markers = markers(useMarker, start, end);
List<SampClust> equivLists = new ArrayList<>(1);
- equivLists.add(initCluster(targGT, refGT, start, end));
- for (int m=start; m<end; ++m) {
+ equivLists.add(initCluster(targGT, refGT, markers));
+ for (int m : markers) {
int mm = m;
equivLists = equivLists.stream()
- .flatMap(ia -> partition(targGT, refGT, ia, mm))
+ .flatMap(list -> partition(targGT, refGT, list, mm))
.collect(Collectors.toCollection(ArrayList::new));
}
return results(equivLists, targGT.nSamples());
}
- private static SampClust initCluster(GT targGT, RefGT refGT, int start,
- int end) {
+ private static int[] markers(Boolean[] useMarker, int start, int end) {
+ IntList markers = new IntList(MIN_STEP_MARKERS);
+ for (int m=start; m<end; ++m) {
+ if (useMarker[m]) {
+ markers.add(m);
+ }
+ }
+ return markers.toArray();
+ }
+
+ private static SampClust initCluster(GT targGT, RefGT refGT,
+ int[] markers) {
int nTargSamples = targGT.nSamples();
int nSamples = targGT.nSamples() + (refGT!=null ? refGT.nSamples() : 0);
- int maxMiss = (int) Math.ceil(MAX_MISS_PROP*(end - start));
int[] missCnt = new int[nSamples];
- for (int m=start; m<end; ++m) {
+ for (int m : markers) {
for (int s=0; s<nTargSamples; ++s) {
if (targGT.allele1(m, s)==-1 || targGT.allele2(m, s)==-1) {
++missCnt[s];
}
}
}
+ int maxMiss = (int) Math.ceil(MAX_MISS_FREQ*markers.length);
int[] initCluster = IntStream.range(0, nSamples)
.filter(s -> s>=nTargSamples || missCnt[s]<=maxMiss)
.toArray();
@@ -221,9 +227,10 @@ public final class Ibs2 {
else if (s<nTargSamples || missingTargSamples.size()>0) { // assumes int[] parent.samples is increasing
gtIndices.add(gtIndex);
areHom[gtIndex] = parent.areHomozygous && (gt[0]==gt[1]);
+ // put s and missTargSamples in new list
+ // s MUST come after missing target samples for list to be increasing
gtToList[gtIndex] = new IntList();
for (int k=0, n=missingTargSamples.size(); k<n; ++k) {
- // if s>=nTargSamples, put s in list w/ missTargSamples
gtToList[gtIndex].add(missingTargSamples.get(k));
}
gtToList[gtIndex].add(s);
@@ -403,14 +410,6 @@ public final class Ibs2 {
return targGT.nSamples() + (refGT!=null ? refGT.nSamples() : 0);
}
- /**
- * Returns the minimum cM length of a stored IBS2 segment.
- * @return the minimum cM length of a stored IBS2 segment
- */
- public double minCmIbs2() {
- return minCmIbs2;
- }
-
/**
* Returns {@code true} if the specified samples are estimated
* to be IBS2 at the specified marker, and the IBS2 interval
@@ -463,4 +462,58 @@ public final class Ibs2 {
this.areHomozygous = true;
}
}
+
+ private static Boolean[] useMarker(RefGT refGT, GT targGT) {
+ int minTotalHaps = 200;
+ int[] refHaps = refHaps(refGT, targGT, minTotalHaps);
+ int nTargHaps = targGT.nHaps();
+ int maxMiss = Math.round(MAX_MISS_FREQ*nTargHaps);
+ int minMinor = Math.round(MIN_MINOR_FREQ*(nTargHaps + refHaps.length));
+ return IntStream.range(0, targGT.nMarkers())
+ .parallel()
+ .mapToObj(m -> accept(refGT, refHaps, targGT, m, maxMiss, minMinor))
+ .toArray(Boolean[]::new);
+ }
+
+ private static int[] refHaps(RefGT refGT, GT targGT, int minTotalHaps) {
+ int nTargHaps = targGT.nHaps();
+ if (refGT==null || minTotalHaps<=nTargHaps) {
+ return new int[0];
+ }
+ int xtraRefHaps = Math.min(minTotalHaps - nTargHaps, refGT.nHaps());
+ if (xtraRefHaps < refGT.nHaps()) {
+ Random rand = new Random(xtraRefHaps);
+ return rand.ints(xtraRefHaps, 0, refGT.nHaps()).sorted().toArray();
+ }
+ else {
+ return IntStream.range(0, refGT.nHaps()).toArray();
+ }
+ }
+
+ private static Boolean accept(RefGT refGT, int[] refHaps, GT targGT,
+ int marker, int maxMiss, int minMinor) {
+ int[] cnts = missAndAlleleCnts(refGT, refHaps, targGT, marker);
+ if (cnts[0]>maxMiss || cnts.length<3) {
+ return Boolean.FALSE;
+ }
+ Arrays.sort(cnts, 1, cnts.length);
+ return Boolean.valueOf(cnts[cnts.length-2] >= minMinor);
+ }
+
+ private static int[] missAndAlleleCnts(RefGT refGT, int[] refHaps, GT targGT,
+ int marker) {
+ assert refGT==null || targGT.marker(marker).equals(refGT.marker(marker));
+ int nAllelesP1 = targGT.marker(marker).nAlleles() + 1;
+ // missing allele cnt is in element [0], ref allele cnt is in element [1]
+ int[] cnts = new int[nAllelesP1];
+ if (refGT!=null) {
+ for (int refHap : refHaps) {
+ ++cnts[refGT.allele(marker, refHap)+1];
+ }
+ }
+ for (int h=0, n=targGT.nHaps(); h<n; ++h) {
+ ++cnts[targGT.allele(marker, h)+1];
+ }
+ return cnts;
+ }
}
View it on GitLab: https://salsa.debian.org/med-team/beagle/-/commit/dcb39a5143ebd41a68edcb1a51902776b6b28079
--
View it on GitLab: https://salsa.debian.org/med-team/beagle/-/commit/dcb39a5143ebd41a68edcb1a51902776b6b28079
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200430/ac3cf6fe/attachment-0001.html>
More information about the debian-med-commit
mailing list