[med-svn] [Git][med-team/bbmap][master] 4 commits: New upstream version 38.98+dfsg
Andreas Tille (@tille)
gitlab at salsa.debian.org
Tue Aug 23 15:22:56 BST 2022
Andreas Tille pushed to branch master at Debian Med / bbmap
Commits:
cc001214 by Andreas Tille at 2022-08-23T16:19:16+02:00
New upstream version 38.98+dfsg
- - - - -
e2927e4e by Andreas Tille at 2022-08-23T16:19:16+02:00
routine-update: New upstream version
- - - - -
5374e077 by Andreas Tille at 2022-08-23T16:19:22+02:00
Update upstream source from tag 'upstream/38.98+dfsg'
Update to upstream version '38.98+dfsg'
with Debian dir 6373c5bbb64e28c7b976e0eac244c408e6f89cb0
- - - - -
346038b3 by Andreas Tille at 2022-08-23T16:21:36+02:00
routine-update: Ready to upload to unstable
- - - - -
23 changed files:
- README.md
- bbcms.sh
- current/align2/BBSplitter.java
- current/bloom/BloomFilterCorrectorWrapper.java
- current/bloom/KCountArray.java
- current/bloom/KCountArray7MTA.java
- current/driver/ConcatenateFiles.java
- current/driver/SelectReads.java
- current/jgi/BBDuk.java
- current/jgi/CountGC.java
- current/jgi/CoveragePileup.java
- current/jgi/KmerCoverage.java
- current/jgi/KmerNormalize.java
- current/jgi/RandomGenome.java
- current/jgi/ReadKmerDepthDistribution.java
- current/pacbio/MergeReadsAndGenome.java
- current/shared/Parser.java
- current/shared/ReadStats.java
- current/shared/Shared.java
- current/shared/TrimRead.java
- current/stream/SamLine.java
- debian/changelog
- docs/changelog.txt
Changes:
=====================================
README.md
=====================================
@@ -3,4 +3,4 @@
# Language: Java, Bash
# Information about documentation is in /docs/readme.txt.
-# Version 38.97
+# Version 38.98
=====================================
bbcms.sh
=====================================
@@ -3,7 +3,7 @@
usage(){
echo "
Written by Brian Bushnell
-Last modified September 4, 2018
+Last modified August 8, 2022
Description: Error corrects reads and/or filters by depth, storing
kmer counts in a count-min sketch (a Bloom filter variant).
@@ -64,6 +64,8 @@ cells= Option to set the number of cells manually. By default this
to set this is to ensure deterministic output.
seed=0 This will change the hash function used. Useful if running
iteratively with a very full table. -1 uses a random seed.
+lockedincrement=t Increases counting accuracy for a slight speed penalty.
+ Could have low performance on very low-complexity sequence.
Depth filtering parameters:
mincount=0 If positive, reads with kmer counts below mincount will
=====================================
current/align2/BBSplitter.java
=====================================
@@ -441,8 +441,7 @@ public class BBSplitter {
}
tf.close();
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
return refname;
}
@@ -1154,8 +1153,7 @@ public class BBSplitter {
tsw.println("samtools index "+bam3);
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
try {
File f=new File(outname);
@@ -1237,7 +1235,7 @@ public class BBSplitter {
sb.setLength(0);
}
}
- tsw.poison();
+ tsw.poisonAndWait();
}
static final void clearStatics(){
=====================================
current/bloom/BloomFilterCorrectorWrapper.java
=====================================
@@ -80,6 +80,7 @@ public class BloomFilterCorrectorWrapper {
ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
ReadWrite.MAX_ZIP_THREADS=Tools.max(Shared.threads()>1 ? 2 : 1, Shared.threads()>20 ? Shared.threads()/2 : Shared.threads());
BBMerge.strict=true;
+ KCountArray.LOCKED_INCREMENT=true;
//Create a parser object
Parser parser=new Parser();
=====================================
current/bloom/KCountArray.java
=====================================
@@ -59,8 +59,9 @@ public abstract class KCountArray implements Serializable {
// }else{
// kca=new KCountArray8MT(cells_, cbits_, gap_, hashes_, prefilter); //Like 7MT but uses prefilter
// }
- kca=new KCountArray7MTA(cells_, cbits_, gap_, hashes_, prefilter, prefilterLimit_); //Like 4MT but uses primes
-
+
+ kca=new KCountArray7MTA(cells_, cbits_, gap_, hashes_, prefilter, prefilterLimit_);
+
// if(prefilter==null){
// kca=new KCountArray9MT(cells_, cbits_, gap_, hashes_); //Like 7MT but uses canonical kmers
// }else{
@@ -520,4 +521,8 @@ public abstract class KCountArray implements Serializable {
private static final long canonK=4;
static final long canonMask=(1<<(canonK*2))-1; //e.g. 255 for k=4
+ /** Increases accuracy of overloaded multi-bit tables */
+ public static boolean LOCKED_INCREMENT=false;
+ public static boolean SET_LOCKED_INCREMENT=false;
+
}
=====================================
current/bloom/KCountArray7MTA.java
=====================================
@@ -5,6 +5,8 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.atomic.AtomicIntegerArray;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
import shared.KillSwitch;
import shared.Primes;
@@ -85,6 +87,12 @@ public final class KCountArray7MTA extends KCountArray {
// System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
matrix=allocMatrix(numArrays, wordsPerArray);
+
+ useLocks=(cellBits>1 && hashes>1) && (LOCKED_INCREMENT || !SET_LOCKED_INCREMENT);
+ if(useLocks){
+ locks=new Lock[NUM_LOCKS];
+ for(int i=0; i<NUM_LOCKS; i++){locks[i]=new ReentrantLock();}
+ }else{locks=null;}
// matrix=new AtomicIntegerArray[numArrays];
// for(int i=0; i<matrix.length; i++){
@@ -266,20 +274,38 @@ public final class KCountArray7MTA extends KCountArray {
if(x<prefilterLimit){return;}
}
+ if(useLocks){incrementLocked(rawKey);}
+ else{incrementUnlocked(rawKey);}
+ }
+
+ private void incrementUnlocked(long rawKey){
long key2=rawKey;
for(int i=0; i<hashes; i++){
key2=hash(key2, i);
if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
-// assert(readHashed(key2)==0);
-
-// int bnum=(int)(key2&arrayMask);
incrementHashedLocal(key2);
-// assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
-// assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
key2=Long.rotateRight(key2, hashBits);
}
}
+ private void incrementLocked(long rawKey){
+ final Lock lock=getLock(rawKey);
+ lock.lock();
+ final int min=read(rawKey);
+ if(min>=maxValue){
+ lock.unlock();
+ return;
+ }
+ long key2=rawKey;
+ for(int i=0; i<hashes; i++){
+ key2=hash(key2, i);
+ if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
+ incrementHashedLocal_ifAtMost(key2, min);
+ key2=Long.rotateRight(key2, hashBits);
+ }
+ lock.unlock();
+ }
+
@Override
public final void decrement(final long rawKey){
if(verbose){System.err.println("\n*** Decrementing raw key "+rawKey+" ***");}
@@ -315,18 +341,39 @@ public final class KCountArray7MTA extends KCountArray {
if(x<prefilterLimit){return x;}
}
+ if(useLocks){return incrementAndReturnUnincrementedLocked(rawKey, incr);}
+ else{return incrementAndReturnUnincrementedUnlocked(rawKey, incr);}
+ }
+
+ private int incrementAndReturnUnincrementedLocked(final long rawKey, final int incr){
+ final Lock lock=getLock(rawKey);
+ lock.lock();
+ final int min=read(rawKey);
+ if(min>=maxValue){
+ lock.unlock();
+ return min;
+ }
+ long key2=rawKey;
+ int value=maxValue;
+ for(int i=0; i<hashes; i++){
+ key2=hash(key2, i);
+ if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
+ int x=incrementHashedLocalAndReturnUnincremented_ifAtMost(key2, incr, min);
+ value=min(value, x);
+ key2=Long.rotateRight(key2, hashBits);
+ }
+ lock.unlock();
+ return value;
+ }
+
+ private int incrementAndReturnUnincrementedUnlocked(final long rawKey, final int incr){
long key2=rawKey;
int value=maxValue;
for(int i=0; i<hashes; i++){
key2=hash(key2, i);
if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
-// assert(readHashed(key2)==0);
-
-// int bnum=(int)(key2&arrayMask);
int x=incrementHashedLocalAndReturnUnincremented(key2, incr);
value=min(value, x);
-// assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
-// assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
key2=Long.rotateRight(key2, hashBits);
}
return value;
@@ -588,6 +635,11 @@ public final class KCountArray7MTA extends KCountArray {
}
}
+ private final Lock getLock(long rawKey){
+ final Lock lock=locks[(int)((rawKey&Long.MAX_VALUE)%(NUM_LOCKS))];
+ return lock;
+ }
+
private int incrementHashedLocal(long key){
final int num=(int)(key&arrayMask);
final AtomicIntegerArray array=matrix[num];
@@ -607,6 +659,25 @@ public final class KCountArray7MTA extends KCountArray {
return value;
}
+ private int incrementHashedLocal_ifAtMost(long key, final int thresh){
+ final int num=(int)(key&arrayMask);
+ final AtomicIntegerArray array=matrix[num];
+ key=(key>>>arrayBits)%(cellMod);
+// key=(key>>>(arrayBits+1))%(cellMod);
+ int index=(int)(key>>>indexShift);
+ int cellShift=(int)(cellBits*key);
+ int value, word, word2;
+ do{
+ assert(index>=0) : key+", "+cellMod+", "+cellBits+", "+valueMask+", "+arrayMask+", "+index+", "+num;
+ word=array.get(index);
+ value=((word>>>cellShift)&valueMask);
+ if(value>thresh){return value;}//Too high; don't increment
+ value=min(value+1, maxValue);
+ word2=(value<<cellShift)|(word&~((valueMask)<<cellShift));
+ }while(word!=word2 && !array.compareAndSet(index, word, word2));
+ return value;
+ }
+
private int incrementHashedLocalAndReturnUnincremented(long key, int incr){
assert(incr>=0);
final int num=(int)(key&arrayMask);
@@ -626,6 +697,26 @@ public final class KCountArray7MTA extends KCountArray {
return value;
}
+ private int incrementHashedLocalAndReturnUnincremented_ifAtMost(long key, int incr, final int thresh){
+ assert(incr>=0);
+ final int num=(int)(key&arrayMask);
+ final AtomicIntegerArray array=matrix[num];
+ key=(key>>>arrayBits)%(cellMod);
+// key=(key>>>(arrayBits+1))%(cellMod);
+ int index=(int)(key>>>indexShift);
+ int cellShift=(int)(cellBits*key);
+ int value, word, word2;
+ do{
+ word=array.get(index);
+ value=((word>>>cellShift)&valueMask);
+ if(value>thresh){return value;}//Too high; don't increment
+ int value2=min(value+incr, maxValue);
+ word2=(value2<<cellShift)|(word&~((valueMask)<<cellShift));
+ }while(word!=word2 && !array.compareAndSet(index, word, word2));
+// if(value==1){cellsUsedPersonal.incrementAndGet(num);}
+ return value;
+ }
+
private int decrementHashedLocal(long key){
final int num=(int)(key&arrayMask);
final AtomicIntegerArray array=matrix[num];
@@ -689,6 +780,10 @@ public final class KCountArray7MTA extends KCountArray {
private static final int hashCellMask=hashArrayLength-1;
private KCountArray prefilter;
+
+ private final transient boolean useLocks;
+ private final transient Lock[] locks;
+ private static final transient int NUM_LOCKS=1999;
private static long counter=0;
private static long SEEDMASK=0;
=====================================
current/driver/ConcatenateFiles.java
=====================================
@@ -53,8 +53,7 @@ public class ConcatenateFiles {
}
System.err.print(".");
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
}
public static void concatenateDirectory(final String in, String out) throws IOException{
=====================================
current/driver/SelectReads.java
=====================================
@@ -59,8 +59,7 @@ public final class SelectReads {
}
}
tf.close();
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
}
=====================================
current/jgi/BBDuk.java
=====================================
@@ -1021,6 +1021,8 @@ public class BBDuk {
s=Data.findPath("?mtst.fa");
}else if("adapters".equalsIgnoreCase(s)){
s=Data.findPath("?adapters.fa");
+ }else if("pacbioadapter".equalsIgnoreCase(s) || "pacbioadapters".equalsIgnoreCase(s)){
+ s=Data.findPath("?PacBioAdapter.fa");
}else if("truseq".equalsIgnoreCase(s)){
s=Data.findPath("?truseq.fa.gz");
}else if("nextera".equalsIgnoreCase(s)){
=====================================
current/jgi/CountGC.java
=====================================
@@ -233,8 +233,7 @@ public class CountGC {
}
if(tsw!=null){
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
}
LIMSUM=limsum;
return overall;
@@ -334,8 +333,7 @@ public class CountGC {
}
if(tsw!=null){
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
}
LIMSUM=limsum;
return overall;
=====================================
current/jgi/CoveragePileup.java
=====================================
@@ -733,8 +733,7 @@ public class CoveragePileup {
}
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
}
=====================================
current/jgi/KmerCoverage.java
=====================================
@@ -227,9 +227,9 @@ public class KmerCoverage {
final int maxCount=(int)(cbits>16 ? Integer.MAX_VALUE : (1L<<cbits)-1);
assert(maxCount>0);
- HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount));
+ HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount+1));
assert(HIST_LEN_PRINT<=Integer.MAX_VALUE) : HIST_LEN_PRINT+", "+Integer.MAX_VALUE;
- HIST_LEN=(int)Tools.min(maxCount, Tools.max(HIST_LEN_PRINT, HIST_LEN));
+ HIST_LEN=(int)Tools.min(maxCount+1, Tools.max(HIST_LEN_PRINT, HIST_LEN));
histogram_total=new long[HIST_LEN];
}
=====================================
current/jgi/KmerNormalize.java
=====================================
@@ -821,8 +821,7 @@ public class KmerNormalize {
tsw.println(r);
if(r.mate!=null){tsw.println(r.mate);}
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
}catch(Throwable t){
System.err.println("ERROR: "+t);
return false;
@@ -872,9 +871,9 @@ public class KmerNormalize {
final int maxCount=(int)(cbits>16 ? Integer.MAX_VALUE : (1L<<cbits)-1);
assert(maxCount>0);
- HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount));
+ HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount+1));
assert(HIST_LEN_PRINT<=Integer.MAX_VALUE) : HIST_LEN_PRINT+", "+Integer.MAX_VALUE;
- HIST_LEN=(int)Tools.min(maxCount, Tools.max(HIST_LEN_PRINT, HIST_LEN));
+ HIST_LEN=(int)Tools.min(maxCount+1, Tools.max(HIST_LEN_PRINT, HIST_LEN));
THREAD_HIST_LEN=Tools.min(THREAD_HIST_LEN, HIST_LEN);
khistogram=new AtomicLongArray(HIST_LEN);
=====================================
current/jgi/RandomGenome.java
=====================================
@@ -171,8 +171,7 @@ public class RandomGenome {
}
}
}
- bsw.poison();
- bsw.waitForFinish();
+ bsw.poisonAndWait();
}
void processAmino(Timer t){
@@ -204,8 +203,7 @@ public class RandomGenome {
bb.clear();
}
}
- bsw.poison();
- bsw.waitForFinish();
+ bsw.poisonAndWait();
}
/*--------------------------------------------------------------*/
=====================================
current/jgi/ReadKmerDepthDistribution.java
=====================================
@@ -236,9 +236,9 @@ public class ReadKmerDepthDistribution {
final int maxCount=(int)(cbits>16 ? Integer.MAX_VALUE : (1L<<cbits)-1);
assert(maxCount>0);
- HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount));
+ HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount+1));
assert(HIST_LEN_PRINT<=Integer.MAX_VALUE) : HIST_LEN_PRINT+", "+Integer.MAX_VALUE;
- HIST_LEN=(int)Tools.min(maxCount, Tools.max(HIST_LEN_PRINT, HIST_LEN));
+ HIST_LEN=(int)Tools.min(maxCount+1, Tools.max(HIST_LEN_PRINT, HIST_LEN));
THREAD_HIST_LEN=Tools.min(THREAD_HIST_LEN, HIST_LEN);
histogram_total=new AtomicLongArray(HIST_LEN);
=====================================
current/pacbio/MergeReadsAndGenome.java
=====================================
@@ -134,8 +134,7 @@ public class MergeReadsAndGenome {
}
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
}
public static long appendReads(ConcurrentReadInputStream cris, TextStreamWriter tsw, long id){
=====================================
current/shared/Parser.java
=====================================
@@ -6,6 +6,7 @@ import java.util.Arrays;
import java.util.HashSet;
import align2.QualityTools;
+import bloom.KCountArray;
import cardinality.CardinalityTracker;
import cardinality.LogLog16;
import cardinality.LogLog2;
@@ -736,6 +737,16 @@ public class Parser {
TaxTree.protFull=true;
}
+ else if(a.equalsIgnoreCase("lockedincrement")){
+ if("auto".equalsIgnoreCase(b)){
+ KCountArray.LOCKED_INCREMENT=true;
+ KCountArray.SET_LOCKED_INCREMENT=false;
+ }else{
+ KCountArray.LOCKED_INCREMENT=Parse.parseBoolean(b);
+ KCountArray.SET_LOCKED_INCREMENT=true;
+ }
+ }
+
else{
return false;
}
=====================================
current/shared/ReadStats.java
=====================================
@@ -847,8 +847,7 @@ public class ReadStats {
tsw.print("\n");
if(y<=0){break;}
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -875,8 +874,7 @@ public class ReadStats {
tsw.print("\n");
if(y<=0){break;}
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -965,8 +963,7 @@ public class ReadStats {
}
}
tsw.print(sb);
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -1015,8 +1012,7 @@ public class ReadStats {
tsw.print("\n");
if(y<=0){break;}
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -1059,8 +1055,7 @@ public class ReadStats {
}
tsw.print("\n");
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -1139,8 +1134,7 @@ public class ReadStats {
// System.err.println(qm+"\t"+qs+"\t"+qi+"\t"+qd);
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -1175,8 +1169,7 @@ public class ReadStats {
);
tsw.print("\n");
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -1201,8 +1194,7 @@ public class ReadStats {
);
tsw.print("\n");
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
@@ -1318,8 +1310,7 @@ public class ReadStats {
bsw.print(bb);
}
}
- bsw.poison();
- bsw.waitForFinish();
+ bsw.poisonAndWait();
errorState|=bsw.errorState;
}
@@ -1361,8 +1352,7 @@ public class ReadStats {
bsw.print(bb);
}
}
- bsw.poison();
- bsw.waitForFinish();
+ bsw.poisonAndWait();
errorState|=bsw.errorState;
}
@@ -1438,8 +1428,7 @@ public class ReadStats {
}
}
}
- bsw.poison();
- bsw.waitForFinish();
+ bsw.poisonAndWait();
errorState|=bsw.errorState;
}
@@ -1478,8 +1467,7 @@ public class ReadStats {
bsw.print(bb);
}
}
- bsw.poison();
- bsw.waitForFinish();
+ bsw.poisonAndWait();
errorState|=bsw.errorState;
}
@@ -1515,8 +1503,7 @@ public class ReadStats {
tsw.print(String.format(Locale.ROOT, "%.1f", i*mult)+"\t"+x+"\t"+x2+"\n");
}
}
- tsw.poison();
- tsw.waitForFinish();
+ tsw.poisonAndWait();
errorState|=tsw.errorState;
}
=====================================
current/shared/Shared.java
=====================================
@@ -125,8 +125,8 @@ public class Shared {
public static final int GAPCOST=Tools.max(1, GAPLEN/64);
public static final byte GAPC='-';
- public static String BBMAP_VERSION_STRING="38.97";
- public static String BBMAP_VERSION_NAME="Honeybee";
+ public static String BBMAP_VERSION_STRING="38.98";
+ public static String BBMAP_VERSION_NAME="Mistletoe";
public static boolean TRIM_READ_COMMENTS=false;
public static boolean TRIM_RNAME=false; //For mapped sam reads
=====================================
current/shared/TrimRead.java
=====================================
@@ -277,6 +277,8 @@ public final class TrimRead implements Serializable {
if(r.match!=null){
return trimReadWithMatch(r, r.samline, leftTrimAmount, rightTrimAmount, minResultingLength, Integer.MAX_VALUE, trimClip);
+ }else if(r.samline!=null){
+ return trimReadWithMatchFast(r, r.samline, leftTrimAmount, rightTrimAmount, minResultingLength);
}
final byte[] bases=r.bases, qual=r.quality;
@@ -289,6 +291,7 @@ public final class TrimRead implements Serializable {
}
final int total=leftTrimAmount+rightTrimAmount;
+// System.err.println("D: L="+leftTrimAmount+", R="+rightTrimAmount+", len="+r.length()+", tot="+total);
if(total>0){
r.bases=KillSwitch.copyOfRange(bases, leftTrimAmount, len-rightTrimAmount);
r.quality=(leftTrimAmount+rightTrimAmount>=qlen ? null : KillSwitch.copyOfRange(qual, leftTrimAmount, qlen-rightTrimAmount));
@@ -577,44 +580,51 @@ public final class TrimRead implements Serializable {
// }
// }
}
-
- /** Special case of 100% match */
+
+ /** Special case of 100% match, or no match string */
public static int trimReadWithMatchFast(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength){
- assert(r.match!=null);
+ assert(r.match!=null || sl!=null);
if(r.bases==null){return 0;}
if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
final boolean shortmatch=r.shortmatch();
- final byte[] old=r.match;
+ final byte[] oldMatch=r.match;
r.match=null;
+ r.samline=null;
final int trimmed;
- if(sl.strand()==Shared.MINUS){
+// System.err.println(rightTrimAmount+", "+leftTrimAmount);
+ if(sl!=null && sl.strand()==Shared.MINUS){
trimmed=trimByAmount(r, rightTrimAmount, leftTrimAmount, minFinalLength, false);
}else{
trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
}
+ r.samline=sl;
if(trimmed<1){
- r.match=old;
+ r.match=oldMatch;
return 0;
}
- ByteBuilder bb=new ByteBuilder();
final int len=r.length();
- if(shortmatch){
- bb.append((byte)'m');
- if(len>1){bb.append(len);}
- }else{
- for(int i=0; i<len; i++){bb.append((byte)'m');}
+ ByteBuilder bb=new ByteBuilder();
+ if(oldMatch!=null){
+ if(shortmatch){
+ bb.append((byte)'m');
+ if(len>1){bb.append(len);}
+ }else{
+ for(int i=0; i<len; i++){bb.append((byte)'m');}
+ }
+ r.match=bb.toBytes();
+ bb.clear();
}
- r.match=bb.toBytes();
- bb.clear();
if(sl!=null){
sl.pos+=leftTrimAmount;
- if(sl.cigar!=null){
+ if(sl.cigar!=null && sl.cigar.length()>0){
+ char c=sl.cigar.charAt(sl.cigar.length()-1);
+ assert(c=='M' || c=='=') : c+"; "+sl.cigar+"\n"+sl;
bb.append(len);
- bb.append(SamLine.VERSION>1.3 ? '=' : 'm');
+ bb.append(SamLine.VERSION>1.3 ? '=' : 'M');
sl.cigar=bb.toString();
}
sl.seq=r.bases;
@@ -631,11 +641,44 @@ public final class TrimRead implements Serializable {
}
return trimmed;
}
+
+// //Should be unneeded, just use the above function
+// public static int trimReadWithoutMatch(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength){
+// if(r.bases==null){return 0;}
+// if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
+// if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
+//
+// assert(r.match==null);
+// final int trimmed;
+// if(sl!=null && sl.strand()==Shared.MINUS){
+// trimmed=trimByAmount(r, rightTrimAmount, leftTrimAmount, minFinalLength, false);
+// }else{
+// trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
+// }
+// if(trimmed<1){return 0;}
+//
+// if(sl!=null){
+// sl.pos+=leftTrimAmount;
+// assert(sl.cigar==null);
+// sl.seq=r.bases;
+// sl.qual=r.quality;
+// if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
+// ArrayList<String> list=new ArrayList<String>(2);
+// for(int i=0; i<sl.optional.size(); i++){
+// String s=sl.optional.get(i);
+// if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
+// }
+// sl.optional.clear();
+// sl.optional.addAll(list);
+// }
+// }
+// return trimmed;
+// }
//TODO: This is slow
//TODO: Note, returns a negative number if the whole read is supposed to be trimmed
public static int trimReadWithMatch(final Read r, final SamLine sl,
- int leftTrimAmount, int rightTrimAmount, int minFinalLength, int scafLen, boolean trimClip){
+ int leftTrimAmount, int rightTrimAmount, int minFinalLength, long scafLen, boolean trimClip){
if(r.bases==null || (leftTrimAmount<1 && rightTrimAmount<1 && !trimClip)){return 0;}
if(!r.containsNonM() && !trimClip){
return trimReadWithMatchFast(r, sl, leftTrimAmount, rightTrimAmount, minFinalLength);
=====================================
current/stream/SamLine.java
=====================================
@@ -658,7 +658,7 @@ public class SamLine implements Serializable {
/*---------------- Cigar ----------------*/
/*--------------------------------------------------------------*/
- public static String toCigar13(byte[] match, int readStart, int readStop, int reflen, byte[] bases){
+ public static String toCigar13(byte[] match, int readStart, int readStop, long reflen, byte[] bases){
if(match==null || readStart==readStop){return null;}
ByteBuilder sb=new ByteBuilder(8);
int count=0;
@@ -760,7 +760,7 @@ public class SamLine implements Serializable {
}
- public static String toCigar14(byte[] match, int readStart, int readStop, int reflen, byte[] bases){
+ public static String toCigar14(byte[] match, int readStart, int readStop, long reflen, byte[] bases){
// assert(false) : readStart+", "+readStop+", "+reflen;
if(match==null || readStart==readStop){return null;}
ByteBuilder sb=new ByteBuilder(8);
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+bbmap (38.98+dfsg-1) unstable; urgency=medium
+
+ * New upstream version
+
+ -- Andreas Tille <tille at debian.org> Tue, 23 Aug 2022 16:19:34 +0200
+
bbmap (38.97+dfsg-1) unstable; urgency=medium
* New upstream version
=====================================
docs/changelog.txt
=====================================
@@ -901,6 +901,9 @@ Added maxload flag to BBCMS.
Added trimtips to BBDuk, mainly for trimming adapters on both ends of PacBio reads.
Changed processing of reads longer than 200bp to force ASCII-33 quality.
Enable automatic entryfilter in Clumpify to handle libraries with mostly identical reads.
+38.98
+Added bloom filter option lockedincrement, which substantially increases accuracy of overloaded counting Bloom filters, with a ~15% speed reduction. Disabled by default, except for BBCMS.
+Fixed a possible race condition in RQCFilter file writing.
todo: outshort for fungalrelease.
View it on GitLab: https://salsa.debian.org/med-team/bbmap/-/compare/d95e7fb5b260358a2fae04f475d75b1eed9f4605...346038b3cda2e049556d42992e92585b82c151f4
--
View it on GitLab: https://salsa.debian.org/med-team/bbmap/-/compare/d95e7fb5b260358a2fae04f475d75b1eed9f4605...346038b3cda2e049556d42992e92585b82c151f4
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220823/371b3841/attachment-0001.htm>
More information about the debian-med-commit
mailing list