[med-svn] [Git][med-team/bbmap][upstream] New upstream version 38.98+dfsg

Andreas Tille (@tille) gitlab at salsa.debian.org
Tue Aug 23 15:23:20 BST 2022



Andreas Tille pushed to branch upstream at Debian Med / bbmap


Commits:
cc001214 by Andreas Tille at 2022-08-23T16:19:16+02:00
New upstream version 38.98+dfsg
- - - - -


22 changed files:

- README.md
- bbcms.sh
- current/align2/BBSplitter.java
- current/bloom/BloomFilterCorrectorWrapper.java
- current/bloom/KCountArray.java
- current/bloom/KCountArray7MTA.java
- current/driver/ConcatenateFiles.java
- current/driver/SelectReads.java
- current/jgi/BBDuk.java
- current/jgi/CountGC.java
- current/jgi/CoveragePileup.java
- current/jgi/KmerCoverage.java
- current/jgi/KmerNormalize.java
- current/jgi/RandomGenome.java
- current/jgi/ReadKmerDepthDistribution.java
- current/pacbio/MergeReadsAndGenome.java
- current/shared/Parser.java
- current/shared/ReadStats.java
- current/shared/Shared.java
- current/shared/TrimRead.java
- current/stream/SamLine.java
- docs/changelog.txt


Changes:

=====================================
README.md
=====================================
@@ -3,4 +3,4 @@
 # Language: Java, Bash
 # Information about documentation is in /docs/readme.txt.
 
-# Version 38.97
+# Version 38.98


=====================================
bbcms.sh
=====================================
@@ -3,7 +3,7 @@
 usage(){
 echo "
 Written by Brian Bushnell
-Last modified September 4, 2018
+Last modified August 8, 2022
 
 Description:  Error corrects reads and/or filters by depth, storing
 kmer counts in a count-min sketch (a Bloom filter variant).
@@ -64,6 +64,8 @@ cells=          Option to set the number of cells manually.  By default this
                 to set this is to ensure deterministic output.
 seed=0          This will change the hash function used.  Useful if running
                 iteratively with a very full table.  -1 uses a random seed.
+lockedincrement=t  Increases counting accuracy for a slight speed penalty.
+                Could have low performance on very low-complexity sequence.
                 
 Depth filtering parameters:
 mincount=0      If positive, reads with kmer counts below mincount will


=====================================
current/align2/BBSplitter.java
=====================================
@@ -441,8 +441,7 @@ public class BBSplitter {
 			}
 			tf.close();
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		
 		return refname;
 	}
@@ -1154,8 +1153,7 @@ public class BBSplitter {
 			
 			tsw.println("samtools index "+bam3);
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		
 		try {
 			File f=new File(outname);
@@ -1237,7 +1235,7 @@ public class BBSplitter {
 				sb.setLength(0);
 			}
 		}
-		tsw.poison();
+		tsw.poisonAndWait();
 	}
 	
 	static final void clearStatics(){


=====================================
current/bloom/BloomFilterCorrectorWrapper.java
=====================================
@@ -80,6 +80,7 @@ public class BloomFilterCorrectorWrapper {
 		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
 		ReadWrite.MAX_ZIP_THREADS=Tools.max(Shared.threads()>1 ? 2 : 1, Shared.threads()>20 ? Shared.threads()/2 : Shared.threads());
 		BBMerge.strict=true;
+		KCountArray.LOCKED_INCREMENT=true;
 		
 		//Create a parser object
 		Parser parser=new Parser();


=====================================
current/bloom/KCountArray.java
=====================================
@@ -59,8 +59,9 @@ public abstract class KCountArray implements Serializable {
 //			}else{
 //				kca=new KCountArray8MT(cells_, cbits_, gap_, hashes_, prefilter); //Like 7MT but uses prefilter
 //			}
-			kca=new KCountArray7MTA(cells_, cbits_, gap_, hashes_, prefilter, prefilterLimit_); //Like 4MT but uses primes
-
+			
+			kca=new KCountArray7MTA(cells_, cbits_, gap_, hashes_, prefilter, prefilterLimit_);
+			
 //			if(prefilter==null){
 //				kca=new KCountArray9MT(cells_, cbits_, gap_, hashes_); //Like 7MT but uses canonical kmers
 //			}else{
@@ -520,4 +521,8 @@ public abstract class KCountArray implements Serializable {
 	private static final long canonK=4;
 	static final long canonMask=(1<<(canonK*2))-1; //e.g. 255 for k=4
 	
+	/** Increases accuracy of overloaded multi-bit tables */
+	public static boolean LOCKED_INCREMENT=false;
+	public static boolean SET_LOCKED_INCREMENT=false;
+	
 }


=====================================
current/bloom/KCountArray7MTA.java
=====================================
@@ -5,6 +5,8 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Random;
 import java.util.concurrent.atomic.AtomicIntegerArray;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
 
 import shared.KillSwitch;
 import shared.Primes;
@@ -85,6 +87,12 @@ public final class KCountArray7MTA extends KCountArray {
 //		System.out.println("cells="+cells+", words="+words+", wordsPerArray="+wordsPerArray+", numArrays="+numArrays+", hashes="+hashes);
 		
 		matrix=allocMatrix(numArrays, wordsPerArray);
+		
+		useLocks=(cellBits>1 && hashes>1) && (LOCKED_INCREMENT || !SET_LOCKED_INCREMENT);
+		if(useLocks){
+			locks=new Lock[NUM_LOCKS];
+			for(int i=0; i<NUM_LOCKS; i++){locks[i]=new ReentrantLock();}
+		}else{locks=null;}
 				
 //		matrix=new AtomicIntegerArray[numArrays];
 //		for(int i=0; i<matrix.length; i++){
@@ -266,20 +274,38 @@ public final class KCountArray7MTA extends KCountArray {
 			if(x<prefilterLimit){return;}
 		}
 		
+		if(useLocks){incrementLocked(rawKey);}
+		else{incrementUnlocked(rawKey);}
+	}
+	
+	private void incrementUnlocked(long rawKey){
 		long key2=rawKey;
 		for(int i=0; i<hashes; i++){
 			key2=hash(key2, i);
 			if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
-//			assert(readHashed(key2)==0);
-			
-//			int bnum=(int)(key2&arrayMask);
 			incrementHashedLocal(key2);
-//			assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
-//			assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
 			key2=Long.rotateRight(key2, hashBits);
 		}
 	}
 	
+	private void incrementLocked(long rawKey){
+		final Lock lock=getLock(rawKey);
+		lock.lock();
+		final int min=read(rawKey);
+		if(min>=maxValue){
+			lock.unlock();
+			return;
+		}
+		long key2=rawKey;
+		for(int i=0; i<hashes; i++){
+			key2=hash(key2, i);
+			if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
+			incrementHashedLocal_ifAtMost(key2, min);
+			key2=Long.rotateRight(key2, hashBits);
+		}
+		lock.unlock();
+	}
+	
 	@Override
 	public final void decrement(final long rawKey){
 		if(verbose){System.err.println("\n*** Decrementing raw key "+rawKey+" ***");}
@@ -315,18 +341,39 @@ public final class KCountArray7MTA extends KCountArray {
 			if(x<prefilterLimit){return x;}
 		}
 		
+		if(useLocks){return incrementAndReturnUnincrementedLocked(rawKey, incr);}
+		else{return incrementAndReturnUnincrementedUnlocked(rawKey, incr);}
+	}
+	
+	private int incrementAndReturnUnincrementedLocked(final long rawKey, final int incr){
+		final Lock lock=getLock(rawKey);
+		lock.lock();
+		final int min=read(rawKey);
+		if(min>=maxValue){
+			lock.unlock();
+			return min;
+		}
+		long key2=rawKey;
+		int value=maxValue;
+		for(int i=0; i<hashes; i++){
+			key2=hash(key2, i);
+			if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
+			int x=incrementHashedLocalAndReturnUnincremented_ifAtMost(key2, incr, min);
+			value=min(value, x);
+			key2=Long.rotateRight(key2, hashBits);
+		}
+		lock.unlock();
+		return value;
+	}
+	
+	private int incrementAndReturnUnincrementedUnlocked(final long rawKey, final int incr){
 		long key2=rawKey;
 		int value=maxValue;
 		for(int i=0; i<hashes; i++){
 			key2=hash(key2, i);
 			if(verbose){System.err.println("key2="+key2+", value="+readHashed(key2));}
-//			assert(readHashed(key2)==0);
-			
-//			int bnum=(int)(key2&arrayMask);
 			int x=incrementHashedLocalAndReturnUnincremented(key2, incr);
 			value=min(value, x);
-//			assert(read(rawKey)<=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
-//			assert(readHashed(key2)>=min+incr) : "i="+i+", original="+min+", new should be <="+(min+incr)+", new="+read(rawKey)+", max="+maxValue+", key="+rawKey;
 			key2=Long.rotateRight(key2, hashBits);
 		}
 		return value;
@@ -588,6 +635,11 @@ public final class KCountArray7MTA extends KCountArray {
 		}
 	}
 	
+	private final Lock getLock(long rawKey){
+		final Lock lock=locks[(int)((rawKey&Long.MAX_VALUE)%(NUM_LOCKS))];
+		return lock;
+	}
+	
 	private int incrementHashedLocal(long key){
 		final int num=(int)(key&arrayMask);
 		final AtomicIntegerArray array=matrix[num];
@@ -607,6 +659,25 @@ public final class KCountArray7MTA extends KCountArray {
 		return value;
 	}
 	
+	private int incrementHashedLocal_ifAtMost(long key, final int thresh){
+		final int num=(int)(key&arrayMask);
+		final AtomicIntegerArray array=matrix[num];
+		key=(key>>>arrayBits)%(cellMod);
+//		key=(key>>>(arrayBits+1))%(cellMod);
+		int index=(int)(key>>>indexShift);
+		int cellShift=(int)(cellBits*key);
+		int value, word, word2;
+		do{
+			assert(index>=0) : key+", "+cellMod+", "+cellBits+", "+valueMask+", "+arrayMask+", "+index+", "+num;
+			word=array.get(index);
+			value=((word>>>cellShift)&valueMask);
+			if(value>thresh){return value;}//Too high; don't increment
+			value=min(value+1, maxValue);
+			word2=(value<<cellShift)|(word&~((valueMask)<<cellShift));
+		}while(word!=word2 && !array.compareAndSet(index, word, word2));
+		return value;
+	}
+	
 	private int incrementHashedLocalAndReturnUnincremented(long key, int incr){
 		assert(incr>=0);
 		final int num=(int)(key&arrayMask);
@@ -626,6 +697,26 @@ public final class KCountArray7MTA extends KCountArray {
 		return value;
 	}
 	
+	private int incrementHashedLocalAndReturnUnincremented_ifAtMost(long key, int incr, final int thresh){
+		assert(incr>=0);
+		final int num=(int)(key&arrayMask);
+		final AtomicIntegerArray array=matrix[num];
+		key=(key>>>arrayBits)%(cellMod);
+//		key=(key>>>(arrayBits+1))%(cellMod);
+		int index=(int)(key>>>indexShift);
+		int cellShift=(int)(cellBits*key);
+		int value, word, word2;
+		do{
+			word=array.get(index);
+			value=((word>>>cellShift)&valueMask);
+			if(value>thresh){return value;}//Too high; don't increment
+			int value2=min(value+incr, maxValue);
+			word2=(value2<<cellShift)|(word&~((valueMask)<<cellShift));
+		}while(word!=word2 && !array.compareAndSet(index, word, word2));
+//		if(value==1){cellsUsedPersonal.incrementAndGet(num);}
+		return value;
+	}
+	
 	private int decrementHashedLocal(long key){
 		final int num=(int)(key&arrayMask);
 		final AtomicIntegerArray array=matrix[num];
@@ -689,6 +780,10 @@ public final class KCountArray7MTA extends KCountArray {
 	private static final int hashCellMask=hashArrayLength-1;
 	
 	private KCountArray prefilter;
+	
+	private final transient boolean useLocks;
+	private final transient Lock[] locks;
+	private static final transient int NUM_LOCKS=1999;
 
 	private static long counter=0;
 	private static long SEEDMASK=0;


=====================================
current/driver/ConcatenateFiles.java
=====================================
@@ -53,8 +53,7 @@ public class ConcatenateFiles {
 			}
 			System.err.print(".");
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 	}
 	
 	public static void concatenateDirectory(final String in, String out) throws IOException{


=====================================
current/driver/SelectReads.java
=====================================
@@ -59,8 +59,7 @@ public final class SelectReads {
 			}
 		}
 		tf.close();
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		
 	}
 	


=====================================
current/jgi/BBDuk.java
=====================================
@@ -1021,6 +1021,8 @@ public class BBDuk {
 				s=Data.findPath("?mtst.fa");
 			}else if("adapters".equalsIgnoreCase(s)){
 				s=Data.findPath("?adapters.fa");
+			}else if("pacbioadapter".equalsIgnoreCase(s) || "pacbioadapters".equalsIgnoreCase(s)){
+				s=Data.findPath("?PacBioAdapter.fa");
 			}else if("truseq".equalsIgnoreCase(s)){
 				s=Data.findPath("?truseq.fa.gz");
 			}else if("nextera".equalsIgnoreCase(s)){


=====================================
current/jgi/CountGC.java
=====================================
@@ -233,8 +233,7 @@ public class CountGC {
 		}
 		
 		if(tsw!=null){
-			tsw.poison();
-			tsw.waitForFinish();
+			tsw.poisonAndWait();
 		}
 		LIMSUM=limsum;
 		return overall;
@@ -334,8 +333,7 @@ public class CountGC {
 		}
 		
 		if(tsw!=null){
-			tsw.poison();
-			tsw.waitForFinish();
+			tsw.poisonAndWait();
 		}
 		LIMSUM=limsum;
 		return overall;


=====================================
current/jgi/CoveragePileup.java
=====================================
@@ -733,8 +733,7 @@ public class CoveragePileup {
 			}
 		}
 		
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 	}
 	
 	


=====================================
current/jgi/KmerCoverage.java
=====================================
@@ -227,9 +227,9 @@ public class KmerCoverage {
 			
 			final int maxCount=(int)(cbits>16 ? Integer.MAX_VALUE : (1L<<cbits)-1);
 			assert(maxCount>0);
-			HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount));
+			HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount+1));
 			assert(HIST_LEN_PRINT<=Integer.MAX_VALUE) : HIST_LEN_PRINT+", "+Integer.MAX_VALUE;
-			HIST_LEN=(int)Tools.min(maxCount, Tools.max(HIST_LEN_PRINT, HIST_LEN));
+			HIST_LEN=(int)Tools.min(maxCount+1, Tools.max(HIST_LEN_PRINT, HIST_LEN));
 			
 			histogram_total=new long[HIST_LEN];
 		}


=====================================
current/jgi/KmerNormalize.java
=====================================
@@ -821,8 +821,7 @@ public class KmerNormalize {
 				tsw.println(r);
 				if(r.mate!=null){tsw.println(r.mate);}
 			}
-			tsw.poison();
-			tsw.waitForFinish();
+			tsw.poisonAndWait();
 		}catch(Throwable t){
 			System.err.println("ERROR: "+t);
 			return false;
@@ -872,9 +871,9 @@ public class KmerNormalize {
 			
 			final int maxCount=(int)(cbits>16 ? Integer.MAX_VALUE : (1L<<cbits)-1);
 			assert(maxCount>0);
-			HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount));
+			HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount+1));
 			assert(HIST_LEN_PRINT<=Integer.MAX_VALUE) : HIST_LEN_PRINT+", "+Integer.MAX_VALUE;
-			HIST_LEN=(int)Tools.min(maxCount, Tools.max(HIST_LEN_PRINT, HIST_LEN));
+			HIST_LEN=(int)Tools.min(maxCount+1, Tools.max(HIST_LEN_PRINT, HIST_LEN));
 			THREAD_HIST_LEN=Tools.min(THREAD_HIST_LEN, HIST_LEN);
 
 			khistogram=new AtomicLongArray(HIST_LEN);


=====================================
current/jgi/RandomGenome.java
=====================================
@@ -171,8 +171,7 @@ public class RandomGenome {
 				}
 			}
 		}
-		bsw.poison();
-		bsw.waitForFinish();
+		bsw.poisonAndWait();
 	}
 	
 	void processAmino(Timer t){
@@ -204,8 +203,7 @@ public class RandomGenome {
 				bb.clear();
 			}
 		}
-		bsw.poison();
-		bsw.waitForFinish();
+		bsw.poisonAndWait();
 	}
 	
 	/*--------------------------------------------------------------*/


=====================================
current/jgi/ReadKmerDepthDistribution.java
=====================================
@@ -236,9 +236,9 @@ public class ReadKmerDepthDistribution {
 			
 			final int maxCount=(int)(cbits>16 ? Integer.MAX_VALUE : (1L<<cbits)-1);
 			assert(maxCount>0);
-			HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount));
+			HIST_LEN_PRINT=Tools.max(1, Tools.min(HIST_LEN_PRINT, maxCount+1));
 			assert(HIST_LEN_PRINT<=Integer.MAX_VALUE) : HIST_LEN_PRINT+", "+Integer.MAX_VALUE;
-			HIST_LEN=(int)Tools.min(maxCount, Tools.max(HIST_LEN_PRINT, HIST_LEN));
+			HIST_LEN=(int)Tools.min(maxCount+1, Tools.max(HIST_LEN_PRINT, HIST_LEN));
 			THREAD_HIST_LEN=Tools.min(THREAD_HIST_LEN, HIST_LEN);
 
 			histogram_total=new AtomicLongArray(HIST_LEN);


=====================================
current/pacbio/MergeReadsAndGenome.java
=====================================
@@ -134,8 +134,7 @@ public class MergeReadsAndGenome {
 			}
 		}
 		
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 	}
 	
 	public static long appendReads(ConcurrentReadInputStream cris, TextStreamWriter tsw, long id){


=====================================
current/shared/Parser.java
=====================================
@@ -6,6 +6,7 @@ import java.util.Arrays;
 import java.util.HashSet;
 
 import align2.QualityTools;
+import bloom.KCountArray;
 import cardinality.CardinalityTracker;
 import cardinality.LogLog16;
 import cardinality.LogLog2;
@@ -736,6 +737,16 @@ public class Parser {
 			TaxTree.protFull=true;
 		}
 		
+		else if(a.equalsIgnoreCase("lockedincrement")){
+			if("auto".equalsIgnoreCase(b)){
+				KCountArray.LOCKED_INCREMENT=true;
+				KCountArray.SET_LOCKED_INCREMENT=false;
+			}else{
+				KCountArray.LOCKED_INCREMENT=Parse.parseBoolean(b);
+				KCountArray.SET_LOCKED_INCREMENT=true;
+			}
+		}
+		
 		else{
 			return false;
 		}


=====================================
current/shared/ReadStats.java
=====================================
@@ -847,8 +847,7 @@ public class ReadStats {
 			tsw.print("\n");
 			if(y<=0){break;}
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -875,8 +874,7 @@ public class ReadStats {
 			tsw.print("\n");
 			if(y<=0){break;}
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -965,8 +963,7 @@ public class ReadStats {
 			}
 		}
 		tsw.print(sb);
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -1015,8 +1012,7 @@ public class ReadStats {
 			tsw.print("\n");
 			if(y<=0){break;}
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -1059,8 +1055,7 @@ public class ReadStats {
 			}
 			tsw.print("\n");
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -1139,8 +1134,7 @@ public class ReadStats {
 //			System.err.println(qm+"\t"+qs+"\t"+qi+"\t"+qd);
 		}
 		
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -1175,8 +1169,7 @@ public class ReadStats {
 					);
 			tsw.print("\n");
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -1201,8 +1194,7 @@ public class ReadStats {
 					);
 			tsw.print("\n");
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	
@@ -1318,8 +1310,7 @@ public class ReadStats {
 				bsw.print(bb);
 			}
 		}
-		bsw.poison();
-		bsw.waitForFinish();
+		bsw.poisonAndWait();
 		errorState|=bsw.errorState;
 	}
 	
@@ -1361,8 +1352,7 @@ public class ReadStats {
 				bsw.print(bb);
 			}
 		}
-		bsw.poison();
-		bsw.waitForFinish();
+		bsw.poisonAndWait();
 		errorState|=bsw.errorState;
 	}
 	
@@ -1438,8 +1428,7 @@ public class ReadStats {
 				}
 			}
 		}
-		bsw.poison();
-		bsw.waitForFinish();
+		bsw.poisonAndWait();
 		errorState|=bsw.errorState;
 	}
 	
@@ -1478,8 +1467,7 @@ public class ReadStats {
 				bsw.print(bb);
 			}
 		}
-		bsw.poison();
-		bsw.waitForFinish();
+		bsw.poisonAndWait();
 		errorState|=bsw.errorState;
 	}
 	
@@ -1515,8 +1503,7 @@ public class ReadStats {
 				tsw.print(String.format(Locale.ROOT, "%.1f", i*mult)+"\t"+x+"\t"+x2+"\n");
 			}
 		}
-		tsw.poison();
-		tsw.waitForFinish();
+		tsw.poisonAndWait();
 		errorState|=tsw.errorState;
 	}
 	


=====================================
current/shared/Shared.java
=====================================
@@ -125,8 +125,8 @@ public class Shared {
 	public static final int GAPCOST=Tools.max(1, GAPLEN/64);
 	public static final byte GAPC='-';
 	
-	public static String BBMAP_VERSION_STRING="38.97";
-	public static String BBMAP_VERSION_NAME="Honeybee";
+	public static String BBMAP_VERSION_STRING="38.98";
+	public static String BBMAP_VERSION_NAME="Mistletoe";
 	
 	public static boolean TRIM_READ_COMMENTS=false;
 	public static boolean TRIM_RNAME=false; //For mapped sam reads


=====================================
current/shared/TrimRead.java
=====================================
@@ -277,6 +277,8 @@ public final class TrimRead implements Serializable {
 		
 		if(r.match!=null){
 			return trimReadWithMatch(r, r.samline, leftTrimAmount, rightTrimAmount, minResultingLength, Integer.MAX_VALUE, trimClip);
+		}else if(r.samline!=null){
+			return trimReadWithMatchFast(r, r.samline, leftTrimAmount, rightTrimAmount, minResultingLength);
 		}
 		
 		final byte[] bases=r.bases, qual=r.quality;
@@ -289,6 +291,7 @@ public final class TrimRead implements Serializable {
 		}
 		
 		final int total=leftTrimAmount+rightTrimAmount;
+//		System.err.println("D: L="+leftTrimAmount+", R="+rightTrimAmount+", len="+r.length()+", tot="+total);
 		if(total>0){
 			r.bases=KillSwitch.copyOfRange(bases, leftTrimAmount, len-rightTrimAmount);
 			r.quality=(leftTrimAmount+rightTrimAmount>=qlen ? null : KillSwitch.copyOfRange(qual, leftTrimAmount, qlen-rightTrimAmount));
@@ -577,44 +580,51 @@ public final class TrimRead implements Serializable {
 //			}
 //		}
 	}
-	
-	/** Special case of 100% match */
+
+	/** Special case of 100% match, or no match string */
 	public static int trimReadWithMatchFast(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength){
-		assert(r.match!=null);
+		assert(r.match!=null || sl!=null);
 		if(r.bases==null){return 0;}
 		if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
 		if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
 		
 		final boolean shortmatch=r.shortmatch();
-		final byte[] old=r.match;
+		final byte[] oldMatch=r.match;
 		r.match=null;
+		r.samline=null;
 		final int trimmed;
-		if(sl.strand()==Shared.MINUS){
+//		System.err.println(rightTrimAmount+", "+leftTrimAmount);
+		if(sl!=null && sl.strand()==Shared.MINUS){
 			trimmed=trimByAmount(r, rightTrimAmount, leftTrimAmount, minFinalLength, false);
 		}else{
 			trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
 		}
+		r.samline=sl;
 		if(trimmed<1){
-			r.match=old;
+			r.match=oldMatch;
 			return 0;
 		}
 		
-		ByteBuilder bb=new ByteBuilder();
 		final int len=r.length();
-		if(shortmatch){
-			bb.append((byte)'m');
-			if(len>1){bb.append(len);}
-		}else{
-			for(int i=0; i<len; i++){bb.append((byte)'m');}
+		ByteBuilder bb=new ByteBuilder();
+		if(oldMatch!=null){
+			if(shortmatch){
+				bb.append((byte)'m');
+				if(len>1){bb.append(len);}
+			}else{
+				for(int i=0; i<len; i++){bb.append((byte)'m');}
+			}
+			r.match=bb.toBytes();
+			bb.clear();
 		}
-		r.match=bb.toBytes();
-		bb.clear();
 		
 		if(sl!=null){
 			sl.pos+=leftTrimAmount;
-			if(sl.cigar!=null){
+			if(sl.cigar!=null && sl.cigar.length()>0){
+				char c=sl.cigar.charAt(sl.cigar.length()-1);
+				assert(c=='M' || c=='=') : c+"; "+sl.cigar+"\n"+sl;
 				bb.append(len);
-				bb.append(SamLine.VERSION>1.3 ? '=' : 'm');
+				bb.append(SamLine.VERSION>1.3 ? '=' : 'M');
 				sl.cigar=bb.toString();
 			}
 			sl.seq=r.bases;
@@ -631,11 +641,44 @@ public final class TrimRead implements Serializable {
 		}
 		return trimmed;
 	}
+	
+//	//Should be unneeded, just use the above function
+//	public static int trimReadWithoutMatch(final Read r, final SamLine sl, final int leftTrimAmount, final int rightTrimAmount, final int minFinalLength){
+//		if(r.bases==null){return 0;}
+//		if(leftTrimAmount<1 && rightTrimAmount<1){return 0;}
+//		if(leftTrimAmount+rightTrimAmount>=r.length()){return -leftTrimAmount-rightTrimAmount;}
+//		
+//		assert(r.match==null);
+//		final int trimmed;
+//		if(sl!=null && sl.strand()==Shared.MINUS){
+//			trimmed=trimByAmount(r, rightTrimAmount, leftTrimAmount, minFinalLength, false);
+//		}else{
+//			trimmed=trimByAmount(r, leftTrimAmount, rightTrimAmount, minFinalLength, false);
+//		}
+//		if(trimmed<1){return 0;}
+//		
+//		if(sl!=null){
+//			sl.pos+=leftTrimAmount;
+//			assert(sl.cigar==null);
+//			sl.seq=r.bases;
+//			sl.qual=r.quality;
+//			if(trimmed>0 && sl.optional!=null && sl.optional.size()>0){
+//				ArrayList<String> list=new ArrayList<String>(2);
+//				for(int i=0; i<sl.optional.size(); i++){
+//					String s=sl.optional.get(i);
+//					if(s.startsWith("PG:") || s.startsWith("RG:") || s.startsWith("X") || s.startsWith("Y") || s.startsWith("Z")){list.add(s);} //Only keep safe flags.
+//				}
+//				sl.optional.clear();
+//				sl.optional.addAll(list);
+//			}
+//		}
+//		return trimmed;
+//	}
 
 	//TODO: This is slow
 	//TODO: Note, returns a negative number if the whole read is supposed to be trimmed
 	public static int trimReadWithMatch(final Read r, final SamLine sl, 
-			int leftTrimAmount, int rightTrimAmount, int minFinalLength, int scafLen, boolean trimClip){
+			int leftTrimAmount, int rightTrimAmount, int minFinalLength, long scafLen, boolean trimClip){
 		if(r.bases==null || (leftTrimAmount<1 && rightTrimAmount<1 && !trimClip)){return 0;}
 		if(!r.containsNonM() && !trimClip){
 			return trimReadWithMatchFast(r, sl, leftTrimAmount, rightTrimAmount, minFinalLength);


=====================================
current/stream/SamLine.java
=====================================
@@ -658,7 +658,7 @@ public class SamLine implements Serializable {
 	/*----------------             Cigar            ----------------*/
 	/*--------------------------------------------------------------*/
 	
-	public static String toCigar13(byte[] match, int readStart, int readStop, int reflen, byte[] bases){
+	public static String toCigar13(byte[] match, int readStart, int readStop, long reflen, byte[] bases){
 		if(match==null || readStart==readStop){return null;}
 		ByteBuilder sb=new ByteBuilder(8);
 		int count=0;
@@ -760,7 +760,7 @@ public class SamLine implements Serializable {
 	}
 	
 	
-	public static String toCigar14(byte[] match, int readStart, int readStop, int reflen, byte[] bases){
+	public static String toCigar14(byte[] match, int readStart, int readStop, long reflen, byte[] bases){
 //		assert(false) : readStart+", "+readStop+", "+reflen;
 		if(match==null || readStart==readStop){return null;}
 		ByteBuilder sb=new ByteBuilder(8);


=====================================
docs/changelog.txt
=====================================
@@ -901,6 +901,9 @@ Added maxload flag to BBCMS.
 Added trimtips to BBDuk, mainly for trimming adapters on both ends of PacBio reads.
 Changed processing of reads longer than 200bp to force ASCII-33 quality.
 Enable automatic entryfilter in Clumpify to handle libraries with mostly identical reads.
+38.98
+Added bloom filter option lockedincrement, which substantially increases accuracy of overloaded counting Bloom filters, with a ~15% speed reduction. Disabled by default, except for BBCMS.
+Fixed a possible race condition in RQCFilter file writing.
 
 
 todo: outshort for fungalrelease.



View it on GitLab: https://salsa.debian.org/med-team/bbmap/-/commit/cc00121489282111028657399afd6616b4bf7017

-- 
View it on GitLab: https://salsa.debian.org/med-team/bbmap/-/commit/cc00121489282111028657399afd6616b4bf7017
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220823/e920a9a7/attachment-0001.htm>


More information about the debian-med-commit mailing list