[med-svn] [Git][med-team/milib][master] 6 commits: New upstream version 2.2.0+dfsg

Pierre Gruet (@pgt) gitlab at salsa.debian.org
Fri Dec 30 14:48:35 GMT 2022



Pierre Gruet pushed to branch master at Debian Med / milib


Commits:
83910ebc by Pierre Gruet at 2022-12-30T14:34:19+01:00
New upstream version 2.2.0+dfsg
- - - - -
390d1de7 by Pierre Gruet at 2022-12-30T14:34:24+01:00
Update upstream source from tag 'upstream/2.2.0+dfsg'

Update to upstream version '2.2.0+dfsg'
with Debian dir f6d4a25326159ffe808c7218651c54de047b3ac2
- - - - -
3e77efd7 by Pierre Gruet at 2022-12-30T14:38:08+01:00
Raising Standards version to 4.6.2 (no change)

- - - - -
981be991 by Pierre Gruet at 2022-12-30T14:38:13+01:00
Refreshing patches

- - - - -
eb241869 by Pierre Gruet at 2022-12-30T14:38:27+01:00
Updating changelog

- - - - -
6cbd8fa8 by Pierre Gruet at 2022-12-30T14:38:42+01:00
Upload to unstable

- - - - -


22 changed files:

- build.gradle.kts
- debian/changelog
- debian/control
- debian/patches/build_gradle.patch
- src/main/java/com/milaboratory/core/alignment/kaligner1/KMapper.java
- src/main/java/com/milaboratory/core/alignment/kaligner2/KMapper2.java
- src/main/java/com/milaboratory/core/motif/BitapData.java
- src/main/java/com/milaboratory/core/motif/BitapPattern.java
- src/main/java/com/milaboratory/core/motif/BitapStateIterator.java
- + src/main/java/com/milaboratory/core/motif/LongArrayBitHelper.java
- src/main/java/com/milaboratory/core/motif/Motif.java
- src/main/java/com/milaboratory/core/sequence/NSequenceWithQualityBuilder.java
- src/main/java/com/milaboratory/primitivio/ObjectMapperProvider.java
- src/main/java/com/milaboratory/primitivio/PrimitivIOStateBuilder.java
- src/main/java/com/milaboratory/primitivio/Util.java
- src/main/java/com/milaboratory/util/ArraysUtils.java
- src/main/java/com/milaboratory/util/GlobalObjectMappers.java
- src/main/java/com/milaboratory/util/ObjectSerializer.java
- src/main/java/com/milaboratory/util/sorting/Sorter.java
- src/test/java/com/milaboratory/core/motif/BitapPatternTest.java
- + src/test/java/com/milaboratory/core/motif/LongArrayBitHelperTest.java
- src/test/java/com/milaboratory/core/motif/MotifTest.java


Changes:

=====================================
build.gradle.kts
=====================================
@@ -50,7 +50,7 @@ repositories {
     }
 }
 
-val jacksonBomVersion = "2.13.3"
+val jacksonBomVersion = "2.14.0"
 val junitVersion = "4.13.2"
 val redberryPipeVersion = "1.3.0"
 


=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+milib (2.2.0+dfsg-1) unstable; urgency=medium
+
+  * New upstream version 2.2.0+dfsg
+  * Refreshing patches
+  * Raising Standards version to 4.6.2 (no change)
+
+ -- Pierre Gruet <pgt at debian.org>  Fri, 30 Dec 2022 14:38:35 +0100
+
 milib (2.1.0+dfsg-2) unstable; urgency=medium
 
   * Revising the list of (build-)dependencies and the classpath


=====================================
debian/control
=====================================
@@ -22,7 +22,7 @@ Build-Depends-Indep: junit4 <!nocheck>,
                      libmockito-java <!nocheck>,
                      libredberry-pipe-java,
                      libtrove3-java
-Standards-Version: 4.6.1
+Standards-Version: 4.6.2
 Vcs-Browser: https://salsa.debian.org/med-team/milib
 Vcs-Git: https://salsa.debian.org/med-team/milib.git
 Homepage: https://milaboratory.com/


=====================================
debian/patches/build_gradle.patch
=====================================
@@ -60,10 +60,10 @@ Last-Update: 2022-07-01
 -    }
  }
  
--val jacksonBomVersion = "2.13.3"
+-val jacksonBomVersion = "2.14.0"
 -val junitVersion = "4.13.2"
 -val redberryPipeVersion = "1.3.0"
-+def jacksonBomVersion = "2.13.3"
++def jacksonBomVersion = "2.14.0"
 +def junitVersion = "4.13.2"
 +def redberryPipeVersion = "1.3.0"
  


=====================================
src/main/java/com/milaboratory/core/alignment/kaligner1/KMapper.java
=====================================
@@ -175,7 +175,7 @@ public final class KMapper implements java.io.Serializable {
                    int offsetMask,
                    int kValue,
                    int[][] base, int[] lengths,
-                   float absoluteMinScore,  float relativeMinScore,
+                   float absoluteMinScore, float relativeMinScore,
                    float matchScore, float mismatchPenalty, float offsetShiftPenalty,
                    int minAlignmentLength, int maxIndels,
                    boolean floatingLeftBound, boolean floatingRightBound,
@@ -456,15 +456,18 @@ public final class KMapper implements java.io.Serializable {
 
         IntArrayList seedPositions = new IntArrayList((to - from) / minDistance + 2);
         int seedPosition = from;
-        seedPositions.add(seedPosition);
+        if (!sequence.containsWildcards(seedPosition, seedPosition + kValue))
+            seedPositions.add(seedPosition);
 
         Well19937c random = RandomUtil.getThreadLocalRandom();
 
         while ((seedPosition += random.nextInt(maxDistance + 1 - minDistance) + minDistance) < to - kValue)
-            seedPositions.add(seedPosition);
+            if (!sequence.containsWildcards(seedPosition, seedPosition + kValue))
+                seedPositions.add(seedPosition);
 
         //if (seedPositions.get(seedPositions.size() - 1) != to - kValue)
-        seedPositions.add(to - kValue);
+        if (!sequence.containsWildcards(to - kValue, to))
+            seedPositions.add(to - kValue);
 
         int[] seeds = new int[seedPositions.size()];
 


=====================================
src/main/java/com/milaboratory/core/alignment/kaligner2/KMapper2.java
=====================================
@@ -404,8 +404,8 @@ public final class KMapper2 implements java.io.Serializable {
 
                 //Detecting homopolymeric kMers and dropping them
                 //TODO:::!!!!
-//                if (((kmer ^ (kmer >>> 2)) & tMask) == 0 && ((kmer ^ (kmer << 2)) & (tMask << 2)) == 0)
-//                    continue;
+                //                if (((kmer ^ (kmer >>> 2)) & tMask) == 0 && ((kmer ^ (kmer << 2)) & (tMask << 2)) == 0)
+                //                    continue;
 
                 addKmer(holesMask, kmer, id, i);
             }
@@ -509,15 +509,18 @@ public final class KMapper2 implements java.io.Serializable {
         int seedPosition = from;
 
         // Adding firs possible position
-        seedPositions.add(seedPosition);
+        if (!sequence.containsWildcards(seedPosition, seedPosition + nValue))
+            seedPositions.add(seedPosition);
 
         // Generating random positions of seeds
         RandomGenerator random = RandomUtil.getThreadLocalRandom();
         while ((seedPosition += random.nextInt(maxDistance + 1 - minDistance) + minDistance) < to - nValue)
-            seedPositions.add(seedPosition);
+            if (!sequence.containsWildcards(seedPosition, seedPosition + nValue))
+                seedPositions.add(seedPosition);
 
         // Adding last possible position to the lis of seed positions
-        seedPositions.add(to - nValue);
+        if (!sequence.containsWildcards(to - nValue, to))
+            seedPositions.add(to - nValue);
 
         int kmer;
         final IntArrayList[] candidates = cache.candidates;


=====================================
src/main/java/com/milaboratory/core/motif/BitapData.java
=====================================
@@ -1,50 +1,46 @@
-/*
- * Copyright (c) 2022 MiLaboratories Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
 package com.milaboratory.core.motif;
 
 import com.milaboratory.util.BitArray;
 
-import java.io.Serializable;
+import static com.milaboratory.core.motif.LongArrayBitHelper.resetBitLittleEndian;
+import static com.milaboratory.core.motif.LongArrayBitHelper.upDivideBy64;
+import static com.milaboratory.util.ArraysUtils.deepClone;
 
-final class BitapData implements Serializable {
+final class BitapData {
     final int size;
-    final long[] patternMask;
-    final long[] reversePatternMask;
+    final int segmentCount;
+    final long[][] patternMask;
+    final long[][] reversePatternMask;
 
-    BitapData(int size, long[] patternMask, long[] reversePatternMask) {
+    BitapData(int size, long[][] patternMask, long[][] reversePatternMask) {
         if (patternMask.length != reversePatternMask.length)
             throw new IllegalArgumentException();
+        if (patternMask[0].length != reversePatternMask[0].length)
+            throw new IllegalArgumentException();
 
         this.size = size;
+        this.segmentCount = upDivideBy64(size);
+
+        if (patternMask[0].length != segmentCount)
+            throw new IllegalArgumentException();
+
         this.patternMask = patternMask;
         this.reversePatternMask = reversePatternMask;
     }
 
+    /** Basically transforms bitap data, so that it contains 'N' letters in positions where exactMask has zeros. */
     BitapData toSecondary(BitArray exactMask) {
         if (exactMask.size() != size)
             throw new IllegalArgumentException();
 
-        long[] newPatternMask = patternMask.clone();
-        long[] newReversePatternMask = reversePatternMask.clone();
+        long[][] newPatternMask = deepClone(patternMask);
+        long[][] newReversePatternMask = deepClone(reversePatternMask);
 
-        for (int j = 0; j < size; ++j)
-            if (!exactMask.get(j))
-                for (int i = 0; i < patternMask.length; ++i) {
-                    newPatternMask[i] &= ~(1L << j);
-                    newReversePatternMask[i] &= ~(1L << (size - j - 1));
+        for (int p = 0; p < size; ++p)
+            if (!exactMask.get(p))
+                for (int l = 0; l < patternMask.length; ++l) {
+                    resetBitLittleEndian(newPatternMask[l], p);
+                    resetBitLittleEndian(newReversePatternMask[l], size - p - 1);
                 }
         return new BitapData(size, newPatternMask, newReversePatternMask);
     }


=====================================
src/main/java/com/milaboratory/core/motif/BitapPattern.java
=====================================
@@ -22,7 +22,7 @@ import com.milaboratory.util.BitArray;
  * Use {@link Motif#getBitapPattern()} to create bitap pattern.
  */
 public final class BitapPattern implements java.io.Serializable {
-    final Motif motif;
+    final Motif<?> motif;
     final BitapData
             mainData,
             secondaryData;
@@ -36,7 +36,7 @@ public final class BitapPattern implements java.io.Serializable {
     /**
      * Use {@link Motif#getBitapPattern()} to create bitap pattern.
      */
-    BitapPattern(Motif motif, BitapData mainData, BitArray exactMask) {
+    BitapPattern(Motif<?> motif, BitapData mainData, BitArray exactMask) {
         this.motif = motif;
         this.mainData = mainData;
         this.exactMask = exactMask;
@@ -46,7 +46,7 @@ public final class BitapPattern implements java.io.Serializable {
         } else
             this.secondaryData = mainData.toSecondary(exactMask);
 
-        MotifWithExactMask me = new MotifWithExactMask(motif, exactMask);
+        MotifWithExactMask<?> me = new MotifWithExactMask(motif, exactMask);
         this.matchScore = me.defaultMatchScore;
         this.mismatchScore = me.mismatchScore;
         this.averageMismatchPenalty = me.averageMismatchPenalty;


=====================================
src/main/java/com/milaboratory/core/motif/BitapStateIterator.java
=====================================
@@ -17,11 +17,13 @@ package com.milaboratory.core.motif;
 
 import com.milaboratory.core.sequence.Sequence;
 
+import static com.milaboratory.core.motif.LongArrayBitHelper.*;
+
 abstract class BitapStateIterator {
     final BitapData data;
     final Sequence sequence;
     int errors;
-    final long[] R;
+    final long[][] R;
     final int to;
     int symbolsProcessed = 0;
     int current;
@@ -32,9 +34,9 @@ abstract class BitapStateIterator {
             throw new IllegalArgumentException();
         this.data = data;
         this.sequence = sequence;
-        this.R = new long[count];
-        for (int i = 0; i < count; ++i)
-            R[i] = (~0L) << i;
+        this.R = LongArrayBitHelper.ones2D(count, data.segmentCount);
+        for (int i = 1; i < count; ++i)
+            leftShiftLittleEndian(R[i], i);
         this.to = to;
         this.current = from;
     }
@@ -63,11 +65,10 @@ abstract class BitapStateIterator {
                 return false;
 
             // Main part
-            long matchingMask = (1L << (data.size - 1));
-            R[0] <<= 1;
-            R[0] |= data.patternMask[sequence.codeAt(current)];
+            leftShiftLittleEndian(R[0], 1);
+            orAssign(R[0], data.patternMask[sequence.codeAt(current)]);
             ++current;
-            match = (0 == (R[0] & matchingMask));
+            match = !getBitLittleEndian(R[0], data.size - 1);
 
             // Next state calculated
             return true;
@@ -94,35 +95,44 @@ abstract class BitapStateIterator {
                 return false;
 
             int d;
-            long preMismatchTmp, mismatchTmp;
+            long[]
+                    swapTemp,
+                    // These are the only two array allocations in this method
+                    preMismatchTmp = new long[data.segmentCount],
+                    mismatchTmp = new long[data.segmentCount];
 
             // Main part
-            long matchingMask = (1L << (data.size - 1));
+            // long matchingMask = (1L << ());
 
-            long currentPatternMask = data.patternMask[sequence.codeAt(current)];
+            long[] currentPatternMask = data.patternMask[sequence.codeAt(current)];
             ++current;
             ++symbolsProcessed;
 
             // Exact match on the previous step == match with insertion on current step
-            R[0] <<= 1;
-            mismatchTmp = R[0];
-            R[0] |= currentPatternMask;
+            leftShiftLittleEndian(R[0], 1);
+            assign(mismatchTmp, R[0]);
+            orAssign(R[0], currentPatternMask);
 
-            if (0 == (R[0] & matchingMask)) {
+            if (!getBitLittleEndian(R[0], data.size - 1)) {
                 errors = 0;
                 match = true;
             }
 
             for (d = 1; d < R.length; ++d) {
-                R[d] <<= 1;
-                preMismatchTmp = R[d];
-                R[d] |= currentPatternMask;
-                R[d] &= mismatchTmp;
-                if (!match && 0 == (R[d] & matchingMask) && symbolsProcessed >= data.size) {
+                leftShiftLittleEndian(R[d], 1);
+                assign(preMismatchTmp, R[d]);
+                orAssign(R[d], currentPatternMask);
+                andAssign(R[d], mismatchTmp);
+                if (!match && !getBitLittleEndian(R[d], data.size - 1) && symbolsProcessed >= data.size) {
                     errors = d;
                     match = true;
                 }
+
+                // Read as: mismatchTmp = preMismatchTmp
+                // but preserving already allocated array for the next preMismatchTmp
+                swapTemp = mismatchTmp;
                 mismatchTmp = preMismatchTmp;
+                preMismatchTmp = swapTemp;
             }
 
             return true;
@@ -140,37 +150,57 @@ abstract class BitapStateIterator {
             super(data, sequence, maxErrors + 1, from, to);
         }
 
-        void updateState(long currentPatternMask) {
-            long matchingMask = (1L << (data.size - 1));
+        void updateState(long[] currentPatternMask) {
+            // long matchingMask = (1L << ());
+            // data.size - 1
 
-            long preInsertionTmp, preMismatchTmp,
-                    insertionTmp, deletionTmp, mismatchTmp;
+            long[]
+                    swapTmp,
+                    preInsertionTmp = new long[data.segmentCount],
+                    preMismatchTmp = new long[data.segmentCount],
+                    insertionTmp = new long[data.segmentCount],
+                    deletionTmp = new long[data.segmentCount],
+                    mismatchTmp = new long[data.segmentCount];
 
             // Exact match on the previous step == match with insertion on current step
-            insertionTmp = R[0];
-            R[0] <<= 1;
-            mismatchTmp = R[0];
-            R[0] |= currentPatternMask;
-            deletionTmp = R[0];
+            assign(insertionTmp, R[0]);
+            leftShiftLittleEndian(R[0], 1);
+            assign(mismatchTmp, R[0]);
+            orAssign(R[0], currentPatternMask);
+            assign(deletionTmp, R[0]);
 
-            if (0 == (R[0] & matchingMask)) {
+            if (!getBitLittleEndian(R[0], data.size - 1)) {
                 errors = 0;
                 match = true;
             }
 
             for (int d = 1; d < R.length; ++d) {
-                preInsertionTmp = R[d];
-                R[d] <<= 1;
-                preMismatchTmp = R[d];
-                R[d] |= currentPatternMask;
-                R[d] &= insertionTmp & mismatchTmp & (deletionTmp << 1);
-                if (!match && 0 == (R[d] & matchingMask) && symbolsProcessed >= data.size - R.length + 1) {
+                assign(preInsertionTmp, R[d]);
+                leftShiftLittleEndian(R[d], 1);
+                assign(preMismatchTmp, R[d]);
+                orAssign(R[d], currentPatternMask);
+
+                leftShiftLittleEndian(deletionTmp, 1);
+                andAssign(R[d], insertionTmp, mismatchTmp, deletionTmp);
+                if (!match && !getBitLittleEndian(R[d], data.size - 1) && symbolsProcessed >= data.size - R.length + 1) {
                     errors = d;
                     match = true;
                 }
-                deletionTmp = R[d];
+
+                assign(deletionTmp, R[d]);
+
+                // Read as:
+                //   insertionTmp = preInsertionTmp;
+                //   mismatchTmp = preMismatchTmp;
+                // but preserving already allocated array for the next preMismatchTmp
+
+                swapTmp = insertionTmp;
                 insertionTmp = preInsertionTmp;
+                preInsertionTmp = swapTmp;
+
+                swapTmp = mismatchTmp;
                 mismatchTmp = preMismatchTmp;
+                preMismatchTmp = swapTmp;
             }
         }
     }


=====================================
src/main/java/com/milaboratory/core/motif/LongArrayBitHelper.java
=====================================
@@ -0,0 +1,154 @@
+package com.milaboratory.core.motif;
+
+import java.util.Arrays;
+
+public final class LongArrayBitHelper {
+    private LongArrayBitHelper() {
+    }
+
+    public static void assign(long[] a, long[] b) {
+        assert a.length == b.length;
+        System.arraycopy(b, 0, a, 0, a.length);
+    }
+
+    /** a &= b */
+    public static void andAssign(long[] a, long[] b) {
+        if (a.length != b.length)
+            throw new IllegalArgumentException();
+        for (int i = 0; i < a.length; i++)
+            a[i] &= b[i];
+    }
+
+    /** a &= b & c & d */
+    public static void andAssign(long[] a, long[] b, long[] c, long[] d) {
+        if (a.length != b.length || b.length != c.length || c.length != d.length)
+            throw new IllegalArgumentException();
+        for (int i = 0; i < a.length; i++)
+            a[i] &= b[i] & c[i] & d[i];
+    }
+
+    /** a |= b */
+    public static void orAssign(long[] a, long[] b) {
+        if (a.length != b.length)
+            throw new IllegalArgumentException();
+        for (int i = 0; i < a.length; i++)
+            a[i] |= b[i];
+    }
+
+    public static int upDivideBy64(int i) {
+        return (i + 63) / 64;
+    }
+
+    public static long[] ones(int elements) {
+        long[] a = new long[elements];
+        Arrays.fill(a, ~0L);
+        return a;
+    }
+
+    public static long[][] ones2D(int elements1, int elements2) {
+        long[][] a = new long[elements1][elements2];
+        for (int i = 0; i < elements1; i++)
+            Arrays.fill(a[i], ~0L);
+        return a;
+    }
+
+    public static void setAll(long[] a) {
+        Arrays.fill(a, ~0L);
+    }
+
+    public static void resetBitLittleEndian(long[] a, int idx) {
+        a[idx / 64] &= ~(1L << (idx % 64));
+    }
+
+    public static void setBitLittleEndian(long[] a, int idx) {
+        a[idx / 64] |= (1L << (idx % 64));
+    }
+
+    public static boolean getBitLittleEndian(long[] a, int idx) {
+        return ((a[idx / 64] >>> (idx % 64)) & 1L) == 1L;
+    }
+
+    // Following code adopted from https://github.com/patrickfav/bytes-java
+
+    private static final long LONG_ALL_ONE = 0xFFFFFFFFFFFFFFFFL;
+
+    public static void rightShiftBigEndian(long[] a, int shift) {
+        final int shiftMod = shift % 64;
+        final long carryMask = LONG_ALL_ONE << (64 - shiftMod);
+        final int offsetBytes = shift / 64;
+
+        int sourceIndex;
+        for (int i = a.length - 1; i >= 0; i--) {
+            sourceIndex = i - offsetBytes;
+            if (sourceIndex < 0)
+                a[i] = 0;
+            else {
+                long src = a[sourceIndex];
+                long dst = src >>> shiftMod;
+                if (sourceIndex - 1 >= 0)
+                    dst |= a[sourceIndex - 1] << (64 - shiftMod) & carryMask;
+                a[i] = dst;
+            }
+        }
+    }
+
+    public static void rightShiftLittleEndian(long[] a, int shift) {
+        final int shiftMod = shift % 64;
+        final long carryMask = LONG_ALL_ONE << (64 - shiftMod);
+        final int offsetBytes = shift / 64;
+
+        int sourceIndex;
+        for (int i = 0; i < a.length; i++) {
+            sourceIndex = i + offsetBytes;
+            if (sourceIndex >= a.length)
+                a[i] = 0;
+            else {
+                long src = a[sourceIndex];
+                long dst = src >>> shiftMod;
+                if (sourceIndex + 1 < a.length)
+                    dst |= a[sourceIndex + 1] << (64 - shiftMod) & carryMask;
+                a[i] = dst;
+            }
+        }
+    }
+
+    public static void leftShiftBigEndian(long[] a, int shift) {
+        final int shiftMod = shift % 64;
+        final long carryMask = (1L << shiftMod) - 1L;
+        final int offsetBytes = shift / 64;
+
+        int sourceIndex;
+        for (int i = 0; i < a.length; i++) {
+            sourceIndex = i + offsetBytes;
+            if (sourceIndex >= a.length)
+                a[i] = 0;
+            else {
+                long src = a[sourceIndex];
+                long dst = src << shiftMod;
+                if (sourceIndex + 1 < a.length)
+                    dst |= a[sourceIndex + 1] >>> (64 - shiftMod) & carryMask;
+                a[i] = dst;
+            }
+        }
+    }
+
+    public static void leftShiftLittleEndian(long[] a, int shift) {
+        final int shiftMod = shift % 64;
+        final long carryMask = (1L << shiftMod) - 1L;
+        final int offsetBytes = shift / 64;
+
+        int sourceIndex;
+        for (int i = a.length - 1; i >= 0; i--) {
+            sourceIndex = i - offsetBytes;
+            if (sourceIndex < 0)
+                a[i] = 0;
+            else {
+                long src = a[sourceIndex];
+                long dst = src << shiftMod;
+                if (sourceIndex - 1 >= 0)
+                    dst |= a[sourceIndex - 1] >>> (64 - shiftMod) & carryMask;
+                a[i] = dst;
+            }
+        }
+    }
+}


=====================================
src/main/java/com/milaboratory/core/motif/Motif.java
=====================================
@@ -22,7 +22,7 @@ import com.milaboratory.core.sequence.Sequence;
 import com.milaboratory.core.sequence.Wildcard;
 import com.milaboratory.util.BitArray;
 
-import java.util.Arrays;
+import static com.milaboratory.core.motif.LongArrayBitHelper.*;
 
 /**
  * Data structure for efficient exact and fuzzy matching/searching of sequences (wildcard-aware).
@@ -120,25 +120,20 @@ public final class Motif<S extends Sequence<S>> implements java.io.Serializable
     }
 
     public BitapPattern getBitapPattern() {
-        if (size >= 64)
-            throw new RuntimeException("Supports motifs with length less then 64.");
         return bitapPattern;
     }
 
     public BitapPattern toBitapPattern(BitArray exactMask) {
-        if (size >= 64)
-            return null;
         int aSize = alphabet.size();
-        long[] patternMask = new long[aSize],
-                reversePatternMask = new long[aSize];
-        Arrays.fill(patternMask, ~0);
-        Arrays.fill(reversePatternMask, ~0);
-        int p = 0;
-        for (int i = 0; i < aSize; ++i)
-            for (int j = 0; j < size; ++j)
-                if (data.get(p++)) {
-                    patternMask[i] &= ~(1L << j);
-                    reversePatternMask[i] &= ~(1L << (size - j - 1));
+        int segments = upDivideBy64(size);
+        long[][] patternMask = ones2D(aSize, segments),
+                reversePatternMask = ones2D(aSize, segments);
+        int i = 0;
+        for (int l = 0; l < aSize; ++l)
+            for (int p = 0; p < size; ++p)
+                if (data.get(i++)) {
+                    resetBitLittleEndian(patternMask[l],  p);
+                    resetBitLittleEndian(reversePatternMask[l], size - p - 1);
                 }
         BitapData mainData = new BitapData(size, patternMask, reversePatternMask);
         return new BitapPattern(this, mainData, exactMask);


=====================================
src/main/java/com/milaboratory/core/sequence/NSequenceWithQualityBuilder.java
=====================================
@@ -62,7 +62,7 @@ public class NSequenceWithQualityBuilder implements SeqBuilder<NSequenceWithQual
     }
 
     public NSequenceWithQualityBuilder append(char symbol, byte quality) {
-        return append(NucleotideSequence.ALPHABET.symbolToCode(symbol), quality);
+        return append(NucleotideSequence.ALPHABET.symbolToCodeWithException(symbol), quality);
     }
 
     @Override


=====================================
src/main/java/com/milaboratory/primitivio/ObjectMapperProvider.java
=====================================
@@ -1,5 +1,6 @@
 package com.milaboratory.primitivio;
 
+import com.fasterxml.jackson.databind.MapperFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.milaboratory.primitivio.annotations.Serializable;
 import com.milaboratory.util.GlobalObjectMappers;
@@ -8,13 +9,23 @@ import com.milaboratory.util.GlobalObjectMappers;
  * By implementing this interface, one can customise object mapper that will be used for serialization in classes with
  * activated {@link Serializable#asJson()} option.
  */
-public interface ObjectMapperProvider {
-    ObjectMapper getObjectMapper();
+public abstract class ObjectMapperProvider {
+    private final ObjectMapper objectMapper;
 
-    final class Default implements ObjectMapperProvider {
-        @Override
-        public ObjectMapper getObjectMapper() {
-            return GlobalObjectMappers.getOneLine();
+    protected ObjectMapperProvider(ObjectMapper objectMapper) {
+        if (!objectMapper.isEnabled(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY)) {
+            throw new IllegalArgumentException("objectMapper must support SORT_PROPERTIES_ALPHABETICALLY");
+        }
+        this.objectMapper = objectMapper;
+    }
+
+    public ObjectMapper getObjectMapper() {
+        return objectMapper;
+    }
+
+    public static final class Default extends ObjectMapperProvider {
+        public Default() {
+            super(GlobalObjectMappers.getOneLineOrdered());
         }
     }
 }


=====================================
src/main/java/com/milaboratory/primitivio/PrimitivIOStateBuilder.java
=====================================
@@ -46,6 +46,10 @@ public final class PrimitivIOStateBuilder implements HasPrimitivIOState {
         this.manager = manager;
     }
 
+    public SerializersManager getSerializersManager() {
+        return manager;
+    }
+
     @Override
     public void putKnownReference(Object ref) {
         knownReferences.add(ref);


=====================================
src/main/java/com/milaboratory/primitivio/Util.java
=====================================
@@ -105,7 +105,7 @@ public final class Util {
         return list;
     }
 
-    public static <K, V> void writeMap(Map<K, V> map, PrimitivO output) {
+    public static <K, V> void writeMap(SortedMap<K, V> map, PrimitivO output) {
         output.writeVarInt(map.size());
         for (Map.Entry<K, V> entry : map.entrySet()) {
             output.writeObject(entry.getKey());


=====================================
src/main/java/com/milaboratory/util/ArraysUtils.java
=====================================
@@ -104,4 +104,11 @@ public final class ArraysUtils {
         System.arraycopy(array2, 0, r, array1.length, array2.length);
         return r;
     }
+
+    public static long[][] deepClone(long[][] array) {
+        long[][] newArray = new long[array.length][];
+        for (int i = 0; i < newArray.length; i++)
+            newArray[i] = array[i].clone();
+        return newArray;
+    }
 }


=====================================
src/main/java/com/milaboratory/util/GlobalObjectMappers.java
=====================================
@@ -18,8 +18,10 @@ package com.milaboratory.util;
 import com.fasterxml.jackson.core.JsonGenerator;
 import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
+import com.fasterxml.jackson.databind.MapperFeature;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.databind.json.JsonMapper;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -36,6 +38,7 @@ public final class GlobalObjectMappers {
     private static final Object sync = new Object();
 
     private static ObjectMapper ONE_LINE = null;
+    private static ObjectMapper ONE_LINE_ORDERED = null;
     private static ObjectMapper PRETTY = null;
 
     public static String toOneLine(Object object) throws JsonProcessingException {
@@ -61,7 +64,8 @@ public final class GlobalObjectMappers {
         synchronized (sync) {
             if (ONE_LINE == null) {
                 ONE_LINE = new ObjectMapper()
-                        .configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false);
+                        .configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false)
+                        .enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
                 for (Consumer<ObjectMapper> modifier : mapperModifiers)
                     modifier.accept(ONE_LINE);
             }
@@ -69,13 +73,29 @@ public final class GlobalObjectMappers {
         }
     }
 
+    public static ObjectMapper getOneLineOrdered() {
+        synchronized (sync) {
+            if (ONE_LINE_ORDERED == null) {
+                ONE_LINE_ORDERED = JsonMapper.builder()
+                        .enable(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY)
+                        .build()
+                        .configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false)
+                        .enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
+                for (Consumer<ObjectMapper> modifier : mapperModifiers)
+                    modifier.accept(ONE_LINE_ORDERED);
+            }
+            return ONE_LINE_ORDERED;
+        }
+    }
+
     public static ObjectMapper getPretty() {
         synchronized (sync) {
             if (PRETTY == null) {
                 PRETTY = new ObjectMapper()
                         .enable(SerializationFeature.INDENT_OUTPUT)
                         .configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false)
-                        .setDefaultPrettyPrinter(new DefaultPrettyPrinter1());
+                        .setDefaultPrettyPrinter(new DefaultPrettyPrinter1())
+                        .enable(SerializationFeature.ORDER_MAP_ENTRIES_BY_KEYS);
                 for (Consumer<ObjectMapper> modifier : mapperModifiers)
                     modifier.accept(PRETTY);
             }


=====================================
src/main/java/com/milaboratory/util/ObjectSerializer.java
=====================================
@@ -50,6 +50,11 @@ public interface ObjectSerializer<O> {
          */
         private SerializersManager sm;
 
+        public PrimitivIOObjectSerializer(Class<O> clazz, SerializersManager sm) {
+            this.clazz = clazz;
+            this.sm = sm;
+        }
+
         public PrimitivIOObjectSerializer(Class<O> clazz) {
             this.clazz = clazz;
             this.sm = new SerializersManager();


=====================================
src/main/java/com/milaboratory/util/sorting/Sorter.java
=====================================
@@ -19,6 +19,7 @@ import cc.redberry.pipe.CUtils;
 import cc.redberry.pipe.OutputPort;
 import cc.redberry.pipe.OutputPortCloseable;
 import cc.redberry.pipe.util.Chunk;
+import com.milaboratory.primitivio.SerializersManager;
 import com.milaboratory.util.ObjectSerializer;
 import gnu.trove.list.array.TLongArrayList;
 import org.apache.commons.io.output.CloseShieldOutputStream;
@@ -32,6 +33,7 @@ import java.util.PriorityQueue;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.atomic.AtomicReference;
 
 /**
  * Created by poslavsky on 28/02/2017.
@@ -72,9 +74,26 @@ public final class Sorter<T> {
             Class<T> clazz,
             File tempFile) throws IOException {
         return sort(initialSource, comparator, chunkSize,
-                new ObjectSerializer.PrimitivIOObjectSerializer<>(clazz), tempFile);
+                new ObjectSerializer.PrimitivIOObjectSerializer<>(clazz),
+                tempFile);
     }
 
+    /**
+     * Sort objects supporting PrimitivIO serialization.
+     */
+    public static <T> OutputPortCloseable<T> sort(
+            OutputPort<T> initialSource,
+            Comparator<T> comparator,
+            int chunkSize,
+            Class<T> clazz,
+            SerializersManager sm,
+            File tempFile) throws IOException {
+        return sort(initialSource, comparator, chunkSize,
+                new ObjectSerializer.PrimitivIOObjectSerializer<>(clazz, sm),
+                tempFile);
+    }
+
+
     public static <T> OutputPortCloseable<T> sort(
             OutputPort<T> initialSource,
             Comparator<T> comparator,
@@ -95,6 +114,7 @@ public final class Sorter<T> {
             long maxBlockSize = 0;
             long previousPosition = 0;
             CountDownLatch currentBlockWriteLatch = new CountDownLatch(0);
+            AtomicReference<Throwable> error = new AtomicReference<>(null);
             while ((chunk = chunked.take()) != null) {
                 final Object[] data = chunk.toArray();
 
@@ -108,13 +128,20 @@ public final class Sorter<T> {
 
                 // Waiting previous block to be fully flushed to the stream
                 currentBlockWriteLatch.await();
+                if (error.get() != null)
+                    throw new RuntimeException(error.get());
                 final CountDownLatch finalLatch = currentBlockWriteLatch = new CountDownLatch(1);
 
                 // Initiating block serialization in a separate thread
                 executor.submit(() -> {
-                    chunkOffsets.add(output.getByteCount());
-                    serializer.write((Collection) Arrays.asList(data), new CloseShieldOutputStream(output));
-                    finalLatch.countDown();
+                    try {
+                        chunkOffsets.add(output.getByteCount());
+                        serializer.write((Collection) Arrays.asList(data), CloseShieldOutputStream.wrap(output));
+                    } catch (Throwable t) {
+                        error.compareAndSet(null, t);
+                    } finally {
+                        finalLatch.countDown();
+                    }
                 });
 
                 // Tracking last chunk size
@@ -123,6 +150,8 @@ public final class Sorter<T> {
 
             // Waiting last block to be written
             currentBlockWriteLatch.await();
+            if (error.get() != null)
+                throw new RuntimeException(error.get());
 
             memoryBudget = maxBlockSize;
         } catch (InterruptedException e) {


=====================================
src/test/java/com/milaboratory/core/motif/BitapPatternTest.java
=====================================
@@ -318,7 +318,7 @@ public class BitapPatternTest {
 
         out:
         for (int i = 0; i < its; ++i) {
-            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 5, 60);
+            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 5, 250);
 
             NucleotideSequence seqM = seq;
             int muts = 1 + rg.nextInt(Math.min(10, seq.size() - 1));
@@ -327,8 +327,8 @@ public class BitapPatternTest {
                 seqM = mutation.mutate(seqM);
             }
 
-            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 40);
-            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 40);
+            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 250);
+            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 250);
             NucleotideSequence fullSeq = SequencesUtils.concatenate(seqLeft, seqM, seqRight);
 
             Motif<NucleotideSequence> motif = new Motif<>(seq);
@@ -343,6 +343,13 @@ public class BitapPatternTest {
                     found = true;
             }
 
+            if (!found) {
+                System.out.println("Seq = " + seq);
+                System.out.println("SeqM = " + seqM);
+                System.out.println("SeqLeft = " + seqLeft);
+                System.out.println("SeqRight = " + seqRight);
+            }
+
             assertTrue("On iteration = " + i + " with seed " + seed, found);
         }
     }
@@ -354,17 +361,16 @@ public class BitapPatternTest {
         rg = new Well19937c(seed);
         int its = TestUtil.its(1000, 100000);
 
-        out:
         for (int i = 0; i < its; ++i) {
-            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 5, 60);
+            NucleotideSequence seq = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 5, 250);
 
             NucleotideSequence seqM = seq;
-            int muts = 1 + rg.nextInt(Math.min(10, seq.size()));
+            int muts = 1 + rg.nextInt(Math.min(10, seq.size() - 1));
             for (int j = 0; j < muts; ++j)
                 seqM = UniformMutationsGenerator.createUniformMutationAsObject(seqM, rg).mutate(seqM);
 
-            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 40);
-            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 40);
+            NucleotideSequence seqLeft = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 250);
+            NucleotideSequence seqRight = TestUtil.randomSequence(NucleotideSequence.ALPHABET, rg, 0, 250);
             NucleotideSequence fullSeq = SequencesUtils.concatenate(seqLeft, seqM, seqRight);
 
             Motif<NucleotideSequence> motif = new Motif<>(seq);
@@ -379,6 +385,13 @@ public class BitapPatternTest {
                     found = true;
             }
 
+            if (!found) {
+                System.out.println("Seq = " + seq);
+                System.out.println("SeqM = " + seqM);
+                System.out.println("SeqLeft = " + seqLeft);
+                System.out.println("SeqRight = " + seqRight);
+            }
+
             assertTrue("On iteration = " + i + " with seed " + seed, found);
         }
     }


=====================================
src/test/java/com/milaboratory/core/motif/LongArrayBitHelperTest.java
=====================================
@@ -0,0 +1,35 @@
+package com.milaboratory.core.motif;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class LongArrayBitHelperTest {
+    public static long[] a1() {
+        return new long[]{0xF0000000000F0000L, 0xFF000000F0000000L, 0x000F00000000000FL};
+    }
+
+    @Test
+    public void test1() {
+        long[] a;
+
+        a = a1();
+        LongArrayBitHelper.leftShiftLittleEndian(a, 4);
+        //                                 {0xF0000000000F0000L, 0xFF000000F0000000L, 0x000F00000000000FL};
+        Assert.assertArrayEquals(new long[]{0x0000000000F00000L, 0xF000000F0000000FL, 0x00F00000000000FFL}, a);
+
+        a = a1();
+        LongArrayBitHelper.leftShiftBigEndian(a, 4);
+        //                                 {0xF0000000000F0000L, 0xFF000000F0000000L, 0x000F00000000000FL};
+        Assert.assertArrayEquals(new long[]{0x0000000000F0000FL, 0xF000000F00000000L, 0x00F00000000000F0L}, a);
+
+        a = a1();
+        LongArrayBitHelper.rightShiftLittleEndian(a, 4);
+        //                                 {0xF0000000000F0000L, 0xFF000000F0000000L, 0x000F00000000000FL};
+        Assert.assertArrayEquals(new long[]{0x0F0000000000F000L, 0xFFF000000F000000L, 0x0000F00000000000L}, a);
+
+        a = a1();
+        LongArrayBitHelper.rightShiftBigEndian(a, 4);
+        //                                 {0xF0000000000F0000L, 0xFF000000F0000000L, 0x000F00000000000FL};
+        Assert.assertArrayEquals(new long[]{0x0F0000000000F000L, 0x0FF000000F000000L, 0x0000F00000000000L}, a);
+    }
+}
\ No newline at end of file


=====================================
src/test/java/com/milaboratory/core/motif/MotifTest.java
=====================================
@@ -72,13 +72,7 @@ public class MotifTest {
                 assertTrue(motif.matches(seq, i));
         }
     }
-
-    @Test
-    public void test4() throws Exception {
-        Motif<AminoAcidSequence> se = new AminoAcidSequence("CASSLAP").toMotif();
-        IOTestUtil.assertJavaSerialization(se);
-    }
-
+    
     @Test
     public void testInformation() {
         assertEquals(20, new NucleotideSequence("ATTAGACAAT").toMotif().matchBitScore(), 0.1);



View it on GitLab: https://salsa.debian.org/med-team/milib/-/compare/4227511351d8d5c0bda8a2b1c6270188b5365874...6cbd8fa8442adc06e673fad396df6f117ebeb754

-- 
View it on GitLab: https://salsa.debian.org/med-team/milib/-/compare/4227511351d8d5c0bda8a2b1c6270188b5365874...6cbd8fa8442adc06e673fad396df6f117ebeb754
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221230/c437dca6/attachment-0001.htm>


More information about the debian-med-commit mailing list