[med-svn] [Git][med-team/python-ncls][upstream] New upstream version 0.0.63+ds

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Tue Nov 2 17:49:22 GMT 2021



Nilesh Patra pushed to branch upstream at Debian Med / python-ncls


Commits:
eaea5e28 by Nilesh Patra at 2021-11-02T18:34:35+05:30
New upstream version 0.0.63+ds
- - - - -


6 changed files:

- − .travis.yml
- CHANGELOG
- ncls/__init__.py
- ncls/src/ncls.pyx
- ncls/src/ncls32.pyx
- ncls/version.py


Changes:

=====================================
.travis.yml deleted
=====================================
@@ -1,9 +0,0 @@
-language: python
-python:
-  - "3.6"
-install:
-  - pip install cython pytest
-  - python setup.py install
-
-# functionality is tested in the pyranges package
-script: py.test tests/test_ncls.py && cd ~/ && python -c 'import ncls; print(ncls.__version__)'


=====================================
CHANGELOG
=====================================
@@ -1,3 +1,15 @@
+# 0.0.63 (18.10.21)
+- fix critical error: fix 1024-error for subtract 64 bit
+
+# 0.0.62 (20.09.21)
+- fix critical error: fix 1024-error for subtract
+
+# 0.0.61 (13.09.21)
+- fix critical error: in case of more than 1024 overlaps for one interval, only the first 1024 were reported
+
+# 0.0.58/59/60 (hotfixes) (09.03.21)
+- try to fix type mismatches and make causes more explicit
+
 # 0.0.57 (14.10.20)
 - accept lists/pd.Series in constructor
 


=====================================
ncls/__init__.py
=====================================
@@ -8,10 +8,11 @@ def NCLS(starts, ends, ids):
     if isinstance(starts, list) or "pandas" in str(type(starts)):
         starts, ends, ids = [np.array(s) for s in [starts, ends, ids]]
 
+    ids = ids.astype(np.int64)
     if starts.dtype == np.int64:
-        return NCLS64(starts, ends, ids)
+        return NCLS64(starts.astype(np.int64), ends.astype(np.int64), ids)
     elif starts.dtype == np.int32:
-        return NCLS32(starts, ends, ids)
+        return NCLS32(starts.astype(np.int32), ends.astype(np.int32), ids)
     else:
         raise Exception("Starts/Ends not int64 or int32: " + str(starts.dtype))
 
@@ -24,7 +25,7 @@ def FNCLS(starts, ends, ids):
         starts, ends, ids = [np.array(s) for s in [starts, ends, ids]]
 
     if starts.dtype == np.double:
-        return FNCLS(starts, ends, ids)
+        return FNCLS(starts, ends.astype(np.double), ids)
     else:
         raise Exception("Starts/Ends not double: " + str(starts.dtype))
 


=====================================
ncls/src/ncls.pyx
=====================================
@@ -72,13 +72,17 @@ cdef class NCLS64:
     @cython.boundscheck(False)
     @cython.wraparound(False)
     @cython.initializedcheck(False)
-    cpdef all_overlaps_both(self, const int64_t [::1] starts, const int64_t [::1] ends, const int64_t [::1] indexes):
+    cpdef all_overlaps_both(self,
+                            const int64_t [::1] starts,
+                            const int64_t [::1] ends,
+                            const int64_t [::1] indexes):
 
         cdef int i = 0
         cdef int nhit = 0
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=np.int64)
         output_arr_other = np.zeros(length, dtype=np.int64)
@@ -95,24 +99,17 @@ cdef class NCLS64:
         if not self.im: # if empty
             return [], []
 
-        # from time import time
-        # start = time()
         it_alloc = cn.interval_iterator_alloc()
         it = it_alloc
         for loop_counter in range(length):
 
-            # print("loop_counter", loop_counter)
-            # print("start", starts[loop_counter])
-            # print("ends", ends[loop_counter])
-
-            # remember first pointer for dealloc
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
                                 &(nhit), &(it)) # GET NEXT BUFFER CHUNK
 
-                # print("nhit", nhit)
                 if nfound + nhit >= length:
 
                     length = (length + nhit) * 2
@@ -122,11 +119,6 @@ cdef class NCLS64:
                     output_other = output_arr_other
 
                 while i < nhit:
-                    # print("  i", i)
-
-                    # print("length", length)
-                    # print("nfound", nfound)
-                    # print("loop_counter", loop_counter)
                     output[nfound] = indexes[loop_counter]
                     output_other[nfound] = im_buf[i].target_id
 
@@ -136,13 +128,13 @@ cdef class NCLS64:
                     nfound += 1
                     i += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
         cn.free_interval_iterator(it_alloc)
-        # end = time()
-
-        # print("ncls time:", end - start)
 
         return output_arr[:nfound], output_arr_other[:nfound]
 
@@ -158,6 +150,7 @@ cdef class NCLS64:
         cdef int loop_counter = 0
         cdef int nfound = 0
         cdef int max_end = -1
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=np.int64)
         output_arr_other = np.zeros(length, dtype=np.int64)
@@ -177,16 +170,14 @@ cdef class NCLS64:
         it_alloc = cn.interval_iterator_alloc()
         it = it_alloc
         for loop_counter in range(length):
-
-            # print("loop_counter", loop_counter)
-            # remember first pointer for dealloc
-            while it:
+            max_end = -1
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
                                 &(nhit), &(it)) # GET NEXT BUFFER CHUNK
                 if nhit:
-                    max_end = -1
                     while i < nhit:
                         if im_buf[i].end >= max_end:
                             # print("max_end", im_buf[i].end)
@@ -198,6 +189,9 @@ cdef class NCLS64:
 
                     nfound += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
@@ -216,6 +210,7 @@ cdef class NCLS64:
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=np.int64)
         output_arr_other = np.zeros(length, dtype=np.int64)
@@ -237,16 +232,13 @@ cdef class NCLS64:
         for loop_counter in range(length):
 
             # remember first pointer for dealloc
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
                                 &(nhit), &(it)) # GET NEXT BUFFER CHUNK
 
-                # print("nhit", nhit)
-                # print("length", length)
-                # print("nfound", nfound)
-                # print(nfound + nhit >= length)
                 if nfound + nhit >= length:
 
                     length = (length + nhit) * 2
@@ -266,6 +258,9 @@ cdef class NCLS64:
                     nfound += 1
                     i += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
@@ -283,6 +278,7 @@ cdef class NCLS64:
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=np.int64)
         cdef int64_t [::1] output
@@ -302,7 +298,8 @@ cdef class NCLS64:
 
             # remember first pointer for dealloc
 
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -320,6 +317,9 @@ cdef class NCLS64:
 
                     nfound += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
@@ -344,13 +344,11 @@ cdef class NCLS64:
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
-        # output_arr = np.zeros(length, dtype=np.int64)
         output_arr_length = np.zeros(length, dtype=np.int64)
-        # cdef int64_t [::1] output
         cdef int64_t [::1] output_length
 
-        # output = output_arr
         output_length = output_arr_length
 
         cdef cn.IntervalIterator *it
@@ -367,7 +365,8 @@ cdef class NCLS64:
             start = starts[loop_counter]
             end = ends[loop_counter]
             # remember first pointer for dealloc
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -377,6 +376,8 @@ cdef class NCLS64:
                     output_length[loop_counter] += int_min(im_buf[i].end, end) - int_max(im_buf[i].start, start)
                     i += 1
 
+                if nhit < 1024:
+                    spent = 1
 
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
@@ -445,7 +446,8 @@ cdef class NCLS64:
     @cython.boundscheck(False)
     @cython.wraparound(False)
     @cython.initializedcheck(False)
-    cpdef set_difference_helper(self, const int64_t [::1] starts, const int64_t [::1] ends, const int64_t [::1] indexes):
+    cpdef set_difference_helper(self, const int64_t [::1] starts, const int64_t [::1] ends, const int64_t [::1] indexes,
+                                const int64_t[::1] nhits):
 
         cdef int i
         cdef int nhit = 0
@@ -456,6 +458,7 @@ cdef class NCLS64:
         cdef int loop_counter = 0
         cdef int overlap_type_nb = 0
         cdef int na = -1
+        cdef int spent = 0
 
 
         output_arr = np.zeros(length, dtype=np.int64)
@@ -475,27 +478,21 @@ cdef class NCLS64:
         if not self.im: # if empty
             return [], [], []
 
-
         it_alloc = cn.interval_iterator_alloc()
         it = it_alloc
         for loop_counter in range(length):
 
-            while it:
+            nhit = nhits[loop_counter]
+            nstart = starts[loop_counter]
+            nend = ends[loop_counter]
+
+            while nhit > 0:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
-                                &(nhit), &(it)) # GET NEXT BUFFER CHUNK
-
-                #print("nhits:", nhit)
-
-                nstart = starts[loop_counter]
-                nend = ends[loop_counter]
-
-                # print("nstart", nstart)
-                # print("nend", nend)
+                                &(na), &(it)) # GET NEXT BUFFER CHUNK
 
                 if nfound + nhit >= length:
-
                     length = (length + nhit) * 2
                     output_arr = np.resize(output_arr, length)
                     output_arr_start = np.resize(output_arr_start, length)
@@ -512,62 +509,45 @@ cdef class NCLS64:
                     output[nfound] = indexes[loop_counter]
                     i = nhit
                     nfound += 1
+                    break
 
-                while i < nhit:
-                    # print("--- i:", i)
-                    # print("--- im_buf[i]", im_buf[i])
-                    #print("  B start:", im_buf[i].start)
-                    #print("  B end:", im_buf[i].end)
+                max_i = 1024 if nhit > 1024 else nhit
 
+                while i < max_i:
                     # in case the start contributes nothing
-                    if i < nhit - 1:
-                        # print("  i < nhit - 1")
+                    if nstart < im_buf[i].start:
+                        output[nfound] = indexes[loop_counter]
+                        output_start[nfound] = nstart
+                        output_end[nfound] = im_buf[i].start
+                        nfound += 1
+                    nstart = im_buf[i].end
+
+                    i += 1
+
+                nhit = nhit - 1024
+
+                if nhit <= 0:
+                    i = i - 1
+                    if im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]:
+                        # print("im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]")
+                        #print("we are here " * 10)
 
-                        if nstart < im_buf[i].start:
-                            #print("  new_start", nstart)
-                            #print("  new_end", im_buf[i].start)
+                        output_start[nfound] = -1
+                        output_end[nfound] = -1
+                        output[nfound] = <long> indexes[loop_counter]
+                        nfound += 1
+                    else:
+                        if im_buf[i].start > nstart:
                             output[nfound] = indexes[loop_counter]
                             output_start[nfound] = nstart
                             output_end[nfound] = im_buf[i].start
                             nfound += 1
 
-                        nstart = im_buf[i].end
-                    elif i == nhit - 1:
-
-                        # print("i == nhit -1")
-                        #print("im_buf[i].start", im_buf[i].start)
-                        #print("im_buf[i].end", im_buf[i].end)
-                        #print("nstart", nstart)
-                        #print("ends[loop_counter]", ends[loop_counter])
-
-                        if im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]:
-                            # print("im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]")
-                            #print("we are here " * 10)
-
-                            output_start[nfound] = -1
-                            output_end[nfound] = -1
-                            output[nfound] = <long> indexes[loop_counter]
+                        if im_buf[i].end < ends[loop_counter]:
+                            output[nfound] = indexes[loop_counter]
+                            output_start[nfound] = im_buf[i].end
+                            output_end[nfound] = ends[loop_counter]
                             nfound += 1
-                        else:
-                            if im_buf[i].start > nstart:
-                                # print("im_buf[i].start > nstart", im_buf[i].start, nstart)
-                                output[nfound] = indexes[loop_counter]
-                                output_start[nfound] = nstart
-                                output_end[nfound] = im_buf[i].start
-                                nfound += 1
-
-                            if im_buf[i].end < ends[loop_counter]:
-                                # print("im_buf[i].end < ends[loop_counter]", im_buf[i].end, ends[loop_counter])
-                                # print("i, loop_counter", i, loop_counter)
-                                # print("indexes[loop_counter]", indexes[loop_counter])
-                                # print("indexes", indexes[loop_counte  rloop_counter])
-
-                                output[nfound] = indexes[loop_counter]
-                                output_start[nfound] = im_buf[i].end
-                                output_end[nfound] = ends[loop_counter]
-                                nfound += 1
-
-                    i += 1
 
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
@@ -629,6 +609,7 @@ cdef class NCLS64:
         cdef int loop_counter = 0
         cdef int nfound = 0
         cdef int64_t start, end
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=np.int64)
         output_arr_other = np.zeros(length, dtype=np.int64)
@@ -654,7 +635,8 @@ cdef class NCLS64:
 
             start = starts[loop_counter]
             end = ends[loop_counter]
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, start, end, self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -678,6 +660,9 @@ cdef class NCLS64:
                         nfound += 1
                     i += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 


=====================================
ncls/src/ncls32.pyx
=====================================
@@ -73,13 +73,17 @@ cdef class NCLS32:
     @cython.boundscheck(False)
     @cython.wraparound(False)
     @cython.initializedcheck(False)
-    cpdef all_overlaps_both(self, const int32_t [::1] starts, const int32_t [::1] ends, const int64_t [::1] indexes):
+    cpdef all_overlaps_both(self,
+                            const int32_t [::1] starts,
+                            const int32_t [::1] ends,
+                            const int64_t [::1] indexes):
 
         cdef int i = 0
         cdef int nhit = 0
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=long)
         output_arr_other = np.zeros(length, dtype=long)
@@ -100,8 +104,9 @@ cdef class NCLS32:
         it = it_alloc
         for loop_counter in range(length):
 
+            spent = 0
             # remember first pointer for dealloc
-            while it:
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -130,6 +135,10 @@ cdef class NCLS32:
                     nfound += 1
                     i += 1
 
+                if not nhit == 1024:
+                    spent = 1
+
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
@@ -148,6 +157,7 @@ cdef class NCLS32:
         cdef int loop_counter = 0
         cdef int nfound = 0
         cdef int max_end = -1
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=long)
         output_arr_other = np.zeros(length, dtype=long)
@@ -170,13 +180,14 @@ cdef class NCLS32:
 
             # print("loop_counter", loop_counter)
             # remember first pointer for dealloc
-            while it:
+            spent = 0
+            max_end = -1
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
                                 &(nhit), &(it)) # GET NEXT BUFFER CHUNK
 
-                max_end = -1
 
                 # """Finding last overlap in NCLS: iterate from start, find last maximal end."""
 
@@ -192,6 +203,9 @@ cdef class NCLS32:
 
                     nfound += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
@@ -210,6 +224,7 @@ cdef class NCLS32:
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=long)
         output_arr_other = np.zeros(length, dtype=long)
@@ -231,7 +246,8 @@ cdef class NCLS32:
         for loop_counter in range(length):
 
             # remember first pointer for dealloc
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -263,6 +279,9 @@ cdef class NCLS32:
                     nfound += 1
                     i += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
@@ -282,6 +301,7 @@ cdef class NCLS32:
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=long)
         cdef long [::1] output
@@ -301,7 +321,8 @@ cdef class NCLS32:
 
             # remember first pointer for dealloc
 
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -319,6 +340,9 @@ cdef class NCLS32:
 
                     nfound += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 
@@ -343,6 +367,7 @@ cdef class NCLS32:
         cdef int length = len(starts)
         cdef int loop_counter = 0
         cdef int nfound = 0
+        cdef int spent = 0
 
         # output_arr = np.zeros(length, dtype=long)
         output_arr_length = np.zeros(length, dtype=long)
@@ -366,7 +391,8 @@ cdef class NCLS32:
             start = starts[loop_counter]
             end = ends[loop_counter]
             # remember first pointer for dealloc
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -376,6 +402,8 @@ cdef class NCLS32:
                     output_length[loop_counter] += int_min(im_buf[i].end, end) - int_max(im_buf[i].start, start)
                     i += 1
 
+                if nhit < 1024:
+                    spent = 1
 
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
@@ -444,7 +472,8 @@ cdef class NCLS32:
     @cython.boundscheck(False)
     @cython.wraparound(False)
     @cython.initializedcheck(False)
-    cpdef set_difference_helper(self, const int32_t [::1] starts, const int32_t [::1] ends, const int64_t [::1] indexes):
+    cpdef set_difference_helper(self, const int32_t [::1] starts, const int32_t [::1] ends, const int64_t [::1] indexes,
+                                const int64_t[::1] nhits):
 
         cdef int i = 0
         cdef int nhit = 0
@@ -452,10 +481,10 @@ cdef class NCLS32:
         cdef int32_t nstart = 0
         cdef int32_t nend = 0
         cdef int length = len(starts)
-        cdef int loop_counter = 0
         cdef int overlap_type_nb = 0
         cdef int na = -1
-
+        cdef int spent = 0
+        cdef int max_i = 0
 
         output_arr = np.zeros(length, dtype=np.int64)
         output_arr_start = np.zeros(length, dtype=np.int32)
@@ -476,30 +505,21 @@ cdef class NCLS32:
 
         it_alloc = cn.interval_iterator_alloc()
         it = it_alloc
-        for loop_counter in range(length):
-
-            while it:
-                i = 0
 
-                nstart = starts[loop_counter]
-                nend = ends[loop_counter]
+        for loop_counter in range(length):
 
-                # print("----" * 5)
-                # print("loop counter", loop_counter)
-                # print("nstart", nstart)
-                # print("nend", nend)
+            nhit = nhits[loop_counter]
+            nstart = starts[loop_counter]
+            nend = ends[loop_counter]
 
-                cn.find_intervals(it, nstart, nend, self.im, self.ntop,
+            while nhit > 0:
+                i = 0
+                cn.find_intervals(it, starts[loop_counter], ends[loop_counter], self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
-                                &(nhit), &(it)) # GET NEXT BUFFER CHUNK
-
-                #print("nhits:", nhit)
-
-
+                                &(na), &(it)) # GET NEXT BUFFER CHUNK
 
                 if nfound + nhit >= length:
-
-                    length = (nfound + nhit) * 2
+                    length = (length + nhit) * 2
                     output_arr = np.resize(output_arr, length)
                     output_arr_start = np.resize(output_arr_start, length)
                     output = output_arr
@@ -507,9 +527,6 @@ cdef class NCLS32:
                     output_arr_end = np.resize(output_arr_end, length)
                     output_end = output_arr_end
 
-                # print("  length is", length)
-                # print("  nfound is", nfound)
-
                 # B covers whole of A; ignore
                 if nhit == 1 and starts[loop_counter] > im_buf[i].start and ends[loop_counter] < im_buf[i].end:
                     # print("ignore me!")
@@ -518,66 +535,46 @@ cdef class NCLS32:
                     output[nfound] = indexes[loop_counter]
                     i = nhit
                     nfound += 1
+                    break
 
-                while i < nhit:
-                    # print("    i", i)
-                    # print("--- i:", i)
-                    # print("--- im_buf[i]", im_buf[i])
-                    # print("  B start:", im_buf[i].start)
-                    # print("  B end:", im_buf[i].end)
-                    # print("  nfound:", nfound)
-                    # print("  output_arr_start", output_arr_start)
-                    # print("  output_arr_end", output_arr_end)
+                max_i = 1024 if nhit > 1024 else nhit
 
+                while i < max_i:
                     # in case the start contributes nothing
-                    if i < nhit - 1:
-                        # print("  i < nhit - 1")
+                    if nstart < im_buf[i].start:
+                        output[nfound] = indexes[loop_counter]
+                        output_start[nfound] = nstart
+                        output_end[nfound] = im_buf[i].start
+                        nfound += 1
+                    nstart = im_buf[i].end
 
-                        if nstart < im_buf[i].start:
-                            #print("  new_start", nstart)
-                            #print("  new_end", im_buf[i].start)
-                            output[nfound] = indexes[loop_counter]
-                            output_start[nfound] = nstart
-                            output_end[nfound] = im_buf[i].start
-                            nfound += 1
+                    i += 1
 
-                        nstart = im_buf[i].end
-                    elif i == nhit - 1:
+                nhit = nhit - 1024
 
-                        # print("i == nhit -1")
-                        #print("im_buf[i].start", im_buf[i].start)
-                        #print("im_buf[i].end", im_buf[i].end)
-                        #print("nstart", nstart)
-                        #print("ends[loop_counter]", ends[loop_counter])
+                if nhit <= 0:
+                    i = i - 1
 
-                        if im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]:
-                            # print("im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]")
-                            #print("we are here " * 10)
+                    if im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]:
+                        # print("im_buf[i].start <= nstart and im_buf[i].end >= ends[loop_counter]")
+                        #print("we are here " * 10)
 
-                            output_start[nfound] = -1
-                            output_end[nfound] = -1
+                        output_start[nfound] = -1
+                        output_end[nfound] = -1
+                        output[nfound] = <long> indexes[loop_counter]
+                        nfound += 1
+                    else:
+                        if im_buf[i].start > nstart:
                             output[nfound] = indexes[loop_counter]
+                            output_start[nfound] = nstart
+                            output_end[nfound] = im_buf[i].start
                             nfound += 1
-                        else:
-                            if im_buf[i].start > nstart:
-                                # print("im_buf[i].start > nstart", im_buf[i].start, nstart)
-                                output[nfound] = indexes[loop_counter]
-                                output_start[nfound] = nstart
-                                output_end[nfound] = im_buf[i].start
-                                nfound += 1
-
-                            if im_buf[i].end < ends[loop_counter]:
-                                # print("im_buf[i].end < ends[loop_counter]", im_buf[i].end, ends[loop_counter])
-                                # print("i, loop_counter", i, loop_counter)
-                                # print("indexes[loop_counter]", indexes[loop_counter])
-                                # print("indexes", indexes[loop_counte  rloop_counter])
-
-                                output[nfound] = indexes[loop_counter]
-                                output_start[nfound] = im_buf[i].end
-                                output_end[nfound] = ends[loop_counter]
-                                nfound += 1
 
-                    i += 1
+                        if im_buf[i].end < ends[loop_counter]:
+                            output[nfound] = indexes[loop_counter]
+                            output_start[nfound] = im_buf[i].end
+                            output_end[nfound] = ends[loop_counter]
+                            nfound += 1
 
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
@@ -639,6 +636,7 @@ cdef class NCLS32:
         cdef int loop_counter = 0
         cdef int nfound = 0
         cdef int start, end
+        cdef int spent = 0
 
         output_arr = np.zeros(length, dtype=np.long)
         output_arr_other = np.zeros(length, dtype=np.long)
@@ -664,7 +662,8 @@ cdef class NCLS32:
 
             start = starts[loop_counter]
             end = ends[loop_counter]
-            while it:
+            spent = 0
+            while not spent:
                 i = 0
                 cn.find_intervals(it, start, end, self.im, self.ntop,
                                 self.subheader, self.nlists, im_buf, 1024,
@@ -688,6 +687,9 @@ cdef class NCLS32:
                         nfound += 1
                     i += 1
 
+                if nhit < 1024:
+                    spent = 1
+
             cn.reset_interval_iterator(it_alloc)
             it = it_alloc
 


=====================================
ncls/version.py
=====================================
@@ -1 +1 @@
-__version__ = "0.0.57"
+__version__ = "0.0.63"



View it on GitLab: https://salsa.debian.org/med-team/python-ncls/-/commit/eaea5e289b2f03b27162c052e58e0f442d4fa425

-- 
View it on GitLab: https://salsa.debian.org/med-team/python-ncls/-/commit/eaea5e289b2f03b27162c052e58e0f442d4fa425
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20211102/577af81b/attachment-0001.htm>


More information about the debian-med-commit mailing list