[med-svn] [Git][python-team/packages/python-ncls][upstream] New upstream version 0.0.70+ds
Colin Watson (@cjwatson)
gitlab at salsa.debian.org
Sun Nov 30 14:51:41 GMT 2025
Colin Watson pushed to branch upstream at Debian Python Team / packages / python-ncls
Commits:
8ff16f01 by Colin Watson at 2025-11-30T13:31:55+00:00
New upstream version 0.0.70+ds
- - - - -
29 changed files:
- + .github/workflows/build_and_upload_wheels.yml
- + .github/workflows/install_doctest_lint_typecheck.yml
- CHANGELOG
- + PKG-INFO
- README.md
- examples/test_all_overlaps_both.py
- examples/test_find_overlap_list.py
- examples/test_fncls.py
- + ncls.egg-info/PKG-INFO
- + ncls.egg-info/SOURCES.txt
- + ncls.egg-info/dependency_links.txt
- + ncls.egg-info/requires.txt
- + ncls.egg-info/top_level.txt
- ncls/__init__.py
- ncls/src/cfncls.pxd
- − ncls/src/cgraph.c
- ncls/src/cncls32.pxd
- ncls/src/fintervaldb.c
- ncls/src/fncls.pyx
- ncls/src/intervaldb.c
- ncls/src/ncls.pyx
- ncls/src/ncls32.pyx
- − ncls/version.py
- + pyproject.toml
- + setup.cfg
- setup.py
- − tests/gencode.py
- + tests/test_1024.py
- tests/test_ncls.py
Changes:
=====================================
.github/workflows/build_and_upload_wheels.yml
=====================================
@@ -0,0 +1,60 @@
+name: Build and upload to PyPI
+
+env:
+ # skip EOL interpreters
+ CIBW_SKIP: "cp36-* cp37-* cp38-* pp*"
+
+on:
+ workflow_dispatch:
+
+jobs:
+ build_wheels:
+ name: Build wheels on ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-latest, macos-latest]
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Build wheels
+ uses: pypa/cibuildwheel@v2.16.2
+
+ - name: Upload wheels
+ uses: actions/upload-artifact@v4
+ with:
+ name: wheels-${{ matrix.os }}
+ path: ./wheelhouse/*.whl
+
+ build_sdist:
+ name: Build source distribution
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Build sdist
+ run: pipx run build --sdist
+
+ - name: Upload sdist
+ uses: actions/upload-artifact@v4
+ with:
+ name: sdist
+ path: dist/*.tar.gz
+
+ upload_pypi:
+ needs: [build_wheels, build_sdist]
+ runs-on: ubuntu-latest
+ steps:
+ - name: Download all artifacts
+ uses: actions/download-artifact@v4
+ with:
+ path: dist
+ merge-multiple: true
+
+ - name: Publish to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1 # stay on latest 1.x
+ with:
+ user: __token__ # default, but kept explicit
+ password: ${{ secrets.PYPI_API_TOKEN }} # new secret
+ verbose: true
=====================================
.github/workflows/install_doctest_lint_typecheck.yml
=====================================
@@ -0,0 +1,80 @@
+name: Install, test, doctest, lint, typecheck ncls
+
+# Source: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#using-multiple-python-versions
+
+on: [pull_request, workflow_dispatch]
+
+jobs:
+ install_and_test:
+ runs-on: ubuntu-latest
+ strategy:
+ # You can use PyPy versions in python-version.
+ # For example, pypy2.7 and pypy3.9
+ matrix:
+ python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - run:
+ pip install pytest
+ - name: Install
+ run: |
+ pip install .
+ pytest tests/*.py
+
+ isort:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python environment
+ uses: actions/setup-python@v4
+ with:
+ python-version: "3.11"
+ - run: pip install isort
+ - run: isort --profile black -l 120 --check --diff ncls tests
+
+ black:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python environment
+ uses: actions/setup-python@v4
+ with:
+ python-version: "3.11.0"
+ - run: pip install black
+ - run: black -l 120 --check --diff ncls tests
+
+ flake8:
+ runs-on: ubuntu-latest
+ name: Lint
+ steps:
+ - name: Check out source repository
+ uses: actions/checkout@v3
+ - name: Set up Python environment
+ uses: actions/setup-python@v4
+ - name: flake8 Lint install
+ run: pip install flake8
+ - name: flake8 Lint
+ run: flake8 --max-line-length=120 --ignore E203,E501,W503 ncls tests
+
+ mypy:
+ runs-on: ubuntu-latest
+ name: Mypy
+ steps:
+ - name: Check out source repository
+ uses: actions/checkout@v3
+ - name: Set up Python environment
+ uses: actions/setup-python@v4
+ with:
+ python-version: "3.11"
+ - name: Install mypy
+ run: pip install mypy
+ - name: mypy
+ run: |
+ pip install numpy # for the typing stubs
+ python -m pip install pandas-stubs types-setuptools
+ mypy ncls
=====================================
CHANGELOG
=====================================
@@ -1,3 +1,16 @@
+# 0.0.67/68 (12.05.23)
+- Remove .c files that should be regenerated (@starsareintherose)
+
+# 0.0.66 (09.01.23)
+- fix more deprecated stuff
+
+# 0.0.65 (27.05.2022)
+- fix depr warning numpy (np.long -> np.int)
+
+# 0.0.64 (11.02.22)
+- fix Cython.Build module import on case-sensitive file systems
+- update legacy build files to work with newer versions of Python and gcc.
+
# 0.0.63 (18.10.21)
- fix critical error: fix 1024-error for subtract 64 bit
=====================================
PKG-INFO
=====================================
@@ -0,0 +1,164 @@
+Metadata-Version: 2.4
+Name: ncls
+Version: 0.0.70
+Summary: A fast interval tree-like implementation in C, wrapped for the Python ecosystem.
+Author-email: Endre Bakken Stovner <endbak at pm.me>
+License: MIT
+Project-URL: Homepage, http://github.com/pyranges/ncls
+Keywords: ncls,interval-tree,genomics
+Classifier: Programming Language :: Python :: 3
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Other Environment
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Topic :: Scientific/Engineering
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy
+Provides-Extra: dev
+Requires-Dist: black; extra == "dev"
+Requires-Dist: bumpver; extra == "dev"
+Requires-Dist: isort; extra == "dev"
+Requires-Dist: pip-tools; extra == "dev"
+Requires-Dist: pytest; extra == "dev"
+Dynamic: license-file
+
+# Nested containment list
+
+## Deprecation notice
+
+While I'll continue maintaining this library I suggest you change to [ruranges](https://github.com/pyranges/ruranges/) which is a more lightweight and faster library with many more operations than NCLS.
+
+## NCLS
+
+[Build Status](https://travis-ci.org/hunt-genes/ncls) [PyPI version](https://badge.fury.io/py/ncls)
+
+The Nested Containment List is a datastructure for interval overlap queries,
+like the interval tree. It is usually an order of magnitude faster than the
+interval tree both for building and query lookups.
+
+The implementation here is a revived version of the one used in the now defunct
+PyGr library, which died of bitrot. I have made it less memory-consuming and
+created wrapper functions which allow batch-querying the NCLS for further speed
+gains.
+
+It was implemented to be the cornerstone of the PyRanges project, but I have made
+it available to the Python community as a stand-alone library. Enjoy.
+
+Original Paper: https://academic.oup.com/bioinformatics/article/23/11/1386/199545
+Cite: http://dx.doi.org/10.1093/bioinformatics/btz615
+
+## Cite
+
+If you use this library in published research cite
+
+http://dx.doi.org/10.1093/bioinformatics/btz615
+
+## Install
+
+```
+pip install ncls
+```
+
+## Usage
+
+```python
+from ncls import NCLS
+
+import pandas as pd
+
+starts = pd.Series(range(0, 5))
+ends = starts + 100
+ids = starts
+
+subject_df = pd.DataFrame({"Start": starts, "End": ends}, index=ids)
+
+print(subject_df)
+# Start End
+# 0 0 100
+# 1 1 101
+# 2 2 102
+# 3 3 103
+# 4 4 104
+
+ncls = NCLS(starts.values, ends.values, ids.values)
+
+# python API, slower
+it = ncls.find_overlap(0, 2)
+for i in it:
+ print(i)
+# (0, 100, 0)
+# (1, 101, 1)
+
+starts_query = pd.Series([1, 3])
+ends_query = pd.Series([52, 14])
+indexes_query = pd.Series([10000, 100])
+
+query_df = pd.DataFrame({"Start": starts_query.values, "End": ends_query.values}, index=indexes_query.values)
+
+query_df
+# Start End
+# 10000 1 52
+# 100 3 14
+
+
+# everything done in C/Cython; faster
+l_idxs, r_idxs = ncls.all_overlaps_both(starts_query.values, ends_query.values, indexes_query.values)
+l_idxs, r_idxs
+# (array([10000, 10000, 10000, 10000, 10000, 100, 100, 100, 100,
+# 100]), array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]))
+
+print(query_df.loc[l_idxs])
+# Start End
+# 10000 1 52
+# 10000 1 52
+# 10000 1 52
+# 10000 1 52
+# 10000 1 52
+# 100 3 14
+# 100 3 14
+# 100 3 14
+# 100 3 14
+# 100 3 14
+print(subject_df.loc[r_idxs])
+# Start End
+# 0 0 100
+# 1 1 101
+# 2 2 102
+# 3 3 103
+# 4 4 104
+# 0 0 100
+# 1 1 101
+# 2 2 102
+# 3 3 103
+# 4 4 104
+
+# return intervals in python (slow/mem-consuming)
+intervals = ncls.intervals()
+intervals
+# [(0, 100, 0), (1, 101, 1), (2, 102, 2), (3, 103, 3), (4, 104, 4)]
+```
+
+There is also an experimental floating point version of the NCLS called FNCLS.
+See the examples folder.
+
+## Benchmark
+
+Test file of 100 million intervals (created by subsetting gencode gtf with replacement):
+
+| Library | Function | Time (s) | Memory (GB) |
+| --- | --- | --- | --- |
+| bx-python | build | 161.7 | 2.5 |
+| ncls | build | 3.15 | 0.5 |
+| bx-python | overlap | 148.4 | 4.3 |
+| ncls | overlap | 7.2 | 0.5 |
+
+Building is 50 times faster and overlap queries are 20 times faster. Memory
+usage is one fifth and one ninth.
+
+## Original paper
+
+> Alexander V. Alekseyenko, Christopher J. Lee; Nested Containment List (NCList): a new algorithm for accelerating interval query of genome alignment and interval databases, Bioinformatics, Volume 23, Issue 11, 1 June 2007, Pages 1386–1393, https://doi.org/10.1093/bioinformatics/btl647
=====================================
README.md
=====================================
@@ -1,5 +1,11 @@
# Nested containment list
+## Deprecation notice
+
+While I'll continue maintaining this library I suggest you change to [ruranges](https://github.com/pyranges/ruranges/) which is a more lightweight and faster library with many more operations than NCLS.
+
+## NCLS
+
[Build Status](https://travis-ci.org/hunt-genes/ncls) [PyPI version](https://badge.fury.io/py/ncls)
The Nested Containment List is a datastructure for interval overlap queries,
=====================================
examples/test_all_overlaps_both.py
=====================================
@@ -1,6 +1,3 @@
-
-
-
from ncls import NCLS
import pickle
@@ -8,13 +5,13 @@ import pandas as pd
import numpy as np
-starts = np.array(list(reversed([3, 5, 8])), dtype=np.long)
-ends = np.array(list(reversed([6, 7, 9])), dtype=np.long)
-indexes = np.array(list(reversed([0, 1, 2])), dtype=np.long)
+starts = np.array(list(reversed([3, 5, 8])), dtype=np.int)
+ends = np.array(list(reversed([6, 7, 9])), dtype=np.int)
+indexes = np.array(list(reversed([0, 1, 2])), dtype=np.int)
-# starts = np.array([3, 5, 8], dtype=np.long)
-# ends = np.array([6, 7, 9], dtype=np.long)
-# indexes = np.array([0, 1, 2], dtype=np.long)
+# starts = np.array([3, 5, 8], dtype=np.int)
+# ends = np.array([6, 7, 9], dtype=np.int)
+# indexes = np.array([0, 1, 2], dtype=np.int)
ncls = NCLS(starts, ends, indexes)
=====================================
examples/test_find_overlap_list.py
=====================================
@@ -1,6 +1,3 @@
-
-
-
from ncls import NCLS
import pickle
@@ -14,9 +11,9 @@ ids = starts
ncls = NCLS(starts, ends, ids)
-starts2 = np.array([0, 10, 20, 40000], dtype=np.long)
-ends2 = np.array([5, 15, 25, 50000], dtype=np.long)
-indexes2 = np.array([0, 1, 2, 3], dtype=np.long)
+starts2 = np.array([0, 10, 20, 40000], dtype=np.int)
+ends2 = np.array([5, 15, 25, 50000], dtype=np.int)
+indexes2 = np.array([0, 1, 2, 3], dtype=np.int)
print(starts)
=====================================
examples/test_fncls.py
=====================================
@@ -1,8 +1,10 @@
from ncls import FNCLS
import numpy as np
+
np.random.seed(0)
import pandas as pd
+
size = int(1e4)
starts = np.random.randint(0, high=int(1e6), size=size) + np.random.random()
=====================================
ncls.egg-info/PKG-INFO
=====================================
@@ -0,0 +1,164 @@
+Metadata-Version: 2.4
+Name: ncls
+Version: 0.0.70
+Summary: A fast interval tree-like implementation in C, wrapped for the Python ecosystem.
+Author-email: Endre Bakken Stovner <endbak at pm.me>
+License: MIT
+Project-URL: Homepage, http://github.com/pyranges/ncls
+Keywords: ncls,interval-tree,genomics
+Classifier: Programming Language :: Python :: 3
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Other Environment
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: POSIX :: Linux
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Topic :: Scientific/Engineering
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy
+Provides-Extra: dev
+Requires-Dist: black; extra == "dev"
+Requires-Dist: bumpver; extra == "dev"
+Requires-Dist: isort; extra == "dev"
+Requires-Dist: pip-tools; extra == "dev"
+Requires-Dist: pytest; extra == "dev"
+Dynamic: license-file
+
+# Nested containment list
+
+## Deprecation notice
+
+While I'll continue maintaining this library I suggest you change to [ruranges](https://github.com/pyranges/ruranges/) which is a more lightweight and faster library with many more operations than NCLS.
+
+## NCLS
+
+[Build Status](https://travis-ci.org/hunt-genes/ncls) [PyPI version](https://badge.fury.io/py/ncls)
+
+The Nested Containment List is a datastructure for interval overlap queries,
+like the interval tree. It is usually an order of magnitude faster than the
+interval tree both for building and query lookups.
+
+The implementation here is a revived version of the one used in the now defunct
+PyGr library, which died of bitrot. I have made it less memory-consuming and
+created wrapper functions which allow batch-querying the NCLS for further speed
+gains.
+
+It was implemented to be the cornerstone of the PyRanges project, but I have made
+it available to the Python community as a stand-alone library. Enjoy.
+
+Original Paper: https://academic.oup.com/bioinformatics/article/23/11/1386/199545
+Cite: http://dx.doi.org/10.1093/bioinformatics/btz615
+
+## Cite
+
+If you use this library in published research cite
+
+http://dx.doi.org/10.1093/bioinformatics/btz615
+
+## Install
+
+```
+pip install ncls
+```
+
+## Usage
+
+```python
+from ncls import NCLS
+
+import pandas as pd
+
+starts = pd.Series(range(0, 5))
+ends = starts + 100
+ids = starts
+
+subject_df = pd.DataFrame({"Start": starts, "End": ends}, index=ids)
+
+print(subject_df)
+# Start End
+# 0 0 100
+# 1 1 101
+# 2 2 102
+# 3 3 103
+# 4 4 104
+
+ncls = NCLS(starts.values, ends.values, ids.values)
+
+# python API, slower
+it = ncls.find_overlap(0, 2)
+for i in it:
+ print(i)
+# (0, 100, 0)
+# (1, 101, 1)
+
+starts_query = pd.Series([1, 3])
+ends_query = pd.Series([52, 14])
+indexes_query = pd.Series([10000, 100])
+
+query_df = pd.DataFrame({"Start": starts_query.values, "End": ends_query.values}, index=indexes_query.values)
+
+query_df
+# Start End
+# 10000 1 52
+# 100 3 14
+
+
+# everything done in C/Cython; faster
+l_idxs, r_idxs = ncls.all_overlaps_both(starts_query.values, ends_query.values, indexes_query.values)
+l_idxs, r_idxs
+# (array([10000, 10000, 10000, 10000, 10000, 100, 100, 100, 100,
+# 100]), array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4]))
+
+print(query_df.loc[l_idxs])
+# Start End
+# 10000 1 52
+# 10000 1 52
+# 10000 1 52
+# 10000 1 52
+# 10000 1 52
+# 100 3 14
+# 100 3 14
+# 100 3 14
+# 100 3 14
+# 100 3 14
+print(subject_df.loc[r_idxs])
+# Start End
+# 0 0 100
+# 1 1 101
+# 2 2 102
+# 3 3 103
+# 4 4 104
+# 0 0 100
+# 1 1 101
+# 2 2 102
+# 3 3 103
+# 4 4 104
+
+# return intervals in python (slow/mem-consuming)
+intervals = ncls.intervals()
+intervals
+# [(0, 100, 0), (1, 101, 1), (2, 102, 2), (3, 103, 3), (4, 104, 4)]
+```
+
+There is also an experimental floating point version of the NCLS called FNCLS.
+See the examples folder.
+
+## Benchmark
+
+Test file of 100 million intervals (created by subsetting gencode gtf with replacement):
+
+| Library | Function | Time (s) | Memory (GB) |
+| --- | --- | --- | --- |
+| bx-python | build | 161.7 | 2.5 |
+| ncls | build | 3.15 | 0.5 |
+| bx-python | overlap | 148.4 | 4.3 |
+| ncls | overlap | 7.2 | 0.5 |
+
+Building is 50 times faster and overlap queries are 20 times faster. Memory
+usage is one fifth and one ninth.
+
+## Original paper
+
+> Alexander V. Alekseyenko, Christopher J. Lee; Nested Containment List (NCList): a new algorithm for accelerating interval query of genome alignment and interval databases, Bioinformatics, Volume 23, Issue 11, 1 June 2007, Pages 1386–1393, https://doi.org/10.1093/bioinformatics/btl647
=====================================
ncls.egg-info/SOURCES.txt
=====================================
@@ -0,0 +1,44 @@
+.gitattributes
+.gitignore
+.travis.yml
+CHANGELOG
+LICENSE
+MANIFEST.in
+README.md
+build_wheels.sh
+pyproject.toml
+setup.py
+.github/workflows/build_and_upload_wheels.yml
+.github/workflows/install_doctest_lint_typecheck.yml
+examples/test_all_overlaps_both.py
+examples/test_find_overlap_list.py
+examples/test_fncls.py
+examples/test_mini.csv
+ncls/__init__.py
+ncls.egg-info/PKG-INFO
+ncls.egg-info/SOURCES.txt
+ncls.egg-info/dependency_links.txt
+ncls.egg-info/requires.txt
+ncls.egg-info/top_level.txt
+ncls/src/__init__.py
+ncls/src/cfncls.pxd
+ncls/src/cgraph.h
+ncls/src/cncls.pxd
+ncls/src/cncls32.pxd
+ncls/src/default.h
+ncls/src/fintervaldb.c
+ncls/src/fintervaldb.h
+ncls/src/fncls.c
+ncls/src/fncls.pyx
+ncls/src/intervaldb.c
+ncls/src/intervaldb.h
+ncls/src/intervaldb32.c
+ncls/src/intervaldb32.h
+ncls/src/ncls.c
+ncls/src/ncls.pyx
+ncls/src/ncls32.c
+ncls/src/ncls32.pyx
+ncls/src/utarray.h
+tests/test_1024.py
+tests/test_ncls.py
+tests/valgrind-python.supp
\ No newline at end of file
=====================================
ncls.egg-info/dependency_links.txt
=====================================
@@ -0,0 +1 @@
+
=====================================
ncls.egg-info/requires.txt
=====================================
@@ -0,0 +1,8 @@
+numpy
+
+[dev]
+black
+bumpver
+isort
+pip-tools
+pytest
=====================================
ncls.egg-info/top_level.txt
=====================================
@@ -0,0 +1,4 @@
+dist
+examples
+ncls
+tests
=====================================
ncls/__init__.py
=====================================
@@ -1,10 +1,19 @@
-from ncls.src.ncls import NCLS64
-from ncls.src.ncls32 import NCLS32
-
import numpy as np
-def NCLS(starts, ends, ids):
+try:
+ from importlib.metadata import version as _ver
+except ImportError:
+ try:
+ from importlib_metadata import version as _ver
+ except ImportError:
+ import pkg_resources
+ _ver = lambda name: pkg_resources.get_distribution(name).version
+
+from ncls.src.ncls import NCLS64 # type: ignore
+from ncls.src.ncls32 import NCLS32 # type: ignore
+
+def NCLS(starts, ends, ids):
if isinstance(starts, list) or "pandas" in str(type(starts)):
starts, ends, ids = [np.array(s) for s in [starts, ends, ids]]
@@ -18,8 +27,7 @@ def NCLS(starts, ends, ids):
def FNCLS(starts, ends, ids):
-
- from ncls.src.fncls import FNCLS
+ from ncls.src.fncls import FNCLS # type: ignore
if isinstance(starts, list) or "pandas" in str(type(starts)):
starts, ends, ids = [np.array(s) for s in [starts, ends, ids]]
@@ -28,5 +36,3 @@ def FNCLS(starts, ends, ids):
return FNCLS(starts, ends.astype(np.double), ids)
else:
raise Exception("Starts/Ends not double: " + str(starts.dtype))
-
-from ncls.version import __version__
=====================================
ncls/src/cfncls.pxd
=====================================
@@ -51,10 +51,6 @@ cdef extern from "ncls/src/fintervaldb.h":
int free_interval_iterator(IntervalIterator *it)
IntervalIterator *reset_interval_iterator(IntervalIterator *it)
int *alloc_array(int n)
- int find_intervals_stack(int start_stack[], int end_stack[], int sp, int start,
- int end, IntervalMap im[], int n,
- SublistHeader subheader[], IntervalMap buf[],
- int *nfound)
int find_intervals(IntervalIterator *it0,
double start,
=====================================
ncls/src/cgraph.c deleted
=====================================
@@ -1,119 +0,0 @@
-
-
-#include "cgraph.h"
-
-
-CDict *cdict_alloc(int n)
-{
- CDict *d=0;
-
- d=calloc(1,sizeof(CDict));
- if (d==0) /* calloc FAILED!! */
- return 0;
- d->dict=calloc(n,sizeof(CDictEntry));
- if (d->dict==0) { /* calloc FAILED!! */
- free(d); /* DUMP OUR EMPTY STRUCTURE */
- return 0;
- }
- return d; /* RETURN OUR DATA STRUCTURE */
-}
-
-
-int cdict_free(CDict *d)
-{
- free(d->dict);
- free(d);
- return 0;
-}
-
-
-int cdict_qsort_cmp(const void *void_a,const void *void_b)
-{ /* STRAIGHTFORWARD COMPARISON OF SIGNED start VALUES, LONGER INTERVALS 1ST */
- CDictEntry *a=(CDictEntry *)void_a,*b=(CDictEntry *)void_b;
- if (a->k<b->k)
- return -1;
- else if (a->k>b->k)
- return 1;
- else
- return 0;
-}
-
-
-CDictEntry *cdict_getitem(CDict *d,int k)
-{
- int l=0,mid,r;
- CDictEntry *p;
-
- if (d==0) /* HANDLE NULL POINTER PROPERLY */
- return 0;
-
- p=d->dict; /* SORTED ARRAY OF ENTRIES */
- r=d->n;
- while (l<r) {
- mid=(l+r)/2;
- if (p[mid].k==k)
- return p+mid;
- else if (p[mid].k<k)
- l=mid+1;
- else
- r=mid;
- }
- return 0;
-}
-
-
-
-CGraph *cgraph_alloc(int n)
-{
- CGraph *d=0;
-
- d=calloc(1,sizeof(CGraph));
- if (d==0) /* calloc FAILED!! */
- return 0;
- d->dict=calloc(n,sizeof(CGraphEntry));
- if (d->dict==0) { /* calloc FAILED!! */
- free(d); /* DUMP OUR EMPTY STRUCTURE */
- return 0;
- }
- return d; /* RETURN OUR DATA STRUCTURE */
-}
-
-
-int cgraph_free(CGraph *d)
-{
- int i;
- for (i=0;i<d->n;i++) /* DUMP ALL ASSOCIATED DICTIONARIES */
- cdict_free(d->dict[i].v);
- free(d->dict);
- free(d);
- return 0;
-}
-
-
-CGraphEntry *cgraph_getitem(CGraph *d,int k)
-{
- int l=0,mid,r;
- CGraphEntry *p;
-
- if (d==0) /* HANDLE NULL POINTER PROPERLY */
- return 0;
-
- p=d->dict; /* SORTED ARRAY OF ENTRIES */
- r=d->n;
- while (l<r) {
- mid=(l+r)/2;
- if (p[mid].k==k)
- return p+mid;
- else if (p[mid].k<k)
- l=mid+1;
- else
- r=mid;
- }
- return 0;
-}
-
-
-int *calloc_int(int n)
-{
- return (int *)calloc(n,sizeof(int));
-}
=====================================
ncls/src/cncls32.pxd
=====================================
@@ -1,6 +1,7 @@
from libc.stdint cimport int32_t, int64_t
+
cdef extern from "stdlib.h":
void free(void *)
void *malloc(size_t)
=====================================
ncls/src/fintervaldb.c
=====================================
@@ -909,336 +909,3 @@ int free_interval_dbfile(IntervalDBFile *db_file)
-
-/* int save_text_file(char filestem[],char basestem[], */
-/* char err_msg[],FILE *ofile) */
-/* { */
-/* int i,n,ntop,div,nlists,nii,npad; */
-/* char path[2048]; */
-/* IntervalMap im; */
-/* IntervalIndex ii; */
-/* SublistHeader subheader; */
-/* FILE *ifile=NULL; */
-
-/* sprintf(path,"%s.size",filestem); /\* READ BASIC SIZE INFO*\/ */
-/* ifile=fopen(path,"r"); /\* text file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* if (5!=fscanf(ifile,"%d %d %d %d %d",&n,&ntop,&div,&nlists,&nii)) */
-/* goto fread_error_occurred; */
-/* fclose(ifile); */
-/* npad=ntop%div; */
-/* if (npad>0) /\* PAD TO AN EXACT MULTIPLE OF div *\/ */
-/* npad=ntop+(div-npad); */
-/* else /\* AN EXACT MULTIPLE OF div, SO NO PADDING *\/ */
-/* npad=ntop; */
-
-/* if (fprintf(ofile,"SIZE\t%s\t%d %d %d %d %d\n", */
-/* basestem,n,ntop,div,nlists,nii)<0) */
-/* goto write_error_occurred; */
-
-/* if (nii>0) { */
-/* sprintf(path,"%s.index",filestem); /\* READ THE COMPACTED INDEX *\/ */
-/* ifile=fopen(path,"rb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nii;i++) { */
-/* if (1!=fread(&ii,sizeof(IntervalIndex),1,ifile)) */
-/* goto fread_error_occurred; */
-/* if (fprintf(ofile,"I %d %d\n",ii.start,ii.end)<0) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* if(nlists>0){ */
-/* sprintf(path,"%s.subhead",filestem); /\* READ THE SUBHEADER LIST *\/ */
-/* ifile=fopen(path,"rb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nlists;i++) { */
-/* if (1!=fread(&subheader,sizeof(SublistHeader),1,ifile)) */
-/* goto fread_error_occurred; */
-/* if (fprintf(ofile,"S %d %d\n",subheader.start,subheader.len)<0) */
-/* goto write_error_occurred; */
-/* npad=subheader.start+subheader.len; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* if (npad>0) { */
-/* sprintf(path,"%s.idb",filestem); /\* READ THE DATABASE *\/ */
-/* ifile=fopen(path,"rb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<npad;i++) { */
-/* if (1!=fread(&im,sizeof(IntervalMap),1,ifile)) */
-/* goto fread_error_occurred; */
-/* if (fprintf(ofile,"M %d %d %d %d %d %d\n",im.start,im.end, */
-/* im.target_id,im.target_start, */
-/* im.target_end,im.sublist)<0) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-/* } */
-/* return 0; /\* INDICATES NO ERROR OCCURRED *\/ */
-/* unable_to_open_file: */
-/* if (err_msg) */
-/* sprintf(err_msg,"unable to open file %s",path); */
-/* return -1; */
-/* fread_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error or EOF reading file %s",path); */
-/* return -1; */
-/* write_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error writing output file! out of disk space?"); */
-/* return -1; */
-/* } */
-
-
-
-/* int text_file_to_binaries(FILE *infile,char buildpath[],char err_msg[]) */
-/* { */
-/* int i,n,ntop,div,nlists,nii,npad; */
-/* char path[2048],line[32768],filestem[2048]; */
-/* IntervalMap im; */
-/* IntervalIndex ii; */
-/* SublistHeader subheader; */
-/* FILE *ifile=NULL; */
-
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (6!=sscanf(line,"SIZE\t%s\t%d %d %d %d %d", */
-/* filestem,&n,&ntop,&div,&nlists,&nii)) */
-/* goto fread_error_occurred; */
-/* sprintf(path,"%s%s.size",buildpath,filestem); /\* SAVE BASIC SIZE INFO*\/ */
-/* ifile=fopen(path,"w"); /\* text file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* if (fprintf(ifile,"%d %d %d %d %d\n",n,ntop,div,nlists,nii)<0) */
-/* goto write_error_occurred; */
-/* fclose(ifile); */
-/* npad=ntop%div; */
-/* if (npad>0) /\* PAD TO AN EXACT MULTIPLE OF div *\/ */
-/* npad=ntop+(div-npad); */
-/* else /\* AN EXACT MULTIPLE OF div, SO NO PADDING *\/ */
-/* npad=ntop; */
-
-/* if (nii>0) { */
-/* sprintf(path,"%s%s.index",buildpath,filestem); /\* SAVE INDEX INFO*\/ */
-/* ifile=fopen(path,"wb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nii;i++) { */
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (2!=sscanf(line,"I %d %d",&(ii.start),&(ii.end))) */
-/* goto fread_error_occurred; */
-/* if (1!=fwrite(&ii,sizeof(IntervalIndex),1,ifile)) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* if(nlists>0){ */
-/* sprintf(path,"%s%s.subhead",buildpath,filestem); /\* SAVE THE SUBHEADER LIST *\/ */
-/* ifile=fopen(path,"wb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nlists;i++) { */
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (2!=sscanf(line,"S %d %d",&(subheader.start),&(subheader.len))) */
-/* goto fread_error_occurred; */
-/* if (1!=fwrite(&subheader,sizeof(SublistHeader),1,ifile)) */
-/* goto write_error_occurred; */
-/* npad=subheader.start+subheader.len; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* sprintf(path,"%s%s.idb",buildpath,filestem); /\* SAVE THE ACTUAL INTERVAL DB*\/ */
-/* ifile=fopen(path,"wb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<npad;i++) { */
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (6!=sscanf(line,"M %d %d %d %d %d %d",&(im.start),&(im.end), */
-/* &(im.target_id),&(im.target_start), */
-/* &(im.target_end),&(im.sublist))) */
-/* goto fread_error_occurred; */
-/* if (1!=fwrite(&im,sizeof(IntervalMap),1,ifile)) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-
-/* return 0; /\* INDICATES NO ERROR OCCURRED *\/ */
-/* unable_to_open_file: */
-/* if (err_msg) */
-/* sprintf(err_msg,"unable to open file %s",path); */
-/* return -1; */
-/* fread_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error or EOF reading input file"); */
-/* return -1; */
-/* write_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error writing file %s! out of disk space?", */
-/* path); */
-/* return -1; */
-/* } */
-
-
-
-/* int main(int argc, char **argv) { */
-
-/* int interval_map_size = 1024; */
-
-/* IntervalMap *im; */
-/* int len = 10000000; */
-/* SublistHeader *sl; */
-/* int *p_n = malloc(sizeof *p_n); */
-/* int *p_nlists = malloc(sizeof *p_nlists); */
-/* int *nhits = malloc(sizeof *nhits); */
-
-/* FILE *ifp; */
-
-/* ifp = fopen("../test.csv", "r"); */
-
-/* struct timeval tv1, tv2; */
-/* gettimeofday(&tv1, NULL); */
-/* im = read_intervals(len, ifp); */
-/* gettimeofday(&tv2, NULL); */
-/* printf ("Total time = %f seconds\n", */
-/* (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + */
-/* (double) (tv2.tv_sec - tv1.tv_sec)); */
-
-/* gettimeofday(&tv1, NULL); */
-/* sl = build_nested_list(im, len, p_n, p_nlists); */
-/* gettimeofday(&tv2, NULL); */
-/* printf ("Total time = %f seconds\n", */
-/* (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + */
-/* (double) (tv2.tv_sec - tv1.tv_sec)); */
-/* IntervalIterator *it = interval_iterator_alloc(); */
-
-/* IntervalMap im_buf[interval_map_size]; */
-
-/* printf("*p_nlists %d\n", *p_nlists); */
-/* find_intervals(it, 0, 500, im, len, sl, *p_nlists, im_buf, interval_map_size, nhits, &it); */
-/* printf("*nhits %d\n", *nhits); */
-
-/* int i; */
-/* for (i = 0; i < *nhits; i++){ */
-/* printf("Start %d End %d Id %d\n", im_buf[i].target_start, im_buf[i].target_end, im_buf[i].target_id); */
-/* } */
-
-/* free(p_n); */
-/* free(p_nlists); */
-/* free(nhits); */
-
-/* } */
-
-
-/* int find_k_next(int start, int end, */
-/* IntervalMap im[], int n, */
-/* SublistHeader subheader[], int nlists, */
-/* IntervalMap buf[], int ktofind, */
-/* int *p_nreturn) */
-/* { */
-/* IntervalIterator *it=NULL,*it2=NULL; */
-/* int nfound=0,j,k; */
-/* /\* IntervalMap *results = interval_map_alloc(ktofind); *\/ */
-
-/* /\* CALLOC(it,1,IntervalIterator); *\/ */
-
-/* if (it->n == 0) { /\* DEFAULT: SEARCH THE TOP NESTED LIST *\/ */
-/* it->n=n; */
-/* it->i=find_overlap_start(start,end,im,n); */
-/* } */
-
-/* do { */
-/* while (it->i>=0 && it->i<it->n && (nfound < ktofind)) { */
-/* if (!HAS_OVERLAP_POSITIVE(im[it->i],start,end)) { */
-/* buf[nfound] = im[it->i]; /\*SAVE THIS HIT TO BUFFER *\/ */
-/* nfound++; */
-/* } */
-/* k=im[it->i].sublist; /\* GET SUBLIST OF i IF ANY *\/ */
-/* it->i++; /\* ADVANCE TO NEXT INTERVAL *\/ */
-/* if (k>=0 && (j=find_suboverlap_start(start,end,k,im,subheader))>=0) { */
-/* PUSH_ITERATOR_STACK(it,it2,IntervalIterator); /\* RECURSE TO SUBLIST *\/ */
-/* it2->i = j; /\* START OF OVERLAPPING HITS IN THIS SUBLIST *\/ */
-/* it2->n = subheader[k].start+subheader[k].len; /\* END OF SUBLIST *\/ */
-/* it=it2; /\* PUSH THE ITERATOR STACK *\/ */
-/* } */
-/* } */
-/* } while (POP_ITERATOR_STACK(it)); /\* IF STACK EXHAUSTED, EXIT *\/ */
-/* free_interval_iterator(it); /\* takes care of the whole stack *\/ */
-/* it=NULL; /\* ITERATOR IS EXHAUSTED *\/ */
-
-/* *p_nreturn=nfound; /\* #INTERVALS FOUND IN THIS PASS *\/ */
-/* } */
-
-
-int find_intervals_stack(int start_stack[], int end_stack[], int sp, int start,
- int end, IntervalMap im[], int n,
- SublistHeader subheader[], IntervalMap buf[],
- int *p_nreturn)
-{
- /* IntervalIterator *it=NULL,*it2=NULL; */
- /* printf("In very beginning!\n"); */
- /* return 0; */
- int nfound = 0, j, k;
- /* printf("j: %d, sp: %d, start_stack[sp]: %d", 0, sp, sp); */
-
- /* if (sp == 0) { */
- clock_t t;
- t = clock();
- j = find_overlap_start(start,end,im,n);
- t = clock() - t;
- double time_taken = ((double)t)/CLOCKS_PER_SEC; // in seconds
- printf("fun() took %f seconds to execute \n", time_taken);
- start_stack[sp] = j;
- end_stack[sp] = n;
- /* } */
-
- /* printf("We are before loop\n"); */
- /* printf("start, end: %d, %d", start_stack[sp], end_stack[sp]); */
-
- /* fflush(stdout); */
-
- while (sp >= 0) {
- /* printf("Outer loop. sp: %d, st: %d, end: %d\n", sp, start_stack[sp], end_stack[sp]); */
- /* fflush(stdout); */
- while (start_stack[sp] >= 0 && start_stack[sp] < end_stack[sp] && \
- HAS_OVERLAP_POSITIVE(im[start_stack[sp]], start, end)) {
- /* printf("Inner loop. sp: %d\n", start_stack[sp]); */
- /* printf("Interval added: %d, %d, %d\n", im[start_stack[sp]].start, im[start_stack[sp]].end, im[start_stack[sp]].target_id); */
- memcpy(buf+nfound, im + start_stack[sp], sizeof(IntervalMap)); /*SAVE THIS HIT TO BUFFER */
-
- nfound++;
- k=im[sp].sublist; /* GET SUBLIST OF i IF ANY */
-
- start_stack[sp++]++; /* ADVANCE TO NEXT INTERVAL */
- if (k>=0 && (j=find_suboverlap_start(start,end,k,im,subheader))>=0) {
- sp++;
- start_stack[sp] = j;
- end_stack[sp] = subheader[k].start + subheader[k].len; /* END OF SUBLIST */
- }
-
- if (nfound>=1024){ /* FILLED THE BUFFER, RETURN THE RESULTS SO FAR */
- goto finally_return_result;
- }
- }
-
- sp--;
-
- }
-
- finally_return_result:
-
- *p_nreturn = nfound; /* #INTERVALS FOUND IN THIS PASS */
-
- return sp;
-}
=====================================
ncls/src/fncls.pyx
=====================================
@@ -1,14 +1,13 @@
-cimport ncls.src.cfncls as cn
-
cimport cython
-
from libc.stdint cimport int64_t
+from libc.stdlib cimport malloc
+cimport ncls.src.cfncls as cn
cimport ncls.src.cncls as cn
-from libc.stdlib cimport malloc
import numpy as np
+
cdef inline int int_max(int a, int b): return a if a >= b else b
cdef inline int int_min(int a, int b): return a if a <= b else b
# import ctypes as c
=====================================
ncls/src/intervaldb.c
=====================================
@@ -931,338 +931,3 @@ int free_interval_dbfile(IntervalDBFile *db_file)
return 0;
}
-
-
-
-/* int save_text_file(char filestem[],char basestem[], */
-/* char err_msg[],FILE *ofile) */
-/* { */
-/* int i,n,ntop,div,nlists,nii,npad; */
-/* char path[2048]; */
-/* IntervalMap im; */
-/* IntervalIndex ii; */
-/* SublistHeader subheader; */
-/* FILE *ifile=NULL; */
-
-/* sprintf(path,"%s.size",filestem); /\* READ BASIC SIZE INFO*\/ */
-/* ifile=fopen(path,"r"); /\* text file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* if (5!=fscanf(ifile,"%d %d %d %d %d",&n,&ntop,&div,&nlists,&nii)) */
-/* goto fread_error_occurred; */
-/* fclose(ifile); */
-/* npad=ntop%div; */
-/* if (npad>0) /\* PAD TO AN EXACT MULTIPLE OF div *\/ */
-/* npad=ntop+(div-npad); */
-/* else /\* AN EXACT MULTIPLE OF div, SO NO PADDING *\/ */
-/* npad=ntop; */
-
-/* if (fprintf(ofile,"SIZE\t%s\t%d %d %d %d %d\n", */
-/* basestem,n,ntop,div,nlists,nii)<0) */
-/* goto write_error_occurred; */
-
-/* if (nii>0) { */
-/* sprintf(path,"%s.index",filestem); /\* READ THE COMPACTED INDEX *\/ */
-/* ifile=fopen(path,"rb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nii;i++) { */
-/* if (1!=fread(&ii,sizeof(IntervalIndex),1,ifile)) */
-/* goto fread_error_occurred; */
-/* if (fprintf(ofile,"I %d %d\n",ii.start,ii.end)<0) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* if(nlists>0){ */
-/* sprintf(path,"%s.subhead",filestem); /\* READ THE SUBHEADER LIST *\/ */
-/* ifile=fopen(path,"rb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nlists;i++) { */
-/* if (1!=fread(&subheader,sizeof(SublistHeader),1,ifile)) */
-/* goto fread_error_occurred; */
-/* if (fprintf(ofile,"S %d %d\n",subheader.start,subheader.len)<0) */
-/* goto write_error_occurred; */
-/* npad=subheader.start+subheader.len; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* if (npad>0) { */
-/* sprintf(path,"%s.idb",filestem); /\* READ THE DATABASE *\/ */
-/* ifile=fopen(path,"rb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<npad;i++) { */
-/* if (1!=fread(&im,sizeof(IntervalMap),1,ifile)) */
-/* goto fread_error_occurred; */
-/* if (fprintf(ofile,"M %d %d %d %d %d %d\n",im.start,im.end, */
-/* im.target_id,im.target_start, */
-/* im.target_end,im.sublist)<0) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-/* } */
-/* return 0; /\* INDICATES NO ERROR OCCURRED *\/ */
-/* unable_to_open_file: */
-/* if (err_msg) */
-/* sprintf(err_msg,"unable to open file %s",path); */
-/* return -1; */
-/* fread_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error or EOF reading file %s",path); */
-/* return -1; */
-/* write_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error writing output file! out of disk space?"); */
-/* return -1; */
-/* } */
-
-
-
-/* int text_file_to_binaries(FILE *infile,char buildpath[],char err_msg[]) */
-/* { */
-/* int i,n,ntop,div,nlists,nii,npad; */
-/* char path[2048],line[32768],filestem[2048]; */
-/* IntervalMap im; */
-/* IntervalIndex ii; */
-/* SublistHeader subheader; */
-/* FILE *ifile=NULL; */
-
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (6!=sscanf(line,"SIZE\t%s\t%d %d %d %d %d", */
-/* filestem,&n,&ntop,&div,&nlists,&nii)) */
-/* goto fread_error_occurred; */
-/* sprintf(path,"%s%s.size",buildpath,filestem); /\* SAVE BASIC SIZE INFO*\/ */
-/* ifile=fopen(path,"w"); /\* text file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* if (fprintf(ifile,"%d %d %d %d %d\n",n,ntop,div,nlists,nii)<0) */
-/* goto write_error_occurred; */
-/* fclose(ifile); */
-/* npad=ntop%div; */
-/* if (npad>0) /\* PAD TO AN EXACT MULTIPLE OF div *\/ */
-/* npad=ntop+(div-npad); */
-/* else /\* AN EXACT MULTIPLE OF div, SO NO PADDING *\/ */
-/* npad=ntop; */
-
-/* if (nii>0) { */
-/* sprintf(path,"%s%s.index",buildpath,filestem); /\* SAVE INDEX INFO*\/ */
-/* ifile=fopen(path,"wb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nii;i++) { */
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (2!=sscanf(line,"I %d %d",&(ii.start),&(ii.end))) */
-/* goto fread_error_occurred; */
-/* if (1!=fwrite(&ii,sizeof(IntervalIndex),1,ifile)) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* if(nlists>0){ */
-/* sprintf(path,"%s%s.subhead",buildpath,filestem); /\* SAVE THE SUBHEADER LIST *\/ */
-/* ifile=fopen(path,"wb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<nlists;i++) { */
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (2!=sscanf(line,"S %d %d",&(subheader.start),&(subheader.len))) */
-/* goto fread_error_occurred; */
-/* if (1!=fwrite(&subheader,sizeof(SublistHeader),1,ifile)) */
-/* goto write_error_occurred; */
-/* npad=subheader.start+subheader.len; */
-/* } */
-/* fclose(ifile); */
-/* } */
-
-/* sprintf(path,"%s%s.idb",buildpath,filestem); /\* SAVE THE ACTUAL INTERVAL DB*\/ */
-/* ifile=fopen(path,"wb"); /\* binary file *\/ */
-/* if (!ifile) */
-/* goto unable_to_open_file; */
-/* for (i=0;i<npad;i++) { */
-/* if (NULL==fgets(line,32767,infile)) */
-/* goto fread_error_occurred; */
-/* if (6!=sscanf(line,"M %d %d %d %d %d %d",&(im.start),&(im.end), */
-/* &(im.target_id),&(im.target_start), */
-/* &(im.target_end),&(im.sublist))) */
-/* goto fread_error_occurred; */
-/* if (1!=fwrite(&im,sizeof(IntervalMap),1,ifile)) */
-/* goto write_error_occurred; */
-/* } */
-/* fclose(ifile); */
-
-/* return 0; /\* INDICATES NO ERROR OCCURRED *\/ */
-/* unable_to_open_file: */
-/* if (err_msg) */
-/* sprintf(err_msg,"unable to open file %s",path); */
-/* return -1; */
-/* fread_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error or EOF reading input file"); */
-/* return -1; */
-/* write_error_occurred: */
-/* if (err_msg) */
-/* sprintf(err_msg,"error writing file %s! out of disk space?", */
-/* path); */
-/* return -1; */
-/* } */
-
-
-
-/* int main(int argc, char **argv) { */
-
-/* int interval_map_size = 1024; */
-
-/* IntervalMap *im; */
-/* int len = 10000000; */
-/* SublistHeader *sl; */
-/* int *p_n = malloc(sizeof *p_n); */
-/* int *p_nlists = malloc(sizeof *p_nlists); */
-/* int *nhits = malloc(sizeof *nhits); */
-
-/* FILE *ifp; */
-
-/* ifp = fopen("../test.csv", "r"); */
-
-/* struct timeval tv1, tv2; */
-/* gettimeofday(&tv1, NULL); */
-/* im = read_intervals(len, ifp); */
-/* gettimeofday(&tv2, NULL); */
-/* printf ("Total time = %f seconds\n", */
-/* (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + */
-/* (double) (tv2.tv_sec - tv1.tv_sec)); */
-
-/* gettimeofday(&tv1, NULL); */
-/* sl = build_nested_list(im, len, p_n, p_nlists); */
-/* gettimeofday(&tv2, NULL); */
-/* printf ("Total time = %f seconds\n", */
-/* (double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + */
-/* (double) (tv2.tv_sec - tv1.tv_sec)); */
-/* IntervalIterator *it = interval_iterator_alloc(); */
-
-/* IntervalMap im_buf[interval_map_size]; */
-
-/* printf("*p_nlists %d\n", *p_nlists); */
-/* find_intervals(it, 0, 500, im, len, sl, *p_nlists, im_buf, interval_map_size, nhits, &it); */
-/* printf("*nhits %d\n", *nhits); */
-
-/* int i; */
-/* for (i = 0; i < *nhits; i++){ */
-/* printf("Start %d End %d Id %d\n", im_buf[i].target_start, im_buf[i].target_end, im_buf[i].target_id); */
-/* } */
-
-/* free(p_n); */
-/* free(p_nlists); */
-/* free(nhits); */
-
-/* } */
-
-
-/* int find_k_next(int start, int end, */
-/* IntervalMap im[], int n, */
-/* SublistHeader subheader[], int nlists, */
-/* IntervalMap buf[], int ktofind, */
-/* int *p_nreturn) */
-/* { */
-/* IntervalIterator *it=NULL,*it2=NULL; */
-/* int nfound=0,j,k; */
-/* /\* IntervalMap *results = interval_map_alloc(ktofind); *\/ */
-
-/* /\* CALLOC(it,1,IntervalIterator); *\/ */
-
-/* if (it->n == 0) { /\* DEFAULT: SEARCH THE TOP NESTED LIST *\/ */
-/* it->n=n; */
-/* it->i=find_overlap_start(start,end,im,n); */
-/* } */
-
-/* do { */
-/* while (it->i>=0 && it->i<it->n && (nfound < ktofind)) { */
-/* if (!HAS_OVERLAP_POSITIVE(im[it->i],start,end)) { */
-/* buf[nfound] = im[it->i]; /\*SAVE THIS HIT TO BUFFER *\/ */
-/* nfound++; */
-/* } */
-/* k=im[it->i].sublist; /\* GET SUBLIST OF i IF ANY *\/ */
-/* it->i++; /\* ADVANCE TO NEXT INTERVAL *\/ */
-/* if (k>=0 && (j=find_suboverlap_start(start,end,k,im,subheader))>=0) { */
-/* PUSH_ITERATOR_STACK(it,it2,IntervalIterator); /\* RECURSE TO SUBLIST *\/ */
-/* it2->i = j; /\* START OF OVERLAPPING HITS IN THIS SUBLIST *\/ */
-/* it2->n = subheader[k].start+subheader[k].len; /\* END OF SUBLIST *\/ */
-/* it=it2; /\* PUSH THE ITERATOR STACK *\/ */
-/* } */
-/* } */
-/* } while (POP_ITERATOR_STACK(it)); /\* IF STACK EXHAUSTED, EXIT *\/ */
-/* free_interval_iterator(it); /\* takes care of the whole stack *\/ */
-/* it=NULL; /\* ITERATOR IS EXHAUSTED *\/ */
-
-/* *p_nreturn=nfound; /\* #INTERVALS FOUND IN THIS PASS *\/ */
-/* } */
-
-
-int find_intervals_stack(int start_stack[], int end_stack[], int sp, int start,
- int end, IntervalMap im[], int n,
- SublistHeader subheader[], IntervalMap buf[],
- int *p_nreturn)
-{
- /* IntervalIterator *it=NULL,*it2=NULL; */
- /* printf("In very beginning!\n"); */
- /* return 0; */
- int nfound = 0, j, k;
- /* printf("j: %d, sp: %d, start_stack[sp]: %d", 0, sp, sp); */
-
- /* if (sp == 0) { */
- clock_t t;
- t = clock();
- j = find_overlap_start(start,end,im,n);
- t = clock() - t;
- double time_taken = ((double)t)/CLOCKS_PER_SEC; // in seconds
- printf("fun() took %f seconds to execute \n", time_taken);
- start_stack[sp] = j;
- end_stack[sp] = n;
- /* } */
-
- /* printf("We are before loop\n"); */
- /* printf("start, end: %d, %d", start_stack[sp], end_stack[sp]); */
-
- /* fflush(stdout); */
-
- while (sp >= 0) {
- /* printf("Outer loop. sp: %d, st: %d, end: %d\n", sp, start_stack[sp], end_stack[sp]); */
- /* fflush(stdout); */
- while (start_stack[sp] >= 0 && start_stack[sp] < end_stack[sp] && \
- HAS_OVERLAP_POSITIVE(im[start_stack[sp]], start, end)) {
- /* printf("Inner loop. sp: %d\n", start_stack[sp]); */
- /* printf("Interval added: %d, %d, %d\n", im[start_stack[sp]].start, im[start_stack[sp]].end, im[start_stack[sp]].target_id); */
- memcpy(buf+nfound, im + start_stack[sp], sizeof(IntervalMap)); /*SAVE THIS HIT TO BUFFER */
-
- nfound++;
- k=im[sp].sublist; /* GET SUBLIST OF i IF ANY */
-
- start_stack[sp++]++; /* ADVANCE TO NEXT INTERVAL */
- if (k>=0 && (j=find_suboverlap_start(start,end,k,im,subheader))>=0) {
- sp++;
- start_stack[sp] = j;
- end_stack[sp] = subheader[k].start + subheader[k].len; /* END OF SUBLIST */
- }
-
- if (nfound>=1024){ /* FILLED THE BUFFER, RETURN THE RESULTS SO FAR */
- goto finally_return_result;
- }
- }
-
- sp--;
-
- }
-
- finally_return_result:
-
- *p_nreturn = nfound; /* #INTERVALS FOUND IN THIS PASS */
-
- return sp;
-}
=====================================
ncls/src/ncls.pyx
=====================================
@@ -1,16 +1,15 @@
# import numpy as cnp
import sys
-
cimport cython
-
from libc.stdint cimport int64_t
+from libc.stdlib cimport malloc
cimport ncls.src.cncls as cn
-from libc.stdlib cimport malloc
import numpy as np
+
cdef inline int int_max(int a, int b): return a if a >= b else b
cdef inline int int_min(int a, int b): return a if a <= b else b
# import ctypes as c
=====================================
ncls/src/ncls32.pyx
=====================================
@@ -1,17 +1,17 @@
# import numpy as cnp
import sys
-
cimport cython
-
from libc.stdint cimport int32_t, int64_t
-# from cython.stdint import int32
+from libc.stdlib cimport malloc
cimport ncls.src.cncls32 as cn
-from libc.stdlib cimport malloc
+# from cython.stdint import int32
+
import numpy as np
+
cdef inline int int_max(int a, int b): return a if a >= b else b
cdef inline int int_min(int a, int b): return a if a <= b else b
# cdef inline int overlap(int a_start, int a_end, int b_start, b_end): int_max(0, int_min(a_end, b_end) - int_max(a_start, b_start))
@@ -85,8 +85,8 @@ cdef class NCLS32:
cdef int nfound = 0
cdef int spent = 0
- output_arr = np.zeros(length, dtype=long)
- output_arr_other = np.zeros(length, dtype=long)
+ output_arr = np.zeros(length, dtype=np.int_)
+ output_arr_other = np.zeros(length, dtype=np.int_)
cdef long [::1] output
cdef long [::1] output_other
@@ -159,8 +159,8 @@ cdef class NCLS32:
cdef int max_end = -1
cdef int spent = 0
- output_arr = np.zeros(length, dtype=long)
- output_arr_other = np.zeros(length, dtype=long)
+ output_arr = np.zeros(length, dtype=np.int_)
+ output_arr_other = np.zeros(length, dtype=np.int_)
cdef long [::1] output
cdef long [::1] output_other
@@ -226,8 +226,8 @@ cdef class NCLS32:
cdef int nfound = 0
cdef int spent = 0
- output_arr = np.zeros(length, dtype=long)
- output_arr_other = np.zeros(length, dtype=long)
+ output_arr = np.zeros(length, dtype=np.int_)
+ output_arr_other = np.zeros(length, dtype=np.int_)
cdef long [::1] output
cdef long [::1] output_other
@@ -303,7 +303,7 @@ cdef class NCLS32:
cdef int nfound = 0
cdef int spent = 0
- output_arr = np.zeros(length, dtype=long)
+ output_arr = np.zeros(length, dtype=np.int_)
cdef long [::1] output
output = output_arr
@@ -369,8 +369,8 @@ cdef class NCLS32:
cdef int nfound = 0
cdef int spent = 0
- # output_arr = np.zeros(length, dtype=long)
- output_arr_length = np.zeros(length, dtype=long)
+ # output_arr = np.zeros(length, dtype=np.int_)
+ output_arr_length = np.zeros(length, dtype=np.int_)
# cdef long [::1] output
cdef long [::1] output_length
@@ -595,8 +595,8 @@ cdef class NCLS32:
cdef int loop_counter = 0
cdef int nfound = 0
- output_arr = np.zeros(length, dtype=np.long)
- output_arr_other = np.zeros(length, dtype=np.long)
+ output_arr = np.zeros(length, dtype=np.int_)
+ output_arr_other = np.zeros(length, dtype=np.int_)
cdef long [::1] output
cdef long [::1] output_other
@@ -638,8 +638,8 @@ cdef class NCLS32:
cdef int start, end
cdef int spent = 0
- output_arr = np.zeros(length, dtype=np.long)
- output_arr_other = np.zeros(length, dtype=np.long)
+ output_arr = np.zeros(length, dtype=np.int_)
+ output_arr_other = np.zeros(length, dtype=np.int_)
cdef long [::1] output
cdef long [::1] output_other
@@ -715,7 +715,7 @@ cdef class NCLS32:
if not self.im: # if empty
return []
- output_arr = np.zeros(length, dtype=long)
+ output_arr = np.zeros(length, dtype=np.int_)
cdef long [::1] output
output = output_arr
=====================================
ncls/version.py deleted
=====================================
@@ -1 +0,0 @@
-__version__ = "0.0.63"
=====================================
pyproject.toml
=====================================
@@ -0,0 +1,35 @@
+[build-system]
+requires = ["setuptools>=61.0.0", "wheel", "cython", "setuptools_scm[toml]>=6.2"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ncls"
+version = "0.0.70"
+description = "A fast interval tree-like implementation in C, wrapped for the Python ecosystem."
+readme = "README.md"
+authors = [{ name = "Endre Bakken Stovner", email = "endbak at pm.me" }]
+license = { text = "MIT" }
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "Development Status :: 4 - Beta", "Environment :: Other Environment",
+ "Intended Audience :: Developers",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: POSIX :: Linux",
+ "Operating System :: MacOS :: MacOS X",
+ "Topic :: Scientific/Engineering"
+]
+keywords = ["ncls", "interval-tree", "genomics"]
+dependencies = ["numpy"]
+
+[project.optional-dependencies]
+dev = ["black", "bumpver", "isort", "pip-tools", "pytest"]
+
+[tool.setuptools.packages.find]
+where = ["."]
+
+[tool.pytype]
+inputs = ['ncls']
+
+[project.urls]
+Homepage="http://github.com/pyranges/ncls"
=====================================
setup.cfg
=====================================
@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0
+
=====================================
setup.py
=====================================
@@ -1,96 +1,37 @@
-from setuptools import find_packages, Extension, Command
-from distutils.core import setup
-
-
-# try:
-# from Cython.Build import cythonize
-# except ImportError:
-# def cythonize(*args, **kwargs):
-# from Cython.Build import cythonize
-# return cythonize(*args, **kwargs)
-
-
-
-
-
import os
-import sys
-CLASSIFIERS = """Development Status :: 5 - Production/Stable
-Operating System :: MacOS :: MacOS X
-Operating System :: Microsoft :: Windows :: Windows NT/2000
-Operating System :: OS Independent
-Operating System :: POSIX
-Operating System :: POSIX :: Linux
-Operating System :: Unix
-Programming Language :: Python
-Topic :: Scientific/Engineering
-Topic :: Scientific/Engineering :: Bio-Informatics"""
-
-# split into lines and filter empty ones
-CLASSIFIERS = CLASSIFIERS.split("\n")
-
-# macros = [("CYTHON_TRACE", "1")]
-
-# # extension sources
-# macros = []
+from setuptools import Extension
+from distutils.core import setup
-# if macros:
-# from Cython.Compiler.Options import get_directive_defaults
-# directive_defaults = get_directive_defaults()
-# directive_defaults['linetrace'] = True
-# directive_defaults['binding'] = True
dir_path = os.path.dirname(os.path.realpath(__file__))
include_dirs = [dir_path + "/ncls/src", dir_path]
-__version__ = open("ncls/version.py").readline().split(" = ")[1].replace(
- '"', '').strip()
-
-
-
extensions = [
Extension(
- "ncls.src.ncls", ["ncls/src/ncls.pyx", "ncls/src/intervaldb.c"],
+ "ncls.src.ncls",
+ ["ncls/src/ncls.pyx", "ncls/src/intervaldb.c"],
# define_macros=macros,
- include_dirs=include_dirs),
+ include_dirs=include_dirs,
+ ),
Extension(
- "ncls.src.ncls32", ["ncls/src/ncls32.pyx", "ncls/src/intervaldb32.c"],
+ "ncls.src.ncls32",
+ ["ncls/src/ncls32.pyx", "ncls/src/intervaldb32.c"],
# define_macros=macros,
- include_dirs=include_dirs),
+ include_dirs=include_dirs,
+ ),
Extension(
- "ncls.src.fncls", ["ncls/src/fncls.pyx", "ncls/src/fintervaldb.c"],
+ "ncls.src.fncls",
+ ["ncls/src/fncls.pyx", "ncls/src/fintervaldb.c"],
# define_macros=macros,
- include_dirs=include_dirs)]
+ include_dirs=include_dirs,
+ ),
+]
+
+from Cython.Build import cythonize
-# using setuptools to cythonize if cython not found
-# not recommended by cython docs, but still
-try:
- from cython.Build import cythonize
- ext_modules = cythonize(extensions, language_level=2)
-except ImportError:
- ext_modules = extensions
+ext_modules = cythonize(extensions, language_level=2)
-setup(
- name = "ncls",
- version=__version__,
- packages=find_packages(),
- ext_modules = ext_modules,
- setup_requires = ["cython"],
- install_requires = ["numpy"],
- # py_modules=["pyncls"],
- description = \
- 'A wrapper for the nested containment list data structure.',
- long_description = __doc__,
- # I am the maintainer; the datastructure was invented by
- # Alexander V. Alekseyenko and Christopher J. Lee.
- author = "Endre Bakken Stovner",
- author_email='endrebak85 at gmail.com',
- url = 'https://github.com/endrebak/ncls',
- license = 'New BSD License',
- classifiers = CLASSIFIERS,
- package_data={'': ['*.pyx', '*.pxd', '*.h', '*.c']},
- include_dirs=["."],
-)
+setup(ext_modules=ext_modules)
=====================================
tests/gencode.py deleted
=====================================
@@ -1,10 +0,0 @@
-import pandas as pd
-from ncls import NCLS
-
-d = pd.read_table("../gencode.v28.annotation.gtf.gz", usecols=[0, 3, 4], header=None, comment="#", names="Chromosome Start End".split(), dtype={"Chromosome": "category"})
-
-d = d[d.Chromosome == "chr1"]
-
-n = NCLS(d.Start.values, d.End.values, d.index.values)
-
-print(n)
=====================================
tests/test_1024.py
=====================================
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+import numpy as np
+
+from ncls import NCLS
+
+
+def test_all_overlaps_both():
+ starts = np.array([0], dtype=np.int64)
+ ends = np.array([5000], dtype=np.int64)
+ ids = np.array([0], dtype=np.int64)
+
+ ncls = NCLS(starts, ends, ids)
+
+ starts2 = np.arange(0, 2048, 2)
+ ends2 = np.arange(1, 2048, 2)
+
+ result = ncls.all_overlaps_both(starts2, ends2, starts2)
+ assert len(result[0]) == 1024
+ print(result[0])
+
+ starts2 = np.arange(0, 2 * 2048, 2)
+ ends2 = np.arange(1, 2 * 2048, 2)
+ # ncls2 = NCLS(starts2, ends2, starts2)
+
+ result = ncls.all_overlaps_both(starts2, ends2, starts2)
+ assert len(result[0]) == 2048
+ print(result[0])
=====================================
tests/test_ncls.py
=====================================
@@ -1,22 +1,19 @@
import struct
-from ncls import NCLS
-
import numpy as np
+from ncls import NCLS
-import struct
# 64 bit architecture?
if struct.calcsize("P") * 8 == 64:
- starts = np.array([5, 9_223_372_036_854_775_805], dtype=np.int64)
-
- ends = np.array([6, 9_223_372_036_854_775_807], dtype=np.int64)
-
- ids = np.array([2147483647, 3], dtype=np.int64)
-
def test_ncls():
# ids = starts
+ starts = np.array([5, 9_223_372_036_854_775_805], dtype=np.int64)
+
+ ends = np.array([6, 9_223_372_036_854_775_807], dtype=np.int64)
+
+ ids = np.array([2147483647, 3], dtype=np.int64)
print(starts, ends, ids)
@@ -26,14 +23,16 @@ if struct.calcsize("P") * 8 == 64:
assert list(ncls.find_overlap(0, 2)) == []
print("aaa", list(ncls.find_overlap(9_223_372_036_854_775_805, 9_223_372_036_854_775_806)))
- assert list(ncls.find_overlap(0, 9_223_372_036_854_775_806)) == [(5, 6, 2147483647), (9223372036854775805, 9223372036854775807, 3)]
+ assert list(ncls.find_overlap(0, 9_223_372_036_854_775_806)) == [
+ (5, 6, 2147483647),
+ (9223372036854775805, 9223372036854775807, 3),
+ ]
- r, l = ncls.all_overlaps_both(starts, ends, ids)
- assert list(r) == [2147483647, 3]
- assert list(l) == [2147483647, 3]
+ right, left = ncls.all_overlaps_both(starts, ends, ids)
+ assert list(right) == [2147483647, 3]
+ assert list(left) == [2147483647, 3]
def test_all_containments_both():
-
starts = np.array([1291845632, 3002335232], dtype=np.int64)
ends = np.array([1292894207, 3002597375], dtype=np.int64)
ids = np.array([0, 1], dtype=np.int64)
@@ -47,12 +46,10 @@ if struct.calcsize("P") * 8 == 64:
else:
- starts = np.array([5, 2_147_483_645], dtype=np.int64)
- ends = np.array([6, 2_147_483_646], dtype=np.int64)
- ids = np.array([0, 3], dtype=np.int64)
-
def test_ncls():
- # ids = starts
+ starts = np.array([5, 2_147_483_645], dtype=np.int64)
+ ends = np.array([6, 2_147_483_646], dtype=np.int64)
+ ids = np.array([0, 3], dtype=np.int64)
print(starts, ends, ids)
@@ -63,12 +60,11 @@ else:
assert list(ncls.find_overlap(0, 2)) == []
assert list(ncls.find_overlap(0, 2_147_483_647)) == [(5, 6, 0), (2_147_483_645, 2_147_483_646, 3)]
- r, l = ncls.all_overlaps_both(starts, ends, ids)
- assert list(r) == [0, 3]
- assert list(l) == [0, 3]
+ right, left = ncls.all_overlaps_both(starts, ends, ids)
+ assert list(right) == [0, 3]
+ assert list(left) == [0, 3]
def test_all_containments_both():
-
starts = np.array([5, 10], dtype=np.int64)
ends = np.array([6, 50], dtype=np.int64)
ids = np.array([0, 1], dtype=np.int64)
View it on GitLab: https://salsa.debian.org/python-team/packages/python-ncls/-/commit/8ff16f01a2be52c3923e642fbd410357322baac8
--
View it on GitLab: https://salsa.debian.org/python-team/packages/python-ncls/-/commit/8ff16f01a2be52c3923e642fbd410357322baac8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20251130/3219be8a/attachment-0001.htm>
More information about the debian-med-commit mailing list