[med-svn] [Git][med-team/pairtools][upstream] New upstream version 1.1.2
Alexandre Detiste (@detiste-guest)
gitlab@salsa.debian.org
Sat Dec 21 20:14:36 GMT 2024
Alexandre Detiste pushed to branch upstream at Debian Med / pairtools
Commits:
ac753fea by Alexandre Detiste at 2024-12-21T21:06:29+01:00
New upstream version 1.1.2
- - - - -
21 changed files:
- + .github/workflows/python-build-wheels.yml
- .github/workflows/python-publish-test.yml
- .github/workflows/python-publish.yml
- .github/workflows/python-package.yml → .github/workflows/python-test.yml
- CHANGES.md
- MANIFEST.in
- README.md
- doc/installation.rst
- pairtools/__init__.py
- pairtools/cli/parse2.py
- pairtools/lib/parse.py
- pairtools/lib/scaling.py
- + pyproject.toml
- readthedocs.yml
- − requirements-dev.txt
- − requirements.txt
- − requirements_doc.txt
- setup.py
- + tests/data/mock.parse2-single-end.expand.sam
- + tests/data/mock.parse2-single-end.sam
- tests/test_parse2.py
Changes:
=====================================
.github/workflows/python-build-wheels.yml
=====================================
@@ -0,0 +1,88 @@
+name: Build wheels
+
+on: [workflow_dispatch]
+
+jobs:
+ make_sdist:
+ name: Make SDist
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # Optional, use if you use setuptools_scm
+ submodules: true # Optional, use if you have submodules
+
+ - name: Install dependencies
+ run: python -m pip install cython numpy pysam
+
+ - name: Build SDist
+ run: pipx run build --sdist
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: cibw-sdist
+ path: dist/*.tar.gz
+
+ build_wheels:
+ name: Build wheels on ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ # macos-13 is an intel runner, macos-14 is apple silicon
+ os: [ubuntu-latest]
+ #, windows-latest, macos-13, macos-14]
+ python-version: [ "3.11" ] # "3.7", "3.8", "3.9", "3.10",
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ # - name: Build wheels
+ # uses: pypa/cibuildwheel@v2.21.0
+ # # uses: pypa/cibuildwheel@v2.17.0
+ # # env:
+ # # CIBW_SOME_OPTION: value
+ # # ...
+ # # with:
+ # # package-dir: .
+ # # output-dir: wheelhouse
+ # # config-file: "{package}/pyproject.toml"
+
+ - name: Install cibuildwheel
+ run: python -m pip install cibuildwheel==2.22.0
+
+ - name: Build wheels
+ run: python -m cibuildwheel --output-dir dist
+ # to supply options, put them in 'env', like:
+ env:
+ #CIBW_BUILD_FRONTEND: "pip; args: --no-build-isolation"
+ CIBW_BUILD_FRONTEND: "build; args: --no-isolation"
+ CIBW_BEFORE_ALL: "yum install bzip2-devel xz-devel -y;"
+
+ # we have to recompile pysam so that repairwheel can later find various libraries (libssl, libnghttp2, etc)
+ #CIBW_BEFORE_ALL: "yum install bzip2-devel xz-devel openssl-devel openldap-devel krb5-devel libssh-devel libnghttp2-devel -y;"
+ CIBW_BEFORE_BUILD: "python -m pip install setuptools cython numpy pysam --no-binary pysam"
+
+ # skip building 32-bit wheels (i686)
+ CIBW_ARCHS_LINUX: "auto64"
+
+ # we could use 2_28 to download pysam's wheel instead of compiling it ;
+ # HOWEVER THAT DIDN'T WORK BECAUSE PYSAM DEPENDS ON LIBSSL, LIBNGHTTP2, ETC, WHICH CANNOT BE FOUND
+ # SO WE ARE BACK TO COMPILING PYSAM'S WHEEL (no-binary pysam)
+ # CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28"
+
+ ## skip building pypy and musllinux
+ CIBW_SKIP: pp* *musllinux*
+
+ #CIBW_REPAIR_WHEEL_COMMAND: 'auditwheel -v repair -w {dest_dir} {wheel}'
+
+ #PIP_NO_CACHE_DIR: "false"
+ #PIP_NO_BUILD_ISOLATION: "false"
+ #PIP_NO_BINARY: "pysam"
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
+ path: ./dist/*.whl
\ No newline at end of file
=====================================
.github/workflows/python-publish-test.yml
=====================================
@@ -1,32 +1,51 @@
-
-# This workflows will upload a Python Package using Twine when a release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
name: Publish Python Package to Test PyPI
on:
- release:
- types: [prereleased]
+ # release:
+ # types: [published]
+ workflow_dispatch:
jobs:
- deploy:
-
+ publish_all:
+ name: Publish wheels and sdist to Test PyPI
+
+ # if: github.event_name == 'release' && github.event.action == 'published'
+
+ environment: testpypi
+ permissions:
+ id-token: write
runs-on: ubuntu-latest
-
steps:
- - uses: actions/checkout@v2
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: '3.10'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install setuptools wheel twine cython numpy pysam
- - name: Build and publish
- env:
- TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
- run: |
- python setup.py sdist
- twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+ - uses: dawidd6/action-download-artifact@v7
+ with:
+ # Required, if the repo is private a Personal Access Token with `repo` scope is needed or GitHub token in a job where the permissions `action` scope set to `read`
+ #github_token: ${{secrets.GITHUB_TOKEN}}
+ # Optional, workflow file name or ID
+ # If not specified, will be inferred from run_id (if run_id is specified), or will be the current workflow
+ workflow: python-build-wheels.yml
+ # Optional, the status or conclusion of a completed workflow to search for
+ # Can be one of a workflow conclusion:
+ # "failure", "success", "neutral", "cancelled", "skipped", "timed_out", "action_required"
+ # Or a workflow status:
+ # "completed", "in_progress", "queued"
+ # Use the empty string ("") to ignore status or conclusion in the search
+ workflow_conclusion: success
+
+ - name: Publish sdist 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-sdist
+ repository-url: https://test.pypi.org/legacy/
+
+ - name: Publish wheels 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-wheels-ubuntu-latest-0
+ repository-url: https://test.pypi.org/legacy/
+
+
+
+
+
+
+
=====================================
.github/workflows/python-publish.yml
=====================================
@@ -1,31 +1,48 @@
-# This workflow will upload a Python Package using Twine when a release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
-name: Upload Python Package
+name: Publish Python Package to PyPI
on:
- release:
- types: [created]
+ # release:
+ # types: [published]
+ workflow_dispatch:
jobs:
- deploy:
-
+ publish_all:
+ name: Publish wheels and sdist to PyPI
+
+ # if: github.event_name == 'release' && github.event.action == 'published'
+
+ environment: pypi
+ permissions:
+ id-token: write
runs-on: ubuntu-latest
-
steps:
- - uses: actions/checkout@v2
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: '3.10'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install setuptools wheel twine cython pysam numpy
- - name: Build and publish
- env:
- TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
- run: |
- python setup.py sdist
- twine upload dist/*
+ - uses: dawidd6/action-download-artifact@v7
+ with:
+ # Required, if the repo is private a Personal Access Token with `repo` scope is needed or GitHub token in a job where the permissions `action` scope set to `read`
+ #github_token: ${{secrets.GITHUB_TOKEN}}
+ # Optional, workflow file name or ID
+ # If not specified, will be inferred from run_id (if run_id is specified), or will be the current workflow
+ workflow: python-build-wheels.yml
+ # Optional, the status or conclusion of a completed workflow to search for
+ # Can be one of a workflow conclusion:
+ # "failure", "success", "neutral", "cancelled", "skipped", "timed_out", "action_required"
+ # Or a workflow status:
+ # "completed", "in_progress", "queued"
+ # Use the empty string ("") to ignore status or conclusion in the search
+ workflow_conclusion: success
+
+ - name: Publish sdist 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-sdist
+
+ - name: Publish wheels 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-wheels-ubuntu-latest-0
+
+
+
+
+
+
=====================================
.github/workflows/python-package.yml → .github/workflows/python-test.yml
=====================================
@@ -1,8 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-name: Python package
-
+name: Test build, lint and test
on:
push:
branches: [ master ]
@@ -16,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
@@ -26,10 +25,9 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
- python -m pip install --upgrade pip wheel setuptools
- pip install numpy cython pysam
- pip install -r requirements-dev.txt
- pip install -e .
+ python -m pip install --upgrade pip wheel setuptools build
+ pip install cython pysam numpy
+ pip install -e .[test] --no-build-isolation -v -v
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
@@ -40,3 +38,6 @@ jobs:
run: |
pip install pytest
pytest
+
+
+
\ No newline at end of file
=====================================
CHANGES.md
=====================================
@@ -1,3 +1,17 @@
+### 1.1.2 (2024-12-11) ###
+
+Bugfixes:
+- Drop Cython-generated .c/.cpp files from the sdist
+
+### 1.1.1 (2024-12-10) ###
+
+Bugfixes:
+- Migrating to pyproject.toml + cibuildwheel. pairtools will now release binary wheels for Linux. --no-build-isolation is a mandatory flag now.
+- Require Cython during build to avoid the "circular import" bug.
+- fix API incomplete functionality for read-side detection by @agalitsyna
+
+**Full Changelog**: https://github.com/open2c/pairtools/compare/v1.1.0...v1.1.1
+
### 1.1.0 (2024-04-23) ###
Major bugfixes:
- Fix a major bug in sort that previously broke the sorting order. This bug was introduced in recent versions of pairtools #230
=====================================
MANIFEST.in
=====================================
@@ -1,8 +1,8 @@
-include CHANGES.md
include README.md
-include requirements.txt
-include requirements_doc.txt
+include CHANGES.md
include LICENSE
+include pyproject.toml
+include pytest.ini
graft tests
graft doc
@@ -11,6 +11,8 @@ prune doc/_templates
global-include *.pyx
global-include *.pxd
+global-exclude *.c
+global-exclude *.cpp
global-exclude __pycache__/*
global-exclude *.so
=====================================
README.md
=====================================
@@ -48,23 +48,33 @@ Requirements:
- Python 3.x
- Python packages `cython`, `pysam`, `bioframe`, `pyyaml`, `numpy`, `scipy`, `pandas` and `click`.
-- Command-line utilities `sort` (the Unix version), `bgzip` (shipped with `samtools`) and `samtools`. If available, `pairtools` can compress outputs with `pbgzip` and `lz4`.
+- Command-line utilities `sort` (the Unix version), `samtools` and `bgzip` (shipped with `samtools`). If available, `pairtools` can compress outputs with `pbgzip` and `lz4`.
-For the full list of recommended versions, see [requirements in the the GitHub repo](https://github.com/open2c/pairtools/blob/detect_mutations/requirements.txt).
+For the full list of recommended versions, see [the requirements section in the pyproject.toml](https://github.com/open2c/pairtools/blob/main/pyproject.toml).
-We highly recommend using the `conda` package manager to install `pairtools` together with all its dependencies. To get it, you can either install the full [Anaconda](https://www.continuum.io/downloads) Python distribution or just the standalone [conda](http://conda.pydata.org/miniconda.html) package manager.
+There are three options for installing pairtools:
-With `conda`, you can install `pairtools` and all of its dependencies from the [bioconda](https://bioconda.github.io/index.html) channel.
+1. We highly recommend using the `conda` package manager to install `pairtools` together with all its dependencies. To get it, you can either install the full [Anaconda](https://www.continuum.io/downloads) Python distribution or just the standalone [conda](http://conda.pydata.org/miniconda.html) package manager.
+
+With `conda`, you can install `pairtools` and all of its dependencies from the [bioconda](https://bioconda.github.io/index.html) channel:
```sh
$ conda install -c conda-forge -c bioconda pairtools
```
-Alternatively, install non-Python dependencies and `pairtools` with Python-only dependencies from PyPI using pip:
+2. Alternatively, install non-Python dependencies (`sort`, `samtools`, `bgzip`, `pbgzip` and `lz4`) separately and download `pairtools` with Python dependencies from PyPI using pip:
```sh
-$ pip install numpy pysam cython
$ pip install pairtools
```
+3. Finally, when the two options above don't work or when you want to modify `pairtools`, build `pairtools` from source via pip's "editable" mode:
+```sh
+$ pip install numpy cython pysam
+$ git clone https://github.com/open2c/pairtools
+$ cd pairtools
+$ pip install -e ./ --no-build-isolation
+```
+
+
## Quick example
Setup a new test folder and download a small Hi-C dataset mapped to sacCer3 genome:
=====================================
doc/installation.rst
=====================================
@@ -50,11 +50,17 @@ Then, you can compile and install `pairtools` in
`the development mode <https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode>`_,
which installs the package without moving it to a system folder and thus allows
immediate live-testing any changes in the python code. Please, make sure that you
-have `cython` installed!
+have `cython` and `pysam` installed!
.. code-block:: bash
+ $ pip install cython pysam numpy
$ cd pairtools
- $ pip install -e ./
-
-
+ $ pip install -e ./ --no-build-isolation
+
+A few notes on the installation:
+ - `pairtools` has to use `--no-build-isolation`, because it extends `pysam` via Cython and
+ re-compiles it during the build process. When build isolation is enabled, these `pysam` objects
+ get lost after the build.
+ - Because of the `--no-build-isolation` flag, build does not install build-requires, so you have to
+ install `cython`, `pysam` and `numpy` manually before the build.
=====================================
pairtools/__init__.py
=====================================
@@ -10,6 +10,6 @@ CLI tools to process mapped Hi-C data
"""
-__version__ = "1.1.0"
+__version__ = "1.1.2"
# from . import lib
=====================================
pairtools/cli/parse2.py
=====================================
@@ -43,10 +43,10 @@ UTIL_NAME = "pairtools_parse2"
help="""Reported position of alignments in pairs of complex walks (pos columns).
Each alignment in .bam/.sam Hi-C-like data has two ends, and you can report one or another depending of the position of alignment on a read or in a pair.
- "junction" - inner ends of sequential alignments in each pair, aka ligation junctions (complex walks default),
+ "junction" - inner ends of sequential alignments in each pair, aka ligation junctions,
"read" - 5'-end of alignments relative to R1 or R2 read coordinate system (as in traditional Hi-C),
"walk" - 5'-end of alignments relative to the whole walk coordinate system,
- "outer" - outer ends of sequential alignments in each pair. """,
+ "outer" - outer ends of sequential alignments in each pair (parse2 default). """,
)
@click.option(
"--report-orientation",
=====================================
pairtools/lib/parse.py
=====================================
@@ -9,7 +9,7 @@ I. pysam-based:
each sam entry is in fact special AlignedSegmentPairtoolized Cython object
that has alignment attributes and can be easily accessed from Python.
- Sam entries are gathered into reads by `push_pysam` function.
+ Sam entries are gathered into reads by `group_alignments_by_side` function.
2. **read** is a collection of sam entries corresponding to a single Hi-C molecule.
It is represented by three variables:
@@ -37,36 +37,6 @@ II. python-based data types are parsed from pysam-based ones:
from . import pairsam_format
from .parse_pysam import get_mismatches_c
-
-def group_alignments_by_side(sams):
- return [sam for sam in sams if sam.is_read1], [sam for sam in sams if sam.is_read2]
-
-
-def read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True):
- sams = []
-
- prev_readID = None
- while True:
- sam_entry = next(instream, None)
- readID = sam_entry.query_name if sam_entry else None
-
- # Read is fully populated, then parse and write:
- if not (sam_entry) or ((readID != prev_readID) and prev_readID):
- if sort:
- sams = sorted(sams, key=lambda a: (a.is_read2, a.query_alignment_start))
- out = sams if not group_by_side else group_alignments_by_side(sams)
- out = out if not return_readID else (prev_readID, out)
- yield out
-
- sams.clear()
-
- if sam_entry is None:
- break
- else:
- sams.append(sam_entry)
- prev_readID = readID
-
-
def streaming_classify(
instream, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs
):
@@ -124,9 +94,7 @@ def streaming_classify(
### Iterate over input pysam:
instream = iter(instream)
- for (readID, (sams1, sams2)) in read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True):
- if readID_transform is not None:
- readID = eval(readID_transform)
+ for (readID, (sams1, sams2)) in read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True, readID_transform=readID_transform):
### Parse
if not parse2: # regular parser:
@@ -216,17 +184,6 @@ def streaming_classify(
### Alignment utilities: ###
############################
-
-def push_pysam(sam_entry, sams1, sams2):
- """Parse pysam AlignedSegment (sam) into pairtools sams entry"""
- flag = sam_entry.flag
- if (flag & 0x40) != 0:
- sams1.append(sam_entry) # left read, or first read in a pair
- else:
- sams2.append(sam_entry) # right read, or mate pair
- return
-
-
def empty_alignment():
return {
"chrom": pairsam_format.UNMAPPED_CHROM,
@@ -251,6 +208,45 @@ def empty_alignment():
"mismatches": "",
}
+def group_alignments_by_side(sams):
+ """Group pysam AlignedSegments (sams) into left-read (R1) and right-read (R2) sam entries"""
+
+ sams1 = []
+ sams2 = []
+ for sam_entry in sams:
+ flag = sam_entry.flag
+ if (flag & 0x40) != 0:
+ sams1.append(sam_entry) # left read, or first read in a pair
+ else:
+ sams2.append(sam_entry) # right read, or mate pair
+ return sams1, sams2
+
+
+def read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True, readID_transform=None):
+ sams = []
+
+ prev_readID = None
+ while True:
+ sam_entry = next(instream, None)
+ readID = sam_entry.query_name if sam_entry else None
+ if readID_transform is not None and readID is not None:
+ readID = eval(readID_transform)
+
+ # Read is fully populated, then parse and write:
+ if not (sam_entry) or ((readID != prev_readID) and prev_readID):
+ if sort:
+ sams = sorted(sams, key=lambda a: (a.is_read2, a.query_alignment_start))
+ out = sams if not group_by_side else group_alignments_by_side(sams)
+ out = out if not return_readID else (prev_readID, out)
+ yield out
+
+ sams.clear()
+
+ if sam_entry is None:
+ break
+ else:
+ sams.append(sam_entry)
+ prev_readID = readID
def parse_pysam_entry(
sam,
@@ -672,7 +668,7 @@ def parse2_read(
]
algns1 = normalize_alignment_list(algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
- algns2 = [empty_alignment()] # Empty alignment dummy
+ algns2 = [] # Empty alignment dummy
if len(algns1) > 1:
# Look for ligation pair, and report linear alignments after deduplication of complex walks:
@@ -684,6 +680,8 @@ def parse2_read(
report_position,
report_orientation,
dedup_max_mismatch,
+ expand,
+ max_expansion_depth,
)
output = [x for x in output if x[-1][-1] != "R1-2"]
return (output, algns1, algns2)
@@ -893,11 +891,11 @@ def parse_complex_walk(
**Intramolecular deduplication**
- Forward read (left): right read (right):
+ Forward read (left): right read (right):
5'------------------------->3' 3'<--------------------------5'
- algns1 algns2
+ algns1 algns2
<5---3><5---3><5---3><5---3> <3---5><3---5><3---5><3---5>
- l0 l1 l2 l3 r3 r2 r1 r0
+ l0 l1 l2 l3 r3 r2 r1 r0
Alignment - bwa mem reported hit or alignment after gaps conversion.
Left and right alignments (algns1: [l0, l1, l2, l3], algns2: [r0, r1, r2, r3])
@@ -931,8 +929,8 @@ def parse_complex_walk(
If comparison is successful, go to 6.
6. Verify.
Check that downstream pairs on the left read overlap with the upstream pairs on the right read.
- If yes, exit.
- If not, we do not have an overlap, go to step 3.
+ If yes, exit.
+ If not, we do not have an overlap, go to step 3.
"""
AVAILABLE_REPORT_POSITION = ["outer", "junction", "read", "walk"]
@@ -1009,66 +1007,70 @@ def parse_complex_walk(
if not is_overlap:
current_right_pair = 1
- # II. Search of partial overlap if there are less than 2 alignments at either sides, or no overlaps found
- if current_right_pair == 1:
- last_reported_alignment_left = last_reported_alignment_right = 1
- if partial_overlap(
- algns1[-1],
- algns2[-1],
- max_insert_size=max_insert_size,
- dedup_max_mismatch=dedup_max_mismatch,
- ):
- if (
- n_algns1 >= 2
- ): # single alignment on right read and multiple alignments on left
- pair_index = (len(algns1) - 1, "R1")
- output_pairs.append(
- format_pair(
- algns1[-2],
- algns1[-1],
- pair_index=pair_index,
- algn2_pos3=algns2[-1]["pos5"],
- report_position=report_position,
- report_orientation=report_orientation,
+ if (n_algns2 == 0):
+ last_reported_alignment_left = 1
+ last_reported_alignment_right = 0
+ else:
+ # II. Search of partial overlap if there are less than 2 alignments at either sides, or no overlaps found
+ if (current_right_pair == 1):
+ last_reported_alignment_left = last_reported_alignment_right = 1
+ if partial_overlap(
+ algns1[-1],
+ algns2[-1],
+ max_insert_size=max_insert_size,
+ dedup_max_mismatch=dedup_max_mismatch,
+ ):
+ if (
+ n_algns1 >= 2
+ ): # single alignment on right read and multiple alignments on left
+ pair_index = (len(algns1) - 1, "R1")
+ output_pairs.append(
+ format_pair(
+ algns1[-2],
+ algns1[-1],
+ pair_index=pair_index,
+ algn2_pos3=algns2[-1]["pos5"],
+ report_position=report_position,
+ report_orientation=report_orientation,
+ )
)
- )
- last_reported_alignment_left = 2 # set the pointer for reporting
+ last_reported_alignment_left = 2 # set the pointer for reporting
+
+ if (
+ n_algns2 >= 2
+ ): # single alignment on left read and multiple alignments on right
+ pair_index = (len(algns1), "R2")
+ output_pairs.append(
+ format_pair(
+ algns2[-1],
+ algns2[-2],
+ pair_index=pair_index,
+ algn1_pos3=algns1[-1]["pos5"],
+ report_position=report_position,
+ report_orientation=report_orientation,
+ )
+ )
+ last_reported_alignment_right = 2 # set the pointer for reporting
+
+ # Note that if n_algns1==n_algns2==1 and alignments overlap, then we don't need to check,
+ # it's a non-ligated DNA fragment that we don't report.
- if (
- n_algns2 >= 2
- ): # single alignment on left read and multiple alignments on right
- pair_index = (len(algns1), "R2")
+ else: # end alignments do not overlap, report regular pair:
+ pair_index = (len(algns1), "R1-2")
output_pairs.append(
format_pair(
+ algns1[-1],
algns2[-1],
- algns2[-2],
pair_index=pair_index,
- algn1_pos3=algns1[-1]["pos5"],
report_position=report_position,
report_orientation=report_orientation,
)
)
- last_reported_alignment_right = 2 # set the pointer for reporting
-
- # Note that if n_algns1==n_algns2==1 and alignments overlap, then we don't need to check,
- # it's a non-ligated DNA fragment that we don't report.
-
- else: # end alignments do not overlap, report regular pair:
- pair_index = (len(algns1), "R1-2")
- output_pairs.append(
- format_pair(
- algns1[-1],
- algns2[-1],
- pair_index=pair_index,
- report_position=report_position,
- report_orientation=report_orientation,
- )
- )
- else: # there was an overlap, set some pointers:
- last_reported_alignment_left = (
- last_reported_alignment_right
- ) = current_right_pair
+ else: # there was an overlap, set some pointers:
+ last_reported_alignment_left = (
+ last_reported_alignment_right
+ ) = current_right_pair
# III. Report all remaining alignments.
# Report all unique alignments on left read (sequential):
@@ -1148,7 +1150,6 @@ def expand_pairs(pairs_list, max_expansion_depth=None):
list of expanded pairs
"""
-
for algn1, _algn1, pair_index1 in pairs_list:
for _algn2, algn2, pair_index2 in pairs_list:
if pair_index1 > pair_index2:
=====================================
pairtools/lib/scaling.py
=====================================
@@ -48,12 +48,13 @@ def _to_float(arr_or_scalar):
def assign_regs(chroms, pos, regs):
- gb_regs = regs.sort_values(["chrom", "start", "end"]).groupby(["chrom"])
+ gb_regs = regs.sort_values(["chrom", "start", "end"]).groupby("chrom")
regs_dict = {
chrom.encode(): regs_per_chrom[["start", "end"]]
- .values.flatten()
- .astype(np.int64)
+ .values
+ .flatten()
+ .astype(np.int64)
for chrom, regs_per_chrom in gb_regs
}
=====================================
pyproject.toml
=====================================
@@ -0,0 +1,77 @@
+[project]
+name = "pairtools"
+dynamic = ['version',]
+
+dependencies = [
+ 'cython',
+ 'numpy>=1.10',
+ 'click>=6.6',
+ 'scipy>=1.7.0',
+ 'pandas>=1.3.4',
+ 'pysam>=0.15.0',
+ 'pyyaml',
+ 'bioframe>=0.3.3',
+]
+requires-python = ">=3.9"
+
+description = "CLI tools to process mapped Hi-C data"
+authors = [
+ {name = "Open2C", email = "open.chromosome.collective@gmail.com"},
+]
+license = {text = "MIT License"}
+keywords = ["genomics", "bioinformatics", "Hi-C", "contact", "chromosome"]
+readme = "README.md"
+
+classifiers = [
+ "Development Status :: 5 - Production/Stable",
+
+ "Intended Audience :: Science/Research",
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
+ "Operating System :: OS Independent",
+
+ "License :: OSI Approved :: MIT License",
+
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+]
+
+[project.optional-dependencies]
+test = [
+ 'pytest',
+ 'pytest-flake8',
+ 'pytest-cov',
+]
+
+doc = [
+ 'sphinx-click',
+ 'ipython',
+ 'nbsphinx',
+ 'Sphinx>=7.0',
+ 'sphinx_rtd_theme',
+ 'docutils>0.16',
+]
+
+
+
+[project.urls]
+Homepage = "https://github.com/open2c/pairtools"
+Documentation = "https://pairtools.readthedocs.io/en/latest/"
+Repository = "https://github.com/open2c/pairtools.git"
+Issues = "https://github.com/open2c/pairtools/issues"
+Changelog = "https://github.com/open2c/pairtools/blob/master/CHANGES.md"
+
+
+[project.scripts]
+pairtools = "pairtools.cli:cli"
+
+
+[build-system]
+requires = [
+ "setuptools",
+ "cython",
+ "numpy",
+ "pysam"]
+build-backend = "setuptools.build_meta"
=====================================
readthedocs.yml
=====================================
@@ -12,6 +12,7 @@ sphinx:
python:
install:
- - requirements: requirements_doc.txt
- method: pip
path: .
+ extra_requirements:
+ - doc
\ No newline at end of file
=====================================
requirements-dev.txt deleted
=====================================
@@ -1,4 +0,0 @@
--r requirements.txt
-pytest
-pytest-flake8
-pytest-cov
=====================================
requirements.txt deleted
=====================================
@@ -1,8 +0,0 @@
-cython
-numpy>=1.10
-click>=6.6
-scipy>=1.7.0
-pandas>=1.3.4
-pysam>=0.15.0
-pyyaml
-bioframe>=0.3.3
\ No newline at end of file
=====================================
requirements_doc.txt deleted
=====================================
@@ -1,15 +0,0 @@
-Cython
-numpy
-nose
-scipy
-pandas
-pysam
-bioframe
-click>=7.0
-sphinx-click
-ipython
-nbsphinx
-Sphinx>=7.0
-sphinx_rtd_theme
-docutils>0.16
--e .
=====================================
setup.py
=====================================
@@ -12,23 +12,8 @@ from setuptools.extension import Extension
try:
from Cython.Distutils import build_ext as _build_ext
from Cython.Build import cythonize
-
- HAVE_CYTHON = True
except ImportError:
- from setuptools.command.build_ext import build_ext as _build_ext
-
- HAVE_CYTHON = False
-
-classifiers = """\
- Development Status :: 4 - Beta
- Operating System :: OS Independent
- Programming Language :: Python
- Programming Language :: Python :: 3
- Programming Language :: Python :: 3.7
- Programming Language :: Python :: 3.8
- Programming Language :: Python :: 3.9
- Programming Language :: Python :: 3.10
-"""
+ raise ImportError('Cython is now required to build the extension modules.')
def _read(*parts, **kwargs):
@@ -48,46 +33,41 @@ def get_version():
return version
-long_description = _read("README.md")
-
-install_requires = [l for l in _read("requirements.txt").split("\n") if l]
-
-
def get_ext_modules():
- ext = ".pyx" if HAVE_CYTHON else ".c"
+ ext = ".pyx"
src_files = glob.glob(
- os.path.join(os.path.dirname(__file__), "pairtools", "lib", "*" + ext)
+ #os.path.join(os.path.dirname(__file__), "pairtools", "lib", "*" + ext)
+ os.path.join("pairtools", "lib", "*" + ext)
)
ext_modules = []
for src_file in src_files:
name = "pairtools.lib." + os.path.splitext(os.path.basename(src_file))[0]
- if not "pysam" in name and not "regions" in name:
- ext_modules.append(Extension(name, [src_file]))
- elif "regions" in name:
+
+ if 'pysam' in name:
+ import pysam
ext_modules.append(
Extension(
name,
[src_file],
- language="c++",
+ extra_link_args=pysam.get_libraries(),
+ include_dirs=pysam.get_include(),
+ define_macros=pysam.get_defines(),
)
)
- else:
- import pysam
+ elif "regions" in name:
ext_modules.append(
Extension(
name,
[src_file],
- extra_link_args=pysam.get_libraries(),
- include_dirs=pysam.get_include(),
- define_macros=pysam.get_defines(),
- #extra_objects=pysam.get_libraries(),
+ language="c++",
)
)
- if HAVE_CYTHON:
- # .pyx to .c
- ext_modules = cythonize(ext_modules) # , annotate=True
+ else:
+ ext_modules.append(Extension(name, [src_file]))
+
+ ext_modules = cythonize(ext_modules) # , annotate=True
return ext_modules
@@ -99,7 +79,7 @@ class build_ext(_build_ext):
# Fix to work with bootstrapped numpy installation
# http://stackoverflow.com/a/21621689/579416
# Prevent numpy from thinking it is still in its setup process:
- __builtins__.__NUMPY_SETUP__ = False
+ #__builtins__.__NUMPY_SETUP__ = False
import numpy
self.include_dirs.append(numpy.get_include())
@@ -117,27 +97,14 @@ class build_ext(_build_ext):
setup(
- name="pairtools",
- author="Open2C",
- author_email="open.chromosome.collective at gmail.com",
version=get_version(),
- license="MIT",
- description="CLI tools to process mapped Hi-C data",
- long_description=long_description,
- long_description_content_type="text/markdown",
- keywords=["genomics", "bioinformatics", "Hi-C", "contact"],
- url="https://github.com/open2c/pairtools",
ext_modules=get_ext_modules(),
cmdclass={"build_ext": build_ext},
zip_safe=False,
- classifiers=[s.strip() for s in classifiers.split("\n") if s],
- install_requires=install_requires,
- python_requires=">=3.7",
- entry_points={
- "console_scripts": [
- "pairtools = pairtools.cli:cli",
- #'pairsamtools = pairtools.cli:cli',
- ]
- },
+ # entry_points={
+ # "console_scripts": [
+ # "pairtools = pairtools.cli:cli",
+ # ]
+ # },
packages=find_packages(),
-)
\ No newline at end of file
+)
=====================================
tests/data/mock.parse2-single-end.expand.sam
=====================================
@@ -0,0 +1,11 @@
+@SQ SN:chr1 LN:10000
+@SQ SN:chr2 LN:10000
+@PG ID:mock PN:mock VN:0.0.0 CL:mock
+readid01 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid01 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid02 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid03 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1
+readid03 16 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1
+readid03 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1
\ No newline at end of file
=====================================
tests/data/mock.parse2-single-end.sam
=====================================
@@ -0,0 +1,8 @@
+@SQ SN:chr1 LN:10000
+@SQ SN:chr2 LN:10000
+@PG ID:mock PN:mock VN:0.0.0 CL:mock
+readid01 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid01 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid02 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1
=====================================
tests/test_parse2.py
=====================================
@@ -126,3 +126,138 @@ def test_mock_pysam_parse2_pair():
print()
assert assigned_pair == simulated_pair
+
+
+def test_mock_pysam_parse2_single_end():
+
+ """Testing single-end mode for parse2, no-flip mode.
+ --report-position is outer (parse2 default)
+ --report-orientation is pair (parse2 default)
+ """
+
+ mock_sam_path = os.path.join(testdir, "data", "mock.parse2-single-end.sam")
+ mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes")
+ try:
+ result = subprocess.check_output(
+ [
+ "python",
+ "-m",
+ "pairtools",
+ "parse2",
+ "-c",
+ mock_chroms_path,
+ "--single-end",
+ "--add-pair-index",
+ "--no-flip",
+ "--report-position",
+ "outer",
+ "--report-orientation",
+ "pair",
+ mock_sam_path,
+ ],
+ ).decode("ascii")
+ except subprocess.CalledProcessError as e:
+ print(e.output)
+ print(sys.exc_info())
+ raise e
+
+ # check if the header got transferred correctly
+ sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")]
+ pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")]
+ for l in sam_header:
+ assert any([l in l2 for l2 in pairsam_header])
+
+ # check that the pairs got assigned properly
+ id_counter = 0
+ prev_id = ""
+ for l in result.split("\n"):
+ if l.startswith("#") or not l:
+ continue
+
+ if prev_id == l.split("\t")[0]:
+ id_counter += 1
+ else:
+ id_counter = 0
+ prev_id = l.split("\t")[0]
+
+ assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:]
+ print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter)
+ simulated_pair = (
+ l.split("SIMULATED:", 1)[1]
+ .split("\031", 1)[0]
+ .split("|")[id_counter]
+ .split(",")
+ )
+ print(assigned_pair)
+ print(simulated_pair, prev_id)
+ print()
+
+ assert assigned_pair == simulated_pair
+
+
+def test_mock_pysam_parse2_single_end_expand():
+
+ """Testing single-end mode for parse2, no-flip mode, with --expand.
+ --report-position is outer (parse2 default)
+ --report-orientation is pair (parse2 default)
+ """
+
+ mock_sam_path = os.path.join(testdir, "data", "mock.parse2-single-end.expand.sam")
+ mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes")
+ try:
+ result = subprocess.check_output(
+ [
+ "python",
+ "-m",
+ "pairtools",
+ "parse2",
+ "-c",
+ mock_chroms_path,
+ "--single-end",
+ "--expand",
+ "--add-pair-index",
+ "--no-flip",
+ "--report-position",
+ "outer",
+ "--report-orientation",
+ "pair",
+ mock_sam_path,
+ ],
+ ).decode("ascii")
+ except subprocess.CalledProcessError as e:
+ print(e.output)
+ print(sys.exc_info())
+ raise e
+
+ # check if the header got transferred correctly
+ sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")]
+ pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")]
+ for l in sam_header:
+ assert any([l in l2 for l2 in pairsam_header])
+
+ # check that the pairs got assigned properly
+ id_counter = 0
+ prev_id = ""
+ for l in result.split("\n"):
+ if l.startswith("#") or not l:
+ continue
+
+ if prev_id == l.split("\t")[0]:
+ id_counter += 1
+ else:
+ id_counter = 0
+ prev_id = l.split("\t")[0]
+
+ assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:]
+ print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter)
+ simulated_pair = (
+ l.split("SIMULATED:", 1)[1]
+ .split("\031", 1)[0]
+ .split("|")[id_counter]
+ .split(",")
+ )
+ print(assigned_pair)
+ print(simulated_pair, prev_id)
+ print()
+
+ assert assigned_pair == simulated_pair
\ No newline at end of file
View it on GitLab: https://salsa.debian.org/med-team/pairtools/-/commit/ac753fea593330018aa8419e540a97add8e01ae0
--
View it on GitLab: https://salsa.debian.org/med-team/pairtools/-/commit/ac753fea593330018aa8419e540a97add8e01ae0
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20241221/7daf25e9/attachment-0001.htm>
More information about the debian-med-commit
mailing list