[med-svn] [Git][med-team/pairtools][master] 5 commits: New upstream version 1.1.2
Alexandre Detiste (@detiste-guest)
gitlab@salsa.debian.org
Sat Dec 21 20:14:22 GMT 2024
Alexandre Detiste pushed to branch master at Debian Med / pairtools
Commits:
ac753fea by Alexandre Detiste at 2024-12-21T21:06:29+01:00
New upstream version 1.1.2
- - - - -
84c1c6b6 by Alexandre Detiste at 2024-12-21T21:06:31+01:00
Update upstream source from tag 'upstream/1.1.2'
Update to upstream version '1.1.2'
with Debian dir 837d173031a071fb994f249cd6b0ced1410783ba
- - - - -
737d044c by Alexandre Detiste at 2024-12-21T21:10:01+01:00
use external "pipes" module
- - - - -
450dee79 by Alexandre Detiste at 2024-12-21T21:10:47+01:00
upstream switched to pybuild-plugin-pyproject
- - - - -
a3877fe2 by Alexandre Detiste at 2024-12-21T21:12:39+01:00
refresh patches
- - - - -
27 changed files:
- + .github/workflows/python-build-wheels.yml
- .github/workflows/python-publish-test.yml
- .github/workflows/python-publish.yml
- .github/workflows/python-package.yml → .github/workflows/python-test.yml
- CHANGES.md
- MANIFEST.in
- README.md
- debian/changelog
- debian/control
- debian/copyright
- debian/patches/no_install_depends_cython.patch
- − debian/patches/remove_pipes.patch
- debian/patches/series
- doc/installation.rst
- pairtools/__init__.py
- pairtools/cli/parse2.py
- pairtools/lib/parse.py
- pairtools/lib/scaling.py
- + pyproject.toml
- readthedocs.yml
- − requirements-dev.txt
- − requirements.txt
- − requirements_doc.txt
- setup.py
- + tests/data/mock.parse2-single-end.expand.sam
- + tests/data/mock.parse2-single-end.sam
- tests/test_parse2.py
Changes:
=====================================
.github/workflows/python-build-wheels.yml
=====================================
@@ -0,0 +1,88 @@
+name: Build wheels
+
+on: [workflow_dispatch]
+
+jobs:
+ make_sdist:
+ name: Make SDist
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0 # Optional, use if you use setuptools_scm
+ submodules: true # Optional, use if you have submodules
+
+ - name: Install dependencies
+ run: python -m pip install cython numpy pysam
+
+ - name: Build SDist
+ run: pipx run build --sdist
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: cibw-sdist
+ path: dist/*.tar.gz
+
+ build_wheels:
+ name: Build wheels on ${{ matrix.os }}
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ # macos-13 is an intel runner, macos-14 is apple silicon
+ os: [ubuntu-latest]
+ #, windows-latest, macos-13, macos-14]
+ python-version: [ "3.11" ] # "3.7", "3.8", "3.9", "3.10",
+
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ # - name: Build wheels
+ # uses: pypa/cibuildwheel@v2.21.0
+ # # uses: pypa/cibuildwheel@v2.17.0
+ # # env:
+ # # CIBW_SOME_OPTION: value
+ # # ...
+ # # with:
+ # # package-dir: .
+ # # output-dir: wheelhouse
+ # # config-file: "{package}/pyproject.toml"
+
+ - name: Install cibuildwheel
+ run: python -m pip install cibuildwheel==2.22.0
+
+ - name: Build wheels
+ run: python -m cibuildwheel --output-dir dist
+ # to supply options, put them in 'env', like:
+ env:
+ #CIBW_BUILD_FRONTEND: "pip; args: --no-build-isolation"
+ CIBW_BUILD_FRONTEND: "build; args: --no-isolation"
+ CIBW_BEFORE_ALL: "yum install bzip2-devel xz-devel -y;"
+
+ # we have to recompile pysam so that repairwheel can later find various libraries (libssl, libnghttp2, etc)
+ #CIBW_BEFORE_ALL: "yum install bzip2-devel xz-devel openssl-devel openldap-devel krb5-devel libssh-devel libnghttp2-devel -y;"
+ CIBW_BEFORE_BUILD: "python -m pip install setuptools cython numpy pysam --no-binary pysam"
+
+ # skip building 32-bit wheels (i686)
+ CIBW_ARCHS_LINUX: "auto64"
+
+ # we could use 2_28 to download pysam's wheel instead of compiling it ;
+ # HOWEVER THAT DIDN'T WORK BECAUSE PYSAM DEPENDS ON LIBSSL, LIBNGHTTP2, ETC, WHICH CANNOT BE FOUND
+ # SO WE ARE BACK TO COMPILING PYSAM'S WHEEL (no-binary pysam)
+ # CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28"
+
+ ## skip building pypy and musllinux
+ CIBW_SKIP: pp* *musllinux*
+
+ #CIBW_REPAIR_WHEEL_COMMAND: 'auditwheel -v repair -w {dest_dir} {wheel}'
+
+ #PIP_NO_CACHE_DIR: "false"
+ #PIP_NO_BUILD_ISOLATION: "false"
+ #PIP_NO_BINARY: "pysam"
+
+ - uses: actions/upload-artifact@v4
+ with:
+ name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
+ path: ./dist/*.whl
\ No newline at end of file
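
The workflow above drives cibuildwheel entirely through CIBW_* environment variables. As a minimal sketch, the same build can plausibly be reproduced locally, assuming Docker and the pinned cibuildwheel==2.22.0 are available; the env values below are copied verbatim from the YAML:

```python
# Hedged local reproduction of the CI wheel build; assumes Docker is running
# and `pip install cibuildwheel==2.22.0` has been done beforehand.
import os
import subprocess

env = dict(
    os.environ,
    CIBW_BUILD_FRONTEND="build; args: --no-isolation",
    CIBW_BEFORE_BUILD="python -m pip install setuptools cython numpy pysam --no-binary pysam",
    CIBW_ARCHS_LINUX="auto64",
    CIBW_SKIP="pp* *musllinux*",
)
# Same invocation as the "Build wheels" step above.
subprocess.run(["python", "-m", "cibuildwheel", "--output-dir", "dist"], env=env, check=True)
```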
=====================================
.github/workflows/python-publish-test.yml
=====================================
@@ -1,32 +1,51 @@
-
-# This workflows will upload a Python Package using Twine when a release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
name: Publish Python Package to Test PyPI
on:
- release:
- types: [prereleased]
+ # release:
+ # types: [published]
+ workflow_dispatch:
jobs:
- deploy:
-
+ publish_all:
+ name: Publish wheels and sdist to Test PyPI
+
+ # if: github.event_name == 'release' && github.event.action == 'published'
+
+ environment: testpypi
+ permissions:
+ id-token: write
runs-on: ubuntu-latest
-
steps:
- - uses: actions/checkout@v2
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: '3.10'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install setuptools wheel twine cython numpy pysam
- - name: Build and publish
- env:
- TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
- run: |
- python setup.py sdist
- twine upload --repository-url https://test.pypi.org/legacy/ dist/*
+ - uses: dawidd6/action-download-artifact@v7
+ with:
+ # Required, if the repo is private a Personal Access Token with `repo` scope is needed or GitHub token in a job where the permissions `action` scope set to `read`
+ #github_token: ${{secrets.GITHUB_TOKEN}}
+ # Optional, workflow file name or ID
+ # If not specified, will be inferred from run_id (if run_id is specified), or will be the current workflow
+ workflow: python-build-wheels.yml
+ # Optional, the status or conclusion of a completed workflow to search for
+ # Can be one of a workflow conclusion:
+ # "failure", "success", "neutral", "cancelled", "skipped", "timed_out", "action_required"
+ # Or a workflow status:
+ # "completed", "in_progress", "queued"
+ # Use the empty string ("") to ignore status or conclusion in the search
+ workflow_conclusion: success
+
+ - name: Publish sdist 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-sdist
+ repository-url: https://test.pypi.org/legacy/
+
+ - name: Publish wheels 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-wheels-ubuntu-latest-0
+ repository-url: https://test.pypi.org/legacy/
+
+
+
+
+
+
+
=====================================
.github/workflows/python-publish.yml
=====================================
@@ -1,31 +1,48 @@
-# This workflow will upload a Python Package using Twine when a release is created
-# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
-
-name: Upload Python Package
+name: Publish Python Package to PyPI
on:
- release:
- types: [created]
+ # release:
+ # types: [published]
+ workflow_dispatch:
jobs:
- deploy:
-
+ publish_all:
+ name: Publish wheels and sdist to PyPI
+
+ # if: github.event_name == 'release' && github.event.action == 'published'
+
+ environment: pypi
+ permissions:
+ id-token: write
runs-on: ubuntu-latest
-
steps:
- - uses: actions/checkout@v2
- - name: Set up Python
- uses: actions/setup-python@v2
- with:
- python-version: '3.10'
- - name: Install dependencies
- run: |
- python -m pip install --upgrade pip
- pip install setuptools wheel twine cython pysam numpy
- - name: Build and publish
- env:
- TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
- TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
- run: |
- python setup.py sdist
- twine upload dist/*
+ - uses: dawidd6/action-download-artifact@v7
+ with:
+ # Required, if the repo is private a Personal Access Token with `repo` scope is needed or GitHub token in a job where the permissions `action` scope set to `read`
+ #github_token: ${{secrets.GITHUB_TOKEN}}
+ # Optional, workflow file name or ID
+ # If not specified, will be inferred from run_id (if run_id is specified), or will be the current workflow
+ workflow: python-build-wheels.yml
+ # Optional, the status or conclusion of a completed workflow to search for
+ # Can be one of a workflow conclusion:
+ # "failure", "success", "neutral", "cancelled", "skipped", "timed_out", "action_required"
+ # Or a workflow status:
+ # "completed", "in_progress", "queued"
+ # Use the empty string ("") to ignore status or conclusion in the search
+ workflow_conclusion: success
+
+ - name: Publish sdist 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-sdist
+
+ - name: Publish wheels 📦 to PyPI
+ uses: pypa/gh-action-pypi-publish@release/v1
+ with:
+ packages-dir: cibw-wheels-ubuntu-latest-0
+
+
+
+
+
+
=====================================
.github/workflows/python-package.yml → .github/workflows/python-test.yml
=====================================
@@ -1,8 +1,7 @@
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
-name: Python package
-
+name: Test build, lint and test
on:
push:
branches: [ master ]
@@ -16,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"]
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2
@@ -26,10 +25,9 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
- python -m pip install --upgrade pip wheel setuptools
- pip install numpy cython pysam
- pip install -r requirements-dev.txt
- pip install -e .
+ python -m pip install --upgrade pip wheel setuptools build
+ pip install cython pysam numpy
+ pip install -e .[test] --no-build-isolation -v -v
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
@@ -40,3 +38,6 @@ jobs:
run: |
pip install pytest
pytest
+
+
+
\ No newline at end of file
=====================================
CHANGES.md
=====================================
@@ -1,3 +1,17 @@
+### 1.1.2 (2024-12-11) ###
+
+Bugfixes:
+- Drop Cython-generated .c/.cpp files from the sdist
+
+### 1.1.1 (2024-12-10) ###
+
+Bugfixes:
+- Migrate to pyproject.toml + cibuildwheel. pairtools will now release binary wheels for Linux. --no-build-isolation is now a mandatory flag.
+- Require Cython during the build to avoid the "circular import" bug.
+- Fix incomplete API functionality for read-side detection by @agalitsyna
+
+**Full Changelog**: https://github.com/open2c/pairtools/compare/v1.1.0...v1.1.1
+
### 1.1.0 (2024-04-23) ###
Major bugfixes:
- Fix a major bug in sort that previously broke the sorting order. This bug was introduced in recent versions of pairtools #230
=====================================
MANIFEST.in
=====================================
@@ -1,8 +1,8 @@
-include CHANGES.md
include README.md
-include requirements.txt
-include requirements_doc.txt
+include CHANGES.md
include LICENSE
+include pyproject.toml
+include pytest.ini
graft tests
graft doc
@@ -11,6 +11,8 @@ prune doc/_templates
global-include *.pyx
global-include *.pxd
+global-exclude *.c
+global-exclude *.cpp
global-exclude __pycache__/*
global-exclude *.so
=====================================
README.md
=====================================
@@ -48,23 +48,33 @@ Requirements:
- Python 3.x
- Python packages `cython`, `pysam`, `bioframe`, `pyyaml`, `numpy`, `scipy`, `pandas` and `click`.
-- Command-line utilities `sort` (the Unix version), `bgzip` (shipped with `samtools`) and `samtools`. If available, `pairtools` can compress outputs with `pbgzip` and `lz4`.
+- Command-line utilities `sort` (the Unix version), `samtools` and `bgzip` (shipped with `samtools`). If available, `pairtools` can compress outputs with `pbgzip` and `lz4`.
-For the full list of recommended versions, see [requirements in the the GitHub repo](https://github.com/open2c/pairtools/blob/detect_mutations/requirements.txt).
+For the full list of recommended versions, see [the requirements section in the pyproject.toml](https://github.com/open2c/pairtools/blob/main/pyproject.toml).
-We highly recommend using the `conda` package manager to install `pairtools` together with all its dependencies. To get it, you can either install the full [Anaconda](https://www.continuum.io/downloads) Python distribution or just the standalone [conda](http://conda.pydata.org/miniconda.html) package manager.
+There are three options for installing pairtools:
-With `conda`, you can install `pairtools` and all of its dependencies from the [bioconda](https://bioconda.github.io/index.html) channel.
+1. We highly recommend using the `conda` package manager to install `pairtools` together with all its dependencies. To get it, you can either install the full [Anaconda](https://www.continuum.io/downloads) Python distribution or just the standalone [conda](http://conda.pydata.org/miniconda.html) package manager.
+
+With `conda`, you can install `pairtools` and all of its dependencies from the [bioconda](https://bioconda.github.io/index.html) channel:
```sh
$ conda install -c conda-forge -c bioconda pairtools
```
-Alternatively, install non-Python dependencies and `pairtools` with Python-only dependencies from PyPI using pip:
+2. Alternatively, install non-Python dependencies (`sort`, `samtools`, `bgzip`, `pbgzip` and `lz4`) separately and download `pairtools` with Python dependencies from PyPI using pip:
```sh
-$ pip install numpy pysam cython
$ pip install pairtools
```
+3. Finally, when the two options above don't work or when you want to modify `pairtools`, build `pairtools` from source via pip's "editable" mode:
+```sh
+$ pip install numpy cython pysam
+$ git clone https://github.com/open2c/pairtools
+$ cd pairtools
+$ pip install -e ./ --no-build-isolation
+```
+
+
## Quick example
Setup a new test folder and download a small Hi-C dataset mapped to sacCer3 genome:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+pairtools (1.1.2-1) UNRELEASED; urgency=medium
+
+ * New upstream version 1.1.2
+ * use external "pipes" module
+ * upstream switched to pybuild-plugin-pyproject
+
+ -- Alexandre Detiste <tchet@debian.org> Sat, 21 Dec 2024 21:10:50 +0100
+
pairtools (1.1.0-1) unstable; urgency=medium
* Team upload.
=====================================
debian/control
=====================================
@@ -6,6 +6,7 @@ Uploaders: Antoni Villalonga <antoni@friki.cat>
Build-Depends: debhelper-compat (= 13),
dh-sequence-python3,
python3-all-dev,
+ pybuild-plugin-pyproject,
cython3,
python-is-python3,
python3-setuptools,
@@ -16,6 +17,7 @@ Build-Depends: debhelper-compat (= 13),
python3-pandas,
python3-pysam (>= 0.20.0+ds-3~),
python3-pytest <!nocheck>,
+ python3-standard-pipes,
python3-scipy,
libhts-dev,
python3-yaml <!nocheck>
@@ -27,9 +29,11 @@ Rules-Requires-Root: no
Package: python3-pairtools
Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends},
- ${python3:Depends}
+Depends:
+ python3-standard-pipes,
+ ${shlibs:Depends},
+ ${misc:Depends},
+ ${python3:Depends},
Suggests: python3-pairtools-examples (>= 1.0.2)
Description: Framework to process sequencing data from a Hi-C experiment
Simple and fast command-line framework to process sequencing data from a Hi-C
=====================================
debian/copyright
=====================================
@@ -45,57 +45,3 @@ License: GPL-2+
.
On Debian systems, the complete text of the GNU General
Public License version 2 can be found in "/usr/share/common-licenses/GPL-2".
-
-Files: debian/patches/remove_pipes.patch
-Copyright: 1992 Guido Van Rossum
-License: PSF2
- .
- PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
- --------------------------------------------
- .
- 1. This LICENSE AGREEMENT is between the Python Software Foundation
- ("PSF"), and the Individual or Organization ("Licensee") accessing and
- otherwise using this software ("Python") in source or binary form and
- its associated documentation.
- .
- 2. Subject to the terms and conditions of this License Agreement, PSF
- hereby grants Licensee a nonexclusive, royalty-free, world-wide
- license to reproduce, analyze, test, perform and/or display publicly,
- prepare derivative works, distribute, and otherwise use Python alone
- or in any derivative version, provided, however, that PSF's License
- Agreement and PSF's notice of copyright, i.e., "Copyright (c) 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
- 2013, 2014 Python Software Foundation; All Rights Reserved" are
- retained in Python alone or in any derivative version prepared by
- Licensee.
- .
- 3. In the event Licensee prepares a derivative work that is based on
- or incorporates Python or any part thereof, and wants to make
- the derivative work available to others as provided herein, then
- Licensee hereby agrees to include in any such work a brief summary of
- the changes made to Python.
- .
- 4. PSF is making Python available to Licensee on an "AS IS"
- basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
- IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
- DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
- FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
- INFRINGE ANY THIRD PARTY RIGHTS.
- .
- 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
- FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
- A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON,
- OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
- .
- 6. This License Agreement will automatically terminate upon a material
- breach of its terms and conditions.
- .
- 7. Nothing in this License Agreement shall be deemed to create any
- relationship of agency, partnership, or joint venture between PSF and
- Licensee. This License Agreement does not grant permission to use PSF
- trademarks or trade name in a trademark sense to endorse or promote
- products or services of Licensee, or any third party.
- .
- 8. By copying, installing or otherwise using Python, Licensee
- agrees to be bound by the terms and conditions of this License
- Agreement.
=====================================
debian/patches/no_install_depends_cython.patch
=====================================
@@ -3,16 +3,13 @@ Bug-Debian: https://bugs.debian.org/1057997
Author: Andreas Tille <tille@debian.org>
Last-Update: Mon, 11 Dec 2023 10:10:25 +0100
---- a/requirements.txt
-+++ b/requirements.txt
-@@ -1,8 +1,7 @@
--cython
- numpy>=1.10
- click>=6.6
- scipy>=1.7.0
- pandas>=1.3.4
- pysam>=0.15.0
- pyyaml
--bioframe>=0.3.3
-\ No newline at end of file
-+bioframe>=0.3.3
+--- a/pyproject.toml
++++ b/pyproject.toml
+@@ -3,7 +3,6 @@
+ dynamic = ['version',]
+
+ dependencies = [
+- 'cython',
+ 'numpy>=1.10',
+ 'click>=6.6',
+ 'scipy>=1.7.0',
=====================================
debian/patches/remove_pipes.patch deleted
=====================================
@@ -1,273 +0,0 @@
---- a/pairtools/cli/split.py
-+++ b/pairtools/cli/split.py
-@@ -1,7 +1,6 @@
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- import sys
--import pipes
- import click
-
- from ..lib import fileio, pairsam_format, headerops
---- a/pairtools/lib/fileio.py
-+++ b/pairtools/lib/fileio.py
-@@ -1,8 +1,9 @@
- import shutil
--import pipes
- import subprocess
- import sys
-
-+from . import pipes
-+
- class ParseError(Exception):
- pass
-
---- /dev/null
-+++ b/pairtools/lib/pipes.py
-@@ -0,0 +1,247 @@
-+"""Conversion pipeline templates.
-+
-+The problem:
-+------------
-+
-+Suppose you have some data that you want to convert to another format,
-+such as from GIF image format to PPM image format. Maybe the
-+conversion involves several steps (e.g. piping it through compress or
-+uuencode). Some of the conversion steps may require that their input
-+is a disk file, others may be able to read standard input; similar for
-+their output. The input to the entire conversion may also be read
-+from a disk file or from an open file, and similar for its output.
-+
-+The module lets you construct a pipeline template by sticking one or
-+more conversion steps together. It will take care of creating and
-+removing temporary files if they are necessary to hold intermediate
-+data. You can then use the template to do conversions from many
-+different sources to many different destinations. The temporary
-+file names used are different each time the template is used.
-+
-+The templates are objects so you can create templates for many
-+different conversion steps and store them in a dictionary, for
-+instance.
-+
-+
-+Directions:
-+-----------
-+
-+To create a template:
-+ t = Template()
-+
-+To add a conversion step to a template:
-+ t.append(command, kind)
-+where kind is a string of two characters: the first is '-' if the
-+command reads its standard input or 'f' if it requires a file; the
-+second likewise for the output. The command must be valid /bin/sh
-+syntax. If input or output files are required, they are passed as
-+$IN and $OUT; otherwise, it must be possible to use the command in
-+a pipeline.
-+
-+To add a conversion step at the beginning:
-+ t.prepend(command, kind)
-+
-+To convert a file to another file using a template:
-+ sts = t.copy(infile, outfile)
-+If infile or outfile are the empty string, standard input is read or
-+standard output is written, respectively. The return value is the
-+exit status of the conversion pipeline.
-+
-+To open a file for reading or writing through a conversion pipeline:
-+ fp = t.open(file, mode)
-+where mode is 'r' to read the file, or 'w' to write it -- just like
-+for the built-in function open() or for os.popen().
-+
-+To create a new template object initialized to a given one:
-+ t2 = t.clone()
-+""" # '
-+
-+
-+import re
-+import os
-+import tempfile
-+# we import the quote function rather than the module for backward compat
-+# (quote used to be an undocumented but used function in pipes)
-+from shlex import quote
-+
-+__all__ = ["Template"]
-+
-+# Conversion step kinds
-+
-+FILEIN_FILEOUT = 'ff' # Must read & write real files
-+STDIN_FILEOUT = '-f' # Must write a real file
-+FILEIN_STDOUT = 'f-' # Must read a real file
-+STDIN_STDOUT = '--' # Normal pipeline element
-+SOURCE = '.-' # Must be first, writes stdout
-+SINK = '-.' # Must be last, reads stdin
-+
-+stepkinds = [FILEIN_FILEOUT, STDIN_FILEOUT, FILEIN_STDOUT, STDIN_STDOUT, \
-+ SOURCE, SINK]
-+
-+
-+class Template:
-+ """Class representing a pipeline template."""
-+
-+ def __init__(self):
-+ """Template() returns a fresh pipeline template."""
-+ self.debugging = 0
-+ self.reset()
-+
-+ def __repr__(self):
-+ """t.__repr__() implements repr(t)."""
-+ return '<Template instance, steps=%r>' % (self.steps,)
-+
-+ def reset(self):
-+ """t.reset() restores a pipeline template to its initial state."""
-+ self.steps = []
-+
-+ def clone(self):
-+ """t.clone() returns a new pipeline template with identical
-+ initial state as the current one."""
-+ t = Template()
-+ t.steps = self.steps[:]
-+ t.debugging = self.debugging
-+ return t
-+
-+ def debug(self, flag):
-+ """t.debug(flag) turns debugging on or off."""
-+ self.debugging = flag
-+
-+ def append(self, cmd, kind):
-+ """t.append(cmd, kind) adds a new step at the end."""
-+ if not isinstance(cmd, str):
-+ raise TypeError('Template.append: cmd must be a string')
-+ if kind not in stepkinds:
-+ raise ValueError('Template.append: bad kind %r' % (kind,))
-+ if kind == SOURCE:
-+ raise ValueError('Template.append: SOURCE can only be prepended')
-+ if self.steps and self.steps[-1][1] == SINK:
-+ raise ValueError('Template.append: already ends with SINK')
-+ if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
-+ raise ValueError('Template.append: missing $IN in cmd')
-+ if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
-+ raise ValueError('Template.append: missing $OUT in cmd')
-+ self.steps.append((cmd, kind))
-+
-+ def prepend(self, cmd, kind):
-+ """t.prepend(cmd, kind) adds a new step at the front."""
-+ if not isinstance(cmd, str):
-+ raise TypeError('Template.prepend: cmd must be a string')
-+ if kind not in stepkinds:
-+ raise ValueError('Template.prepend: bad kind %r' % (kind,))
-+ if kind == SINK:
-+ raise ValueError('Template.prepend: SINK can only be appended')
-+ if self.steps and self.steps[0][1] == SOURCE:
-+ raise ValueError('Template.prepend: already begins with SOURCE')
-+ if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
-+ raise ValueError('Template.prepend: missing $IN in cmd')
-+ if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
-+ raise ValueError('Template.prepend: missing $OUT in cmd')
-+ self.steps.insert(0, (cmd, kind))
-+
-+ def open(self, file, rw):
-+ """t.open(file, rw) returns a pipe or file object open for
-+ reading or writing; the file is the other end of the pipeline."""
-+ if rw == 'r':
-+ return self.open_r(file)
-+ if rw == 'w':
-+ return self.open_w(file)
-+ raise ValueError('Template.open: rw must be \'r\' or \'w\', not %r'
-+ % (rw,))
-+
-+ def open_r(self, file):
-+ """t.open_r(file) and t.open_w(file) implement
-+ t.open(file, 'r') and t.open(file, 'w') respectively."""
-+ if not self.steps:
-+ return open(file, 'r')
-+ if self.steps[-1][1] == SINK:
-+ raise ValueError('Template.open_r: pipeline ends width SINK')
-+ cmd = self.makepipeline(file, '')
-+ return os.popen(cmd, 'r')
-+
-+ def open_w(self, file):
-+ if not self.steps:
-+ return open(file, 'w')
-+ if self.steps[0][1] == SOURCE:
-+ raise ValueError('Template.open_w: pipeline begins with SOURCE')
-+ cmd = self.makepipeline('', file)
-+ return os.popen(cmd, 'w')
-+
-+ def copy(self, infile, outfile):
-+ return os.system(self.makepipeline(infile, outfile))
-+
-+ def makepipeline(self, infile, outfile):
-+ cmd = makepipeline(infile, self.steps, outfile)
-+ if self.debugging:
-+ print(cmd)
-+ cmd = 'set -x; ' + cmd
-+ return cmd
-+
-+
-+def makepipeline(infile, steps, outfile):
-+ # Build a list with for each command:
-+ # [input filename or '', command string, kind, output filename or '']
-+
-+ list = []
-+ for cmd, kind in steps:
-+ list.append(['', cmd, kind, ''])
-+ #
-+ # Make sure there is at least one step
-+ #
-+ if not list:
-+ list.append(['', 'cat', '--', ''])
-+ #
-+ # Take care of the input and output ends
-+ #
-+ [cmd, kind] = list[0][1:3]
-+ if kind[0] == 'f' and not infile:
-+ list.insert(0, ['', 'cat', '--', ''])
-+ list[0][0] = infile
-+ #
-+ [cmd, kind] = list[-1][1:3]
-+ if kind[1] == 'f' and not outfile:
-+ list.append(['', 'cat', '--', ''])
-+ list[-1][-1] = outfile
-+ #
-+ # Invent temporary files to connect stages that need files
-+ #
-+ garbage = []
-+ for i in range(1, len(list)):
-+ lkind = list[i-1][2]
-+ rkind = list[i][2]
-+ if lkind[1] == 'f' or rkind[0] == 'f':
-+ (fd, temp) = tempfile.mkstemp()
-+ os.close(fd)
-+ garbage.append(temp)
-+ list[i-1][-1] = list[i][0] = temp
-+ #
-+ for item in list:
-+ [inf, cmd, kind, outf] = item
-+ if kind[1] == 'f':
-+ cmd = 'OUT=' + quote(outf) + '; ' + cmd
-+ if kind[0] == 'f':
-+ cmd = 'IN=' + quote(inf) + '; ' + cmd
-+ if kind[0] == '-' and inf:
-+ cmd = cmd + ' <' + quote(inf)
-+ if kind[1] == '-' and outf:
-+ cmd = cmd + ' >' + quote(outf)
-+ item[1] = cmd
-+ #
-+ cmdlist = list[0][1]
-+ for item in list[1:]:
-+ [cmd, kind] = item[1:3]
-+ if item[0] == '':
-+ if 'f' in kind:
-+ cmd = '{ ' + cmd + '; }'
-+ cmdlist = cmdlist + ' |\n' + cmd
-+ else:
-+ cmdlist = cmdlist + '\n' + cmd
-+ #
-+ if garbage:
-+ rmcmd = 'rm -f'
-+ for file in garbage:
-+ rmcmd = rmcmd + ' ' + quote(file)
-+ trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
-+ cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
-+ #
-+ return cmdlist
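
The deleted patch above vendored the removed stdlib `pipes` module into `pairtools/lib`; the package now depends on an external copy instead (see the new `python3-standard-pipes` dependencies in debian/control below). A minimal usage sketch of the `pipes.Template` API documented in the docstring above, assuming the backport keeps the stdlib interface:

```python
# Sketch only: a one-step pipeline that upper-cases whatever is written
# through it. On Python >= 3.13, `import pipes` is expected to resolve to the
# standard-pipes backport rather than the removed stdlib module.
import pipes

t = pipes.Template()
t.append("tr a-z A-Z", "--")        # '--': step reads stdin and writes stdout

f = t.open("greeting.txt", "w")     # write end of the pipeline
f.write("hello pipes\n")
f.close()                           # waits for the shell pipeline to finish

print(open("greeting.txt").read())  # HELLO PIPES
```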
=====================================
debian/patches/series
=====================================
@@ -4,5 +4,4 @@ tests-to-python3
fix-pysam-ftbfs.patch
do-not-run-coverage.patch
no_install_depends_cython.patch
-remove_pipes.patch
syntax_warning.patch
=====================================
doc/installation.rst
=====================================
@@ -50,11 +50,17 @@ Then, you can compile and install `pairtools` in
`the development mode <https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode>`_,
which installs the package without moving it to a system folder and thus allows
immediate live-testing any changes in the python code. Please, make sure that you
-have `cython` installed!
+have `cython` and `pysam` installed!
.. code-block:: bash
+ $ pip install cython pysam numpy
$ cd pairtools
- $ pip install -e ./
-
-
+ $ pip install -e ./ --no-build-isolation
+
+A few notes on the installation:
+ - `pairtools` has to be installed with `--no-build-isolation`, because it extends `pysam` via Cython and
+ re-compiles against it during the build process. When build isolation is enabled, these `pysam` objects
+ get lost after the build.
+ - Because of the `--no-build-isolation` flag, the build does not install its build requirements, so you have to
+ install `cython`, `pysam` and `numpy` manually before the build.
=====================================
pairtools/__init__.py
=====================================
@@ -10,6 +10,6 @@ CLI tools to process mapped Hi-C data
"""
-__version__ = "1.1.0"
+__version__ = "1.1.2"
# from . import lib
=====================================
pairtools/cli/parse2.py
=====================================
@@ -43,10 +43,10 @@ UTIL_NAME = "pairtools_parse2"
help="""Reported position of alignments in pairs of complex walks (pos columns).
Each alignment in .bam/.sam Hi-C-like data has two ends, and you can report one or the other depending on the position of the alignment on a read or in a pair.
- "junction" - inner ends of sequential alignments in each pair, aka ligation junctions (complex walks default),
+ "junction" - inner ends of sequential alignments in each pair, aka ligation junctions,
"read" - 5'-end of alignments relative to R1 or R2 read coordinate system (as in traditional Hi-C),
"walk" - 5'-end of alignments relative to the whole walk coordinate system,
- "outer" - outer ends of sequential alignments in each pair. """,
+ "outer" - outer ends of sequential alignments in each pair (parse2 default). """,
)
@click.option(
"--report-orientation",
=====================================
pairtools/lib/parse.py
=====================================
@@ -9,7 +9,7 @@ I. pysam-based:
each sam entry is in fact special AlignedSegmentPairtoolized Cython object
that has alignment attributes and can be easily accessed from Python.
- Sam entries are gathered into reads by `push_pysam` function.
+ Sam entries are gathered into reads by `group_alignments_by_side` function.
2. **read** is a collection of sam entries corresponding to a single Hi-C molecule.
It is represented by three variables:
@@ -37,36 +37,6 @@ II. python-based data types are parsed from pysam-based ones:
from . import pairsam_format
from .parse_pysam import get_mismatches_c
-
-def group_alignments_by_side(sams):
- return [sam for sam in sams if sam.is_read1], [sam for sam in sams if sam.is_read2]
-
-
-def read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True):
- sams = []
-
- prev_readID = None
- while True:
- sam_entry = next(instream, None)
- readID = sam_entry.query_name if sam_entry else None
-
- # Read is fully populated, then parse and write:
- if not (sam_entry) or ((readID != prev_readID) and prev_readID):
- if sort:
- sams = sorted(sams, key=lambda a: (a.is_read2, a.query_alignment_start))
- out = sams if not group_by_side else group_alignments_by_side(sams)
- out = out if not return_readID else (prev_readID, out)
- yield out
-
- sams.clear()
-
- if sam_entry is None:
- break
- else:
- sams.append(sam_entry)
- prev_readID = readID
-
-
def streaming_classify(
instream, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs
):
@@ -124,9 +94,7 @@ def streaming_classify(
### Iterate over input pysam:
instream = iter(instream)
- for (readID, (sams1, sams2)) in read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True):
- if readID_transform is not None:
- readID = eval(readID_transform)
+ for (readID, (sams1, sams2)) in read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True, readID_transform=readID_transform):
### Parse
if not parse2: # regular parser:
@@ -216,17 +184,6 @@ def streaming_classify(
### Alignment utilities: ###
############################
-
-def push_pysam(sam_entry, sams1, sams2):
- """Parse pysam AlignedSegment (sam) into pairtools sams entry"""
- flag = sam_entry.flag
- if (flag & 0x40) != 0:
- sams1.append(sam_entry) # left read, or first read in a pair
- else:
- sams2.append(sam_entry) # right read, or mate pair
- return
-
-
def empty_alignment():
return {
"chrom": pairsam_format.UNMAPPED_CHROM,
@@ -251,6 +208,45 @@ def empty_alignment():
"mismatches": "",
}
+def group_alignments_by_side(sams):
+ """Group pysam AlignedSegments (sams) into left-read (R1) and right-read (R2) sam entries"""
+
+ sams1 = []
+ sams2 = []
+ for sam_entry in sams:
+ flag = sam_entry.flag
+ if (flag & 0x40) != 0:
+ sams1.append(sam_entry) # left read, or first read in a pair
+ else:
+ sams2.append(sam_entry) # right read, or mate pair
+ return sams1, sams2
+
+
+def read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True, readID_transform=None):
+ sams = []
+
+ prev_readID = None
+ while True:
+ sam_entry = next(instream, None)
+ readID = sam_entry.query_name if sam_entry else None
+ if readID_transform is not None and readID is not None:
+ readID = eval(readID_transform)
+
+ # Read is fully populated, then parse and write:
+ if not (sam_entry) or ((readID != prev_readID) and prev_readID):
+ if sort:
+ sams = sorted(sams, key=lambda a: (a.is_read2, a.query_alignment_start))
+ out = sams if not group_by_side else group_alignments_by_side(sams)
+ out = out if not return_readID else (prev_readID, out)
+ yield out
+
+ sams.clear()
+
+ if sam_entry is None:
+ break
+ else:
+ sams.append(sam_entry)
+ prev_readID = readID
def parse_pysam_entry(
sam,
@@ -672,7 +668,7 @@ def parse2_read(
]
algns1 = normalize_alignment_list(algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
- algns2 = [empty_alignment()] # Empty alignment dummy
+ algns2 = [] # Empty alignment dummy
if len(algns1) > 1:
# Look for ligation pair, and report linear alignments after deduplication of complex walks:
@@ -684,6 +680,8 @@ def parse2_read(
report_position,
report_orientation,
dedup_max_mismatch,
+ expand,
+ max_expansion_depth,
)
output = [x for x in output if x[-1][-1] != "R1-2"]
return (output, algns1, algns2)
@@ -893,11 +891,11 @@ def parse_complex_walk(
**Intramolecular deduplication**
- Forward read (left): right read (right):
+ Forward read (left): right read (right):
5'------------------------->3' 3'<--------------------------5'
- algns1 algns2
+ algns1 algns2
<5---3><5---3><5---3><5---3> <3---5><3---5><3---5><3---5>
- l0 l1 l2 l3 r3 r2 r1 r0
+ l0 l1 l2 l3 r3 r2 r1 r0
Alignment - bwa mem reported hit or alignment after gaps conversion.
Left and right alignments (algns1: [l0, l1, l2, l3], algns2: [r0, r1, r2, r3])
@@ -931,8 +929,8 @@ def parse_complex_walk(
If comparison is successful, go to 6.
6. Verify.
Check that downstream pairs on the left read overlap with the upstream pairs on the right read.
- If yes, exit.
- If not, we do not have an overlap, go to step 3.
+ If yes, exit.
+ If not, we do not have an overlap, go to step 3.
"""
AVAILABLE_REPORT_POSITION = ["outer", "junction", "read", "walk"]
@@ -1009,66 +1007,70 @@ def parse_complex_walk(
if not is_overlap:
current_right_pair = 1
- # II. Search of partial overlap if there are less than 2 alignments at either sides, or no overlaps found
- if current_right_pair == 1:
- last_reported_alignment_left = last_reported_alignment_right = 1
- if partial_overlap(
- algns1[-1],
- algns2[-1],
- max_insert_size=max_insert_size,
- dedup_max_mismatch=dedup_max_mismatch,
- ):
- if (
- n_algns1 >= 2
- ): # single alignment on right read and multiple alignments on left
- pair_index = (len(algns1) - 1, "R1")
- output_pairs.append(
- format_pair(
- algns1[-2],
- algns1[-1],
- pair_index=pair_index,
- algn2_pos3=algns2[-1]["pos5"],
- report_position=report_position,
- report_orientation=report_orientation,
+ if (n_algns2 == 0):
+ last_reported_alignment_left = 1
+ last_reported_alignment_right = 0
+ else:
+ # II. Search of partial overlap if there are less than 2 alignments at either sides, or no overlaps found
+ if (current_right_pair == 1):
+ last_reported_alignment_left = last_reported_alignment_right = 1
+ if partial_overlap(
+ algns1[-1],
+ algns2[-1],
+ max_insert_size=max_insert_size,
+ dedup_max_mismatch=dedup_max_mismatch,
+ ):
+ if (
+ n_algns1 >= 2
+ ): # single alignment on right read and multiple alignments on left
+ pair_index = (len(algns1) - 1, "R1")
+ output_pairs.append(
+ format_pair(
+ algns1[-2],
+ algns1[-1],
+ pair_index=pair_index,
+ algn2_pos3=algns2[-1]["pos5"],
+ report_position=report_position,
+ report_orientation=report_orientation,
+ )
)
- )
- last_reported_alignment_left = 2 # set the pointer for reporting
+ last_reported_alignment_left = 2 # set the pointer for reporting
+
+ if (
+ n_algns2 >= 2
+ ): # single alignment on left read and multiple alignments on right
+ pair_index = (len(algns1), "R2")
+ output_pairs.append(
+ format_pair(
+ algns2[-1],
+ algns2[-2],
+ pair_index=pair_index,
+ algn1_pos3=algns1[-1]["pos5"],
+ report_position=report_position,
+ report_orientation=report_orientation,
+ )
+ )
+ last_reported_alignment_right = 2 # set the pointer for reporting
+
+ # Note that if n_algns1==n_algns2==1 and alignments overlap, then we don't need to check,
+ # it's a non-ligated DNA fragment that we don't report.
- if (
- n_algns2 >= 2
- ): # single alignment on left read and multiple alignments on right
- pair_index = (len(algns1), "R2")
+ else: # end alignments do not overlap, report regular pair:
+ pair_index = (len(algns1), "R1-2")
output_pairs.append(
format_pair(
+ algns1[-1],
algns2[-1],
- algns2[-2],
pair_index=pair_index,
- algn1_pos3=algns1[-1]["pos5"],
report_position=report_position,
report_orientation=report_orientation,
)
)
- last_reported_alignment_right = 2 # set the pointer for reporting
-
- # Note that if n_algns1==n_algns2==1 and alignments overlap, then we don't need to check,
- # it's a non-ligated DNA fragment that we don't report.
-
- else: # end alignments do not overlap, report regular pair:
- pair_index = (len(algns1), "R1-2")
- output_pairs.append(
- format_pair(
- algns1[-1],
- algns2[-1],
- pair_index=pair_index,
- report_position=report_position,
- report_orientation=report_orientation,
- )
- )
- else: # there was an overlap, set some pointers:
- last_reported_alignment_left = (
- last_reported_alignment_right
- ) = current_right_pair
+ else: # there was an overlap, set some pointers:
+ last_reported_alignment_left = (
+ last_reported_alignment_right
+ ) = current_right_pair
# III. Report all remaining alignments.
# Report all unique alignments on left read (sequential):
@@ -1148,7 +1150,6 @@ def expand_pairs(pairs_list, max_expansion_depth=None):
list of expanded pairs
"""
-
for algn1, _algn1, pair_index1 in pairs_list:
for _algn2, algn2, pair_index2 in pairs_list:
if pair_index1 > pair_index2:
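
A note on the parse.py refactor above: `read_alignment_block()` now applies `readID_transform` itself (via the `eval(readID_transform)` call) instead of `streaming_classify()` doing it afterwards. A minimal sketch of that mechanism, assuming, as the moved call implies, that the transform is a Python expression string evaluated with the current `readID` in scope; the expression below is purely illustrative:

```python
# Hypothetical transform: strip a "/1" or "/2" mate suffix from read IDs.
readID_transform = "readID.split('/')[0]"

for readID in ["q1/1", "q1/2", "q2/1"]:
    transformed = eval(readID_transform)  # mirrors the eval() in read_alignment_block()
    print(readID, "->", transformed)      # q1/1 -> q1, q1/2 -> q1, q2/1 -> q2
```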
=====================================
pairtools/lib/scaling.py
=====================================
@@ -48,12 +48,13 @@ def _to_float(arr_or_scalar):
def assign_regs(chroms, pos, regs):
- gb_regs = regs.sort_values(["chrom", "start", "end"]).groupby(["chrom"])
+ gb_regs = regs.sort_values(["chrom", "start", "end"]).groupby("chrom")
regs_dict = {
chrom.encode(): regs_per_chrom[["start", "end"]]
- .values.flatten()
- .astype(np.int64)
+ .values
+ .flatten()
+ .astype(np.int64)
for chrom, regs_per_chrom in gb_regs
}
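
The scaling.py hunk above replaces `groupby(["chrom"])` with `groupby("chrom")`. A likely motivation, sketched below: iterating a groupby keyed on a one-element list yields 1-tuple keys in recent pandas, so the following `chrom.encode()` would fail on a tuple, while a scalar key keeps `chrom` a plain string:

```python
# Minimal sketch of the regs_dict construction with a scalar groupby key.
import numpy as np
import pandas as pd

regs = pd.DataFrame({"chrom": ["chr1", "chr1", "chr2"],
                     "start": [0, 500, 0],
                     "end":   [500, 1000, 800]})

gb_regs = regs.sort_values(["chrom", "start", "end"]).groupby("chrom")
regs_dict = {
    chrom.encode(): regs_per_chrom[["start", "end"]].values.flatten().astype(np.int64)
    for chrom, regs_per_chrom in gb_regs  # chrom is a str here, not a 1-tuple
}
print(regs_dict)  # {b'chr1': array([0, 500, 500, 1000]), b'chr2': array([0, 800])}
```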
=====================================
pyproject.toml
=====================================
@@ -0,0 +1,77 @@
+[project]
+name = "pairtools"
+dynamic = ['version',]
+
+dependencies = [
+ 'cython',
+ 'numpy>=1.10',
+ 'click>=6.6',
+ 'scipy>=1.7.0',
+ 'pandas>=1.3.4',
+ 'pysam>=0.15.0',
+ 'pyyaml',
+ 'bioframe>=0.3.3',
+]
+requires-python = ">=3.9"
+
+description = "CLI tools to process mapped Hi-C data"
+authors = [
+ {name = "Open2C", email = "open.chromosome.collective at gmail.com"},
+]
+license = {text = "MIT License"}
+keywords = ["genomics", "bioinformatics", "Hi-C", "contact", "chromosome"]
+readme = "README.md"
+
+classifiers = [
+ "Development Status :: 5 - Production/Stable",
+
+ "Intended Audience :: Science/Research",
+ "Topic :: Scientific/Engineering :: Bio-Informatics",
+ "Operating System :: OS Independent",
+
+ "License :: OSI Approved :: MIT License",
+
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+]
+
+[project.optional-dependencies]
+test = [
+ 'pytest',
+ 'pytest-flake8',
+ 'pytest-cov',
+]
+
+doc = [
+ 'sphinx-click',
+ 'ipython',
+ 'nbsphinx',
+ 'Sphinx>=7.0',
+ 'sphinx_rtd_theme',
+ 'docutils>0.16',
+]
+
+
+
+[project.urls]
+Homepage = "https://github.com/open2c/pairtools"
+Documentation = "https://pairtools.readthedocs.io/en/latest/"
+Repository = "https://github.com/open2c/pairtools.git"
+Issues = "https://github.com/open2c/pairtools/issues"
+Changelog = "https://github.com/open2c/pairtools/blob/master/CHANGES.md"
+
+
+[project.scripts]
+pairtools = "pairtools.cli:cli"
+
+
+[build-system]
+requires = [
+ "setuptools",
+ "cython",
+ "numpy",
+ "pysam"]
+build-backend = "setuptools.build_meta"
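
Note that pyproject.toml declares `dynamic = ['version']`, so the version still comes from setup.py's `get_version()` (see the setup.py section below). A sketch of how such a lookup plausibly works, assuming it regex-matches `__version__` in `pairtools/__init__.py`; the exact implementation is not shown in this diff:

```python
# Hypothetical version lookup matching the pairtools/__init__.py hunk above.
import re

def get_version(path="pairtools/__init__.py"):
    text = open(path).read()
    match = re.search(r'__version__\s*=\s*[\'"]([^\'"]+)[\'"]', text)
    if match is None:
        raise RuntimeError("Unable to find __version__ string.")
    return match.group(1)

print(get_version())  # "1.1.2"
```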
=====================================
readthedocs.yml
=====================================
@@ -12,6 +12,7 @@ sphinx:
python:
install:
- - requirements: requirements_doc.txt
- method: pip
path: .
+ extra_requirements:
+ - doc
\ No newline at end of file
=====================================
requirements-dev.txt deleted
=====================================
@@ -1,4 +0,0 @@
--r requirements.txt
-pytest
-pytest-flake8
-pytest-cov
=====================================
requirements.txt deleted
=====================================
@@ -1,8 +0,0 @@
-cython
-numpy>=1.10
-click>=6.6
-scipy>=1.7.0
-pandas>=1.3.4
-pysam>=0.15.0
-pyyaml
-bioframe>=0.3.3
\ No newline at end of file
=====================================
requirements_doc.txt deleted
=====================================
@@ -1,15 +0,0 @@
-Cython
-numpy
-nose
-scipy
-pandas
-pysam
-bioframe
-click>=7.0
-sphinx-click
-ipython
-nbsphinx
-Sphinx>=7.0
-sphinx_rtd_theme
-docutils>0.16
--e .
=====================================
setup.py
=====================================
@@ -12,23 +12,8 @@ from setuptools.extension import Extension
try:
from Cython.Distutils import build_ext as _build_ext
from Cython.Build import cythonize
-
- HAVE_CYTHON = True
except ImportError:
- from setuptools.command.build_ext import build_ext as _build_ext
-
- HAVE_CYTHON = False
-
-classifiers = """\
- Development Status :: 4 - Beta
- Operating System :: OS Independent
- Programming Language :: Python
- Programming Language :: Python :: 3
- Programming Language :: Python :: 3.7
- Programming Language :: Python :: 3.8
- Programming Language :: Python :: 3.9
- Programming Language :: Python :: 3.10
-"""
+ raise ImportError('Cython is now required to build the extension modules.')
def _read(*parts, **kwargs):
@@ -48,46 +33,41 @@ def get_version():
return version
-long_description = _read("README.md")
-
-install_requires = [l for l in _read("requirements.txt").split("\n") if l]
-
-
def get_ext_modules():
- ext = ".pyx" if HAVE_CYTHON else ".c"
+ ext = ".pyx"
src_files = glob.glob(
- os.path.join(os.path.dirname(__file__), "pairtools", "lib", "*" + ext)
+ #os.path.join(os.path.dirname(__file__), "pairtools", "lib", "*" + ext)
+ os.path.join("pairtools", "lib", "*" + ext)
)
ext_modules = []
for src_file in src_files:
name = "pairtools.lib." + os.path.splitext(os.path.basename(src_file))[0]
- if not "pysam" in name and not "regions" in name:
- ext_modules.append(Extension(name, [src_file]))
- elif "regions" in name:
+
+ if 'pysam' in name:
+ import pysam
ext_modules.append(
Extension(
name,
[src_file],
- language="c++",
+ extra_link_args=pysam.get_libraries(),
+ include_dirs=pysam.get_include(),
+ define_macros=pysam.get_defines(),
)
)
- else:
- import pysam
+ elif "regions" in name:
ext_modules.append(
Extension(
name,
[src_file],
- extra_link_args=pysam.get_libraries(),
- include_dirs=pysam.get_include(),
- define_macros=pysam.get_defines(),
- #extra_objects=pysam.get_libraries(),
+ language="c++",
)
)
- if HAVE_CYTHON:
- # .pyx to .c
- ext_modules = cythonize(ext_modules) # , annotate=True
+ else:
+ ext_modules.append(Extension(name, [src_file]))
+
+ ext_modules = cythonize(ext_modules) # , annotate=True
return ext_modules
@@ -99,7 +79,7 @@ class build_ext(_build_ext):
# Fix to work with bootstrapped numpy installation
# http://stackoverflow.com/a/21621689/579416
# Prevent numpy from thinking it is still in its setup process:
- __builtins__.__NUMPY_SETUP__ = False
+ #__builtins__.__NUMPY_SETUP__ = False
import numpy
self.include_dirs.append(numpy.get_include())
@@ -117,27 +97,14 @@ class build_ext(_build_ext):
setup(
- name="pairtools",
- author="Open2C",
- author_email="open.chromosome.collective at gmail.com",
version=get_version(),
- license="MIT",
- description="CLI tools to process mapped Hi-C data",
- long_description=long_description,
- long_description_content_type="text/markdown",
- keywords=["genomics", "bioinformatics", "Hi-C", "contact"],
- url="https://github.com/open2c/pairtools",
ext_modules=get_ext_modules(),
cmdclass={"build_ext": build_ext},
zip_safe=False,
- classifiers=[s.strip() for s in classifiers.split("\n") if s],
- install_requires=install_requires,
- python_requires=">=3.7",
- entry_points={
- "console_scripts": [
- "pairtools = pairtools.cli:cli",
- #'pairsamtools = pairtools.cli:cli',
- ]
- },
+ # entry_points={
+ # "console_scripts": [
+ # "pairtools = pairtools.cli:cli",
+ # ]
+ # },
packages=find_packages(),
-)
\ No newline at end of file
+)
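
A note on the setup.py rewrite above: Cython is now a hard build requirement, and pysam is imported at build time to locate its headers, libraries and macros. That is exactly why `--no-build-isolation` is mandatory: pysam must already be importable in the build environment. A quick sanity-check sketch using only the pysam calls that appear in the diff:

```python
# Sketch, assuming pysam is installed in the build environment.
import pysam

print(pysam.get_include())    # include dirs for compiling pysam-extending Cython modules
print(pysam.get_libraries())  # shared objects that extensions link against
print(pysam.get_defines())    # macro definitions pysam was built with
```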
=====================================
tests/data/mock.parse2-single-end.expand.sam
=====================================
@@ -0,0 +1,11 @@
+@SQ SN:chr1 LN:10000
+@SQ SN:chr2 LN:10000
+@PG ID:mock PN:mock VN:0.0.0 CL:mock
+readid01 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid01 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid02 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid03 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1
+readid03 16 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1
+readid03 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1
\ No newline at end of file
=====================================
tests/data/mock.parse2-single-end.sam
=====================================
@@ -0,0 +1,8 @@
+@SQ SN:chr1 LN:10000
+@SQ SN:chr2 LN:10000
+@PG ID:mock PN:mock VN:0.0.0 CL:mock
+readid01 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid01 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1
+readid02 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1
+readid02 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1
=====================================
tests/test_parse2.py
=====================================
@@ -126,3 +126,138 @@ def test_mock_pysam_parse2_pair():
print()
assert assigned_pair == simulated_pair
+
+
+def test_mock_pysam_parse2_single_end():
+
+ """Testing single-end mode for parse2, no-flip mode.
+ --report-position is outer (parse2 default)
+ --report-orientation is pair (parse2 default)
+ """
+
+ mock_sam_path = os.path.join(testdir, "data", "mock.parse2-single-end.sam")
+ mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes")
+ try:
+ result = subprocess.check_output(
+ [
+ "python",
+ "-m",
+ "pairtools",
+ "parse2",
+ "-c",
+ mock_chroms_path,
+ "--single-end",
+ "--add-pair-index",
+ "--no-flip",
+ "--report-position",
+ "outer",
+ "--report-orientation",
+ "pair",
+ mock_sam_path,
+ ],
+ ).decode("ascii")
+ except subprocess.CalledProcessError as e:
+ print(e.output)
+ print(sys.exc_info())
+ raise e
+
+ # check if the header got transferred correctly
+ sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")]
+ pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")]
+ for l in sam_header:
+ assert any([l in l2 for l2 in pairsam_header])
+
+ # check that the pairs got assigned properly
+ id_counter = 0
+ prev_id = ""
+ for l in result.split("\n"):
+ if l.startswith("#") or not l:
+ continue
+
+ if prev_id == l.split("\t")[0]:
+ id_counter += 1
+ else:
+ id_counter = 0
+ prev_id = l.split("\t")[0]
+
+ assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:]
+ print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter)
+ simulated_pair = (
+ l.split("SIMULATED:", 1)[1]
+ .split("\031", 1)[0]
+ .split("|")[id_counter]
+ .split(",")
+ )
+ print(assigned_pair)
+ print(simulated_pair, prev_id)
+ print()
+
+ assert assigned_pair == simulated_pair
+
+
+def test_mock_pysam_parse2_single_end_expand():
+
+ """Testing single-end mode for parse2, no-flip mode, with --expand.
+ --report-position is outer (parse2 default)
+ --report-orientation is pair (parse2 default)
+ """
+
+ mock_sam_path = os.path.join(testdir, "data", "mock.parse2-single-end.expand.sam")
+ mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes")
+ try:
+ result = subprocess.check_output(
+ [
+ "python",
+ "-m",
+ "pairtools",
+ "parse2",
+ "-c",
+ mock_chroms_path,
+ "--single-end",
+ "--expand",
+ "--add-pair-index",
+ "--no-flip",
+ "--report-position",
+ "outer",
+ "--report-orientation",
+ "pair",
+ mock_sam_path,
+ ],
+ ).decode("ascii")
+ except subprocess.CalledProcessError as e:
+ print(e.output)
+ print(sys.exc_info())
+ raise e
+
+ # check if the header got transferred correctly
+ sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")]
+ pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")]
+ for l in sam_header:
+ assert any([l in l2 for l2 in pairsam_header])
+
+ # check that the pairs got assigned properly
+ id_counter = 0
+ prev_id = ""
+ for l in result.split("\n"):
+ if l.startswith("#") or not l:
+ continue
+
+ if prev_id == l.split("\t")[0]:
+ id_counter += 1
+ else:
+ id_counter = 0
+ prev_id = l.split("\t")[0]
+
+ assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:]
+ print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter)
+ simulated_pair = (
+ l.split("SIMULATED:", 1)[1]
+ .split("\031", 1)[0]
+ .split("|")[id_counter]
+ .split(",")
+ )
+ print(assigned_pair)
+ print(simulated_pair, prev_id)
+ print()
+
+ assert assigned_pair == simulated_pair
\ No newline at end of file
View it on GitLab: https://salsa.debian.org/med-team/pairtools/-/compare/fedabf602515580646c0d8c960375e202cc75bef...a3877fe292d51491132421a2f457454d2f8a671b