[med-svn] [Git][med-team/python-py2bit][upstream] New upstream version 0.3.1
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Tue Aug 9 19:30:57 BST 2022
Nilesh Patra pushed to branch upstream at Debian Med / python-py2bit
Commits:
b55fd12c by Nilesh Patra at 2022-08-09T23:49:28+05:30
New upstream version 0.3.1
- - - - -
7 changed files:
- − .travis.yml
- README.md
- + azure-pipelines.yml
- lib2bit/2bit.c
- py2bit.h
- py2bitTest/test.py
- setup.py
Changes:
=====================================
.travis.yml deleted
=====================================
@@ -1,10 +0,0 @@
-language: python
-python:
- - "2.6"
- - "2.7"
- - "3.3"
- - "3.4"
- - "3.5"
- - "3.6"
-install: python ./setup.py install
-script: nosetests -sv
=====================================
README.md
=====================================
@@ -15,6 +15,8 @@ Table of Contents
* [Print file information](#print-file-information)
* [Fetch a sequence](#fetch-a-sequence)
* [Fetch per-base statistics](#fetch-per-base-statistics)
+ * [Fetch masked blocks](#fetch-masked-blocks)
+ * [Close a file](#close-a-file)
* [A note on coordinates](#a-note-on-coordinates)
# Installation
@@ -108,6 +110,31 @@ If integer counts are preferred, then they can instead be returned.
>>> tb.bases("chr1", 24, 74, False)
{'A': 6, 'C': 6, 'T': 6, 'G': 6}
+## Fetch masked blocks
+
+There are two kinds of masking blocks that can be present in 2bit files: hard-masked and soft-masked. Hard-masked blocks are stretches of NNNN, as are commonly found near telomeres and centromeres. Soft-masked blocks are runs of lowercase A/C/T/G, typically indicating repeat elements or low-complexity stretches. It can sometimes be useful to query this information from 2bit files:
+
+ >>> tb.hardMaskedBlocks("chr1")
+ [(0, 50), (100, 150)]
+
+In this (small) example, there are two stretches of hard-masked sequence, from 0 to 50 and again from 100 to 150 (see the note below about coordinates). If you would instead like to query all blocks overlapping with a specific region, you can specify the region bounds:
+
+ >>> tb.hardMaskedBlocks("chr1", 75, 101)
+ [(100, 150)]
+
+If there are no overlapping regions, then an empty list is returned:
+
+ >>> tb.hardMaskedBlocks("chr1", 75, 100)
+ []
+
+Instead of `hardMaskedBlocks()`, one can use `softMaskedBlocks()` in an identical manner:
+
+ >>> tb = py2bit.open("foo.2bit", storeMasked=True)
+ >>> tb.softMaskedBlocks("chr1")
+ [(62, 70)]
+
+As shown, you **must** specify `storeMasked=True` or you will receive a runtime error.
+
## Close a file
A `TwoBit` object can be closed with the `close()` method.
=====================================
azure-pipelines.yml
=====================================
@@ -0,0 +1,36 @@
+pr:
+ autoCancel: true
+
+jobs:
+
+- job: 'Linux'
+ pool:
+ vmImage: 'ubuntu-latest'
+ strategy:
+ matrix:
+ Python36:
+ python.version: '3.6'
+ Python37:
+ python.version: '3.7'
+ Python38:
+ python.version: '3.8'
+ Python39:
+ python.version: '3.9'
+ Python310:
+ python.version: '3.10'
+ maxParallel: 5
+
+ steps:
+ - bash: echo "##vso[task.prependpath]/usr/share/miniconda/bin"
+ displayName: Add conda to PATH
+
+ - bash: conda create -n foo -q --yes -c conda-forge -c bioconda python=$(python.version) libcurl zlib pip nose gcc_linux-64
+ displayName: Installing dependencies
+ - bash: |
+ source activate foo
+ python -m pip install . --no-deps --ignore-installed -vvv
+ displayName: Installing py2bit
+ - bash: |
+ source activate foo
+ nosetests -sv
+ displayName: Test py2bit
=====================================
lib2bit/2bit.c
=====================================
@@ -278,6 +278,7 @@ uint8_t getByteMaskFromOffset(int offset) {
void *twobitBasesWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end, int fraction) {
void *out;
uint32_t tmp[4] = {0, 0, 0, 0}, len = end - start + (start % 4), i = 0, j = 0;
+ uint32_t seqLen = end - start;
uint32_t blockStart, blockEnd, maskIdx = (uint32_t) -1, maskStart, maskEnd, foo;
uint8_t *bytes = NULL, mask = 0, offset;
@@ -375,10 +376,10 @@ void *twobitBasesWorker(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end,
//out is in TCAG order, since that's how 2bit is stored.
//However, for whatever reason I went with ACTG in the first release...
if(fraction) {
- ((double*) out)[0] = ((double) tmp[2])/((double) len);
- ((double*) out)[1] = ((double) tmp[1])/((double) len);
- ((double*) out)[2] = ((double) tmp[0])/((double) len);
- ((double*) out)[3] = ((double) tmp[3])/((double) len);
+ ((double*) out)[0] = ((double) tmp[2])/((double) seqLen);
+ ((double*) out)[1] = ((double) tmp[1])/((double) seqLen);
+ ((double*) out)[2] = ((double) tmp[0])/((double) seqLen);
+ ((double*) out)[3] = ((double) tmp[3])/((double) seqLen);
} else {
((uint32_t*) out)[0] = tmp[2];
((uint32_t*) out)[1] = tmp[1];
=====================================
py2bit.h
=====================================
@@ -1,7 +1,7 @@
#include <Python.h>
#include "2bit.h"
-#define pyTwoBitVersion "0.3.0"
+#define pyTwoBitVersion "0.3.1"
typedef struct {
PyObject_HEAD
=====================================
py2bitTest/test.py
=====================================
@@ -31,6 +31,8 @@ class Test():
def testSequence(self):
tb = py2bit.open(self.fname, True)
assert(tb.sequence("chr1") == "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATCGATCGTAGCTAGCTAGCTAGCTGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")
+ assert(tb.sequence("chr1", 1, 2) == "N")
+ assert(tb.sequence("chr1", 1, 3) == "NN")
assert(tb.sequence("chr1", 0, 1000) == "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATCGATCGTAGCTAGCTAGCTAGCTGATCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN")
assert(tb.sequence("chr1", 24, 74) == "NNNNNNNNNNNNNNNNNNNNNNNNNNACGTACGTACGTagctagctGATC")
tb.close()
@@ -42,12 +44,6 @@ class Test():
assert(tb.bases("chr1", 24, 74, False) == {'A': 6, 'C': 6, 'T': 6, 'G': 6})
tb.close()
- def testSequence(self):
- tb = py2bit.open(self.fname, True)
- assert(tb.sequence("chr1", 1, 3) == "NN")
- assert(tb.sequence("chr1", 1, 2) == "N")
- tb.close()
-
def testHardMaskedBlocks(self):
tb = py2bit.open(self.fname, True)
assert(tb.hardMaskedBlocks("chr1") == [(0, 50), (100, 150)])
=====================================
setup.py
=====================================
@@ -17,13 +17,13 @@ module1 = Extension('py2bit',
include_dirs = ['lib2bit', sysconfig.get_config_var("INCLUDEPY")])
setup(name = 'py2bit',
- version = '0.3.0',
+ version = '0.3.1',
description = 'A package for accessing 2bit files using lib2bit',
author = "Devon P. Ryan",
- author_email = "ryan at ie-freiburg.mpg.de",
+ author_email = "dpryan79 at gmail.com",
url = "https://github.com/deeptools/py2bit",
license = "MIT",
- download_url = "https://github.com/deeptools/py2bit/tarball/0.3.0",
+ download_url = "https://github.com/deeptools/py2bit",
keywords = ["bioinformatics", "2bit"],
classifier = ["Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
@@ -31,13 +31,8 @@ setup(name = 'py2bit',
"Programming Language :: C",
"Programming Language :: Python",
"Programming Language :: Python :: 2",
- "Programming Language :: Python :: 2.6",
"Programming Language :: Python :: 2.7",
"Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.3",
- "Programming Language :: Python :: 3.4",
- "Programming Language :: Python :: 3.5",
- "Programming Language :: Python :: 3.6",
"Programming Language :: Python :: Implementation :: CPython",
"Operating System :: POSIX",
"Operating System :: Unix",
View it on GitLab: https://salsa.debian.org/med-team/python-py2bit/-/commit/b55fd12cb45971b6cdd2865e7658f5e3b9ebcac6
--
View it on GitLab: https://salsa.debian.org/med-team/python-py2bit/-/commit/b55fd12cb45971b6cdd2865e7658f5e3b9ebcac6
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220809/ebda788e/attachment-0001.htm>
More information about the debian-med-commit
mailing list