[med-svn] [kineticstools] 01/10: Imported Upstream version 0.5.2+dfsg
Afif Elghraoui
afif at moszumanska.debian.org
Sun Jul 3 03:37:05 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository kineticstools.
commit 5ef124ee9a0246d656717c290b105f1cde5a8701
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Sat Jul 2 15:21:19 2016 -0700
Imported Upstream version 0.5.2+dfsg
---
.circleci/installHDF5.sh | 14 +++++
circle.yml | 18 ++++++
kineticsTools/ipdSummary.py | 68 +++++++++++------------
kineticsTools/summarizeModifications.py | 18 +++---
requirements-ci.txt | 12 ++++
requirements-dev.txt | 3 +
setup.py | 6 +-
test/cram/case-ctrl.t | 2 +-
test/cram/detection.t | 2 +-
test/cram/detection_bam.t | 8 +--
test/cram/detection_bam_dataset.t | 8 +--
test/cram/detection_bam_lossless.t | 6 +-
test/cram/identify.t | 2 +-
test/cram/long_running/README.txt | 6 +-
test/cram/long_running/detect_and_identify_Bsub.t | 4 +-
test/cram/long_running/detect_and_identify_Cagg.t | 2 +-
test/cram/long_running/detect_and_identify_Hpyl.t | 4 +-
test/cram/long_running/detect_and_identify_Mjan.t | 2 +-
test/cram/long_running/run_jenkins.sh | 19 +++++++
test/cram/methyl-fraction-case-ctrl.t | 2 +-
test/cram/version.t | 9 +--
test/test_ReferenceUtils.py | 4 +-
test/test_inputs.py | 60 ++++++++++++++++++--
test/test_tool_contract.py | 6 +-
24 files changed, 199 insertions(+), 86 deletions(-)
diff --git a/.circleci/installHDF5.sh b/.circleci/installHDF5.sh
new file mode 100644
index 0000000..3e7ab39
--- /dev/null
+++ b/.circleci/installHDF5.sh
@@ -0,0 +1,14 @@
+set -x
+set -e
+if [ ! -e prefix/lib/libhdf5.so ]; then
+ wget https://www.hdfgroup.org/ftp/HDF5//releases/hdf5-1.8.12/src/hdf5-1.8.12.tar.gz
+ tar xzf hdf5-1.8.12.tar.gz
+ mkdir -p prefix
+ PREFIX=$PWD/prefix
+ cd hdf5-1.8.12
+ ./configure --prefix=$PREFIX
+ make
+ make install
+else
+ echo "HDF5 build/install already completed!"
+fi
diff --git a/circle.yml b/circle.yml
new file mode 100644
index 0000000..a1e6411
--- /dev/null
+++ b/circle.yml
@@ -0,0 +1,18 @@
+machine:
+ python:
+ version: 2.7.6
+
+dependencies:
+ # We need to manually build (and cache) a more modern libhdf5 than
+ # ubuntu precise makes available via apt---there are bugs that
+ # affect us, in libhdf5 1.8.4
+ cache_directories:
+ - .circleci/prefix
+ pre:
+ - (cd .circleci && bash installHDF5.sh)
+ - HDF5_DIR=$PWD/.circleci/prefix pip install -r requirements-ci.txt
+ - HDF5_DIR=$PWD/.circleci/prefix pip install -r requirements-dev.txt
+
+test:
+ override:
+ - make test # Run doctests in addition to the usual unit tests
diff --git a/kineticsTools/ipdSummary.py b/kineticsTools/ipdSummary.py
index 783d251..5c52c54 100755
--- a/kineticsTools/ipdSummary.py
+++ b/kineticsTools/ipdSummary.py
@@ -50,6 +50,7 @@ import Queue
import traceback
from pkg_resources import Requirement, resource_filename
+from pbcommand.common_options import add_debug_option
from pbcommand.models import FileTypes, SymbolTypes, get_pbparser
from pbcommand.cli import pbparser_runner
from pbcommand.utils import setup_log
@@ -114,30 +115,33 @@ def get_parser():
description=__doc__,
driver_exe=Constants.DRIVER_EXE,
is_distributed=True,
- nproc=SymbolTypes.MAX_NPROC)
+ nproc=SymbolTypes.MAX_NPROC,
+ default_level="WARN")
p.add_input_file_type(FileTypes.DS_ALIGN, "alignment_set",
"Alignment DataSet", "BAM or Alignment DataSet")
+ tcp = p.tool_contract_parser
# FIXME just use a positional argument...
- p.tool_contract_parser.add_input_file_type(FileTypes.DS_REF, "reference",
+ tcp.add_input_file_type(FileTypes.DS_REF, "reference",
"Reference DataSet", "Fasta or Reference DataSet")
- p.arg_parser.parser.add_argument("--reference", action="store",
+ argp = p.arg_parser.parser
+ argp.add_argument("--reference", action="store",
required=True,
type=validateFile, help="Fasta or Reference DataSet")
# XXX GFF and CSV are "option" for arg parser, not tool contract
- p.tool_contract_parser.add_output_file_type(FileTypes.GFF, "gff",
+ tcp.add_output_file_type(FileTypes.GFF, "gff",
name="GFF file",
description="GFF file of modified bases",
- default_name="basemods.gff")
- p.tool_contract_parser.add_output_file_type(FileTypes.CSV, "csv",
+ default_name="basemods")
+ tcp.add_output_file_type(FileTypes.CSV, "csv",
name="CSV file",
description="CSV file of per-nucleotide information",
- default_name="basemods.csv")
- p.arg_parser.parser.add_argument("--gff", action="store", default=None,
+ default_name="basemods")
+ argp.add_argument("--gff", action="store", default=None,
help="Output GFF file of modified bases")
- p.arg_parser.parser.add_argument("--csv", action="store", default=None,
+ argp.add_argument("--csv", action="store", default=None,
help="Output CSV file out per-nucleotide information")
# FIXME use central --nproc option
- p.arg_parser.parser.add_argument('--numWorkers', '-j',
+ argp.add_argument('--numWorkers', '-j',
dest='numWorkers',
default=1,
type=int,
@@ -152,25 +156,33 @@ def get_parser():
default=Constants.MAX_LENGTH_DEFAULT,
name="Max sequence length",
description="Maximum number of bases to process per contig")
- p.add_str(Constants.IDENTIFY_ID,
+ tcp.add_str(Constants.IDENTIFY_ID,
option_str="identify",
default="",
name="Identify basemods",
description="Specific modifications to identify (comma-separated "+\
+ "list). Currrent options are m6A and/or m4C.")
+ argp.add_argument(
+ "--identify",
+ action="store",
+ default="",
+ help="Specific modifications to identify (comma-separated "+\
"list). Currrent options are m6A, m4C, m5C_TET. Cannot be "+\
"used with --control.")
_DESC = "In the --identify mode, add --methylFraction to "+\
"command line to estimate the methylated fraction, along with "+\
"95%% confidence interval bounds."
# FIXME tool contract parser and argparser conflict
- p.tool_contract_parser.add_boolean(Constants.METHYL_FRACTION_ID,
+ tcp.add_boolean(Constants.METHYL_FRACTION_ID,
option_str="methylFraction",
default=False,
name="Compute methyl fraction",
- description=_DESC)
- p.arg_parser.parser.add_argument("--methylFraction", action="store_true",
+ description="When identifying specific modifications (m4C and/or "+
+ "m6A), enabling this option will estimate the methylated "+
+ "fraction, along with 95% confidence interval bounds.")
+ argp.add_argument("--methylFraction", action="store_true",
help=_DESC)
- _get_more_options(p.arg_parser.parser)
+ _get_more_options(argp)
return p
def _get_more_options(parser):
@@ -351,11 +363,7 @@ def _get_more_options(parser):
default=False,
help="Enable Python-level profiling (using cProfile).")
- parser.add_argument('--usePdb',
- action='store_true',
- dest="usePdb",
- default=False,
- help="Enable dropping down into pdb debugger if an Exception is raised.")
+ add_debug_option(parser)
parser.add_argument("--seed",
action="store",
@@ -364,10 +372,6 @@ def _get_more_options(parser):
default=None,
help="Random seed (for development and debugging purposes only)")
- # Verbosity
- parser.add_argument("--verbose",
- action="store_true",
- default=False)
return parser
@@ -694,11 +698,6 @@ def monitorChildProcesses(children):
time.sleep(1)
def args_runner(args):
- log = logging.getLogger()
- if args.verbose:
- log.setLevel(logging.INFO)
- else:
- log.setLevel(logging.WARN)
kt = KineticsToolsRunner(args)
return kt.start()
@@ -742,19 +741,16 @@ def resolved_tool_contract_runner(resolved_contract):
return args_runner(args_)
def main(argv=sys.argv, out=sys.stdout):
- # Log generously
- logFormat = '%(asctime)s [%(levelname)s] %(message)s'
- logging.basicConfig(format=logFormat, level=logging.WARN)
- stdOutHandler = logging.StreamHandler(sys.stdout)
- log = logging.getLogger()
+ setup_log_ = functools.partial(setup_log,
+ str_formatter='%(asctime)s [%(levelname)s] %(message)s')
try:
return pbparser_runner(
argv=argv[1:],
parser=get_parser(),
args_runner_func=args_runner,
contract_runner_func=resolved_tool_contract_runner,
- alog=log,
- setup_log_func=setup_log)
+ alog=logging.getLogger(__name__),
+ setup_log_func=setup_log_)
# FIXME is there a more central place to deal with this?
except Exception as e:
type, value, tb = sys.exc_info()
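
Editor's note: the logging change above (mirrored in summarizeModifications.py below) replaces the hand-rolled basicConfig/StreamHandler setup with pbcommand's setup_log, pre-bound to a formatter via functools.partial so the runner can later choose the level from --log-level/--debug/--quiet. A minimal sketch of the partial-application pattern follows; setup_log here is a generic stand-in written for illustration, not pbcommand's actual implementation.

    import functools
    import logging

    def setup_log(alog, level=logging.WARN, str_formatter="%(message)s"):
        # Stand-in for pbcommand.utils.setup_log; keyword names mirror the
        # diff, but this body is only an assumption for illustration.
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter(str_formatter))
        alog.addHandler(handler)
        alog.setLevel(level)
        return alog

    # Pre-bind the formatter once, as main() now does, and let the caller
    # pick the level later (e.g. from the parsed command-line options).
    setup_log_ = functools.partial(
        setup_log, str_formatter='%(asctime)s [%(levelname)s] %(message)s')
    setup_log_(logging.getLogger(__name__), level=logging.INFO).info("ready")
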
diff --git a/kineticsTools/summarizeModifications.py b/kineticsTools/summarizeModifications.py
index 59c6257..deb2768 100755
--- a/kineticsTools/summarizeModifications.py
+++ b/kineticsTools/summarizeModifications.py
@@ -35,13 +35,13 @@ Summarizes kinetic modifications in the alignment_summary.gff file.
import cProfile
from itertools import groupby
+import functools
import os
import logging
import sys
from pbcommand.models import FileTypes, get_pbparser
from pbcommand.cli import pbparser_runner
-from pbcommand.common_options import add_debug_option
from pbcommand.utils import setup_log
from pbcore.io import GffReader, Gff3Record
@@ -169,7 +169,8 @@ def get_parser():
version=__version__,
name=Constants.TOOL_ID,
description=__doc__,
- driver_exe=Constants.DRIVER_EXE)
+ driver_exe=Constants.DRIVER_EXE,
+ default_level="INFO")
p.add_input_file_type(FileTypes.GFF, "modifications",
name="GFF file",
description="Base modification GFF file")
@@ -179,23 +180,20 @@ def get_parser():
p.add_output_file_type(FileTypes.GFF, "gff_out",
name="GFF file",
description="Modified alignment summary file",
- default_name="alignment_summary_with_basemods.gff")
+ default_name="alignment_summary_with_basemods")
return p
def main(argv=sys.argv):
mp = get_parser()
- logFormat = '%(asctime)s [%(levelname)s] %(message)s'
- logging.basicConfig(level=logging.INFO, format=logFormat)
- stdOutHandler = logging.StreamHandler(sys.stdout)
- logging.Logger.root.addHandler(stdOutHandler)
- log = logging.getLogger()
+ setup_log_ = functools.partial(setup_log,
+ str_formatter='%(asctime)s [%(levelname)s] %(message)s')
return pbparser_runner(
argv=argv[1:],
parser=mp,
args_runner_func=args_runner,
contract_runner_func=resolved_tool_contract_runner,
- alog=log,
- setup_log_func=setup_log)
+ alog=logging.getLogger(__name__),
+ setup_log_func=setup_log_)
if __name__ == "__main__":
main()
diff --git a/requirements-ci.txt b/requirements-ci.txt
new file mode 100644
index 0000000..a43e0c4
--- /dev/null
+++ b/requirements-ci.txt
@@ -0,0 +1,12 @@
+cython
+numpy
+h5py
+jinja2
+networkx
+jsonschema
+xmlbuilder
+functools32
+pyxb
+# Install from github
+-e git://github.com/PacificBiosciences/pbcore.git@master#egg=pbcore
+-e git://github.com/PacificBiosciences/pbcommand.git#egg=pbcommand
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..80e9144
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,3 @@
+-r requirements.txt
+sphinx
+nose
diff --git a/setup.py b/setup.py
index 91c87bd..bc1f56b 100755
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ import sys
setup(
name='kineticsTools',
- version='0.5.1',
+ version='0.5.2',
author='Pacific Biosciences',
author_email='devnet at pacificbiosciences.com',
license=open('LICENSES.txt').read(),
@@ -16,11 +16,11 @@ setup(
export_symbols=["innerPredict", "innerPredictCtx", "init_native"])],
zip_safe=False,
install_requires=[
- 'pbcore >= 1.2.2',
+ 'pbcore >= 1.2.8',
'numpy >= 1.6.0',
'h5py >= 1.3.0',
'scipy >= 0.9.0',
- 'pbcommand >= 0.2.0',
+ 'pbcommand >= 0.3.22',
],
entry_points={'console_scripts': [
"ipdSummary = kineticsTools.ipdSummary:main",
diff --git a/test/cram/case-ctrl.t b/test/cram/case-ctrl.t
index 7442a3b..6e707a6 100644
--- a/test/cram/case-ctrl.t
+++ b/test/cram/case-ctrl.t
@@ -10,7 +10,7 @@ Load in data:
Run basic ipdSummary:
- $ ipdSummary --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
+ $ ipdSummary --log-level=WARNING --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
Look at output csv file:
diff --git a/test/cram/detection.t b/test/cram/detection.t
index b8d2fe6..822ca0b 100644
--- a/test/cram/detection.t
+++ b/test/cram/detection.t
@@ -10,7 +10,7 @@ Load in data:
Run basic ipdSummary:
- $ ipdSummary --pvalue 0.001 --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
+ $ ipdSummary --log-level=WARNING --pvalue 0.001 --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
Look at output csv file:
diff --git a/test/cram/detection_bam.t b/test/cram/detection_bam.t
index 0724a2d..77de087 100644
--- a/test/cram/detection_bam.t
+++ b/test/cram/detection_bam.t
@@ -4,13 +4,13 @@ Test detection and identification modes of ipdSummary using .bam file as input.
Load in data:
- $ DATA=/mnt/secondary-siv/testdata/kineticsTools
+ $ DATA=/pbi/dept/secondary/siv/testdata/kineticsTools
$ INPUT=$DATA/Hpyl_1_5000.bam
- $ REFERENCE=/mnt/secondary-siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
+ $ REFERENCE=/pbi/dept/secondary/siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
Run basic ipdSummary:
- $ ipdSummary --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+ $ ipdSummary --log-level=WARNING --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
Look at output csv file:
@@ -50,7 +50,7 @@ Look at output gff file:
Now try limiting the number of alignments:
- $ ipdSummary --gff tmp2.gff --csv tmp2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --maxAlignments 100 --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+ $ ipdSummary --log-level=WARNING --gff tmp2.gff --csv tmp2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --maxAlignments 100 --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
$ N_DIFF=`diff tmp1.gff tmp2.gff | wc --lines`
$ python -c "assert 100 < ${N_DIFF}, ${N_DIFF}"
diff --git a/test/cram/detection_bam_dataset.t b/test/cram/detection_bam_dataset.t
index 11c070b..95ec106 100644
--- a/test/cram/detection_bam_dataset.t
+++ b/test/cram/detection_bam_dataset.t
@@ -4,13 +4,13 @@ Test detection and identification modes of ipdSummary using .xml dataset file as
Load in data:
- $ DATA=/mnt/secondary-siv/testdata/kineticsTools
+ $ DATA=/pbi/dept/secondary/siv/testdata/kineticsTools
$ INPUT=$DATA/Hpyl_1_5000.xml
- $ REFERENCE=/mnt/secondary-siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
+ $ REFERENCE=/pbi/dept/secondary/siv/references/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
Run basic ipdSummary:
- $ ipdSummary --outfile tmp_xml1 --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+ $ ipdSummary --log-level=WARNING --outfile tmp_xml1 --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
Look at output csv file:
@@ -51,6 +51,6 @@ Look at output gff file:
Now try with a split dataset:
$ INPUT=$DATA/Hpyl_1_5000_split.xml
- $ ipdSummary --gff tmp_xml2.gff --csv tmp_xml2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
+ $ ipdSummary --log-level=WARNING --gff tmp_xml2.gff --csv tmp_xml2.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REFERENCE --referenceWindows="gi|12057207|gb|AE001439.1|:0-5000" $INPUT
$ linecount tmp_xml2.gff
274
diff --git a/test/cram/detection_bam_lossless.t b/test/cram/detection_bam_lossless.t
index 3f0c610..7beecd1 100644
--- a/test/cram/detection_bam_lossless.t
+++ b/test/cram/detection_bam_lossless.t
@@ -4,14 +4,14 @@ Test detection and identification modes of ipdSummary using .bam file as input,
Load in data:
- $ DATA=/mnt/secondary-siv/testdata/kineticsTools
+ $ DATA=/pbi/dept/secondary/siv/testdata/kineticsTools
$ INPUT=$DATA/Mjan_1_5000_lossless.bam
- $ export REF_DIR=/mnt/secondary-siv/references
+ $ export REF_DIR=/pbi/dept/secondary/siv/references
$ export REF_SEQ=${REF_DIR}/Methanocaldococcus_jannaschii_DSM2661/sequence/Methanocaldococcus_jannaschii_DSM2661.fasta
Run basic ipdSummary:
- $ ipdSummary --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REF_SEQ $INPUT
+ $ ipdSummary --log-level=WARNING --gff tmp1.gff --csv tmp1.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C --reference $REF_SEQ $INPUT
Look at output csv file:
diff --git a/test/cram/identify.t b/test/cram/identify.t
index da19d43..442d130 100644
--- a/test/cram/identify.t
+++ b/test/cram/identify.t
@@ -10,7 +10,7 @@ Load in data:
Run basic ipdSummary:
- $ ipdSummary --numWorkers 1 --pvalue 0.001 --identify m6A,m4C --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
+ $ ipdSummary --log-level=WARNING --numWorkers 1 --pvalue 0.001 --identify m6A,m4C --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
Look at output csv file:
diff --git a/test/cram/long_running/README.txt b/test/cram/long_running/README.txt
index 6a19abc..1e489e6 100644
--- a/test/cram/long_running/README.txt
+++ b/test/cram/long_running/README.txt
@@ -1,6 +1,6 @@
-====================================================
-README for /mnt/secondary-siv/testdata/kineticsTools
-====================================================
+=========================================================
+README for /pbi/dept/secondary/siv/testdata/kineticsTools
+=========================================================
Most of these files are derived from Tyson Clark's P6 chemistry validation
experiments. Bsub is an amplified control.
diff --git a/test/cram/long_running/detect_and_identify_Bsub.t b/test/cram/long_running/detect_and_identify_Bsub.t
index 7b7b439..3e4c882 100644
--- a/test/cram/long_running/detect_and_identify_Bsub.t
+++ b/test/cram/long_running/detect_and_identify_Bsub.t
@@ -4,9 +4,9 @@ Run base modification detection on B. subtilis P6 chemistry validation data
$ . $TESTDIR/../portability.sh
- $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+ $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
$ export BAMFILE=${DATA_DIR}/Bsub_aligned.subreads.bam
- $ export REF_DIR=/mnt/secondary-siv/references
+ $ export REF_DIR=/pbi/dept/secondary/siv/references
$ export REF_SEQ=${REF_DIR}/B_subtilis_strW23/sequence/B_subtilis_strW23.fasta
$ ipdSummary ${BAMFILE} --reference ${REF_SEQ} --gff tst_Bsub.gff --csv tst_Bsub.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C
diff --git a/test/cram/long_running/detect_and_identify_Cagg.t b/test/cram/long_running/detect_and_identify_Cagg.t
index aac1d6c..a509ed8 100644
--- a/test/cram/long_running/detect_and_identify_Cagg.t
+++ b/test/cram/long_running/detect_and_identify_Cagg.t
@@ -4,7 +4,7 @@ Run base modification detection on C. aggregans P6 chemistry validation data
$ . $TESTDIR/../portability.sh
- $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+ $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
$ export BAMFILE=${DATA_DIR}/Cagg_aligned.subreads.bam
$ export REF_DIR=/mnt/secondary/Smrtanalysis/current/common/references
$ export REF_SEQ=${REF_DIR}/Chloroflexus_aggregans_DSM9485/sequence/Chloroflexus_aggregans_DSM9485.fasta
diff --git a/test/cram/long_running/detect_and_identify_Hpyl.t b/test/cram/long_running/detect_and_identify_Hpyl.t
index 8067142..97c409f 100644
--- a/test/cram/long_running/detect_and_identify_Hpyl.t
+++ b/test/cram/long_running/detect_and_identify_Hpyl.t
@@ -3,9 +3,9 @@ Run base modification detection on H. pylori P6 chemistry validation data.
$ . $TESTDIR/../portability.sh
- $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+ $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
$ export BAMFILE=${DATA_DIR}/Hpyl_aligned.subreads.bam
- $ export REF_DIR=/mnt/secondary-siv/references
+ $ export REF_DIR=/pbi/dept/secondary/siv/references
$ export REF_SEQ=${REF_DIR}/Helicobacter_pylori_J99/sequence/Helicobacter_pylori_J99.fasta
$ ipdSummary ${BAMFILE} --reference ${REF_SEQ} --gff tst_Hpyl.gff --csv tst_Hpyl.csv --numWorkers 12 --pvalue 0.001 --identify m6A,m4C
diff --git a/test/cram/long_running/detect_and_identify_Mjan.t b/test/cram/long_running/detect_and_identify_Mjan.t
index 3c5f9e1..3a44f42 100644
--- a/test/cram/long_running/detect_and_identify_Mjan.t
+++ b/test/cram/long_running/detect_and_identify_Mjan.t
@@ -3,7 +3,7 @@ Run base modification detection on M. jannaschii P6 chemistry validation data.
$ . $TESTDIR/../portability.sh
- $ export DATA_DIR=/mnt/secondary-siv/testdata/kineticsTools
+ $ export DATA_DIR=/pbi/dept/secondary/siv/testdata/kineticsTools
$ export BAMFILE=${DATA_DIR}/Mjan_aligned.subreads.bam
$ export REF_DIR=/mnt/secondary/Smrtanalysis/current/common/references
$ export REF_SEQ=${REF_DIR}/Methanocaldococcus_jannaschii_DSM2661/sequence/Methanocaldococcus_jannaschii_DSM2661.fasta
diff --git a/test/cram/long_running/run_jenkins.sh b/test/cram/long_running/run_jenkins.sh
new file mode 100755
index 0000000..07356fa
--- /dev/null
+++ b/test/cram/long_running/run_jenkins.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+BASE_PATH=$1
+XML_DEST=$2
+
+if [ -z "${BASE_PATH}" ] || [ ! -d "${BASE_PATH}" ]; then
+ echo "Base path required as first argument"
+ exit 1
+fi
+if [ -z "${XML_DEST}" ] || [ -d "${XML_DEST}" ]; then
+ echo "XML output file required as first argument"
+ exit 1
+fi
+rm -f ${XML_DEST}
+
+cd ${BASE_PATH}
+virtualenv ${BASE_PATH}/venv
+${BASE_PATH}/venv/bin/pip install CramUnit
+${BASE_PATH}/venv/bin/python ${BASE_PATH}/venv/bin/run_cram_unit.py -x ${XML_DEST} ${BASE_PATH}/tests/cram/long_running
diff --git a/test/cram/methyl-fraction-case-ctrl.t b/test/cram/methyl-fraction-case-ctrl.t
index 25e9fd1..c63e719 100644
--- a/test/cram/methyl-fraction-case-ctrl.t
+++ b/test/cram/methyl-fraction-case-ctrl.t
@@ -10,7 +10,7 @@ Load in data:
Run basic ipdSummary:
- $ ipdSummary --numWorkers 1 --methylFraction --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
+ $ ipdSummary --log-level=WARNING --numWorkers 1 --methylFraction --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --control $INPUT --reference $REFERENCE $INPUT
Look at output csv file:
diff --git a/test/cram/version.t b/test/cram/version.t
index 0dd4ff3..ef00278 100644
--- a/test/cram/version.t
+++ b/test/cram/version.t
@@ -4,9 +4,10 @@ A simple test of the version and help options:
2.2
$ ipdSummary
- usage: ipdSummary [-h] [-v] [--emit-tool-contract]
+ usage: ipdSummary [-h] [--version] [--emit-tool-contract]
[--resolved-tool-contract RESOLVED_TOOL_CONTRACT]
- [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}] [--debug]
+ [--log-file LOG_FILE]
+ [--log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL} | --debug | --quiet | -v]
--reference REFERENCE [--gff GFF] [--csv CSV]
[--numWorkers NUMWORKERS] [--pvalue PVALUE]
[--maxLength MAXLENGTH] [--identify IDENTIFY]
@@ -25,8 +26,8 @@ A simple test of the version and help options:
[--refContigIndex REFCONTIGINDEX]
[-W REFERENCEWINDOWSASSTRING]
[--skipUnrecognizedContigs SKIPUNRECOGNIZEDCONTIGS]
- [--alignmentSetRefWindows] [--threaded] [--profile]
- [--usePdb] [--seed RANDOMSEED] [--verbose]
+ [--alignmentSetRefWindows] [--threaded] [--profile] [--pdb]
+ [--seed RANDOMSEED]
alignment_set
ipdSummary: error: too few arguments
[2]
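
Editor's note: the updated usage text reflects pbcommand's newer CLI scaffolding, where --log-level, --debug, --quiet and -v form a mutually exclusive verbosity group and --pdb replaces the removed --usePdb flag. A small plain-argparse sketch of that option layout is shown below; it is an illustration of the pattern only, not pbcommand's code, and the dest/default values are assumptions.

    import argparse

    p = argparse.ArgumentParser(prog="ipdSummary")
    p.add_argument("--log-file", dest="log_file")
    verbosity = p.add_mutually_exclusive_group()
    verbosity.add_argument("--log-level", default="WARNING",
                           choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"])
    verbosity.add_argument("--debug", action="store_true")
    verbosity.add_argument("--quiet", action="store_true")
    verbosity.add_argument("-v", "--verbose", action="count")

    # Only one of the verbosity options may be given, as in the usage line above.
    print(p.parse_args(["--log-level", "WARNING"]).log_level)  # -> WARNING
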
diff --git a/test/test_ReferenceUtils.py b/test/test_ReferenceUtils.py
index 1c204b4..1d6d3cb 100644
--- a/test/test_ReferenceUtils.py
+++ b/test/test_ReferenceUtils.py
@@ -6,8 +6,8 @@ import os.path
from kineticsTools.ReferenceUtils import ReferenceUtils
from pbcore.io import AlignmentSet
-big_data_dir = "/mnt/secondary-siv/testdata/kineticsTools"
-ref_dir = "/mnt/secondary-siv/references"
+big_data_dir = "/pbi/dept/secondary/siv/testdata/kineticsTools"
+ref_dir = "/pbi/dept/secondary/siv/references"
logging.basicConfig()
log = logging.getLogger()
diff --git a/test/test_inputs.py b/test/test_inputs.py
index 6c2ca5a..ea699bf 100644
--- a/test/test_inputs.py
+++ b/test/test_inputs.py
@@ -16,9 +16,11 @@ logging.basicConfig()
log = logging.getLogger()
# FIXME
-data_dir = "/mnt/secondary-siv/testdata/kineticsTools"
+data_dir = "/pbi/dept/secondary/siv/testdata/kineticsTools"
class _TestBase(object):
+ MAX_ALIGNMENTS = 1500
+
"""
Common test functionality. All input type tests should inherit from this,
and yield identical results.
@@ -30,6 +32,7 @@ class _TestBase(object):
def basicOpts(self):
"""Mock up some options for the kinetic worker"""
+ self_ = self
class opts:
def __init__(self):
self.mapQvThreshold = -1
@@ -45,7 +48,7 @@ class _TestBase(object):
self.identifyMinCov = 5
self.methylMinCov = 10
self.useLDA = False
- self.maxAlignments = 1500
+ self.maxAlignments = self_.MAX_ALIGNMENTS
self.randomSeed = None
return opts()
@@ -53,7 +56,7 @@ class _TestBase(object):
raise NotImplementedError()
def getReference (self):
- refDir = "/mnt/secondary-siv/references"
+ refDir = "/pbi/dept/secondary/siv/references"
return os.path.join(refDir, "Helicobacter_pylori_J99", "sequence",
"Helicobacter_pylori_J99.fasta")
@@ -100,7 +103,7 @@ class _TestBase(object):
chunks = self.kw._chunkRawIpds(rawIpds)
#log.critical(chunks)
- def testSmallDecode (self):
+ def test_small_decode (self):
"""Test for known modifications near the start of H. pylori genome"""
# XXX should have mods on 60- (m4C), 89+ (m6A), 91- (m6A)
start = 50
@@ -143,5 +146,54 @@ class TestSplitDataset(_TestBase, unittest.TestCase):
return os.path.join(data_dir, "Hpyl_1_5000_split.xml")
+@unittest.skipUnless(os.path.isdir(data_dir), "Missing test data directory")
+class TestChunkedDataset(_TestBase, unittest.TestCase):
+
+ def getAlignments(self):
+ return os.path.join(data_dir, "Hpyl_1_5000_chunk.xml")
+
+ @unittest.skip
+ def test_private_api(self):
+ pass
+
+ def test_small_decode(self):
+ start = 985
+ end = 1065
+ REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
+ referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
+ bounds = (start, end)
+
+ self.kw._prepForReferenceWindow(referenceWindow)
+ kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
+ mods = self.kw._decodePositiveControl(kinetics, bounds)
+ self.assertEqual(len(mods), 4)
+
+
+@unittest.skipUnless(os.path.isdir(data_dir), "Missing test data directory")
+class TestNonStochastic(TestBam): #_TestBase, unittest.TestCase):
+ # XXX force this down to trigger RNG
+ MAX_ALIGNMENTS = 150
+
+ @unittest.skip
+ def test_private_api(self):
+ pass
+
+ def test_small_decode(self):
+ start = 50
+ end = 100
+ REF_GROUP_ID = "gi|12057207|gb|AE001439.1|"
+ referenceWindow = ReferenceWindow(0, REF_GROUP_ID, start, end)
+ bounds = (start, end)
+ self.kw._prepForReferenceWindow(referenceWindow)
+ kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
+ # XXX note that this is very dependent on the exact order of reads
+ # found by readsInRange(), which may be altered by changes to the
+ # implementation of the dataset API. It should be immune to stochastic
+ # effects, however.
+ self.assertEqual("%.5f" % kinetics[0]['ipdRatio'], "1.06460")
+ mods = self.kw._decodePositiveControl(kinetics, bounds)
+ self.assertEqual(len(mods), 3)
+
+
if __name__ == '__main__':
unittest.main()
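
Editor's note: the basicOpts() change relies on a small closure trick: the nested opts class cannot see the test method's self, so the instance is first bound to self_, letting subclasses such as TestNonStochastic override MAX_ALIGNMENTS to force the random subsampling path. A stripped-down sketch of the pattern (class and attribute names follow the diff; everything else is illustrative):

    class _TestBase(object):
        MAX_ALIGNMENTS = 1500  # subclasses override this, as TestNonStochastic does

        def basicOpts(self):
            self_ = self  # the nested class has no access to this method's self
            class opts:
                def __init__(self):
                    self.maxAlignments = self_.MAX_ALIGNMENTS
            return opts()

    class _NonStochastic(_TestBase):
        MAX_ALIGNMENTS = 150

    print(_NonStochastic().basicOpts().maxAlignments)  # -> 150
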
diff --git a/test/test_tool_contract.py b/test/test_tool_contract.py
index f7dc385..5056b24 100755
--- a/test/test_tool_contract.py
+++ b/test/test_tool_contract.py
@@ -13,8 +13,8 @@ import pbcommand.testkit
os.environ["PACBIO_TEST_ENV"] = "1" # turns off --verbose
-DATA_DIR = "/mnt/secondary-siv/testdata/kineticsTools"
-REF_DIR = "/mnt/secondary-siv/references/Helicobacter_pylori_J99"
+DATA_DIR = "/pbi/dept/secondary/siv/testdata/kineticsTools"
+REF_DIR = "/pbi/dept/secondary/siv/references/Helicobacter_pylori_J99"
class Constants(object):
@@ -86,7 +86,7 @@ class TestIpdSummaryChunk(TestIpdSummary):
gff_file = os.path.join(output_dir, rtc.task.output_files[0])
csv_file = os.path.join(output_dir, rtc.task.output_files[1])
logging.critical(gff_file)
- logging.critical(csv_file)
+ logging.critical("%s %s" % (csv_file, os.path.getsize(csv_file)))
with open(csv_file) as f:
records = [ r for r in csv.DictReader(f) ]
logging.critical("start=%s end=%s" % (records[0]['tpl'],
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/kineticstools.git