[med-svn] [Git][med-team/kineticstools][upstream] New upstream version 0.6.1+git20180425.27a1878

Andreas Tille gitlab at salsa.debian.org
Sun Oct 28 20:32:14 GMT 2018


Andreas Tille pushed to branch upstream at Debian Med / kineticstools


Commits:
c195e645 by Andreas Tille at 2018-10-28T18:44:34Z
New upstream version 0.6.1+git20180425.27a1878
- - - - -


29 changed files:

- Makefile
- README.md
- + bamboo_build.sh
- bin/testShared.py
- bin/writeSummaryToCmp.py
- doc/manual.rst
- kineticsTools/BasicLdaEnricher.py
- kineticsTools/KineticWorker.py
- kineticsTools/MedakaLdaEnricher.py
- kineticsTools/MixtureEstimationMethods.py
- kineticsTools/ModificationDecode.py
- kineticsTools/MultiSiteCommon.py
- kineticsTools/MultiSiteDetection.py
- kineticsTools/PositiveControlEnricher.py
- kineticsTools/ReferenceUtils.py
- kineticsTools/ResultWriter.py
- + kineticsTools/internal/__init__.py
- + kineticsTools/internal/basic.py
- kineticsTools/ipdSummary.py
- kineticsTools/pipelineTools.py
- + kineticsTools/resources/SP2-C2.h5
- kineticsTools/summarizeModifications.py
- requirements-dev.txt
- test/cram/detection.t
- test/detectionMethylFractionTest.py
- test/methyFractionTest.py
- test/test.py
- + test/test_internal.py
- test/test_outputs.py


Changes:

=====================================
Makefile
=====================================
@@ -1,5 +1,8 @@
 SHELL = /bin/bash -e
 
+utest:
+	PYTHONPATH=.:${PYTHONPATH} py.test -s -v test/test_internal.py
+
 all: build install
 
 build:
@@ -23,7 +26,7 @@ clean:
 
 test: tests
 check: tests
-tests: cram-tests unit-tests extra-tests
+tests: cram-tests py-tests extra-tests
 
 cram-tests:
 	cram --xunit-file=cramtests.xml test/cram/*.t
@@ -31,8 +34,9 @@ cram-tests:
 long-tests:
 	cram test/cram/long_running/*.t
 
-unit-tests:
-	nosetests -s -v --with-xunit test/*.py
+py-tests:
+	#nosetests -s -v --with-xunit test/*.py
+	py.test -s -v --junit-xml=nosetests.xml test/*.py
 
 extra-tests:
 	cram --xunit-file=cramtests-extra.xml test/cram/extra/*.t


=====================================
README.md
=====================================
@@ -9,3 +9,7 @@ Academic Publications:
 Documentation:
  * [Tool documentation](http://github.com/PacificBiosciences/kineticsTools/blob/master/doc/manual.rst)
  * [Methods description](http://github.com/PacificBiosciences/kineticsTools/blob/master/doc/whitepaper/kinetics.pdf)
+
+DISCLAIMER
+----------
+THIS WEBSITE AND CONTENT AND ALL SITE-RELATED SERVICES, INCLUDING ANY DATA, ARE PROVIDED "AS IS," WITH ALL FAULTS, WITH NO REPRESENTATIONS OR WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, SATISFACTORY QUALITY, NON-INFRINGEMENT OR FITNESS FOR A PARTICULAR PURPOSE. YOU ASSUME TOTAL RESPONSIBILITY AND RISK FOR YOUR USE OF THIS SITE, ALL SITE-RELATED SERVICES, AND ANY THIRD PARTY WEBSITES OR APPLICATIONS. NO ORAL OR WRITTEN INFORMATION OR ADVICE SHALL CREATE A WARRANTY OF ANY KIND. ANY REFERENCES TO SPECIFIC PRODUCTS OR SERVICES ON THE WEBSITES DO NOT CONSTITUTE OR IMPLY A RECOMMENDATION OR ENDORSEMENT BY PACIFIC BIOSCIENCES.


=====================================
bamboo_build.sh
=====================================
@@ -0,0 +1,27 @@
+#!/bin/bash -ex
+
+NX3PBASEURL=http://nexus/repository/unsupported/pitchfork/gcc-6.4.0
+export PATH=$PWD/build/bin:/mnt/software/a/anaconda2/4.2.0/bin:$PWD/bin:$PATH
+export PYTHONUSERBASE=$PWD/build
+export CFLAGS="-I/mnt/software/a/anaconda2/4.2.0/include"
+PIP="pip --cache-dir=$bamboo_build_working_directory/.pip"
+type module >& /dev/null || . /mnt/software/Modules/current/init/bash
+module load gcc
+
+rm -rf   build
+mkdir -p build/bin build/lib build/include build/share
+$PIP install --user \
+  iso8601
+$PIP install --user \
+  $NX3PBASEURL/pythonpkgs/xmlbuilder-1.0-cp27-none-any.whl \
+  $NX3PBASEURL/pythonpkgs/tabulate-0.7.5-cp27-none-any.whl \
+  $NX3PBASEURL/pythonpkgs/pysam-0.13-cp27-cp27mu-linux_x86_64.whl \
+  $NX3PBASEURL/pythonpkgs/avro-1.7.7-cp27-none-any.whl
+
+$PIP install --user -e repos/pbcommand
+$PIP install --user -e repos/pbcore
+$PIP install --user -r requirements-ci.txt
+$PIP install --user -r requirements-dev.txt
+$PIP install --user --no-index $PWD
+
+make test


=====================================
bin/testShared.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import print_function
 #################################################################################
 # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
 #
@@ -45,13 +46,13 @@ class Sub(Process):
     def run(self):
         import time
 
-        print "self.arr[10] = %f, Process = %s" % (self.arr[10], current_process())
+        print("self.arr[10] = %f, Process = %s" % (self.arr[10], current_process()))
 
-        print self.arr.shape
+        print(self.arr.shape)
 
         n = self.arr.shape[0] - 1
 
-        print "self.arr[%d] = %f, Process = %s" % (n, self.arr[n], current_process())
+        print("self.arr[%d] = %f, Process = %s" % (n, self.arr[n], current_process()))
         time.sleep(10)
 
 


=====================================
bin/writeSummaryToCmp.py
=====================================
@@ -30,6 +30,7 @@
 #################################################################################
 
 
+from __future__ import print_function
 import cProfile
 from pbcore.io import GffReader, Gff3Record
 import os
@@ -132,13 +133,13 @@ class IpdRatioSummaryWriter(PBToolRunner):
                 if field == 'sequence-header':
                     [internalTag, delim, externalTag] = value.strip().partition(' ')
                     self.seqMap[internalTag] = externalTag
-                print >>summaryWriter, line.strip()
+                print(line.strip(), file=summaryWriter)
                 continue
 
             if inHeader:
                 # We are at the end of the header -- write the tool-specific headers
                 for field in headers:
-                    print >>summaryWriter, ("##%s %s" % field)
+                    print(("##%s %s" % field), file=summaryWriter)
                 inHeader = False
 
             # Parse the line
@@ -153,7 +154,7 @@ class IpdRatioSummaryWriter(PBToolRunner):
                 rec.modsfwd = strand0Hits
                 rec.modsrev = strand1Hits
 
-                print >>summaryWriter, str(rec)
+                print(str(rec), file=summaryWriter)
 
 if __name__ == "__main__":
     kt = ModificationSummary()


=====================================
doc/manual.rst
=====================================
@@ -100,6 +100,9 @@ The following output options are available:
   - ``--csv_h5 FILENAME``: compact binary equivalent of CSV in HDF5 format
   - ``--bigwig FILENAME``: BigWig file (mostly only useful for SMRTView)
 
+If you are running base modification analysis through SMRT Link or a pbsmrtpipe
+pipeline, the GFF, HDF5, and BigWig outputs are automatically generated.
+
 
 modifications.gff
 -----------------
@@ -125,13 +128,58 @@ phase                   Not applicable
 attributes              Extra fields relevant to base mods. IPDRatio is traditional IPDRatio, context is the reference sequence -20bp to +20bp around the modification, and coverage level is the number of IPD observations used after Mapping QV filtering and accuracy filtering. If the row results from an identified modification we also include an identificationQv tag with the from the modification identification procedure. identificationQv is the phred-transformed probability of an incorrect identification, for bases that were identified as having a particular modification. frac, fracLow, fracUp are the estimated fraction of molecules carrying the modification, and the 5% confidence intervals of the estimate. The methylated fraction estimation is a beta-level feature, and should only be used for exploratory purposes.
 ================  ===========
 
-
 modifications.csv
 -----------------
+
 The modifications.csv file contains one row for each (reference position, strand) pair that appeared in the dataset with coverage at least x.
 x defaults to 3, but is configurable with '--minCoverage' flag to ipdSummary.py. The reference position index is 1-based for compatibility with the gff file the R environment.  Note that this output type scales poorly and is not
 recommended for large genomes; the HDF5 output should perform much better in
-these cases.
+these cases.  We have preserved the CSV option to support legacy applications
+but this is no longer produced by the pipelines in SMRT Link/pbsmrtpipe.
+
+
+modifications.h5
+----------------
+
+The HDF5 output largely mirrors the CSV output in content, but is structured
+slightly differently.  Each contig in the reference has its own group in the
+file, keyed by FASTA ID.  For each group, the columns in the CSV file are
+represented as arrays::
+
+  modifications.h5
+    --> refName
+      --> tpl
+      --> strand
+      --> base
+      --> score
+      --> tMean
+      --> tErr
+      --> modelPrediction
+      --> ipdRatio
+      --> coverage
+
+For example, the following code to iterate over the CSV file::
+
+    import csv
+    with open("modifications.csv") as f:
+        for rec in csv.reader(f):
+          process_record(rec)
+
+translates approximately to this code for reading the HDF5::
+
+    import h5py
+    COLUMNS="refName,tpl,strand,base,score,tMean,tErr,modelPrediction,ipdRatio,coverage".split(",")
+    with h5py.File(file_name) as f:
+        for ctg_id in sorted(f.keys()):
+            values = f[ctg_id]
+            for i in range(len(values["tpl"])):
+                rec = [ctg_id] + [fmt(values[k][i]) for k in COLUMNS[1:]]
+                process_record(rec)
+
+Note that the exact columns present in both files may vary depending on how
+kineticsTools was run; however, the example above is valid for the results of
+the pbsmrtpipe base modification analysis pipelines.
+
 
 Output columns
 --------------


=====================================
kineticsTools/BasicLdaEnricher.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 # Basic LDA Enricher class
 
 from math import sqrt
@@ -10,8 +11,8 @@ from scipy.special import gammaln as gamln
 from numpy import log, pi, log10, e, log1p, exp
 import numpy as np
 
-from MultiSiteCommon import MultiSiteCommon
-from MixtureEstimationMethods import MixtureEstimationMethods
+from .MultiSiteCommon import MultiSiteCommon
+from .MixtureEstimationMethods import MixtureEstimationMethods
 
 
 class BasicLdaEnricher(MultiSiteCommon):


=====================================
kineticsTools/KineticWorker.py
=====================================
@@ -1,3 +1,5 @@
+from __future__ import print_function
+from __future__ import absolute_import
 #################################################################################
 # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
 #
@@ -38,18 +40,18 @@ import numpy as np
 import scipy.stats.mstats as mstats
 import sys
 
-from MixtureEstimationMethods import MixtureEstimationMethods
-from MultiSiteCommon import MultiSiteCommon, canonicalBaseMap, modNames, ModificationPeakMask, FRAC, FRAClow, FRACup, log10e
+from .MixtureEstimationMethods import MixtureEstimationMethods
+from .MultiSiteCommon import MultiSiteCommon, canonicalBaseMap, modNames, ModificationPeakMask, FRAC, FRAClow, FRACup, log10e
 
-from MultiSiteDetection import *
+from .MultiSiteDetection import *
 
-from MedakaLdaEnricher import MedakaLdaEnricher
-from BasicLdaEnricher import BasicLdaEnricher
-from PositiveControlEnricher import PositiveControlEnricher
+from .MedakaLdaEnricher import MedakaLdaEnricher
+from .BasicLdaEnricher import BasicLdaEnricher
+from .PositiveControlEnricher import PositiveControlEnricher
 
 from kineticsTools.ModificationDecode import ModificationDecode, ModificationPeakMask
 
-from WorkerProcess import WorkerProcess, WorkerThread
+from .WorkerProcess import WorkerProcess, WorkerThread
 import pdb
 import traceback
 
@@ -145,13 +147,13 @@ class KineticWorker(object):
 
                     # Only convert to positive control call if we actually have enough
                     # coverage on the cognate base!
-                    if siteDict.has_key(mod['tpl']):
+                    if mod['tpl'] in siteDict:
 
                         # Copy mod identification data
                         siteDict[mod['tpl']]['modificationScore'] = mod['QMod']
                         siteDict[mod['tpl']]['modification'] = mod['modification']
 
-                        if self.options.methylFraction and mod.has_key(FRAC):
+                        if self.options.methylFraction and FRAC in mod:
                             siteDict[mod['tpl']][FRAC] = mod[FRAC]
                             siteDict[mod['tpl']][FRAClow] = mod[FRAClow]
                             siteDict[mod['tpl']][FRACup] = mod[FRACup]
@@ -161,7 +163,7 @@ class KineticWorker(object):
                         for nk in newKeys:
                             siteDict[mod['tpl']][nk] = mod[nk]
 
-                    if mod.has_key('Mask'):
+                    if 'Mask' in mod:
                         # The decoder should supply the off-target peak mask
                         mask = mod['Mask']
                         mask.append(0)  # make sure we always mask the cognate position
@@ -173,7 +175,7 @@ class KineticWorker(object):
                     # Mask out neighbor peaks that may have been caused by this mod
                     for offset in mask:
                         shadowPos = mod['tpl'] + strandSign * offset
-                        if siteDict.has_key(shadowPos):
+                        if shadowPos in siteDict:
                             siteDict[shadowPos]['offTargetPeak'] = True
 
                 finalCalls.extend(siteDict.values())
@@ -518,11 +520,11 @@ class KineticWorker(object):
             return 0.1
 
         if np.isnan(rawIpds).any():
-            print "got nan: %s" % str(rawIpds)
+            print("got nan: %s" % str(rawIpds))
 
         if rawIpds.mean() < 0.0001:
-            print "small"
-            print "got small: %s" % str(rawIpds)
+            print("small")
+            print("got small: %s" % str(rawIpds))
 
         capValue = min(10, np.percentile(rawIpds, 99))
         capIpds = np.minimum(rawIpds, capValue)


=====================================
kineticsTools/MedakaLdaEnricher.py
=====================================
@@ -1,3 +1,5 @@
+from __future__ import print_function
+from __future__ import absolute_import
 # Try to implement method used in Morishita et al.'s Medaka fish genome paper here
 
 from collections import defaultdict, Counter
@@ -13,7 +15,7 @@ from scipy.special import gammaln as gamln
 from numpy import log, pi, log10, e, log1p, exp
 import numpy as np
 
-from MultiSiteCommon import MultiSiteCommon
+from .MultiSiteCommon import MultiSiteCommon
 
 
 class MedakaLdaEnricher(MultiSiteCommon):
@@ -32,7 +34,7 @@ class MedakaLdaEnricher(MultiSiteCommon):
     def useLDAmodel(self, kinetics, pos, model, up, down ):
         """ Test out LDA model """
 
-        print "From use LDA model.\n"
+        print("From use LDA model.\n")
 
         res = np.zeros((up + down + 1, 6))
         ind = 0
@@ -57,7 +59,7 @@ class MedakaLdaEnricher(MultiSiteCommon):
 
     def callLDAstrand(self, kinetics, strand, model, up, down):
 
-        print "From callLDAstrand.\n"
+        print("From callLDAstrand.\n")
       
         tmp = [d for d in kinetics if d["strand"] == strand]
         tmp.sort(key=lambda x: x["tpl"])
@@ -75,18 +77,18 @@ class MedakaLdaEnricher(MultiSiteCommon):
 
     def aggregate(self, dataset, group_by_key, sum_value_key):
 
-        print "From aggregate.\n"
+        print("From aggregate.\n")
         emp = {}
         for item in dataset:
-            if item.has_key( sum_value_key ):
-                if emp.has_key( item[group_by_key] ):
+            if sum_value_key in item:
+                if item[group_by_key] in emp:
                     emp[ item[group_by_key] ] += item[sum_value_key]
                 else:
                     emp[ item[group_by_key] ] = item[sum_value_key]
 
         # Need to go back over the set again?
         for item in dataset:
-            if item.has_key( sum_value_key ):
+            if sum_value_key in item:
                 item[ sum_value_key ] = emp[ item[group_by_key] ]
 
         return dataset
@@ -95,7 +97,7 @@ class MedakaLdaEnricher(MultiSiteCommon):
 
     def callEnricherFunction(self, kinetics, up=10, down=10):
 
-        print "From callEnricher function.\n"
+        print("From callEnricher function.\n")
 
         fwd = self.callLDAstrand(kinetics, 0, self.fwd_model, up, down) 
         rev = self.callLDAstrand(kinetics, 1, self.rev_model, up, down)


=====================================
kineticsTools/MixtureEstimationMethods.py
=====================================
@@ -184,7 +184,7 @@ class MixtureEstimationMethods(object):
     # Bootstraps mix prop estimates to return estimate and simple bounds for 95% confidence interval
     def bootstrap(self, pos, mu0, mu1, nSamples=500):
 
-        if not self.rawKinetics.has_key(pos):
+        if pos not in self.rawKinetics:
             return np.array([float('nan'), float('nan'), float('nan')])
 
         res = np.zeros(3)


=====================================
kineticsTools/ModificationDecode.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 #################################################################################
 # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
 #
@@ -38,8 +39,8 @@ from scipy.special import gammaln as gamln
 from numpy import log, pi, log10, e, log1p, exp
 import numpy as np
 
-from MultiSiteCommon import MultiSiteCommon, canonicalBaseMap, modNames, ModificationPeakMask, FRAC, FRAClow, FRACup, log10e
-from MixtureEstimationMethods import MixtureEstimationMethods
+from .MultiSiteCommon import MultiSiteCommon, canonicalBaseMap, modNames, ModificationPeakMask, FRAC, FRAClow, FRACup, log10e
+from .MixtureEstimationMethods import MixtureEstimationMethods
 
 
 class ModificationDecode(MultiSiteCommon):
@@ -274,7 +275,7 @@ class ModificationDecode(MultiSiteCommon):
             modScore = self.scoreRegion(pos - self.post, pos + self.pre, modSeq)
             modScores = self.getRegionScores(pos - self.post, pos + self.pre, modSeq)
 
-            if self.methylFractionFlag and self.rawKinetics.has_key(pos):
+            if self.methylFractionFlag and pos in self.rawKinetics:
                 if self.rawKinetics[pos]["coverage"] > self.methylMinCov:
                     modifiedMeanVectors = self.getContextMeans(pos - self.post, pos + self.pre, modSeq)
 
@@ -283,7 +284,7 @@ class ModificationDecode(MultiSiteCommon):
             noModScore = self.scoreRegion(pos - self.post, pos + self.pre, modSeq)
             noModScores = self.getRegionScores(pos - self.post, pos + self.pre, modSeq)
 
-            if self.methylFractionFlag and self.rawKinetics.has_key(pos):
+            if self.methylFractionFlag and pos in self.rawKinetics:
                 if self.rawKinetics[pos]["coverage"] > self.methylMinCov:
                     unModifiedMeanVectors = self.getContextMeans(pos - self.post, pos + self.pre, modSeq)
 
@@ -306,7 +307,7 @@ class ModificationDecode(MultiSiteCommon):
             #         if self.rawKinetics[pos].has_key('Ca5C'):
             #             llr = -self.rawKinetics[pos]['Ca5C']
             #             qModScore = 100 * llr * log10e + 100*log1p(exp(-llr))*log10e
-            if self.methylFractionFlag and self.rawKinetics.has_key(pos):
+            if self.methylFractionFlag and pos in self.rawKinetics:
 
                 if self.rawKinetics[pos]["coverage"] > self.methylMinCov:
 
@@ -333,7 +334,7 @@ class ModificationDecode(MultiSiteCommon):
         sc = 0.0
         for pos in xrange(start, end + 1):
             ctx = sequence[(pos - self.pre):(pos + self.post + 1)].tostring()
-            if self.scores.has_key(pos):
+            if pos in self.scores:
                 sc += self.scores[pos][ctx]
 
         return sc
@@ -343,7 +344,7 @@ class ModificationDecode(MultiSiteCommon):
 
         for pos in xrange(start, end + 1):
             ctx = sequence[(pos - self.pre):(pos + self.post + 1)].tostring()
-            if self.scores.has_key(pos):
+            if pos in self.scores:
                 scores[pos - start] = self.scores[pos][ctx]
 
         return scores
@@ -358,7 +359,7 @@ class ModificationDecode(MultiSiteCommon):
             # Add a neighboring peak to the mask if
             # a) it has a single-site qv > 20
             # b) the observed IPDs are somewhat more likely under the modified hypothesis than the unmodified hypothesis
-            if self.rawKinetics.has_key(i) and self.rawKinetics[i]["score"] > 20:
+            if i in self.rawKinetics and self.rawKinetics[i]["score"] > 20:
                 if modScores[i - start] - noModScores[i - start] > 1.0:
                     maskPos.append(i - pos)
 


=====================================
kineticsTools/MultiSiteCommon.py
=====================================
@@ -124,7 +124,7 @@ class MultiSiteCommon(object):
         return lPx
 
     def singleScore(self, position, context):
-        if self.rawKinetics.has_key(position):
+        if position in self.rawKinetics:
             siteObs = self.rawKinetics[position]
 
             # mu of model, error in model
@@ -153,11 +153,11 @@ class MultiSiteCommon(object):
         # Handle the prior for a modification at the current base here
         # unmodified bases get a prior of 0, modified bases get a prior less than 0.
         prior = 0.0
-        if self.modPriors.has_key(context[self.pre]):
+        if context[self.pre] in self.modPriors:
             prior = self.modPriors[context[self.pre]]
 
         # Handle positions where we don't have enough coverage
-        if not self.rawKinetics.has_key(position):
+        if position not in self.rawKinetics:
             return prior
 
         ll = self.singleScore(position, context)
@@ -169,7 +169,7 @@ class MultiSiteCommon(object):
         meanVector = []
         for pos in xrange(start, end + 1):
             ctx = sequence[(pos - self.pre):(pos + self.post + 1)].tostring()
-            if self.contextMeanTable.has_key(ctx):
+            if ctx in self.contextMeanTable:
                 meanVector.append(self.contextMeanTable[ctx])
             else:
                 meanVector.append(self.gbmModel.getPredictions([ctx]))


=====================================
kineticsTools/MultiSiteDetection.py
=====================================
@@ -1,320 +1,320 @@
-#################################################################################
-# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
-#
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the distribution.
-# * Neither the name of Pacific Biosciences nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
-# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
-# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
-# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-# POSSIBILITY OF SUCH DAMAGE.
-#################################################################################
-
-from math import sqrt
-import math
-import scipy.stats as s
-import array as a
-import sys
-
-from numpy import log, pi, log10, e, log1p, exp
-import numpy as np
-import re
-
-log10e = log10(e)
-
-canonicalBaseMap = {'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T', 'H': 'A', 'I': 'C', 'J': 'C', 'K': 'C'}
-modNames = {'H': 'm6A', 'I': 'm5C', 'J': 'm4C', 'K': 'm5C'}
-
-m5CCode = 'I'
-
-iupacMap = {
-    'A': 'A',
-    'C': 'C',
-    'G': 'G',
-    'T': 'T',
-    'K': 'GT',
-    'M': 'AC',
-    'R': 'AG',
-    'Y': 'CT',
-    'S': 'CG',
-    'W': 'AT',
-    'B': 'CGT',
-    'D': 'AGT',
-    'H': 'ACT',
-    'V': 'ACG',
-    'N': 'ACGT'
-}
-
-
-def findMotifPositions(seq, motifs):
-    regexs = []
-
-    # Generate a regex for each motif, honouring degenerate bases
-    for m in motifs:
-        regex = ''
-
-        for c in m:
-            regex = regex + "[" + iupacMap[c] + "]"
-
-        regexs.append(regex)
-
-    allMatches = []
-
-    # Return a list of matching positions in the sequence
-    for r in regexs:
-        rr = re.compile(r)
-        matches = [x.start() for x in rr.finditer(seq)]
-        allMatches.extend(matches)
-
-    allMatches.sort()
-
-    return allMatches
-
-
-class MultiSiteDetection(object):
-
-    def __init__(self, gbmModel, sequence, rawKinetics, callBounds, methylMinCov, motifs=['CG']):
-        """
-
-        """
-
-        self.methylMinCov = methylMinCov
-        self.motifs = motifs
-
-        self.gbmModel = gbmModel
-        self.sequence = sequence
-
-        self.callStart = callBounds[0]
-        self.callEnd = callBounds[1]
-
-        # Extents that we will attempt to call a modification
-        self.callRange = xrange(self.callStart, self.callEnd)
-
-        # These switch because we changing viewpoints
-        self.pre = gbmModel.post
-        self.post = gbmModel.pre
-
-        self.lStart = self.pre
-        self.lEnd = len(self.sequence) - self.post
-
-        # Extents that we will use for likelihoods
-        self.likelihoodRange = xrange(self.lStart, self.lEnd)
-
-        self.alternateBases = dict((x, list(sequence[x])) for x in xrange(len(sequence)))
-
-        self.rawKinetics = rawKinetics
-
-    def getConfigs(self, centerIdx):
-        ''' Enumerate all the contexts centered at centerIdx with one
-            modification added '''
-        start = centerIdx - self.pre
-        end = centerIdx + self.post
-        return self._possibleConfigs(start, end)
-
-    def _possibleConfigs(self, start, end):
-        ''' Enumerate all the contexts coming from the substring self.sequence[start,end] with one
-            modification added '''
-
-        if start == end:
-            return self.alternateBases[start]
-        else:
-            r = []
-            allSuffixes = self._possibleConfigs(start + 1, end)
-
-            # The first suffix is alway the one with no modifications
-            # Only add the alternate to that one -- that way we only
-            # get configurations with a single modification, not all combos
-
-            noModsSuffix = allSuffixes[0]
-            if len(allSuffixes) > 1:
-                    restSuffixes = allSuffixes[1:]
-            else:
-                    restSuffixes = []
-
-            # The noMods suffix get the alternates
-            for c in self.alternateBases[start]:
-                    r.append(c + noModsSuffix)
-
-            # the other suffixes already have mods -- they just get the unmodified base
-            for suffix in restSuffixes:
-                    r.append(self.alternateBases[start][0] + suffix)
-
-            return r
-
-        # Compute something for all the windows in [start, end]
-    def getContexts(self, start, end, sequence):
-        contexts = []
-
-        for pos in xrange(start, end + 1):
-            ctx = sequence[(pos - self.pre):(pos + self.post + 1)].tostring()
-            contexts.append(ctx)
-
-        return contexts
-
-    def computeContextMeans(self):
-        """Generate a hash of the mean ipd for all candidate contexts"""
-
-        allContexts = []
-
-        for pos in self.motifPositions:
-            for offsetPos in xrange(pos - self.post, pos + self.pre + 1):
-                cfgs = self.getConfigs(offsetPos)
-                allContexts.extend(cfgs)
-
-        predictions = self.gbmModel.getPredictions(allContexts)
-        self.contextMeanTable = dict(zip(allContexts, predictions))
-
-    def decode(self):
-        """Use this method to do the full modification finding protocol"""
-
-        # Find sites matching the desired motif
-        self.findMotifs()
-
-        # Compute all the required mean ipds under all possible composite hypotheses
-        self.computeContextMeans()
-
-        # Compute a confidence for each mod and return results
-        return self.scorePositions()
-
-    def findMotifs(self):
-        """ Mark all the positions matching the requested motif """
-
-        # Generate list of matching positions
-        allMotifPositions = findMotifPositions(self.sequence, self.motifs)
-        self.motifPositions = []
-
-        for pos in allMotifPositions:
-            # Only use bases that are inside the callBounds
-            if self.callStart <= pos < self.callEnd:
-                self.alternateBases[pos].append('I')
-                self.motifPositions.append(pos)
-
-    def multiSiteDetection(self, positions, nullPred, modPred, centerPosition):
-        ''' kinetics, nullPred, and modifiedPred are parallel arrays 
-            containing the observations and predictions surrounding a 
-            single candidate motif site.  Estimate the p-value of
-            modification and the modified fraction here'''
-
-        # Apply the error model to the predictions
-        nullErr = 0.01 + 0.03 * nullPred + 0.06 * nullPred ** (1.7)
-        modErr = 0.01 + 0.03 * modPred + 0.06 * modPred ** (1.7)
-
-        obsMean = np.zeros(nullPred.shape)
-        obsErr = np.zeros(nullPred.shape)
-
-        # Get the observations into the same array format
-        for i in xrange(len(positions)):
-            position = positions[i]
-
-            if self.rawKinetics.has_key(position):
-                siteObs = self.rawKinetics[position]
-                obsMean[i] = siteObs['tMean']
-                obsErr[i] = siteObs['tErr']
-            else:
-                # Crank up the variance -- we don't have an observation at this
-                # position, so we should ignore it.
-                obsMean[i] = 0.0
-                obsErr[i] = 999999999
-
-        # Subtract off the background model from the observations and the modified prediction
-        dObs = obsMean - nullPred
-        # Error of observation and prediction are uncorrelated
-        obsSigma = obsErr ** 2 + nullErr ** 2
-        invObsSigma = 1.0 / obsSigma
-
-        # Error of null prediction and mod prediction are probably correlated -- need a better estimate of the error of the difference!!
-        dPred = modPred - nullPred
-        dPredSigma = (obsErr ** 2 + nullErr ** 2) / 2  # Just stubbing in a factor of 2 here...
-
-        weightsNumerator = invObsSigma * dPred
-        weights = weightsNumerator / (dPred * weightsNumerator).sum()
-
-        signalEstimate = (weights * dObs).sum()
-        varianceEstimate = (np.abs(weights) * obsSigma).sum()
-
-        maxSignal = (weights * dPred).sum()
-        maxSignalVariance = (np.abs(weights) * dPredSigma).sum()
-
-        # Now just run the standard erf on this Gaussian to quantify the probability that there is some signal
-        # What we want now:
-        #
-        # 1. p-value that dObs * dPred (dot product) is greater than 0.
-        # 2. Distribution of \alpha, where dObs = \alpha dPred, where \alpha \in [0,1], with appropriate error propagation
-        # 2a. Is it possible to summarize 2 with a Beta distribution?
-
-        pvalue = s.norm._cdf(-signalEstimate / varianceEstimate)
-        pvalue = max(sys.float_info.min, pvalue)
-        score = -10.0 * log10(pvalue)
-
-        centerPosition['MSscore'] = score
-        centerPosition['MSpvalue'] = pvalue
-
-        centerPosition['signal'] = signalEstimate
-        centerPosition['variance'] = varianceEstimate
-
-        centerPosition['modelSignal'] = maxSignal
-        centerPosition['modelVariance'] = maxSignalVariance
-
-        centerPosition['Mask'] = []
-
-        return centerPosition
-
-    def scorePositions(self):
-        """
-        Score each motif site in the sequence.
-        """
-
-        qvModCalls = dict()
-
-        dnaSeq = a.array('c')
-        dnaSeq.fromstring(self.sequence)
-
-        for pos in self.motifPositions:
-            if self.rawKinetics.has_key(pos):
-
-                # Fetch unmodified positions
-                nullPred = self.getRegionPredictions(pos - self.post, pos + self.pre, dnaSeq)
-
-                # Fetch modified positions and reset sequence
-                originalBase = dnaSeq[pos]
-                dnaSeq[pos] = m5CCode
-                modifiedPred = self.getRegionPredictions(pos - self.post, pos + self.pre, dnaSeq)
-                dnaSeq[pos] = originalBase
-
-                # Position that contribute to this call
-                positions = xrange(pos - self.post, pos + self.pre + 1)
-
-                # Run the multi-site detection and save the results
-                centerStats = self.rawKinetics[pos]
-                centerStats = self.multiSiteDetection(positions, nullPred, modifiedPred, centerStats)
-
-                qvModCalls[pos] = centerStats
-
-        return qvModCalls
-
-    def getRegionPredictions(self, start, end, sequence):
-        predictions = np.zeros(end - start + 1)
-
-        for pos in xrange(start, end + 1):
-            ctx = sequence[(pos - self.pre):(pos + self.post + 1)].tostring()
-            predictions[pos - start] = self.contextMeanTable[ctx]
-
-        return predictions
+#################################################################################
+# Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
+#
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+#   notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+#   notice, this list of conditions and the following disclaimer in the
+#   documentation and/or other materials provided with the distribution.
+# * Neither the name of Pacific Biosciences nor the names of its
+#   contributors may be used to endorse or promote products derived from
+#   this software without specific prior written permission.
+#
+# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
+# THIS LICENSE.  THIS SOFTWARE IS PROVIDED BY PACIFIC BIOSCIENCES AND ITS
+# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR
+# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#################################################################################
+
+from math import sqrt
+import math
+import scipy.stats as s
+import array as a
+import sys
+
+from numpy import log, pi, log10, e, log1p, exp
+import numpy as np
+import re
+
+log10e = log10(e)
+
+canonicalBaseMap = {'A': 'A', 'C': 'C', 'G': 'G', 'T': 'T', 'H': 'A', 'I': 'C', 'J': 'C', 'K': 'C'}
+modNames = {'H': 'm6A', 'I': 'm5C', 'J': 'm4C', 'K': 'm5C'}
+
+m5CCode = 'I'
+
+iupacMap = {
+    'A': 'A',
+    'C': 'C',
+    'G': 'G',
+    'T': 'T',
+    'K': 'GT',
+    'M': 'AC',
+    'R': 'AG',
+    'Y': 'CT',
+    'S': 'CG',
+    'W': 'AT',
+    'B': 'CGT',
+    'D': 'AGT',
+    'H': 'ACT',
+    'V': 'ACG',
+    'N': 'ACGT'
+}
+
+
+def findMotifPositions(seq, motifs):
+    regexs = []
+
+    # Generate a regex for each motif, honouring degenerate bases
+    for m in motifs:
+        regex = ''
+
+        for c in m:
+            regex = regex + "[" + iupacMap[c] + "]"
+
+        regexs.append(regex)
+
+    allMatches = []
+
+    # Return a list of matching positions in the sequence
+    for r in regexs:
+        rr = re.compile(r)
+        matches = [x.start() for x in rr.finditer(seq)]
+        allMatches.extend(matches)
+
+    allMatches.sort()
+
+    return allMatches
+
+
+class MultiSiteDetection(object):
+
+    def __init__(self, gbmModel, sequence, rawKinetics, callBounds, methylMinCov, motifs=['CG']):
+        """
+        Set up multi-site detection over `sequence` for the given motifs.
+        """
+
+        self.methylMinCov = methylMinCov
+        self.motifs = motifs
+
+        self.gbmModel = gbmModel
+        self.sequence = sequence
+
+        self.callStart = callBounds[0]
+        self.callEnd = callBounds[1]
+
+        # Extents that we will attempt to call a modification
+        self.callRange = xrange(self.callStart, self.callEnd)
+
+        # These switch because we are changing viewpoints
+        self.pre = gbmModel.post
+        self.post = gbmModel.pre
+
+        self.lStart = self.pre
+        self.lEnd = len(self.sequence) - self.post
+
+        # Extents that we will use for likelihoods
+        self.likelihoodRange = xrange(self.lStart, self.lEnd)
+
+        self.alternateBases = dict((x, list(sequence[x])) for x in xrange(len(sequence)))
+
+        self.rawKinetics = rawKinetics
+
+    def getConfigs(self, centerIdx):
+        ''' Enumerate all the contexts centered at centerIdx with one
+            modification added '''
+        start = centerIdx - self.pre
+        end = centerIdx + self.post
+        return self._possibleConfigs(start, end)
+
+    def _possibleConfigs(self, start, end):
+        ''' Enumerate all the contexts coming from the substring self.sequence[start,end] with one
+            modification added '''
+
+        if start == end:
+            return self.alternateBases[start]
+        else:
+            r = []
+            allSuffixes = self._possibleConfigs(start + 1, end)
+
+            # The first suffix is always the one with no modifications
+            # Only add the alternate to that one -- that way we only
+            # get configurations with a single modification, not all combos
+
+            noModsSuffix = allSuffixes[0]
+            if len(allSuffixes) > 1:
+                    restSuffixes = allSuffixes[1:]
+            else:
+                    restSuffixes = []
+
+            # The noMods suffix get the alternates
+            for c in self.alternateBases[start]:
+                    r.append(c + noModsSuffix)
+
+            # the other suffixes already have mods -- they just get the unmodified base
+            for suffix in restSuffixes:
+                    r.append(self.alternateBases[start][0] + suffix)
+
+            return r
+
+        # Compute something for all the windows in [start, end]
+    def getContexts(self, start, end, sequence):
+        contexts = []
+
+        for pos in xrange(start, end + 1):
+            ctx = sequence[(pos - self.pre):(pos + self.post + 1)].tostring()
+            contexts.append(ctx)
+
+        return contexts
+
+    def computeContextMeans(self):
+        """Generate a hash of the mean ipd for all candidate contexts"""
+
+        allContexts = []
+
+        for pos in self.motifPositions:
+            for offsetPos in xrange(pos - self.post, pos + self.pre + 1):
+                cfgs = self.getConfigs(offsetPos)
+                allContexts.extend(cfgs)
+
+        predictions = self.gbmModel.getPredictions(allContexts)
+        self.contextMeanTable = dict(zip(allContexts, predictions))
+
+    def decode(self):
+        """Use this method to do the full modification finding protocol"""
+
+        # Find sites matching the desired motif
+        self.findMotifs()
+
+        # Compute all the required mean ipds under all possible composite hypotheses
+        self.computeContextMeans()
+
+        # Compute a confidence for each mod and return results
+        return self.scorePositions()
+
+    def findMotifs(self):
+        """ Mark all the positions matching the requested motif """
+
+        # Generate list of matching positions
+        allMotifPositions = findMotifPositions(self.sequence, self.motifs)
+        self.motifPositions = []
+
+        for pos in allMotifPositions:
+            # Only use bases that are inside the callBounds
+            if self.callStart <= pos < self.callEnd:
+                self.alternateBases[pos].append('I')
+                self.motifPositions.append(pos)
+
+    def multiSiteDetection(self, positions, nullPred, modPred, centerPosition):
+        ''' nullPred and modPred are parallel arrays
+            containing the predictions surrounding a
+            single candidate motif site.  Estimate the p-value of
+            modification and the modified fraction here'''
+
+        # Apply the error model to the predictions
+        nullErr = 0.01 + 0.03 * nullPred + 0.06 * nullPred ** (1.7)
+        modErr = 0.01 + 0.03 * modPred + 0.06 * modPred ** (1.7)
+
+        obsMean = np.zeros(nullPred.shape)
+        obsErr = np.zeros(nullPred.shape)
+
+        # Get the observations into the same array format
+        for i in xrange(len(positions)):
+            position = positions[i]
+
+            if position in self.rawKinetics:
+                siteObs = self.rawKinetics[position]
+                obsMean[i] = siteObs['tMean']
+                obsErr[i] = siteObs['tErr']
+            else:
+                # Crank up the variance -- we don't have an observation at this
+                # position, so we should ignore it.
+                obsMean[i] = 0.0
+                obsErr[i] = 999999999
+
+        # Subtract off the background model from the observations and the modified prediction
+        dObs = obsMean - nullPred
+        # Error of observation and prediction are uncorrelated
+        obsSigma = obsErr ** 2 + nullErr ** 2
+        invObsSigma = 1.0 / obsSigma
+
+        # Error of null prediction and mod prediction are probably correlated -- need a better estimate of the error of the difference!!
+        dPred = modPred - nullPred
+        dPredSigma = (obsErr ** 2 + nullErr ** 2) / 2  # Just stubbing in a factor of 2 here...
+
+        weightsNumerator = invObsSigma * dPred
+        weights = weightsNumerator / (dPred * weightsNumerator).sum()
+
+        signalEstimate = (weights * dObs).sum()
+        varianceEstimate = (np.abs(weights) * obsSigma).sum()
+
+        maxSignal = (weights * dPred).sum()
+        maxSignalVariance = (np.abs(weights) * dPredSigma).sum()
+
+        # Now just run the standard erf on this Gaussian to quantify the probability that there is some signal
+        # What we want now:
+        #
+        # 1. p-value that dObs * dPred (dot product) is greater than 0.
+        # 2. Distribution of \alpha, where dObs = \alpha dPred, where \alpha \in [0,1], with appropriate error propagation
+        # 2a. Is it possible to summarize 2 with a Beta distribution?
+
+        pvalue = s.norm._cdf(-signalEstimate / varianceEstimate)
+        pvalue = max(sys.float_info.min, pvalue)
+        score = -10.0 * log10(pvalue)
+
+        centerPosition['MSscore'] = score
+        centerPosition['MSpvalue'] = pvalue
+
+        centerPosition['signal'] = signalEstimate
+        centerPosition['variance'] = varianceEstimate
+
+        centerPosition['modelSignal'] = maxSignal
+        centerPosition['modelVariance'] = maxSignalVariance
+
+        centerPosition['Mask'] = []
+
+        return centerPosition
+
+    def scorePositions(self):
+        """
+        Score each motif site in the sequence.
+        """
+
+        qvModCalls = dict()
+
+        dnaSeq = a.array('c')
+        dnaSeq.fromstring(self.sequence)
+
+        for pos in self.motifPositions:
+            if pos in self.rawKinetics:
+
+                # Fetch unmodified positions
+                nullPred = self.getRegionPredictions(pos - self.post, pos + self.pre, dnaSeq)
+
+                # Fetch modified positions and reset sequence
+                originalBase = dnaSeq[pos]
+                dnaSeq[pos] = m5CCode
+                modifiedPred = self.getRegionPredictions(pos - self.post, pos + self.pre, dnaSeq)
+                dnaSeq[pos] = originalBase
+
+                # Positions that contribute to this call
+                positions = xrange(pos - self.post, pos + self.pre + 1)
+
+                # Run the multi-site detection and save the results
+                centerStats = self.rawKinetics[pos]
+                centerStats = self.multiSiteDetection(positions, nullPred, modifiedPred, centerStats)
+
+                qvModCalls[pos] = centerStats
+
+        return qvModCalls
+
+    def getRegionPredictions(self, start, end, sequence):
+        predictions = np.zeros(end - start + 1)
+
+        for pos in xrange(start, end + 1):
+            ctx = sequence[(pos - self.pre):(pos + self.post + 1)].tostring()
+            predictions[pos - start] = self.contextMeanTable[ctx]
+
+        return predictions


=====================================
kineticsTools/PositiveControlEnricher.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 # Positive Control Enricher class
 
 from math import sqrt
@@ -11,8 +12,8 @@ from numpy import *
 from numpy import log, pi, log10, e, log1p, exp
 import numpy as np
 
-from MultiSiteCommon import MultiSiteCommon
-from MixtureEstimationMethods import MixtureEstimationMethods
+from .MultiSiteCommon import MultiSiteCommon
+from .MixtureEstimationMethods import MixtureEstimationMethods
 
 
 class PositiveControlEnricher(MultiSiteCommon):


=====================================
kineticsTools/ReferenceUtils.py
=====================================
@@ -37,7 +37,7 @@ import math
 import re
 import os
 
-from pbcore.io import AlignmentSet, ReferenceSet
+from pbcore.io import ReferenceSet
 
 # FIXME pbcore keys contigs by name, but kineticsTools usually keys by ID
 ReferenceWindow = namedtuple("ReferenceWindow", ["refId", "refName", "start", "end"])


=====================================
kineticsTools/ResultWriter.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import print_function
 #################################################################################
 # Copyright (c) 2011-2013, Pacific Biosciences of California, Inc.
 #
@@ -91,7 +92,7 @@ class ResultCollectorProcess(Process):
                         del column['rawData']
 
                 # Write out all the chunks that we can
-                while chunkCache.has_key(nextChunkId):
+                while nextChunkId in chunkCache:
                     nextChunk = chunkCache.pop(nextChunkId)
                     self.onResult(nextChunk)
 
@@ -168,13 +169,13 @@ class KineticsWriter(ResultCollectorProcess):
         handlers["variance"] = fourF
         handlers["MSscore"] = lambda x: "%d" % x
 
-        print >>f, delim.join(cols)
+        print(delim.join(cols), file=f)
 
         def fmt(rowData, colName):
-            if not rowData.has_key(colName):
+            if colName not in rowData:
                 return ""
 
-            if handlers.has_key(colName):
+            if colName in handlers:
                 return handlers[colName](rowData[colName])
             else:
                 return str(rowData[colName])
@@ -185,15 +186,15 @@ class KineticsWriter(ResultCollectorProcess):
                 itemList = (yield)
 
                 for item in itemList:
-                    if item.has_key("signal"):
+                    if "signal" in item:
                         values = [fmt(item, col) for col in cols]
-                        print >>f, delim.join(values)
+                        print(delim.join(values), file=f)
 
         except GeneratorExit:
             f.close()
             return
         except Exception as e:
-            print e
+            print(e)
 
 
     @consumer
@@ -252,13 +253,13 @@ class KineticsWriter(ResultCollectorProcess):
         handlers[FRAClow] = threeF
         handlers[FRACup] = threeF
 
-        print >>f, delim.join(cols)
+        print(delim.join(cols), file=f)
 
         def fmt(rowData, colName):
-            if not rowData.has_key(colName):
+            if colName not in rowData:
                 return ""
 
-            if handlers.has_key(colName):
+            if colName in handlers:
                 return handlers[colName](rowData[colName])
             else:
                 return str(rowData[colName])
@@ -270,13 +271,13 @@ class KineticsWriter(ResultCollectorProcess):
 
                 for item in itemList:
                     values = [fmt(item, col) for col in cols]
-                    print >>f, delim.join(values)
+                    print(delim.join(values), file=f)
 
         except GeneratorExit:
             f.close()
             return
         except Exception as e:
-            print e
+            print(e)
 
     @consumer
     def bigWigConsumer(self, filename):
@@ -294,7 +295,7 @@ class KineticsWriter(ResultCollectorProcess):
                 for x in chunk:
                     pos = int(x['tpl']) + 1
                     seqid = x['refName']
-                    ranges.setdefault(seqid, (sys.maxint, 0))
+                    ranges.setdefault(seqid, (sys.maxsize, 0))
                     ranges[seqid] = (min(ranges[seqid][0], pos),
                                      max(ranges[seqid][1], pos+1))
                     rec = BaseInfo(
@@ -745,22 +746,22 @@ class KineticsWriter(ResultCollectorProcess):
         start = siteObs['tpl'] + 1
         end = siteObs['tpl'] + 1
 
-        if siteObs.has_key('motif'):
+        if 'motif' in siteObs:
             attributes.append(('motif', "%s" % siteObs['motif']))
 
-        if siteObs.has_key('id'):
+        if 'id' in siteObs:
             attributes.append(('id', "%s" % siteObs['id']))
 
-        if self.options.methylFraction and siteObs.has_key(FRAC):
+        if self.options.methylFraction and FRAC in siteObs:
             attributes.append(('frac', "%.3f" % siteObs[FRAC]))
             attributes.append(('fracLow', "%.3f" % siteObs[FRAClow]))
             attributes.append(('fracUp', "%.3f" % siteObs[FRACup]))
 
-        if siteObs.has_key('modificationScore'):
+        if 'modificationScore' in siteObs:
             # Report the QV from the modification identification module as a special tag
             attributes.append(('identificationQv', "%d" % int(round(siteObs['modificationScore']))))
 
-        if siteObs.has_key('modification'):
+        if 'modification' in siteObs:
 
             if siteObs['modification'] == '.':
                 recordType = 'modified_base'
@@ -830,11 +831,11 @@ class KineticsWriter(ResultCollectorProcess):
 
                     if siteObs['coverage'] > self.options.minCoverage:
                         # Case 1
-                        if siteObs.has_key('modification') and siteObs['modification'] != '.':
+                        if 'modification' in siteObs and siteObs['modification'] != '.':
                             gff.writeRecord(self.makeGffRecord(siteObs))
 
                         # Case 2
-                        elif siteObs['score'] > minScore and not siteObs.has_key('offTargetPeak'):
+                        elif siteObs['score'] > minScore and 'offTargetPeak' not in siteObs:
                             gff.writeRecord(self.makeGffRecord(siteObs))
 
                     # FIXME: Try not filtering:
@@ -888,7 +889,7 @@ class KineticsWriter(ResultCollectorProcess):
                 siteObsList = (yield)
 
                 for siteObs in siteObsList:
-                    if siteObs.has_key('Ca5C') and siteObs['strand'] == 0:
+                    if 'Ca5C' in siteObs and siteObs['strand'] == 0:
                         gff.writeRecord( self.makeM5CgffRecord( siteObs ) )
 
         except GeneratorExit:


=====================================
kineticsTools/internal/__init__.py
=====================================
@@ -0,0 +1,5 @@
+"""Functions with no external dependencies.
+
+These can be tested without actually installing
+(if we also avoid the C extension module).
+"""


=====================================
kineticsTools/internal/basic.py
=====================================
@@ -0,0 +1,58 @@
+"""Basic functional stuff, I guess.
+"""
+import logging
+import os
+
+LOG = logging.getLogger(__name__)
+
+
+#majorityChem = ReferenceUtils.loadAlignmentChemistry(self.alignments)
+def getIpdModelFilename(ipdModel, majorityChem, paramsPath):
+    """
+    ipdModel: str
+    majorityChem: str
+    """
+    # In order of precedence they are:
+    # 1. Explicit path passed to --ipdModel
+    # 2. In-order through each directory listed in --paramsPath
+
+    if ipdModel:
+        LOG.info("Using passed-in kinetics model: {!r}".format(ipdModel))
+        return ipdModel
+
+    if majorityChem == 'unknown':
+        msg = "Chemistry cannot be identified---cannot perform kinetic analysis"
+        LOG.error(msg)
+        raise Exception(msg)
+
+    # Route any Sequel chemistries to Seabiscuit (SP2-C2) training (for now)
+    if majorityChem.startswith("S/"):
+        majorityChem = "SP2-C2"
+
+    # '/' is not a valid character in a file, unescaped--remove it
+    majorityChem = majorityChem.replace("/", "")
+
+    # go through each paramsPath in-order, checking if the model exists there or not
+    for paramsPath in paramsPath:
+        ipdModel = os.path.join(paramsPath, majorityChem + ".h5")
+        if os.path.isfile(ipdModel):
+            LOG.info("Using chemistry-matched kinetics model: {!r}".format(ipdModel))
+            return ipdModel
+
+    msg = "No kinetics model available for this chemistry ({!r}) on paramsPath {!r}".format(
+            ipdModel, paramsPath)
+    LOG.error(msg)
+    raise Exception(msg)
+
+
+def getResourcePathSpec(default_dir):
+    """Create list of [${SMRT_CHEMISTRY_BUNDLE_DIR}/kineticsTools, {default_dir}].
+    Return colon-separated string.
+    """
+    pths = []
+    smrtChemBundlePath = os.environ.get("SMRT_CHEMISTRY_BUNDLE_DIR", None)
+    if smrtChemBundlePath:
+        LOG.info("found SMRT_CHEMISTRY_BUNDLE_DIR, prepending to default paramsPath")
+        pths.append(os.path.join(smrtChemBundlePath, "kineticsTools"))
+    pths.append(default_dir)
+    return ':'.join(pths)


=====================================
kineticsTools/ipdSummary.py
=====================================
@@ -61,6 +61,8 @@ from kineticsTools.ResultWriter import KineticsWriter
 from kineticsTools.ipdModel import IpdModel
 from kineticsTools.ReferenceUtils import ReferenceUtils
 
+from .internal import basic
+
 __version__ = "2.3"
 
 log = logging.getLogger(__name__)
@@ -76,8 +78,9 @@ class Constants(object):
     METHYL_FRACTION_ID = "kinetics_tools.task_options.compute_methyl_fraction"
     IDENTIFY_ID = "kinetics_tools.task_options.identify"
 
-def _getResourcePath():
-    return resource_filename(Requirement.parse('kineticsTools'),'kineticsTools/resources')
+def _getResourcePathSpec():
+    default_dir = resource_filename(Requirement.parse('kineticsTools'), 'kineticsTools/resources')
+    return basic.getResourcePathSpec(default_dir)
 
 def _validateResource(func, p):
     """Basic func for validating files, dirs, etc..."""
@@ -98,6 +101,21 @@ def _validateNoneOrResource(func, p):
         return _validateResource(func, p)
 
 
+def validateNoneOrPathSpec(ps):
+    """
+    Handle optional values. If a pathspec is explicitly provided, then
+    it will be validated.
+    """
+    if ps is None:
+        return ps
+    pths = []
+    for p in ps.split(':'):
+        pths.append(_validateResource(os.path.isdir, p))
+    if not pths:
+        raise ValueError("Empty pathspec!")
+    return pths
+
+
 validateFile = functools.partial(_validateResource, os.path.isfile)
 validateDir = functools.partial(_validateResource, os.path.isdir)
 
@@ -157,21 +175,21 @@ def get_parser():
     p.add_int(Constants.MAX_LENGTH_ID,
         option_str="maxLength",
         default=Constants.MAX_LENGTH_DEFAULT,
-        name="Max sequence length",
+        name="Maximum sequence length",
         description="Maximum number of bases to process per contig")
     tcp.add_str(Constants.IDENTIFY_ID,
         option_str="identify",
-        default="",
+        default="m6A,m4C",
         name="Identify basemods",
         description="Specific modifications to identify (comma-separated "+\
             "list).  Currrent options are m6A and/or m4C.")
     argp.add_argument(
         "--identify",
         action="store",
-        default="",
+        default="m6A,m4C",
         help="Specific modifications to identify (comma-separated "+\
-            "list).  Currrent options are m6A, m4C, m5C_TET.  Cannot be "+\
-            "used with --control.")
+            "list).  Currrent options are m6A, m4C, m5C_TET.  Using --control "+\
+            "overrides this option.")
     _DESC = "In the --identify mode, add --methylFraction to "+\
             "command line to estimate the methylated fraction, along with "+\
             "95%% confidence interval bounds."
@@ -251,12 +269,12 @@ def _get_more_options(parser):
 
 
     # Parameter options:
-
+    defaultParamsPathSpec = _getResourcePathSpec()
     parser.add_argument('--paramsPath',
                         dest='paramsPath',
-                        default=_getResourcePath(),
-                        type=validateNoneOrDir,
-                        help='Directory containing in-silico trained model for each chemistry')
+                        default=defaultParamsPathSpec,
+                        type=validateNoneOrPathSpec,
+                        help='List of :-delimited directory paths containing in-silico trained models (default is "%s")' % defaultParamsPathSpec)
 
     parser.add_argument('--minCoverage',
                         dest='minCoverage',
@@ -283,6 +301,7 @@ def _get_more_options(parser):
     parser.add_argument('--ipdModel',
                         dest='ipdModel',
                         default=None,
+                        type=validateNoneOrFile,
                         help='Alternate synthetic IPD model HDF5 file')
 
     parser.add_argument('--modelIters',
@@ -400,8 +419,9 @@ class KineticsToolsRunner(object):
         if not os.path.exists(self.args.alignment_set):
             parser.error('Input AlignmentSet file provided does not exist')
 
-        if self.args.identify and self.args.control:
-            parser.error('--control and --identify are mutally exclusive. Please choose one or the other')
+        # Over-ride --identify if --control was specified
+        if self.args.control:
+            self.args.identify = ""
 
         if self.args.useLDA:
             if self.args.m5Cclassifier is None:
@@ -539,56 +559,13 @@ class KineticsToolsRunner(object):
         winEnd = refWindow.end
         pass
 
-    def loadReferenceAndModel(self, referencePath):
+    def loadReferenceAndModel(self, referencePath, ipdModelFilename):
         assert self.alignments is not None and self.referenceWindows is not None
         # Load the reference contigs - annotated with their refID from the cmp.h5
-        logging.info("Loading reference contigs %s" % referencePath)
+        logging.info("Loading reference contigs {!r}".format(referencePath))
         contigs = ReferenceUtils.loadReferenceContigs(referencePath,
             alignmentSet=self.alignments, windows=self.referenceWindows)
-
-        # There are three different ways the ipdModel can be loaded.
-        # In order of precedence they are:
-        # 1. Explicit path passed to --ipdModel
-        # 2. Path to parameter bundle, model selected using the cmp.h5's sequencingChemistry tags
-        # 3. Fall back to built-in model.
-
-        # By default, use built-in model
-        ipdModel = None
-
-        if self.args.ipdModel:
-            ipdModel = self.args.ipdModel
-            logging.info("Using passed in ipd model: %s" % self.args.ipdModel)
-            if not os.path.exists(self.args.ipdModel):
-                logging.error("Couldn't find model file: %s" % self.args.ipdModel)
-                sys.exit(1)
-        elif self.args.paramsPath:
-            if not os.path.exists(self.args.paramsPath):
-                logging.error("Params path doesn't exist: %s" % self.args.paramsPath)
-                sys.exit(1)
-
-            majorityChem = ReferenceUtils.loadAlignmentChemistry(self.alignments)
-
-            # Temporary solution for Sequel chemistries: we do not
-            # have trained kinetics models in hand yet for Sequel
-            # chemistries.  However we have observed that the P5-C3
-            # training seems to yield fairly good results on Sequel
-            # chemistries to date.  So for the moment, we will use
-            # that model for Sequel data.
-            if majorityChem.startswith("S/"):
-                logging.info("No trained model available yet for Sequel chemistries; modeling as P5-C3")
-                majorityChem = "P5-C3"
-
-            ipdModel = os.path.join(self.args.paramsPath, majorityChem + ".h5")
-            if majorityChem == 'unknown':
-                logging.error("Chemistry cannot be identified---cannot perform kinetic analysis")
-                sys.exit(1)
-            elif not os.path.exists(ipdModel):
-                logging.error("Aborting, no kinetics model available for this chemistry: %s" % ipdModel)
-                sys.exit(1)
-            else:
-                logging.info("Using Chemistry matched IPD model: %s" % ipdModel)
-
-        self.ipdModel = IpdModel(contigs, ipdModel, self.args.modelIters)
+        self.ipdModel = IpdModel(contigs, ipdModelFilename, self.args.modelIters)
 
     def loadSharedAlignmentSet(self, cmpH5Filename):
         """
@@ -636,7 +613,7 @@ class KineticsToolsRunner(object):
                     if self.args.skipUnrecognizedContigs:
                         continue
                     else:
-                        raise Exception, "Unrecognized contig!"
+                        raise Exception("Unrecognized contig!")
         elif self.args.referenceWindowsFromAlignment:
             self.referenceWindows = ReferenceUtils.referenceWindowsFromAlignment(self.alignments, self.alignments.referenceInfo)
             refNames = set([rw.refName for rw in self.referenceWindows])
@@ -647,7 +624,10 @@ class KineticsToolsRunner(object):
                 self.refInfo)
 
         # Load reference and IpdModel
-        self.loadReferenceAndModel(self.args.reference)
+        ipdModelFilename = basic.getIpdModelFilename(
+                self.args.ipdModel, ReferenceUtils.loadAlignmentChemistry(self.alignments),
+                self.args.paramsPath)
+        self.loadReferenceAndModel(self.args.reference, ipdModelFilename)
 
         # Spawn workers
         self._launchSlaveProcesses()


=====================================
kineticsTools/pipelineTools.py
=====================================
@@ -32,7 +32,7 @@
 def consumer(func):
     def start(*args, **kwargs):
         c = func(*args, **kwargs)
-        c.next()
+        next(c)
         return c
     return start
 


=====================================
kineticsTools/resources/SP2-C2.h5
=====================================
Binary files /dev/null and b/kineticsTools/resources/SP2-C2.h5 differ


=====================================
kineticsTools/summarizeModifications.py
=====================================
@@ -32,6 +32,7 @@
 """
 Summarizes kinetic modifications in the alignment_summary.gff file.
 """
+from __future__ import print_function
 
 import cProfile
 from itertools import groupby
@@ -124,13 +125,13 @@ class ModificationSummary(object):
                 if field == 'sequence-header':
                     [internalTag, delim, externalTag] = value.strip().partition(' ')
                     self.seqMap[internalTag] = externalTag
-                print >>summaryWriter, line.strip()
+                print(line.strip(), file=summaryWriter)
                 continue
 
             if inHeader:
                 # We are at the end of the header -- write the tool-specific headers
                 for field in headers:
-                    print >>summaryWriter, ("##%s %s" % field)
+                    print(("##%s %s" % field), file=summaryWriter)
                 inHeader = False
 
             # Parse the line
@@ -146,7 +147,7 @@ class ModificationSummary(object):
                 rec.modsfwd = ",".join([str(cFwd[x]) for x in self.knownModificationEvents])
                 rec.modsrev = ",".join([str(cRev[x]) for x in self.knownModificationEvents])
 
-                print >>summaryWriter, str(rec)
+                print(str(rec), file=summaryWriter)
         return 0
 
 


=====================================
requirements-dev.txt
=====================================
@@ -1,3 +1,3 @@
 sphinx
-nose
+pytest
 cram


=====================================
test/cram/detection.t
=====================================
@@ -10,7 +10,7 @@ Load in data:
 
 Run basic ipdSummary:
 
-  $ ipdSummary --log-level=WARNING --pvalue 0.001 --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --reference $REFERENCE $INPUT
+  $ ipdSummary --log-level=WARNING --pvalue 0.001 --numWorkers 1 --csv tmp.csv --gff tmp.gff --summary_h5 tmp.h5 --identify "" --reference $REFERENCE $INPUT
 
 Look at output csv file:
 


=====================================
test/detectionMethylFractionTest.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import print_function
 import logging
 import os
 import platform
@@ -41,8 +42,8 @@ class TestDetectionMethylFraction(TestSetup):
 
         # Verify that we detect m6A mods at 14982 and 14991
         m6AMods = [{'frac': x['frac'], 'fracLow': x['fracLow'], 'fracUp': x['fracUp'], 'tpl': x['tpl'], 'strand': x['strand']}
-                   for x in kinetics if x.has_key('frac') and x['tpl'] in (14982, 14991)]
-        print m6AMods
+                   for x in kinetics if 'frac' in x and x['tpl'] in (14982, 14991)]
+        print(m6AMods)
 
         for mod in m6AMods:
             self.assertGreater(mod["frac"], 0.5)


=====================================
test/methyFractionTest.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import print_function
 import logging
 import os
 import platform
@@ -38,7 +39,7 @@ class TestMethylFraction(TestSetup):
         self.kw._prepForReferenceWindow(referenceWindow)
         kinetics = self.kw._summarizeReferenceRegion(bounds, True, True)
         mods = self.kw._decodePositiveControl(kinetics, bounds)
-        print mods
+        print(mods)
 
         # Verify that we detect m6A mods at 14982 and 14991
         m6AMods = [x for x in mods if x['modification'] == 'm6A' and x['tpl'] in (14982, 14991)]


=====================================
test/test.py
=====================================
@@ -1,3 +1,4 @@
+from __future__ import print_function
 import logging
 import os
 import platform
@@ -72,14 +73,14 @@ class TestBasic(TestSetup):
         snippetFunc = self.ipdModel.snippetFunc(1, 10, 10)
         snip = snippetFunc(0, 0)
 
-        print "Got snippet at pos 0: %s" % snip
-        print "First 10 bases of lambda: %s" % (contig[0:10])
+        print("Got snippet at pos 0: %s" % snip)
+        print("First 10 bases of lambda: %s" % (contig[0:10]))
 
         lastPos = len(contig) - 1
         snip = snippetFunc(lastPos, 0)
 
-        print "Got snippet at pos %d: %s" % (lastPos, snip)
-        print "Last 10 bases of lambda: %s" % contig[-10:]
+        print("Got snippet at pos %d: %s" % (lastPos, snip))
+        print("Last 10 bases of lambda: %s" % contig[-10:])
 
     def _testSpeed(self):
 
@@ -105,44 +106,44 @@ class TestBasic(TestSetup):
         for (pos, tplStrand) in [(3, 0), (10, 0), (20, 0), (30, 0), (31, 0), (32, 0), (33, 0), (34, 0)]:
             snip = snippetFunc(pos, tplStrand)
 
-            print "Pos: %d, TplStrand: %d" % (pos, tplStrand)
-            print "Got ctx: %s" % snip
+            print("Pos: %d, TplStrand: %d" % (pos, tplStrand))
+            print("Got ctx: %s" % snip)
             #print "From lambda: %s" % (contig[(pos - 4):(pos + 11)])
 
-            print "Lut prediction: %f" % ipdFunc(pos, tplStrand)
+            print("Lut prediction: %f" % ipdFunc(pos, tplStrand))
 
             gbmPred = self.ipdModel.gbmModel.getPredictionsSlow([snip])[0]
-            print "Gbm prediction: %f" % gbmPred
+            print("Gbm prediction: %f" % gbmPred)
 
             gbmPred = self.ipdModel.gbmModel.getPredictions([snip])[0]
-            print "Gbm prediction fast: %f" % gbmPred
+            print("Gbm prediction fast: %f" % gbmPred)
 
             gbmSnippetPred = ipdModelFunc(pos, tplStrand)
-            print "Gbm pred via predictIpdFuncModel: %f" % gbmSnippetPred
+            print("Gbm pred via predictIpdFuncModel: %f" % gbmSnippetPred)
 
             if snip[4] == 'A':
                 snip2 = snip[0:4] + 'H' + snip[5:]
                 snip3 = snip[0:9] + 'H' + snip[10:]
                 gbmPred = self.ipdModel.gbmModel.getPredictionsSlow([snip2, snip3])
-                print "Methylated prediction: %s ->  %f" % (snip2, gbmPred[0])
-                print "Methylated prediction: %s ->  %f" % (snip3, gbmPred[1])
+                print("Methylated prediction: %s ->  %f" % (snip2, gbmPred[0]))
+                print("Methylated prediction: %s ->  %f" % (snip3, gbmPred[1]))
 
                 gbmPred = self.ipdModel.gbmModel.getPredictions([snip2, snip3])
-                print "Methylated prediction fast: %s ->  %f" % (snip2, gbmPred[0])
-                print "Methylated prediction fast: %s ->  %f" % (snip3, gbmPred[1])
+                print("Methylated prediction fast: %s ->  %f" % (snip2, gbmPred[0]))
+                print("Methylated prediction fast: %s ->  %f" % (snip3, gbmPred[1]))
 
             if snip[4] == 'C':
                 snip2 = snip[0:4] + 'J' + snip[5:]
                 snip3 = snip[0:9] + 'J' + snip[10:]
                 gbmPred = self.ipdModel.gbmModel.getPredictionsSlow([snip2, snip3])
-                print "Methylated prediction: %s ->  %f" % (snip2, gbmPred[0])
-                print "Methylated prediction: %s ->  %f" % (snip3, gbmPred[1])
+                print("Methylated prediction: %s ->  %f" % (snip2, gbmPred[0]))
+                print("Methylated prediction: %s ->  %f" % (snip3, gbmPred[1]))
 
                 gbmPred = self.ipdModel.gbmModel.getPredictions([snip2, snip3])
-                print "Methylated prediction fast: %s ->  %f" % (snip2, gbmPred[0])
-                print "Methylated prediction fast: %s ->  %f" % (snip3, gbmPred[1])
+                print("Methylated prediction fast: %s ->  %f" % (snip2, gbmPred[0]))
+                print("Methylated prediction fast: %s ->  %f" % (snip3, gbmPred[1]))
 
-            print ""
+            print("")
 
     def testSmallDecode(self):
         """
@@ -161,7 +162,7 @@ class TestBasic(TestSetup):
         self.kw._prepForReferenceWindow(referenceWindow)
         kinetics = self.kw._summarizeReferenceRegion(bounds, False, True)
         mods = self.kw._decodePositiveControl(kinetics, bounds)
-        print mods
+        print(mods)
 
         # Verify that we detect m6A mods at 14982 and 14991
         m6AMods = [x for x in mods if x['modification'] == 'm6A' and x['tpl'] in (14982, 14991)]


=====================================
test/test_internal.py
=====================================
@@ -0,0 +1,68 @@
+import os
+import pytest
+import kineticsTools.internal.basic as B
+
+def test_basic():
+    expected = 'anything'
+    assert expected == B.getIpdModelFilename(expected, 'foo', [])
+
+    with pytest.raises(Exception) as excinfo:
+        B.getIpdModelFilename(None, 'unknown', [])
+    assert 'Chemistry cannot be identified' in str(excinfo.value)
+
+    with pytest.raises(Exception) as excinfo:
+        B.getIpdModelFilename(None, 'foo', [])
+    assert 'No kinetics model available for this chemistry' in str(excinfo.value)
+
+
+def test_path(monkeypatch):
+    def isfile(fn):
+        if fn in ('path1/foo.h5', 'path2/foo.h5'):
+            return True
+        if fn == 'pathmissing/foo.h5':
+            return False
+        raise Exception('Called! {!r}'.format(fn))
+    monkeypatch.setattr(os.path, 'isfile', isfile)
+
+    chem = 'foo'
+
+    expected = 'path1/foo.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['path1'])
+
+    expected = 'path1/foo.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['pathmissing', 'path1'])
+
+    expected = 'path1/foo.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['path1', 'pathmissing'])
+
+    expected = 'path1/foo.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['path1', 'path2'])
+
+
+def test_path_with_prefixed_chem(monkeypatch):
+    def isfile(fn):
+        if fn in ('path1/SP2-C2.h5', 'path2/SP2-C2.h5'):
+            return True
+        if fn == 'pathmissing/SP2-C2.h5':
+            return False
+        raise Exception('Called! {!r}'.format(fn))
+    monkeypatch.setattr(os.path, 'isfile', isfile)
+
+    chem = 'S/foo' # S/ prefix is weird for now.
+
+    expected = 'path1/SP2-C2.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['path1'])
+
+    expected = 'path1/SP2-C2.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['pathmissing', 'path1'])
+
+    expected = 'path1/SP2-C2.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['path1', 'pathmissing'])
+
+    expected = 'path1/SP2-C2.h5'
+    assert expected == B.getIpdModelFilename(None, chem, ['path1', 'path2'])
+
+
+def test_getResourcePathSpec(monkeypatch):
+    monkeypatch.setenv('SMRT_CHEMISTRY_BUNDLE_DIR', 'foo')
+    assert 'foo/kineticsTools:bar' == B.getResourcePathSpec('bar')


=====================================
test/test_outputs.py
=====================================
@@ -2,6 +2,7 @@
 """
 Test sanity of various output formats for a minimal real-world example.
 """
+from __future__ import print_function
 
 import subprocess
 import tempfile
@@ -49,7 +50,7 @@ class TestOutputs(unittest.TestCase):
             "--reference", REFERENCE,
             ALIGNMENTS
         ]
-        print " ".join(args)
+        print(" ".join(args))
         assert subprocess.call(args) == 0
         with open(cls.csv_file) as f:
             cls.csv_records = [l.split(",") for l in f.read().splitlines()][1:]



View it on GitLab: https://salsa.debian.org/med-team/kineticstools/commit/c195e64503c99e91a9bd29a0c1ba78cef0ac32f6

-- 
View it on GitLab: https://salsa.debian.org/med-team/kineticstools/commit/c195e64503c99e91a9bd29a0c1ba78cef0ac32f6
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181028/93bb8de3/attachment-0001.html>


More information about the debian-med-commit mailing list