[med-svn] [Git][med-team/tnseq-transit][upstream] New upstream version 3.0.2

Sat Jan 18 22:06:51 GMT 2020


Steffen Möller pushed to branch upstream at Debian Med / tnseq-transit


Commits:
20d19355 by Steffen Moeller at 2020-01-18T22:46:11+01:00
New upstream version 3.0.2
- - - - -


10 changed files:

- CHANGELOG.md
- + src/fitness_defect.py
- src/pytransit/__init__.py
- src/pytransit/analysis/gi.py
- src/pytransit/analysis/resampling.py
- src/pytransit/analysis/zinb.py
- src/pytransit/doc/source/transit_methods.rst
- src/pytransit/doc/source/transit_overview.rst
- src/pytransit/export/combined_wig.py
- src/pytransit/transit_gui.py


Changes:

=====================================
CHANGELOG.md
=====================================
@@ -1,6 +1,14 @@
 # Change log
 All notable changes to this project will be documented in this file.
 
+## Version 3.0.2 2019-12-21
+#### TRANSIT:
+ - Mostly cosmetic fixes
+ - Updated some command-line and GUI messages
+ - Updated documentation (especially for GI and resampling)
+ - Removed "warning: high stderr" from gene status in ZINB
+ - Added LFCs in ZINB output
+
 ## Version 3.0.1 2019-08-01
 #### TRANSIT:
  - Add check for python3 (TRANSIT 3+ requires python3.6+)


=====================================
src/fitness_defect.py
=====================================
@@ -0,0 +1,110 @@
+import sys,random,numpy
+import pytransit.tnseq_tools
+from statsmodels.stats.multitest import fdrcorrection
+
+def read_wig(fname):
+  coords,counts = [],[]
+  for line in open(fname):
+    if line[0] not in "0123456789": continue
+    w = line.rstrip().split()
+    coord,cnt = int(w[0]),int(w[1])
+    coords.append(coord)
+    counts.append(cnt)
+  return coords,counts
+
+# remove all runs of zeros of length >= W
+
+def remove_essential_regions(wig,W):
+  runs = []
+  i,n = 0,len(wig)
+  while i<n:
+    if wig[i]>0: i += 1
+    else:
+      j = i
+      while j<n and wig[j]==0: j += 1
+      runs.append((i,j))
+      i = j
+  counts = []
+  for k,(i,j) in enumerate(runs):
+    if j-i<W: counts += wig[i:j]
+    if k<len(runs)-1:
+      next = runs[k+1][0]
+      counts += wig[j:next]
+  return counts
+
+def get_counts(coords,counts,gene):
+  cnts = []
+  start,end = gene['start'],gene['end']
+  for i,co in enumerate(coords):
+    if co>=start and co<=end: cnts.append(counts[i])
+  return cnts
+
+def sample_counts(counts,size,times):
+  samples = []
+  for i in range(times):
+    samples.append(random.sample(counts,size)) # without replacement
+  return samples
+
+###################################
+
+if len(sys.argv)<3:
+  print "usage: python fitness_defect.py <comma_separated_list_of_wig_files> <prot_table>"
+  sys.exit(0)
+
+print "# command: python",
+for x in sys.argv: print x,
+print
+
+coords,counts = read_wig(sys.argv[1])
+genes = pytransit.tnseq_tools.read_genes(sys.argv[2])
+
+noness = remove_essential_regions(counts,5) # run length
+print '# sites: %s, noness: %s' % (len(counts),len(noness))
+noness_arr = numpy.array((noness))
+noness_NZvals = noness_arr[noness_arr>0]
+print '# noness: zeros=%s, NZmean=%0.1f' % (noness_NZvals.size,numpy.mean(noness_NZvals))
+
+cache = {}
+
+results = []
+for gene in genes:
+  sys.stderr.write("%s %s\n" % (gene['rv'],gene['gene']))
+  cnts = get_counts(coords,counts,gene)
+  n = len(cnts)
+  if n==0: continue
+  nonzeros = [cnts[x] for x in numpy.nonzero(cnts)[0]]
+  zeros,NZmean = n-len(nonzeros),numpy.mean(nonzeros) if len(nonzeros)>0 else 0
+  sat = len(nonzeros)/float(n)
+  tot = sum(cnts)
+  mn = tot/float(n)
+
+  # determine p-value by comparing to sum of counts for random draws of sites from noness
+  N,alpha = 10000,0.05
+  if n in cache: sample = cache[n]
+  else: 
+    sample = sample_counts(noness,n,N)
+    cache[n] = sample
+  samplesums = [sum(lst) for lst in sample]
+  meansum = numpy.mean(samplesums)
+  PC = 1 # pseudo-counts
+  rel = (sum(cnts)+PC)/float(meansum+PC)
+  LFC = numpy.log2(rel)
+
+  lesser = len(list(filter(lambda x: x<=tot,samplesums)))
+  greater = len(list(filter(lambda x: x>=tot,samplesums)))
+  pval = min(lesser,greater)/float(N)
+
+  vals = [gene[x] for x in "rv gene start end strand".split()]
+  vals += [len(cnts),"%s" % tot,"%0.1f" % mn,"%0.3f" % sat,len(nonzeros),"%0.1f" % NZmean]
+  vals += [int(meansum),"%0.3f" % rel,"%0.3f" % LFC,pval]
+
+  results.append(vals)
+  #if gene['rv']=='Rv0020c': break
+
+pvals = [x[-1] for x in results]
+qvals = list(fdrcorrection(pvals)[1])
+results = [x+["%0.6f" % y] for x,y in zip(results,qvals)]
+
+print '\t'.join("ORF gene start end strand TAs sum mean sat NZsites NZmean expec_sum FC LFC pval qval".split())
+for vals in results:
+  print '\t'.join([str(x) for x in vals])


=====================================
src/pytransit/__init__.py
=====================================
@@ -2,6 +2,6 @@
 __all__ = ["transit_tools", "tnseq_tools", "norm_tools", "stat_tools"]
 
 
-__version__ = "v3.0.1"
+__version__ = "v3.0.2"
 prefix = "[TRANSIT]"
 


=====================================
src/pytransit/analysis/gi.py
=====================================
@@ -580,10 +580,19 @@ class GIMethod(base.QuadConditionMethod):
 
         (args, kwargs) = transit_tools.cleanargs(rawargs)
 
+        # ctrl-vs-exp = condition 1-vs-2
+        # originally, MAD defined order of CL args this way: strA/cond1, strB/cond1, strA/cond2, strB/cond
+        #ctrldataA = args[0].split(",")
+        #ctrldataB = args[1].split(",")
+        #expdataA = args[2].split(",")
+        #expdataB = args[3].split(",")
+
+        # TRI changed order of args this way: strA/cond1, strA/cond2, strB/cond1, strB/cond
         ctrldataA = args[0].split(",")
-        ctrldataB = args[1].split(",")
-        expdataA = args[2].split(",")
+        expdataA = args[1].split(",")
+        ctrldataB = args[2].split(",")
         expdataB = args[3].split(",")
+
         annotationPath = args[4]
         output_path = args[5]
         output_file = open(output_path, "w")
@@ -633,7 +642,6 @@ class GIMethod(base.QuadConditionMethod):
         Na2 = len(self.expdataA)
         Nb2 = len(self.expdataB)
 
-
         # Get data
         self.transit_message("Getting Data")
         (data, position) = transit_tools.get_validated_data(wiglist, wxobj=self.wxobj)
@@ -893,7 +901,13 @@ class GIMethod(base.QuadConditionMethod):
 
     @classmethod
     def usage_string(self):
-        return """python %s GI <comma-separated .wig control files condition A> <comma-separated .wig control files condition B> <comma-separated .wig experimental files condition A> <comma-separated .wig experimental files condition B> <annotation .prot_table or GFF3> <output file> [Optional Arguments]
+        #return """python %s GI <comma-separated .wig control files condition A> <comma-separated .wig control files condition B> <comma-separated .wig experimental files condition A> <comma-separated .wig experimental files condition B> <annotation .prot_table or GFF3> <output file> [Optional Arguments]
+        return """python %s GI <wigs_for_strA_cond1> <wigs_for_strA_cond2> <wigs_for_strB_cond1> <wigs_for_strB_cond2> <annotation .prot_table or GFF3> <output file> [Optional Arguments]
+
+        GI performs a comparison among 4 groups of datasets, strain A and B assessed in conditions 1 and 2 (e.g. control vs treatment).
+        It looks for interactions where the response to the treatment (i.e. effect on insertion counts) depends on the strain (output variable: delta_LFC).
+        Provide replicates in each group as a comma-separated list of wig files.
+        Significant interactions are those with "HDI outside ROPE?"=TRUE, and all genes are sorted by significance using BFDR."
 
         Optional Arguments:
         -s <integer>    :=  Number of samples. Default: -s 10000


=====================================
src/pytransit/analysis/resampling.py
=====================================
@@ -641,8 +641,8 @@ class ResamplingMethod(base.DualConditionMethod):
         -pc             :=  Pseudocounts to be added at each site.
         -l              :=  Perform LOESS Correction; Helps remove possible genomic position bias.
                             Default: Turned Off.
-        -iN <float>     :=  Ignore TAs occuring within given percentage (as integer) of the N terminus. Default: -iN 0
-        -iC <float>     :=  Ignore TAs occuring within given percentage (as integer) of the C terminus. Default: -iC 0
+        -iN <int>       :=  Ignore TAs occuring within given percentage (as integer) of the N terminus. Default: -iN 0
+        -iC <int>       :=  Ignore TAs occuring within given percentage (as integer) of the C terminus. Default: -iC 0
         --ctrl_lib      :=  String of letters representing library of control files in order
                             e.g. 'AABB'. Default empty. Letters used must also be used in --exp_lib
                             If non-empty, resampling will limit permutations to within-libraries.


=====================================
src/pytransit/analysis/zinb.py
=====================================
@@ -248,7 +248,7 @@ class ZinbMethod(base.MultiConditionMethod):
 
               # filter out genes that have low saturation across all conditions, since pscl sometimes does not fit params well (resulting in large negative intercepts and high std errors)
               NZpercs = aggregate(melted$cnt,by=list(melted$cond),FUN=function(x) { sum(x>0)/length(x) })
-              if (max(NZpercs$x)<=0.15) { return(c(pval=1,status="low saturation (near-essential) across all conditions, not analyzed")) }
+              if (max(NZpercs$x)<0.15) { return(c(pval=1,status="low saturation (<15%) across all conditions (pan-growth-defect) - not analyzed")) }
 
               sums = aggregate(melted$cnt,by=list(melted$cond),FUN=sum)
               # to avoid model failing due to singular condition, add fake counts of 1 to all conds if any cond is all 0s
@@ -271,7 +271,7 @@ class ZinbMethod(base.MultiConditionMethod):
                     mod = zeroinfl(as.formula(zinbMod1),data=melted,dist="negbin")
                     coeffs = summary(mod)$coefficients
                     # [,1] is col of parms, [,2] is col of stderrs, assume Intercept is always first
-                    if (coeffs$count[,2][1]>0.5) { status = 'warning: high stderr on Intercept for mod1' }
+                    #if (coeffs$count[,2][1]>0.5) { status = 'warning: high stderr on Intercept for mod1' }
                     mod
                   } else {
                     f1 = nbMod1
@@ -311,8 +311,9 @@ class ZinbMethod(base.MultiConditionMethod):
               }
 
               if (is.null(mod1) | is.null(mod0)) { return (c(1, paste0("Model Error. ", status))) }
-              if ((minCount == 0) && (sum(is.na(coef(summary(mod1))$count[,4]))>0)) { return(c(1, "Has Coefs, pvals are NAs")) } # rare failure mode - has coefs, but pvals are NA
+              if ((minCount == 0) && (sum(is.na(coef(summary(mod1))$count[,4]))>0)) { return(c(1, "Has Coefs, but Pvals are NAs (model failure)")) } # rare failure mode - has coefs, but pvals are NA
               df1 = attr(logLik(mod1),"df"); df0 = attr(logLik(mod0),"df") # should be (2*ngroups+1)-3
+              if (DEBUG) print(sprintf("delta_log_likelihood=%f",logLik(mod1)-logLik(mod0)))
               pval = pchisq(2*(logLik(mod1)-logLik(mod0)),df=df1-df0,lower.tail=F) # alternatively, could use lrtest()
               # this gives same answer, but I would need to extract the Pvalue...
               #require(lmtest)
@@ -394,8 +395,12 @@ class ZinbMethod(base.MultiConditionMethod):
                     self.transit_error("Cannot find gene: {0}".format(GENE))
                     sys.exit(0)
 
+            if (DEBUG):
+               self.transit_message("======================================================================")
+               self.transit_message(gene["rv"]+" "+gene["gene"])
+
             if (len(RvSiteindexesMap[Rv]) <= 1):
-                status.append("TA sites <= 1")
+                status.append("TA sites <= 1, not analyzed")
                 pvals.append(1)
             else:
                 # For winsorization
@@ -411,7 +416,7 @@ class ZinbMethod(base.MultiConditionMethod):
                            norm_data,
                            conditions, covariates, interactions, NZMeanByRep, LogZPercByRep)
                 if (numpy.sum(readCounts) == 0):
-                    status.append("No counts in all conditions")
+                    status.append("pan-essential (no counts in all conditions) - not analyzed")
                     pvals.append(1)
                 else:
                     df_args = {
@@ -525,8 +530,11 @@ class ZinbMethod(base.MultiConditionMethod):
 
         self.transit_message("Adding File: %s" % (self.output))
         file = open(self.output,"w")
+        if len(headersStatGroupNames)==2: lfcNames = ["LFC"] 
+        else: lfcNames = list(map(lambda v: "LFC_"+v,headersStatGroupNames))
         head = ("Rv Gene TAs".split() +
                 list(map(lambda v: "Mean_" + v, headersStatGroupNames)) +
+                lfcNames+
                 list(map(lambda v: "NZmean_" + v, headersStatGroupNames)) +
                 list(map(lambda v: "NZperc_" + v, headersStatGroupNames)) +
                 "pval padj".split() + ["status"])
@@ -535,9 +543,16 @@ class ZinbMethod(base.MultiConditionMethod):
         file.write('\t'.join(head)+EOL)
         for gene in genes:
             Rv = gene["rv"]
+            means = [statsByRv[Rv]['mean'][group] for group in orderedStatGroupNames]
+            PC = 5
+            if len(means)==2: LFCs = [numpy.math.log((means[1]+PC)/(means[0]+PC),2)]
+            else: 
+              m = numpy.mean(means)
+              LFCs = [numpy.math.log((x+PC)/(m+PC),2) for x in means]
             vals = ([Rv, gene["gene"], str(len(RvSiteindexesMap[Rv]))] +
-                    ["%0.2f" % statsByRv[Rv]['mean'][group] for group in orderedStatGroupNames] +
-                    ["%0.2f" % statsByRv[Rv]['nz_mean'][group] for group in orderedStatGroupNames] +
+                    ["%0.1f" % statsByRv[Rv]['mean'][group] for group in orderedStatGroupNames] +
+                    ["%0.3f" % x for x in LFCs]+
+                    ["%0.1f" % statsByRv[Rv]['nz_mean'][group] for group in orderedStatGroupNames] +
                     ["%0.2f" % statsByRv[Rv]['nz_perc'][group] for group in orderedStatGroupNames] +
                     ["%f" % x for x in [pvals[Rv], qvals[Rv]]]) + [run_status[Rv]]
             file.write('\t'.join(vals)+EOL)


=====================================
src/pytransit/doc/source/transit_methods.rst
=====================================
@@ -534,7 +534,7 @@ parameters are available for the method:
    which are likely not significant. This dramatically speeds up the
    computation at the cost of less accurate estimates for those genes
    that terminate early (i.e. deemed not significant). This option is
-   OFF by default.
+   OFF by default. (see Notes below)
 
 -  **Include Zeros:** Select to include  sites that are zero. This is the
    preferred behavior, however, unselecting this (thus ignoring sites that)
@@ -547,10 +547,37 @@ parameters are available for the method:
    as real differences. See the :ref:`Normalization <normalization>` section for a description
    of normalization method available in TRANSIT.
 
--  **--ctrl_lib, --exp_lib** These are for doing resampling with datasets from multiple libraries, see below.
+-  **--ctrl_lib, --exp_lib:** These are for doing resampling with datasets from multiple libraries, see below.
+
+-  **-iN, -iC:** Trimming of TA sites near N- and C-terminus.
+   The default for trimming TA sites in the termini of ORFs is 0.
+   However, TA sites in the stop codon (e.g. TAG) are automatically excluded.
+   Trimming is specified as a percentage (as an integer), so, for example,
+   if you want to trim TA sites within 5% of the termini, you would 
+   add the flags '-iN 5 -iC 5' (not 0.05).
+
+-  **-pc**: Pseudocounts for resampling.  By default, pseudocounts are not used.
+   However, if you set '-pc 5', for example, it will add an insertion count of 5
+   at a "fake" site in each condition, which can help smooth out the impact of noise
+   (e.g. large apparent log-fold-changes (LFCs) due to just a few small counts).
+   Note that, when calculating LFCs, if either the numerator or denominator is 0,
+   a 1 is automatically added to both to prevent numerically undefined results.
+
 
 |
 
+Notes
+~~~~~
+
+I recommend using -a (adaptive resampling). It runs much faster, and the p-values
+will be very close to a full non-adaptive run (all 10,000 samples).
+
+Occasionally, people ask if resampling can be done on intergenic regions as well.
+It could be done pretty easily (for example by making a prot_table with coordinates
+for the regions between genes).  But it is usually not worthwhile, because most
+intergenic regions are small (<100 bp) and contain very few TA sites (often 0-2),
+making it difficult to make confident calls on essentiality.
+
 
 Doing resampling with a combined_wig file
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -752,19 +779,33 @@ that imply a genetic interaction.
 How does it work?
 ~~~~~~~~~~~~~~~~~
 
-| For a formal description of how this method works, see our paper [DeJesus20170NAR]_:
+GI performs a comparison among 4 groups of datasets, strain A and B assessed in conditions 1 and 2 (e.g. control vs treatment).
+It looks for interactions where the response to the treatment (i.e. effect on insertion counts) depends on the strain.
+
+If you think of the effect of treatment as a log-fold-change (e.g. of
+the insert counts between control and treatment in strain A), which is
+like a "slope", then the interacting genes are those that exhibit a difference
+in the effect of the treatment between the strains, and hence a difference in the
+slopes between strain A and B (represented by 'delta_LFC' in the output file).
+
+| For a formal description of how this method works, see our paper [DeJesus2017NAR]_:
 |
 |  DeJesus, M.A., Nambi, S., Smith, C.M., Baker, R.E., Sassetti, C.M., Ioerger, T.R. `Statistical analysis of genetic interactions in Tn-Seq data. <https://www.ncbi.nlm.nih.gov/pubmed/28334803>`_ *Nucleic Acids Research.* 2017. 45(11):e93. doi: 10.1093/nar/gkx128.
 
+
+
 |
 
 
-Example
-~~~~~~~
+Usage
+~~~~~
 
 ::
 
-  python transit.py GI <comma-separated .wig control files condition A> <comma-separated .wig control files condition B> <comma-separated .wig experimental files condition A> <comma-separated .wig experimental files condition B> <annotation .prot_table or GFF3> <output file> [Optional Arguments]
+  python transit.py GI <wigs_for_strA_cond1> <wigs_for_strA_cond2> <wigs_for_strB_cond1> <wigs_for_strB_cond2> <annotation .prot_table or GFF3> <output file> [Optional Arguments]
+
+        Provide replicates in each group as a comma-separated list of wig files.
+
         Optional Arguments:
         -s <integer>    :=  Number of samples. Default: -s 10000
         --rope <float>  :=  Region of Practical Equivalence. Area around 0 (i.e. 0 +/- ROPE) that is NOT of interest. Can be thought of similar to the area of the null-hypothesis. Default: --rope 0.5
@@ -776,6 +817,19 @@ Example
 
 You can think of 'control' and 'experimental' samples as 'untreated' vs. 'treated'.
 
+Example
+~~~~~~~
+
+In this example, the effect of a knockout of SigB is being evaluated for its effect on tolerance of isoniazid.
+Some genes may become more essential (or less) in the presence of INH in the wild-type strain.
+The genes implied to interact with SigB are those whose response to INH changes in the knock-out strain compared to the wild-type.
+Note there are 2 replicates in each of the 4 groups of datasets.
+
+::
+
+  python transit/src/transit.py GI WT_untreated1.wig,WT_untreated2.wig WT_INH_1.wig,WT_INH_2.wig delta_SigB_untreated1.wig,delta_SigB_untreated2.wig delta_SigB_INH_1.wig,delta_SigB_INH_2.wig mc2_155_tamu.prot_table GI_delta_SigB_INH.txt
+
+
 Parameters
 ~~~~~~~~~~
 
@@ -858,6 +912,11 @@ typical threshold for conditional essentiality on is q-value < 0.05.
 
 |
 
+Significant interactions are those with "HDI outside ROPE?"=TRUE.
+
+All genes are sorted by significance using BFDR.
+
+
 
 .. rst-class:: transit_sectionend
 ----
@@ -1010,7 +1069,9 @@ to be installed on your system.  See :ref:`Installation Instructions <install-zi
 How does it work?
 ~~~~~~~~~~~~~~~~~
 
-| For a formal description of how this method works, see our `paper on bioRxiv <https://www.biorxiv.org/content/10.1101/590281v1>`_.
+| For a formal description of how this method works, see our paper [ZINB]_: 
+|
+|  Subramaniyam S, DeJesus MA, Zaveri A, Smith CM, Baker RE, Ehrt S, Schnappinger D, Sassetti CM, Ioerger TR. (2019).  `Statistical analysis of variability in TnSeq data across conditions using Zero-Inflated Negative Binomial regression. <https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3156-z>`_, *BMC Bioinformatics*. 2019 Nov 21;20(1):603. doi: 10.1186/s12859-019-3156-z.
 
 
 
@@ -1019,7 +1080,7 @@ Example
 
 ::
 
-  python transit.py zinb <combined wig file> <samples_metadata file> <annotation .prot_table> <output file> [Optional Arguments]
+  python3 transit.py zinb <combined wig file> <samples_metadata file> <annotation .prot_table> <output file> [Optional Arguments]
         Optional Arguments:
         -n <string>         :=  Normalization method. Default: -n TTR
         --ignore-conditions <cond1,cond2> :=  Comma separated list of conditions to ignore, for the analysis.
@@ -1030,8 +1091,7 @@ Example
         --covars <covar1,covar2...>     :=  Comma separated list of covariates (in metadata file) to include, for the analysis.
         --interactions <covar1,covar2...>     :=  Comma separated list of covariates to include, that interact with the condition for the analysis.
         --gene <RV number or Gene name> := Run method for one gene and print model output.
-
-
+        -v              := verbose, print out the model coefficients for each gene.
 
 
 .. _combined_wig:
@@ -1295,9 +1355,15 @@ typical threshold for conditional essentiality on is q-value < 0.05.
 +---------------------+-----------------------------------------------------------------+
 | TAs                 | Number of TA sites in Gene                                      |
 +---------------------+-----------------------------------------------------------------+
-| <Condition Mean>    | Mean read-counts for the gene, by condition                     |
+| <Mean_Condition>    | Mean read-counts for the gene, by condition                     |
++---------------------+-----------------------------------------------------------------+
+| <LFC_Condition>     | Log-fold-change (base 2) of mean insertion count relative to    |
+|                     | mean across all conditions. Pseudo-counts of 5 are added.       |
+|                     | If only 2 conditions, LFC is based on ratio of second to first. |
++---------------------+-----------------------------------------------------------------+
+| <NZmean_Condition>  | Non-zero Mean read-counts for the gene, by condition            |
 +---------------------+-----------------------------------------------------------------+
-| <Condition NZMean>  | Non-zero Mean read-counts for the gene, by condition            |
+| <NZperc_Condition>  | Proportion of sites with non-zero counts, by condition          |
 +---------------------+-----------------------------------------------------------------+
 | p-value             | P-value calculated by the ZINB test.                            |
 +---------------------+-----------------------------------------------------------------+


=====================================
src/pytransit/doc/source/transit_overview.rst
=====================================
@@ -149,5 +149,7 @@ Other references, including methods utilized by TRANSIT:
 .. [DeJesus2016] `DeJesus, M.A. and Ioerger, T.R. (2016). Normalization of transposon-mutant library sequencing datasets to improve identification of conditionally essential genes. Journal of Bioinformatics and Computational Biology, 14(3):1642004 <http://www.ncbi.nlm.nih.gov/pubmed/26932272>`_
 
 
-.. [DeJesus20170NAR] `DeJesus, M.A., Nambi, S., Smith, C.M., Baker, R.E., Sassetti, C.M., Ioerger, T.R. Statistical analysis of genetic interactions in Tn-Seq data.  Nucleic Acids Research. 2017. 45(11):e93. doi: 10.1093/nar/gkx128. <https://www.ncbi.nlm.nih.gov/pubmed/28334803>`_
+.. [DeJesus2017NAR] `DeJesus, M.A., Nambi, S., Smith, C.M., Baker, R.E., Sassetti, C.M., Ioerger, T.R. Statistical analysis of genetic interactions in Tn-Seq data.  Nucleic Acids Research. 2017. 45(11):e93. doi: 10.1093/nar/gkx128. <https://www.ncbi.nlm.nih.gov/pubmed/28334803>`_
+
+.. [ZINB] `Subramaniyam S, DeJesus MA, Zaveri A, Smith CM, Baker RE, Ehrt S, Schnappinger D, Sassetti CM, Ioerger TR. (2019).  Statistical analysis of variability in TnSeq data across conditions using Zero-Inflated Negative Binomial regression. *BMC Bioinformatics*. 2019 Nov 21;20(1):603. doi: 10.1186/s12859-019-3156-z. <https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-019-3156-z>`_
 


=====================================
src/pytransit/export/combined_wig.py
=====================================
@@ -188,7 +188,7 @@ class CombinedWigMethod(base.SingleConditionMethod):
             self.output.write("%d\t%s\t%s\n" % (position[i],vals,",".join(["%s (%s)" % (orf,rv2info.get(orf,["-"])[0]) for orf in hash.get(position[i], [])])   ))
             # Update progress
             text = "Running Export Method... %5.1f%%" % (100.0*i/N)
-            self.progress_update(text, i)
+            if i%1000==0: self.progress_update(text, i)
         self.output.close()
 
 


=====================================
src/pytransit/transit_gui.py
=====================================
@@ -1757,7 +1757,7 @@ along with TRANSIT.  If not, see <http://www.gnu.org/licenses/>.
         window = 100
         for j in range(K):
 
-            size = len(position)/window + 1
+            size = int(len(position)/window) + 1 # python3 requires explicit rounding to int
             x_w = numpy.zeros(size)
             y_w = numpy.zeros(size)
             for i in range(size):
@@ -1767,7 +1767,7 @@ along with TRANSIT.  If not, see <http://www.gnu.org/licenses/>.
             y_smooth = stat_tools.loess(x_w, y_w, h=10000)
             plt.plot(x_w, y_w, "g+")
             plt.plot(x_w, y_smooth, "b-")
-            plt.xlabel("Genomic Position")
+            plt.xlabel("Genomic Position (TA sites)")
             plt.ylabel("Reads per 100 insertion sites")
 
             plt.title("LOESS Fit - %s" % transit_tools.basename(datasets_selected[j]) )



View it on GitLab: https://salsa.debian.org/med-team/tnseq-transit/commit/20d19355d501e251533c1e6e708100581caffe85

-- 
View it on GitLab: https://salsa.debian.org/med-team/tnseq-transit/commit/20d19355d501e251533c1e6e708100581caffe85
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200118/ebff47ec/attachment-0001.html>