[med-svn] [Git][med-team/tnseq-transit][upstream] New upstream version 2.1.2

Andreas Tille gitlab at salsa.debian.org
Tue May 22 16:26:19 BST 2018


Andreas Tille pushed to branch upstream at Debian Med / tnseq-transit


Commits:
815892c5 by Andreas Tille at 2018-05-22T16:39:38+02:00
New upstream version 2.1.2
- - - - -


20 changed files:

- README.md
- VERSION
- src/pytpp/__main__.py
- src/pytpp/tpp_gui.py
- src/pytpp/tpp_tools.py
- src/pytransit/__init__.py
- src/pytransit/__main__.py
- src/pytransit/analysis/binomial.py
- src/pytransit/analysis/example.py
- src/pytransit/analysis/griffin.py
- src/pytransit/analysis/gumbel.py
- src/pytransit/analysis/hmm.py
- src/pytransit/analysis/rankproduct.py
- src/pytransit/analysis/resampling.py
- src/pytransit/analysis/tn5gaps.py
- src/pytransit/doc/source/transit_install.rst
- src/pytransit/tnseq_tools.py
- src/pytransit/transit_gui.py
- src/pytransit/transit_tools.py
- src/tpp.py


Changes:

=====================================
README.md
=====================================
--- a/README.md
+++ b/README.md
@@ -1,14 +1,27 @@
 
 
 
-# TRANSIT 2.1.0
+# TRANSIT 2.1.2
 
 
 [![Build Status](https://travis-ci.org/mad-lab/transit.svg?branch=master)](https://travis-ci.org/mad-lab/transit)   [![Documentation Status](https://readthedocs.org/projects/transit/badge/?version=latest)](http://transit.readthedocs.io/en/latest/?badge=latest) 
 
 
 
-**Version 2.1.0 changes (June, 20017)**
+
+**Version 2.1.2 changes (May, 2018)**
+- Improved resampling on comparisons with unbalanced number of replicates.
+- Improved speed of TPP.
+- Added Barseq functionality to TPP.
+- Fixed bug with creating resampling histograms on GUI mode.
+- Miscellaneous code improvements.
+
+
+**Version 2.1.1 changes (July, 2017)**
+- Miscellaneous bug fixes
+
+
+**Version 2.1.0 changes (June, 2017)**
 - Added tooltips next to most parameters to explain their functionality.
 - Added Quality Control window, with choice for normalization method.
 - Added more normalization options to the HMM method.
@@ -23,7 +36,6 @@
 - Lots of bug fixes.
 
 
-
 **Version 2.0.2 changes (August, 2016)**
 - Added support for for custom primers in TPP.
 - Added support for annotations in GFF3 format.


=====================================
VERSION
=====================================
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-version: 2.1.0-18-g4192-mod
+version: 2.1.2-1-gccf8-mod


=====================================
src/pytpp/__main__.py
=====================================
--- a/src/pytpp/__main__.py
+++ b/src/pytpp/__main__.py
@@ -38,7 +38,7 @@ def main(arguments=[]):
 
     vars = Globals()
 
-    if len(arguments) <= 1 and hasWx:
+    if len(arguments)==0 and hasWx:        
         app = wx.App(False)
         form = MyForm(vars)
         form.update_dataset_list()
@@ -78,7 +78,8 @@ def main(arguments=[]):
 
         # Check for strange flags
         known_flags = set(["tn5", "help", "himar1", "protocol", "primer", "reads1",
-                           "reads2", "bwa", "ref", "maxreads", "output", "mismatches", "flags"])
+                           "reads2", "bwa", "ref", "maxreads", "output", "mismatches", "flags",
+                           "barseq_catalog_in", "barseq_catalog_out"])
         unknown_flags = set(kwargs.keys()) - known_flags
         if unknown_flags:
             print "error: unrecognized flags:", ", ".join(unknown_flags)


=====================================
src/pytpp/tpp_gui.py
=====================================
--- a/src/pytpp/tpp_gui.py
+++ b/src/pytpp/tpp_gui.py
@@ -95,19 +95,6 @@ if hasWx:
             bmp = wx.ArtProvider.GetBitmap(wx.ART_INFORMATION, wx.ART_OTHER, (16, 16))
 
 
-            # BWA
-            sizer0 = wx.BoxSizer(wx.HORIZONTAL)
-            label0 = wx.StaticText(panel, label='BWA executable:',size=(330,-1))
-            sizer0.Add(label0,0,wx.ALIGN_CENTER_VERTICAL,0)
-
-            self.picker0 = wx.lib.filebrowsebutton.FileBrowseButton(panel, id = wx.ID_ANY, size=(400,30), dialogTitle='Path to BWA', fileMode=wx.OPEN, fileMask='bwa*', startDirectory=os.path.dirname(vars.bwa), initialValue=vars.bwa, labelText='')
-
-             
-            sizer0.Add(self.picker0, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
-            sizer0.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Specify a path to the BWA executable (including the executable)."), flag=wx.CENTER, border=0)
-            sizer0.Add((10, 1), 0, wx.EXPAND)
-            sizer.Add(sizer0,0,wx.EXPAND,0)
-
             # REFERENCE
             sizer3 = wx.BoxSizer(wx.HORIZONTAL)
             label3 = wx.StaticText(panel, label='Choose a reference genome (FASTA):',size=(330,-1))
@@ -147,7 +134,7 @@ if hasWx:
             self.base = wx.TextCtrl(panel,value=vars.base,size=(400,30))
             sizer5.Add(self.base, proportion=1.0, flag=wx.EXPAND|wx.ALL, border=5)
             sizer5.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Select a a label prefix that will be used when writing output files e.g. 'wt_run1'"), flag=wx.CENTER, border=0)
-            sizer5.Add((130, 1), 0, wx.EXPAND) 
+            sizer5.Add((10, 1), 0, wx.EXPAND) 
             sizer.Add(sizer5,0,wx.EXPAND,0)
 
 
@@ -164,12 +151,12 @@ The Sassetti protocol generally assumes the reads include the primer prefix and 
 
 The Mme1 protocol generally assumes reads do NOT include the primer prefix, and that the reads are sequenced in the reverse direction"""
             sizer_protocol.Add(TPPIcon(panel, wx.ID_ANY, bmp, protocol_tooltip_text), flag=wx.CENTER, border=0)
-            sizer_protocol.Add((130, 1), 0, wx.EXPAND) 
+            sizer_protocol.Add((10, 1), 0, wx.EXPAND) 
             sizer.Add(sizer_protocol,0,wx.EXPAND,0)
 
             self.Bind(wx.EVT_COMBOBOX, self.OnProtocolSelection, id=self.protocol.GetId())
 
-
+          
             # TRANSPOSON
             sizer8 = wx.BoxSizer(wx.HORIZONTAL)
             label8 = wx.StaticText(panel, label='Transposon used:',size=(340,-1))
@@ -178,7 +165,7 @@ The Mme1 protocol generally assumes reads do NOT include the primer prefix, and 
             self.transposon.SetStringSelection(vars.transposon)
             sizer8.Add(self.transposon, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
             sizer8.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Select the transposon used to construct the TnSeq libraries. This will automatically populate the primer prefix field. Select custom to specify your own sequence."), flag=wx.CENTER, border=0)
-            sizer8.Add((130, 1), 0, wx.EXPAND)
+            sizer8.Add((10, 1), 0, wx.EXPAND)
             sizer.Add(sizer8,0,wx.EXPAND,0)
 
 
@@ -189,7 +176,7 @@ The Mme1 protocol generally assumes reads do NOT include the primer prefix, and 
             self.prefix = wx.TextCtrl(panel,value=str(vars.prefix), size=(400,30))
             sizer4.Add(self.prefix, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
             sizer4.Add(TPPIcon(panel, wx.ID_ANY, bmp, "If present in the reads, specify the primer sequence. If it has been stripped away already, leave this field empty."), flag=wx.CENTER, border=0)
-            sizer4.Add((130, 1), 0, wx.EXPAND)
+            sizer4.Add((10, 1), 0, wx.EXPAND)
             sizer.Add(sizer4,0,wx.EXPAND,0)
             
             self.Bind(wx.EVT_COMBOBOX, self.OnTransposonSelection, id=self.transposon.GetId())
@@ -199,22 +186,35 @@ The Mme1 protocol generally assumes reads do NOT include the primer prefix, and 
             sizer6 = wx.BoxSizer(wx.HORIZONTAL)
             label6 = wx.StaticText(panel, label='Max reads (leave blank to use all):',size=(340,-1))
             sizer6.Add(label6,0,wx.ALIGN_CENTER_VERTICAL,0)
-            self.maxreads = wx.TextCtrl(panel,size=(400,30))
+            self.maxreads = wx.TextCtrl(panel,size=(150,30))
             sizer6.Add(self.maxreads, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
             sizer6.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Maximum reads to use from the reads files. Useful for running only a portion of very large number of reads. Leave blank to use all the reads."), flag=wx.CENTER, border=0)
-            sizer6.Add((130, 1), 0, wx.EXPAND)
+            sizer6.Add((10, 1), 0, wx.EXPAND)
             sizer.Add(sizer6,0,wx.EXPAND,0)
 
             # MISMATCHES
             sizer7 = wx.BoxSizer(wx.HORIZONTAL)
             label7 = wx.StaticText(panel, label='Mismatches allowed in Tn prefix:',size=(340,-1))
             sizer7.Add(label7,0,wx.ALIGN_CENTER_VERTICAL,0)
-            self.mismatches = wx.TextCtrl(panel,value=str(vars.mm1),size=(400,30))
+            self.mismatches = wx.TextCtrl(panel,value=str(vars.mm1),size=(150,30))
             sizer7.Add(self.mismatches, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
             sizer7.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Number of mismatches allowed in the tn-prefix before discarding the read."), flag=wx.CENTER, border=0)
-            sizer7.Add((130, 1), 0, wx.EXPAND)
+            sizer7.Add((10, 1), 0, wx.EXPAND)
             sizer.Add(sizer7,0,wx.EXPAND,0)    
 
+            # BWA
+            sizer0 = wx.BoxSizer(wx.HORIZONTAL)
+            label0 = wx.StaticText(panel, label='BWA executable:',size=(330,-1))
+            sizer0.Add(label0,0,wx.ALIGN_CENTER_VERTICAL,0)
+
+            self.picker0 = wx.lib.filebrowsebutton.FileBrowseButton(panel, id = wx.ID_ANY, size=(400,30), dialogTitle='Path to BWA', fileMode=wx.OPEN, fileMask='bwa*', startDirectory=os.path.dirname(vars.bwa), initialValue=vars.bwa, labelText='')
+
+             
+            sizer0.Add(self.picker0, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
+            sizer0.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Specify a path to the BWA executable (including the executable)."), flag=wx.CENTER, border=0)
+            sizer0.Add((10, 1), 0, wx.EXPAND)
+            sizer.Add(sizer0,0,wx.EXPAND,0)
+
             # BWA FLAGS
             sizer8 = wx.BoxSizer(wx.HORIZONTAL)
             label8 = wx.StaticText(panel, label='BWA flags (Optional)',size=(340,-1))
@@ -222,10 +222,34 @@ The Mme1 protocol generally assumes reads do NOT include the primer prefix, and 
             self.flags = wx.TextCtrl(panel,value=vars.flags,size=(400,30))
             sizer8.Add(self.flags, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
             sizer8.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Use this textobx to enter any desired flags for the BWA alignment. For example, to limit the number of mismatches to 1, type: -k 1. See the BWA documentation for all possible flags."), flag=wx.CENTER, border=0)
-            sizer8.Add((130, 1), 0, wx.EXPAND)
+            sizer8.Add((10, 1), 0, wx.EXPAND)
             sizer.Add(sizer8,0,wx.EXPAND,0)
 
 
+            # BARSEQ CATALOG
+            sizer9 = wx.BoxSizer(wx.HORIZONTAL)
+            label9 = wx.StaticText(panel, label='BarSeq Catalog file:',size=(120,-1))
+            sizer9.Add(label9,0,wx.ALIGN_CENTER_VERTICAL,0)
+
+            self.barseq_select = wx.ComboBox(panel,choices=['this is not a Barseq dataset','read catalog file','write catalog file'],size=(200,30)) ## # does a BoxSizer use wx.HORIZONTAL, not wx.EXPAND?
+            self.barseq_select.SetSelection(0)
+            sizer9.Add(self.barseq_select, proportion=0.5, flag=wx.EXPAND|wx.ALL, border=5) ## 
+
+            self.picker9 = wx.lib.filebrowsebutton.FileBrowseButton(panel, id=wx.ID_ANY, dialogTitle='Please select the Barseq catalog filename', fileMode=wx.OPEN, size=(400,30), startDirectory=os.path.dirname(vars.fq2), initialValue="", labelText='', ) # no need for this: changeCallback=self.OnChanged9 ; initialValue set below ; no file mask
+            sizer9.Add(self.picker9, proportion=1, flag=wx.EXPAND|wx.ALL, border=5)
+
+            if vars.barseq_catalog_in!=None:
+              self.barseq_select.SetSelection(1)
+              self.picker9.SetValue(vars.barseq_catalog_in)
+            if vars.barseq_catalog_out!=None:
+              self.barseq_select.SetSelection(2)
+              self.picker9.SetValue(vars.barseq_catalog_out)
+
+            sizer9.Add(TPPIcon(panel, wx.ID_ANY, bmp, "Select a filename for BarSeq catalog."), flag=wx.CENTER, border=0)
+            sizer9.Add((10, 1), 0, wx.EXPAND) 
+            sizer.Add(sizer9,0,wx.EXPAND,0)
+
+
 
 
 #
@@ -384,6 +408,7 @@ The Mme1 protocol generally assumes reads do NOT include the primer prefix, and 
 #
 
         def map_reads(self,event):
+
             # add bwa path, prefix
             bwapath = self.picker0.GetValue()
             fq1, fq2, ref, base, prefix, maxreads = self.picker1.GetValue(), self.picker2.GetValue(), self.picker3.GetValue(), self.base.GetValue(), self.prefix.GetValue(), self.maxreads.GetValue()
@@ -408,6 +433,11 @@ The Mme1 protocol generally assumes reads do NOT include the primer prefix, and 
             if maxreads == '': self.vars.maxreads = -1
             else: self.vars.maxreads = int(maxreads)
 
+            barseq_select = self.barseq_select.GetSelection()
+            self.vars.barseq_catalog_in = self.vars.barseq_catalog_out = None
+            if barseq_select==1: self.vars.barseq_catalog_in = self.picker9.GetValue()
+            if barseq_select==2: self.vars.barseq_catalog_out = self.picker9.GetValue()
+
             self.vars.action = "start"
             self.Close()
             return 0


=====================================
src/pytpp/tpp_tools.py
=====================================
--- a/src/pytpp/tpp_tools.py
+++ b/src/pytpp/tpp_tools.py
@@ -25,6 +25,7 @@ import sys, re, shutil
 import platform
 import gzip
 import subprocess
+from collections import defaultdict
 
 def cleanargs(rawargs):
     #TODO: Write docstring
@@ -79,7 +80,7 @@ def fastq2reads(infile,outfile,maxreads):
     if maxreads > -1:
         if tot > maxreads:
             break
-    if cnt==0: 
+    if cnt==0:
       h = line[1:] # strip off '@'
       #h = h.replace(' ','_')
       output.write(">%s" % h)
@@ -123,17 +124,17 @@ def fix_paired_headers_for_bwa(reads1,reads2):
       e = e[:i]+e[i+1:]
       f = f[:i]+f[i+1:]
     c.write(e+"\n")
-    d.write(f+"\n")      
+    d.write(f+"\n")
   except Exception as ex:
     a.close(); b.close(); c.close(); d.close()
     error(ex.args[0])
   a.close(); b.close(); c.close(); d.close()
-  
+
   if(os.path.exists(reads1)): os.remove(reads1)
   if(os.path.exists(reads2)): os.remove(reads2)
   os.rename(temp1, reads1)
   os.rename(temp2, reads2)
-  
+
   '''
   if platform.platform == 'win32':
 	  os.system("move %s %s" % (temp1,reads1))
@@ -143,6 +144,72 @@ def fix_paired_headers_for_bwa(reads1,reads2):
 	  os.system("mv %s %s" % (temp2, reads2))
   '''
 
+
+# checks for a match allowing 1 or 2 mismatches
+# if not a match returns -1,-1. If match occurs, returns 1 and the start index of match
+def bit_parallel_with_max_2_error(text, pattern, m):
+    S_table = defaultdict(int)
+    for i, c in enumerate(pattern):
+        S_table[c] |= 1 << i
+    R0 = 0
+    R1 = 0
+    R2 = 0
+    mask = 1 << (m - 1)
+    for j, c in enumerate(text):
+        S = S_table[c]
+        shR0 = (R0 << 1) | 1
+        shR1 = (R1 << 1) | 1
+        R0 = shR0 & S
+        R1 = ((R1 << 1) | 1) & S | shR0
+        R2 = ((R2 << 1) | 1) & S | shR1
+        # first if-statement commented because we have already checked for exact match using find() in mmfind()
+        #if R0 & mask:  #exact match
+            #return 1, j - m + 1
+        if R1 & mask:   # 1 mismatch
+            return 1, j - m + 1
+        if R2 & mask:   # 2 mismatches
+            return 1, j - m + 1
+    return -1,-1
+
+
+# checks for a match allowing 1 mismatch
+# if not a match returns -1,-1. If match occurs, returns 1 and the start index of match
+def bit_parallel_with_max_1_error(text, pattern, m):
+    S_table = defaultdict(int)
+    for i, c in enumerate(pattern):
+        S_table[c] |= 1 << i
+    R0 = 0
+    R1 = 0
+    mask = 1 << (m - 1)
+    for j, c in enumerate(text):
+        S = S_table[c]
+        shR0 = (R0 << 1) | 1
+        R0 = shR0 & S
+        R1 = ((R1 << 1) | 1) & S | shR0
+        # first if-statement commented because we have already checked for exact match using find() in mmfind()
+        #if R0 & mask:  #exact match
+            #return 1, j - m + 1
+        if R1 & mask:   # 1 mismatch
+            return 1, j - m + 1
+    return -1,-1
+
+
+# this function is a replacement for below mmfind() for speedup
+# it assumes the length of H is <32
+# find index of H[1..m] in G[1..n] with up to max (1 or 2) mismatches
+def mmfind(G,n,H,m,max): # lengths; assume n>m
+    a = G.find(H)
+    if a!=-1: return a # shortcut for perfect matches
+    a,b = -1,-1
+    if max==1 :
+        a,b = bit_parallel_with_max_1_error(G, H, m)
+    elif max==2 :
+        a,b = bit_parallel_with_max_2_error(G, H, m)
+    if a==1: return b
+    return -1
+
+
+''' replaced with above mmfind()
 # find index of H[1..m] in G[1..n] with up to max mismatches
 
 def mmfind(G,n,H,m,max): # lengths; assume n>m
@@ -155,6 +222,8 @@ def mmfind(G,n,H,m,max): # lengths; assume n>m
       if cnt>max: break
     if cnt<=max: return i
   return -1
+'''
+
 
 def extract_staggered(infile,outfile,vars):
   Tn = vars.prefix
@@ -165,12 +234,19 @@ def extract_staggered(infile,outfile,vars):
 
   #P,Q = 5,10 # 1-based inclusive positions to look for start of Tn prefix
   P,Q = 0,15
+  if vars.barseq_catalog_out!=None: Q = 100 # relax for barseq
 
   vars.tot_tgtta = 0
   vars.truncated_reads = 0
   output = open(outfile,"w")
   tot = 0
   #print infile
+  if vars.barseq_catalog_out!=None:
+    barcodes_file = vars.base+".barseq" # I could define this in vars
+    catalog = open(barcodes_file,"w")
+    barseq1 = "TGCAGGGATGTCCACGAGGTCTCT" # const regions surrounding barcode
+    barseq2 = "CGTACGCTGCAGGTCGACGGCCGG"
+    barseq1len,barseq2len = len(barseq1),len(barseq2)
   for line in open(infile):
     #print line
     line = line.rstrip()
@@ -184,21 +260,30 @@ def extract_staggered(infile,outfile,vars):
     if a>=P and a<=Q:
       gstart,gend = a+lenTn,readlen
       if b!=-1: gend = b; vars.truncated_reads += 1
-      #if gend-gstart<20: continue # too short
-      if gend-gstart<5: continue # too short
+      if gend-gstart<20: continue # too short # I should make this a param
       output.write(header+"\n")
       output.write(line[gstart:gend]+"\n")
       vars.tot_tgtta += 1
+    if vars.barseq_catalog_out!=None:
+      n = max(a,readlen)
+      c = mmfind(line,n,barseq1,barseq1len,vars.mm1) # only have to search as far as Tn prefix
+      d = mmfind(line,n,barseq2,barseq2len,vars.mm1)
+      seq = "XXXXXXXXXXXXXXXXXXXX"
+      if c!=-1 and d!=-1:
+        size = d-c-barseq1len
+        if size>=15 and size<=25: seq = line[c+barseq1len:d]
+      catalog.write(header+"\n")
+      catalog.write(seq+"\n")
+  if vars.barseq_catalog_out!=None: catalog.close()
   output.close()
   if vars.tot_tgtta == 0:
     raise ValueError("Error: Input files did not contain any reads matching prefix sequence with %d mismatches" % vars.mm1)
 
 
-
-
 def message(s):
-  print "[tn_preprocess]",s
-  sys.stdout.flush()
+  #print "[tn_preprocess]",s
+  #sys.stdout.flush()
+  sys.stderr.write("[tn_preprocess] "+s+"\n")
 
 def get_id(line):
   a,b = line.find(":")+1,line.rfind("#")
@@ -210,7 +295,7 @@ def get_id(line):
 def select_reads(goodreads,infile,outfile):
   hash = {}
   for line in open(goodreads):
-    if line[0]=='>': 
+    if line[0]=='>':
       #id = line[line.find(":")+1:line.rfind("#")]
       id = get_id(line)
       hash[id] = 1
@@ -236,7 +321,7 @@ def replace_ids(infile1,infile2,outfile):
     b = g.readline()
     if len(a)<2: break
     if a[0]=='>': header = a
-    else: 
+    else:
       h.write(header)
       h.write(b)
   f.close()
@@ -282,7 +367,7 @@ def template_counts(ref,sam,bcfile,vars):
   genome = read_genome(ref)
   barcodes = {}
 
-  
+
   fil1 = open(bcfile)
   fil2 = open(sam)
 
@@ -295,7 +380,7 @@ def template_counts(ref,sam,bcfile,vars):
   for line in fil2:
     if idx==2: break
     idx+=1
-  
+
   '''
   for line in open(bcfile):
     line = line.rstrip()
@@ -319,12 +404,12 @@ def template_counts(ref,sam,bcfile,vars):
       w = line.split('\t')
       code = samcode(w[1])
       if 'S' in w[5]: continue #elimate softclipped reads
-      if code[6]=="1": # previously checked for for reads1's via w[1]<128 
-        vars.tot_tgtta += 1 
+      if code[6]=="1": # previously checked for for reads1's via w[1]<128
+        vars.tot_tgtta += 1
         if code[2]=="0": vars.r1 += 1
       if code[7]=="1" and code[2]=="0": vars.r2 += 1
       # include "improperly mapped reads, which might just be short frags
-      #if w[1]=="99" or w[1]=="83" or w[1]=="97" or w[1]=="81": 
+      #if w[1]=="99" or w[1]=="83" or w[1]=="97" or w[1]=="81":
       if code[6]=="1" and code[2]=="0" and code[3]=="0": # both reads mapped (proper or not)
         vars.mapped += 1
         readlen = len(w[9])
@@ -369,7 +454,7 @@ def increase_counts(pos,sites, strand):
             sites[pos][4] += 1  #if read has been found before, tally 1 more in R reads
         sites[pos][5] += 1  #if read has been found before, tally 1 more in R reads
         sites[pos][6] += 1  #if read has been found before, tally 1 more in R reads
-    
+
 
 def read_counts(ref,sam,vars):
     genome = read_genome(ref)
@@ -378,7 +463,7 @@ def read_counts(ref,sam,vars):
         if genome[i:i+2]=="TA" or vars.transposon=='Tn5':
           pos = i+1
           sites[pos] = [pos,0,0,0,0,0,0]
-     
+
     hits = {}
     vars.tot_tgtta,vars.mapped = 0,0
     vars.r1 = vars.r2 = 0
@@ -408,7 +493,7 @@ def read_counts(ref,sam,vars):
                     site1 = pos + delta #if on + strand, take column 3 position and add 1bp)
                     if site1 in sites:
                         increase_counts(site1, sites, strand)
-    
+
     results = []
     for key in sorted(sites.keys()):
         results.append(sites[key])
@@ -440,7 +525,7 @@ def driver(vars):
      extract_reads(vars)
 
      run_bwa(vars)
- 
+
      generate_output(vars)
 
   except ValueError as err:
@@ -468,7 +553,7 @@ def uncompress(filename):
 
 def extract_reads(vars):
     message("extracting reads...")
-    
+
     flag = ['','']
     for idx, name in enumerate([vars.fq1, vars.fq2]):
         if idx==1 and vars.single_end==True: continue
@@ -479,11 +564,11 @@ def extract_reads(vars):
                 break
             flag[idx] = 'FASTQ'
             break
-        fil.close() 
+        fil.close()
 
     if vars.fq1.endswith('.gz'):
        vars.fq1 = uncompress(vars.fq1)
-        
+
     if vars.fq2.endswith('.gz'):
        vars.fq2 = uncompress(vars.fq2)
 
@@ -498,9 +583,9 @@ def extract_reads(vars):
       message("creating %s" % vars.trimmed1)
       extract_staggered(vars.reads1,vars.trimmed1,vars)
 
-      return 
+      return
 
-    if(flag[1] == 'FASTQ'):  
+    if(flag[1] == 'FASTQ'):
         message("fastq2reads: %s -> %s" % (vars.fq2,vars.reads2))
         fastq2reads(vars.fq2,vars.reads2,vars.maxreads)
     else:
@@ -610,7 +695,7 @@ def run_bwa(vars):
     if not os.path.exists(vars.ref+".amb"):
       cmd = [vars.bwa, "index", vars.ref]
       bwa_subprocess(cmd, sys.stdout)
-       
+
 
     cmd = [vars.bwa, "aln"]
     if vars.flags.strip():
@@ -626,7 +711,7 @@ def run_bwa(vars):
         bwa_subprocess(cmd, outfile)
 
     else:
-     
+
         cmd = [vars.bwa, "aln"]
         if vars.flags.strip():
             cmd.extend(vars.flags.split(" "))
@@ -665,7 +750,7 @@ def get_read_length(filename):
    fil = open(filename)
    i = 0
    for line in fil:
-      if i == 1: 
+      if i == 1:
          #print "reads1 line: " + line
          return len(line.strip())
       i+=1
@@ -683,7 +768,89 @@ def get_genomic_portion(filename):
    return tot_len/n
 
 
+# return list of (item,cnt) sorted by counts
+
+def popularity(lst):
+  hash = {}
+  for x in lst:
+    if x not in hash: hash[x] = 0
+    hash[x] += 1
+  data = [(hash[x],x) for x in hash.keys()]
+  data.sort(reverse=True)
+  data = [(y,x) for (x,y) in data]
+  return data
+
+
+def create_barseq_catalog(vars):
+  barcodes = {} # headers->barcodes
+  for line in open(vars.base+".barseq"):
+    if line[0]=='>': 
+      header = line.rstrip()[1:]
+      header = header.split()[0] # in case it has a space, which is dropped by bwa in sam file
+    else: barcodes[header] = line.split('\n')[0]
+
+  sites,nreads = {},0
+  for line in open(vars.sam):
+    if line[0]=='@': continue
+    w = line.split('\t')
+    samcode = int(w[1])
+    nreads += 1
+    if samcode in [0,16]: # what about PE reads?
+      header,coord = w[0],int(w[3])
+      # see how I adjust coords in read_counts()
+      strand,delta = 'F',-2
+      if samcode==16: strand,delta = 'R',len(w[9]); 
+      coord += delta
+      if strand=='R': coord *= -1
+      if coord not in sites: sites[coord] = [] # use -co for minus strand
+      sites[coord].append(barcodes[header])
+
+  mapsto = {} # barcodes->sites
+  totbc,maptoTAs = 0,0
+  genome = read_genome(vars.ref)
+  for site,bclist in sites.items():
+    for bc in bclist:
+      if bc not in mapsto: mapsto[bc] = []
+      if 'X' not in bc: 
+        mapsto[bc].append(site)
+        totbc += 1
+        if site<0: site *= -1
+        if genome[site-1:site+1]=="TA": maptoTAs += 1
+
+  # good barcodes are those that are associated with only 1 site (must be TA?)
+  # what about redundant sites like IS elements?
+  goodbc = {}
+  for bc,sites2 in mapsto.items(): 
+    pop = popularity(sites2) # make a table of locations at which the barcode appears
+    #print bc,pop
+    if len(pop)==1: goodbc[bc] = 1
+  #for x in sorted(sites.items()): print x[0],genome[x[0]-1:x[0]+1],popularity(x[1])
+
+  file = open(vars.barseq_catalog_out,"w")  
+  file.write("# Barseq (stats are at the bottom): reads = %s, ref = %s\n" % (vars.fq1,vars.ref))
+  n = len(genome)
+  a,b = 0,0
+  for i in range(n-1):
+    if genome[i:i+2]=="TA":
+      a += 1
+      co = i+1
+      barcodes_pos = [(x,'+') for x in sites.get(co,[])]
+      barcodes_neg = [(x,'-') for x in sites.get(-co,[])]
+      pop = popularity(barcodes_pos+barcodes_neg)
+      if len(pop)>0: b += 1
+      else: file.write("# %s %s\n" %(co,genome[i:i+2]))
+      for (bc,strand),cnt in pop:
+        if bc in goodbc: 
+          file.write("%s %s %s %s %s\n" % (co,genome[i:i+2],strand,bc,cnt))
+  file.write("# total_reads: %s, total_barcodes: %s, map_to_TAs: %s, distinct_bc: %s, unimapped: %s, TA_sites_hit: %s/%s\n" % (nreads,totbc,maptoTAs,len(mapsto.keys()),len(goodbc.keys()),b,a))
+  file.close()
+
+
 def generate_output(vars):
+  if vars.barseq_catalog_out!=None: 
+    message("creating Barseq catalog file: "+vars.barseq_catalog_out)
+    create_barseq_catalog(vars)
+
   message("tabulating template counts and statistics...")
   if vars.single_end==True: counts = read_counts(vars.ref,vars.sam,vars) # return read counts copied as template counts
   else: counts = template_counts(vars.ref,vars.sam,vars.barcodes1,vars)
@@ -720,7 +887,7 @@ def generate_output(vars):
     if Himar1[:-5] in line and Himar1 not in line: misprimed += 1
 
 
-  
+
 
   rcounts = [x[5] for x in counts]
   tcounts = [x[6] for x in counts]
@@ -742,7 +909,7 @@ def generate_output(vars):
     BC_corr = corr([x for x in rcounts if x!=0],[x for x in tcounts if x!=0])
   except ValueError:
     BC_corr = float("nan")
- 
+
 
   read_length = get_read_length(vars.base + ".reads1")
   mean_r1_genomic = get_genomic_portion(vars.base + ".trimmed1")
@@ -799,7 +966,7 @@ def generate_output(vars):
       if '#' in line:
           print line.rstrip()
   infile.close()
-  
+
 #############################################################################
 
 def error(s):
@@ -831,7 +998,7 @@ def set_attributes(vars, attributes_list, override=False):
         else:
             if not hasattr(vars, attr):
                 setattr(vars, attr, value)
-            
+
 
 def set_sassetti_defaults(vars):
     attributes_list = []
@@ -866,7 +1033,7 @@ def set_tn5_defaults(vars):
 
 
 def verify_inputs(vars):
-  
+
     if not os.path.exists(vars.fq1): error("reads1 file not found: "+vars.fq1)
     vars.single_end = False
     if vars.fq2=="": vars.single_end = True
@@ -874,6 +1041,7 @@ def verify_inputs(vars):
     if not os.path.exists(vars.ref): error("reference file not found: "+vars.ref)
     if vars.base == '': error("prefix cannot be empty")
     if vars.fq1 == vars.fq2: error('fastq files cannot be identical')
+    if vars.barseq_catalog_in!=None and vars.barseq_catalog_out!=None: error('barseq catalog input and output files cannot both be defined at the same time')
 
     # If Mme1 protocol, warn that we don't use read2 file
     if vars.protocol.lower() == "mme1" and not vars.single_end:
@@ -902,7 +1070,8 @@ def initialize_globals(vars, args=[], kwargs={}):
     vars.protocol = "Sassetti"
     vars.prefix = "ACTTATCAGCCAACCTGTTA"
     vars.flags = ""
-   
+    vars.barseq_catalog_in = vars.barseq_catalog_out = None
+    
     # Update defaults
     protocol = kwargs.get("protocol", "").lower()
     if protocol:
@@ -935,9 +1104,13 @@ def initialize_globals(vars, args=[], kwargs={}):
         vars.base = kwargs["output"]
     if "mismatches" in kwargs:
         vars.mm1 = int(kwargs["mismatches"])
+    if "barseq_catalog_in" in kwargs:
+        vars.barseq_catalog_in = kwargs["barseq_catalog_in"]
+    if "barseq_catalog_out" in kwargs:
+        vars.barseq_catalog_out = kwargs["barseq_catalog_out"]
     if "flags" in kwargs:
         vars.flags = kwargs["flags"]
-
+    # note: if last flag expected an arg but was end of list, it gets value True ; check for this and report as missing # TRU, 10/28/17
 
 
 def read_config(vars):
@@ -954,6 +1127,8 @@ def read_config(vars):
     if len(w)>=2 and w[0]=='protocol': vars.protocol = " ".join(w[1:])
     if len(w)>=2 and w[0]=='primer': vars.prefix = w[1]
     if len(w)>=2 and w[0]=='flags': vars.flags = " ".join(w[1:])
+    if len(w)>=2 and w[0]=='barseq_catalog_in': vars.barseq_catalog_in = w[1]
+    if len(w)>=2 and w[0]=='barseq_catalog_out': vars.barseq_catalog_out = w[1]
 
 
 def save_config(vars):
@@ -963,17 +1138,17 @@ def save_config(vars):
   f.write("ref %s\n" % vars.ref)
   f.write("bwa %s\n" % vars.bwa)
   f.write("prefix %s\n" % vars.base)
-  f.write("mismatches1 %s\n" % vars.mm1) 
+  f.write("mismatches1 %s\n" % vars.mm1)
   f.write("transposon %s\n" % vars.transposon)
   f.write("protocol %s\n" % vars.protocol)
   f.write("primer %s\n" % vars.prefix)
   f.write("flags %s\n" % vars.flags)
+  if vars.barseq_catalog_in!=None: f.write("barseq_catalog_in %s\n" % vars.barseq_catalog_in)
+  if vars.barseq_catalog_out!=None: f.write("barseq_catalog_out %s\n" % vars.barseq_catalog_out)
   f.close()
 
 def show_help():
-  print 'usage: python PATH/src/tpp.py -bwa <PATH_TO_EXECUTABLE> -ref <REF_SEQ> -reads1 <FASTQ_OR_FASTA_FILE> [-reads2 <FASTQ_OR_FASTA_FILE>] -output <BASE_FILENAME> [-maxreads <N>] [-mismatches <N>] [-flags "<STRING>"] [-tn5|-himar1] [-primer <seq>]'
-    
+  print 'usage: python PATH/src/tpp.py -bwa <EXECUTABLE_WITH_PATH> -ref <REF_SEQ> -reads1 <FASTQ_OR_FASTA_FILE> [-reads2 <FASTQ_OR_FASTA_FILE>] -output <BASE_FILENAME> [-maxreads <N>] [-mismatches <N>] [-flags "<STRING>"] [-tn5|-himar1] [-primer <seq>] [-barseq_catalog_in|_out <file>]'
+
 class Globals:
   pass
-
-


=====================================
src/pytransit/__init__.py
=====================================
--- a/src/pytransit/__init__.py
+++ b/src/pytransit/__init__.py
@@ -2,6 +2,6 @@
 __all__ = ["transit_tools", "tnseq_tools", "norm_tools", "stat_tools"]
 
 
-__version__ = "v2.1.1"
+__version__ = "v2.1.2"
 prefix = "[TRANSIT]"
 


=====================================
src/pytransit/__main__.py
=====================================
--- a/src/pytransit/__main__.py
+++ b/src/pytransit/__main__.py
@@ -39,9 +39,9 @@ def main(args=None):
     # Check if running in GUI Mode
     if len(sys.argv) == 1 and hasWx:
 
+        import matplotlib.pyplot
         import pytransit.transit_gui as transit_gui
         transit_tools.transit_message("Running in GUI Mode")
-
         app = wx.App(False)
 
         #create an object of CalcFrame
@@ -62,6 +62,8 @@ def main(args=None):
             print "\t - %s" % m
     # Running in Console mode
     else:
+        import matplotlib
+        matplotlib.use("Agg")
         method_name = sys.argv[1]
         if method_name not in all_methods:
             print "Error: The '%s' method is unknown." % method_name
@@ -70,6 +72,7 @@ def main(args=None):
                 print "\t - %s" % m
             print "Usage: python %s <method>" % sys.argv[0]
         else:
+
             methodobj = all_methods[method_name].method.fromconsole()
             methodobj.Run()
 


=====================================
src/pytransit/analysis/binomial.py
=====================================
--- a/src/pytransit/analysis/binomial.py
+++ b/src/pytransit/analysis/binomial.py
@@ -193,7 +193,7 @@ class BinomialMethod(base.SingleConditionMethod):
             return None
 
         #Validate transposon types
-        if not transit_tools.validate_filetypes(ctrldata, transposons):
+        if not transit_tools.validate_transposons_used(ctrldata, transposons):
             return None
 
 
@@ -295,8 +295,19 @@ class BinomialMethod(base.SingleConditionMethod):
         self.progress_range(self.samples+self.burnin)
 
         #Get orf data
+        #self.transit_message("Getting Data")
+        #G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
+
         self.transit_message("Getting Data")
-        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
+        (data, position) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
+        (K,N) = data.shape
+
+        if self.normalization and self.normalization != "nonorm":
+            self.transit_message("Normalizing using: %s" % self.normalization)
+            (data, factors) = norm_tools.normalize_data(data, self.normalization, self.ctrldata, self.annotation_path)
+
+        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, minread=1, reps=self.replicates, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data, position=position)
+
 
 
         #Parameters


=====================================
src/pytransit/analysis/example.py
=====================================
--- a/src/pytransit/analysis/example.py
+++ b/src/pytransit/analysis/example.py
@@ -101,7 +101,7 @@ class ExampleMethod(base.SingleConditionMethod):
             return None
 
         #Validate transposon types
-        if not transit_tools.validate_filetypes(ctrldata, transposons):
+        if not transit_tools.validate_transposons_used(ctrldata, transposons):
             return None
 
         #Read the parameters from the wxPython widgets
@@ -135,9 +135,6 @@ class ExampleMethod(base.SingleConditionMethod):
     def fromargs(self, rawargs): 
         (args, kwargs) = transit_tools.cleanargs(rawargs)
 
-        print "ARGS:", args
-        print "KWARGS:", kwargs
-
         ctrldata = args[0].split(",")
         annotationPath = args[1]
         outpath = args[2]
@@ -167,7 +164,16 @@ class ExampleMethod(base.SingleConditionMethod):
         
         #Get orf data
         self.transit_message("Getting Data")
-        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, norm="TTR", ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
+        (data, position) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
+        (K,N) = data.shape
+
+        if self.normalization and self.normalization != "nonorm":
+            self.transit_message("Normalizing using: %s" % self.normalization)
+            (data, factors) = norm_tools.normalize_data(data, self.normalization, self.ctrldata, self.annotation_path)
+
+        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, minread=1, reps=self.replicates, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data, position=position)
+
+
 
         data = []
         N = len(G)


=====================================
src/pytransit/analysis/griffin.py
=====================================
--- a/src/pytransit/analysis/griffin.py
+++ b/src/pytransit/analysis/griffin.py
@@ -145,7 +145,7 @@ class GriffinMethod(base.SingleConditionMethod):
             return None
 
         #Validate transposon types
-        if not transit_tools.validate_filetypes(ctrldata, transposons):
+        if not transit_tools.validate_transposons_used(ctrldata, transposons):
             return None
 
 
@@ -219,7 +219,19 @@ class GriffinMethod(base.SingleConditionMethod):
 
         #Get orf data
         self.transit_message("Getting Data")
-        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, minread=self.minread, reps=self.replicates, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
+
+        (data, position) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
+        (K,N) = data.shape
+
+        if self.normalization and self.normalization != "nonorm":
+            self.transit_message("Normalizing using: %s" % self.normalization)
+            (data, factors) = norm_tools.normalize_data(data, self.normalization, self.ctrldata, self.annotation_path)
+
+        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, minread=1, reps=self.replicates, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data, position=position)
+
+
+
+
 
         N = len(G)
         self.progress_range(N)


=====================================
src/pytransit/analysis/gumbel.py
=====================================
--- a/src/pytransit/analysis/gumbel.py
+++ b/src/pytransit/analysis/gumbel.py
@@ -188,7 +188,7 @@ class GumbelMethod(base.SingleConditionMethod):
             return None
 
         #Validate transposon types
-        if not transit_tools.validate_filetypes(ctrldata, transposons):
+        if not transit_tools.validate_transposons_used(ctrldata, transposons):
             return None
 
 
@@ -321,11 +321,21 @@ class GumbelMethod(base.SingleConditionMethod):
         
         #Get orf data
         self.transit_message("Reading Annotation")
+
+        #Validate data has empty sites
+        #(status, genome) = transit_tools.validate_wig_format(self.ctrldata, wxobj=self.wxobj)
+        #if status <2: tn_used = "himar1"
+        #else: tn_used = "tn5"
+
         self.transit_message("Getting Data")
+        (data, position) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
+        (K,N) = data.shape
 
-        
+        if self.normalization and self.normalization != "nonorm":
+            self.transit_message("Normalizing using: %s" % self.normalization)
+            (data, factors) = norm_tools.normalize_data(data, self.normalization, self.ctrldata, self.annotation_path)
 
-        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, minread=self.minread, reps=self.replicates, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
+        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, minread=self.minread, reps=self.replicates, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data, position=position)
 
         ii_good = numpy.array([self.good_orf(g) for g in G]) # Gets index of the genes that can be analyzed
 
@@ -359,29 +369,37 @@ class GumbelMethod(base.SingleConditionMethod):
         i = 1; count = 0;
         while i < self.samples:
 
-            # PHI
-            acc = 1.0
-            phi_new  = phi_old + random.gauss(mu_c, sigma_c)
-            i0 = Z_sample[:,i-1] == 0
-            if phi_new > 1 or phi_new <= 0 or (self.F_non(phi_new, N[i0], R[i0]) - self.F_non(phi_old, N[i0], R[i0])) < math.log(random.uniform(0,1)):
-                phi_new = phi_old
-                acc = 0.0
-                flag = 0
+            try:
+                # PHI
+                acc = 1.0
+                phi_new  = phi_old + random.gauss(mu_c, sigma_c)
+                i0 = Z_sample[:,i-1] == 0
+                if phi_new > 1 or phi_new <= 0 or (self.F_non(phi_new, N[i0], R[i0]) - self.F_non(phi_old, N[i0], R[i0])) < math.log(random.uniform(0,1)):
+                    phi_new = phi_old
+                    acc = 0.0
+                    flag = 0
             
-            # Z
-            Z = self.sample_Z(phi_new, w1, N, R, S, T, mu_s, sigma_s, SIG)
+                # Z
+                Z = self.sample_Z(phi_new, w1, N, R, S, T, mu_s, sigma_s, SIG)
             
             # w1
-            N_ESS = sum(Z == 1)
-            w1 = scipy.stats.beta.rvs(N_ESS + ALPHA_w, N_GOOD - N_ESS + BETA_w)
+                N_ESS = sum(Z == 1)
+                w1 = scipy.stats.beta.rvs(N_ESS + ALPHA_w, N_GOOD - N_ESS + BETA_w)
             
-            count +=1
-            acctot+=acc
+                count +=1
+                acctot+=acc
             
-            if (count > self.burnin) and (count % self.trim == 0):
-                phi_sample[i] = phi_new
-                Z_sample[:,i] = Z
-                i+=1
+                if (count > self.burnin) and (count % self.trim == 0):
+                    phi_sample[i] = phi_new
+                    Z_sample[:,i] = Z
+                    i+=1
+            except ValueError as e:
+                self.transit_message("Error: %s" % e) 
+                self.transit_message("This is likely to have been caused by poor data (e.g. too sparse).") 
+                self.transit_message("If the density of the dataset is too low, the Gumbel method will not work.") 
+                self.transit_message("Quitting.") 
+                return
+                
             
             phi_old = phi_new
             #Update progress


=====================================
src/pytransit/analysis/hmm.py
=====================================
--- a/src/pytransit/analysis/hmm.py
+++ b/src/pytransit/analysis/hmm.py
@@ -212,7 +212,7 @@ class HMMMethod(base.SingleConditionMethod):
             return None
 
         #Validate transposon types
-        if not transit_tools.validate_filetypes(ctrldata, transposons):
+        if not transit_tools.validate_transposons_used(ctrldata, transposons):
             return None
 
 
@@ -279,7 +279,7 @@ class HMMMethod(base.SingleConditionMethod):
         
         #Get data
         self.transit_message("Getting Data")
-        (data, position) = tnseq_tools.get_data(self.ctrldata)
+        (data, position) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
         (K,N) = data.shape
 
         # Normalize data


=====================================
src/pytransit/analysis/rankproduct.py
=====================================
--- a/src/pytransit/analysis/rankproduct.py
+++ b/src/pytransit/analysis/rankproduct.py
@@ -153,7 +153,7 @@ class RankProductMethod(base.DualConditionMethod):
             return None
 
         #Validate transposon types
-        if not transit_tools.validate_filetypes(ctrldata+expdata, transposons):
+        if not transit_tools.validate_transposons_used(ctrldata+expdata, transposons):
             return None
 
 
@@ -243,7 +243,7 @@ class RankProductMethod(base.DualConditionMethod):
         Kexp = len(self.expdata)
         #Get orf data
         self.transit_message("Getting Data")
-        (data, position) = tnseq_tools.get_data(self.ctrldata+self.expdata)
+        (data, position) = transit_tools.get_validated_data(self.ctrldata+self.expdata, wxobj=self.wxobj)
         if self.normalization != "none":
             self.transit_message("Normalizing using: %s" % self.normalization)
 


=====================================
src/pytransit/analysis/resampling.py
=====================================
--- a/src/pytransit/analysis/resampling.py
+++ b/src/pytransit/analysis/resampling.py
@@ -14,6 +14,7 @@ try:
 except Exception as e:
     hasWx = False
     newWx = False
+    
 
 import os
 import time
@@ -23,7 +24,6 @@ import random
 import numpy
 import scipy.stats
 import datetime
-import matplotlib
 
 import base
 import pytransit
@@ -41,7 +41,7 @@ long_name = "Resampling test of conditional essentiality between two conditions"
 description = """Method for determining conditional essentiality based on resampling (i.e. permutation test). Identifies significant changes in mean read-counts for each gene after normalization."""
 
 transposons = ["himar1", "tn5"]
-columns = ["Orf","Name","Desc","Sites","Mean Ctrl","Mean Exp","log2FC", "Sum Ctrl", "Sum Exp", "Delta Sum","p-value","Adj. p-value"]
+columns = ["Orf","Name","Desc","Sites","Mean Ctrl","Mean Exp","log2FC", "Sum Ctrl", "Sum Exp", "Delta Mean","p-value","Adj. p-value"]
 
 class ResamplingAnalysis(base.TransitAnalysis):
     def __init__(self):
@@ -225,7 +225,7 @@ class ResamplingMethod(base.DualConditionMethod):
             return None
 
         #Validate transposon types
-        if not transit_tools.validate_filetypes(ctrldata+expdata, transposons):
+        if not transit_tools.validate_transposons_used(ctrldata+expdata, transposons):
             return None
 
 
@@ -317,10 +317,16 @@ class ResamplingMethod(base.DualConditionMethod):
 
     def Run(self):
 
-        if not self.wxobj:
-            # Force matplotlib to use good backend for png.
-            matplotlib.use('Agg')
+        #if not self.wxobj:
+        #    # Force matplotlib to use good backend for png.
+        #    import matplotlib.pyplot as plt
+        #elif "matplotlib.pyplot" not in sys.modules:
+        try:
             import matplotlib.pyplot as plt
+        except:
+            print "Error: cannot do histograms"
+            self.doHistogram = False 
+                
 
         self.transit_message("Starting resampling Method")
         start_time = time.time()
@@ -340,7 +346,7 @@ class ResamplingMethod(base.DualConditionMethod):
         Kexp = len(self.expdata)
         #Get orf data
         self.transit_message("Getting Data")
-        (data, position) = tnseq_tools.get_data(self.ctrldata+self.expdata)
+        (data, position) = transit_tools.get_validated_data(self.ctrldata+self.expdata, wxobj=self.wxobj)
 
         (K,N) = data.shape
 
@@ -380,17 +386,18 @@ class ResamplingMethod(base.DualConditionMethod):
                 data1 = gene.reads[:Kctrl,ii].flatten()+self.pseudocount
                 data2 = gene.reads[Kctrl:,ii].flatten()+self.pseudocount
                 
-                (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail,  pval_2tail, testlist) =  stat_tools.resampling(data1, data2, S=self.samples, testFunc=stat_tools.F_sum_diff_flat, adaptive=self.adaptive)
+                (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail,  pval_2tail, testlist) =  stat_tools.resampling(data1, data2, S=self.samples, testFunc=stat_tools.F_mean_diff_flat, adaptive=self.adaptive)
 
 
             if self.doHistogram:
+                import matplotlib.pyplot as plt
                 if testlist:
                     n, bins, patches = plt.hist(testlist, normed=1, facecolor='c', alpha=0.75, bins=100)
                 else:
                     n, bins, patches = plt.hist([0,0], normed=1, facecolor='c', alpha=0.75, bins=100)
-                plt.xlabel('Delta Sum')
+                plt.xlabel('Delta Mean')
                 plt.ylabel('Probability')
-                plt.title('%s - Histogram of Delta Sum' % gene.orf)
+                plt.title('%s - Histogram of Delta Mean' % gene.orf)
                 plt.axvline(test_obs, color='r', linestyle='dashed', linewidth=3)
                 plt.grid(True)
                 genePath = os.path.join(histPath, gene.orf +".png")


=====================================
src/pytransit/analysis/tn5gaps.py
=====================================
--- a/src/pytransit/analysis/tn5gaps.py
+++ b/src/pytransit/analysis/tn5gaps.py
@@ -233,16 +233,17 @@ class Tn5GapsMethod(base.SingleConditionMethod):
         start_time = time.time()
         
         self.transit_message("Getting data (May take a while)")
-        genes_obj = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus)
         
         # Combine all wigs
-        (data,position) = tnseq_tools.get_data_zero_fill(self.ctrldata)
+        (data,position) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
         combined = tnseq_tools.combine_replicates(data, method=self.replicates)
         combined[combined < self.minread] = 0
         counts = combined
         counts[counts > 0] = 1
         num_sites = counts.size
         
+        genes_obj = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data, position=position)
+        
         pins = numpy.mean(counts)
         pnon = 1.0 - pins
 


=====================================
src/pytransit/doc/source/transit_install.rst
=====================================
--- a/src/pytransit/doc/source/transit_install.rst
+++ b/src/pytransit/doc/source/transit_install.rst
@@ -305,8 +305,7 @@ install .whl (wheel) files:
 
     pip.exe install wheel
 
-Finally, install the transit package using pip:
-
+Next install the transit package using pip:
 
 ::
 
@@ -314,11 +313,35 @@ Finally, install the transit package using pip:
 
 
 
+
+To use transit in GUI mode you will need to install wxPython versions 3.0 or earlier. We have provided .whl files which you can download and install below. (Note: Make sure to
+choose the files that match your Windows version i.e. 32/64 bit)
+
+
+  + `wxPython-3.0.2.0-cp27-none-win_amd64.whl <http://saclab.tamu.edu/essentiality/transit/wxPython-3.0.2.0-cp27-none-win_amd64.whl>`_ or `[32 bit] <http://saclab.tamu.edu/essentiality/transit/wxPython-3.0.2.0-cp27-none-win32.whl>`_
+
+
+  + `wxPython_common-3.0.2.0-py2-none-any.whl <http://saclab.tamu.edu/essentiality/transit/wxPython_common-3.0.2.0-py2-none-any.whl>`_ or `[32 bit] <http://saclab.tamu.edu/essentiality/transit/wxPython_common-3.0.2.0-py2-none-any.whl>`_
+
+
+Finally, install the files using pip:
+
+::
+
+
+    pip.exe install wxPython-3.0.2.0-cp27-none-win_amd64.whl
+    pip.exe install wxPython_common-3.0.2.0-py2-none-any.whl
+
+
+making sure to replace the name with the file you downloaded (i.e. 32bit vs 64 bit)
+
+
 .. NOTE::
     If you will be using the pre-processor, TPP, you will also need to install :ref:`install BWA <bwa-win>`.
 
 
 
+
 Method 2: Install Source Locally
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 


=====================================
src/pytransit/tnseq_tools.py
=====================================
--- a/src/pytransit/tnseq_tools.py
+++ b/src/pytransit/tnseq_tools.py
@@ -307,7 +307,7 @@ class Genes:
 
 #
     
-    def __init__(self, wigList, annotation, norm="nonorm", reps="All", minread=1, ignoreCodon = True, nterm=0.0, cterm=0.0, include_nc = False, data=[], position=[]):
+    def __init__(self, wigList, annotation, norm="nonorm", reps="All", minread=1, ignoreCodon = True, nterm=0.0, cterm=0.0, include_nc = False, data=[], position=[],genome="", transposon="himar1"):
         """Initializes the gene list based on the list of wig files and a prot_table.
 
         This class helps define a list of Gene objects with attributes that 
@@ -349,7 +349,12 @@ class Genes:
         
         orf2info = get_gene_info(self.annotation)
         if not numpy.any(data):
-            (data, position) = get_data(self.wigList)
+            if transposon.lower() == "himar1" and not genome:
+                (data, position) = get_data(self.wigList)
+            elif genome:
+                (data, position) = get_data_w_genome(self.wigList, genome)
+            else:
+                (data, position) = get_data_zero_fill(self.wigList)
             ii_min = data < self.minread
             data[ii_min] = 0
         hash = get_pos_hash(self.annotation)
@@ -708,10 +713,39 @@ def get_file_types(wig_list):
                 tmp = line.split()
                 pos = int(tmp[0])
                 rd = float(tmp[1])
-                if pos != prev_pos + 1: types[i] = 'himar1'
+                if pos != prev_pos + 1:
+                    types[i] = 'himar1'
+                    break
                 prev_pos = pos
     return types
 
+def check_wig_includes_zeros(wig_list):
+    """Returns boolean list showing whether the given files include empty sites
+    (zero) or not.
+
+    Arguments:
+        wig_list (list): List of paths to wig files.
+
+    Returns:
+        list: List of boolean values.
+    """
+    if not wig_list:
+        return []
+    includes = [False for i in range(len(wig_list))]
+    for i, wig_filename in enumerate(wig_list):
+        with open(wig_filename) as wig_file:
+            for line in wig_file:
+                if line[0] not in "0123456789": continue
+                tmp = line.split()
+                pos = int(tmp[0])
+                rd = float(tmp[1])
+                if rd == 0:
+                    includes[i] = True
+                    break
+    return includes
+
+
+
 #
 
 def get_unknown_file_types(wig_list, transposons):
@@ -745,14 +779,25 @@ def get_data(wig_list):
     .. seealso:: :class:`get_file_types` :class:`combine_replicates` :class:`get_data_zero_fill` :class:`pytransit.norm_tools.normalize_data`
     """
     K = len(wig_list)
-    T = 0
 
+    # If empty just quickly return empty lists
     if not wig_list:
         return (numpy.zeros((1,0)), numpy.zeros(0), [])
 
-    for line in open(wig_list[0]):
-        if line[0] not in "0123456789": continue
-        T+=1
+    # Check size of all wig file matches
+    size_list = []
+    for j,path in enumerate(wig_list):
+        T = 0
+        for line in open(path):
+            if line[0] not in "0123456789": continue
+            T+=1
+        size_list.append(T)
+
+    # If it doesn't match, report and error and quit
+    if sum(size_list) != (T * len(size_list)):
+        print "Error: Not all wig files have the same number of sites." 
+        print "       Make sure all .wig files come from the same strain."
+        sys.exit()
     
     data = numpy.zeros((K,T))
     position = numpy.zeros(T, dtype=int)
@@ -766,7 +811,14 @@ def get_data(wig_list):
             pos = int(tmp[0])
             rd = float(tmp[1])
             prev_pos = pos
-            data[j,i] = rd
+            
+            try:
+                data[j,i] = rd
+            except Exception as e:
+                print "Error: %s" % e
+                print ""
+                print "Make sure that all wig files have the same number of TA sites (i.e. same strain)"
+                sys.exit()
             position[i] = pos
             i+=1
     return (data, position)
@@ -804,7 +856,7 @@ def get_data_zero_fill(wig_list):
         return (numpy.zeros((1,0)), numpy.zeros(0), [])
     
     data = numpy.zeros((K,T))
-    position = numpy.zeros(T)
+    position = numpy.array(range(T)) + 1#numpy.zeros(T)
     for j,path in enumerate(wig_list):
         reads = []
         i = 0
@@ -813,19 +865,43 @@ def get_data_zero_fill(wig_list):
             tmp = line.split()
             pos = int(tmp[0])
             rd = float(tmp[1])
-            
-            # Fill in remaining zeros
-            for fill_pos in range(i, pos - 1):
-                data[j,fill_pos] = 0
-                position[fill_pos] = i
-                i += 1
-                
             prev_pos = pos
-            data[j,i] = rd
-            position[i] = pos
+            data[j,pos-1] = rd
             i+=1
     return (data, position)
 
+
+def get_data_w_genome(wig_list, genome):
+   
+    X = read_genome(genome)
+    N = len(X)
+    positions = []
+    pos2index = {}
+    count = 0
+    for i in range(N-1):
+        if X[i:i+2].upper() == "TA":
+            pos = i+1
+            positions.append(pos)
+            pos2index[pos] = count
+            count +=1
+
+    positions = numpy.array(positions)
+    T = len(positions)
+    K = len(wig_list)
+    data = numpy.zeros((K,T))
+    for j,path in enumerate(wig_list):
+        for line in open(path):
+            if line[0] not in "0123456789": continue
+            tmp = line.split()
+            pos = int(tmp[0])
+            rd = float(tmp[1])
+            if pos in pos2index:
+                index = pos2index[pos]
+                data[j,index] = rd
+            else:
+                print "Warning: Coordinate %d did not match a TA site in the genome. Ignoring counts." %(pos)
+    return (data, positions)
+ 
 #
 
 def combine_replicates(data, method="Sum"):
@@ -890,16 +966,7 @@ def get_wig_stats(path):
     """
     (data,position) = get_data([path])
     reads = data[0]
-    density = numpy.mean(reads>0)
-    meanrd = numpy.mean(reads)
-    nzmeanrd = numpy.mean(reads[reads>0])
-    nzmedianrd = numpy.median(reads[reads>0])
-    maxrd = numpy.max(reads)
-    totalrd = numpy.sum(reads)
-
-    skew = scipy.stats.skew(reads[reads>0])
-    kurtosis = scipy.stats.kurtosis(reads[reads>0])
-    return (density, meanrd, nzmeanrd, nzmedianrd, maxrd, totalrd, skew, kurtosis)
+    return get_data_stats(reads)
 
 #
 
@@ -1454,7 +1521,7 @@ if __name__ == "__main__":
     print "#"
 
     griffin_results = griffin_analysis(G, theta)
-    for i,gene in enumerate(sorted(G)):
+    for i,gene in enumerate(G):
         pos = gene.position
         exprun, pval = griffin_results[i][-2:]
         print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%1.1f\t%1.5f" % (gene.orf, gene.name, gene.k, gene.n, gene.r, gene.s, gene.t, exprun, pval)


=====================================
src/pytransit/transit_gui.py
=====================================
--- a/src/pytransit/transit_gui.py
+++ b/src/pytransit/transit_gui.py
@@ -21,6 +21,7 @@ try:
 except Exception as e:
     hasWx = False
     newWx = False
+    print "EXCEPTION:", str(e)
 
 import os
 import time
@@ -105,7 +106,7 @@ class MainFrame ( wx.Frame ):
 
 
 
-        self.SetSizeHintsSz( wx.DefaultSize, wx.DefaultSize )
+        #self.SetSizeHintsSz( wx.DefaultSize, wx.DefaultSize )
         
         bSizer1 = wx.BoxSizer( wx.HORIZONTAL )
         
@@ -720,7 +721,6 @@ class TnSeekFrame(MainFrame):
             for dataset in ctrlData:
                 try:
                     path = os.path.dirname(os.path.realpath(__file__))
-                    print path
                     path = os.path.join(os.path.dirname('/pacific/home/mdejesus/transit/src/transit.py'), "pytransit/data", dataset)
                     transit_tools.transit_message("Adding Ctrl File: " + path)
                     self.loadCtrlFile(path)
@@ -1039,6 +1039,7 @@ class TnSeekFrame(MainFrame):
             if dlg.ShowModal() == wx.ID_OK:
                 paths = dlg.GetPaths()
                 print "You chose the following Control file(s):"
+                print paths
                 for fullpath in paths:
                     print "\t%s" % fullpath
                     self.loadCtrlFile(fullpath)
@@ -1935,4 +1936,61 @@ along with TRANSIT.  If not, see <http://www.gnu.org/licenses/>.
             traceback.print_exc()
 
 
+class AssumeZerosDialog(wx.Dialog):
+
+    def __init__(self, *args, **kw):
+
+        self.ID_HIMAR1 = wx.NewId()
+        self.ID_TN5 = wx.NewId()
+
+        wx.Dialog.__init__(self, None, title="Dialog")
+
+        self.ID_HIMAR1 = wx.NewId()
+        self.ID_TN5 = wx.NewId()
+
+        self.SetSize((500, 300))
+        self.SetTitle("Warning:  Wig Files Do Not Include Empty Sites")
+
+        mainSizer = wx.BoxSizer(wx.VERTICAL)
+        self.SetSizer(mainSizer)
+
+        warningText = """
+
+One or more of your .wig files does not include any empty sites (i.e. sites with zero read-counts). The analysis methods in TRANSIT require knowing ALL possible insertion sites, even those without reads.
+    
+    Please indicate how you want to proceed:
+
+    As Himar1: You will need to provide the DNA sequence (.fasta format) and TRANSIT will automatically determine empty TA sites.
+
+    As Tn5: TRANSIT will assume all nucleotides are possible insertion sites. Those not included in the .wig file are assumed to be zero.
+    """
+        warningStaticBox = wx.StaticText(self, wx.ID_ANY, warningText, (-1,-1), (-1, -1), wx.ALL)
+        warningStaticBox.Wrap(480)
+        mainSizer.Add(warningStaticBox)
+
+        button_sizer = wx.BoxSizer(wx.HORIZONTAL)
+        himar1Button = wx.Button(self, self.ID_HIMAR1, label='Proceed as Himar1')
+        tn5Button = wx.Button(self, self.ID_TN5, label='Proceed as Tn5')
+        cancelButton = wx.Button(self, wx.ID_CANCEL, label='Cancel')
+
+
+        button_sizer.Add(himar1Button, flag=wx.LEFT, border=5)
+        button_sizer.Add(tn5Button, flag=wx.LEFT, border=5)
+        button_sizer.Add(cancelButton, flag=wx.LEFT, border=5)
+
+        mainSizer.Add(button_sizer,
+            flag=wx.ALIGN_CENTER|wx.TOP|wx.BOTTOM, border=10)
+
+
+        himar1Button.Bind(wx.EVT_BUTTON, self.OnClose)
+        tn5Button.Bind(wx.EVT_BUTTON, self.OnClose)
+        cancelButton.Bind(wx.EVT_BUTTON, self.OnClose)
+
+
+    def OnClose(self, event):
+
+        if self.IsModal():
+            self.EndModal(event.EventObject.Id)
+        else:
+            self.Close()
 


=====================================
src/pytransit/transit_tools.py
=====================================
--- a/src/pytransit/transit_tools.py
+++ b/src/pytransit/transit_tools.py
@@ -41,11 +41,75 @@ import ntpath
 import numpy
 import scipy.optimize
 import scipy.stats
-import pytransit
 
+import warnings
+
+import pytransit
 import pytransit.tnseq_tools as tnseq_tools
 import pytransit.norm_tools as norm_tools
 
+
+if hasWx:
+    class AssumeZerosDialog(wx.Dialog):
+
+        def __init__(self, *args, **kw):
+
+            self.ID_HIMAR1 = wx.NewId()
+            self.ID_TN5 = wx.NewId()
+
+            wx.Dialog.__init__(self, None, title="Dialog")
+
+            self.ID_HIMAR1 = wx.NewId()
+            self.ID_TN5 = wx.NewId()
+    
+            self.SetSize((500, 300))
+            self.SetTitle("Warning:  Wig Files Do Not Include Empty Sites")
+
+            mainSizer = wx.BoxSizer(wx.VERTICAL)
+            self.SetSizer(mainSizer)
+    
+            warningText = """
+
+One or more of your .wig files does not include any empty sites (i.e. sites with zero read-counts). The analysis methods in TRANSIT require knowing ALL possible insertion sites, even those without reads.
+    
+    Please indicate how you want to proceed:
+
+    As Himar1: You will need to provide the DNA sequence (.fasta format) and TRANSIT will automatically determine empty TA sites.
+
+    As Tn5: TRANSIT will assume all nucleotides are possible insertion sites. Those not included in the .wig file are assumed to be zero.
+    """
+            warningStaticBox = wx.StaticText(self, wx.ID_ANY, warningText, (-1,-1), (-1, -1), wx.ALL)
+            warningStaticBox.Wrap(480)
+            mainSizer.Add(warningStaticBox, flag=wx.CENTER, border=5)
+    
+            button_sizer = wx.BoxSizer(wx.HORIZONTAL)
+            himar1Button = wx.Button(self, self.ID_HIMAR1, label='Proceed as Himar1')
+            tn5Button = wx.Button(self, self.ID_TN5, label='Proceed as Tn5')
+            cancelButton = wx.Button(self, wx.ID_CANCEL, label='Cancel')
+
+    
+            button_sizer.Add(himar1Button, flag=wx.LEFT, border=5)
+            button_sizer.Add(tn5Button, flag=wx.LEFT, border=5)
+            button_sizer.Add(cancelButton, flag=wx.LEFT, border=5)
+
+            mainSizer.Add(button_sizer,
+                flag=wx.ALIGN_CENTER|wx.TOP|wx.BOTTOM, border=10)
+
+
+            himar1Button.Bind(wx.EVT_BUTTON, self.OnClose)
+            tn5Button.Bind(wx.EVT_BUTTON, self.OnClose)
+            cancelButton.Bind(wx.EVT_BUTTON, self.OnClose)
+
+
+        def OnClose(self, event):
+    
+            if self.IsModal():
+                self.EndModal(event.EventObject.Id)
+            else:
+                self.Close()
+
+
+
 def aton(aa):
     #TODO: Write docstring
     return(((aa-1)*3)+1)
@@ -169,10 +233,12 @@ def validate_both_datasets(ctrldata, expdata):
         return False
     else:
         return True
-    
 
-def validate_filetypes(datasets, transposons, justWarn=True):
+
+def validate_transposons_used(datasets, transposons, justWarn=True): 
+
     #TODO: Write docstring
+    # Check if transposon type is okay.
     unknown = tnseq_tools.get_unknown_file_types(datasets, transposons)
     if unknown:
         if justWarn:
@@ -184,9 +250,47 @@ def validate_filetypes(datasets, transposons, justWarn=True):
         else:
             transit_error("Error: Some of the selected datasets look like they were created using transposons that this method was not intended to work with: %s." % (",". join(unknown)))
             return False
+
     return True
 
 
+
+def validate_wig_format(wig_list, wxobj=None):
+    # Check if the .wig files include zeros or not
+    status = 0
+    genome = ""
+    includesZeros = tnseq_tools.check_wig_includes_zeros(wig_list)
+
+    if sum(includesZeros) < len(includesZeros):
+        # If console mode, just print a warning
+        if not wxobj or not hasWx:
+            warnings.warn("\nOne or more of your .wig files does not include any empty sites (i.e. sites with zero read-counts). Proceeding as if data was Tn5 (all other sites assumed to be zero)!\n")
+            return (2, "")
+
+        # Else check their decision
+        dlg = AssumeZerosDialog()
+        result = dlg.ShowModal()
+        if result == dlg.ID_HIMAR1 and wxobj:
+            status = 1
+            # Get genome
+            wc = u"Known Sequence Extensions (*.fna,*.fasta)|*.fna;*.fasta;|\nAll files (*.*)|*.*"
+            gen_dlg = wx.FileDialog(wxobj, message="Save file as ...", defaultDir=os.getcwd(), defaultFile="", wildcard=wc, style=wx.OPEN)
+            if gen_dlg.ShowModal() == wx.ID_OK:
+                genome = gen_dlg.GetPath()
+            else:
+                genome = ""
+
+        elif result == dlg.ID_TN5:
+            status = 2; genome = "" 
+        else:
+            status = 3; genome = ""
+    return (status, genome)
+
+
+def validate_filetypes(datasets, transposons, justWarn=True):
+    validate_transposons_used(datasets, transposons, justWarn)
+
+
 def get_pos_hash(path):
     """Returns a dictionary that maps coordinates to a list of genes that occur at that coordinate.
     
@@ -277,7 +381,47 @@ def convertToCombinedWig(dataset_list, annotationPath, outputPath, normchoice="n
         output.write("#%s\n" % f)
 
     for i,pos in enumerate(position):
-        output.write("%-10d %s  %s\n" % (position[i], "".join(["%7.1f" % c for c in fulldata[:,i]]),",".join(["%s (%s)" % (orf,rv2info.get(orf,["-"])[0]) for orf in hash.get(position[i], [])])   ))
+        #output.write("%-10d %s  %s\n" % (position[i], "".join(["%7.1f" % c for c in fulldata[:,i]]),",".join(["%s (%s)" % (orf,rv2info.get(orf,["-"])[0]) for orf in hash.get(position[i], [])])   ))
+        output.write("%d\t%s\t%s\n" % (position[i], "\t".join(["%1.1f" % c for c in fulldata[:,i]]),",".join(["%s (%s)" % (orf,rv2info.get(orf,["-"])[0]) for orf in hash.get(position[i], [])])   ))
     output.close()
 
 
+
+
+def get_validated_data(wig_list, wxobj=None):
+    """ Returns a tuple of (data, position) containing a matrix of raw read-counts
+        , and list of coordinates. 
+
+    Arguments:
+        wig_list (list): List of paths to wig files.
+        wxobj (object): wxPython GUI object for warnings
+
+    Returns:
+        tuple: Two lists containing data and positions of the wig files given.
+
+    :Example:
+
+        >>> import pytransit.tnseq_tools as tnseq_tools
+        >>> (data, position) = tnseq_tools.get_validated_data(["data/glycerol_H37Rv_rep1.wig", "data/glycerol_H37Rv_rep2.wig"])
+        >>> print data
+        array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
+               [ 0.,  0.,  0., ...,  0.,  0.,  0.]])
+
+    .. seealso:: :class:`get_file_types` :class:`combine_replicates` :class:`get_data_zero_fill` :class:`pytransit.norm_tools.normalize_data`
+    """
+
+    (status, genome) = validate_wig_format(wig_list, wxobj=wxobj)
+
+    # Regular file with empty sites
+    if status == 0:
+        return tnseq_tools.get_data(wig_list)    
+    # No empty sites, decided to proceed as Himar1
+    elif status == 1:
+        return tnseq_tools.get_data_w_genome(wig_list, genome)
+    # No empty sites, decided to proceed as Tn5
+    elif status == 2:
+        return tnseq_tools.get_data_zero_fill(wig_list)
+    # Didn't choose either.... what!?
+    else:
+        return tnseq_tools.get_data([])
+


=====================================
src/tpp.py
=====================================
--- a/src/tpp.py
+++ b/src/tpp.py
@@ -18,10 +18,10 @@
 #
 #    You should have received a copy of the GNU General Public License
 #    along with TRANSIT.  If not, see <http://www.gnu.org/licenses/>.
-
+import sys
 import pytpp.__main__
 
 
 if __name__ == "__main__":
-    pytpp.__main__.main()
+    pytpp.__main__.main(sys.argv[1:])
 



View it on GitLab: https://salsa.debian.org/med-team/tnseq-transit/commit/815892c5deedbdfc44330e694ba8b230cd714286

---
View it on GitLab: https://salsa.debian.org/med-team/tnseq-transit/commit/815892c5deedbdfc44330e694ba8b230cd714286
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180522/9cc9e341/attachment-0001.html>


More information about the debian-med-commit mailing list