[med-svn] [Git][med-team/fieldbioinformatics][master] 2 commits: Revert ca51297dc37f669b10a826ade80ea9bb2e534477 since Python2 is EOL and will...
Andreas Tille
gitlab at salsa.debian.org
Thu Apr 9 19:49:55 BST 2020
Andreas Tille pushed to branch master at Debian Med / fieldbioinformatics
Commits:
5849df35 by Andreas Tille at 2020-04-09T20:38:09+02:00
Revert ca51297dc37f669b10a826ade80ea9bb2e534477 since Python2 is EOL and will be removed from Debian
- - - - -
7a353aab by Andreas Tille at 2020-04-09T20:49:36+02:00
result of 2to3
- - - - -
4 changed files:
- debian/control
- + debian/patches/2to3.patch
- + debian/patches/series
- debian/rules
Changes:
=====================================
debian/control
=====================================
@@ -3,7 +3,7 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.
Uploaders: Malihe Asemani <ml.asemani at gmail.com>
Section: science
Priority: optional
-Build-Depends: debhelper-compat (=12), python-all, python-setuptools, python3-all, python3-setuptools, dh-python
+Build-Depends: debhelper-compat (=12), python3-all, python3-setuptools, dh-python
Standards-Version: 4.1.3
Vcs-Browser: https://salsa.debian.org/med-team/fieldbioinformatics
Vcs-Git: https://salsa.debian.org/med-team/fieldbioinformatics.git
@@ -12,8 +12,7 @@ Homepage: https://github.com/artic-network/fieldbioinformatics
Package: fieldbioinformatics
Architecture: any
Depends: ${python3:Depends},
- ${python:Depends},
- ${misc:Depends}
+ ${misc:Depends}
Description: pipeline for working with virus sequencing data sequenced with nanopore
This is the ARTIC bioinformatics pipeline for working with virus sequencing
data, sequenced with nanopore. Nanopore is a complete bioinformatics
=====================================
debian/patches/2to3.patch
=====================================
@@ -0,0 +1,1009 @@
+Author: Andreas Tille <tille at debian.org>
+Last-Update: Thu, 09 Apr 2020 20:48:07 +0200
+Description: Result of
+ find . -name "*.py" -exec 2to3 -wn \{\} \;
+ to port from Python2 to Python3
+
+--- a/ansible/generate_user_passwords.py
++++ b/ansible/generate_user_passwords.py
+@@ -36,7 +36,7 @@ def get_users_from_stdin():
+ return users
+
+ def usage():
+- print("Usage: {} [-l <length>] USER1[:PASSWORD1] USER2[:PASSWORD2] USER3[:PASSWORD3] ...".format( sys.argv[0] ) )
++ print(("Usage: {} [-l <length>] USER1[:PASSWORD1] USER2[:PASSWORD2] USER3[:PASSWORD3] ...".format( sys.argv[0] ) ))
+ print (" User names can also be passed via stdin (each username:password on a separate line)")
+
+
+@@ -69,7 +69,7 @@ def main(argv):
+ sys.exit(0)
+ if (opt == '-l' or opt == '--length'):
+ password_length = int(arg)
+- print (generate_yaml( usernames, password_length ))
++ print((generate_yaml( usernames, password_length )))
+
+ if __name__ == '__main__' :
+ main(sys.argv[1:])
+--- a/artic/align_trim_fasta.py
++++ b/artic/align_trim_fasta.py
+@@ -6,8 +6,8 @@
+ import argparse
+ import pysam
+ import sys
+-from align_trim import find_primer
+-from vcftagprimersites import read_bed_file
++from .align_trim import find_primer
++from .vcftagprimersites import read_bed_file
+
+ def find_query_pos(alignment, reference_pos):
+ nearest = -1
+@@ -37,12 +37,12 @@ def go(args):
+ query_align_start = find_query_pos(s, primer_start)
+ query_align_end = find_query_pos(s, primer_end)
+
+- print >> sys.stderr, "%s\t%s\t%s\t%s" % (primer_start, primer_end, primer_end - primer_start, s.query_length)
++ print("%s\t%s\t%s\t%s" % (primer_start, primer_end, primer_end - primer_start, s.query_length), file=sys.stderr)
+
+ startpos = max(0, query_align_start - 40)
+ endpos = min(query_align_end+40, s.query_length)
+
+- print ">%s\n%s" % (s.query_name, s.query_sequence[startpos:endpos])
++ print(">%s\n%s" % (s.query_name, s.query_sequence[startpos:endpos]))
+ #query_align_end + 30])
+
+ parser = argparse.ArgumentParser(description='Trim alignments from an amplicon scheme.')
+--- a/artic/checkdir.py
++++ b/artic/checkdir.py
+@@ -5,4 +5,4 @@ import sys
+ passexists = os.path.exists('data/%s/pass' % (sys.argv[1]))
+ failexists = os.path.exists('data/%s/fail' % (sys.argv[1]))
+
+-print "%s\t%s\t%s" % (sys.argv[1], passexists, failexists)
++print("%s\t%s\t%s" % (sys.argv[1], passexists, failexists))
+--- a/artic/collatestats.py
++++ b/artic/collatestats.py
+@@ -6,12 +6,12 @@ for fn in sys.argv[1:]:
+ fh = open(fn)
+ headers = fh.readline()
+ if not headerprinted:
+- print "filename\t%s" % (headers),
++ print("filename\t%s" % (headers), end=' ')
+ headerprinted = True
+
+
+ for ln in fh:
+- print "%s\t%s" % (fn, ln),
++ print("%s\t%s" % (fn, ln), end=' ')
+
+
+
+--- a/artic/collect_quals.py
++++ b/artic/collect_quals.py
+@@ -17,13 +17,13 @@ def get_runs(dataset):
+ return cur.fetchall()
+
+ runs = get_runs(sys.argv[2])
+-print "sample aln query read_type read_len align_len unalign_len matches mismatches insertions deletions tot_errors"
++print("sample aln query read_type read_len align_len unalign_len matches mismatches insertions deletions tot_errors")
+
+ for row in runs:
+ fh = open("EM_079517_%s_hq_marginalign.idystats.txt" % (row['batch']))
+ fh.readline()
+ for ln in fh:
+- print "%s\tma\t%s" % (row['batch'], ln),
++ print("%s\tma\t%s" % (row['batch'], ln), end=' ')
+ fh.close()
+
+ # fh = open("EM_079517_%s_hq_bwa.idystats.txt" % (row['batch']))
+--- a/artic/collect_times.py
++++ b/artic/collect_times.py
+@@ -25,12 +25,12 @@ t2 = collect_times('data/%s/fail' % (run
+ with open("times/%s.times.txt" % (run,), "w") as fh:
+ start_time = float(min(t1[0], t2[0]))
+ end_time = float(max(t1[1], t2[1]))
+- print >>fh, "%s\t%s\t%s\t%s\t%s" % (
++ print("%s\t%s\t%s\t%s\t%s" % (
+ t1[2],
+ t2[2],
+ strftime('%F %T', localtime(start_time)),
+ strftime('%F %T', localtime(end_time)),
+ end_time - start_time
+- )
++ ), file=fh)
+
+
+--- a/artic/combineruns.py
++++ b/artic/combineruns.py
+@@ -2,16 +2,16 @@
+ import csv
+ import sys
+ from collections import defaultdict
+-from runs import get_runs
++from .runs import get_runs
+
+ runs = get_runs()
+
+ ref = sys.argv[1]
+ primer_scheme = sys.argv[2]
+
+-for run, samples in runs.iteritems():
+- print "cd %s" % (run,)
+- print "go_zika.sh %s ../newdata/%s/downloads/pass %s" % (ref, run, primer_scheme)
+- for sample, barcodes in samples.iteritems():
+- print "variants.sh %s %s %s %s" % (ref, sample, primer_scheme, " ".join(barcodes))
+- print "cd .."
++for run, samples in runs.items():
++ print("cd %s" % (run,))
++ print("go_zika.sh %s ../newdata/%s/downloads/pass %s" % (ref, run, primer_scheme))
++ for sample, barcodes in samples.items():
++ print("variants.sh %s %s %s %s" % (ref, sample, primer_scheme, " ".join(barcodes)))
++ print("cd ..")
+--- a/artic/convertscheme.py
++++ b/artic/convertscheme.py
+@@ -10,6 +10,6 @@ for ln in open(sys.argv[1]):
+
+ a,pair,b = cols[3].split('_')
+
+- print "%s\t%s\t%s\t%s\t%s\t%s\t%s" % (cols[0], cols[1], cols[2], cols[3], 0, direction, pair)
++ print("%s\t%s\t%s\t%s\t%s\t%s\t%s" % (cols[0], cols[1], cols[2], cols[3], 0, direction, pair))
+
+
+--- a/artic/copyunprocessedfiles.py
++++ b/artic/copyunprocessedfiles.py
+@@ -28,7 +28,7 @@ for root, dirs, files in os.walk(input_d
+ os.makedirs(checkdir)
+ movefrom = input_dir + '/' + albacore_root + '/' + name
+ moveto = process_dir + '/' + albacore_root + '/' + name
+- print "Copy %s to %s" % (movefrom, moveto)
++ print("Copy %s to %s" % (movefrom, moveto))
+ shutil.copy(movefrom, moveto)
+
+
+--- a/artic/countreads.py
++++ b/artic/countreads.py
+@@ -13,5 +13,5 @@ cmd = "UPDATE runs SET num_reads_align =
+ get_aligned("EM_079517_%s_hq_marginalign.sorted.bam" % (sys.argv[3],)),
+ sys.argv[2]
+ )
+-print cmd
++print(cmd)
+
+--- a/artic/cov.py
++++ b/artic/cov.py
+@@ -5,7 +5,7 @@ import sys
+ from tabulate import tabulate
+ from pandas import DataFrame
+ import collections
+-from runs import get_runs
++from .runs import get_runs
+ from operator import attrgetter
+ from Bio import SeqIO
+
+@@ -27,11 +27,11 @@ class OrderedDefaultdict(collections.Ord
+
+ def __reduce__(self): # optional, for pickle support
+ args = (self.default_factory,) if self.default_factory else tuple()
+- return self.__class__, args, None, None, self.iteritems()
++ return self.__class__, args, None, None, iter(self.items())
+
+ def __repr__(self): # optional
+ return '%s(%r, %r)' % (self.__class__.__name__, self.default_factory,
+- list(self.iteritems()))
++ list(self.items()))
+
+ def shell(cmd):
+ p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE)
+@@ -67,18 +67,18 @@ OrderedDefaultdict(list)
+
+ runs = get_runs()
+ directory = '.'
+-for directory, samples in runs.iteritems():
+- for sample in samples.keys():
++for directory, samples in runs.items():
++ for sample in list(samples.keys()):
+ s = Stat("%s/%s" % (directory, sample), reflen)
+ a = OrderedDefaultdict()
+ a['run'] = directory
+ a['sample'] = sample
+- for k,v in s.hash().iteritems():
++ for k,v in s.hash().items():
+ a[k] = v
+ table.append(a)
+
+ headers = table[0]
+-print "\t".join(headers.keys())
++print("\t".join(list(headers.keys())))
+ for row in table:
+- print "\t".join([str(s) for s in row.values()])
++ print("\t".join([str(s) for s in list(row.values())]))
+
+--- a/artic/coverages.py
++++ b/artic/coverages.py
+@@ -13,7 +13,7 @@ def collect_depths(bamfile):
+ if not os.path.exists(bamfile):
+ raise SystemExit("bamfile %s doesn't exist" % (bamfile,))
+
+- print >>sys.stderr, bamfile
++ print(bamfile, file=sys.stderr)
+
+ p = subprocess.Popen(['samtools', 'depth', bamfile],
+ stdout=subprocess.PIPE)
+@@ -28,4 +28,4 @@ def collect_depths(bamfile):
+ bamfn = "EM_079517_%s_marginalign.sorted.bam" % (sys.argv[4])
+ depths = collect_depths(bamfn)
+ covered = len([a for a in depths if a >= 25])
+-print "UPDATE runs SET mean_cov = %s, median_cov = %s, covered = %s WHERE batch = '%s';" % (numpy.mean(depths), numpy.median(depths), covered, sys.argv[2])
++print("UPDATE runs SET mean_cov = %s, median_cov = %s, covered = %s WHERE batch = '%s';" % (numpy.mean(depths), numpy.median(depths), covered, sys.argv[2]))
+--- a/artic/fasta.py
++++ b/artic/fasta.py
+@@ -7,19 +7,19 @@ def extract_fast5(path, basecaller, flow
+ for fast5 in Fast5FileSet(path, 0, basecaller):
+ read_flowcell_id= fast5.get_flowcell_id()
+ if flowcell_id != read_flowcell_id:
+- print >>sys.stderr, "Skipping read from flowcell: %s" % (read_flowcell_id)
++ print("Skipping read from flowcell: %s" % (read_flowcell_id), file=sys.stderr)
+ continue
+
+ read_id = fast5.get_read_id()
+ if read_id in reads:
+- print >>sys.stderr, "Skipping duplicate read: %s" % (read_id)
++ print("Skipping duplicate read: %s" % (read_id), file=sys.stderr)
+ continue
+
+ reads.add(read_id)
+
+ fas = fast5.get_fastas('fwd')
+ for read in fas:
+- print read
++ print(read)
+ fast5.close()
+
+ extract_fast5(sys.argv[1], 'ONT Albacore Sequencing Software=1.0.4', sys.argv[2])
+--- a/artic/generate_csv_ig.py
++++ b/artic/generate_csv_ig.py
+@@ -7,7 +7,7 @@ from Bio import SeqIO
+ import re
+
+ def clean(s):
+- s = s.decode("utf-8").replace(u"\u00E9", "e").encode("utf-8")
++ s = s.decode("utf-8").replace("\u00E9", "e").encode("utf-8")
+ return s
+
+ public = 1
+@@ -39,10 +39,10 @@ for line in lines:
+ line.append(line[2])
+ newlist.append(['EBOV', line[0], 'Goodfellow', 'SLE', line[2] + '--', line[3], line[4]])
+ else:
+- print >>sys.stderr, line
++ print(line, file=sys.stderr)
+ locstring = line[4].split('-')
+- if locstring[0] in locations.keys():
+- if '_'.join([locstring[0], locstring[1]]) in locations.keys():
++ if locstring[0] in list(locations.keys()):
++ if '_'.join([locstring[0], locstring[1]]) in list(locations.keys()):
+ line.append('_'.join([locstring[0], locstring[1]]))
+ line.append(locstring[0])
+ else:
+@@ -53,7 +53,7 @@ for line in newlist:
+ counts[line[6]] += 1
+
+ colourDict= {}
+-for n, (key, value) in enumerate(sorted(counts.items(), key=itemgetter(1), reverse=True)):
++for n, (key, value) in enumerate(sorted(list(counts.items()), key=itemgetter(1), reverse=True)):
+ colourDict[key] = colours[n]
+
+ header = 'tree_id,id,__latitude,__longitude,prefec,prefec__shape,prefec__colour,date,__day,__month,__year,all'
+--- a/artic/generate_report.py
++++ b/artic/generate_report.py
+@@ -71,31 +71,31 @@ Alignments of differences within cluster
+
+ """ % (dist, prefix)
+
+-print report
++print(report)
+
+ def dump_cluster(c):
+ if os.path.exists("%s_dist_%s_aligned_short.fasta-cluster%d.png" % (prefix, dist, c)):
+- print """
++ print("""
+ ## Subcluster %d
+- """ % (c)
++ """ % (c))
+
+ if c == 0:
+- print "Cluster 0 represents isolates that do not cluster with any other isolates within the distance cut-off, i.e. singleton sequences. The sequences presented are unrelated."
++ print("Cluster 0 represents isolates that do not cluster with any other isolates within the distance cut-off, i.e. singleton sequences. The sequences presented are unrelated.")
+
+- print """
++ print("""
+ ![Subcluster %d](%s_dist_%s_aligned_short.fasta-cluster%d.pdf.png)
+-""" % (c, prefix, dist, c)
++""" % (c, prefix, dist, c))
+ else:
+- print """
++ print("""
+ ## Subcluster %s
+
+ (Tree not shown for clusters with <5 isolates)
+
+ Isolates:
+
+- """ % (c)
++ """ % (c))
+ for rec in SeqIO.parse(open("%s_dist_%s_aligned_short.fasta-cluster%d" % (prefix, dist, c)), "fasta"):
+- print " - %s" % (rec.id)
++ print(" - %s" % (rec.id))
+
+
+ for c in clusters:
+--- a/artic/generate_tree_figure.py
++++ b/artic/generate_tree_figure.py
+@@ -50,7 +50,7 @@ def get_meta_new(metadata, big_tree):
+ def get_colours(clusters, tree, colours):
+ #get a list of prefectures for both clusters
+ both_leaves = []
+- for c in [key for key in clusters.keys() if key in ['SL3', 'GN1']]:
++ for c in [key for key in list(clusters.keys()) if key in ['SL3', 'GN1']]:
+ b = ["'" + clusters[c][0] + "'", "'" + clusters[c][1] + "'"]
+ for a in tree.get_common_ancestor(b).get_leaves():
+ both_leaves.append(a.name[1:-1])
+@@ -62,16 +62,16 @@ def get_colours(clusters, tree, colours)
+ #print each, metadata[each]['instrument'], metadata[each]['prefec']
+ if metadata[each]['instrument'] == 'MinION':
+ counts[metadata[each]['prefec']] += 1
+- for n, (key, value) in enumerate(sorted(counts.items(), key=itemgetter(1), reverse=True)):
++ for n, (key, value) in enumerate(sorted(list(counts.items()), key=itemgetter(1), reverse=True)):
+ colourDict[key] = colours[n]
+- for each in counts.keys():
+- print '%s\t%s\t%s' %(each, counts[each], colourDict[each])
++ for each in list(counts.keys()):
++ print('%s\t%s\t%s' %(each, counts[each], colourDict[each]))
+ return colourDict
+
+ #render tree function
+ def render_tree(tree, mode, cluster, colourDict, width, position):
+ duplicates = ["'EBOV|EMLab-RT|KG12||GIN|Boke|?|MinION_LQ05|2015-05-27'", "'EBOV|EMLab-RT|KG45||GIN|Boke|?|MinION_LQ10|2015-06-09'", "'EBOV|EMLab-RT|KG90||GIN|Boke|?|MinION_LQ05|2015-06-19'", "'EBOV|EMLab-RT|KG91||GIN|Boke|?|MinION_LQ05|2015-06-20'"]
+- print 'Running %s: %s cluster' %(mode, cluster)
++ print('Running %s: %s cluster' %(mode, cluster))
+ if mode == 'small':
+ #delete unwanted leaves
+ keep_leaves = []
+@@ -81,7 +81,7 @@ def render_tree(tree, mode, cluster, col
+ delete_leaves = [leaf for leaf in tree.get_leaf_names() if leaf not in keep_leaves]
+ #if cluster == 'Boke':
+ # delete_leaves.extend(duplicates)
+- print 'Keeping %s leaves' %len(keep_leaves)
++ print('Keeping %s leaves' %len(keep_leaves))
+ for leaf in delete_leaves:
+ if tree.search_nodes(name=leaf)[0]:
+ n = tree.search_nodes(name=leaf)[0]
+@@ -141,7 +141,7 @@ if mode == 'small':
+ ts.scale = 750000
+
+ #add legend
+-for each in colourDict.keys():
++for each in list(colourDict.keys()):
+ ts.legend.add_face(CircleFace(radius=size[mode]/2, color=colourDict[each]), column=0)
+ ts.legend.add_face(TextFace(each, ftype="Helvetica", fsize=size[mode]), column=1)
+ ts.legend.add_face(CircleFace(radius=size[mode]/2, color='#F1F1F1'), column=0)
+@@ -167,8 +167,8 @@ if mode == 'big':
+ cluster = 'big'
+ render_tree(big_tree, mode, cluster, colourDict, width=2000, position='float')
+ elif mode == 'small':
+- for cluster in clusters.keys():
++ for cluster in list(clusters.keys()):
+ tree = copy.deepcopy(big_tree)
+ render_tree(tree, mode, cluster, colourDict, width=4000, position='branch-right')
+ else:
+- print 'Mode not recognised: %s' %mode
++ print('Mode not recognised: %s' %mode)
+--- a/artic/get-alignment.py
++++ b/artic/get-alignment.py
+@@ -11,7 +11,7 @@ def main(args):
+ ids = set([record.id for record in records])
+ lens = set([len(record.seq) for record in records])
+ if len(lens) != 1:
+- print 'Sequence lengths not equal...'
++ print('Sequence lengths not equal...')
+ sys.exit()
+ ignore = set(['-'])
+ discrim = defaultdict(str)
+@@ -22,12 +22,12 @@ def main(args):
+ continue
+
+ if args.minfreq:
+- counts = [ids_alleles.values().count('A'), ids_alleles.values().count('G'), ids_alleles.values().count('T'), ids_alleles.values().count('C')]
++ counts = [list(ids_alleles.values()).count('A'), list(ids_alleles.values()).count('G'), list(ids_alleles.values()).count('T'), list(ids_alleles.values()).count('C')]
+ counts.sort(reverse=True)
+ if counts[0] < args.minfreq or counts[1] < args.minfreq:
+ continue
+
+- print >>sys.stderr, "%s\t%s" % (posn+1, ids_alleles.values())
++ print("%s\t%s" % (posn+1, list(ids_alleles.values())), file=sys.stderr)
+
+
+ discrim_pos.append(posn)
+@@ -39,7 +39,7 @@ def main(args):
+
+ # print >>sys.stderr, discrim_pos
+ for each in discrim:
+- print ">%s\n%s" % (each, discrim[each])
++ print(">%s\n%s" % (each, discrim[each]))
+
+ if __name__ == '__main__':
+ import argparse
+--- a/artic/intersection_vcf.py
++++ b/artic/intersection_vcf.py
+@@ -36,9 +36,9 @@ for vcffile in sys.argv[2:]:
+ fn = float(len(truthset - vcfset))
+ tpr = tp / (tp + fn)
+
+- print "%s\t%s\t%s\t%s\t%s\t%s" % (vcffile, len(vcfset), len(truthset), tp, fn, tpr)
++ print("%s\t%s\t%s\t%s\t%s\t%s" % (vcffile, len(vcfset), len(truthset), tp, fn, tpr))
+ for sample in vcfset & truthset:
+- print >>sys.stderr, sample, vcfinfo[sample]
+- print >>sys.stderr, "Missing: %s" % (truthset - vcfset)
+- print >>sys.stderr, "Extra: %s" % (vcfset - truthset)
++ print(sample, vcfinfo[sample], file=sys.stderr)
++ print("Missing: %s" % (truthset - vcfset), file=sys.stderr)
++ print("Extra: %s" % (vcfset - truthset), file=sys.stderr)
+
+--- a/artic/intersection_vcf_interrogate.py
++++ b/artic/intersection_vcf_interrogate.py
+@@ -29,14 +29,14 @@ def read_vcf(fn):
+
+ def filter_set(vcfinfo, threshold):
+ vcfset = set()
+- for k, v in vcfinfo.iteritems():
++ for k, v in vcfinfo.items():
+ if v >= threshold:
+ vcfset.add(k)
+ return vcfset
+
+ #print "pp-threshold\tvcf\ttotal_calls\tmutations\tTP\tFN\tTPR"
+
+-print "tag\tpos\tbasecalledfreq\tsupportfraction\tsupportingreads\tqual\tstate"
++print("tag\tpos\tbasecalledfreq\tsupportfraction\tsupportingreads\tqual\tstate")
+
+ for ln in open(sys.argv[1]):
+ sample, tag, truthset_fn, vcffile_fn = ln.rstrip().split("\t")
+@@ -45,7 +45,7 @@ for ln in open(sys.argv[1]):
+ try:
+ vcfinfo = read_vcf(vcffile_fn)
+ except IOError:
+- print >>sys.stderr, "Cannot open %s" % (vcffile_fn)
++ print("Cannot open %s" % (vcffile_fn), file=sys.stderr)
+ continue
+
+ vcf_reader = vcf.Reader(open(vcffile_fn, 'r'))
+@@ -57,11 +57,11 @@ for ln in open(sys.argv[1]):
+ else:
+ state = 'Unknown'
+
+- print "%s\t%s\t%s\t%s\t%s\t%s\t%s" % (tag, record.POS, record.INFO['BaseCalledFrequency'][0],
++ print("%s\t%s\t%s\t%s\t%s\t%s\t%s" % (tag, record.POS, record.INFO['BaseCalledFrequency'][0],
+ record.INFO['SupportFraction'][0],
+ record.INFO['SupportingReads'][0],
+ record.QUAL,
+- state)
++ state))
+
+
+ continue
+@@ -73,7 +73,7 @@ for ln in open(sys.argv[1]):
+ fn = float(len(truthset - vcfset))
+ tpr = tp / (tp + fn)
+
+- print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (sample, tag, threshold, vcffile_fn, len(vcfset), len(truthset), tp, fn, tpr)
++ print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (sample, tag, threshold, vcffile_fn, len(vcfset), len(truthset), tp, fn, tpr))
+
+ #for sample in vcfset & truthset:
+ # print vcfinfo[sample]
+--- a/artic/intersection_vcf_stats.py
++++ b/artic/intersection_vcf_stats.py
+@@ -30,12 +30,12 @@ def read_vcf(fn):
+
+ def filter_set(vcfinfo, threshold):
+ vcfset = set()
+- for k, v in vcfinfo.iteritems():
++ for k, v in vcfinfo.items():
+ if v >= threshold:
+ vcfset.add(k)
+ return vcfset
+
+-print "sample\ttag\tpp-threshold\tvcf\ttotal_calls\tmutations\tTP\tFP\tFN\tTPR"
++print("sample\ttag\tpp-threshold\tvcf\ttotal_calls\tmutations\tTP\tFP\tFN\tTPR")
+
+
+ for ln in open(sys.argv[1]):
+@@ -45,7 +45,7 @@ for ln in open(sys.argv[1]):
+ try:
+ vcfinfo = read_vcf(vcffile_fn)
+ except IOError:
+- print >>sys.stderr, "Cannot open %s" % (vcffile_fn)
++ print("Cannot open %s" % (vcffile_fn), file=sys.stderr)
+ continue
+
+ for threshold in [0.0]:
+@@ -57,7 +57,7 @@ for ln in open(sys.argv[1]):
+ fp = float(len(vcfset - truthset))
+ tpr = tp / (tp + fn)
+
+- print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (sample, tag, threshold, vcffile_fn, len(vcfset), len(truthset), tp, fp, fn, tpr)
++ print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (sample, tag, threshold, vcffile_fn, len(vcfset), len(truthset), tp, fp, fn, tpr))
+ # print "FN: %s" % (truthset - vcfset)
+ # print "FP: %s" % (vcfset - truthset)
+
+--- a/artic/lengths.py
++++ b/artic/lengths.py
+@@ -3,4 +3,4 @@
+ import sys
+ from Bio import SeqIO
+
+-for rec in SeqIO.parse(sys.stdin, "fasta"): print rec.id, len(rec)
++for rec in SeqIO.parse(sys.stdin, "fasta"): print(rec.id, len(rec))
+--- a/artic/make_stats_file.py
++++ b/artic/make_stats_file.py
+@@ -27,25 +27,25 @@ for row in runs:
+ # )
+ # for refnum in [1,2,3,4,5]:
+ for refnum in [2]:
+- print "%s_hq\tnp-new\t%s\t%s" % (
++ print("%s_hq\tnp-new\t%s\t%s" % (
+ row['Batch'],
+ '../refs/EM_079517_mut30_%s.mutations.txt' % (refnum),
+ 'np_EM_079517_mut30_%s_%s_hq.vcf' % (refnum, row['Batch'])
+- )
++ ))
+ # print "%s\tnp-new-filter\t%s\t%s" % (
+ # row['Batch'],
+ # 'EM_079517_mut30_2.mutations.txt',
+ # '%s_hq_EM_079517_mut30_2_np_primer.filtered.vcf' % (row['Batch'])
+ # )
+- print "%s\tnp-new-filter075-30\t%s\t%s" % (
++ print("%s\tnp-new-filter075-30\t%s\t%s" % (
+ row['Batch'],
+ '../refs/EM_079517_mut30_2.mutations.txt',
+ '%s_hq_EM_079517_mut30_2_np_primer.filtered075_30.vcf' % (row['Batch'])
+- )
+- print "%s\tnp-new-filter_qual200-50\t%s\t%s" % (
++ ))
++ print("%s\tnp-new-filter_qual200-50\t%s\t%s" % (
+ row['Batch'],
+ '../refs/EM_079517_mut30_2.mutations.txt',
+ '%s_hq_EM_079517_mut30_2_np_primer.filtered_qual200.vcf' % (row['Batch'])
+- )
++ ))
+
+
+--- a/artic/makecommands.py
++++ b/artic/makecommands.py
+@@ -26,14 +26,14 @@ for row in runs:
+ for ref in refs:
+ batch2 = row['batch2'] if row['batch2'] else 'na'
+ if len(sys.argv) > 3 and sys.argv[3] == 'consensus':
+- print "consensus.sh ",
++ print("consensus.sh ", end=' ')
+ elif len(sys.argv) > 3:
+- print sys.argv[3] + " ",
++ print(sys.argv[3] + " ", end=' ')
+ else:
+- print "align.sh ",
++ print("align.sh ", end=' ')
+ if len(sys.argv) > 4:
+- print "%s %s %s %s_hq %s hq %s" % \
+- (ref, row['batch'], row['batch'], row['batch'], batch2, " ".join(sys.argv[4:]))
++ print("%s %s %s %s_hq %s hq %s" % \
++ (ref, row['batch'], row['batch'], row['batch'], batch2, " ".join(sys.argv[4:])))
+ else:
+- print "%s %s %s %s_hq %s hq" % \
+- (ref, row['batch'], row['batch'], row['batch'], batch2)
++ print("%s %s %s %s_hq %s hq" % \
++ (ref, row['batch'], row['batch'], row['batch'], batch2))
+--- a/artic/movematchfiles.py
++++ b/artic/movematchfiles.py
+@@ -23,7 +23,7 @@ for root, dirs, files in os.walk(input_d
+ os.makedirs(checkdir)
+ movefrom = input_dir + '/' + albacore_root + '/' + name
+ moveto = output_dir + '/' + albacore_root + '/' + name
+- print "Move %s to %s" % (movefrom, moveto)
++ print("Move %s to %s" % (movefrom, moveto))
+ shutil.move(movefrom, moveto)
+
+
+--- a/artic/moveprocessedfiles.py
++++ b/artic/moveprocessedfiles.py
+@@ -23,7 +23,7 @@ for root, dirs, files in os.walk(input_d
+ os.makedirs(checkdir)
+ movefrom = input_dir + '/' + albacore_root + '/' + name
+ moveto = process_dir + '/' + albacore_root + '/' + name
+- print "Move %s to %s" % (movefrom, moveto)
++ print("Move %s to %s" % (movefrom, moveto))
+ shutil.move(movefrom, moveto)
+
+
+--- a/artic/mungeheaders.py
++++ b/artic/mungeheaders.py
+@@ -7,4 +7,4 @@ for ln in open(sys.argv[1]):
+ if '00000000-0000-0000-0000-000000000000' in ln:
+ ln = ln.replace('00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-%012d' % (id))
+ id += 1
+- print ln,
++ print(ln, end=' ')
+--- a/artic/nanopolish_header.py
++++ b/artic/nanopolish_header.py
+@@ -5,7 +5,7 @@ import sys
+
+ recs = list(SeqIO.parse(open(sys.argv[1], "r"), "fasta"))
+ if len (recs) != 1:
+- print >>sys.stderr, "FASTA has more than one sequence"
++ print("FASTA has more than one sequence", file=sys.stderr)
+ raise SystemExit
+
+-print "%s:%d-%d" % (recs[0].id, 1, len(recs[0])+1)
++print("%s:%d-%d" % (recs[0].id, 1, len(recs[0])+1))
+--- a/artic/pdf_tree.py
++++ b/artic/pdf_tree.py
+@@ -1,8 +1,9 @@
+ #!/usr/bin/env python
+
+ import sys
++import importlib
+
+-reload(sys)
++importlib.reload(sys)
+ sys.setdefaultencoding( 'ISO8859-1' )
+
+ from ete3 import Tree, NodeStyle, TreeStyle, CircleFace, TextFace, PhyloTree, faces
+@@ -49,7 +50,7 @@ def read_positions(fn):
+ with open(fn) as csvfile:
+ for ln in csvfile:
+ cols = ln.split("\t")
+- print cols[0]
++ print(cols[0])
+ positions.append(int(cols[0]))
+ return positions
+
+@@ -172,7 +173,7 @@ def main(args):
+ #legend
+ if args.legend:
+ legend = {}
+- for s in samples.values():
++ for s in list(samples.values()):
+ legend[s['prefec']] = s['prefec__colour']
+ for p in sorted(legend.keys()):
+ ts.legend.add_face(CircleFace(4, legend[p]), column=0)
+--- a/artic/quality.py
++++ b/artic/quality.py
+@@ -3,6 +3,6 @@ import sys
+ import numpy
+
+ for record in SeqIO.parse(sys.argv[1], "fastq"):
+- print numpy.mean(record.letter_annotations["phred_quality"])
++ print(numpy.mean(record.letter_annotations["phred_quality"]))
+
+
+--- a/artic/rampart.py
++++ b/artic/rampart.py
+@@ -25,10 +25,10 @@ def run(parser, args):
+ read_file = "%s.fasta" % (args.sample)
+
+ if not os.path.exists(ref):
+- print(colored.red('Scheme reference file not found: ') + ref)
++ print((colored.red('Scheme reference file not found: ') + ref))
+ raise SystemExit
+ if not os.path.exists(bed):
+- print(colored.red('Scheme BED file not found: ') + bed)
++ print((colored.red('Scheme BED file not found: ') + bed))
+ raise SystemExit
+
+ cmds.append("bwa index %s" % (ref,))
+--- a/artic/root.py
++++ b/artic/root.py
+@@ -9,6 +9,6 @@ root = sys.argv[2]
+ t = Tree(tree)
+ t.set_outgroup(t & root)
+
+-print t.write()
++print(t.write())
+
+
+--- a/artic/root_and_deheader.py
++++ b/artic/root_and_deheader.py
+@@ -16,6 +16,6 @@ for leaf in t.iter_leaves():
+ elif cols[1] == 'SLE':
+ leaf.name = cols[0]
+
+-print t.write()
++print(t.write())
+
+
+--- a/artic/runstats.py
++++ b/artic/runstats.py
+@@ -5,7 +5,7 @@ import sys
+ from tabulate import tabulate
+ from pandas import DataFrame
+ import collections
+-from runs import get_runs
++from .runs import get_runs
+ from operator import attrgetter
+ from copy import copy
+
+@@ -27,11 +27,11 @@ class OrderedDefaultdict(collections.Ord
+
+ def __reduce__(self): # optional, for pickle support
+ args = (self.default_factory,) if self.default_factory else tuple()
+- return self.__class__, args, None, None, self.iteritems()
++ return self.__class__, args, None, None, iter(self.items())
+
+ def __repr__(self): # optional
+ return '%s(%r, %r)' % (self.__class__.__name__, self.default_factory,
+- list(self.iteritems()))
++ list(self.items()))
+
+ def shell(cmd):
+ p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE)
+@@ -66,17 +66,17 @@ OrderedDefaultdict(list)
+ # for barcode in ['NB%02d' % (i,) for i in xrange(1,13)]:
+
+ runs = get_runs()
+-for directory in runs.keys():
++for directory in list(runs.keys()):
+ s = Stat('newdata/'+directory)
+ a = OrderedDefaultdict()
+ a['directory'] = directory
+- for k,v in s.hash().iteritems():
++ for k,v in s.hash().items():
+ a[k] = v
+ table.append(a)
+
+ headers = table[0]
+-print "\t".join(headers.keys())
++print("\t".join(list(headers.keys())))
+ for row in table:
+- print "\t".join([str(s) for s in row.values()])
++ print("\t".join([str(s) for s in list(row.values())]))
+
+ #print tabulate(table, tablefmt='pipe', headers='keys')
+--- a/artic/split-clusters.py
++++ b/artic/split-clusters.py
+@@ -16,7 +16,7 @@ def main():
+ groups = set([c['group'] for c in clusters])
+
+ for group in groups:
+- print "cluster%s\t%s-cluster%s" % (group, sys.argv[1], group)
++ print("cluster%s\t%s-cluster%s" % (group, sys.argv[1], group))
+ with open('%s-cluster%s' %(sys.argv[1], group), 'w') as fout:
+ SeqIO.write([records[i['node']] for i in clusters if i['group'] == group], fout, 'fasta')
+
+--- a/artic/stats.py
++++ b/artic/stats.py
+@@ -4,7 +4,7 @@ import sys
+ import shutil
+ from collections import defaultdict
+ import re
+-import runs
++from . import runs
+
+ lookup = dict([(i['Flowcell'], i) for i in runs.load_runs(sys.argv[2])])
+
+@@ -18,8 +18,8 @@ for root, dirs, files in os.walk(sys.arg
+ flowcells[m.group(1)] += 1
+ unique.add(name)
+
+-for k, v in flowcells.iteritems():
++for k, v in flowcells.items():
+ if k in lookup:
+- print "%s %s => %s" % (lookup[k]['Library'], k, v)
++ print("%s %s => %s" % (lookup[k]['Library'], k, v))
+ else:
+- print >>sys.stderr, "No such flowcell %s" % (k,)
++ print("No such flowcell %s" % (k,), file=sys.stderr)
+--- a/artic/tagfastas.py
++++ b/artic/tagfastas.py
+@@ -7,7 +7,7 @@ import os.path
+ from Bio import SeqIO
+ import json
+ import subprocess
+-from StringIO import StringIO
++from io import StringIO
+
+ """
+ go through the runsamples
+@@ -38,7 +38,7 @@ for sample in runsamples['data']:
+ run_name = cols[0]
+ fn = '%s/%s.vcf' % (run_name, sample['sample_id'])
+ if not os.path.exists(fn):
+- print "No vcf for %s" % (fn,)
++ print("No vcf for %s" % (fn,))
+ continue
+
+ if fn in processed:
+@@ -46,17 +46,17 @@ for sample in runsamples['data']:
+ processed[fn] = True
+
+ cmd = "margin_cons.py refs/Zika_FP.fasta %s/%s.vcf %s/%s.primertrimmed.sorted.bam" % (run_name, sample['sample_id'], run_name, sample['sample_id'])
+- print cmd
++ print(cmd)
+
+ p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE)
+ out, err = p.communicate()
+ del p
+
+ rec = list(SeqIO.parse(StringIO(out), "fasta"))[0]
+- print rec
++ print(rec)
+
+ metadata = find_sample(sample['sample_id'])
+- print metadata
++ print(metadata)
+
+ """
+ {u'pregnancy_week': u'', u'municipality': u'murici', u'patient_sex': u'male', u'host_species': u'human', u'lab_internal_sample_id': u'', u'sample_id': u'ZBRD103', u'minion_barcodes': u'', u'ct': u'29.09', u'lab_id_lacen': u'150101004197', u'collection_date': u'2015-08-20', u'amplicon_concentration_pool_1': u'', u'pregnancy_trimester': u'', u'sample_number': u'103', u'symptoms': u'', u'creation_persistent_id': u'9EDCA6E1F234B3A6E160D5E819D8918D', u'state': u'alagoas', u'extraction_date': u'2016-06-13', u'creation_host_timestamp': u'09/08/2016 21:06:44', u'rt_positive': u'1', u'patient_age': u'25', u'modification_account_name': u'Admin', u'modification_persistent_id': u'9EDCA6E1F234B3A6E160D5E819D8918D', u'lab': u'lacen_maceio', u'onset_date': u'2015-08-18', u'microcephaly': u'', u'sample_type': u'', u'creation_account_name': u'Admin', u'modification_host_timestamp': u'', u'country': u'brazil', u'notes': u'', u'pregnant': u''}
+--- a/artic/vcffilter.py
++++ b/artic/vcffilter.py
+@@ -27,10 +27,10 @@ def filter(record):
+
+ number_vcf = 0
+ for record in vcf_reader:
+- if not filter(record):
++ if not list(filter(record)):
+ number_vcf += 1
+ vcf_writer.write_record(record)
+ else:
+- print >>sys.stderr, "Filtering %s" % (record)
++ print("Filtering %s" % (record), file=sys.stderr)
+
+-print >>sys.stderr, "Output %s records" % (number_vcf)
++print("Output %s records" % (number_vcf), file=sys.stderr)
+--- a/artic/vcffilterqual.py
++++ b/artic/vcffilterqual.py
+@@ -27,10 +27,10 @@ def filter(record):
+
+ number_vcf = 0
+ for record in vcf_reader:
+- if not filter(record):
++ if not list(filter(record)):
+ number_vcf += 1
+ vcf_writer.write_record(record)
+ else:
+- print >>sys.stderr, "Filtering %s" % (record)
++ print("Filtering %s" % (record), file=sys.stderr)
+
+-print >>sys.stderr, "Output %s records" % (number_vcf)
++print("Output %s records" % (number_vcf), file=sys.stderr)
+--- a/artic/zipfast5frombam.py
++++ b/artic/zipfast5frombam.py
+@@ -3,7 +3,7 @@
+ # Written by Nick Loman
+ # zipfast5frombam.py bamfile fastafile zipfile
+
+-from __future__ import print_function
++
+
+ import pysam
+ import sys
+--- a/barcodes/demultiplex.py
++++ b/barcodes/demultiplex.py
+@@ -28,9 +28,9 @@ def align_seq(seq,args):
+ resultdict[match]=dict()
+ resultdict[match]["score"]=score
+
+- print resultdict
++ print(resultdict)
+
+- results = sorted([(resultdict[x]["score"],x,resultdict[x]) for x in resultdict.keys()])[::-1]
++ results = sorted([(resultdict[x]["score"],x,resultdict[x]) for x in list(resultdict.keys())])[::-1]
+ #for result in results:
+ # print result
+ result = results[0]
+@@ -60,9 +60,9 @@ def nucl_align(sQSeq,sRSeq,query,target)
+ dEle2Int[ele.lower()] = i
+ dInt2Ele[i] = ele
+ nEleNum = len(lEle)
+- lScore = [0 for i in xrange(nEleNum**2)]
+- for i in xrange(nEleNum-1):
+- for j in xrange(nEleNum-1):
++ lScore = [0 for i in range(nEleNum**2)]
++ for i in range(nEleNum-1):
++ for j in range(nEleNum-1):
+ if lEle[i] == lEle[j]:
+ lScore[i*nEleNum+j] = 3
+ else:
+@@ -175,7 +175,7 @@ def buildPath(q, r, nQryBeg, nRefBeg, lC
+
+ if c == 'M':
+ sQ += q[nQOff : nQOff+n]
+- sA += ''.join(['|' if q[nQOff+j] == r[nROff+j] else '*' for j in xrange(n)])
++ sA += ''.join(['|' if q[nQOff+j] == r[nROff+j] else '*' for j in range(n)])
+ sR += r[nROff : nROff+n]
+ nQOff += n
+ nROff += n
+@@ -263,7 +263,7 @@ def main():
+ #print sequence
+
+ id_,score=align_seq(sequence,args)
+- print str(name),id_,score
++ print(str(name),id_,score)
+ if id_ not in resultdict:
+ resultdict[id_]=dict()
+ resultdict[id_]["counter"]=0
+@@ -274,17 +274,17 @@ def main():
+ resultdict[id_]["sequences"].append(fasta)
+
+ ##print resultdict
+- print "Score Threshold:",args.threshold
++ print("Score Threshold:",args.threshold)
+ for ids in barcode_list:
+- if ids in resultdict.keys():
+- print ids,
+- print resultdict[ids]["counter"],
+- print "Mean:", (sum(resultdict[ids]["score"])/resultdict[ids]["counter"])
++ if ids in list(resultdict.keys()):
++ print(ids, end=' ')
++ print(resultdict[ids]["counter"], end=' ')
++ print("Mean:", (sum(resultdict[ids]["score"])/resultdict[ids]["counter"]))
+ output_handle=open(os.path.join(os.path.dirname(input_file),ids+"_"+os.path.basename(input_file)),"w")
+ SeqIO.write(resultdict[ids]["sequences"], output_handle, "fasta")
+ output_handle.close()
+ else:
+- print ids,"0","Mean:N/A"
++ print(ids,"0","Mean:N/A")
+
+
+
+--- a/docs/conf.py
++++ b/docs/conf.py
+@@ -52,18 +52,18 @@ source_suffix = ['.rst', '.md']
+ master_doc = 'index'
+
+ # General information about the project.
+-project = u'Zibra Pipeline'
+-copyright = u'2017, Nick Loman and the ZiBRA Project Consortium'
+-author = u'Nick Loman and the ZiBRA Project Consortium'
++project = 'Zibra Pipeline'
++copyright = '2017, Nick Loman and the ZiBRA Project Consortium'
++author = 'Nick Loman and the ZiBRA Project Consortium'
+
+ # The version info for the project you're documenting, acts as replacement for
+ # |version| and |release|, also used in various other places throughout the
+ # built documents.
+ #
+ # The short X.Y version.
+-version = u''
++version = ''
+ # The full version, including alpha/beta/rc tags.
+-release = u''
++release = ''
+
+ # The language for content autogenerated by Sphinx. Refer to documentation
+ # for a list of supported languages.
+@@ -133,8 +133,8 @@ latex_elements = {
+ # (source start file, target name, title,
+ # author, documentclass [howto, manual, or own class]).
+ latex_documents = [
+- (master_doc, 'ZibraPipeline.tex', u'Zibra Pipeline Documentation',
+- u'Nick Loman and the ZiBRA Project Consortium', 'manual'),
++ (master_doc, 'ZibraPipeline.tex', 'Zibra Pipeline Documentation',
++ 'Nick Loman and the ZiBRA Project Consortium', 'manual'),
+ ]
+
+
+@@ -143,7 +143,7 @@ latex_documents = [
+ # One entry per manual page. List of tuples
+ # (source start file, name, description, authors, manual section).
+ man_pages = [
+- (master_doc, 'zibrapipeline', u'Zibra Pipeline Documentation',
++ (master_doc, 'zibrapipeline', 'Zibra Pipeline Documentation',
+ [author], 1)
+ ]
+
+@@ -154,7 +154,7 @@ man_pages = [
+ # (source start file, target name, title, author,
+ # dir menu entry, description, category)
+ texinfo_documents = [
+- (master_doc, 'ZibraPipeline', u'Zibra Pipeline Documentation',
++ (master_doc, 'ZibraPipeline', 'Zibra Pipeline Documentation',
+ author, 'ZibraPipeline', 'One line description of project.',
+ 'Miscellaneous'),
+ ]
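
For context on the patch above: 2to3 applies a fixed set of mechanical fixers, and three of them account for nearly every hunk in this patch. Print statements become print() calls (the "print >>fh, ..." redirection form becomes file=fh), dict.iteritems() becomes .items(), and implicit relative imports gain a leading dot. A minimal runnable Python 3 sketch of those idioms follows; the names and values are hypothetical, not taken from the package:

import sys

# Hypothetical stats dict, used only to exercise the converted idioms.
stats = {"mean_cov": 42.0, "median_cov": 40.0}

# Python 2 "print >>sys.stderr, msg" becomes the print() function with file=:
print("collecting depths for %s" % ("sample1",), file=sys.stderr)

# Python 2 "for k, v in d.iteritems():" becomes .items():
for k, v in stats.items():
    print("%s\t%s" % (k, v))

# Python 2 "from runs import get_runs" inside a package becomes the
# explicit relative form "from .runs import get_runs".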
=====================================
debian/patches/series
=====================================
@@ -0,0 +1 @@
+2to3.patch
=====================================
debian/rules
=====================================
@@ -19,7 +19,7 @@ include /usr/share/dpkg/default.mk
export PYBUILD_NAME=fieldbioinformatics
%:
- dh $@ --with python2,python3 --buildsystem=pybuild
+ dh $@ --with python3 --buildsystem=pybuild
### When overriding auto_test make sure DEB_BUILD_OPTIONS will be respected
#override_dh_auto_test:
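
With the python2 addon gone, the build is driven by the dh sequencer with the pybuild backend for Python 3 only. For reference, the minimal rules file this leaves behind should read approximately as follows (the shebang line is assumed; the remaining lines appear in the hunk above):

#!/usr/bin/make -f
# Minimal dh/pybuild rules for a Python 3-only package.

include /usr/share/dpkg/default.mk

export PYBUILD_NAME=fieldbioinformatics

%:
	dh $@ --with python3 --buildsystem=pybuild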
View it on GitLab: https://salsa.debian.org/med-team/fieldbioinformatics/-/compare/ca51297dc37f669b10a826ade80ea9bb2e534477...7a353aab0c3f51316243173ac412d82a14c76489
--
View it on GitLab: https://salsa.debian.org/med-team/fieldbioinformatics/-/compare/ca51297dc37f669b10a826ade80ea9bb2e534477...7a353aab0c3f51316243173ac412d82a14c76489
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200409/dc09e17a/attachment-0001.html>
More information about the debian-med-commit
mailing list