[med-svn] [Git][med-team/tiddit][upstream] New upstream version 3.3.2+dfsg
Andreas Tille (@tille)
gitlab at salsa.debian.org
Tue Dec 6 18:44:16 GMT 2022
Andreas Tille pushed to branch upstream at Debian Med / tiddit
Commits:
0151dffb by Andreas Tille at 2022-12-06T19:39:38+01:00
New upstream version 3.3.2+dfsg
- - - - -
4 changed files:
- README.md
- setup.py
- tiddit/__main__.py
- tiddit/tiddit_variant.pyx
Changes:
=====================================
README.md
=====================================
@@ -111,7 +111,6 @@ optional parameters:
-o - the prefix of the output files
-z - compute the coverage within bins of a specified size across the entire genome, default bin size is 500
- -u - do not print per bin quality values
-w - generate a wig file instead of bed
--ref - reference sequence (fasta), required for reading cram file.
=====================================
setup.py
=====================================
@@ -20,7 +20,7 @@ else:
setup(
name = 'tiddit',
- version = '3.3.0',
+ version = '3.3.2',
url = "https://github.com/SciLifeLab/TIDDIT",
author = "Jesper Eisfeldt",
=====================================
tiddit/__main__.py
=====================================
@@ -17,7 +17,7 @@ import tiddit.tiddit_variant as tiddit_variant
import tiddit.tiddit_contig_analysis as tiddit_contig_analysis
def main():
- version="3.3.0"
+ version="3.3.2"
parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true")
parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")
@@ -88,7 +88,7 @@ def main():
if not os.path.isfile(args.bam):
print ("error, could not find the bam file")
quit()
-
+
bam_file_name=args.bam
samfile = pysam.AlignmentFile(bam_file_name, "r",reference_filename=args.ref)
@@ -122,10 +122,10 @@ def main():
print("Folder already exists")
pysam.index("-c","-m","6","-@",str(args.threads),bam_file_name,"{}_tiddit/{}.csi".format(args.o,sample_id))
-
+
min_mapq=args.q
max_ins_len=100000
- n_reads=args.s
+ n_reads=args.s
library=tiddit_stats.statistics(bam_file_name,args.ref,min_mapq,max_ins_len,n_reads)
if args.i:
@@ -153,29 +153,29 @@ def main():
print(time.time()-t)
vcf_header=tiddit_vcf_header.main( bam_header,library,sample_id,version )
-
+
if not args.e:
args.e=int(library["avg_insert_size"]/2.0)
-
+
t=time.time()
sv_clusters=tiddit_cluster.main(prefix,contigs,contig_length,samples,library["mp"],args.e,args.l,max_ins_len,args.min_contig,args.skip_assembly)
-
+
print("generated clusters in")
print(time.time()-t)
-
+
f=open(prefix+".vcf","w")
f.write(vcf_header+"\n")
-
+
t=time.time()
variants=tiddit_variant.main(bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len)
print("analyzed clusters in")
print(time.time()-t)
-
+
for chr in contigs:
if not chr in variants:
continue
for variant in sorted(variants[chr], key=lambda x: x[0]):
- f.write( "\t".join(variant[1])+"\n" )
+ f.write( "\t".join(variant[1])+"\n" )
f.close()
quit()
=====================================
tiddit/tiddit_variant.pyx
=====================================
@@ -23,7 +23,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
q_start=start
q_end=end+max_ins
-
+
if q_end > contig_length:
q_end=contig_length
@@ -44,14 +44,14 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
if not read.mate_is_unmapped:
if read.next_reference_start > end and read_reference_start > end:
- continue
+ continue
else:
if read_reference_start > end:
continue
if read.is_duplicate:
continue
-
+
if not (read_reference_start > end):
n_reads+=1
if read.mapq < min_q:
@@ -83,7 +83,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
if read_reference_start < start:
r_start=start
-
+
if read_reference_end > end:
r_end=end
@@ -133,7 +133,7 @@ def find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,librar
return("DUP:TANDEM",cn)
elif cn < p:
return("DEL",cn)
-
+
elif inverted > non_inverted:
return("INV",cn)
else:
@@ -238,7 +238,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
sample_data[sample]={}
sample_data[sample]={"covA":coverageA,"QA":frac_low_qA,"discA":n_discsA,"splitA":n_splitsA,"refRA":crossing_r_A,"refFA":crossing_f_A}
- sample_data[sample].update({"covB":coverageB,"QB":frac_low_qA,"discB":n_discsB,"splitB":n_splitsB,"refRB":crossing_r_B,"refFB":crossing_f_B})
+ sample_data[sample].update({"covB":coverageB,"QB":frac_low_qB,"discB":n_discsB,"splitB":n_splitsB,"refRB":crossing_r_B,"refFB":crossing_f_B})
if chrA != chrB:
sample_data[sample]["covM"]=0
@@ -253,7 +253,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
s=int(math.floor(posA/50.0))
e=int(math.floor(posB/50.0))+1
sample_data[sample]["covM"]=numpy.average(coverage_data[chrA][s:e] )
-
+
inverted=0
non_inverted=0
for i in range(0,len(sv_clusters[chrA][chrB][cluster]["positions_A"]["orientation_discordants"]) ):
@@ -277,12 +277,12 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
svtype,cn=find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,library)
filt=sv_filter(sample_data,args,chrA,chrB,posA,posB,max_ins_len,n_discordants,n_splits,library,sample_data[sample]["discA"],sample_data[sample]["discB"],sample_data[sample]["splitA"],sample_data[sample]["splitB"],n_contigs)
- format_col="GT:CN:COV:DV:RV:LQ:RR:RD"
+ format_col="GT:CN:COV:DV:RV:LQ:RR:DR"
#configure filters for CNV based on Read depth
for sample in samples:
if "DEL" in svtype:
- #homozygout del based on coverage
+ #homozygout del based on coverage
if cn == 0:
filt="PASS"
@@ -294,7 +294,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
if covA > covM*(cn+0.9) and covB > covM*(cn+0.9):
filt="PASS"
- #too few reads, but clear RD signal
+ #too few reads, but clear DR signal
elif "DUP" in svtype and filt == "BelowExpectedLinks":
filt="PASS"
@@ -361,7 +361,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
GT= "1/1"
else:
GT="0/1"
-
+
variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
variants.append([chrA,posA,variant])
else:
@@ -472,7 +472,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
- variants.append([chrB,posB,variant])
+ variants.append([chrB,posB,variant])
samfile.close()
return(variants)
@@ -481,7 +481,7 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
contig_seqs={}
new_seq=False
if not args.skip_assembly:
- for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):
+ for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):
if not new_seq and line[0] == "@" and "\t" in line:
name=line.split("\t")[0][1:]
@@ -501,5 +501,5 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
for v in variants_list:
for variant in v:
variants[ variant[0] ].append( [ variant[1],variant[2] ] )
-
+
return(variants)
View it on GitLab: https://salsa.debian.org/med-team/tiddit/-/commit/0151dffb12b9db43610cd8599904495b1a3209d3
--
View it on GitLab: https://salsa.debian.org/med-team/tiddit/-/commit/0151dffb12b9db43610cd8599904495b1a3209d3
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221206/c16729ba/attachment-0001.htm>
More information about the debian-med-commit
mailing list