[med-svn] [Git][med-team/tiddit][master] 4 commits: New upstream version 3.4.0+dfsg
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Sat Dec 31 11:41:51 GMT 2022
Nilesh Patra pushed to branch master at Debian Med / tiddit
Commits:
82f057d6 by Nilesh Patra at 2022-12-31T17:05:26+05:30
New upstream version 3.4.0+dfsg
- - - - -
62d616a7 by Nilesh Patra at 2022-12-31T17:05:26+05:30
Update upstream source from tag 'upstream/3.4.0+dfsg'
Update to upstream version '3.4.0+dfsg'
with Debian dir e0437a4366f0fba3e967fba5566f0606a28356fc
- - - - -
dbbf455f by Nilesh Patra at 2022-12-31T17:05:30+05:30
Bump Standards-Version to 4.6.2 (no changes needed)
- - - - -
09348e85 by Nilesh Patra at 2022-12-31T17:05:50+05:30
Upload to unstable
- - - - -
8 changed files:
- debian/changelog
- debian/control
- setup.py
- tiddit/__main__.py
- tiddit/tiddit_cluster.pyx
- tiddit/tiddit_contig_analysis.pyx
- tiddit/tiddit_signal.pyx
- tiddit/tiddit_variant.pyx
Changes:
=====================================
debian/changelog
=====================================
@@ -1,3 +1,11 @@
+tiddit (3.4.0+dfsg-1) unstable; urgency=medium
+
+ * Team Upload.
+ * New upstream version 3.4.0+dfsg
+ * Bump Standards-Version to 4.6.2 (no changes needed)
+
+ -- Nilesh Patra <nilesh at debian.org> Sat, 31 Dec 2022 17:05:34 +0530
+
tiddit (3.3.2+dfsg-1) unstable; urgency=medium
* Fix watch file
=====================================
debian/control
=====================================
@@ -10,7 +10,7 @@ Build-Depends: debhelper-compat (= 13),
python3-setuptools,
python3-numpy,
python3-pysam
-Standards-Version: 4.6.1
+Standards-Version: 4.6.2
Vcs-Browser: https://salsa.debian.org/med-team/tiddit
Vcs-Git: https://salsa.debian.org/med-team/tiddit.git
Homepage: https://github.com/SciLifeLab/TIDDIT
=====================================
setup.py
=====================================
@@ -20,7 +20,7 @@ else:
setup(
name = 'tiddit',
- version = '3.3.2',
+ version = '3.4.0',
url = "https://github.com/SciLifeLab/TIDDIT",
author = "Jesper Eisfeldt",
=====================================
tiddit/__main__.py
=====================================
@@ -17,7 +17,7 @@ import tiddit.tiddit_variant as tiddit_variant
import tiddit.tiddit_contig_analysis as tiddit_contig_analysis
def main():
- version="3.3.2"
+ version="3.4.0"
parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true")
parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")
=====================================
tiddit/tiddit_cluster.pyx
=====================================
@@ -70,7 +70,7 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
if int(posB) > contig_length[chrB]:
posA=contig_length[chrB]
- discordants[chrA][chrB].append([content[0],sample,"D",posA,content[5],posB,content[8],i])
+ discordants[chrA][chrB].append([content[0],sample,"D",posA,content[5],posB,content[8],i,int(content[3]),int(content[4]),int(content[6]),int(content[7])])
positions[chrA][chrB].append([int(posA),int(posB),i])
i+=1
@@ -99,7 +99,7 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
if int(posB) > contig_length[chrB]:
posB=contig_length[chrB]
- discordants[chrA][chrB].append([content[0],sample,"S",posA,content[4],posB,content[6],i])
+ discordants[chrA][chrB].append([content[0],sample,"S",posA,content[4],posB,content[6],i,int(content[7]),int(content[8]),int(content[9]),int(content[10])])
positions[chrA][chrB].append([int(posA),int(posB),i])
i+=1
@@ -132,7 +132,7 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
if int(posB) > contig_length[chrB]:
posB=contig_length[chrB]
- discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i])
+ discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i,int(content[7]),int(content[8]),int(content[9]),int(content[10])])
positions[chrA][chrB].append([int(posA),int(posB),i])
contigs.add(i)
i+=1
@@ -197,6 +197,9 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["positions_A"]["orientation_contigs"]=[]
candidates[chrA][chrB][candidate]["positions_A"]["orientation_splits"]=[]
candidates[chrA][chrB][candidate]["positions_A"]["orientation_discordants"]=[]
+ candidates[chrA][chrB][candidate]["positions_A"]["start"]=[]
+ candidates[chrA][chrB][candidate]["positions_A"]["end"]=[]
+
candidates[chrA][chrB][candidate]["start_A"]=0
candidates[chrA][chrB][candidate]["end_A"]=0
@@ -208,6 +211,8 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["positions_B"]["orientation_contigs"]=[]
candidates[chrA][chrB][candidate]["positions_B"]["orientation_splits"]=[]
candidates[chrA][chrB][candidate]["positions_B"]["orientation_discordants"]=[]
+ candidates[chrA][chrB][candidate]["positions_B"]["start"]=[]
+ candidates[chrA][chrB][candidate]["positions_B"]["end"]=[]
candidates[chrA][chrB][candidate]["start_B"]=0
candidates[chrA][chrB][candidate]["end_B"]=0
@@ -218,7 +223,13 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["sample_contigs"][discordants[chrA][chrB][i][1]]=set([])
candidates[chrA][chrB][candidate]["samples"].add(discordants[chrA][chrB][i][1])
-
+
+ candidates[chrA][chrB][candidate]["positions_A"]["start"].append(discordants[chrA][chrB][i][8])
+ candidates[chrA][chrB][candidate]["positions_A"]["end"].append(discordants[chrA][chrB][i][9])
+
+ candidates[chrA][chrB][candidate]["positions_B"]["start"].append(discordants[chrA][chrB][i][10])
+ candidates[chrA][chrB][candidate]["positions_B"]["end"].append(discordants[chrA][chrB][i][11])
+
if discordants[chrA][chrB][i][2] == "D":
candidates[chrA][chrB][candidate]["discordants"].add(discordants[chrA][chrB][i][0])
candidates[chrA][chrB][candidate]["positions_A"]["discordants"].append(int(discordants[chrA][chrB][i][3]))
@@ -321,11 +332,11 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["posA"]
candidates[chrA][chrB][candidate]["posB"]
- candidates[chrA][chrB][candidate]["startB"]=min(candidates[chrA][chrB][candidate]["positions_B"]["contigs"]+candidates[chrA][chrB][candidate]["positions_B"]["splits"]+candidates[chrA][chrB][candidate]["positions_B"]["discordants"])
- candidates[chrA][chrB][candidate]["endB"]=max(candidates[chrA][chrB][candidate]["positions_B"]["contigs"]+candidates[chrA][chrB][candidate]["positions_B"]["splits"]+candidates[chrA][chrB][candidate]["positions_B"]["discordants"])
+ candidates[chrA][chrB][candidate]["startB"]=min(candidates[chrA][chrB][candidate]["positions_B"]["start"])
+ candidates[chrA][chrB][candidate]["endB"]=max(candidates[chrA][chrB][candidate]["positions_B"]["end"])
- candidates[chrA][chrB][candidate]["startA"]=min(candidates[chrA][chrB][candidate]["positions_A"]["contigs"]+candidates[chrA][chrB][candidate]["positions_A"]["splits"]+candidates[chrA][chrB][candidate]["positions_A"]["discordants"])
- candidates[chrA][chrB][candidate]["endA"]=max(candidates[chrA][chrB][candidate]["positions_A"]["contigs"]+candidates[chrA][chrB][candidate]["positions_A"]["splits"]+candidates[chrA][chrB][candidate]["positions_A"]["discordants"])
+ candidates[chrA][chrB][candidate]["startA"]=min(candidates[chrA][chrB][candidate]["positions_A"]["start"])
+ candidates[chrA][chrB][candidate]["endA"]=max(candidates[chrA][chrB][candidate]["positions_A"]["end"])
return(candidates)
=====================================
tiddit/tiddit_contig_analysis.pyx
=====================================
@@ -63,7 +63,8 @@ def read_contigs(aligned_contigs,prefix,sample_id,min_size):
current_bp=read.reference_start
for i in range(0,len(read.cigartuples)-1):
if read.cigartuples[i][0] == 2 and read.cigartuples[i][1] > min_size:
- split_contigs[read.reference_name][read.reference_name]["{}_d_{}".format(read.query_name,i)]=[current_bp,read.is_reverse,current_bp+read.cigartuples[i][1],read.is_reverse]
+
+ split_contigs[read.reference_name][read.reference_name]["{}_d_{}".format(read.query_name,i)]=[current_bp,read.is_reverse,current_bp+read.cigartuples[i][1],read.is_reverse,read.reference_start,current_bp,current_bp+read.cigartuples[i][1],read.reference_end]
current_bp+=read.cigartuples[i][1]
f=open("{}_tiddit/contigs_{}.tab".format(prefix,sample_id),"w")
=====================================
tiddit/tiddit_signal.pyx
=====================================
@@ -66,18 +66,40 @@ def SA_analysis(read,min_q,tag,reference_name):
cdef long read_query_alignment_end=read.query_alignment_end
if (read.query_alignment_start ) < (read.query_length - read_query_alignment_end):
- split_pos=read.reference_end+1
+ if read.is_reverse:
+
+
+ split_pos=read.reference_start+1
+ else:
+ split_pos=read.reference_end+1
else:
- split_pos=read.reference_start+1
+ if read.is_reverse:
+ split_pos=read.reference_end+1
+ else:
+ split_pos=read.reference_start+1
supplementry_alignment=find_SA_query_range(SA_data)
SA_chr=SA_data[0]
+ startA=read.reference_start+1
+ endA=read.reference_end+1
+
+ startB=supplementry_alignment.reference_start
+ endB=supplementry_alignment.reference_end
+
if (supplementry_alignment.query_alignment_start ) < (supplementry_alignment.query_length - read_query_alignment_end):
- SA_split_pos=supplementry_alignment.reference_end
+ if SA_data[2] == "-":
+
+ SA_split_pos=supplementry_alignment.reference_start
+ else:
+ SA_split_pos=supplementry_alignment.reference_end
else:
- SA_split_pos=supplementry_alignment.reference_start
+ if SA_data[2] == "-":
+ SA_split_pos=supplementry_alignment.reference_end
+
+ else:
+ SA_split_pos=supplementry_alignment.reference_start
if SA_chr < reference_name:
@@ -87,6 +109,12 @@ def SA_analysis(read,min_q,tag,reference_name):
split_pos=SA_split_pos
SA_split_pos=tmp
+ startB=read.reference_start+1
+ endB=read.reference_end+1
+ startA=supplementry_alignment.reference_start
+ endA=supplementry_alignment.reference_end
+
+
else:
chrA=reference_name
chrB=SA_chr
@@ -97,11 +125,16 @@ def SA_analysis(read,min_q,tag,reference_name):
split_pos=SA_split_pos
SA_split_pos=tmp
+ startB=read.reference_start+1
+ endB=read.reference_end+1
+ startA=supplementry_alignment.reference_start
+ endA=supplementry_alignment.reference_end
+
split=[]
if "-" == SA_data[2]:
- split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos, True]
+ split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos, True,startA,endA,startB,endB]
else:
- split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos,False]
+ split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos,False,startA,endA,startB,endB]
#splits[chrA][chrB][read.query_name]+=[split_pos,read.is_reverse,SA_split_pos,False]
return(split)
=====================================
tiddit/tiddit_variant.pyx
=====================================
@@ -6,6 +6,37 @@ from joblib import Parallel, delayed
#from pysam.libcalignmentfile cimport AlignmentFile, AlignedSegment
from pysam import AlignmentFile, AlignedSegment
+
+def scoring(scoring_dict,percentiles):
+ score=[0]
+ if scoring_dict["n_contigs"]:
+ score.append(50)
+
+ if scoring_dict["n_discordants"]:
+ score.append(0)
+ for p in percentiles["FA"]:
+ if scoring_dict["n_discordants"]/(scoring_dict["refFA"]+scoring_dict["n_discordants"]) >= p:
+ score[-1]+=5
+
+ score.append(0)
+ for p in percentiles["FB"]:
+ if scoring_dict["n_discordants"]/(scoring_dict["refFB"]+scoring_dict["n_discordants"]) >= p:
+ score[-1]+=5
+
+
+ if scoring_dict["n_splits"]:
+ score.append(0)
+ for p in percentiles["RA"]:
+ if scoring_dict["n_splits"]/(scoring_dict["refRA"]+scoring_dict["n_splits"]) >= p:
+ score[-1]+=5
+
+ score.append(0)
+ for p in percentiles["RB"]:
+ if scoring_dict["n_splits"]/(scoring_dict["refRB"]+scoring_dict["n_splits"]) >= p:
+ score[-1]+=5
+
+ return(max(score))
+
def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, contig_number):
cdef int low_q=0
@@ -67,10 +98,10 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
r_start=read_reference_start
r_end=read_reference_end
- if read_reference_start < bp-10 and r_end > bp:
+ if read_reference_start < bp-20 and r_end > bp+20:
crossing_r+=1
- mate_bp_read= (read.next_reference_start < bp and r_end > bp)
+ mate_bp_read= (read.next_reference_start < bp-50 and r_end > bp+50)
discordant= ( abs(read.isize) > max_ins or read_next_reference_name != read_reference_name )
if mate_bp_read and not discordant:
@@ -281,14 +312,16 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
#configure filters for CNV based on Read depth
for sample in samples:
+
+ covA=sample_data[sample]["covA"]
+ covM=sample_data[sample]["covM"]
+ covB=sample_data[sample]["covB"]
+
if "DEL" in svtype:
#homozygout del based on coverage
if cn == 0:
filt="PASS"
- covA=sample_data[sample]["covA"]
- covM=sample_data[sample]["covM"]
- covB=sample_data[sample]["covB"]
#normal coverage on the flanking regions, abnormal inbetween
if covA > covM*(cn+0.9) and covB > covM*(cn+0.9):
@@ -297,8 +330,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
#too few reads, but clear DR signal
elif "DUP" in svtype and filt == "BelowExpectedLinks":
filt="PASS"
-
-
+ scoring_dict={"n_contigs":n_contigs, "n_discordants":n_discordants,"n_splits":n_splits,"covA":covA,"covM":covM,"covB":covB,"refRA":sample_data[sample]["refRA"],"refRB":sample_data[sample]["refRB"],"refFA":sample_data[sample]["refFA"],"refFB":sample_data[sample]["refFB"]}
if svtype != "BND":
info=["SVTYPE={}".format(svtype),"SVLEN={}".format(posB-posA),"END={}".format(posB)]
@@ -363,7 +395,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
GT="0/1"
variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
- variants.append([chrA,posA,variant])
+ variants.append([chrA,posA,variant,scoring_dict])
else:
info=["SVTYPE=BND".format(svtype)]
inverted=False
@@ -439,7 +471,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
- variants.append([chrA,posA,variant])
+ variants.append([chrA,posA,variant,scoring_dict])
variant=[chrB,str(posB),"SV_{}_2".format(var_n),"N",alt_str_b,".",filt,info,format_col]
@@ -472,7 +504,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
- variants.append([chrB,posB,variant])
+ variants.append([chrB,posB,variant, scoring_dict ])
samfile.close()
return(variants)
@@ -498,8 +530,25 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
variants_list=Parallel(n_jobs=args.threads)( delayed(define_variant)(chrA,bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len,contig_seqs) for chrA in sv_clusters)
+ ratios={"fragments_A":[],"fragments_B":[],"reads_A":[],"reads_B":[]}
for v in variants_list:
for variant in v:
+ if variant[3]["n_discordants"]:
+ ratios["fragments_A"].append(variant[3]["n_discordants"]/(variant[3]["refFA"]+variant[3]["n_discordants"]) )
+ ratios["fragments_B"].append(variant[3]["n_discordants"]/(variant[3]["refFB"]+variant[3]["n_discordants"]) )
+
+ if variant[3]["n_splits"]:
+ ratios["reads_A"].append(variant[3]["n_splits"]/(variant[3]["refRA"]+variant[3]["n_splits"]) )
+ ratios["reads_B"].append(variant[3]["n_splits"]/(variant[3]["refRB"]+variant[3]["n_splits"]) )
+
+
+ p=[1,5,10,20,30,40,50,60,70,75,80,85,90,95,97.5,99]
+ percentiles={"FA":numpy.percentile(ratios["fragments_A"],p),"FB":numpy.percentile(ratios["fragments_B"],p),"RA":numpy.percentile(ratios["reads_A"],p),"RB":numpy.percentile(ratios["reads_B"],p)}
+
+ for v in variants_list:
+ for variant in v:
+ score=scoring(variant[3],percentiles)
+ variant[2][5]=str(score)
variants[ variant[0] ].append( [ variant[1],variant[2] ] )
return(variants)
View it on GitLab: https://salsa.debian.org/med-team/tiddit/-/compare/10ac96c1d239d80172da43fe77cf328ccb6ae06a...09348e8503ffd89bff845e64ad81b7686beeb66e
--
View it on GitLab: https://salsa.debian.org/med-team/tiddit/-/compare/10ac96c1d239d80172da43fe77cf328ccb6ae06a...09348e8503ffd89bff845e64ad81b7686beeb66e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20221231/e6a927d6/attachment-0001.htm>
More information about the debian-med-commit
mailing list