[med-svn] [Git][med-team/discosnp][master] Fix python interpreter in several scripts
Andreas Tille
gitlab at salsa.debian.org
Mon Jan 27 13:09:14 GMT 2020
Andreas Tille pushed to branch master at Debian Med / discosnp
Commits:
24a8436f by Andreas Tille at 2020-01-27T14:08:48+01:00
Fix python interpreter in several scripts
- - - - -
1 changed file:
- debian/patches/2to3.patch
Changes:
=====================================
debian/patches/2to3.patch
=====================================
@@ -2,10 +2,8 @@ Author: Andreas Tille <tille at debian.org>
Last-Update: Mon, 21 Jan 2019 09:01:19 +0100
Description: Result of 2to3
-Index: discosnp/scripts/filter_out_using_MAF.py
-===================================================================
---- discosnp.orig/scripts/filter_out_using_MAF.py
-+++ discosnp/scripts/filter_out_using_MAF.py
+--- a/scripts/filter_out_using_MAF.py
++++ b/scripts/filter_out_using_MAF.py
@@ -2,8 +2,8 @@ import sys
import gzip
@@ -13,7 +11,7 @@ Index: discosnp/scripts/filter_out_using_MAF.py
- print "This tool filters out discoSnp prediction having a minor allele frequency lower than a provided threshold for ALL datasets."
- print "python filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\""
+ print("This tool filters out discoSnp prediction having a minor allele frequency lower than a provided threshold for ALL datasets.")
-+ print("python filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\"")
++ print("python3 filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\"")
sys.exit()
@@ -26,10 +24,14 @@ Index: discosnp/scripts/filter_out_using_MAF.py
-Index: discosnp/scripts/ClassVCF_creator.py
-===================================================================
---- discosnp.orig/scripts/ClassVCF_creator.py
-+++ discosnp/scripts/ClassVCF_creator.py
+--- a/scripts/ClassVCF_creator.py
++++ b/scripts/ClassVCF_creator.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###############################################
+ #Dresscode : class : uppercase
@@ -218,7 +218,7 @@ class VARIANT():
#---------------------------------------------------------------------------------------------------------------------------
def RetrievePolymorphismFromHeader(self):
@@ -59,10 +61,18 @@ Index: discosnp/scripts/ClassVCF_creator.py
else : return (error)
-Index: discosnp/scripts/discoSnp++_to_csv.py
-===================================================================
---- discosnp.orig/scripts/discoSnp++_to_csv.py
-+++ discosnp/scripts/discoSnp++_to_csv.py
+--- a/scripts/discoSnp++_to_csv.py
++++ b/scripts/discoSnp++_to_csv.py
+@@ -1,7 +1,7 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ import sys
+ if len(sys.argv) !=2:
+- sys.stdout.write("Mandatory: python discoSnp_to_csv.py prefix_coherent_k_kval_c_cval.fa\n")
++ sys.stdout.write("Mandatory: python3 discoSnp_to_csv.py prefix_coherent_k_kval_c_cval.fa\n")
+ sys.stdout.write("This program formats the .fa to .csv format by puting each couple of .fa sequence (4 lines = 2 comments + 2 nucleotide sequences) into one line, replacing the '|' character by spaces and removing the CX_ formating")
+ sys.exit(1)
+
@@ -64,7 +64,7 @@ while 1:
i+=1
sys.stdout.write( com2_tab[i][:-1]+",")
@@ -72,10 +82,18 @@ Index: discosnp/scripts/discoSnp++_to_csv.py
-Index: discosnp/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
-===================================================================
---- discosnp.orig/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
-+++ discosnp/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
+--- a/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
++++ b/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
+@@ -1,7 +1,7 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #To apply on sorted vcf by position
+-#Usage : python filterOnBestDP_multiple_variant_at_same_pos.py <vcf_for_igv> > output.vcf
++#Usage : python3 filterOnBestDP_multiple_variant_at_same_pos.py <vcf_for_igv> > output.vcf
+ import sys
+ import gzip
+ import re
@@ -25,7 +25,7 @@ while True:
line = vcf_for_igv.readline()
if not line: break
@@ -97,10 +115,17 @@ Index: discosnp/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
if out:break
vcf_for_igv.close()
-Index: discosnp/scripts/filter_out_using_ratio_of_covered_files.py
-===================================================================
---- discosnp.orig/scripts/filter_out_using_ratio_of_covered_files.py
-+++ discosnp/scripts/filter_out_using_ratio_of_covered_files.py
+--- a/scripts/filter_out_using_ratio_of_covered_files.py
++++ b/scripts/filter_out_using_ratio_of_covered_files.py
+@@ -3,7 +3,7 @@ import gzip
+
+ if len(sys.argv)<4:
+ print ("This tool filters out discoSnp prediction whose number of read sets covering it is lower than a user defined threshold. A set covers a prediction if its coverage in at least one of the two alleles is higher than a user defined threshold")
+- print ("python filter_out_using_ratio_of_covered_files.py \".fa from discoSnp\" \"number of sets threshold\" \"minimal coverage\"")
++ print ("python3 filter_out_using_ratio_of_covered_files.py \".fa from discoSnp\" \"number of sets threshold\" \"minimal coverage\"")
+ sys.exit()
+
+
@@ -53,7 +53,7 @@ while True:
if coverage_high[i]>=minimal_coverage or coverage_low[i]>=minimal_coverage: number_of_covered_sets+=1
@@ -110,10 +135,14 @@ Index: discosnp/scripts/filter_out_using_ratio_of_covered_files.py
-Index: discosnp/scripts/functionObjectVCF_creator.py
-===================================================================
---- discosnp.orig/scripts/functionObjectVCF_creator.py
-+++ discosnp/scripts/functionObjectVCF_creator.py
+--- a/scripts/functionObjectVCF_creator.py
++++ b/scripts/functionObjectVCF_creator.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###############################################
+ import os
@@ -191,25 +191,25 @@ def CheckAtDistanceXBestHits(upper_path,
best_up=1024
if int(upper_path.mappingPosition)==0 and int(lower_path.mappingPosition)==0:#Checks if paths are unmappped
@@ -144,10 +173,14 @@ Index: discosnp/scripts/functionObjectVCF_creator.py
if nbMismatch == best_low:
position_set.add(position)
if len(position_set) > 1:
-Index: discosnp/scripts/VCF_creator.py
-===================================================================
---- discosnp.orig/scripts/VCF_creator.py
-+++ discosnp/scripts/VCF_creator.py
+--- a/scripts/VCF_creator.py
++++ b/scripts/VCF_creator.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #*****************************************************************************
+ # VCF_Creator: mapping and VCF creation feature in DiscoSnp++
@@ -78,7 +78,7 @@ def main():
listName[1]=listName[1].replace("_", " ")
stream_file=open(fileName,'r')
@@ -166,3 +199,422 @@ Index: discosnp/scripts/VCF_creator.py
usage()
sys.exit(2)
+--- a/scripts/format_phased_variants_for_haplotyping.py
++++ b/scripts/format_phased_variants_for_haplotyping.py
+@@ -3,11 +3,11 @@ import sys
+ ### first create connected components from disco (-A option)
+ #sh from_phased_alleles_to_clusters.sh phased_alleles_read_set_id_1.txt # creates file connected_components_phased_alleles_read_set_id_1.txt
+ ### them from the .fa file, the id of the set your interested in (e.g. 1 for phased_alleles_read_set_id_1.txt, this will correspond to C1 coverage in the fa file), the file containing the connected components, and the phased_alleles_read_set_id_X.txt file, generate the fact file
+-#python format_phased_variants_for_haplotyping.py mapping_k_31_c_auto_D_100_P_10_b_0_coherent.fa 1 connected_components_phased_alleles_read_set_id_1.txt phased_alleles_read_set_id_1.txt > phased_alles_read_set_1_facts.txt
++#python3 format_phased_variants_for_haplotyping.py mapping_k_31_c_auto_D_100_P_10_b_0_coherent.fa 1 connected_components_phased_alleles_read_set_id_1.txt phased_alleles_read_set_id_1.txt > phased_alles_read_set_1_facts.txt
+
+
+ if not len(sys.argv)==5:
+- print ("usage: python format_phased_variants_for_haplotyping.py <file coherent.fa> <id number><connected_component_file><phased_allele_file>")
++ print ("usage: python3 format_phased_variants_for_haplotyping.py <file coherent.fa> <id number><connected_component_file><phased_allele_file>")
+ print (" * coherent.fa file: the file generated by discoSnp")
+ print (" * id number is the id of the read set, for which variants are phased. With i, this corresponds to Ci in the .fa file headers.")
+ print (" * connected_component_file: file obtained from \"from_phased_alleles_to_clusters.sh phased_alleles_read_set_id_1.txt\" continaing connected component of phased alleles")
+--- a/scripts/remove_non_covered_genotypes.py
++++ b/scripts/remove_non_covered_genotypes.py
+@@ -3,7 +3,7 @@ import gzip
+
+ if len(sys.argv)<3:
+ print ("This tool replaces discoSnp VCF genotypes with DP lower or equal to a threshold to \"./.\"")
+- print ("python remove_non_covered_genotypes.py \".vcf from discoSnp\" \"DP threshold\"")
++ print ("python3 remove_non_covered_genotypes.py \".vcf from discoSnp\" \"DP threshold\"")
+ sys.exit()
+
+
+@@ -37,4 +37,4 @@ while True:
+ toprint+= splitted_geno[j]
+ if j<len(splitted_geno)-1: toprint+= ":"
+ if i<len(splitted_line): toprint+='\t'
+- print (toprint)
+\ No newline at end of file
++ print (toprint)
+--- a/scripts/simulations/multiple_samples_simulator.sh
++++ b/scripts/simulations/multiple_samples_simulator.sh
+@@ -68,7 +68,7 @@ fi
+
+ for p in `seq 1 $num_pop`
+ do
+- python ./random_mut_fasta.py $genome $div_pop > ERASEME_pos_mut_pop"$p"
++ python3 ./random_mut_fasta.py $genome $div_pop > ERASEME_pos_mut_pop"$p"
+
+ #pos random ordering
+ sort -R ERASEME_pos_mut_pop"$p" > ERASEME_pos_mut_random_pop"$p"
+@@ -90,7 +90,7 @@ for p in `seq 1 $num_pop`
+ #homozygotes mutations
+ cat ERASEME_pos_mut_random_shared_allpop"$p" ERASEME_pos_mut_random_shared_pop"$p"_s"$i" > ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
+ #mutation inducing
+- python ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
++ python3 ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
+ mv "$genome"_mut ERASEME_"$genome"_pop"$p"_s"$i"_withhetero.fasta
+ #homozygote and heterozygote mutations
+ nb_line2=`grep "." -c ERASEME_pos_mut_random_shared_pop"$p"_s"$i"`
+@@ -99,7 +99,7 @@ for p in `seq 1 $num_pop`
+ head -n +"$nb_homo" ERASEME_pos_mut_random_shared_pop"$p"_s"$i" > ERASEME_pos_mut_random_shared_pop"$p"_s"$i"_homo
+ #population mutationq
+ cat ERASEME_pos_mut_random_shared_allpop"$p" ERASEME_pos_mut_random_shared_pop"$p"_s"$i"_homo > ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
+- python ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
++ python3 ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
+ mv "$genome"_mut ERASEME_"$genome"_pop"$p"_s"$i"_homo.fasta
+ #READS SIMULATION
+ mutareads_forward ERASEME_"$genome"_pop"$p"_s"$i"_withhetero.fasta pop"$p"_ind"$i"_allele1_err_reads $read_s $read_l 0.01 0 0
+--- a/scripts/simulations/targeted_mut_fasta_corrected.py
++++ b/scripts/simulations/targeted_mut_fasta_corrected.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #
+
+--- a/scripts/from_phased_alleles_to_clusters.sh
++++ b/scripts/from_phased_alleles_to_clusters.sh
+@@ -13,7 +13,7 @@ filename=$(basename -- "$file")
+ path=$(dirname "${file}")
+
+ # FIND THE PATH CONTAINING THE SCRIPT:
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" )
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" )
+ echo $EDIR
+
+ edge_coverage_threshold=0
+@@ -77,4 +77,4 @@ then
+ exit 1
+ fi
+
+-echo "Connected components (clusters of variants) from file $file are in $path/connected_components_${filename}"
+\ No newline at end of file
++echo "Connected components (clusters of variants) from file $file are in $path/connected_components_${filename}"
+--- a/scripts/redundancy_removal_discosnp.py
++++ b/scripts/redundancy_removal_discosnp.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###################################
+ # from kissnp output:
+--- a/scripts/run_VCF_creator.sh
++++ b/scripts/run_VCF_creator.sh
+@@ -212,10 +212,10 @@ if [ -z "$samfile" ];then
+ exit 1
+ else
+
+- python $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile #-n $n
++ python3 $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile #-n $n
+ if [ $? -ne 0 ]
+ then
+- echo "there was a problem with the VCF creation (command was \"python $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile\""
++ echo "there was a problem with the VCF creation (command was \"python3 $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile\""
+ exit 1
+ fi
+ echo -e "... Creation of the vcf file : done ...==> $vcffile"
+@@ -259,9 +259,9 @@ if [ -z "$samfile" ];then
+ discoSNPsbis=$(basename $discoSNPs .fa)"bis.fasta"
+
+ if [ $map_with_extensions -eq 1 ];then
+- python $PATH_VCF_creator/keep_extensions_disco_file.py $discoSNPs $discoSNPsbis
++ python3 $PATH_VCF_creator/keep_extensions_disco_file.py $discoSNPs $discoSNPsbis
+ else
+- python $PATH_VCF_creator/remove_extensions_disco_file.py $discoSNPs $discoSNPsbis
++ python3 $PATH_VCF_creator/remove_extensions_disco_file.py $discoSNPs $discoSNPsbis
+ fi
+ if [ -z "$discoSNPsbis" ];then
+ echo "...Error with the script remove_extensions_disco_file.py..."
+@@ -320,10 +320,10 @@ else
+ fi
+ fi
+
+-python $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile
++python3 $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile
+ if [ $? -ne 0 ]
+ then
+- echo "there was a problem with the VCF creation (command was \"python $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile \""
++ echo "there was a problem with the VCF creation (command was \"python3 $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile \""
+ exit 1
+ fi
+ echo -e "... Creation of the vcf file: done ...==> $vcffile "
+@@ -332,10 +332,10 @@ echo -e "... Creation of the vcf file: d
+ if [ $igv -eq 1 ] ; then
+ $DIR/create_IGV_compatible_VCF.sh $vcffile
+ nameVCFIGV=$( basename $vcffile .vcf )
+- python $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf
++ python3 $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf
+ if [ $? -ne 0 ]
+ then
+- echo "there was a problem with the IGV VCF creation (command was \"python $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf\""
++ echo "there was a problem with the IGV VCF creation (command was \"python3 $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf\""
+ exit 1
+ fi
+ echo -e "... Creation of the vcf file: done ...==> $vcffile"
+--- a/scripts/simulations/random_mut_fasta.py
++++ b/scripts/simulations/random_mut_fasta.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #
+
+--- a/scripts/create_IGV_compatible_VCF.sh
++++ b/scripts/create_IGV_compatible_VCF.sh
+@@ -30,7 +30,7 @@ igvfile=$(basename $vcffile .vcf)"_for_I
+ cat $vcffile|grep "#">$igvfile
+ #cat $vcffile|grep -v "#"|sort -k 2n,2n -n|grep -v "^SNP"|grep -v "^INDEL">>$igvfile
+ cat $vcffile|grep -v "#"|sort -k 1,1 -k 2,2n |grep -v "^SNP"|grep -v "^INDEL">>$igvfile # from 2 2 6
+-#python $DIR/tools/one2zeroBased_vcf.py $igvfiletemp
++#python3 $DIR/tools/one2zeroBased_vcf.py $igvfiletemp
+ #cat VCFone2zeroBAsed.vcf >> $igvfile
+ #rm -f $igvfiletemp VCFone2zeroBAsed.vcf
+ echo -e "... Creation of the vcf file for IGV: done ...==> $igvfile"
+--- a/scripts/k3000/K3000_gfa_to_dat.py
++++ b/scripts/k3000/K3000_gfa_to_dat.py
+@@ -242,7 +242,7 @@ def main(gfa_file_name):
+ '''
+ Creation of a DAT file from the graph_plus.gfa GFA file
+ Usage:
+- python ~/workspace/gatb-discosnp/scripts/k3000/K3000_gfa_to_dat.py graph_plus.gfa > graph_diploid.dat
++ python3 ~/workspace/gatb-discosnp/scripts/k3000/K3000_gfa_to_dat.py graph_plus.gfa > graph_diploid.dat
+ '''
+ # Store the information as a graph.
+ # This enables
+--- a/scripts/keep_extensions_disco_file.py
++++ b/scripts/keep_extensions_disco_file.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###################################
+ # change extensions in uppercase and replace relative positions of SNPs in the header
+--- a/scripts/remove_extensions_disco_file.py
++++ b/scripts/remove_extensions_disco_file.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###################################
+ #Removes extensions in lowercase :
+--- a/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py
++++ b/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #
+
+--- a/scripts/validation_scripts/eval_disco_one_snp_per_locus.py
++++ b/scripts/validation_scripts/eval_disco_one_snp_per_locus.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #
+
+--- a/scripts/k3000/K3000.py
++++ b/scripts/k3000/K3000.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #
+ '''
+--- a/scripts/k3000/K3000_node_ids_to_node_sequences.py
++++ b/scripts/k3000/K3000_node_ids_to_node_sequences.py
+@@ -152,7 +152,7 @@ def main():
+ Produces a gfa file replacing the node content from int ids of alleles to their sequence
+ '''
+ if len(sys.argv) !=3:
+- sys.stderr.write("Usage: python K3000_node_ids_to_node_sequences.py graph_plus.gfa compacted_facts.fa > graph_final.gfa\n")
++ sys.stderr.write("Usage: python3 K3000_node_ids_to_node_sequences.py graph_plus.gfa compacted_facts.fa > graph_final.gfa\n")
+ sys.exit(0)
+ sys.stderr.write("Indexing sequence positions\n")
+ header_to_file_position = index_sequences(sys.argv[2])
+--- a/scripts/k3000/run.sh
++++ b/scripts/k3000/run.sh
+@@ -11,7 +11,7 @@ wraith=false #$4 # set to true if you on
+ phased_allele_file=$1
+ disco_fa_file=$2
+ read_set_id=$3
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
+ if [ -z "$phased_allele_file" ]; then
+ echo "${red} You must provide a phased file name${reset}"
+ exit 1
+--- a/scripts/remove_non_biallelic.py
++++ b/scripts/remove_non_biallelic.py
+@@ -3,7 +3,7 @@ import gzip
+
+ if len(sys.argv)<2:
+ print ("This tool removes from discoSnp sorted VCF the locus which are tri-allelic or more")
+- print ("python remove_non_diploids.py \".vcf from discoSnp\" ")
++ print ("python3 remove_non_diploids.py \".vcf from discoSnp\" ")
+ sys.exit()
+
+
+--- a/discoSnpRAD/post-processing_scripts/README.md
++++ b/discoSnpRAD/post-processing_scripts/README.md
+@@ -7,7 +7,7 @@
+ * removes variants belonging to a cluster (locus) whose size (nb of variants) is outside the given size range (options `-m` and `-M`)
+ * removes variants with rank lower than a given threshold given by option `-r`
+ * Usage :
+- `python filter_by_cluster_size_and_rank.py -i vcf_file [-o new_vcf_file -m 0 -M 150 -r 0.4]`
++ `python3 filter_by_cluster_size_and_rank.py -i vcf_file [-o new_vcf_file -m 0 -M 150 -r 0.4]`
+
+ 3. **script** `filter_vcf_by_indiv_cov_max_missing_and_maf.py`:
+ * replaces individual genotypes that have DP less than the value given by option `-c` by missing genotype `./.`
+@@ -15,14 +15,14 @@
+ * removes variants (vcf lines) that have a minor allele frequency smaller than the value given by option `-f`
+ * outputs only SNP variants if option `-s`.
+ * Usage :
+- `python filter_vcf_by_indiv_cov_max_missing_and_maf.py -i vcf_file -o new_vcf_file [-c min_cov -m max_missing -f maf -s] `
++ `python3 filter_vcf_by_indiv_cov_max_missing_and_maf.py -i vcf_file -o new_vcf_file [-c min_cov -m max_missing -f maf -s] `
+
+ 3. **script** `filter_paralogs.py`:
+ * identifies variants (vcf lines) that have a fraction of heterozygous genotypes greater than `x` (not counting missing genotypes)
+ * removes variants (vcf lines) that belong to a cluster having a fraction of such variants greater than `y`
+ * Example : `x=0.1` and `y= 0.5` and if we consider a cluster to represent a locus. This filter removes loci that have more than 50% of the SNPs that have each more than 10% of heterozygous genotypes.
+ * Usage :
+- `python filter_paralogs.py -i vcf_file -o new_vcf_file [-x 0.1 -y 0.5]`
++ `python3 filter_paralogs.py -i vcf_file -o new_vcf_file [-x 0.1 -y 0.5]`
+
+
+ ## Scripts for STRUCTURE analyses :
+@@ -30,7 +30,7 @@
+ 4. **script** `1SNP_by_cluster.py`
+ * selects one SNP per cluster (the one with less missing genotypes)
+ * Usage :
+- `python 1SNP_per_cluster.py -i vcf_file -o new_vcf_file`
++ `python3 1SNP_per_cluster.py -i vcf_file -o new_vcf_file`
+
+ 5. **script** `vcf2structure.sh`
+ * changes the vcf format to a Structure format (input of the software Structure)
+@@ -55,13 +55,13 @@ Here is the full pipeline to map the res
+ sh [DISCO_DIR]/scripts/run_VCF_creator.sh -G dm6_masked.fa -p myDiscoSnpRADResult_raw_filtered.fa -e -o temp.vcf
+
+ # Adding clustering information (and minimal filtering on cluster size)
+-python add_cluster_info_to_mapped_vcf.py -m temp.vcf -u myDiscoSnpRADResult_clustered.vcf -o myDiscoSnpRADResult_mapped.vcf
++python3 add_cluster_info_to_mapped_vcf.py -m temp.vcf -u myDiscoSnpRADResult_clustered.vcf -o myDiscoSnpRADResult_mapped.vcf
+ # final vcf is myDiscoSnpRADResult_mapped.vcf
+ ```
+
+ Additionnally, in a validation context, if one wants to compare variant positions between two such vcf files, the following command will output recall and precision metrics:
+ ```
+-python [DISCO_DIR]/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py truth.vcf myDiscoSnpRADResult_mapped.vcf
++python3 [DISCO_DIR]/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py truth.vcf myDiscoSnpRADResult_mapped.vcf
+ ```
+
+
+--- a/discoSnpRAD/post-processing_scripts/filter_by_cluster_size_and_rank.py
++++ b/discoSnpRAD/post-processing_scripts/filter_by_cluster_size_and_rank.py
+@@ -1,5 +1,4 @@
+-
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+
+
+--- a/doc/discoSnp_user_guide.txt
++++ b/doc/discoSnp_user_guide.txt
+@@ -201,7 +201,7 @@ chromosome 117 3 C
+ See documentation specific to VCF_creator for more information: doc/vcf_creator_user_guide.pdf
+ Output Analyze
+ From a fasta format to a csv format: If you wish to analyze the results in a tabulated format:
+-# python output_analyses/discoSnp++_to_csv.py discoSnp++_output.fa
++# python3 output_analyses/discoSnp++_to_csv.py discoSnp++_output.fa
+ will output a .csv tabulated file containing on each line the content of 4 lines of the .fa, replacing the '|' character by comma ',' and removing the CX_
+ Exemples of close SNPs and indels
+ Exemple of a multiple SNP:
+--- a/run_discoSnp++.sh
++++ b/run_discoSnp++.sh
+@@ -65,7 +65,7 @@ verbose=1
+ stop_after_kissnp=0
+ e=""
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-#EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
++#EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
+ EDIR="/usr"
+
+
+--- a/discoSnpRAD/post-processing_scripts/add_cluster_info_to_mapped_vcf.py
++++ b/discoSnpRAD/post-processing_scripts/add_cluster_info_to_mapped_vcf.py
+@@ -1,5 +1,4 @@
+-
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+
+ import sys
+--- a/discoSnpRAD/post-processing_scripts/filter_paralogs.py
++++ b/discoSnpRAD/post-processing_scripts/filter_paralogs.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+
+
+--- a/discoSnpRAD/post-processing_scripts/filter_vcf_by_indiv_cov_max_missing_and_maf.py
++++ b/discoSnpRAD/post-processing_scripts/filter_vcf_by_indiv_cov_max_missing_and_maf.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+
+ ''' ***********************************************
+--- a/run_discoSnp++_storagefile.sh
++++ b/run_discoSnp++_storagefile.sh
+@@ -55,7 +55,7 @@ remove=1
+ verbose=1
+ e=""
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
+
+ if [ -d "$EDIR/build/" ] ; then # VERSION SOURCE COMPILED
+ read_file_names_bin=$EDIR/build/bin/read_file_names
+--- a/discoSnpRAD/post-processing_scripts/1SNP_per_cluster.py
++++ b/discoSnpRAD/post-processing_scripts/1SNP_per_cluster.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+
+
+--- a/discoSnpRAD/run_discoSnpRad.sh
++++ b/discoSnpRAD/run_discoSnpRad.sh
+@@ -75,7 +75,7 @@ verbose=1
+ short_read_connector_path=""
+ option_phase_variants=""
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
+
+ if [ -d "$EDIR/../build/" ] ; then # VERSION SOURCE COMPILED
+ read_file_names_bin=$EDIR/../build/bin/read_file_names
+@@ -556,7 +556,7 @@ fi
+ echo -e "${yellow}\t############################################################"
+ echo -e "\t#################### REDUNDANCY REMOVAL ###################"
+ echo -e "\t############################################################$reset"
+-redundancy_removal_cmd="python $EDIR/../scripts/redundancy_removal_discosnp.py ${kissprefix}_r.fa $k $kissprefix.fa"
++redundancy_removal_cmd="python3 $EDIR/../scripts/redundancy_removal_discosnp.py ${kissprefix}_r.fa $k $kissprefix.fa"
+ echo $green${redundancy_removal_cmd}$cyan
+ if [[ "$wraith" == "false" ]]; then
+ eval ${redundancy_removal_cmd}
+--- a/run_discoSnp++_ML.sh
++++ b/run_discoSnp++_ML.sh
+@@ -67,7 +67,7 @@ stop_after_kissnp=0
+ e=""
+ prefix_trash=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 13 ; echo ''`
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron
+
+
+ if [ -d "$EDIR/build/" ] ; then # VERSION SOURCE COMPILED
+@@ -839,4 +839,4 @@ if [[ "$wraith" == "false" ]]; then
+ fi
+ echo -e " Thanks for using discoSnp++ - http://colibread.inria.fr/discoSnp/ - Forum: http://www.biostars.org/t/discoSnp/"
+ echo -e "################################################################################################################${reset}"
+-fi
+\ No newline at end of file
++fi
View it on GitLab: https://salsa.debian.org/med-team/discosnp/commit/24a8436fbcbf9c7d15e4654f044ddf5354539ad8
--
View it on GitLab: https://salsa.debian.org/med-team/discosnp/commit/24a8436fbcbf9c7d15e4654f044ddf5354539ad8
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200127/f698e1b7/attachment-0001.html>
More information about the debian-med-commit mailing list