[med-svn] [Git][med-team/discosnp][master] Fix python interpreter in several scripts

Andreas Tille gitlab at salsa.debian.org
Mon Jan 27 13:09:14 GMT 2020



Andreas Tille pushed to branch master at Debian Med / discosnp


Commits:
24a8436f by Andreas Tille at 2020-01-27T14:08:48+01:00
Fix python interpreter in several scripts

- - - - -


1 changed file:

- debian/patches/2to3.patch


Changes:

=====================================
debian/patches/2to3.patch
=====================================
@@ -2,10 +2,8 @@ Author: Andreas Tille <tille at debian.org>
 Last-Update: Mon, 21 Jan 2019 09:01:19 +0100
 Description: Result of 2to3
 
-Index: discosnp/scripts/filter_out_using_MAF.py
-===================================================================
---- discosnp.orig/scripts/filter_out_using_MAF.py
-+++ discosnp/scripts/filter_out_using_MAF.py
+--- a/scripts/filter_out_using_MAF.py
++++ b/scripts/filter_out_using_MAF.py
 @@ -2,8 +2,8 @@ import sys
  import gzip
  
@@ -13,7 +11,7 @@ Index: discosnp/scripts/filter_out_using_MAF.py
 -    print "This tool filters out discoSnp prediction having a minor allele frequency lower than a provided threshold for ALL datasets."
 -    print "python filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\""
 +    print("This tool filters out discoSnp prediction having a minor allele frequency lower than a provided threshold for ALL datasets.")
-+    print("python filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\"")
++    print("python3 filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\"")
      sys.exit()
  
  
@@ -26,10 +24,14 @@ Index: discosnp/scripts/filter_out_using_MAF.py
      
        
              
-Index: discosnp/scripts/ClassVCF_creator.py
-===================================================================
---- discosnp.orig/scripts/ClassVCF_creator.py
-+++ discosnp/scripts/ClassVCF_creator.py
+--- a/scripts/ClassVCF_creator.py
++++ b/scripts/ClassVCF_creator.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###############################################
+ #Dresscode : class : uppercase
 @@ -218,7 +218,7 @@ class VARIANT():
  #---------------------------------------------------------------------------------------------------------------------------                                                   
          def RetrievePolymorphismFromHeader(self):
@@ -59,10 +61,18 @@ Index: discosnp/scripts/ClassVCF_creator.py
                  else : return (error)                                   
                                  
                  
-Index: discosnp/scripts/discoSnp++_to_csv.py
-===================================================================
---- discosnp.orig/scripts/discoSnp++_to_csv.py
-+++ discosnp/scripts/discoSnp++_to_csv.py
+--- a/scripts/discoSnp++_to_csv.py
++++ b/scripts/discoSnp++_to_csv.py
+@@ -1,7 +1,7 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ import sys
+ if len(sys.argv) !=2:
+-    sys.stdout.write("Mandatory: python discoSnp_to_csv.py prefix_coherent_k_kval_c_cval.fa\n")
++    sys.stdout.write("Mandatory: python3 discoSnp_to_csv.py prefix_coherent_k_kval_c_cval.fa\n")
+     sys.stdout.write("This program formats the .fa to .csv format by puting each couple of .fa sequence (4 lines = 2 comments + 2 nucleotide sequences) into one line, replacing the '|' character by spaces and removing the CX_ formating")
+     sys.exit(1)
+ 
 @@ -64,7 +64,7 @@ while 1:
          i+=1
      sys.stdout.write( com2_tab[i][:-1]+",")
@@ -72,10 +82,18 @@ Index: discosnp/scripts/discoSnp++_to_csv.py
      
      
      
-Index: discosnp/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
-===================================================================
---- discosnp.orig/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
-+++ discosnp/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
+--- a/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
++++ b/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
+@@ -1,7 +1,7 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #To apply on sorted vcf by position
+-#Usage : python filterOnBestDP_multiple_variant_at_same_pos.py <vcf_for_igv> > output.vcf
++#Usage : python3 filterOnBestDP_multiple_variant_at_same_pos.py <vcf_for_igv> > output.vcf
+ import sys
+ import gzip
+ import re
 @@ -25,7 +25,7 @@ while True:
                  line = vcf_for_igv.readline()
                  if not line: break
@@ -97,10 +115,17 @@ Index: discosnp/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
          if out:break         
  vcf_for_igv.close() 
  
-Index: discosnp/scripts/filter_out_using_ratio_of_covered_files.py
-===================================================================
---- discosnp.orig/scripts/filter_out_using_ratio_of_covered_files.py
-+++ discosnp/scripts/filter_out_using_ratio_of_covered_files.py
+--- a/scripts/filter_out_using_ratio_of_covered_files.py
++++ b/scripts/filter_out_using_ratio_of_covered_files.py
+@@ -3,7 +3,7 @@ import gzip
+ 
+ if len(sys.argv)<4:
+     print ("This tool filters out discoSnp prediction whose number of read sets covering it is lower than a user defined threshold. A set covers a prediction if its coverage in at least one of the two alleles is higher than a user defined threshold")
+-    print ("python filter_out_using_ratio_of_covered_files.py \".fa from discoSnp\" \"number of sets threshold\" \"minimal coverage\"")
++    print ("python3 filter_out_using_ratio_of_covered_files.py \".fa from discoSnp\" \"number of sets threshold\" \"minimal coverage\"")
+     sys.exit()
+ 
+ 
 @@ -53,7 +53,7 @@ while True:
          if coverage_high[i]>=minimal_coverage or coverage_low[i]>=minimal_coverage: number_of_covered_sets+=1
      
@@ -110,10 +135,14 @@ Index: discosnp/scripts/filter_out_using_ratio_of_covered_files.py
      
        
              
-Index: discosnp/scripts/functionObjectVCF_creator.py
-===================================================================
---- discosnp.orig/scripts/functionObjectVCF_creator.py
-+++ discosnp/scripts/functionObjectVCF_creator.py
+--- a/scripts/functionObjectVCF_creator.py
++++ b/scripts/functionObjectVCF_creator.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###############################################
+ import os
 @@ -191,25 +191,25 @@ def CheckAtDistanceXBestHits(upper_path,
          best_up=1024
          if int(upper_path.mappingPosition)==0 and int(lower_path.mappingPosition)==0:#Checks if paths are unmappped
@@ -144,10 +173,14 @@ Index: discosnp/scripts/functionObjectVCF_creator.py
                  if nbMismatch == best_low:
                          position_set.add(position)
                  if len(position_set) > 1: 
-Index: discosnp/scripts/VCF_creator.py
-===================================================================
---- discosnp.orig/scripts/VCF_creator.py
-+++ discosnp/scripts/VCF_creator.py
+--- a/scripts/VCF_creator.py
++++ b/scripts/VCF_creator.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #*****************************************************************************
+ #   VCF_Creator: mapping and VCF creation feature in DiscoSnp++
 @@ -78,7 +78,7 @@ def main():
                             listName[1]=listName[1].replace("_", " ")
                     stream_file=open(fileName,'r')
@@ -166,3 +199,422 @@ Index: discosnp/scripts/VCF_creator.py
              usage()
              sys.exit(2)
  
+--- a/scripts/format_phased_variants_for_haplotyping.py
++++ b/scripts/format_phased_variants_for_haplotyping.py
+@@ -3,11 +3,11 @@ import sys
+ ### first create connected components from disco (-A option)
+ #sh from_phased_alleles_to_clusters.sh phased_alleles_read_set_id_1.txt # creates file connected_components_phased_alleles_read_set_id_1.txt
+ ### them from the .fa file, the id of the set your interested in (e.g. 1 for phased_alleles_read_set_id_1.txt, this will correspond to C1 coverage in the fa file), the file containing the connected components, and the phased_alleles_read_set_id_X.txt file, generate the fact file
+-#python format_phased_variants_for_haplotyping.py mapping_k_31_c_auto_D_100_P_10_b_0_coherent.fa 1 connected_components_phased_alleles_read_set_id_1.txt phased_alleles_read_set_id_1.txt  > phased_alles_read_set_1_facts.txt
++#python3 format_phased_variants_for_haplotyping.py mapping_k_31_c_auto_D_100_P_10_b_0_coherent.fa 1 connected_components_phased_alleles_read_set_id_1.txt phased_alleles_read_set_id_1.txt  > phased_alles_read_set_1_facts.txt
+ 
+ 
+ if not len(sys.argv)==5:
+-    print ("usage: python format_phased_variants_for_haplotyping.py <file coherent.fa> <id number><connected_component_file><phased_allele_file>")
++    print ("usage: python3 format_phased_variants_for_haplotyping.py <file coherent.fa> <id number><connected_component_file><phased_allele_file>")
+     print (" * coherent.fa file: the file generated by discoSnp")
+     print (" * id number is the id of the read set, for which variants are phased. With i, this corresponds to Ci in the .fa file headers.")
+     print (" * connected_component_file: file obtained from \"from_phased_alleles_to_clusters.sh phased_alleles_read_set_id_1.txt\" continaing connected component of phased alleles")
+--- a/scripts/remove_non_covered_genotypes.py
++++ b/scripts/remove_non_covered_genotypes.py
+@@ -3,7 +3,7 @@ import gzip
+ 
+ if len(sys.argv)<3:
+     print ("This tool replaces discoSnp VCF genotypes with DP lower or equal to a threshold to \"./.\"")
+-    print ("python remove_non_covered_genotypes.py \".vcf from discoSnp\" \"DP threshold\"")
++    print ("python3 remove_non_covered_genotypes.py \".vcf from discoSnp\" \"DP threshold\"")
+     sys.exit()
+ 
+ 
+@@ -37,4 +37,4 @@ while True:
+             toprint+= splitted_geno[j]
+             if j<len(splitted_geno)-1: toprint+= ":"
+         if i<len(splitted_line): toprint+='\t'
+-    print (toprint)
+\ No newline at end of file
++    print (toprint)
+--- a/scripts/simulations/multiple_samples_simulator.sh
++++ b/scripts/simulations/multiple_samples_simulator.sh
+@@ -68,7 +68,7 @@ fi
+ 
+ for p in `seq 1 $num_pop`
+ 	do
+-	python ./random_mut_fasta.py $genome $div_pop > ERASEME_pos_mut_pop"$p"
++	python3 ./random_mut_fasta.py $genome $div_pop > ERASEME_pos_mut_pop"$p"
+ 
+ 	#pos random ordering
+ 	sort -R ERASEME_pos_mut_pop"$p" > ERASEME_pos_mut_random_pop"$p"
+@@ -90,7 +90,7 @@ for p in `seq 1 $num_pop`
+ 		#homozygotes mutations
+ 		cat ERASEME_pos_mut_random_shared_allpop"$p" ERASEME_pos_mut_random_shared_pop"$p"_s"$i" > ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
+ 		#mutation inducing
+-		python ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
++		python3 ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
+ 		mv "$genome"_mut ERASEME_"$genome"_pop"$p"_s"$i"_withhetero.fasta
+ 		#homozygote and heterozygote mutations
+ 		nb_line2=`grep "." -c ERASEME_pos_mut_random_shared_pop"$p"_s"$i"`
+@@ -99,7 +99,7 @@ for p in `seq 1 $num_pop`
+ 		head -n +"$nb_homo" ERASEME_pos_mut_random_shared_pop"$p"_s"$i" > ERASEME_pos_mut_random_shared_pop"$p"_s"$i"_homo
+ 		#population mutationq
+ 		cat ERASEME_pos_mut_random_shared_allpop"$p" ERASEME_pos_mut_random_shared_pop"$p"_s"$i"_homo > ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
+-		python ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
++		python3 ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
+ 		mv "$genome"_mut ERASEME_"$genome"_pop"$p"_s"$i"_homo.fasta
+ 		#READS SIMULATION
+ 		mutareads_forward ERASEME_"$genome"_pop"$p"_s"$i"_withhetero.fasta pop"$p"_ind"$i"_allele1_err_reads $read_s $read_l 0.01 0 0
+--- a/scripts/simulations/targeted_mut_fasta_corrected.py
++++ b/scripts/simulations/targeted_mut_fasta_corrected.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ # 
+ 
+--- a/scripts/from_phased_alleles_to_clusters.sh
++++ b/scripts/from_phased_alleles_to_clusters.sh
+@@ -13,7 +13,7 @@ filename=$(basename -- "$file")
+ path=$(dirname "${file}")
+ 
+ # FIND THE PATH CONTAINING THE SCRIPT: 
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) 
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) 
+ echo $EDIR
+ 
+ edge_coverage_threshold=0
+@@ -77,4 +77,4 @@ then
+     exit 1
+ fi
+ 
+-echo "Connected components (clusters of variants) from file $file are in $path/connected_components_${filename}"
+\ No newline at end of file
++echo "Connected components (clusters of variants) from file $file are in $path/connected_components_${filename}"
+--- a/scripts/redundancy_removal_discosnp.py
++++ b/scripts/redundancy_removal_discosnp.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###################################
+ # from kissnp output: 
+--- a/scripts/run_VCF_creator.sh
++++ b/scripts/run_VCF_creator.sh
+@@ -212,10 +212,10 @@ if [ -z "$samfile" ];then
+                      exit 1
+               else
+ 
+-                     python $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile #-n $n
++                     python3 $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile #-n $n
+                      if [ $? -ne 0 ]
+                      then
+-                            echo "there was a problem with the VCF creation (command was \"python $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile\""
++                            echo "there was a problem with the VCF creation (command was \"python3 $PATH_VCF_creator/VCF_creator.py -s $discoSNPs -o $vcffile\""
+                             exit 1
+                      fi
+                      echo -e "... Creation of the vcf file : done ...==> $vcffile"
+@@ -259,9 +259,9 @@ if [ -z "$samfile" ];then
+                      discoSNPsbis=$(basename $discoSNPs .fa)"bis.fasta"
+ 
+                      if [ $map_with_extensions -eq 1 ];then
+-                            python $PATH_VCF_creator/keep_extensions_disco_file.py $discoSNPs $discoSNPsbis
++                            python3 $PATH_VCF_creator/keep_extensions_disco_file.py $discoSNPs $discoSNPsbis
+                      else
+-                            python $PATH_VCF_creator/remove_extensions_disco_file.py $discoSNPs $discoSNPsbis
++                            python3 $PATH_VCF_creator/remove_extensions_disco_file.py $discoSNPs $discoSNPsbis
+                      fi
+                      if [ -z "$discoSNPsbis" ];then
+                             echo "...Error with the script remove_extensions_disco_file.py..."
+@@ -320,10 +320,10 @@ else
+        fi
+ fi
+ 
+-python $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile 
++python3 $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile 
+ if [ $? -ne 0 ]
+ then
+-       echo "there was a problem with the VCF creation (command was \"python $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile \""
++       echo "there was a problem with the VCF creation (command was \"python3 $PATH_VCF_creator/VCF_creator.py -s $samfile -o $vcffile \""
+        exit 1
+ fi
+ echo -e "... Creation of the vcf file: done ...==> $vcffile "
+@@ -332,10 +332,10 @@ echo -e "... Creation of the vcf file: d
+ if [ $igv -eq 1 ] ; then
+        $DIR/create_IGV_compatible_VCF.sh $vcffile
+        nameVCFIGV=$( basename $vcffile .vcf )
+-       python $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf
++       python3 $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf
+        if [ $? -ne 0 ]
+        then
+-              echo "there was a problem with the IGV VCF creation (command was \"python $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf\""
++              echo "there was a problem with the IGV VCF creation (command was \"python3 $PATH_VCF_creator/filterOnBestDP_multiple_variant_at_same_pos.py $nameVCFIGV\_for_IGV.vcf > tmp.vcf\""
+               exit 1
+        fi
+        echo -e "... Creation of the vcf file: done ...==> $vcffile"
+--- a/scripts/simulations/random_mut_fasta.py
++++ b/scripts/simulations/random_mut_fasta.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ # 
+ 
+--- a/scripts/create_IGV_compatible_VCF.sh
++++ b/scripts/create_IGV_compatible_VCF.sh
+@@ -30,7 +30,7 @@ igvfile=$(basename $vcffile .vcf)"_for_I
+ cat $vcffile|grep "#">$igvfile
+ #cat $vcffile|grep -v  "#"|sort -k 2n,2n -n|grep -v "^SNP"|grep -v "^INDEL">>$igvfile
+ cat $vcffile|grep -v  "#"|sort -k 1,1 -k 2,2n |grep -v "^SNP"|grep -v "^INDEL">>$igvfile # from 2 2 6
+-#python $DIR/tools/one2zeroBased_vcf.py $igvfiletemp 
++#python3 $DIR/tools/one2zeroBased_vcf.py $igvfiletemp 
+ #cat VCFone2zeroBAsed.vcf >> $igvfile
+ #rm -f $igvfiletemp VCFone2zeroBAsed.vcf
+ echo -e "... Creation of the vcf file for IGV: done ...==> $igvfile"
+--- a/scripts/k3000/K3000_gfa_to_dat.py
++++ b/scripts/k3000/K3000_gfa_to_dat.py
+@@ -242,7 +242,7 @@ def main(gfa_file_name):
+     '''
+     Creation of a DAT file from the graph_plus.gfa GFA file 
+     Usage: 
+-        python ~/workspace/gatb-discosnp/scripts/k3000/K3000_gfa_to_dat.py graph_plus.gfa > graph_diploid.dat
++        python3 ~/workspace/gatb-discosnp/scripts/k3000/K3000_gfa_to_dat.py graph_plus.gfa > graph_diploid.dat
+     '''
+     # Store the information as a graph. 
+     # This enables 
+--- a/scripts/keep_extensions_disco_file.py
++++ b/scripts/keep_extensions_disco_file.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###################################
+ # change extensions in uppercase and replace relative positions of SNPs in the header
+--- a/scripts/remove_extensions_disco_file.py
++++ b/scripts/remove_extensions_disco_file.py
+@@ -1,4 +1,4 @@
+-#!/bin/python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ ###################################
+ #Removes extensions in lowercase :
+--- a/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py
++++ b/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ # 
+ 
+--- a/scripts/validation_scripts/eval_disco_one_snp_per_locus.py
++++ b/scripts/validation_scripts/eval_disco_one_snp_per_locus.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ # 
+ 
+--- a/scripts/k3000/K3000.py
++++ b/scripts/k3000/K3000.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ #
+ '''
+--- a/scripts/k3000/K3000_node_ids_to_node_sequences.py
++++ b/scripts/k3000/K3000_node_ids_to_node_sequences.py
+@@ -152,7 +152,7 @@ def main():
+     Produces a gfa file replacing the node content from int ids of alleles to their sequence
+     '''
+     if len(sys.argv) !=3:
+-        sys.stderr.write("Usage: python K3000_node_ids_to_node_sequences.py graph_plus.gfa compacted_facts.fa > graph_final.gfa\n")
++        sys.stderr.write("Usage: python3 K3000_node_ids_to_node_sequences.py graph_plus.gfa compacted_facts.fa > graph_final.gfa\n")
+         sys.exit(0)
+     sys.stderr.write("Indexing sequence positions\n")
+     header_to_file_position = index_sequences(sys.argv[2])
+--- a/scripts/k3000/run.sh
++++ b/scripts/k3000/run.sh
+@@ -11,7 +11,7 @@ wraith=false #$4 # set to true if you on
+ phased_allele_file=$1
+ disco_fa_file=$2
+ read_set_id=$3
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+ if [ -z "$phased_allele_file" ]; then
+     echo "${red}           You must provide a phased file name${reset}"
+     exit 1
+--- a/scripts/remove_non_biallelic.py
++++ b/scripts/remove_non_biallelic.py
+@@ -3,7 +3,7 @@ import gzip
+ 
+ if len(sys.argv)<2:
+     print ("This tool removes from discoSnp sorted VCF the locus which are tri-allelic or more")
+-    print ("python remove_non_diploids.py \".vcf from discoSnp\" ")
++    print ("python3 remove_non_diploids.py \".vcf from discoSnp\" ")
+     sys.exit()
+ 
+ 
+--- a/discoSnpRAD/post-processing_scripts/README.md
++++ b/discoSnpRAD/post-processing_scripts/README.md
+@@ -7,7 +7,7 @@
+        * removes variants belonging to a cluster (locus) whose size (nb of variants) is outside the given size range (options `-m` and `-M`)
+        * removes variants with rank lower than a given threshold given by option `-r`
+        * Usage :  
+-       `python filter_by_cluster_size_and_rank.py -i vcf_file [-o new_vcf_file -m 0 -M 150 -r 0.4]`
++       `python3 filter_by_cluster_size_and_rank.py -i vcf_file [-o new_vcf_file -m 0 -M 150 -r 0.4]`
+ 
+    3. **script** `filter_vcf_by_indiv_cov_max_missing_and_maf.py`:
+        * replaces individual genotypes that have DP less than the value given by option `-c` by missing genotype `./.`
+@@ -15,14 +15,14 @@
+        * removes variants (vcf lines) that have a minor allele frequency smaller than the value given by option `-f` 
+        * outputs only SNP variants if option `-s`. 
+        * Usage :    
+-       `python filter_vcf_by_indiv_cov_max_missing_and_maf.py -i vcf_file -o new_vcf_file [-c min_cov -m max_missing -f maf -s] `
++       `python3 filter_vcf_by_indiv_cov_max_missing_and_maf.py -i vcf_file -o new_vcf_file [-c min_cov -m max_missing -f maf -s] `
+ 
+   3. **script** `filter_paralogs.py`:
+         * identifies variants (vcf lines) that have a fraction of heterozygous genotypes greater than `x` (not counting missing genotypes)
+         * removes variants (vcf lines) that belong to a cluster having a fraction of such variants greater than `y`
+         * Example : `x=0.1` and `y= 0.5` and if we consider a cluster to represent a locus. This filter removes loci that have more than 50% of the SNPs that have each more than 10% of heterozygous genotypes.
+         * Usage :     
+-        `python filter_paralogs.py -i vcf_file -o new_vcf_file [-x 0.1 -y 0.5]`
++        `python3 filter_paralogs.py -i vcf_file -o new_vcf_file [-x 0.1 -y 0.5]`
+ 
+ 
+ ## Scripts for STRUCTURE analyses :
+@@ -30,7 +30,7 @@
+    4. **script** `1SNP_by_cluster.py`
+         * selects one SNP per cluster (the one with less missing genotypes)
+         * Usage :   
+-        `python  1SNP_per_cluster.py -i vcf_file -o new_vcf_file`
++        `python3  1SNP_per_cluster.py -i vcf_file -o new_vcf_file`
+ 
+    5. **script** `vcf2structure.sh`    
+         * changes the vcf format to a Structure format (input of the software Structure)
+@@ -55,13 +55,13 @@ Here is the full pipeline to map the res
+ sh [DISCO_DIR]/scripts/run_VCF_creator.sh  -G dm6_masked.fa -p myDiscoSnpRADResult_raw_filtered.fa -e -o temp.vcf
+ 
+ # Adding clustering information (and minimal filtering on cluster size)
+-python add_cluster_info_to_mapped_vcf.py -m temp.vcf -u myDiscoSnpRADResult_clustered.vcf -o myDiscoSnpRADResult_mapped.vcf
++python3 add_cluster_info_to_mapped_vcf.py -m temp.vcf -u myDiscoSnpRADResult_clustered.vcf -o myDiscoSnpRADResult_mapped.vcf
+ # final vcf is myDiscoSnpRADResult_mapped.vcf
+ ```
+ 
+ Additionnally, in a validation context, if one wants to compare variant positions between two such vcf files, the following command will output recall and precision metrics:
+ ```
+-python [DISCO_DIR]/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py truth.vcf myDiscoSnpRADResult_mapped.vcf
++python3 [DISCO_DIR]/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py truth.vcf myDiscoSnpRADResult_mapped.vcf
+ ```
+ 
+ 
+--- a/discoSnpRAD/post-processing_scripts/filter_by_cluster_size_and_rank.py
++++ b/discoSnpRAD/post-processing_scripts/filter_by_cluster_size_and_rank.py
+@@ -1,5 +1,4 @@
+-
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ 
+ 
+--- a/doc/discoSnp_user_guide.txt
++++ b/doc/discoSnp_user_guide.txt
+@@ -201,7 +201,7 @@ chromosome      117     3       C
+ See documentation specific to VCF_creator for more information: doc/vcf_creator_user_guide.pdf
+ Output Analyze
+ From a fasta format to a csv format: If you wish to analyze the results in a tabulated format: 
+-# python output_analyses/discoSnp++_to_csv.py discoSnp++_output.fa 
++# python3 output_analyses/discoSnp++_to_csv.py discoSnp++_output.fa 
+ will output a .csv tabulated file containing on each line the content of 4 lines of the .fa, replacing the '|' character by comma ',' and removing the CX_
+ Exemples of close SNPs and indels
+ Exemple of a multiple SNP:
+--- a/run_discoSnp++.sh
++++ b/run_discoSnp++.sh
+@@ -65,7 +65,7 @@ verbose=1
+ stop_after_kissnp=0
+ e=""
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-#EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
++#EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+ EDIR="/usr"
+ 
+ 
+--- a/discoSnpRAD/post-processing_scripts/add_cluster_info_to_mapped_vcf.py
++++ b/discoSnpRAD/post-processing_scripts/add_cluster_info_to_mapped_vcf.py
+@@ -1,5 +1,4 @@
+-
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ 
+ import sys
+--- a/discoSnpRAD/post-processing_scripts/filter_paralogs.py
++++ b/discoSnpRAD/post-processing_scripts/filter_paralogs.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ 
+ 
+--- a/discoSnpRAD/post-processing_scripts/filter_vcf_by_indiv_cov_max_missing_and_maf.py
++++ b/discoSnpRAD/post-processing_scripts/filter_vcf_by_indiv_cov_max_missing_and_maf.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ 
+ ''' ***********************************************
+--- a/run_discoSnp++_storagefile.sh
++++ b/run_discoSnp++_storagefile.sh
+@@ -55,7 +55,7 @@ remove=1
+ verbose=1
+ e=""
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+ 
+ if [ -d "$EDIR/build/" ] ; then # VERSION SOURCE COMPILED
+     read_file_names_bin=$EDIR/build/bin/read_file_names
+--- a/discoSnpRAD/post-processing_scripts/1SNP_per_cluster.py
++++ b/discoSnpRAD/post-processing_scripts/1SNP_per_cluster.py
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ 
+ 
+--- a/discoSnpRAD/run_discoSnpRad.sh
++++ b/discoSnpRAD/run_discoSnpRad.sh
+@@ -75,7 +75,7 @@ verbose=1
+ short_read_connector_path=""
+ option_phase_variants=""
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+ 
+ if [ -d "$EDIR/../build/" ] ; then # VERSION SOURCE COMPILED
+     read_file_names_bin=$EDIR/../build/bin/read_file_names
+@@ -556,7 +556,7 @@ fi
+ echo -e "${yellow}\t############################################################"
+ echo -e "\t#################### REDUNDANCY REMOVAL  ###################"
+ echo -e "\t############################################################$reset"
+-redundancy_removal_cmd="python $EDIR/../scripts/redundancy_removal_discosnp.py ${kissprefix}_r.fa $k $kissprefix.fa"
++redundancy_removal_cmd="python3 $EDIR/../scripts/redundancy_removal_discosnp.py ${kissprefix}_r.fa $k $kissprefix.fa"
+ echo $green${redundancy_removal_cmd}$cyan
+ if [[ "$wraith" == "false" ]]; then
+    eval ${redundancy_removal_cmd}
+--- a/run_discoSnp++_ML.sh
++++ b/run_discoSnp++_ML.sh
+@@ -67,7 +67,7 @@ stop_after_kissnp=0
+ e=""
+ prefix_trash=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 13 ; echo ''`
+ #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
+-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
++EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+ 
+ 
+ if [ -d "$EDIR/build/" ] ; then # VERSION SOURCE COMPILED
+@@ -839,4 +839,4 @@ if [[ "$wraith" == "false" ]]; then
+     fi
+     echo -e " Thanks for using discoSnp++ - http://colibread.inria.fr/discoSnp/ - Forum: http://www.biostars.org/t/discoSnp/"
+     echo -e "################################################################################################################${reset}"
+-fi
+\ No newline at end of file
++fi



View it on GitLab: https://salsa.debian.org/med-team/discosnp/commit/24a8436fbcbf9c7d15e4654f044ddf5354539ad8

-- 
View it on GitLab: https://salsa.debian.org/med-team/discosnp/commit/24a8436fbcbf9c7d15e4654f044ddf5354539ad8
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200127/f698e1b7/attachment-0001.html>


More information about the debian-med-commit mailing list