[med-svn] [Git][med-team/ivar][upstream] New upstream version 1.3.1+dfsg

Étienne Mollier (@emollier) gitlab at salsa.debian.org
Fri Aug 27 21:32:25 BST 2021



Étienne Mollier pushed to branch upstream at Debian Med / ivar


Commits:
b02028a7 by Étienne Mollier at 2021-08-27T20:18:36+02:00
New upstream version 1.3.1+dfsg
- - - - -


13 changed files:

- configure.ac
- src/interval_tree.cpp
- src/interval_tree.h
- src/ivar.cpp
- src/primer_bed.cpp
- src/primer_bed.h
- src/trim_primer_quality.cpp
- src/trim_primer_quality.h
- tests/test_amplicon_search.cpp
- tests/test_interval_tree.cpp
- tests/test_isize_trim.cpp
- tests/test_primer_trim_edge_cases.cpp
- tests/test_trim.cpp


Changes:

=====================================
configure.ac
=====================================
@@ -2,7 +2,7 @@
 # Process this file with autoconf to produce a configure script.
 
 AC_PREREQ([2.63])
-AC_INIT([ivar], [1.3], [gkarthik at scripps.edu])
+AC_INIT([ivar], [1.3.1], [gkarthik at scripps.edu])
 AM_INIT_AUTOMAKE([foreign subdir-objects])
 AC_CONFIG_HEADERS([config.h])
 


=====================================
src/interval_tree.cpp
=====================================
@@ -38,13 +38,13 @@ void IntervalTree::insert(ITNode *root, Interval data){
     }
   }
   // update max value of ancestor node
-  if(root->max < data.low)
-    root->max = data.low;
+  if(root->max < data.high)
+    root->max = data.high;
 }
 
 
-// A utility function to check if given two intervals overlap
-bool doOverlap(Interval i1, Interval i2){
+// A utility function to check if the 1st interval envelops the second
+bool doEnvelop(Interval i1, Interval i2){
   if(i1.low <= i2.low && i1.high >= i2.high)
     return true;
   return false;
@@ -53,23 +53,23 @@ bool doOverlap(Interval i1, Interval i2){
 
 // The main function that searches an interval i in a given
 // Interval Tree.
-bool IntervalTree::overlapSearch(ITNode *root, Interval i){
+bool IntervalTree::envelopSearch(ITNode *root, Interval i){
   // Base Case, tree is empty
   //std::cout << root->data->low << ":" << root->data->high << std::endl;
   if (root == NULL) return false;
 
   // If given interval overlaps with root
-  if (doOverlap(*(root->data), i))
+  if (doEnvelop(*(root->data), i))
     return true;
 
   // If left child of root is present and max of left child is
   // greater than or equal to given interval, then i may
-  // overlap with an interval in left subtree
-  if (root->left != NULL && root->left->max >= i.low)
-    return overlapSearch(root->left, i);
+  // be enveloped by an amplicon in left subtree
+  if (root->left != NULL && root->left->max >= i.high)
+    return envelopSearch(root->left, i);
 
-  // Else interval can only overlap with right subtree
-  return overlapSearch(root->right, i);
+  // Else interval can only be enveloped by amplicon in right subtree
+  return envelopSearch(root->right, i);
 }
 
 // A helper function for inorder traversal of the tree


=====================================
src/interval_tree.h
=====================================
@@ -25,7 +25,7 @@ class ITNode{
     void setRight(ITNode *node);
   */
 public:
-  ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.low) {}  // constructor
+  ITNode(Interval value): data(new Interval(value)), left(nullptr), right(nullptr), max(value.high) {}  // constructor
   Interval *data;  // pointer to node's interval data object
   ITNode *left, *right; // pointer to node's left & right child node objects
   int max;
@@ -38,13 +38,13 @@ class IntervalTree{
 private:
   ITNode *_root;
   void insert(ITNode *root, Interval data);
-  bool overlapSearch(ITNode *root, Interval data);
+  bool envelopSearch(ITNode *root, Interval data);
   void inOrder(ITNode * root);
 
 public:
   IntervalTree();  // constructor
   void insert(Interval data){ insert(_root, data);}
-  bool overlapSearch(Interval data){ return overlapSearch(_root, data);}
+  bool envelopSearch(Interval data){ return envelopSearch(_root, data);}
   void inOrder() {inOrder(_root);}
 };
 


=====================================
src/ivar.cpp
=====================================
@@ -16,7 +16,7 @@
 #include "suffix_tree.h"
 #include "get_common_variants.h"
 
-const std::string VERSION = "1.3";
+const std::string VERSION = "1.3.1";
 
 struct args_t {
   std::string bam;		// -i
@@ -37,6 +37,7 @@ struct args_t {
   char gap;			// -n
   bool keep_min_coverage;	// -k
   std::string primer_pair_file;	// -f
+  int32_t primer_offset; // -x
   std::string file_list;		// -f
   bool write_no_primers_flag;	// -e
   std::string gff;		// -g
@@ -65,8 +66,9 @@ void print_trim_usage(){
     "Input Options    Description\n"
     "           -i    (Required) Sorted bam file, with aligned reads, to trim primers and quality\n"
     "           -b    BED file with primer sequences and positions. If no BED file is specified, only quality trimming will be done.\n"
-    "           -f    Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n"
-    "                 If provided, reads will be filtered based on their overlap with amplicons prior to trimming\n"
+    "           -f    [EXPERIMENTAL] Primer pair information file containing left and right primer names for the same amplicon separated by a tab\n"
+    "                 If provided, reads that do not fall within atleat one amplicon will be ignored prior to primer trimming.\n"
+    "           -x    Primer position offset (Default: 0). Reads that occur at the specified offset positions relative to primer positions will also be trimmed.\n"
     "           -m    Minimum length of read to retain after trimming (Default: 30)\n"
     "           -q    Minimum quality threshold for sliding window to pass (Default: 20)\n"
     "           -s    Width of sliding window (Default: 4)\n"
@@ -165,7 +167,7 @@ void print_version_info(){
     "\nPlease raise issues and bug reports at https://github.com/andersen-lab/ivar/\n\n";
 }
 
-static const char *trim_opt_str = "i:b:f:p:m:q:s:ekh?";
+static const char *trim_opt_str = "i:b:f:x:p:m:q:s:ekh?";
 static const char *variants_opt_str = "p:t:q:m:r:g:h?";
 static const char *consensus_opt_str = "i:p:q:t:m:n:kh?";
 static const char *removereads_opt_str = "i:p:t:b:h?";
@@ -221,6 +223,7 @@ int main(int argc, char* argv[]){
     g_args.keep_for_reanalysis = false;
     g_args.bed = "";
     g_args.primer_pair_file = "";
+    g_args.primer_offset = 0;
     opt = getopt( argc, argv, trim_opt_str);
     while( opt != -1 ) {
       switch( opt ) {
@@ -232,6 +235,9 @@ int main(int argc, char* argv[]){
 	break;
       case 'f':
   g_args.primer_pair_file = optarg;
+  break;
+      case 'x':
+  g_args.primer_offset = std::stoi(optarg);
   break;
       case 'p':
 	g_args.prefix = optarg;
@@ -264,7 +270,7 @@ int main(int argc, char* argv[]){
       return -1;
     }
     g_args.prefix = get_filename_without_extension(g_args.prefix,".bam");
-    res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length, g_args.primer_pair_file);
+    res = trim_bam_qual_primer(g_args.bam, g_args.bed, g_args.prefix, g_args.region, g_args.min_qual, g_args.sliding_window, cl_cmd.str(), g_args.write_no_primers_flag, g_args.keep_for_reanalysis, g_args.min_length, g_args.primer_pair_file, g_args.primer_offset);
   }
   // ivar variants
   else if (cmd.compare("variants") == 0){


=====================================
src/primer_bed.cpp
=====================================
@@ -85,6 +85,75 @@ void print_bed_format(){
   std::cout << "It requires the following columns delimited by a tab: chrom, chromStart, chromEnd, name, score, strand" << std::endl;
 }
 
+std::vector<primer> populate_from_file(std::string path, int32_t offset = 0){
+  std::ifstream  data(path.c_str());
+  std::string line;
+  std::vector<primer> primers;
+  int16_t indice = 0;
+  while(std::getline(data,line)){ // Remove extra lineStream
+    std::stringstream lineStream(line);
+    std::string cell;
+    int ctr = 0;
+    primer p;
+    p.set_strand(0);		// Set strand to NULL
+    while(std::getline(lineStream,cell,'\t')){
+      switch(ctr){
+      case 0:
+	p.set_region(cell);
+	break;
+      case 1:
+	if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
+	  p.set_start(std::stoul(cell) - offset);
+	} else {
+	  print_bed_format();
+	  primers.clear();
+	  return primers;
+	}
+	break;
+      case 2:
+	if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
+	  p.set_end(std::stoul(cell) - 1 + offset); // Bed format - End is not 0 based
+	} else {
+	  print_bed_format();
+	  primers.clear();
+	  return primers;
+	}
+	break;
+      case 3:
+	p.set_name(cell);
+	break;
+      case 4:
+	if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
+	  p.set_score(stoi(cell));
+	} else {
+	  print_bed_format();  // score is missing, send warning but continue populating
+    std::cout << "\nWARNING: The BED file provided did not have the expected score column, but iVar will continue trimming\n" << std::endl;
+    p.set_score(-1);
+	}
+	break;
+      case 5:
+	if(cell[0] == '+' || cell[0] == '-')
+	  p.set_strand(cell[0]);
+	else {
+	  print_bed_format();
+	  primers.clear();
+	  return primers;
+	}
+      }
+      ctr++;
+    }
+    if(indice == 0 && ctr < 6)
+      std::cout << "Strand not found in primer BED file so strand will not be considered for trimming" << std::endl;
+    p.set_indice(indice);
+    p.set_pair_indice(-1);
+    p.set_read_count(0);
+    primers.push_back(p);
+    indice++;
+  }
+  std::cout << "Found " << primers.size() << " primers in BED file" << std::endl;
+  return primers;
+}
+
 std::vector<primer> populate_from_file(std::string path){
   std::ifstream  data(path.c_str());
   std::string line;
@@ -112,7 +181,7 @@ std::vector<primer> populate_from_file(std::string path){
 	break;
       case 2:
 	if(std::all_of(cell.begin(), cell.end(), ::isdigit)) {
-	  p.set_end(std::stoul(cell)-1); // Bed format - End is not 0 based
+	  p.set_end(std::stoul(cell) - 1); // Bed format - End is not 0 based
 	} else {
 	  print_bed_format();
 	  primers.clear();


=====================================
src/primer_bed.h
=====================================
@@ -46,6 +46,7 @@ class primer {
 
 };
 
+std::vector<primer> populate_from_file(std::string path, int32_t offset);
 std::vector<primer> populate_from_file(std::string path);
 std::vector<primer> get_primers(std::vector<primer> p, unsigned int pos);
 int get_primer_indice(std::vector<primer> p, std::string name);


=====================================
src/trim_primer_quality.cpp
=====================================
@@ -331,7 +331,7 @@ int get_bigger_primer(std::vector<primer> primers){
   return max_primer_len;
 }
 
-// check if read overlaps with any of the amplicons
+// check if read is enveloped by any of the amplicons
 bool amplicon_filter(IntervalTree amplicons, bam1_t* r){
   Interval fragment_coords = Interval(0, 1);
   if(r->core.isize > 0){
@@ -342,16 +342,16 @@ bool amplicon_filter(IntervalTree amplicons, bam1_t* r){
     fragment_coords.high = bam_endpos(r);
   }
   // debugging
-  bool amplicon_flag = amplicons.overlapSearch(fragment_coords);
+  bool amplicon_flag = amplicons.envelopSearch(fragment_coords);
   return amplicon_flag;
 }
 
-int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30, std::string pair_info = "") {
+int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool keep_for_reanalysis, int min_length = 30, std::string pair_info = "", int32_t primer_offset = 0) {
   int retval = 0;
   std::vector<primer> primers;
   int max_primer_len = 0;
   if(!bed.empty()){
-    primers = populate_from_file(bed);
+    primers = populate_from_file(bed, primer_offset);
     if(primers.size() == 0){
       std::cout << "Exiting." << std::endl;
       return -1;
@@ -363,6 +363,8 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
   if(!pair_info.empty()){
     amplicons = populate_amplicons(pair_info, primers);
   }
+  std::cout << "Amplicons detected: " << std::endl;
+  amplicons.inOrder();
   if(bam.empty()){
     std::cout << "Bam file is empty." << std::endl;
     return -1;
@@ -605,7 +607,10 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
     std::cout << unmapped_counter << " unmapped reads were not written to file." << std::endl;
   }
   if(amplicon_flag_ctr > 0){
-    std::cout << amplicon_flag_ctr << " reads were ignored because they did not fall within an amplicon" << std::endl;
+    std::cout << round_int(amplicon_flag_ctr, mapped) 
+              << "% (" << amplicon_flag_ctr 
+              << ") reads were ignored because they did not fall within an amplicon" 
+              << std::endl;
   }
   if(failed_frag_size > 0){
     std::cout << round_int(failed_frag_size, mapped)


=====================================
src/trim_primer_quality.h
=====================================
@@ -31,7 +31,7 @@ inline void free_cigar(cigar_ t) { if (t.free_cig) free(t.cigar); }
 void add_pg_line_to_header(bam_hdr_t** hdr, char *cmd);
 
 
-int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length, std::string pair_info);
+int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out, std::string region_, uint8_t min_qual, uint8_t sliding_window, std::string cmd, bool write_no_primer_reads, bool mark_qcfail_flag, int min_length, std::string pair_info, int32_t primer_offset);
 void free_cigar(cigar_ t);
 int32_t get_pos_on_query(uint32_t *cigar, uint32_t ncigar, int32_t pos, int32_t ref_start);
 int32_t get_pos_on_reference(uint32_t *cigar, uint32_t ncigar, uint32_t pos, uint32_t ref_start);


=====================================
tests/test_amplicon_search.cpp
=====================================
@@ -5,11 +5,11 @@
 #include "../src/primer_bed.h"
 #include "../src/interval_tree.h"
 
-int test_amplicon_search(std::string bam_file, std::string bed_file, std::string pair_info_file, bool expected[])
+int test_amplicon_search(std::string bam_file, std::string bed_file, std::string pair_info_file, int32_t primer_offset, bool expected[])
 {
   std::vector<primer> primers;
   IntervalTree amplicons;
-  primers = populate_from_file(bed_file);
+  primers = populate_from_file(bed_file, primer_offset);
   std::cout << "Total Number of primers: " << primers.size() << std::endl;
   amplicons = populate_amplicons(pair_info_file, primers);
   samFile *in = hts_open(bam_file.c_str(), "r");
@@ -35,13 +35,14 @@ int main(){
   int success;
   std::string bam = "../data/test_amplicon.sorted.bam";
   std::string pair_indice = "../data/pair_info_2.tsv";
+  int32_t primer_offset = 0;
   std::string bed = "../data/test_isize.bed";
   std::string prefix = "/tmp/data/trim_amplicon";
   std::string bam_out = "/tmp/trim_amplicon.bam";
 
   IntervalTree amplicons;
   bool expected[8] = {true, false, true, false, true, false, true, false};
-  success = test_amplicon_search(bam, bed, pair_indice, expected);
+  success = test_amplicon_search(bam, bed, pair_indice, primer_offset, expected);
   amplicons.inOrder();
   return success;
 }


=====================================
tests/test_interval_tree.cpp
=====================================
@@ -7,7 +7,7 @@ int test_itree_overlap(IntervalTree tree, Interval queries[], int num_tests, boo
     int result = 0;
     for (int i = 0; i < num_tests; i++)
     {
-        result = tree.overlapSearch(queries[i]);
+        result = tree.envelopSearch(queries[i]);
         if (result != expected[i])
         {
             std::cout << "Interval Tree overlap behavior incorrect for interval " << queries[i].low << ":" << queries[i].high 


=====================================
tests/test_isize_trim.cpp
=====================================
@@ -21,6 +21,7 @@ int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag
     std::string bam = "../data/test_isize.sorted.bam";
     std::string bed = "../data/test_isize.bed";
     std::string pair_info = "";
+    int32_t primer_offset = 0;
     std::string prefix = "../data/trim_isize";
     std::string bam_out = "../data/trim_isize.bam";
 
@@ -29,7 +30,7 @@ int test_isize_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag
     std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n";
 
     // Test and check result
-    res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info);
+    res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info, primer_offset);
 
     if (res) {
         success = -1;


=====================================
tests/test_primer_trim_edge_cases.cpp
=====================================
@@ -4,7 +4,8 @@
 int main(){
   int success = 0;
   std::string bam = "../data/primer_only/primer_edge_cases.bam";
-  std::vector<primer> primers = populate_from_file("../data/test.bed");
+  int32_t primer_offset = 0;
+  std::vector<primer> primers = populate_from_file("../data/test.bed", primer_offset);
   int max_primer_len = 0;
   max_primer_len = get_bigger_primer(primers);
   std::string region_;


=====================================
tests/test_trim.cpp
=====================================
@@ -21,6 +21,7 @@ int test_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool
   std::string bam = "../data/test.unmapped.sorted.bam";
   std::string bed = "../data/test.bed";
   std::string pair_info = "";
+  int32_t primer_offset = 0;
   std::string prefix = "/tmp/trim";
   std::string bam_out = "/tmp/trim.bam";
 
@@ -28,7 +29,7 @@ int test_trim(uint8_t min_qual, uint8_t sliding_window, bool no_write_flag, bool
   std::string cmd = "@PG\tID:ivar-trim\tPN:ivar\tVN:1.0.0\tCL:ivar trim\n";
 
   // Test and check result
-  res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info);
+  res = trim_bam_qual_primer(bam, bed, prefix, region_, min_qual, sliding_window, cmd, no_write_flag, keep_for_reanalysis, min_length, pair_info, primer_offset);
   if (res) {
     success = -1;
     std::cerr << testname << " failed: trim_bam_qual_primer() returned " << res << std::endl;



View it on GitLab: https://salsa.debian.org/med-team/ivar/-/commit/b02028a7de0247141fb9648bcd2e4e048363a38c

-- 
View it on GitLab: https://salsa.debian.org/med-team/ivar/-/commit/b02028a7de0247141fb9648bcd2e4e048363a38c
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210827/8a367f27/attachment-0001.htm>


More information about the debian-med-commit mailing list