[med-svn] [Git][med-team/racon][master] Adapt to bioparser 3.0.12.

Michael R. Crusoe gitlab at salsa.debian.org
Fri Jan 8 09:15:45 GMT 2021



Michael R. Crusoe pushed to branch master at Debian Med / racon


Commits:
9da51296 by Michael R. Crusoe at 2021-01-08T10:14:55+01:00
Adapt to bioparser 3.0.12.

- - - - -


3 changed files:

- debian/changelog
- debian/patches/series
- + debian/patches/updated_bioparser


Changes:

=====================================
debian/changelog
=====================================
@@ -2,6 +2,10 @@ racon (1.4.13-3) UNRELEASED; urgency=medium
 
   * Add lintian-override
 
+  [ Michael Crusoe ]
+  * Adapt to libbioparser-dev 3.0.12
+    TODO: the tests hang
+
  -- Andreas Tille <tille at debian.org>  Fri, 07 Aug 2020 17:49:51 +0200
 
 racon (1.4.13-2) unstable; urgency=medium


=====================================
debian/patches/series
=====================================
@@ -1,3 +1,4 @@
 use_debian_packaged_libs.patch
 remove_buildpath
 py3
+updated_bioparser


=====================================
debian/patches/updated_bioparser
=====================================
@@ -0,0 +1,361 @@
+From: Michael R. Crusoe <crusoe at debian.org>
+Subject: Adapt to bioparser version 3.0.12
+Forwarded: https://github.com/lbcb-sci/racon/pull/48
+--- racon.orig/src/cuda/cudapolisher.cpp
++++ racon/src/cuda/cudapolisher.cpp
+@@ -14,7 +14,7 @@
+ #include "cudapolisher.hpp"
+ #include <claragenomics/utils/cudautils.hpp>
+ 
+-#include "bioparser/bioparser.hpp"
++#include "bioparser/parser.hpp"
+ 
+ namespace racon {
+ 
+--- racon.orig/src/polisher.cpp
++++ racon/src/polisher.cpp
+@@ -17,7 +17,11 @@
+ #include "cuda/cudapolisher.hpp"
+ #endif
+ 
+-#include "bioparser/bioparser.hpp"
++#include "bioparser/fasta_parser.hpp"
++#include "bioparser/fastq_parser.hpp"
++#include "bioparser/mhap_parser.hpp"
++#include "bioparser/paf_parser.hpp"
++#include "bioparser/sam_parser.hpp"
+ #include "thread_pool/thread_pool.hpp"
+ #include "spoa/spoa.hpp"
+ 
+@@ -83,11 +87,11 @@
+     if (is_suffix(sequences_path, ".fasta") || is_suffix(sequences_path, ".fasta.gz") ||
+         is_suffix(sequences_path, ".fna") || is_suffix(sequences_path, ".fna.gz") ||
+         is_suffix(sequences_path, ".fa") || is_suffix(sequences_path, ".fa.gz")) {
+-        sparser = bioparser::createParser<bioparser::FastaParser, Sequence>(
++        sparser = bioparser::Parser<Sequence>::Create<bioparser::FastaParser>(
+             sequences_path);
+     } else if (is_suffix(sequences_path, ".fastq") || is_suffix(sequences_path, ".fastq.gz") ||
+         is_suffix(sequences_path, ".fq") || is_suffix(sequences_path, ".fq.gz")) {
+-        sparser = bioparser::createParser<bioparser::FastqParser, Sequence>(
++        sparser = bioparser::Parser<Sequence>::Create<bioparser::FastqParser>(
+             sequences_path);
+     } else {
+         fprintf(stderr, "[racon::createPolisher] error: "
+@@ -99,13 +103,13 @@
+     }
+ 
+     if (is_suffix(overlaps_path, ".mhap") || is_suffix(overlaps_path, ".mhap.gz")) {
+-        oparser = bioparser::createParser<bioparser::MhapParser, Overlap>(
++        oparser = bioparser::Parser<Overlap>::Create<bioparser::MhapParser>(
+             overlaps_path);
+     } else if (is_suffix(overlaps_path, ".paf") || is_suffix(overlaps_path, ".paf.gz")) {
+-        oparser = bioparser::createParser<bioparser::PafParser, Overlap>(
++        oparser = bioparser::Parser<Overlap>::Create<bioparser::PafParser>(
+             overlaps_path);
+     } else if (is_suffix(overlaps_path, ".sam") || is_suffix(overlaps_path, ".sam.gz")) {
+-        oparser = bioparser::createParser<bioparser::SamParser, Overlap>(
++        oparser = bioparser::Parser<Overlap>::Create<bioparser::SamParser>(
+             overlaps_path);
+     } else {
+         fprintf(stderr, "[racon::createPolisher] error: "
+@@ -117,11 +121,11 @@
+     if (is_suffix(target_path, ".fasta") || is_suffix(target_path, ".fasta.gz") ||
+         is_suffix(target_path, ".fna") || is_suffix(target_path, ".fna.gz") ||
+         is_suffix(target_path, ".fa") || is_suffix(target_path, ".fa.gz")) {
+-        tparser = bioparser::createParser<bioparser::FastaParser, Sequence>(
++        tparser = bioparser::Parser<Sequence>::Create<bioparser::FastaParser>(
+             target_path);
+     } else if (is_suffix(target_path, ".fastq") || is_suffix(target_path, ".fastq.gz") ||
+         is_suffix(target_path, ".fq") || is_suffix(target_path, ".fq.gz")) {
+-        tparser = bioparser::createParser<bioparser::FastqParser, Sequence>(
++        tparser = bioparser::Parser<Sequence>::Create<bioparser::FastqParser>(
+             target_path);
+     } else {
+         fprintf(stderr, "[racon::createPolisher] error: "
+@@ -197,8 +201,8 @@
+ 
+     logger_->log();
+ 
+-    tparser_->reset();
+-    tparser_->parse(sequences_, -1);
++    tparser_->Reset();
++    sequences_ = tparser_->Parse(-1);
+ 
+     uint64_t targets_size = sequences_.size();
+     if (targets_size == 0) {
+@@ -223,40 +227,40 @@
+ 
+     uint64_t sequences_size = 0, total_sequences_length = 0;
+ 
+-    sparser_->reset();
++    sparser_->Reset();
+     while (true) {
+         uint64_t l = sequences_.size();
+-        auto status = sparser_->parse(sequences_, kChunkSize);
++	try {
++            sequences_ = sparser_->Parse(kChunkSize);
+ 
+-        uint64_t n = 0;
+-        for (uint64_t i = l; i < sequences_.size(); ++i, ++sequences_size) {
+-            total_sequences_length += sequences_[i]->data().size();
+-
+-            auto it = name_to_id.find(sequences_[i]->name() + "t");
+-            if (it != name_to_id.end()) {
+-                if (sequences_[i]->data().size() != sequences_[it->second]->data().size() ||
+-                    sequences_[i]->quality().size() != sequences_[it->second]->quality().size()) {
+-
+-                    fprintf(stderr, "[racon::Polisher::initialize] error: "
+-                        "duplicate sequence %s with unequal data\n",
+-                        sequences_[i]->name().c_str());
+-                    exit(1);
+-                }
++            uint64_t n = 0;
++            for (uint64_t i = l; i < sequences_.size(); ++i, ++sequences_size) {
++                total_sequences_length += sequences_[i]->data().size();
++
++                auto it = name_to_id.find(sequences_[i]->name() + "t");
++                if (it != name_to_id.end()) {
++                    if (sequences_[i]->data().size() != sequences_[it->second]->data().size() ||
++                        sequences_[i]->quality().size() != sequences_[it->second]->quality().size()) {
++
++                        fprintf(stderr, "[racon::Polisher::initialize] error: "
++                            "duplicate sequence %s with unequal data\n",
++                            sequences_[i]->name().c_str());
++                        exit(1);
++                    }
+ 
+-                name_to_id[sequences_[i]->name() + "q"] = it->second;
+-                id_to_id[sequences_size << 1 | 0] = it->second;
++                    name_to_id[sequences_[i]->name() + "q"] = it->second;
++                    id_to_id[sequences_size << 1 | 0] = it->second;
+ 
+-                sequences_[i].reset();
+-                ++n;
+-            } else {
+-                name_to_id[sequences_[i]->name() + "q"] = i - n;
+-                id_to_id[sequences_size << 1 | 0] = i - n;
++                    sequences_[i].reset();
++                    ++n;
++                } else {
++                    name_to_id[sequences_[i]->name() + "q"] = i - n;
++                    id_to_id[sequences_size << 1 | 0] = i - n;
++                }
+             }
+-        }
+ 
+-        shrinkToFit(sequences_, l);
+-
+-        if (!status) {
++            shrinkToFit(sequences_, l);
++	} catch (std::invalid_argument& exception) {
+             break;
+         }
+     }
+@@ -305,31 +309,34 @@
+         }
+     };
+ 
+-    oparser_->reset();
++    oparser_->Reset();
+     uint64_t l = 0;
+     while (true) {
+-        auto status = oparser_->parse(overlaps, kChunkSize);
+-
+         uint64_t c = l;
+-        for (uint64_t i = l; i < overlaps.size(); ++i) {
+-            overlaps[i]->transmute(sequences_, name_to_id, id_to_id);
+-
+-            if (!overlaps[i]->is_valid()) {
+-                overlaps[i].reset();
+-                continue;
+-            }
++	bool status = true;
++	try {
++            overlaps = oparser_->Parse(kChunkSize);
++
++            for (uint64_t i = l; i < overlaps.size(); ++i) {
++                overlaps[i]->transmute(sequences_, name_to_id, id_to_id);
++
++                if (!overlaps[i]->is_valid()) {
++                    overlaps[i].reset();
++                    continue;
++                }
+ 
+-            while (overlaps[c] == nullptr) {
+-                ++c;
+-            }
+-            if (overlaps[c]->q_id() != overlaps[i]->q_id()) {
+-                remove_invalid_overlaps(c, i);
+-                c = i;
++                while (overlaps[c] == nullptr) {
++                    ++c;
++                }
++                if (overlaps[c]->q_id() != overlaps[i]->q_id()) {
++                    remove_invalid_overlaps(c, i);
++                    c = i;
++                }
+             }
+-        }
+-        if (!status) {
++	} catch (std::invalid_argument& exception) {
+             remove_invalid_overlaps(c, overlaps.size());
+             c = overlaps.size();
++	    status = false;
+         }
+ 
+         for (uint64_t i = l; i < c; ++i) {
+--- racon.orig/test/racon_test.cpp
++++ racon/test/racon_test.cpp
+@@ -10,7 +10,7 @@
+ #include "polisher.hpp"
+ 
+ #include "edlib.h"
+-#include "bioparser/bioparser.hpp"
++#include "bioparser/fasta_parser.hpp"
+ #include "gtest/gtest.h"
+ 
+ uint32_t calculateEditDistance(const std::string& query, const std::string& target) {
+@@ -98,9 +98,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -120,9 +120,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -142,9 +142,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -164,9 +164,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -186,9 +186,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -208,9 +208,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -303,9 +303,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -325,9 +325,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -347,9 +347,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -369,9 +369,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -391,9 +391,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),
+@@ -413,9 +413,9 @@
+ 
+     polished_sequences[0]->create_reverse_complement();
+ 
+-    auto parser = bioparser::createParser<bioparser::FastaParser, racon::Sequence>(
++    auto parser = bioparser::Parser<racon::Sequence>::Create<bioparser::FastaParser>(
+         racon_test_data_path + "sample_reference.fasta.gz");
+-    parser->parse(polished_sequences, -1);
++    polished_sequences = parser->Parse(-1);
+     EXPECT_EQ(polished_sequences.size(), 2);
+ 
+     EXPECT_EQ(calculateEditDistance(polished_sequences[0]->reverse_complement(),



View it on GitLab: https://salsa.debian.org/med-team/racon/-/commit/9da51296c87af89a046e8c70cdfcb18adf3153ae

-- 
View it on GitLab: https://salsa.debian.org/med-team/racon/-/commit/9da51296c87af89a046e8c70cdfcb18adf3153ae
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210108/71fbf542/attachment-0001.html>


More information about the debian-med-commit mailing list