[med-svn] [Git][med-team/libfastahack][master] 8 commits: find the new tagged release upstream

Michael R. Crusoe gitlab at salsa.debian.org
Wed Sep 4 08:44:44 BST 2019



Michael R. Crusoe pushed to branch master at Debian Med / libfastahack


Commits:
4ad21f80 by Michael R. Crusoe at 2019-09-04T06:37:15Z
find the new tagged release upstream

- - - - -
45c8227d by Michael R. Crusoe at 2019-09-04T06:37:29Z
New upstream version 1.0.0+dfsg
- - - - -
b8ba3d15 by Michael R. Crusoe at 2019-09-04T06:37:29Z
Update upstream source from tag 'upstream/1.0.0+dfsg'

Update to upstream version '1.0.0+dfsg'
with Debian dir 4acf62e8ab1238b10bd321768d931315580ff700
- - - - -
a0697c33 by Michael R. Crusoe at 2019-09-04T06:37:29Z
New upstream version

- - - - -
c4734e01 by Michael R. Crusoe at 2019-09-04T06:37:30Z
debhelper-compat 12

- - - - -
fb5e7896 by Michael R. Crusoe at 2019-09-04T06:37:31Z
Standards-Version: 4.4.0

- - - - -
dad76ead by Michael R. Crusoe at 2019-09-04T06:37:33Z
Set upstream metadata fields: Name.
- - - - -
4e5994fd by Michael R. Crusoe at 2019-09-04T07:18:47Z
add one new symbol

- - - - -


8 changed files:

- Fasta.cpp
- debian/changelog
- − debian/compat
- debian/control
- debian/libfastahack0.symbols
- debian/upstream/metadata
- debian/watch
- + tests/crlf.fasta


Changes:

=====================================
Fasta.cpp
=====================================
@@ -3,7 +3,7 @@
 // Marth Lab, Department of Biology, Boston College
 // All rights reserved.
 // ---------------------------------------------------------------------------
-// Last modified: 9 February 2010 (EG)
+// Last modified: 26 February 2019 (AMN)
 // ---------------------------------------------------------------------------
 
 #include "Fasta.h"
@@ -87,6 +87,28 @@ ostream& operator<<(ostream& output, FastaIndex& fastaIndex) {
     for( vector<FastaIndexEntry>::iterator fit = sortedIndex.begin(); fit != sortedIndex.end(); ++fit) {
         output << *fit << endl;
     }
+    return output;
+}
+
+// Read a line from in into line. Line endings ('\r', '\n', etc.) are not
+// written to line but is counted in bytes, which will hold the total bytes
+// consumed. Supports both '\n' and '\r\n' line endings. Returns true if data
+// was read, and false on EOF before anything could be read.
+bool getlineCounting(istream& in, string& line, int& bytes) {
+    bytes = 0;
+    line.clear();
+    for(int got = in.get(); got != EOF; got = in.get()) {
+        bytes++;
+        if (got == '\n') {
+            // Line is over, but we read something (the '\n')
+            return true;
+        } else if (got != '\r') {
+            // Anything other than a '\r' is real data.
+            line.push_back((char)got);
+        }
+        // '\r' is skipped, but still counted in bytes
+    }
+    return !line.empty();
 }
 
 void FastaIndex::indexReference(string refname) {
@@ -100,6 +122,7 @@ void FastaIndex::indexReference(string refname) {
     FastaIndexEntry entry;  // an entry buffer used in processing
     entry.clear();
     int line_length = 0;
+    int line_bytes = 0;
     long long offset = 0;  // byte offset from start of file
     long long line_number = 0; // current line number
     bool mismatchedLineLengths = false; // flag to indicate if our line length changes mid-file
@@ -112,18 +135,20 @@ void FastaIndex::indexReference(string refname) {
     ifstream refFile;
     refFile.open(refname.c_str());
     if (refFile.is_open()) {
-        while (getline(refFile, line)) {
+        while (getlineCounting(refFile, line, line_bytes)) {
             ++line_number;
             line_length = line.length();
             if (line[0] == ';') {
                 // fasta comment, skip
             } else if (line[0] == '+') {
                 // fastq quality header
-                getline(refFile, line);
-                line_length = line.length();
-                offset += line_length + 1;
-                // get and don't handle the quality line
-                getline(refFile, line);
+                
+                // account for header offset
+                offset += line_bytes;
+                
+                // read in quality line so its offset will be accounted for too
+                // TODO: we don't support the quality offset field of the FAI format
+                getlineCounting(refFile, line, line_bytes);
                 line_length = line.length();
             } else if (line[0] == '>' || line[0] == '@') { // fasta /fastq header
                 // if we aren't on the first entry, push the last sequence into the index
@@ -139,7 +164,6 @@ void FastaIndex::indexReference(string refname) {
                     entry.offset = offset;
                 entry.length += line_length;
                 if (entry.line_len) {
-                    //entry.line_len = entry.line_len ? entry.line_len : line_length + 1;
                     if (mismatchedLineLengths || emptyLine) {
                         if (line_length == 0) {
                             emptyLine = true; // flag empty lines, raise error only if this is embedded in the sequence
@@ -157,18 +181,18 @@ void FastaIndex::indexReference(string refname) {
                     // this flag is set here and checked on the next line
                     // because we may have reached the end of the sequence, in
                     // which case a mismatched line length is OK
-                    if (entry.line_len != line_length + 1) {
+                    if (entry.line_len != line_bytes) {
                         mismatchedLineLengths = true;
                         if (line_length == 0) {
                             emptyLine = true; // flag empty lines, raise error only if this is embedded in the sequence
                         }
                     }
                 } else {
-                    entry.line_len = line_length + 1; // first line
+                    entry.line_len = line_bytes; // first line
+                    entry.line_blen = line_length;
                 }
-                entry.line_blen = entry.line_len - 1;
             }
-            offset += line_length + 1;
+            offset += line_bytes;
         }
         // we've hit the end of the fasta file!
         // flush the last entry
@@ -249,8 +273,9 @@ FastaReference::~FastaReference(void) {
 
 string FastaReference::getSequence(string seqname) {
     FastaIndexEntry entry = index->entry(seqname);
-    int newlines_in_sequence = entry.length / entry.line_blen;
-    int seqlen = newlines_in_sequence  + entry.length;
+    int bytes_per_newline = entry.line_len - entry.line_blen;
+    int newline_bytes_in_sequence = entry.length / entry.line_blen * bytes_per_newline;
+    int seqlen = newline_bytes_in_sequence + entry.length;
     char* seq = (char*) calloc (seqlen + 1, sizeof(char));
     fseek64(file, entry.offset, SEEK_SET);
     string s;
@@ -258,6 +283,7 @@ string FastaReference::getSequence(string seqname) {
         seq[seqlen] = '\0';
         char* pbegin = seq;
         char* pend = seq + (seqlen/sizeof(char));
+        pend = remove(pbegin, pend, '\r');
         pend = remove(pbegin, pend, '\n');
         pend = remove(pbegin, pend, '\0');
         s = seq;
@@ -299,7 +325,8 @@ string FastaReference::getSubSequence(string seqname, int start, int length) {
     int newlines_before = start > 0 ? (start - 1) / entry.line_blen : 0;
     int newlines_by_end = (start + length - 1) / entry.line_blen;
     int newlines_inside = newlines_by_end - newlines_before;
-    int seqlen = length + newlines_inside;
+    int bytes_per_newline = entry.line_len - entry.line_blen;
+    int seqlen = length + newlines_inside * bytes_per_newline;
     char* seq = (char*) calloc (seqlen + 1, sizeof(char));
     fseek64(file, (off_t) (entry.offset + newlines_before + start), SEEK_SET);
     string s;
@@ -307,6 +334,7 @@ string FastaReference::getSubSequence(string seqname, int start, int length) {
         seq[seqlen] = '\0';
         char* pbegin = seq;
         char* pend = seq + (seqlen/sizeof(char));
+        pend = remove(pbegin, pend, '\r');
         pend = remove(pbegin, pend, '\n');
         pend = remove(pbegin, pend, '\0');
         s = seq;


=====================================
debian/changelog
=====================================
@@ -1,3 +1,14 @@
+libfastahack (1.0.0+dfsg-1) UNRELEASED; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * debhelper-compat 12
+  * Standards-Version: 4.4.0
+  * Set upstream metadata fields: Name.
+  * Add the one new symbol
+
+ -- Michael R. Crusoe <michael.crusoe at gmail.com>  Wed, 04 Sep 2019 15:37:29 +0900
+
 libfastahack (0.0+git20160702.bbc645f+dfsg-6) unstable; urgency=medium
 
   * Set some symbols optional to make build more robust


=====================================
debian/compat deleted
=====================================
@@ -1 +0,0 @@
-11


=====================================
debian/control
=====================================
@@ -3,10 +3,10 @@ Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.
 Uploaders: Andreas Tille <tille at debian.org>
 Section: science
 Priority: optional
-Build-Depends: debhelper (>= 11~),
+Build-Depends: debhelper-compat (= 12),
                d-shlibs,
                libdisorder-dev
-Standards-Version: 4.2.1
+Standards-Version: 4.4.0
 Vcs-Browser: https://salsa.debian.org/med-team/libfastahack
 Vcs-Git: https://salsa.debian.org/med-team/libfastahack.git
 Homepage: https://github.com/ekg/fastahack


=====================================
debian/libfastahack0.symbols
=====================================
@@ -1,4 +1,6 @@
 libfastahack.so.0 libfastahack0 #MINVER#
+* Build-Depends-Package: libfastahack-dev
+ _Z15getlineCountingRSiRNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERi at Base 1.0.0
  _Z22fastaIndexEntryCompare15FastaIndexEntryS_ at Base 0.0+git20160702.bbc645f
  _Z5splitRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES6_ at Base 0.0+git20160702.bbc645f
  _Z5splitRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES6_RSt6vectorIS4_SaIS4_EE at Base 0.0+git20160702.bbc645f


=====================================
debian/upstream/metadata
=====================================
@@ -1,7 +1,8 @@
 Registry:
- - Name: OMICtools
-   Entry: OMICS_20516
- - Name: bio.tools
-   Entry: NA
- - Name: SciCrunch
-   Entry: NA
\ No newline at end of file
+- Name: OMICtools
+  Entry: OMICS_20516
+- Name: bio.tools
+  Entry: NA
+- Name: SciCrunch
+  Entry: NA
+Name: fastahack


=====================================
debian/watch
=====================================
@@ -1,7 +1,4 @@
 version=4
 
-opts="mode=git,pretty=0.0+git%cd.%h,repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
-    https://github.com/ekg/fastahack.git HEAD
-
-# Issue asking for release tags:
-#    https://github.com/ekg/fastahack/issues/14
+opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz,filenamemangle=s/.+\/v?(\d\S*)\.tar\.gz/libfastahack-$1\.tar\.gz/" \
+    https://github.com/ekg/fastahack/releases .*/archive/v at ANY_VERSION@@ARCHIVE_EXT@


=====================================
tests/crlf.fasta
=====================================
@@ -0,0 +1,7 @@
+>chr
+C
+A
+A
+G
+T
+C



View it on GitLab: https://salsa.debian.org/med-team/libfastahack/compare/792d8f59451825dba804b435aa9abdc134a60a70...4e5994fd3442b7797e669baa30ccb28046f7ae96

-- 
View it on GitLab: https://salsa.debian.org/med-team/libfastahack/compare/792d8f59451825dba804b435aa9abdc134a60a70...4e5994fd3442b7797e669baa30ccb28046f7ae96
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190904/e3c692dc/attachment-0001.html>


More information about the debian-med-commit mailing list