[med-svn] [Git][med-team/grabix][upstream] New upstream version 0.1.6+git20171023

Steffen Möller gitlab at salsa.debian.org
Fri Apr 27 13:26:23 BST 2018


Steffen Möller pushed to branch upstream at Debian Med / grabix


Commits:
4a49cae3 by Steffen Moeller at 2018-04-27T12:16:26+02:00
New upstream version 0.1.6+git20171023
- - - - -


5 changed files:

- + LICENSE
- grabix.cpp
- grabix.h
- test.sh
- + tests/empty.fastq.gz


Changes:

=====================================
LICENSE
=====================================
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Brent Pedersen and Aaron Quinlan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.


=====================================
grabix.cpp
=====================================
--- a/grabix.cpp
+++ b/grabix.cpp
@@ -1,5 +1,6 @@
 #include <cstdlib>
 #include <iostream>
+#include <stdio.h>
 #include <fstream>
 #include <vector>
 #include <unistd.h>
@@ -56,15 +57,21 @@ random access via the BGZF seek utility
 */
 int create_grabix_index(string bgzf_file)
 {
+    if (!bgzf_is_bgzf(bgzf_file.c_str()))
+    {
+        cerr << "[grabix] " << bgzf_file << " doesn't exist or wasn't compressed with bgzip" << endl;
+        exit (1);
+    }
+
     BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
     if (bgzf_fp == NULL)
     {
-        cerr << "[grabix] could not open file:" << bgzf_file << endl;
+        cerr << "[grabix] could not open file: " << bgzf_file << endl;
         exit (1);
     }
 
     // create an index for writing
-    string index_file_name = bgzf_file + ".gbi";
+    string index_file_name = bgzf_file + ".gbi.tmp";
     ofstream index_file(index_file_name.c_str(), ios::out);
 
     // add the offset for the end of the header to the index
@@ -79,7 +86,7 @@ int create_grabix_index(string bgzf_file)
     int64_t offset = 0;
     while ((status = bgzf_getline(bgzf_fp, '\n', line)) >= 0)
     {
-        offset = bgzf_tell (bgzf_fp);
+        offset = bgzf_tell(bgzf_fp);
         if (line->s[0] != '#')
             break;
         prev_offset = offset;
@@ -92,15 +99,15 @@ int create_grabix_index(string bgzf_file)
     int64_t total_lines = 1;
     vector<int64_t> chunk_positions;
     chunk_positions.push_back (prev_offset);
-    bool eof = false;
+    int eof = 1;
     while (true)
     {
         // grab the next line and store the offset
-        eof = bgzf_getline_counting(bgzf_fp);
-        offset = bgzf_tell (bgzf_fp);
+        eof = bgzf_getline(bgzf_fp, '\n', line);
+        offset = bgzf_tell(bgzf_fp);
         chunk_count++;
         // stop if we have encountered an empty line
-        if (eof)
+        if (eof < 0 || offset == prev_offset)
         {
             if (bgzf_check_EOF(bgzf_fp) == 1) {
                 if (offset > prev_offset) {
@@ -109,6 +116,7 @@ int create_grabix_index(string bgzf_file)
                 }
                 break;
             }
+            break;
         }
         // store the offset of this chunk start
         else if (chunk_count == CHUNK_SIZE)
@@ -129,7 +137,7 @@ int create_grabix_index(string bgzf_file)
     }
     index_file.close();
 
-    return EXIT_SUCCESS;
+    return std::rename((bgzf_file + ".gbi.tmp").c_str(), (bgzf_file + ".gbi").c_str());
 }
 
 /*
@@ -195,7 +203,7 @@ int grab(string bgzf_file, int64_t from_line, int64_t to_line)
         BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
         if (bgzf_fp == NULL)
         {
-            cerr << "[grabix] could not open file:" << bgzf_file << endl;
+            cerr << "[grabix] could not open file: " << bgzf_file << endl;
             exit (1);
         }
 
@@ -206,7 +214,7 @@ int grab(string bgzf_file, int64_t from_line, int64_t to_line)
         line->l = 0;
         line->m = 0;
 
-        while ((status = bgzf_getline(bgzf_fp, '\n', line)) != 0)
+        while ((status = bgzf_getline(bgzf_fp, '\n', line)) > 0)
         {
             if (line->s[0] == '#')
                 printf("%s\n", line->s);
@@ -259,7 +267,7 @@ int random(string bgzf_file, uint64_t K)
         BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
         if (bgzf_fp == NULL)
         {
-            cerr << "[grabix] could not open file:" << bgzf_file << endl;
+            cerr << "[grabix] could not open file: " << bgzf_file << endl;
             exit (1);
         }
 


=====================================
grabix.h
=====================================
--- a/grabix.h
+++ b/grabix.h
@@ -7,7 +7,7 @@ using namespace std;
 #include "bgzf.h"
 
 
-#define VERSION "0.1.6"
+#define VERSION "0.1.8"
 // we only want to store the offset for every 10000th
 // line. otherwise, were we to store the position of every
 // line in the file, the index could become very large for


=====================================
test.sh
=====================================
--- a/test.sh
+++ b/test.sh
@@ -3,17 +3,35 @@ make
 FQ=test.cnt.gz
 rm -f ${FQ}{,.gbi}
 
-lines=50000
+lines=500000
 python tests/make-test-fastq.py $lines | bgzip -c > $FQ
-./grabix index $FQ
+echo "indexing"
+time ./grabix index $FQ
+echo "indexed"
 python tests/test-fastq.py $FQ
-a=$(grabix grab test.cnt.gz $(($lines * 4)))
+a=$(./grabix grab test.cnt.gz $(($lines * 4)))
 b=$(zless $FQ | tail -1)
 if [[ "$a" != "$b" ]]; then
 	echo FAIL last record
+    exit 1
+else
+	echo OK last record
 fi
 rm -f ${FQ}{,.gbi}
 
+rm -f tests/empty.fastq.gz.gbi
+./grabix index tests/empty.fastq.gz
+
+a=$(cat tests/empty.fastq.gz.gbi | awk 'NR == 2')
+if [[ "$a" != "16" ]]; then
+    echo FAIL index wrong size
+    exit 1
+else
+    echo "OK index size"
+fi
+
+
+
 for V in  \
 	test.PLs.vcf \
 	test.auto_dom.no_parents.2.vcf \


=====================================
tests/empty.fastq.gz
=====================================
Binary files /dev/null and b/tests/empty.fastq.gz differ



View it on GitLab: https://salsa.debian.org/med-team/grabix/commit/4a49cae32d125eadddd0d57a57be4fe3cffe9c1b

---
View it on GitLab: https://salsa.debian.org/med-team/grabix/commit/4a49cae32d125eadddd0d57a57be4fe3cffe9c1b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180427/d2789465/attachment-0001.html>


More information about the debian-med-commit mailing list