[med-svn] [Git][med-team/grabix][upstream] New upstream version 0.1.6+git20171023
Steffen Möller
gitlab at salsa.debian.org
Fri Apr 27 13:26:23 BST 2018
Steffen Möller pushed to branch upstream at Debian Med / grabix
Commits:
4a49cae3 by Steffen Moeller at 2018-04-27T12:16:26+02:00
New upstream version 0.1.6+git20171023
- - - - -
5 changed files:
- + LICENSE
- grabix.cpp
- grabix.h
- test.sh
- + tests/empty.fastq.gz
Changes:
=====================================
LICENSE
=====================================
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Brent Pedersen and Aaron Quinlan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
=====================================
grabix.cpp
=====================================
--- a/grabix.cpp
+++ b/grabix.cpp
@@ -1,5 +1,6 @@
#include <cstdlib>
#include <iostream>
+#include <stdio.h>
#include <fstream>
#include <vector>
#include <unistd.h>
@@ -56,15 +57,21 @@ random access via the BGZF seek utility
*/
int create_grabix_index(string bgzf_file)
{
+ if (!bgzf_is_bgzf(bgzf_file.c_str()))
+ {
+ cerr << "[grabix] " << bgzf_file << " doesn't exist or wasn't compressed with bgzip" << endl;
+ exit (1);
+ }
+
BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
if (bgzf_fp == NULL)
{
- cerr << "[grabix] could not open file:" << bgzf_file << endl;
+ cerr << "[grabix] could not open file: " << bgzf_file << endl;
exit (1);
}
// create an index for writing
- string index_file_name = bgzf_file + ".gbi";
+ string index_file_name = bgzf_file + ".gbi.tmp";
ofstream index_file(index_file_name.c_str(), ios::out);
// add the offset for the end of the header to the index
@@ -79,7 +86,7 @@ int create_grabix_index(string bgzf_file)
int64_t offset = 0;
while ((status = bgzf_getline(bgzf_fp, '\n', line)) >= 0)
{
- offset = bgzf_tell (bgzf_fp);
+ offset = bgzf_tell(bgzf_fp);
if (line->s[0] != '#')
break;
prev_offset = offset;
@@ -92,15 +99,15 @@ int create_grabix_index(string bgzf_file)
int64_t total_lines = 1;
vector<int64_t> chunk_positions;
chunk_positions.push_back (prev_offset);
- bool eof = false;
+ int eof = 1;
while (true)
{
// grab the next line and store the offset
- eof = bgzf_getline_counting(bgzf_fp);
- offset = bgzf_tell (bgzf_fp);
+ eof = bgzf_getline(bgzf_fp, '\n', line);
+ offset = bgzf_tell(bgzf_fp);
chunk_count++;
// stop if we have encountered an empty line
- if (eof)
+ if (eof < 0 || offset == prev_offset)
{
if (bgzf_check_EOF(bgzf_fp) == 1) {
if (offset > prev_offset) {
@@ -109,6 +116,7 @@ int create_grabix_index(string bgzf_file)
}
break;
}
+ break;
}
// store the offset of this chunk start
else if (chunk_count == CHUNK_SIZE)
@@ -129,7 +137,7 @@ int create_grabix_index(string bgzf_file)
}
index_file.close();
- return EXIT_SUCCESS;
+ return std::rename((bgzf_file + ".gbi.tmp").c_str(), (bgzf_file + ".gbi").c_str());
}
/*
@@ -195,7 +203,7 @@ int grab(string bgzf_file, int64_t from_line, int64_t to_line)
BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
if (bgzf_fp == NULL)
{
- cerr << "[grabix] could not open file:" << bgzf_file << endl;
+ cerr << "[grabix] could not open file: " << bgzf_file << endl;
exit (1);
}
@@ -206,7 +214,7 @@ int grab(string bgzf_file, int64_t from_line, int64_t to_line)
line->l = 0;
line->m = 0;
- while ((status = bgzf_getline(bgzf_fp, '\n', line)) != 0)
+ while ((status = bgzf_getline(bgzf_fp, '\n', line)) > 0)
{
if (line->s[0] == '#')
printf("%s\n", line->s);
@@ -259,7 +267,7 @@ int random(string bgzf_file, uint64_t K)
BGZF *bgzf_fp = bgzf_open(bgzf_file.c_str(), "r");
if (bgzf_fp == NULL)
{
- cerr << "[grabix] could not open file:" << bgzf_file << endl;
+ cerr << "[grabix] could not open file: " << bgzf_file << endl;
exit (1);
}
=====================================
grabix.h
=====================================
--- a/grabix.h
+++ b/grabix.h
@@ -7,7 +7,7 @@ using namespace std;
#include "bgzf.h"
-#define VERSION "0.1.6"
+#define VERSION "0.1.8"
// we only want to store the offset for every 10000th
// line. otherwise, were we to store the position of every
// line in the file, the index could become very large for
=====================================
test.sh
=====================================
--- a/test.sh
+++ b/test.sh
@@ -3,17 +3,35 @@ make
FQ=test.cnt.gz
rm -f ${FQ}{,.gbi}
-lines=50000
+lines=500000
python tests/make-test-fastq.py $lines | bgzip -c > $FQ
-./grabix index $FQ
+echo "indexing"
+time ./grabix index $FQ
+echo "indexed"
python tests/test-fastq.py $FQ
-a=$(grabix grab test.cnt.gz $(($lines * 4)))
+a=$(./grabix grab test.cnt.gz $(($lines * 4)))
b=$(zless $FQ | tail -1)
if [[ "$a" != "$b" ]]; then
echo FAIL last record
+ exit 1
+else
+ echo OK last record
fi
rm -f ${FQ}{,.gbi}
+rm -f tests/empty.fastq.gz.gbi
+./grabix index tests/empty.fastq.gz
+
+a=$(cat tests/empty.fastq.gz.gbi | awk 'NR == 2')
+if [[ "$a" != "16" ]]; then
+ echo FAIL index wrong size
+ exit 1
+else
+ echo "OK index size"
+fi
+
+
+
for V in \
test.PLs.vcf \
test.auto_dom.no_parents.2.vcf \
=====================================
tests/empty.fastq.gz
=====================================
Binary files /dev/null and b/tests/empty.fastq.gz differ
View it on GitLab: https://salsa.debian.org/med-team/grabix/commit/4a49cae32d125eadddd0d57a57be4fe3cffe9c1b
---
View it on GitLab: https://salsa.debian.org/med-team/grabix/commit/4a49cae32d125eadddd0d57a57be4fe3cffe9c1b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180427/d2789465/attachment-0001.html>
More information about the debian-med-commit mailing list