[med-svn] [htslib] 03/10: Adaptations to Windows line endings and binary/text files: -stop git from converting line endings on the reference fasta files. -make the test scripts handle CRLF line endings, too. Perl should take care of it, according to the docs, but it doesn't.

Andreas Tille tille at debian.org
Wed Jul 19 19:54:42 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository htslib.

commit 6999a0959294d44f6b92f93e3d63396f42275c88
Author: Anders Kaplan <anders.kaplan at magasin1.se>
Date:   Thu Mar 23 19:02:06 2017 +0100

    Adaptations to Windows line endings and binary/text files:
    -stop git from converting line endings on the reference fasta files.
    -make the test scripts handle CRLF line endings, too. Perl should take care of it, according to the docs, but it doesn't.
    
    Added a fail-fast flag to test/test.pl. When the flag is given, the test stops on the first error it encounters.
    
    Improve usage instructions for test_view. Also replaced magic numbers with constants to make the code more readable.
    
    Minor code documentation.
---
 .gitattributes      |  5 +++++
 .gitignore          |  2 ++
 cram/cram_encode.c  |  2 +-
 htslib/bgzf.h       |  3 +++
 htslib/hfile.h      |  4 ++++
 test/compare_sam.pl |  2 ++
 test/hfile.c        |  6 +++---
 test/test.pl        | 60 +++++++++++++++++++++++++++++++----------------------
 test/test_bgzf.c    | 15 ++++----------
 test/test_view.c    | 55 ++++++++++++++++++++++++++++++++++--------------
 10 files changed, 98 insertions(+), 56 deletions(-)

diff --git a/.gitattributes b/.gitattributes
index 173dcd3..da0ea83 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -10,3 +10,8 @@
 .git*           export-ignore
 /.travis.yml    export-ignore
 README.md       export-ignore
+
+# Remove the text attribute from reference files, so that git doesn't convert
+# line separators on Windows machines. It causes the index files to become out
+# of sync with the fasta files.
+*.fa* -text
diff --git a/.gitignore b/.gitignore
index 6972b55..6101b21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,9 @@
 *.o
 *.pico
+*.obj
 *.dSYM
 *.exe
+*.dll
 *.pc.tmp
 *-uninstalled.pc
 /version.h
diff --git a/cram/cram_encode.c b/cram/cram_encode.c
index d7f08f1..1d9f70c 100644
--- a/cram/cram_encode.c
+++ b/cram/cram_encode.c
@@ -1935,7 +1935,7 @@ static int cram_add_insertion(cram_container *c, cram_slice *s, cram_record *r,
 }
 
 /*
- * Encodes auxiliary data.
+ * Encodes auxiliary data, CRAM 1.0 format.
  * Returns the read-group parsed out of the BAM aux fields on success
  *         NULL on failure or no rg present (FIXME)
  */
diff --git a/htslib/bgzf.h b/htslib/bgzf.h
index 15c76cd..b2f4165 100644
--- a/htslib/bgzf.h
+++ b/htslib/bgzf.h
@@ -92,6 +92,9 @@ typedef struct __kstring_t {
      * Open an existing file descriptor for reading or writing.
      *
      * @param fd    file descriptor
+     *              Note that the file must be opened in binary mode, or else
+     *              there will be problems on platforms that distinguish
+     *              between text and binary mode.
      * @param mode  mode matching /[rwag][u0-9]+/: 'r' for reading, 'w' for
      *              writing, 'a' for appending, 'g' for gzip rather than BGZF
      *              compression (with 'w' only), and digit specifies the zlib
diff --git a/htslib/hfile.h b/htslib/hfile.h
index d07a755..fa89718 100644
--- a/htslib/hfile.h
+++ b/htslib/hfile.h
@@ -66,6 +66,10 @@ hFILE *hopen(const char *filename, const char *mode, ...) HTS_RESULT_USED;
 /// Associate a stream with an existing open file descriptor
 /** @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred.
 
+Note that the file must be opened in binary mode, or else
+there will be problems on platforms that distinguish
+between text and binary mode.
+
 For socket descriptors (on Windows), _mode_ should contain `s`.
 */
 hFILE *hdopen(int fd, const char *mode) HTS_RESULT_USED;
diff --git a/test/compare_sam.pl b/test/compare_sam.pl
index a241c6b..6860c91 100755
--- a/test/compare_sam.pl
+++ b/test/compare_sam.pl
@@ -64,6 +64,8 @@ while (<$fd2>) {
 
 # Compare lines
 while ($ln1 && $ln2) {
+    $ln1 =~ s/\015?\012/\n/;
+    $ln2 =~ s/\015?\012/\n/;
     chomp($ln1);
     chomp($ln2);
 
diff --git a/test/hfile.c b/test/hfile.c
index 16ad670..358ac70 100644
--- a/test/hfile.c
+++ b/test/hfile.c
@@ -61,9 +61,9 @@ char *slurp(const char *filename)
 {
     char *text;
     struct stat sbuf;
-    size_t filesize;
-    FILE *f = fopen(filename, "r");
-    if (f == NULL) fail("fopen(\"%s\", \"r\")", filename);
+    size_t filesize, readsize;
+    FILE *f = fopen(filename, "rb");
+    if (f == NULL) fail("fopen(\"%s\", \"rb\")", filename);
     if (fstat(fileno(f), &sbuf) != 0) fail("fstat(\"%s\")", filename);
     filesize = sbuf.st_size;
 
diff --git a/test/test.pl b/test/test.pl
index 3ce6e67..261ca98 100755
--- a/test/test.pl
+++ b/test/test.pl
@@ -64,6 +64,7 @@ sub error
         "Options:\n",
         "   -r, --redo-outputs              Recreate expected output files.\n",
         "   -t, --temp-dir <path>           When given, temporary files will not be removed.\n",
+        "   -f, --fail-fast                 Fail-fast mode: exit as soon as a test fails.\n",
         "   -h, -?, --help                  This help message.\n",
         "\n";
     exit 1;
@@ -76,6 +77,7 @@ sub parse_params
     my $ret = GetOptions (
             't|temp-dir:s' => \$$opts{keep_files},
             'r|redo-outputs' => \$$opts{redo_outputs},
+            'f|fail-fast' => \$$opts{fail_fast},
             'h|?|help' => \$help
             );
     if ( !$ret or $help ) { error(); }
@@ -149,11 +151,13 @@ sub test_cmd
     {
         my @exp = <$fh>;
         $exp = join('',@exp);
+        $exp =~ s/\015?\012/\n/g;
         close($fh);
     }
     elsif ( !$$opts{redo_outputs} ) { failed($opts,$test,"$$opts{path}/$args{out}: $!"); return; }
 
-    if ( $exp ne $out )
+    (my $out_lf = $out) =~ s/\015?\012/\n/g;
+    if ( $exp ne $out_lf )
     {
         open(my $fh,'>',"$$opts{path}/$args{out}.new") or error("$$opts{path}/$args{out}.new");
         print $fh $out;
@@ -181,6 +185,9 @@ sub failed
     if ( defined $reason ) { print STDERR "\t$reason\n"; }
     print STDERR ".. failed ...\n\n";
     STDERR->flush();
+    if ($$opts{fail_fast}) {
+      die "\n";
+    }
 }
 sub passed
 {
@@ -202,7 +209,7 @@ sub is_file_newer
 
 my $test_view_failures;
 sub testv {
-    my ($cmd) = @_;
+    my ($opts, $cmd) = @_;
     print "  $cmd\n";
     my ($ret, $out) = _cmd($cmd);
     if ($ret != 0) {
@@ -210,6 +217,9 @@ sub testv {
         print STDERR "FAILED\n$out\n";
         STDERR->flush();
         $test_view_failures++;
+        if ($$opts{fail_fast}) {
+          die "\n";
+        }
     }
 }
 
@@ -234,50 +244,50 @@ sub test_view
         $test_view_failures = 0;
 
         # SAM -> BAM -> SAM
-        testv "./test_view $tv_args -S -b $sam > $bam";
-        testv "./test_view $tv_args $bam > $bam.sam_";
-        testv "./compare_sam.pl $sam $bam.sam_";
+        testv $opts, "./test_view $tv_args -S -b $sam > $bam";
+        testv $opts, "./test_view $tv_args $bam > $bam.sam_";
+        testv $opts, "./compare_sam.pl $sam $bam.sam_";
 
         # SAM -> CRAM -> SAM
-        testv "./test_view $tv_args -t $ref -S -C $sam > $cram";
-        testv "./test_view $tv_args -D $cram > $cram.sam_";
-        testv "./compare_sam.pl $md $sam $cram.sam_";
+        testv $opts, "./test_view $tv_args -t $ref -S -C $sam > $cram";
+        testv $opts, "./test_view $tv_args -D $cram > $cram.sam_";
+        testv $opts, "./compare_sam.pl $md $sam $cram.sam_";
 
         # BAM -> CRAM -> BAM -> SAM
         $cram = "$bam.cram";
-        testv "./test_view $tv_args -t $ref -C $bam > $cram";
-        testv "./test_view $tv_args -b -D $cram > $cram.bam";
-        testv "./test_view $tv_args $cram.bam > $cram.bam.sam_";
-        testv "./compare_sam.pl $md $sam $cram.bam.sam_";
+        testv $opts, "./test_view $tv_args -t $ref -C $bam > $cram";
+        testv $opts, "./test_view $tv_args -b -D $cram > $cram.bam";
+        testv $opts, "./test_view $tv_args $cram.bam > $cram.bam.sam_";
+        testv $opts, "./compare_sam.pl $md $sam $cram.bam.sam_";
 
         # SAM -> CRAM3 -> SAM
         $cram = "$base.tmp.cram";
-        testv "./test_view $tv_args -t $ref -S -C -o VERSION=3.0 $sam > $cram";
-        testv "./test_view $tv_args -D $cram > $cram.sam_";
-        testv "./compare_sam.pl $md $sam $cram.sam_";
+        testv $opts, "./test_view $tv_args -t $ref -S -C -o VERSION=3.0 $sam > $cram";
+        testv $opts, "./test_view $tv_args -D $cram > $cram.sam_";
+        testv $opts, "./compare_sam.pl $md $sam $cram.sam_";
 
         # BAM -> CRAM3 -> BAM -> SAM
         $cram = "$bam.cram";
-        testv "./test_view $tv_args -t $ref -C -o VERSION=3.0 $bam > $cram";
-        testv "./test_view $tv_args -b -D $cram > $cram.bam";
-        testv "./test_view $tv_args $cram.bam > $cram.bam.sam_";
-        testv "./compare_sam.pl $md $sam $cram.bam.sam_";
+        testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 $bam > $cram";
+        testv $opts, "./test_view $tv_args -b -D $cram > $cram.bam";
+        testv $opts, "./test_view $tv_args $cram.bam > $cram.bam.sam_";
+        testv $opts, "./compare_sam.pl $md $sam $cram.bam.sam_";
 
         # CRAM3 -> CRAM2
         $cram = "$base.tmp.cram";
-        testv "./test_view $tv_args -t $ref -C -o VERSION=2.1 $cram > $cram.cram";
+        testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=2.1 $cram > $cram.cram";
 
         # CRAM2 -> CRAM3
-        testv "./test_view $tv_args -t $ref -C -o VERSION=3.0 $cram.cram > $cram";
-        testv "./test_view $tv_args $cram > $cram.sam_";
-        testv "./compare_sam.pl $md $sam $cram.sam_";
+        testv $opts, "./test_view $tv_args -t $ref -C -o VERSION=3.0 $cram.cram > $cram";
+        testv $opts, "./test_view $tv_args $cram > $cram.sam_";
+        testv $opts, "./compare_sam.pl $md $sam $cram.sam_";
 
         # Java pre-made CRAM -> SAM
         my $jcram = "${base}_java.cram";
         if (-e $jcram) {
             my $jsam = "${base}_java.tmp.sam_";
-            testv "./test_view $tv_args -i reference=$ref $jcram > $jsam";
-            testv "./compare_sam.pl -Baux $md $sam $jsam";
+            testv $opts, "./test_view $tv_args -i reference=$ref $jcram > $jsam";
+            testv $opts, "./compare_sam.pl -Baux $md $sam $jsam";
         }
 
         if ($test_view_failures == 0)
diff --git a/test/test_bgzf.c b/test/test_bgzf.c
index e34b22f..c983829 100644
--- a/test/test_bgzf.c
+++ b/test/test_bgzf.c
@@ -23,6 +23,8 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 DEALINGS IN THE SOFTWARE.
  */
 
+#include <config.h>
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE.
 #include <fcntl.h>
 #include "htslib/bgzf.h"
 #include "htslib/hfile.h"
+#include "hfile_internal.h"
 
 const char *bgzf_suffix = ".gz";
 const char *idx_suffix  = ".gzi";
@@ -113,17 +116,7 @@ static BGZF * try_bgzf_open(const char *name, const char *mode,
 static BGZF * try_bgzf_dopen(const char *name, const char *mode,
                              const char *func) {
     BGZF *bgz = NULL;
-    int fd = -1;
-    if (strchr(mode, 'r')) {
-        fd = open(name, O_RDONLY);
-    } else if (strchr(mode, 'w')) {
-        fd = open(name, O_WRONLY | O_CREAT | O_TRUNC, 0666);
-    } else if (strchr(mode, 'a')) {
-        fd = open(name, O_WRONLY | O_CREAT | O_APPEND, 0666);
-    } else {
-        errno = EINVAL;
-    }
-
+    int fd = open(name, hfile_oflags(mode), 0666);
     if (fd < 0) {
         fprintf(stderr, "%s : Failed to open %s with mode %s : %s\n",
                 func, name, mode, strerror(errno));
diff --git a/test/test_view.c b/test/test_view.c
index 7f173fd..9d2678a 100644
--- a/test/test_view.c
+++ b/test/test_view.c
@@ -35,6 +35,13 @@ DEALINGS IN THE SOFTWARE.  */
 
 #include "htslib/sam.h"
 
+enum test_op {
+    READ_COMPRESSED  = 1,
+    WRITE_COMPRESSED = 2,
+    READ_CRAM        = 4,
+    WRITE_CRAM       = 8
+};
+
 int main(int argc, char *argv[])
 {
     samFile *in;
@@ -52,30 +59,46 @@ int main(int argc, char *argv[])
     int benchmark = 0;
     int nthreads = 0; // shared pool
 
-    while ((c = getopt(argc, argv, "IbDCSl:t:i:o:N:BZ:@:")) >= 0) {
+    while ((c = getopt(argc, argv, "DSIt:i:bCl:o:N:BZ:@:")) >= 0) {
         switch (c) {
-        case 'S': flag |= 1; break;
-        case 'b': flag |= 2; break;
-        case 'D': flag |= 4; break;
-        case 'C': flag |= 8; break;
-        case 'B': benchmark = 1; break;
-        case 'l': clevel = atoi(optarg); flag |= 2; break;
-        case 't': fn_ref = optarg; break;
+        case 'D': flag |= READ_CRAM; break;
+        case 'S': flag |= READ_COMPRESSED; break;
         case 'I': ignore_sam_err = 1; break;
-        case 'i': if (hts_opt_add(&in_opts,  optarg)) return 1; break;
+        case 't': fn_ref = optarg; break;
+        case 'i': if (hts_opt_add(&in_opts, optarg)) return 1; break;
+        case 'b': flag |= WRITE_COMPRESSED; break;
+        case 'C': flag |= WRITE_CRAM; break;
+        case 'l': clevel = atoi(optarg); flag |= WRITE_COMPRESSED; break;
         case 'o': if (hts_opt_add(&out_opts, optarg)) return 1; break;
         case 'N': nreads = atoi(optarg); break;
+        case 'B': benchmark = 1; break;
         case 'Z': extra_hdr_nuls = atoi(optarg); break;
         case '@': nthreads = atoi(optarg); break;
         }
     }
     if (argc == optind) {
-        fprintf(stderr, "Usage: samview [-bSCSIB] [-N num_reads] [-l level] [-o option=value] [-Z hdr_nuls] <in.bam>|<in.sam>|<in.cram> [region]\n");
+        fprintf(stderr, "Usage: test_view [-DSI] [-t fn_ref] [-i option=value] [-bC] [-l level] [-o option=value] [-N num_reads] [-B] [-Z hdr_nuls] [-@ num_threads] <in.bam>|<in.sam>|<in.cram> [region]\n");
+        fprintf(stderr, "\n");
+        fprintf(stderr, "-D: read CRAM format (mode 'c')\n");
+        fprintf(stderr, "-S: read compressed BCF, BAM, FAI (mode 'b')\n");
+        fprintf(stderr, "-I: ignore SAM parsing errors\n");
+        fprintf(stderr, "-t: fn_ref: load CRAM references from the specificed fasta file instead of @SQ headers when writing a CRAM file\n");
+        fprintf(stderr, "-i: option=value: set an option for CRAM input\n");
+        fprintf(stderr, "\n");
+        fprintf(stderr, "-b: write compressed BCF, BAM, FAI (mode 'b')\n");
+        fprintf(stderr, "-C: write CRAM format (mode 'c')\n");
+        fprintf(stderr, "-l 0-9: set zlib compression level\n");
+        fprintf(stderr, "-o option=value: set an option for CRAM output\n");
+        fprintf(stderr, "-N: num_reads: limit the output to the first num_reads reads\n");
+        fprintf(stderr, "\n");
+        fprintf(stderr, "-B: enable benchmarking\n");
+        fprintf(stderr, "-Z hdr_nuls: append specified number of null bytes to the SAM header\n");
+        fprintf(stderr, "-@ num_threads: use thread pool with specified number of threads\n");
         return 1;
     }
     strcpy(moder, "r");
-    if (flag&4) strcat(moder, "c");
-    else if ((flag&1) == 0) strcat(moder, "b");
+    if (flag & READ_CRAM) strcat(moder, "c");
+    else if ((flag & READ_COMPRESSED) == 0) strcat(moder, "b");
 
     in = sam_open(argv[optind], moder);
     if (in == NULL) {
@@ -103,8 +126,8 @@ int main(int argc, char *argv[])
 
     strcpy(modew, "w");
     if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);
-    if (flag&8) strcat(modew, "c");
-    else if (flag&2) strcat(modew, "b");
+    if (flag & WRITE_CRAM) strcat(modew, "c");
+    else if (flag & WRITE_COMPRESSED) strcat(modew, "b");
     out = hts_open("-", modew);
     if (out == NULL) {
         fprintf(stderr, "Error opening standard output\n");
@@ -112,7 +135,7 @@ int main(int argc, char *argv[])
     }
 
     /* CRAM output */
-    if (flag & 8) {
+    if (flag & WRITE_CRAM) {
         int ret;
 
         // Parse input header and use for CRAM output
@@ -155,7 +178,7 @@ int main(int argc, char *argv[])
         fprintf(stderr, "Error writing output header.\n");
         exit_code = 1;
     }
-    if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region
+    if (optind + 1 < argc && !(flag & READ_COMPRESSED)) { // BAM input and has a region
         int i;
         hts_idx_t *idx;
         if ((idx = sam_index_load(in, argv[optind])) == 0) {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htslib.git



More information about the debian-med-commit mailing list