[med-svn] samtools 07/11: Replace bcftools+bcf_filter.sh with new vcf-miniview

Charles Plessy plessy at moszumanska.debian.org
Thu Aug 21 22:58:53 UTC 2014


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch develop
in repository samtools.

commit f5288ca22e5825b1d4dcd78b4fe90be5b85d67e6
Author: John Marshall <jm18 at sanger.ac.uk>
Date:   Wed Aug 20 16:18:27 2014 +0100

    Replace bcftools+bcf_filter.sh with new vcf-miniview
    
    Add new vcf-miniview test utility, which reads BCF or VCF and outputs
    VCF optionally filtered in the same way that the old bcf_filter.sh did.
    The sed -r option used by bcf_filter.sh was not available on OS X, and
    an installation of bcftools will not necessarily be available on test
    machines, notably when testing via travis.
---
 .gitignore                 |   1 +
 Makefile                   |   7 ++-
 test/mpileup/bcf_filter.sh |  29 ----------
 test/mpileup/mpileup.reg   |  58 ++++++++++----------
 test/mpileup/regression.sh |   4 +-
 test/vcf-miniview.c        | 132 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 170 insertions(+), 61 deletions(-)

diff --git a/.gitignore b/.gitignore
index d1ed6f5..399c94b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ lib*.a
 /misc/md5sum-lite
 /misc/wgsim
 /samtools
+/test/vcf-miniview
 
 /TAGS
 
diff --git a/Makefile b/Makefile
index e110b31..c621353 100644
--- a/Makefile
+++ b/Makefile
@@ -75,7 +75,8 @@ BUILT_TEST_PROGRAMS = \
 	test/split/test_count_rg \
 	test/split/test_expand_format_string \
 	test/split/test_filter_header_rg \
-	test/split/test_parse_args
+	test/split/test_parse_args \
+	test/vcf-miniview
 
 all: $(PROGRAMS) $(BUILT_MISC_PROGRAMS) $(BUILT_TEST_PROGRAMS)
 
@@ -212,6 +213,9 @@ test/split/test_filter_header_rg: test/split/test_filter_header_rg.o test/test.o
 test/split/test_parse_args: test/split/test_parse_args.o test/test.o $(HTSLIB)
 	$(CC) -pthread $(LDFLAGS) -o $@ test/split/test_parse_args.o test/test.o $(HTSLIB) $(LDLIBS) -lz
 
+test/vcf-miniview: test/vcf-miniview.o $(HTSLIB)  # minimal BCF/VCF viewer for the test harness
+	$(CC) -pthread $(LDFLAGS) -o $@ test/vcf-miniview.o $(HTSLIB) $(LDLIBS) -lz
+
 test_test_h = test/test.h $(htslib_sam_h)
 
 test/merge/test_bam_translate.o: test/merge/test_bam_translate.c $(test_test_h) bam_sort.o
@@ -223,6 +227,7 @@ test/split/test_expand_format_string.o: test/split/test_expand_format_string.c b
 test/split/test_filter_header_rg.o: test/split/test_filter_header_rg.c bam_split.o $(test_test_h)
 test/split/test_parse_args.o: test/split/test_parse_args.c bam_split.o $(test_test_h)
 test/test.o: test/test.c $(htslib_sam_h) $(test_test_h)
+test/vcf-miniview.o: test/vcf-miniview.c $(htslib_vcf_h)
 
 
 # misc programs
diff --git a/test/mpileup/bcf_filter.sh b/test/mpileup/bcf_filter.sh
deleted file mode 100755
index 43b9808..0000000
--- a/test/mpileup/bcf_filter.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-#    Copyright (C) 2014 Genome Research Ltd.
-#
-#    Author: James Bonfield <jkb at sanger.ac.uk>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# Ignores specific BCF fields and truncates the precision of others, to allow
-# for easy comparison.
-
-egrep -v '^##' |
-sed -r 's/;*(IMF|DP|IDV|IMP|IS|VDB|SGB|MQB|BQB|RPB|MQ0F|MQSB)=[-+e0-9/,.]*//g' |
-sed 's/\(;QS=[0-9]*\)[.0-9,]*/\1/'
diff --git a/test/mpileup/mpileup.reg b/test/mpileup/mpileup.reg
index e7d579d..08010cc 100644
--- a/test/mpileup/mpileup.reg
+++ b/test/mpileup/mpileup.reg
@@ -7,8 +7,8 @@
 #   Filename of expected output
 
 # Third onwards; command to execute. $fmt is replaced by the current file
-# format, ie sam, bam or cram. $samtools and $bcftools are pointers to
-# the current binaries. This can be useful for testing older versions.
+# format, ie sam, bam or cram. $samtools is a pointer to the desired
+# samtools binary. This can be useful for testing older versions.
 
 # # Create variant formats
 INIT x $samtools view -C mpileup.1.bam > mpileup.1.cram
@@ -104,40 +104,40 @@ P 46.out $samtools view -h mpileup.1.bam | $samtools mpileup -x --ff 0x714 -
 F 47.out $samtools mpileup -x -d 8500 -B -f mpileup.ref.fa deep.sam|awk '{print $4}'
 
 # BCF output options
-P 48.out $samtools mpileup -x -g -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
-P 49.out $samtools mpileup -x -v -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
-P 50.out $samtools mpileup -D -V -x -g -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
-P 51.out $samtools mpileup -S -x -g -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
+P 48.out $samtools mpileup -x -g -f mpileup.ref.fa mpileup.1.$fmt | $filter
+P 49.out $samtools mpileup -x -v -f mpileup.ref.fa mpileup.1.$fmt | $filter
+P 50.out $samtools mpileup -D -V -x -g -f mpileup.ref.fa mpileup.1.$fmt | $filter
+P 51.out $samtools mpileup -S -x -g -f mpileup.ref.fa mpileup.1.$fmt | $filter
 P 52.out $samtools mpileup -u -x -f mpileup.ref.fa mpileup.1.bam | egrep -v '^##samtools'
 
 # # -o/e/h for indel scores
-P 53.out $samtools mpileup -e 1       -u -x -f mpileup.ref.fa indels.$fmt|$bcftools  view - | ./bcf_filter.sh|awk '/INDEL/'
-P 54.out $samtools mpileup -e 10      -u -x -f mpileup.ref.fa indels.$fmt|$bcftools  view - | ./bcf_filter.sh|awk '/INDEL/'
-P 55.out $samtools mpileup -h 10      -u -x -f mpileup.ref.fa indels.$fmt|$bcftools  view - | ./bcf_filter.sh|awk '/INDEL/'
-P 56.out $samtools mpileup -h 90      -u -x -f mpileup.ref.fa indels.$fmt|$bcftools  view - | ./bcf_filter.sh|awk '/INDEL/'
-P 57.out $samtools mpileup -e 1 -o 10 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools  view - | ./bcf_filter.sh|awk '/INDEL/'
-P 58.out $samtools mpileup -e 1 -o 40 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools  view - | ./bcf_filter.sh|awk '/INDEL/'
+P 53.out $samtools mpileup -e 1       -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 54.out $samtools mpileup -e 10      -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 55.out $samtools mpileup -h 10      -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 56.out $samtools mpileup -h 90      -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 57.out $samtools mpileup -e 1 -o 10 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 58.out $samtools mpileup -e 1 -o 40 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
 
 # -F/-m for indel reads; 2 samples {2indel, 1not} + {1indel, 1not}.
-P 59.out $samtools mpileup -x -F 0.60    -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 60.out $samtools mpileup -x -F 0.66    -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 61.out $samtools mpileup -x -m 3       -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 62.out $samtools mpileup -x -m 4       -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 63.out $samtools mpileup -x -p -F 0.66 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 64.out $samtools mpileup -x -p -F 0.67 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 65.out $samtools mpileup -x -p -m 2    -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 66.out $samtools mpileup -x -p -m 3    -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 67.out $samtools mpileup -x -L 3       -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 68.out $samtools mpileup -x -L 2       -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 69.out $samtools mpileup -x -I         -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
+P 59.out $samtools mpileup -x -F 0.60    -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 60.out $samtools mpileup -x -F 0.66    -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 61.out $samtools mpileup -x -m 3       -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 62.out $samtools mpileup -x -m 4       -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 63.out $samtools mpileup -x -p -F 0.66 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 64.out $samtools mpileup -x -p -F 0.67 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 65.out $samtools mpileup -x -p -m 2    -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 66.out $samtools mpileup -x -p -m 3    -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 67.out $samtools mpileup -x -L 3       -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 68.out $samtools mpileup -x -L 2       -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 69.out $samtools mpileup -x -I         -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
 
 # -P to select platform. Note the actual indel sequence call is made on the entire set.
-P 70.out $samtools mpileup -x                   -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 71.out $samtools mpileup -x -P ILLUMINA,LS454 -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 72.out $samtools mpileup -x -P ILLUMINA       -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 73.out $samtools mpileup -x -P ILLUMINA       -m 2 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 74.out $samtools mpileup -x -P LS454          -m 2 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 75.out $samtools mpileup -x -P LS454          -m 1 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
+P 70.out $samtools mpileup -x                   -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 71.out $samtools mpileup -x -P ILLUMINA,LS454 -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 72.out $samtools mpileup -x -P ILLUMINA       -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 73.out $samtools mpileup -x -P ILLUMINA       -m 2 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 74.out $samtools mpileup -x -P LS454          -m 2 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 75.out $samtools mpileup -x -P LS454          -m 1 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
 
 # Pileup output options; -s/O
 P 76.out $samtools mpileup -Q0 -s -x -f mpileup.ref.fa mpileup.1.bam
diff --git a/test/mpileup/regression.sh b/test/mpileup/regression.sh
index 1200bca..69e180c 100755
--- a/test/mpileup/regression.sh
+++ b/test/mpileup/regression.sh
@@ -139,11 +139,11 @@ regtest() {
 echo "Samtools mpileup tests:"
 
 samtools="../../samtools"
-bcftools="../../../bcftools/bcftools"
+filter="../vcf-miniview -f"
 regtest mpileup.reg
 
 # samtools="./samtools-0.1.19"
-# bcftools="./bcftools-0.1.19"
+# filter="./bcftools-0.1.19 view - | sed etc"
 # regtest mpileup.reg
 
 exit $?
diff --git a/test/vcf-miniview.c b/test/vcf-miniview.c
new file mode 100644
index 0000000..02cfd1f
--- /dev/null
+++ b/test/vcf-miniview.c
@@ -0,0 +1,132 @@
+/*  test/vcf-miniview.c -- minimal BCF/VCF viewer, for use by test harness.
+
+    Copyright (C) 2014 Genome Research Ltd.
+
+    Author: John Marshall <jm18 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <htslib/vcf.h>
+
+/* Write the command-line synopsis to stderr, then exit with EXIT_FAILURE. */
+void usage()
+{
+    fputs("Usage: vcf-miniview [view] [-f] [FILE]\n"
+          "Options:\n"
+          "  -f  Filters out ## headers and various fields, to simplify file comparison\n", stderr);
+    exit(EXIT_FAILURE);
+}
+
+void fail(const char *message)  /* report a fatal error; does not return */
+{
+    fprintf(stderr, "vcf-miniview: %s\n", message);  /* program-name prefix */
+    exit(EXIT_FAILURE);
+}
+
+/* Delete the first "TAG..." field from STR, and a ';' directly before it. */
+void erase(kstring_t* str, const char *tag)
+{
+    char *field = strstr(str->s+1, tag), *rest;
+    if (field == NULL) return;
+
+    /* The field extends to the next ';' or tab, or to the end of the string. */
+    for (rest = field; *rest && *rest != '\t' && *rest != ';'; rest++) continue;
+    if (field[-1] == ';') field--;
+
+    memmove(field, rest, str->l - (rest - str->s) + 1);
+    str->l -= rest - field;
+}
+
+/* Shorten the first "TAG..." field in STR to TAG plus its leading digits. */
+void truncate(kstring_t* str, const char *tag)
+{
+    char *begin, *end;
+    if ((begin = strstr(str->s+1, tag)) == NULL) return;
+    /* The field ends at the next ';' or tab, or at the end of the string. */
+    end = begin;
+    while (*end && *end != '\t' && *end != ';') end++;
+    /* <ctype.h> needs an unsigned char value; a plain char may be negative. */
+    begin += strlen(tag);
+    while (isdigit((unsigned char) *begin)) begin++;
+    /* Drop what follows the digits; the +1 carries the terminating NUL along. */
+    memmove(begin, end, str->l - (end - str->s) + 1);
+    str->l -= end - begin;
+}
+
+/* vcf-miniview [view] [-f] [FILE]: print the BCF/VCF read from FILE (or "-"
+   when FILE is omitted) as VCF text on stdout.  With -f, the ## header lines
+   and the fields named in erased[] are dropped and QS= is truncated.  */
+int main(int argc, char **argv)
+{
+    static const char *const erased[] = { "IMF=", "DP=", "IDV=", "IMP=", "IS=",
+        "VDB=", "SGB=", "MQB=", "BQB=", "RPB=", "MQ0F=", "MQSB=" };
+    int optind = 1, hdr_length, filter = 0, ret;
+    size_t i;
+    char *hdr_text;
+    htsFile *in;
+    bcf_hdr_t *hdr;
+    bcf1_t *rec;
+    kstring_t str = { 0, 0, NULL };
+
+    if (optind < argc && strcmp(argv[optind], "view") == 0) optind++;
+    if (optind < argc && strcmp(argv[optind], "-f") == 0) filter = 1, optind++;
+    if (argc == 1 || argc - optind > 1) usage();
+
+    if ((in = hts_open((optind < argc)? argv[optind] : "-", "r")) == NULL)
+        fail("can't open input file");
+    if ((hdr = bcf_hdr_read(in)) == NULL)
+        fail("can't read header");
+    /* A NULL here would otherwise reach strstr() and printf("%s").  */
+    if ((hdr_text = bcf_hdr_fmt_text(hdr, 0, &hdr_length)) == NULL)
+        fail("can't format header");
+
+    if (filter) {
+        /* Start output at the #CHROM line, skipping the ## lines before it.  */
+        char *fixed = strstr(hdr_text, "\n#CHROM");
+        printf("%s", fixed? fixed+1 : hdr_text);
+    }
+    else printf("%s", hdr_text);
+    free(hdr_text);
+
+    if ((rec = bcf_init()) == NULL)
+        fail("can't allocate record");
+    while ((ret = bcf_read(in, hdr, rec)) >= 0) {
+        str.l = 0;
+        if (vcf_format(hdr, rec, &str) < 0) fail("can't format record");
+        if (filter) {
+            for (i = 0; i < sizeof erased / sizeof erased[0]; i++)
+                erase(&str, erased[i]);
+            truncate(&str, "QS=");
+        }
+        printf("%s", str.s);
+    }
+    /* htslib documents -1 as end of input and values below -1 as errors.  */
+    if (ret < -1) fail("error reading input");
+
+    free(str.s);
+    bcf_destroy(rec);
+    bcf_hdr_destroy(hdr);
+    hts_close(in);
+    return EXIT_SUCCESS;
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/samtools.git



More information about the debian-med-commit mailing list