[med-svn] samtools 07/11: Replace bcftools+bcf_filter.sh with new vcf-miniview
Charles Plessy
plessy at moszumanska.debian.org
Thu Aug 21 22:58:53 UTC 2014
This is an automated email from the git hooks/post-receive script.
plessy pushed a commit to branch develop
in repository samtools.
commit f5288ca22e5825b1d4dcd78b4fe90be5b85d67e6
Author: John Marshall <jm18 at sanger.ac.uk>
Date: Wed Aug 20 16:18:27 2014 +0100
Replace bcftools+bcf_filter.sh with new vcf-miniview
Add new vcf-miniview test utility, which reads BCF or VCF and outputs
VCF optionally filtered in the same way that the old bcf_filter.sh did.
The sed -r option used by bcf_filter.sh was not available on OS X, and
an installation of bcftools will not necessarily be available on test
machines, notably when testing via travis.
---
.gitignore | 1 +
Makefile | 7 ++-
test/mpileup/bcf_filter.sh | 29 ----------
test/mpileup/mpileup.reg | 58 ++++++++++----------
test/mpileup/regression.sh | 4 +-
test/vcf-miniview.c | 132 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 170 insertions(+), 61 deletions(-)
diff --git a/.gitignore b/.gitignore
index d1ed6f5..399c94b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ lib*.a
/misc/md5sum-lite
/misc/wgsim
/samtools
+/test/vcf-miniview
/TAGS
diff --git a/Makefile b/Makefile
index e110b31..c621353 100644
--- a/Makefile
+++ b/Makefile
@@ -75,7 +75,8 @@ BUILT_TEST_PROGRAMS = \
test/split/test_count_rg \
test/split/test_expand_format_string \
test/split/test_filter_header_rg \
- test/split/test_parse_args
+ test/split/test_parse_args \
+ test/vcf-miniview
all: $(PROGRAMS) $(BUILT_MISC_PROGRAMS) $(BUILT_TEST_PROGRAMS)
@@ -212,6 +213,9 @@ test/split/test_filter_header_rg: test/split/test_filter_header_rg.o test/test.o
test/split/test_parse_args: test/split/test_parse_args.o test/test.o $(HTSLIB)
$(CC) -pthread $(LDFLAGS) -o $@ test/split/test_parse_args.o test/test.o $(HTSLIB) $(LDLIBS) -lz
+test/vcf-miniview: test/vcf-miniview.o $(HTSLIB)
+ $(CC) -pthread $(LDFLAGS) -o $@ test/vcf-miniview.o $(HTSLIB) $(LDLIBS) -lz
+
test_test_h = test/test.h $(htslib_sam_h)
test/merge/test_bam_translate.o: test/merge/test_bam_translate.c $(test_test_h) bam_sort.o
@@ -223,6 +227,7 @@ test/split/test_expand_format_string.o: test/split/test_expand_format_string.c b
test/split/test_filter_header_rg.o: test/split/test_filter_header_rg.c bam_split.o $(test_test_h)
test/split/test_parse_args.o: test/split/test_parse_args.c bam_split.o $(test_test_h)
test/test.o: test/test.c $(htslib_sam_h) $(test_test_h)
+test/vcf-miniview.o: test/vcf-miniview.c $(htslib_vcf_h)
# misc programs
diff --git a/test/mpileup/bcf_filter.sh b/test/mpileup/bcf_filter.sh
deleted file mode 100755
index 43b9808..0000000
--- a/test/mpileup/bcf_filter.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# Copyright (C) 2014 Genome Research Ltd.
-#
-# Author: James Bonfield <jkb at sanger.ac.uk>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-# DEALINGS IN THE SOFTWARE.
-
-# Ignores specific BCF fields and truncates the precision of others, to allow
-# for easy comparison.
-
-egrep -v '^##' |
-sed -r 's/;*(IMF|DP|IDV|IMP|IS|VDB|SGB|MQB|BQB|RPB|MQ0F|MQSB)=[-+e0-9/,.]*//g' |
-sed 's/\(;QS=[0-9]*\)[.0-9,]*/\1/'
diff --git a/test/mpileup/mpileup.reg b/test/mpileup/mpileup.reg
index e7d579d..08010cc 100644
--- a/test/mpileup/mpileup.reg
+++ b/test/mpileup/mpileup.reg
@@ -7,8 +7,8 @@
# Filename of expected output
# Third onwards; command to execute. $fmt is replaced by the current file
-# format, ie sam, bam or cram. $samtools and $bcftools are pointers to
-# the current binaries. This can be useful for testing older versions.
+# format, ie sam, bam or cram. $samtools is a pointer to the desired
+# samtools binary. This can be useful for testing older versions.
# # Create variant formats
INIT x $samtools view -C mpileup.1.bam > mpileup.1.cram
@@ -104,40 +104,40 @@ P 46.out $samtools view -h mpileup.1.bam | $samtools mpileup -x --ff 0x714 -
F 47.out $samtools mpileup -x -d 8500 -B -f mpileup.ref.fa deep.sam|awk '{print $4}'
# BCF output options
-P 48.out $samtools mpileup -x -g -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
-P 49.out $samtools mpileup -x -v -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
-P 50.out $samtools mpileup -D -V -x -g -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
-P 51.out $samtools mpileup -S -x -g -f mpileup.ref.fa mpileup.1.$fmt | $bcftools view - | ./bcf_filter.sh
+P 48.out $samtools mpileup -x -g -f mpileup.ref.fa mpileup.1.$fmt | $filter
+P 49.out $samtools mpileup -x -v -f mpileup.ref.fa mpileup.1.$fmt | $filter
+P 50.out $samtools mpileup -D -V -x -g -f mpileup.ref.fa mpileup.1.$fmt | $filter
+P 51.out $samtools mpileup -S -x -g -f mpileup.ref.fa mpileup.1.$fmt | $filter
P 52.out $samtools mpileup -u -x -f mpileup.ref.fa mpileup.1.bam | egrep -v '^##samtools'
# # -o/e/h for indel scores
-P 53.out $samtools mpileup -e 1 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 54.out $samtools mpileup -e 10 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 55.out $samtools mpileup -h 10 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 56.out $samtools mpileup -h 90 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 57.out $samtools mpileup -e 1 -o 10 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 58.out $samtools mpileup -e 1 -o 40 -u -x -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
+P 53.out $samtools mpileup -e 1 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 54.out $samtools mpileup -e 10 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 55.out $samtools mpileup -h 10 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 56.out $samtools mpileup -h 90 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 57.out $samtools mpileup -e 1 -o 10 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 58.out $samtools mpileup -e 1 -o 40 -u -x -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
# -F/-m for indel reads; 2 samples {2indel, 1not} + {1indel, 1not}.
-P 59.out $samtools mpileup -x -F 0.60 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 60.out $samtools mpileup -x -F 0.66 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 61.out $samtools mpileup -x -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 62.out $samtools mpileup -x -m 4 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 63.out $samtools mpileup -x -p -F 0.66 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 64.out $samtools mpileup -x -p -F 0.67 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 65.out $samtools mpileup -x -p -m 2 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 66.out $samtools mpileup -x -p -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 67.out $samtools mpileup -x -L 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 68.out $samtools mpileup -x -L 2 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 69.out $samtools mpileup -x -I -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
+P 59.out $samtools mpileup -x -F 0.60 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 60.out $samtools mpileup -x -F 0.66 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 61.out $samtools mpileup -x -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 62.out $samtools mpileup -x -m 4 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 63.out $samtools mpileup -x -p -F 0.66 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 64.out $samtools mpileup -x -p -F 0.67 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 65.out $samtools mpileup -x -p -m 2 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 66.out $samtools mpileup -x -p -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 67.out $samtools mpileup -x -L 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 68.out $samtools mpileup -x -L 2 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 69.out $samtools mpileup -x -I -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
# -P to select platform. Note the actual indel sequence call is made on the entire set.
-P 70.out $samtools mpileup -x -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 71.out $samtools mpileup -x -P ILLUMINA,LS454 -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 72.out $samtools mpileup -x -P ILLUMINA -m 3 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 73.out $samtools mpileup -x -P ILLUMINA -m 2 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 74.out $samtools mpileup -x -P LS454 -m 2 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
-P 75.out $samtools mpileup -x -P LS454 -m 1 -u -f mpileup.ref.fa indels.$fmt|$bcftools view - | ./bcf_filter.sh|awk '/INDEL/'
+P 70.out $samtools mpileup -x -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 71.out $samtools mpileup -x -P ILLUMINA,LS454 -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 72.out $samtools mpileup -x -P ILLUMINA -m 3 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 73.out $samtools mpileup -x -P ILLUMINA -m 2 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 74.out $samtools mpileup -x -P LS454 -m 2 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
+P 75.out $samtools mpileup -x -P LS454 -m 1 -u -f mpileup.ref.fa indels.$fmt|$filter|awk '/INDEL/'
# Pileup output options; -s/O
P 76.out $samtools mpileup -Q0 -s -x -f mpileup.ref.fa mpileup.1.bam
diff --git a/test/mpileup/regression.sh b/test/mpileup/regression.sh
index 1200bca..69e180c 100755
--- a/test/mpileup/regression.sh
+++ b/test/mpileup/regression.sh
@@ -139,11 +139,11 @@ regtest() {
echo "Samtools mpileup tests:"
samtools="../../samtools"
-bcftools="../../../bcftools/bcftools"
+filter="../vcf-miniview -f"
regtest mpileup.reg
# samtools="./samtools-0.1.19"
-# bcftools="./bcftools-0.1.19"
+# filter="./bcftools-0.1.19 view - | sed etc"
# regtest mpileup.reg
exit $?
diff --git a/test/vcf-miniview.c b/test/vcf-miniview.c
new file mode 100644
index 0000000..02cfd1f
--- /dev/null
+++ b/test/vcf-miniview.c
@@ -0,0 +1,132 @@
+/* test/vcf-miniview.c -- minimal BCF/VCF viewer, for use by test harness.
+
+ Copyright (C) 2014 Genome Research Ltd.
+
+ Author: John Marshall <jm18 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE. */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <htslib/vcf.h>
+
+void usage() /* Print the command-line synopsis to stderr and terminate; never returns. */
+{
+ fprintf(stderr,
+"Usage: vcf-miniview [view] [-f] [FILE]\n"
+"Options:\n"
+" -f Filters out ## headers and various fields, to simplify file comparison\n");
+ exit(EXIT_FAILURE); /* reaching usage() is always treated as a failure */
+}
+
+void fail(const char *message) /* Report a fatal error and terminate; never returns. */
+{
+ fprintf(stderr, "vcf-miniview: %s\n", message); /* message is prefixed with the program name */
+ exit(EXIT_FAILURE);
+}
+
+void erase(kstring_t* str, const char *tag) /* Delete the first "tag<value>" field (e.g. tag "DP=") from str, in place. */
+{
+ char *begin, *end;
+
+ if ((begin = strstr(str->s+1, tag)) == NULL) return; /* search starts at s+1 so begin[-1] below stays inside the buffer; only the first match is handled */
+
+ end = begin;
+ while (*end && *end != '\t' && *end != ';') end++; /* the field runs up to the next ';', tab, or end of string */
+ if (begin[-1] == ';') begin--; /* also remove the ';' that preceded the tag */
+
+ memmove(begin, end, str->l - (end - str->s) + 1); /* +1 also moves the byte at s[l], presumably the terminating NUL */
+ str->l -= end - begin; /* shrink the recorded length by the number of bytes removed */
+}
+
+void truncate(kstring_t* str, const char *tag) /* Keep tag and the leading digits of its value; drop the rest of that value, in place. */
+{
+ char *begin, *end;
+
+ if ((begin = strstr(str->s+1, tag)) == NULL) return; /* nothing to do when the tag is absent; only the first match is handled */
+
+ end = begin;
+ while (*end && *end != '\t' && *end != ';') end++; /* end of this field: next ';', tab, or end of string */
+
+ begin += strlen(tag); /* step over the tag itself, to the start of its value */
+ while (isdigit((unsigned char) *begin)) begin++; /* cast required: passing a negative char to isdigit() is undefined behaviour */
+
+ memmove(begin, end, str->l - (end - str->s) + 1); /* +1 also moves the byte at s[l], presumably the terminating NUL */
+ str->l -= end - begin; /* shrink the recorded length by the number of bytes removed */
+}
+
+int main(int argc, char **argv) /* Read a BCF/VCF input and print it as VCF text on stdout, optionally filtered (-f). */
+{
+ int optind, hdr_length, filter = 0; /* optind is a local index of the next unprocessed argument */
+ char *hdr_text;
+ htsFile *in;
+ bcf_hdr_t *hdr;
+ bcf1_t *rec;
+ kstring_t str = { 0, 0, NULL }; /* output buffer, reused for every record */
+
+ optind = 1;
+ if (optind < argc && strcmp(argv[optind], "view") == 0) optind++; /* an optional leading "view" word is accepted and skipped */
+ if (optind < argc && strcmp(argv[optind], "-f") == 0) filter = 1, optind++; /* -f turns on filtering */
+ if (argc == 1 || argc - optind > 1) usage(); /* no arguments at all, or more than one remaining argument */
+
+ if ((in = hts_open((optind < argc)? argv[optind] : "-", "r")) == NULL) /* "-" is used when no FILE is given (presumably standard input) */
+ fail("can't open input file");
+
+ if ((hdr = bcf_hdr_read(in)) == NULL)
+ fail("can't read header");
+
+ hdr_text = bcf_hdr_fmt_text(hdr, 0, &hdr_length); /* NOTE(review): result is used without a NULL check */
+ if (filter) {
+ char *fixed = strstr(hdr_text, "\n#CHROM"); /* locate the #CHROM line so that everything before it can be skipped */
+ printf("%s", fixed? fixed+1 : hdr_text); /* fall back to the whole header text if no #CHROM line is found */
+ }
+ else printf("%s", hdr_text);
+ free(hdr_text);
+
+ rec = bcf_init();
+ while (bcf_read(in, hdr, rec) >= 0) { /* any negative return ends the loop; NOTE(review): read errors are not told apart from end of input */
+ str.l = 0; /* reset the buffer length before formatting the next record */
+ vcf_format(hdr, rec, &str); /* NOTE(review): return value is not checked */
+ if (filter) {
+ erase(&str, "IMF="); /* each erase() call removes the first occurrence of the named field */
+ erase(&str, "DP=");
+ erase(&str, "IDV=");
+ erase(&str, "IMP=");
+ erase(&str, "IS=");
+ erase(&str, "VDB=");
+ erase(&str, "SGB=");
+ erase(&str, "MQB=");
+ erase(&str, "BQB=");
+ erase(&str, "RPB=");
+ erase(&str, "MQ0F=");
+ erase(&str, "MQSB=");
+ truncate(&str, "QS="); /* QS keeps only the leading digits of its value */
+ }
+ printf("%s", str.s);
+ }
+
+ free(str.s);
+ bcf_destroy(rec);
+ bcf_hdr_destroy(hdr);
+ hts_close(in); /* NOTE(review): return value is not checked */
+ return EXIT_SUCCESS;
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/samtools.git
More information about the debian-med-commit
mailing list