[med-svn] [python-pysam] 01/13: Imported Upstream version 0.9.1+ds
Afif Elghraoui
afif at moszumanska.debian.org
Mon Jun 20 01:53:27 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository python-pysam.
commit f4708100096e3853ab9749dbb378225683d04be6
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Sun Jun 19 14:17:37 2016 -0700
Imported Upstream version 0.9.1+ds
---
INSTALL | 2 +-
bcftools/bcftools.h | 3 +-
bcftools/consensus.c | 2 +-
bcftools/consensus.c.pysam.c | 48 +--
bcftools/convert.c.pysam.c | 8 +-
bcftools/em.c.pysam.c | 8 +-
bcftools/filter.c.pysam.c | 22 +-
bcftools/khash_str2str.h | 19 +-
bcftools/main.c | 4 +-
bcftools/main.c.pysam.c | 18 +-
bcftools/mcall.c.pysam.c | 20 +-
bcftools/ploidy.c | 30 +-
bcftools/ploidy.c.pysam.c | 30 +-
bcftools/prob1.c.pysam.c | 8 +-
bcftools/pysam.h | 4 +-
bcftools/tabix.c.pysam.c | 34 +-
bcftools/vcfannotate.c | 212 ++++++++++--
bcftools/vcfannotate.c.pysam.c | 256 +++++++++++----
bcftools/vcfcall.c | 43 ++-
bcftools/vcfcall.c.pysam.c | 155 +++++----
bcftools/vcfcnv.c.pysam.c | 84 ++---
bcftools/vcfconcat.c | 139 +++++++-
bcftools/vcfconcat.c.pysam.c | 187 +++++++++--
bcftools/vcfconvert.c | 18 +-
bcftools/vcfconvert.c.pysam.c | 196 +++++------
bcftools/vcffilter.c | 8 +-
bcftools/vcffilter.c.pysam.c | 50 +--
bcftools/vcfgtcheck.c.pysam.c | 52 +--
bcftools/vcfindex.c | 3 +-
bcftools/vcfindex.c.pysam.c | 67 ++--
bcftools/vcfisec.c | 12 +-
bcftools/vcfisec.c.pysam.c | 84 ++---
bcftools/vcfmerge.c | 10 +-
bcftools/vcfmerge.c.pysam.c | 96 +++---
bcftools/vcfnorm.c | 20 +-
bcftools/vcfnorm.c.pysam.c | 76 +++--
bcftools/vcfplugin.c | 41 +--
bcftools/vcfplugin.c.pysam.c | 127 ++++----
bcftools/vcfquery.c.pysam.c | 52 +--
bcftools/vcfroh.c | 43 ++-
bcftools/vcfroh.c.pysam.c | 107 +++---
bcftools/vcfsom.c.pysam.c | 56 ++--
bcftools/vcfstats.c.pysam.c | 242 +++++++-------
bcftools/vcfview.c | 8 +-
bcftools/vcfview.c.pysam.c | 102 +++---
bcftools/version.c.pysam.c | 2 +-
bcftools/version.h | 2 +-
doc/faq.rst | 2 +-
doc/glossary.rst | 3 +-
doc/installation.rst | 5 -
doc/release.rst | 20 ++
import.py | 82 ++++-
pysam/__init__.py | 2 +
pysam/calignedsegment.pyx | 127 +++++++-
pysam/calignmentfile.pxd | 1 +
pysam/calignmentfile.pyx | 41 ++-
pysam/cbcf.pyx | 2 +-
pysam/cfaidx.pxd | 12 +-
pysam/cfaidx.pyx | 44 ++-
pysam/chtslib.pxd | 39 ++-
pysam/ctabix.pxd | 13 +-
pysam/ctabix.pyx | 27 +-
pysam/ctabixproxies.pyx | 68 +++-
pysam/cutils.pxd | 3 +
pysam/cutils.pyx | 212 +++++-------
pysam/cvcf.pyx | 13 +-
pysam/pysam_stream.h | 3 +-
pysam/pysam_util.c | 50 ++-
pysam/pysam_util.h | 19 +-
pysam/tabix_util.c | 2 -
pysam/utils.py | 20 +-
pysam/version.py | 6 +-
run_tests_travis.sh | 30 +-
samtools/bam.c | 18 +-
samtools/bam.c.pysam.c | 18 +-
samtools/bam.h | 9 +-
samtools/bam2bcf.c | 2 +
samtools/bam2bcf.c.pysam.c | 10 +-
samtools/bam2bcf_indel.c | 2 +
samtools/bam2bcf_indel.c.pysam.c | 22 +-
samtools/bam2depth.c | 2 +
samtools/bam2depth.c.pysam.c | 70 ++--
samtools/bam_addrprg.c | 2 +
samtools/bam_addrprg.c.pysam.c | 42 +--
samtools/bam_aux.c | 2 +
samtools/bam_aux.c.pysam.c | 2 +
samtools/bam_cat.c | 93 ++++--
samtools/bam_cat.c.pysam.c | 121 ++++---
samtools/bam_color.c | 2 +
samtools/bam_color.c.pysam.c | 2 +
samtools/bam_flags.c | 2 +
samtools/bam_flags.c.pysam.c | 42 +--
samtools/bam_import.c | 2 +
samtools/bam_import.c.pysam.c | 4 +-
samtools/bam_index.c | 2 +
samtools/bam_index.c.pysam.c | 20 +-
samtools/bam_lpileup.c | 2 +
samtools/bam_lpileup.c.pysam.c | 12 +-
samtools/bam_mate.c | 80 +++--
samtools/bam_mate.c.pysam.c | 88 +++--
samtools/bam_md.c | 64 +++-
samtools/bam_md.c.pysam.c | 84 +++--
samtools/bam_plbuf.c | 2 +
samtools/bam_plbuf.c.pysam.c | 2 +
samtools/bam_plcmd.c | 6 +-
samtools/bam_plcmd.c.pysam.c | 74 +++--
samtools/bam_quickcheck.c | 22 +-
samtools/bam_quickcheck.c.pysam.c | 48 ++-
samtools/bam_reheader.c | 64 +++-
samtools/bam_reheader.c.pysam.c | 95 ++++--
samtools/bam_rmdup.c | 109 +++++--
samtools/bam_rmdup.c.pysam.c | 129 ++++++--
samtools/bam_rmdupse.c | 64 +++-
samtools/bam_rmdupse.c.pysam.c | 66 +++-
samtools/bam_sort.c | 267 ++++++++++-----
samtools/bam_sort.c.pysam.c | 329 +++++++++++++------
samtools/bam_split.c | 88 +++--
samtools/bam_split.c.pysam.c | 124 ++++---
samtools/bam_stat.c | 2 +
samtools/bam_stat.c.pysam.c | 46 +--
samtools/bam_tview.c | 2 +
samtools/bam_tview.c.pysam.c | 18 +-
samtools/bam_tview_curses.c.pysam.c | 2 +-
samtools/bam_tview_html.c | 2 +
samtools/bam_tview_html.c.pysam.c | 8 +-
samtools/bamshuf.c | 141 ++++++--
samtools/bamshuf.c.pysam.c | 163 +++++++---
samtools/bamtk.c | 6 +-
samtools/bamtk.c.pysam.c | 36 +-
samtools/bedcov.c | 2 +
samtools/bedcov.c.pysam.c | 16 +-
samtools/bedidx.c | 2 +
samtools/bedidx.c.pysam.c | 6 +-
samtools/cut_target.c | 2 +
samtools/cut_target.c.pysam.c | 24 +-
samtools/dict.c | 5 +-
samtools/dict.c.pysam.c | 29 +-
samtools/errmod.c | 2 +
samtools/errmod.c.pysam.c | 2 +
samtools/faidx.c | 6 +-
samtools/faidx.c.pysam.c | 20 +-
samtools/kprobaln.c | 2 +
samtools/kprobaln.c.pysam.c | 14 +-
samtools/misc/ace2sam.c | 2 +
samtools/misc/ace2sam.c.pysam.c | 38 ++-
samtools/padding.c | 42 ++-
samtools/padding.c.pysam.c | 136 ++++----
samtools/phase.c | 89 +++--
samtools/phase.c.pysam.c | 167 ++++++----
samtools/pysam.h | 4 +-
samtools/sam.c | 18 +-
samtools/sam.c.pysam.c | 26 +-
samtools/sam_header.c | 2 +
samtools/sam_header.c.pysam.c | 8 +-
samtools/sam_opts.c | 2 +
samtools/sam_opts.c.pysam.c | 4 +-
samtools/sam_view.c | 84 +++--
samtools/sam_view.c.pysam.c | 152 +++++----
samtools/sample.c | 2 +
samtools/sample.c.pysam.c | 2 +
samtools/stats.c | 2 +
samtools/stats.c.pysam.c | 60 ++--
samtools/stats_isize.c | 2 +
samtools/stats_isize.c.pysam.c | 4 +-
samtools/test/merge/test_bam_translate.c | 2 +
samtools/test/merge/test_bam_translate.c.pysam.c | 170 +++++-----
samtools/test/merge/test_rtrans_build.c | 2 +
samtools/test/merge/test_rtrans_build.c.pysam.c | 26 +-
samtools/test/merge/test_trans_tbl_init.c | 16 +-
samtools/test/merge/test_trans_tbl_init.c.pysam.c | 152 ++++-----
samtools/test/split/test_count_rg.c | 2 +
samtools/test/split/test_count_rg.c.pysam.c | 32 +-
samtools/test/split/test_expand_format_string.c | 2 +
.../test/split/test_expand_format_string.c.pysam.c | 32 +-
samtools/test/split/test_filter_header_rg.c | 2 +
.../test/split/test_filter_header_rg.c.pysam.c | 50 +--
samtools/test/split/test_parse_args.c | 2 +
samtools/test/split/test_parse_args.c.pysam.c | 120 +++----
samtools/test/test.c | 2 +
samtools/test/test.c.pysam.c | 16 +-
samtools/test/tview/test_get_rg_sample.c | 2 +
samtools/test/tview/test_get_rg_sample.c.pysam.c | 6 +-
samtools/version.h | 2 +-
setup.py | 27 +-
tests/AlignedSegment_test.py | 121 ++++++-
tests/AlignmentFile_test.py | 362 +++++++++++----------
tests/TestUtils.py | 34 +-
tests/VariantFile_test.py | 47 ++-
tests/cbcf_data/example_vcf42.vcf | 6 +-
tests/faidx_test.py | 63 +++-
tests/pysam_data/Makefile | 9 +-
tests/samtools_test.py | 82 ++++-
tests/tabix_test.py | 145 ++++++---
tests/test_samtools_python.py | 35 ++
194 files changed, 5846 insertions(+), 3093 deletions(-)
diff --git a/INSTALL b/INSTALL
index 30fe770..5ddff7f 100644
--- a/INSTALL
+++ b/INSTALL
@@ -15,7 +15,7 @@ manually modifying one line in Makefile.
curl
-Pysam requires Python (2.6 or greater) and Cython (0.22 or greater).
+Pysam requires Python (2.7 or greater) and Cython (0.22 or greater).
It has not been tested on many other platforms.
Compilation
diff --git a/bcftools/bcftools.h b/bcftools/bcftools.h
index 6f22272..d4e856d 100644
--- a/bcftools/bcftools.h
+++ b/bcftools/bcftools.h
@@ -26,6 +26,7 @@ THE SOFTWARE. */
#define BCFTOOLS_H
#include <stdarg.h>
+#include <htslib/hts_defs.h>
#include <htslib/vcf.h>
#include <math.h>
@@ -37,7 +38,7 @@ THE SOFTWARE. */
#define FT_STDIN (1<<3)
char *bcftools_version(void);
-void error(const char *format, ...);
+void error(const char *format, ...) HTS_NORETURN;
void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
const char *hts_bcf_wmode(int file_type);
diff --git a/bcftools/consensus.c b/bcftools/consensus.c
index 7a615fe..051f353 100644
--- a/bcftools/consensus.c
+++ b/bcftools/consensus.c
@@ -623,7 +623,7 @@ int main_consensus(int argc, char *argv[])
{"chain",1,0,'c'},
{0,0,0,0}
};
- char c;
+ int c;
while ((c = getopt_long(argc, argv, "h?s:1iH:f:o:m:c:",loptions,NULL)) >= 0)
{
switch (c)
diff --git a/bcftools/consensus.c.pysam.c b/bcftools/consensus.c.pysam.c
index 7765d6b..91aa5ae 100644
--- a/bcftools/consensus.c.pysam.c
+++ b/bcftools/consensus.c.pysam.c
@@ -87,7 +87,7 @@ args_t;
static chain_t* init_chain(chain_t *chain, int ref_ori_pos)
{
-// fprintf(pysamerr, "init_chain(*chain, ref_ori_pos=%d)\n", ref_ori_pos);
+// fprintf(pysam_stderr, "init_chain(*chain, ref_ori_pos=%d)\n", ref_ori_pos);
chain = (chain_t*) calloc(1,sizeof(chain_t));
chain->num = 0;
chain->block_lengths = NULL;
@@ -157,7 +157,7 @@ static void print_chain(args_t *args)
static void push_chain_gap(chain_t *chain, int ref_start, int ref_len, int alt_start, int alt_len)
{
-// fprintf(pysamerr, "push_chain_gap(*chain, ref_start=%d, ref_len=%d, alt_start=%d, alt_len=%d)\n", ref_start, ref_len, alt_start, alt_len);
+// fprintf(pysam_stderr, "push_chain_gap(*chain, ref_start=%d, ref_len=%d, alt_start=%d, alt_len=%d)\n", ref_start, ref_len, alt_start, alt_len);
int num = chain->num;
if (ref_start <= chain->ref_last_block_ori) {
@@ -218,7 +218,7 @@ static void init_data(args_t *args)
args->fp_out = fopen(args->output_fname,"w");
if ( ! args->fp_out ) error("Failed to create %s: %s\n", args->output_fname, strerror(errno));
}
- else args->fp_out = stdout;
+ else args->fp_out = pysam_stdout;
}
static void destroy_data(args_t *args)
@@ -257,7 +257,7 @@ static void init_region(args_t *args, char *line)
}
}
args->rid = bcf_hdr_name2id(args->hdr,line);
- if ( args->rid<0 ) fprintf(pysamerr,"Warning: Sequence \"%s\" not in %s\n", line,args->fname);
+ if ( args->rid<0 ) fprintf(pysam_stderr,"Warning: Sequence \"%s\" not in %s\n", line,args->fname);
args->fa_buf.l = 0;
args->fa_length = 0;
args->fa_end_pos = to;
@@ -342,7 +342,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
if ( rec->pos <= args->fa_frz_pos )
{
- fprintf(pysamerr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1);
+ fprintf(pysam_stderr,"The site %s:%d overlaps with another variant, skipping...\n", bcf_seqname(args->hdr,rec),rec->pos+1);
return;
}
if ( args->mask )
@@ -428,7 +428,7 @@ static void apply_variant(args_t *args, bcf1_t *rec)
}
else if ( strncasecmp(rec->d.allele[0],args->fa_buf.s+idx,rec->rlen) )
{
- // fprintf(pysamerr,"%d .. [%s], idx=%d ori=%d off=%d\n",args->fa_ori_pos,args->fa_buf.s,idx,args->fa_ori_pos,args->fa_mod_off);
+ // fprintf(pysam_stderr,"%d .. [%s], idx=%d ori=%d off=%d\n",args->fa_ori_pos,args->fa_buf.s,idx,args->fa_ori_pos,args->fa_mod_off);
char tmp = 0;
if ( args->fa_buf.l - idx > rec->rlen )
{
@@ -589,23 +589,23 @@ static void consensus(args_t *args)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Create consensus sequence by applying VCF variants to a reference\n");
- fprintf(pysamerr, " fasta file.\n");
- fprintf(pysamerr, "Usage: bcftools consensus [OPTIONS] <file.vcf>\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -f, --fasta-ref <file> reference sequence in fasta format\n");
- fprintf(pysamerr, " -H, --haplotype <1|2> apply variants for the given haplotype\n");
- fprintf(pysamerr, " -i, --iupac-codes output variants in the form of IUPAC ambiguity codes\n");
- fprintf(pysamerr, " -m, --mask <file> replace regions with N\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -c, --chain <file> write a chain file for liftover\n");
- fprintf(pysamerr, " -s, --sample <name> apply variants of the given sample\n");
- fprintf(pysamerr, "Examples:\n");
- fprintf(pysamerr, " # Get the consensus for one region. The fasta header lines are then expected\n");
- fprintf(pysamerr, " # in the form \">chr:from-to\".\n");
- fprintf(pysamerr, " samtools faidx ref.fa 8:11870-11890 | bcftools consensus in.vcf.gz > out.fa\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Create consensus sequence by applying VCF variants to a reference\n");
+ fprintf(pysam_stderr, " fasta file.\n");
+ fprintf(pysam_stderr, "Usage: bcftools consensus [OPTIONS] <file.vcf>\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -f, --fasta-ref <file> reference sequence in fasta format\n");
+ fprintf(pysam_stderr, " -H, --haplotype <1|2> apply variants for the given haplotype\n");
+ fprintf(pysam_stderr, " -i, --iupac-codes output variants in the form of IUPAC ambiguity codes\n");
+ fprintf(pysam_stderr, " -m, --mask <file> replace regions with N\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -c, --chain <file> write a chain file for liftover\n");
+ fprintf(pysam_stderr, " -s, --sample <name> apply variants of the given sample\n");
+ fprintf(pysam_stderr, "Examples:\n");
+ fprintf(pysam_stderr, " # Get the consensus for one region. The fasta header lines are then expected\n");
+ fprintf(pysam_stderr, " # in the form \">chr:from-to\".\n");
+ fprintf(pysam_stderr, " samtools faidx ref.fa 8:11870-11890 | bcftools consensus in.vcf.gz > out.fa\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -625,7 +625,7 @@ int main_consensus(int argc, char *argv[])
{"chain",1,0,'c'},
{0,0,0,0}
};
- char c;
+ int c;
while ((c = getopt_long(argc, argv, "h?s:1iH:f:o:m:c:",loptions,NULL)) >= 0)
{
switch (c)
diff --git a/bcftools/convert.c.pysam.c b/bcftools/convert.c.pysam.c
index ee27882..084ef50 100644
--- a/bcftools/convert.c.pysam.c
+++ b/bcftools/convert.c.pysam.c
@@ -197,7 +197,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
case BCF_BT_INT32: if ( info->v1.i==bcf_int32_missing ) kputc('.', str); else kputw(info->v1.i, str); break;
case BCF_BT_FLOAT: if ( bcf_float_is_missing(info->v1.f) ) kputc('.', str); else ksprintf(str, "%g", info->v1.f); break;
case BCF_BT_CHAR: kputc(info->v1.i, str); break;
- default: fprintf(pysamerr,"todo: type %d\n", info->type); exit(1); break;
+ default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
}
}
else if ( fmt->subscript >=0 )
@@ -218,7 +218,7 @@ static void process_info(convert_t *convert, bcf1_t *line, fmt_t *fmt, int isamp
case BCF_BT_INT16: BRANCH(int16_t, val==bcf_int16_missing, val==bcf_int16_vector_end, kputw(val, str)); break;
case BCF_BT_INT32: BRANCH(int32_t, val==bcf_int32_missing, val==bcf_int32_vector_end, kputw(val, str)); break;
case BCF_BT_FLOAT: BRANCH(float, bcf_float_is_missing(val), bcf_float_is_vector_end(val), ksprintf(str, "%g", val)); break;
- default: fprintf(pysamerr,"todo: type %d\n", info->type); exit(1); break;
+ default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
}
#undef BRANCH
}
@@ -730,7 +730,7 @@ static fmt_t *register_tag(convert_t *convert, int type, char *key, int is_gtf)
else if ( id>=0 && bcf_hdr_idinfo_exists(convert->header,BCF_HL_INFO,id) )
{
fmt->type = T_INFO;
- fprintf(pysamerr,"Warning: Assuming INFO/%s\n", key);
+ fprintf(pysam_stderr,"Warning: Assuming INFO/%s\n", key);
}
}
}
@@ -896,7 +896,7 @@ convert_t *convert_init(bcf_hdr_t *hdr, int *samples, int nsamples, const char *
char *p = convert->format_str;
while ( *p )
{
- //fprintf(pysamerr,"<%s>\n", p);
+ //fprintf(pysam_stderr,"<%s>\n", p);
switch (*p)
{
case '[': is_gtf = 1; p++; break;
diff --git a/bcftools/em.c.pysam.c b/bcftools/em.c.pysam.c
index 758d919..8109152 100644
--- a/bcftools/em.c.pysam.c
+++ b/bcftools/em.c.pysam.c
@@ -74,7 +74,7 @@ static double prob1(double f, void *data)
minaux1_t *a = (minaux1_t*)data;
double p = 1., l = 0., f3[3];
int i;
-// printf("brent %lg\n", f);
+// fprintf(pysam_stdout, "brent %lg\n", f);
if (f < 0 || f > 1) return 1e300;
f3[0] = (1.-f)*(1.-f); f3[1] = 2.*f*(1.-f); f3[2] = f*f;
for (i = a->beg; i < a->end; ++i) {
@@ -90,7 +90,7 @@ static double freq_iter(double *f, const double *_pdg, int beg, int end)
{
double f0 = *f, f3[3], err;
int i;
-// printf("em %lg\n", *f);
+// fprintf(pysam_stdout, "em %lg\n", *f);
f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0;
for (i = beg, f0 = 0.; i < end; ++i) {
const double *pdg = _pdg + i * 3;
@@ -128,7 +128,7 @@ static double g3_iter(double g[3], const double *_pdg, int beg, int end)
double err, gg[3];
int i;
gg[0] = gg[1] = gg[2] = 0.;
-// printf("%lg,%lg,%lg\n", g[0], g[1], g[2]);
+// fprintf(pysam_stdout, "%lg,%lg,%lg\n", g[0], g[1], g[2]);
for (i = beg; i < end; ++i) {
double sum, tmp[3];
const double *pdg = _pdg + i * 3;
@@ -237,7 +237,7 @@ static int pair_freq_iter(int n, double *pdg[2], double f[4])
{
double ff[4];
int i, k, h;
-// printf("%lf,%lf,%lf,%lf\n", f[0], f[1], f[2], f[3]);
+// fprintf(pysam_stdout, "%lf,%lf,%lf,%lf\n", f[0], f[1], f[2], f[3]);
memset(ff, 0, 4 * sizeof(double));
for (i = 0; i < n; ++i) {
double *p[2], sum, tmp;
diff --git a/bcftools/filter.c.pysam.c b/bcftools/filter.c.pysam.c
index 7520106..531339e 100644
--- a/bcftools/filter.c.pysam.c
+++ b/bcftools/filter.c.pysam.c
@@ -360,7 +360,7 @@ static int bcf_get_info_value(bcf1_t *line, int info_id, int ivec, void *value)
case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, int); break;
case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, int); break;
case BCF_BT_FLOAT: BRANCH(float, bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), float); break;
- default: fprintf(pysamerr,"todo: type %d\n", info->type); exit(1); break;
+ default: fprintf(pysam_stderr,"todo: type %d\n", info->type); exit(1); break;
}
#undef BRANCH
return -1; // this shouldn't happen
@@ -586,7 +586,7 @@ gt_length_too_big:
case BCF_BT_INT8: BRANCH(int8_t); break;
case BCF_BT_INT16: BRANCH(int16_t); break;
case BCF_BT_INT32: BRANCH(int32_t); break;
- default: fprintf(pysamerr,"FIXME: type %d in bcf_format_gt?\n", fmt->type); abort(); break;
+ default: fprintf(pysam_stderr,"FIXME: type %d in bcf_format_gt?\n", fmt->type); abort(); break;
}
#undef BRANCH
@@ -1045,7 +1045,7 @@ static int vector_logic_or(token_t *atok, token_t *btok, int or_type)
{ \
if ( (atok)->values[0] CMP_OP (btok)->values[0] ) { pass_site = 1; } \
} \
- /*fprintf(pysamerr,"pass=%d\n", pass_site);*/ \
+ /*fprintf(pysam_stderr,"pass=%d\n", pass_site);*/ \
(ret) = pass_site; \
} \
}
@@ -1394,16 +1394,16 @@ static void filter_debug_print(token_t *toks, token_t **tok_ptrs, int ntoks)
if ( tok->tok_type==TOK_VAL )
{
if ( tok->key )
- fprintf(pysamerr,"%s", tok->key);
+ fprintf(pysam_stderr,"%s", tok->key);
else if ( tok->tag )
- fprintf(pysamerr,"%s", tok->tag);
+ fprintf(pysam_stderr,"%s", tok->tag);
else
- fprintf(pysamerr,"%e", tok->threshold);
+ fprintf(pysam_stderr,"%e", tok->threshold);
}
else
- fprintf(pysamerr,"%c", TOKEN_STRING[tok->tok_type]);
- if ( tok->setter ) fprintf(pysamerr,"\t[setter %p]", tok->setter);
- fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr,"%c", TOKEN_STRING[tok->tok_type]);
+ if ( tok->setter ) fprintf(pysam_stderr,"\t[setter %p]", tok->setter);
+ fprintf(pysam_stderr,"\n");
}
}
@@ -1427,8 +1427,8 @@ filter_t *filter_init(bcf_hdr_t *hdr, const char *str)
ret = filters_next_token(&tmp, &len);
if ( ret==-1 ) error("Missing quotes in: %s\n", str);
- //fprintf(pysamerr,"token=[%c] .. [%s] %d\n", TOKEN_STRING[ret], tmp, len);
- //int i; for (i=0; i<nops; i++) fprintf(pysamerr," .%c.", TOKEN_STRING[ops[i]]); fprintf(pysamerr,"\n");
+ //fprintf(pysam_stderr,"token=[%c] .. [%s] %d\n", TOKEN_STRING[ret], tmp, len);
+ //int i; for (i=0; i<nops; i++) fprintf(pysam_stderr," .%c.", TOKEN_STRING[ops[i]]); fprintf(pysam_stderr,"\n");
if ( ret==TOK_LFT ) // left bracket
{
diff --git a/bcftools/khash_str2str.h b/bcftools/khash_str2str.h
index ecf4e0b..4a5bd12 100644
--- a/bcftools/khash_str2str.h
+++ b/bcftools/khash_str2str.h
@@ -1,6 +1,6 @@
/* khash_str2str.h -- C-string to C-string hash table.
- Copyright (C) 2014 Genome Research Ltd.
+ Copyright (C) 2014,2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -61,6 +61,23 @@ static inline void khash_str2str_destroy_free(void *_hash)
}
/*
+ * Destroys the hash structure, the keys and the values
+ */
+static inline void khash_str2str_destroy_free_all(void *_hash)
+{
+ khash_t(str2str) *hash = (khash_t(str2str)*)_hash;
+ khint_t k;
+ if (hash == 0) return;
+ for (k = 0; k < kh_end(hash); ++k)
+ if (kh_exist(hash, k))
+ {
+ free((char*)kh_key(hash, k));
+ free((char*)kh_val(hash, k));
+ }
+ kh_destroy(str2str, hash);
+}
+
+/*
* Returns value if key exists or NULL if not
*/
static inline char *khash_str2str_get(void *_hash, const char *str)
diff --git a/bcftools/main.c b/bcftools/main.c
index f08b5c7..1892c1d 100644
--- a/bcftools/main.c
+++ b/bcftools/main.c
@@ -1,6 +1,6 @@
/* main.c -- main bcftools command front-end.
- Copyright (C) 2012-2015 Genome Research Ltd.
+ Copyright (C) 2012-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -219,7 +219,7 @@ int main(int argc, char *argv[])
if (argc < 2) { usage(stderr); return 1; }
if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) {
- printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2015 Genome Research Ltd.\n", bcftools_version(), hts_version());
+ printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2016 Genome Research Ltd.\n", bcftools_version(), hts_version());
#if USE_GPL
printf("License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
#else
diff --git a/bcftools/main.c.pysam.c b/bcftools/main.c.pysam.c
index f180e56..f578442 100644
--- a/bcftools/main.c.pysam.c
+++ b/bcftools/main.c.pysam.c
@@ -2,7 +2,7 @@
/* main.c -- main bcftools command front-end.
- Copyright (C) 2012-2015 Genome Research Ltd.
+ Copyright (C) 2012-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -218,24 +218,24 @@ static void usage(FILE *fp)
int bcftools_main(int argc, char *argv[])
{
- if (argc < 2) { usage(pysamerr); return 1; }
+ if (argc < 2) { usage(pysam_stderr); return 1; }
if (strcmp(argv[1], "version") == 0 || strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-v") == 0) {
- printf("bcftools %s\nUsing htslib %s\nCopyright (C) 2015 Genome Research Ltd.\n", bcftools_version(), hts_version());
+ fprintf(pysam_stdout, "bcftools %s\nUsing htslib %s\nCopyright (C) 2016 Genome Research Ltd.\n", bcftools_version(), hts_version());
#if USE_GPL
- printf("License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
+ fprintf(pysam_stdout, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n");
#else
- printf("License Expat: The MIT/Expat license\n");
+ fprintf(pysam_stdout, "License Expat: The MIT/Expat license\n");
#endif
- printf("This is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n");
+ fprintf(pysam_stdout, "This is free software: you are free to change and redistribute it.\nThere is NO WARRANTY, to the extent permitted by law.\n");
return 0;
}
else if (strcmp(argv[1], "--version-only") == 0) {
- printf("%s+htslib-%s\n", bcftools_version(), hts_version());
+ fprintf(pysam_stdout, "%s+htslib-%s\n", bcftools_version(), hts_version());
return 0;
}
else if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-h") == 0) {
- if (argc == 2) { usage(stdout); return 0; }
+ if (argc == 2) { usage(pysam_stdout); return 0; }
// Otherwise change "bcftools help COMMAND [...]" to "bcftools COMMAND";
// main_xyz() functions by convention display the subcommand's usage
// when invoked without any arguments.
@@ -260,7 +260,7 @@ int bcftools_main(int argc, char *argv[])
}
i++;
}
- fprintf(pysamerr, "[E::%s] unrecognized command '%s'\n", __func__, argv[1]);
+ fprintf(pysam_stderr, "[E::%s] unrecognized command '%s'\n", __func__, argv[1]);
return 1;
}
diff --git a/bcftools/mcall.c.pysam.c b/bcftools/mcall.c.pysam.c
index b4c4a99..29ed799 100644
--- a/bcftools/mcall.c.pysam.c
+++ b/bcftools/mcall.c.pysam.c
@@ -288,7 +288,7 @@ void mcall_init(call_t *call)
call->theta *= aM;
if ( call->theta >= 1 )
{
- fprintf(pysamerr,"The prior is too big (theta*aM=%.2f), going with 0.99\n", call->theta);
+ fprintf(pysam_stderr,"The prior is too big (theta*aM=%.2f), going with 0.99\n", call->theta);
call->theta = 0.99;
}
call->theta = log(call->theta);
@@ -516,13 +516,13 @@ float calc_ICB(int nref, int nalt, int nhets, int ndiploid)
double q = 2*fref*falt; // probability of a het, assuming HWE
double mean = q*ndiploid;
- //fprintf(pysamerr,"\np=%e N=%d k=%d .. nref=%d nalt=%d nhets=%d ndiploid=%d\n", q,ndiploid,nhets, nref,nalt,nhets,ndiploid);
+ //fprintf(pysam_stderr,"\np=%e N=%d k=%d .. nref=%d nalt=%d nhets=%d ndiploid=%d\n", q,ndiploid,nhets, nref,nalt,nhets,ndiploid);
// Can we use normal approximation? The second condition is for performance only
// and is not well justified.
if ( (mean>10 && (1-q)*ndiploid>10 ) || ndiploid>200 )
{
- //fprintf(pysamerr,"out: mean=%e p=%e\n", mean,exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q))));
+ //fprintf(pysam_stderr,"out: mean=%e p=%e\n", mean,exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q))));
return exp(-0.5*(nhets-mean)*(nhets-mean)/(mean*(1-q)));
}
@@ -1032,12 +1032,12 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
if ( igt==GT_SKIP ) continue;
lk += gl[igt];
npresent++;
- // fprintf(pysamerr," %e", gl[igt]);
+ // fprintf(pysam_stderr," %e", gl[igt]);
}
- // fprintf(pysamerr,"\t\t");
+ // fprintf(pysam_stderr,"\t\t");
double Pkij = npresent==3 ? (double)2/(trio[itr]>>12) : 1; // with missing genotypes Pkij's are different
lk += log(1 - trio_Pm * (1 - Pkij));
- // fprintf(pysamerr,"%d%d%d\t%e\t%.2f\n", trio[itr]>>8&0xf,trio[itr]>>4&0xf,trio[itr]&0xf, lk, Pkij);
+ // fprintf(pysam_stderr,"%d%d%d\t%e\t%.2f\n", trio[itr]>>8&0xf,trio[itr]>>4&0xf,trio[itr]&0xf, lk, Pkij);
if ( c_lk < lk ) { c_lk = lk; c_itr = trio[itr]; }
if ( uc_itr==trio[itr] ) uc_is_mendelian = 1;
}
@@ -1045,10 +1045,10 @@ static void mcall_call_trio_genotypes(call_t *call, bcf1_t *rec, int nals, int n
if ( !uc_is_mendelian )
{
uc_lk += log(1 - trio_Pm);
- // fprintf(pysamerr,"c_lk=%e uc_lk=%e c_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,uc_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
+ // fprintf(pysam_stderr,"c_lk=%e uc_lk=%e c_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,uc_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
if ( c_lk < uc_lk ) { c_lk = uc_lk; c_itr = uc_itr; }
}
- // fprintf(pysamerr,"best_lk=%e best_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
+ // fprintf(pysam_stderr,"best_lk=%e best_itr=%d%d%d uc_itr=%d%d%d\n", c_lk,c_itr>>8&0xf,c_itr>>4&0xf,c_itr&0xf,uc_itr>>8&0xf,uc_itr>>4&0xf,uc_itr&0xf);
// Set genotypes for father, mother, child and calculate genotype qualities
for (i=0; i<3; i++)
@@ -1429,7 +1429,7 @@ int mcall(call_t *call, bcf1_t *rec)
int out_als, nout;
if ( nals > 8*sizeof(out_als) )
{
- fprintf(pysamerr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1);
+ fprintf(pysam_stderr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1);
return 0;
}
nout = mcall_find_best_alleles(call, nals, &out_als);
@@ -1473,7 +1473,7 @@ int mcall(call_t *call, bcf1_t *rec)
{
if ( nout>4 )
{
- fprintf(pysamerr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1);
+ fprintf(pysam_stderr,"Too many alleles at %s:%d, skipping.\n", bcf_seqname(call->hdr,rec),rec->pos+1);
return 0;
}
mcall_call_trio_genotypes(call, rec, nals,nout,out_als);
diff --git a/bcftools/ploidy.c b/bcftools/ploidy.c
index 160bc3e..719e175 100644
--- a/bcftools/ploidy.c
+++ b/bcftools/ploidy.c
@@ -1,5 +1,5 @@
-/*
- Copyright (C) 2014 Genome Research Ltd.
+/*
+ Copyright (C) 2014-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -98,7 +98,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
ploidy->id2sex[ploidy->nsex-1] = strdup(ploidy->tmp_str.s);
sp->sex = khash_str2int_inc(ploidy->sex2id, ploidy->id2sex[ploidy->nsex-1]);
ploidy->sex2dflt = (int*) realloc(ploidy->sex2dflt,sizeof(int)*ploidy->nsex);
- ploidy->sex2dflt[ploidy->nsex-1] = ploidy->dflt;
+ ploidy->sex2dflt[ploidy->nsex-1] = -1;
}
ss = se;
@@ -106,8 +106,8 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
if ( !*se ) error("Could not parse: %s\n", line);
sp->ploidy = strtol(ss,&se,10);
if ( ss==se ) error("Could not parse: %s\n", line);
- if ( sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
- if ( sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
+ if ( ploidy->min<0 || sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
+ if ( ploidy->max<0 || sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
// Special case, chr="*" stands for a default value
if ( default_ploidy_def )
@@ -119,19 +119,32 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
return 0;
}
+static void _set_defaults(ploidy_t *ploidy, int dflt)
+{
+ int i;
+ if ( khash_str2int_get(ploidy->sex2id, "*", &i) == 0 ) dflt = ploidy->sex2dflt[i];
+ for (i=0; i<ploidy->nsex; i++)
+ if ( ploidy->sex2dflt[i]==-1 ) ploidy->sex2dflt[i] = dflt;
+
+ ploidy->dflt = dflt;
+ if ( ploidy->min<0 || dflt < ploidy->min ) ploidy->min = dflt;
+ if ( ploidy->max<0 || dflt > ploidy->max ) ploidy->max = dflt;
+}
+
ploidy_t *ploidy_init(const char *fname, int dflt)
{
ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
if ( !pld ) return NULL;
- pld->dflt = pld->min = pld->max = dflt;
+ pld->min = pld->max = -1;
pld->sex2id = khash_str2int_init();
pld->idx = regidx_init(fname,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
if ( !pld->idx )
{
ploidy_destroy(pld);
- pld = NULL;
+ return NULL;
}
+ _set_defaults(pld,dflt);
return pld;
}
@@ -140,7 +153,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
if ( !pld ) return NULL;
- pld->dflt = pld->min = pld->max = dflt;
+ pld->min = pld->max = -1;
pld->sex2id = khash_str2int_init();
pld->idx = regidx_init(NULL,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
@@ -160,6 +173,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
regidx_insert(pld->idx,NULL);
free(tmp.s);
+ _set_defaults(pld,dflt);
return pld;
}
diff --git a/bcftools/ploidy.c.pysam.c b/bcftools/ploidy.c.pysam.c
index 4f567a3..d0468b9 100644
--- a/bcftools/ploidy.c.pysam.c
+++ b/bcftools/ploidy.c.pysam.c
@@ -1,7 +1,7 @@
#include "pysam.h"
-/*
- Copyright (C) 2014 Genome Research Ltd.
+/*
+ Copyright (C) 2014-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -100,7 +100,7 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
ploidy->id2sex[ploidy->nsex-1] = strdup(ploidy->tmp_str.s);
sp->sex = khash_str2int_inc(ploidy->sex2id, ploidy->id2sex[ploidy->nsex-1]);
ploidy->sex2dflt = (int*) realloc(ploidy->sex2dflt,sizeof(int)*ploidy->nsex);
- ploidy->sex2dflt[ploidy->nsex-1] = ploidy->dflt;
+ ploidy->sex2dflt[ploidy->nsex-1] = -1;
}
ss = se;
@@ -108,8 +108,8 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
if ( !*se ) error("Could not parse: %s\n", line);
sp->ploidy = strtol(ss,&se,10);
if ( ss==se ) error("Could not parse: %s\n", line);
- if ( sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
- if ( sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
+ if ( ploidy->min<0 || sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
+ if ( ploidy->max<0 || sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;
// Special case, chr="*" stands for a default value
if ( default_ploidy_def )
@@ -121,19 +121,32 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
return 0;
}
+static void _set_defaults(ploidy_t *ploidy, int dflt)
+{
+ int i;
+ if ( khash_str2int_get(ploidy->sex2id, "*", &i) == 0 ) dflt = ploidy->sex2dflt[i];
+ for (i=0; i<ploidy->nsex; i++)
+ if ( ploidy->sex2dflt[i]==-1 ) ploidy->sex2dflt[i] = dflt;
+
+ ploidy->dflt = dflt;
+ if ( ploidy->min<0 || dflt < ploidy->min ) ploidy->min = dflt;
+ if ( ploidy->max<0 || dflt > ploidy->max ) ploidy->max = dflt;
+}
+
ploidy_t *ploidy_init(const char *fname, int dflt)
{
ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
if ( !pld ) return NULL;
- pld->dflt = pld->min = pld->max = dflt;
+ pld->min = pld->max = -1;
pld->sex2id = khash_str2int_init();
pld->idx = regidx_init(fname,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
if ( !pld->idx )
{
ploidy_destroy(pld);
- pld = NULL;
+ return NULL;
}
+ _set_defaults(pld,dflt);
return pld;
}
@@ -142,7 +155,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
ploidy_t *pld = (ploidy_t*) calloc(1,sizeof(ploidy_t));
if ( !pld ) return NULL;
- pld->dflt = pld->min = pld->max = dflt;
+ pld->min = pld->max = -1;
pld->sex2id = khash_str2int_init();
pld->idx = regidx_init(NULL,ploidy_parse,NULL,sizeof(sex_ploidy_t),pld);
@@ -162,6 +175,7 @@ ploidy_t *ploidy_init_string(const char *str, int dflt)
regidx_insert(pld->idx,NULL);
free(tmp.s);
+ _set_defaults(pld,dflt);
return pld;
}
diff --git a/bcftools/prob1.c.pysam.c b/bcftools/prob1.c.pysam.c
index bad2478..a59ec44 100644
--- a/bcftools/prob1.c.pysam.c
+++ b/bcftools/prob1.c.pysam.c
@@ -128,7 +128,7 @@ int bcf_p1_set_n1(bcf_p1aux_t *b, int n1)
{
if (n1 == 0 || n1 >= b->n) return -1;
if (b->M != b->n * 2) {
- fprintf(pysamerr, "[%s] unable to set `n1' when there are haploid samples.\n", __func__);
+ fprintf(pysam_stderr, "[%s] unable to set `n1' when there are haploid samples.\n", __func__);
return -1;
}
b->n1 = n1;
@@ -523,9 +523,9 @@ int bcf_p1_cal(call_t *call, bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1
void bcf_p1_dump_afs(bcf_p1aux_t *ma)
{
int k;
- fprintf(pysamerr, "[afs]");
+ fprintf(pysam_stderr, "[afs]");
for (k = 0; k <= ma->M; ++k)
- fprintf(pysamerr, " %d:%.3lf", k, ma->afs[ma->M - k]);
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, " %d:%.3lf", k, ma->afs[ma->M - k]);
+ fprintf(pysam_stderr, "\n");
memset(ma->afs, 0, sizeof(double) * (ma->M + 1));
}
diff --git a/bcftools/pysam.h b/bcftools/pysam.h
index 008cbbd..b0fc4fb 100644
--- a/bcftools/pysam.h
+++ b/bcftools/pysam.h
@@ -1,5 +1,7 @@
#ifndef PYSAM_H
#define PYSAM_H
#include "stdio.h"
-extern FILE * pysamerr;
+extern FILE * pysam_stderr;
+extern FILE * pysam_stdout;
+extern const char * pysam_stdout_fn;
#endif
diff --git a/bcftools/tabix.c.pysam.c b/bcftools/tabix.c.pysam.c
index 0eb328f..afa3619 100644
--- a/bcftools/tabix.c.pysam.c
+++ b/bcftools/tabix.c.pysam.c
@@ -52,24 +52,24 @@ int main_tabix(int argc, char *argv[])
else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;
else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;
else {
- fprintf(pysamerr, "The type '%s' not recognised\n", optarg);
+ fprintf(pysam_stderr, "The type '%s' not recognised\n", optarg);
return 1;
}
}
if (optind == argc) {
- fprintf(pysamerr, "\nUsage: bcftools tabix [options] <in.gz> [reg1 [...]]\n\n");
- fprintf(pysamerr, "Options: -p STR preset: gff, bed, sam or vcf [gff]\n");
- fprintf(pysamerr, " -s INT column number for sequence names (suppressed by -p) [1]\n");
- fprintf(pysamerr, " -b INT column number for region start [4]\n");
- fprintf(pysamerr, " -e INT column number for region end (if no end, set INT to -b) [5]\n");
- fprintf(pysamerr, " -0 specify coordinates are zero-based\n");
- fprintf(pysamerr, " -S INT skip first INT lines [0]\n");
- fprintf(pysamerr, " -c CHAR skip lines starting with CHAR [null]\n");
- fprintf(pysamerr, " -a print all records\n");
- fprintf(pysamerr, " -f force to overwrite existing index\n");
- fprintf(pysamerr, " -m INT set the minimal interval size to 1<<INT; 0 for the old tabix index [0]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\nUsage: bcftools tabix [options] <in.gz> [reg1 [...]]\n\n");
+ fprintf(pysam_stderr, "Options: -p STR preset: gff, bed, sam or vcf [gff]\n");
+ fprintf(pysam_stderr, " -s INT column number for sequence names (suppressed by -p) [1]\n");
+ fprintf(pysam_stderr, " -b INT column number for region start [4]\n");
+ fprintf(pysam_stderr, " -e INT column number for region end (if no end, set INT to -b) [5]\n");
+ fprintf(pysam_stderr, " -0 specify coordinates are zero-based\n");
+ fprintf(pysam_stderr, " -S INT skip first INT lines [0]\n");
+ fprintf(pysam_stderr, " -c CHAR skip lines starting with CHAR [null]\n");
+ fprintf(pysam_stderr, " -a print all records\n");
+ fprintf(pysam_stderr, " -f force to overwrite existing index\n");
+ fprintf(pysam_stderr, " -m INT set the minimal interval size to 1<<INT; 0 for the old tabix index [0]\n");
+ fprintf(pysam_stderr, "\n");
return 1;
}
if (is_all) { // read without random access
@@ -77,7 +77,7 @@ int main_tabix(int argc, char *argv[])
BGZF *fp;
s.l = s.m = 0; s.s = 0;
fp = bgzf_open(argv[optind], "r");
- while (bgzf_getline(fp, '\n', &s) >= 0) puts(s.s);
+ while (bgzf_getline(fp, '\n', &s) >= 0) fputs(s.s, pysam_stdout) & fputc('\n', pysam_stdout);
bgzf_close(fp);
free(s.s);
} else if (optind + 2 > argc) { // create index
@@ -100,13 +100,13 @@ int main_tabix(int argc, char *argv[])
strcat(strcpy(fn, argv[optind]), min_shift <= 0? ".tbi" : ".csi");
if ((fp = fopen(fn, "rb")) != 0) {
fclose(fp);
- fprintf(pysamerr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__);
+ fprintf(pysam_stderr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__);
return 1;
}
}
if ( tbx_index_build(argv[optind], min_shift, &conf) )
{
- fprintf(pysamerr,"tbx_index_build failed: Is the file bgzip-compressed? Was wrong -p [type] option used?\n");
+ fprintf(pysam_stderr,"tbx_index_build failed: Is the file bgzip-compressed? Was wrong -p [type] option used?\n");
return 1;
}
} else { // read with random access
@@ -120,7 +120,7 @@ int main_tabix(int argc, char *argv[])
for (i = optind + 1; i < argc; ++i) {
hts_itr_t *itr;
if ((itr = tbx_itr_querys(tbx, argv[i])) == 0) continue;
- while (tbx_bgzf_itr_next(fp, tbx, itr, &s) >= 0) puts(s.s);
+ while (tbx_bgzf_itr_next(fp, tbx, itr, &s) >= 0) fputs(s.s, pysam_stdout) & fputc('\n', pysam_stdout);
tbx_itr_destroy(itr);
}
free(s.s);
diff --git a/bcftools/vcfannotate.c b/bcftools/vcfannotate.c
index 96a1649..d5164f3 100644
--- a/bcftools/vcfannotate.c
+++ b/bcftools/vcfannotate.c
@@ -1,6 +1,6 @@
/* vcfannotate.c -- Annotate and edit VCF/BCF files.
- Copyright (C) 2013-2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -120,7 +120,7 @@ typedef struct _args_t
char **argv, *output_fname, *targets_fname, *regions_list, *header_fname;
char *remove_annots, *columns, *rename_chrs, *sample_names, *mark_sites;
- int argc, drop_header, tgts_is_vcf, mark_sites_logic;
+ int argc, drop_header, record_cmd_line, tgts_is_vcf, mark_sites_logic;
}
args_t;
@@ -809,6 +809,135 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
return bcf_update_genotypes(args->hdr_out,line,args->tmpi3,nsrc*bcf_hdr_nsamples(args->hdr_out));
}
}
+static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
+{
+ int i, nmax = 0;
+ for (i=icol_beg; i<icol_end; i++)
+ {
+ char *str = tab->cols[i], *end = str;
+ if ( str[0]=='.' && !str[1] )
+ {
+ // missing value
+ if ( !nmax ) nmax = 1;
+ continue;
+ }
+ int n = 1;
+ while ( *end )
+ {
+ if ( *end==',' ) n++;
+ end++;
+ }
+ if ( nmax<n ) nmax = n;
+ }
+ return nmax;
+}
+static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+ annot_line_t *tab = (annot_line_t*) data;
+ int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+ assert( col->icol+nsmpl <= tab->ncols );
+ int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+ assert( nvals>0 );
+ hts_expand(int32_t,nvals*nsmpl,args->mtmpi,args->tmpi);
+
+ int icol = col->icol, ismpl;
+ for (ismpl=0; ismpl<nsmpl; ismpl++)
+ {
+ int32_t *ptr = args->tmpi + ismpl*nvals;
+ int ival = 0;
+
+ char *str = tab->cols[icol];
+ while ( *str )
+ {
+ if ( str[0]=='.' && (!str[1] || str[1]==',') ) // missing value
+ {
+ ptr[ival++] = bcf_int32_missing;
+ str += str[1] ? 2 : 1;
+ continue;
+ }
+
+ char *end = str;
+ ptr[ival] = strtol(str, &end, 10);
+ if ( end==str )
+ error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+ ival++;
+ str = *end ? end+1 : end;
+ }
+ while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
+ icol++;
+ }
+ return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsmpl*nvals);
+}
+static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+ annot_line_t *tab = (annot_line_t*) data;
+ int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+ assert( col->icol+nsmpl <= tab->ncols );
+ int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+ assert( nvals>0 );
+ hts_expand(float,nvals*nsmpl,args->mtmpf,args->tmpf);
+
+ int icol = col->icol, ismpl;
+ for (ismpl=0; ismpl<nsmpl; ismpl++)
+ {
+ float *ptr = args->tmpf + ismpl*nvals;
+ int ival = 0;
+
+ char *str = tab->cols[icol];
+ while ( *str )
+ {
+ if ( str[0]=='.' && (!str[1] || str[1]==',') ) // missing value
+ {
+ bcf_float_set_missing(ptr[ival]);
+ ival++;
+ str += str[1] ? 2 : 1;
+ continue;
+ }
+
+ char *end = str;
+ ptr[ival] = strtod(str, &end);
+ if ( end==str )
+ error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+ ival++;
+ str = *end ? end+1 : end;
+ }
+ while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
+ icol++;
+ }
+ return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsmpl*nvals);
+}
+static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+ annot_line_t *tab = (annot_line_t*) data;
+ int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+ assert( col->icol+nsmpl <= tab->ncols );
+
+ int i, max_len = 0;
+ for (i=col->icol; i<col->icol+nsmpl; i++)
+ {
+ int len = strlen(tab->cols[i]);
+ if ( max_len < len ) max_len = len;
+ }
+ hts_expand(char,max_len*nsmpl,args->mtmps,args->tmps);
+
+ int icol = col->icol, ismpl;
+ for (ismpl=0; ismpl<nsmpl; ismpl++)
+ {
+ char *ptr = args->tmps + ismpl*max_len;
+ char *str = tab->cols[icol];
+ i = 0;
+ while ( str[i] )
+ {
+ ptr[i] = str[i];
+ i++;
+ }
+ while ( i<max_len ) ptr[i++] = 0;
+ icol++;
+ }
+ return bcf_update_format_char(args->hdr_out,line,col->hdr_key,args->tmps,nsmpl*max_len);
+}
static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
{
bcf1_t *rec = (bcf1_t*) data;
@@ -1127,7 +1256,7 @@ static void init_columns(args_t *args)
kstring_t str = {0,0,0}, tmp = {0,0,0};
char *ss = args->columns, *se = ss;
args->ncols = 0;
- int i = -1, has_fmt_str = 0, force_samples = -1;
+ int icol = -1, has_fmt_str = 0, force_samples = -1;
while ( *ss )
{
if ( *se && *se!=',' ) { se++; continue; }
@@ -1135,22 +1264,22 @@ static void init_columns(args_t *args)
if ( *ss=='+' ) { replace = REPLACE_MISSING; ss++; }
else if ( *ss=='-' ) { replace = REPLACE_EXISTING; ss++; }
else if ( *ss=='=' ) { replace = SET_OR_APPEND; ss++; }
- i++;
+ icol++;
str.l = 0;
kputsn(ss, se-ss, &str);
if ( !str.s[0] || !strcasecmp("-",str.s) ) ;
- else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = i;
- else if ( !strcasecmp("POS",str.s) ) args->from_idx = i;
- else if ( !strcasecmp("FROM",str.s) ) args->from_idx = i;
- else if ( !strcasecmp("TO",str.s) ) args->to_idx = i;
- else if ( !strcasecmp("REF",str.s) ) args->ref_idx = i;
- else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = i;
+ else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = icol;
+ else if ( !strcasecmp("POS",str.s) ) args->from_idx = icol;
+ else if ( !strcasecmp("FROM",str.s) ) args->from_idx = icol;
+ else if ( !strcasecmp("TO",str.s) ) args->to_idx = icol;
+ else if ( !strcasecmp("REF",str.s) ) args->ref_idx = icol;
+ else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = icol;
else if ( !strcasecmp("ID",str.s) )
{
if ( replace==REPLACE_EXISTING ) error("Apologies, the -ID feature has not been implemented yet.\n");
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
col->hdr_key = strdup(str.s);
@@ -1160,7 +1289,7 @@ static void init_columns(args_t *args)
if ( replace==REPLACE_EXISTING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
col->hdr_key = strdup(str.s);
@@ -1187,7 +1316,7 @@ static void init_columns(args_t *args)
if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
col->hdr_key = strdup(str.s);
@@ -1262,30 +1391,38 @@ static void init_columns(args_t *args)
}
else if ( !strncasecmp("FORMAT/",str.s, 7) || !strncasecmp("FMT/",str.s,4) )
{
- if ( !args->tgts_is_vcf )
- error("Error: FORMAT fields can be carried over from a VCF file only.\n");
-
char *key = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
if ( force_samples<0 ) force_samples = replace;
- if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;;
- bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
- tmp.l = 0;
- bcf_hrec_format(hrec, &tmp);
- bcf_hdr_append(args->hdr_out, tmp.s);
- bcf_hdr_sync(args->hdr_out);
+ if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+ if ( args->tgts_is_vcf )
+ {
+ bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
+ tmp.l = 0;
+ bcf_hrec_format(hrec, &tmp);
+ bcf_hdr_append(args->hdr_out, tmp.s);
+ bcf_hdr_sync(args->hdr_out);
+ }
int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key);
+ if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
+ error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = -1;
+ if ( !args->tgts_is_vcf )
+ {
+ col->icol = icol;
+ icol += bcf_hdr_nsamples(args->hdr_out) - 1;
+ }
+ else
+ col->icol = -1;
col->replace = replace;
col->hdr_key = strdup(key);
if ( !strcasecmp("GT",key) ) col->setter = vcf_setter_format_gt;
else
switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
{
- case BCF_HT_INT: col->setter = vcf_setter_format_int; break;
- case BCF_HT_REAL: col->setter = vcf_setter_format_real; break;
- case BCF_HT_STR: col->setter = vcf_setter_format_str; has_fmt_str = 1; break;
+ case BCF_HT_INT: col->setter = args->tgts_is_vcf ? vcf_setter_format_int : setter_format_int; break;
+ case BCF_HT_REAL: col->setter = args->tgts_is_vcf ? vcf_setter_format_real : setter_format_real; break;
+ case BCF_HT_STR: col->setter = args->tgts_is_vcf ? vcf_setter_format_str : setter_format_str; has_fmt_str = 1; break;
default: error("The type of %s not recognised (%d)\n", str.s,bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id));
}
}
@@ -1314,7 +1451,7 @@ static void init_columns(args_t *args)
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->hdr_key = strdup(str.s);
col->number = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
@@ -1338,11 +1475,12 @@ static void init_columns(args_t *args)
if ( skip_fmt ) khash_str2int_destroy_free(skip_fmt);
if ( has_fmt_str )
{
- int n = bcf_hdr_nsamples(args->hdr_out) > bcf_hdr_nsamples(args->files->readers[1].header) ? bcf_hdr_nsamples(args->hdr_out) : bcf_hdr_nsamples(args->files->readers[1].header);
+ int n = bcf_hdr_nsamples(args->hdr_out);
+ if ( args->tgts_is_vcf && n<bcf_hdr_nsamples(args->files->readers[1].header) ) n = bcf_hdr_nsamples(args->files->readers[1].header);
args->tmpp = (char**)malloc(sizeof(char*)*n);
args->tmpp2 = (char**)malloc(sizeof(char*)*n);
}
- if ( force_samples>=0 )
+ if ( force_samples>=0 && args->tgts_is_vcf )
set_samples(args, args->files->readers[1].header, args->hdr, force_samples==REPLACE_ALL ? 0 : 1);
}
@@ -1419,7 +1557,7 @@ static void init_data(args_t *args)
args->mark_sites,args->mark_sites_logic==MARK_LISTED?"":"not ",args->mark_sites);
}
- bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
if ( !args->drop_header )
{
if ( args->rename_chrs ) rename_chrs(args, args->rename_chrs);
@@ -1517,8 +1655,10 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
}
if ( args->ref_idx != -1 )
{
- assert( args->ref_idx < tmp->ncols );
- assert( args->alt_idx < tmp->ncols );
+ if ( args->ref_idx >= tmp->ncols )
+ error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->ref_idx+1,tmp->ncols,args->tgts->line.s);
+ if ( args->alt_idx >= tmp->ncols )
+ error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->alt_idx+1,tmp->ncols,args->tgts->line.s);
tmp->nals = 2;
hts_expand(char*,tmp->nals,tmp->mals,tmp->als);
tmp->als[0] = tmp->cols[args->ref_idx];
@@ -1624,9 +1764,10 @@ static void usage(args_t *args)
fprintf(stderr, " -c, --columns <list> list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
fprintf(stderr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
fprintf(stderr, " -h, --header-lines <file> lines which should be appended to the VCF header\n");
- fprintf(stderr, " -I, --set-id [+]<format> set ID column, see man pagee for details\n");
- fprintf(stderr, " -i, --include <expr> select sites for which the expression is true (see man pagee for details)\n");
+ fprintf(stderr, " -I, --set-id [+]<format> set ID column, see man page for details\n");
+ fprintf(stderr, " -i, --include <expr> select sites for which the expression is true (see man page for details)\n");
fprintf(stderr, " -m, --mark-sites [+-]<tag> add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
fprintf(stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
@@ -1649,6 +1790,7 @@ int main_vcfannotate(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->ref_idx = args->alt_idx = args->chr_idx = args->from_idx = args->to_idx = -1;
args->set_ids_replace = 1;
int regions_is_file = 0;
@@ -1671,6 +1813,7 @@ int main_vcfannotate(int argc, char *argv[])
{"header-lines",required_argument,NULL,'h'},
{"samples",required_argument,NULL,'s'},
{"samples-file",required_argument,NULL,'S'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:i:e:S:s:I:m:",loptions,NULL)) >= 0)
@@ -1705,6 +1848,7 @@ int main_vcfannotate(int argc, char *argv[])
case 'h': args->header_fname = optarg; break;
case 1 : args->rename_chrs = optarg; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case '?': usage(args); break;
default: error("Unknown argument: %s\n", optarg);
}
diff --git a/bcftools/vcfannotate.c.pysam.c b/bcftools/vcfannotate.c.pysam.c
index 1d86dbe..ea8398c 100644
--- a/bcftools/vcfannotate.c.pysam.c
+++ b/bcftools/vcfannotate.c.pysam.c
@@ -2,7 +2,7 @@
/* vcfannotate.c -- Annotate and edit VCF/BCF files.
- Copyright (C) 2013-2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -122,7 +122,7 @@ typedef struct _args_t
char **argv, *output_fname, *targets_fname, *regions_list, *header_fname;
char *remove_annots, *columns, *rename_chrs, *sample_names, *mark_sites;
- int argc, drop_header, tgts_is_vcf, mark_sites_logic;
+ int argc, drop_header, record_cmd_line, tgts_is_vcf, mark_sites_logic;
}
args_t;
@@ -265,7 +265,7 @@ static void init_remove_annots(args_t *args)
int id = bcf_hdr_id2int(args->hdr,BCF_DT_ID,str.s);
if ( !bcf_hdr_idinfo_exists(args->hdr,type,id) )
{
- fprintf(pysamerr,"Warning: The tag \"%s\" not defined in the header\n", str.s);
+ fprintf(pysam_stderr,"Warning: The tag \"%s\" not defined in the header\n", str.s);
args->nrm--;
}
else if ( (type==BCF_HL_FMT && keep_fmt) || (type==BCF_HL_INFO && keep_info) )
@@ -811,6 +811,135 @@ static int vcf_setter_format_gt(args_t *args, bcf1_t *line, annot_col_t *col, vo
return bcf_update_genotypes(args->hdr_out,line,args->tmpi3,nsrc*bcf_hdr_nsamples(args->hdr_out));
}
}
+static int count_vals(annot_line_t *tab, int icol_beg, int icol_end)
+{
+ int i, nmax = 0;
+ for (i=icol_beg; i<icol_end; i++)
+ {
+ char *str = tab->cols[i], *end = str;
+ if ( str[0]=='.' && !str[1] )
+ {
+ // missing value
+ if ( !nmax ) nmax = 1;
+ continue;
+ }
+ int n = 1;
+ while ( *end )
+ {
+ if ( *end==',' ) n++;
+ end++;
+ }
+ if ( nmax<n ) nmax = n;
+ }
+ return nmax;
+}
+static int setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+ annot_line_t *tab = (annot_line_t*) data;
+ int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+ assert( col->icol+nsmpl <= tab->ncols );
+ int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+ assert( nvals>0 );
+ hts_expand(int32_t,nvals*nsmpl,args->mtmpi,args->tmpi);
+
+ int icol = col->icol, ismpl;
+ for (ismpl=0; ismpl<nsmpl; ismpl++)
+ {
+ int32_t *ptr = args->tmpi + ismpl*nvals;
+ int ival = 0;
+
+ char *str = tab->cols[icol];
+ while ( *str )
+ {
+ if ( str[0]=='.' && (!str[1] || str[1]==',') ) // missing value
+ {
+ ptr[ival++] = bcf_int32_missing;
+ str += str[1] ? 2 : 1;
+ continue;
+ }
+
+ char *end = str;
+ ptr[ival] = strtol(str, &end, 10);
+ if ( end==str )
+ error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+ ival++;
+ str = *end ? end+1 : end;
+ }
+ while ( ival<nvals ) ptr[ival++] = bcf_int32_vector_end;
+ icol++;
+ }
+ return bcf_update_format_int32(args->hdr_out,line,col->hdr_key,args->tmpi,nsmpl*nvals);
+}
+static int setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+ annot_line_t *tab = (annot_line_t*) data;
+ int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+ assert( col->icol+nsmpl <= tab->ncols );
+ int nvals = count_vals(tab,col->icol,col->icol+nsmpl);
+ assert( nvals>0 );
+ hts_expand(float,nvals*nsmpl,args->mtmpf,args->tmpf);
+
+ int icol = col->icol, ismpl;
+ for (ismpl=0; ismpl<nsmpl; ismpl++)
+ {
+ float *ptr = args->tmpf + ismpl*nvals;
+ int ival = 0;
+
+ char *str = tab->cols[icol];
+ while ( *str )
+ {
+ if ( str[0]=='.' && (!str[1] || str[1]==',') ) // missing value
+ {
+ bcf_float_set_missing(ptr[ival]);
+ ival++;
+ str += str[1] ? 2 : 1;
+ continue;
+ }
+
+ char *end = str;
+ ptr[ival] = strtod(str, &end);
+ if ( end==str )
+ error("Could not parse %s at %s:%d .. [%s]\n", col->hdr_key,bcf_seqname(args->hdr,line),line->pos+1,tab->cols[col->icol]);
+
+ ival++;
+ str = *end ? end+1 : end;
+ }
+ while ( ival<nvals ) { bcf_float_set_vector_end(ptr[ival]); ival++; }
+ icol++;
+ }
+ return bcf_update_format_float(args->hdr_out,line,col->hdr_key,args->tmpf,nsmpl*nvals);
+}
+static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
+{
+ annot_line_t *tab = (annot_line_t*) data;
+ int nsmpl = bcf_hdr_nsamples(args->hdr_out);
+ assert( col->icol+nsmpl <= tab->ncols );
+
+ int i, max_len = 0;
+ for (i=col->icol; i<col->icol+nsmpl; i++)
+ {
+ int len = strlen(tab->cols[i]);
+ if ( max_len < len ) max_len = len;
+ }
+ hts_expand(char,max_len*nsmpl,args->mtmps,args->tmps);
+
+ int icol = col->icol, ismpl;
+ for (ismpl=0; ismpl<nsmpl; ismpl++)
+ {
+ char *ptr = args->tmps + ismpl*max_len;
+ char *str = tab->cols[icol];
+ i = 0;
+ while ( str[i] )
+ {
+ ptr[i] = str[i];
+ i++;
+ }
+ while ( i<max_len ) ptr[i++] = 0;
+ icol++;
+ }
+ return bcf_update_format_char(args->hdr_out,line,col->hdr_key,args->tmps,nsmpl*max_len);
+}
static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
{
bcf1_t *rec = (bcf1_t*) data;
@@ -1010,7 +1139,7 @@ static void set_samples(args_t *args, bcf_hdr_t *src, bcf_hdr_t *dst, int need_s
return; // the same samples in both files
if ( !nmatch ) error("No matching samples found in the source and the destination file\n");
- if ( nmatch!=bcf_hdr_nsamples(src) || nmatch!=bcf_hdr_nsamples(dst) ) fprintf(pysamerr,"%d sample(s) in common\n", nmatch);
+ if ( nmatch!=bcf_hdr_nsamples(src) || nmatch!=bcf_hdr_nsamples(dst) ) fprintf(pysam_stderr,"%d sample(s) in common\n", nmatch);
args->nsample_map = bcf_hdr_nsamples(dst);
args->sample_map = (int*) malloc(sizeof(int)*args->nsample_map);
@@ -1129,7 +1258,7 @@ static void init_columns(args_t *args)
kstring_t str = {0,0,0}, tmp = {0,0,0};
char *ss = args->columns, *se = ss;
args->ncols = 0;
- int i = -1, has_fmt_str = 0, force_samples = -1;
+ int icol = -1, has_fmt_str = 0, force_samples = -1;
while ( *ss )
{
if ( *se && *se!=',' ) { se++; continue; }
@@ -1137,22 +1266,22 @@ static void init_columns(args_t *args)
if ( *ss=='+' ) { replace = REPLACE_MISSING; ss++; }
else if ( *ss=='-' ) { replace = REPLACE_EXISTING; ss++; }
else if ( *ss=='=' ) { replace = SET_OR_APPEND; ss++; }
- i++;
+ icol++;
str.l = 0;
kputsn(ss, se-ss, &str);
if ( !str.s[0] || !strcasecmp("-",str.s) ) ;
- else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = i;
- else if ( !strcasecmp("POS",str.s) ) args->from_idx = i;
- else if ( !strcasecmp("FROM",str.s) ) args->from_idx = i;
- else if ( !strcasecmp("TO",str.s) ) args->to_idx = i;
- else if ( !strcasecmp("REF",str.s) ) args->ref_idx = i;
- else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = i;
+ else if ( !strcasecmp("CHROM",str.s) ) args->chr_idx = icol;
+ else if ( !strcasecmp("POS",str.s) ) args->from_idx = icol;
+ else if ( !strcasecmp("FROM",str.s) ) args->from_idx = icol;
+ else if ( !strcasecmp("TO",str.s) ) args->to_idx = icol;
+ else if ( !strcasecmp("REF",str.s) ) args->ref_idx = icol;
+ else if ( !strcasecmp("ALT",str.s) ) args->alt_idx = icol;
else if ( !strcasecmp("ID",str.s) )
{
if ( replace==REPLACE_EXISTING ) error("Apologies, the -ID feature has not been implemented yet.\n");
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->setter = args->tgts_is_vcf ? vcf_setter_id : setter_id;
col->hdr_key = strdup(str.s);
@@ -1162,7 +1291,7 @@ static void init_columns(args_t *args)
if ( replace==REPLACE_EXISTING ) error("Apologies, the -FILTER feature has not been implemented yet.\n");
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->setter = args->tgts_is_vcf ? vcf_setter_filter : setter_filter;
col->hdr_key = strdup(str.s);
@@ -1189,7 +1318,7 @@ static void init_columns(args_t *args)
if ( replace==SET_OR_APPEND ) error("Apologies, the =QUAL feature has not been implemented yet.\n");
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->setter = args->tgts_is_vcf ? vcf_setter_qual : setter_qual;
col->hdr_key = strdup(str.s);
@@ -1264,30 +1393,38 @@ static void init_columns(args_t *args)
}
else if ( !strncasecmp("FORMAT/",str.s, 7) || !strncasecmp("FMT/",str.s,4) )
{
- if ( !args->tgts_is_vcf )
- error("Error: FORMAT fields can be carried over from a VCF file only.\n");
-
char *key = str.s + (!strncasecmp("FMT/",str.s,4) ? 4 : 7);
if ( force_samples<0 ) force_samples = replace;
- if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;;
- bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
- tmp.l = 0;
- bcf_hrec_format(hrec, &tmp);
- bcf_hdr_append(args->hdr_out, tmp.s);
- bcf_hdr_sync(args->hdr_out);
+ if ( force_samples>=0 && replace!=REPLACE_ALL ) force_samples = replace;
+ if ( args->tgts_is_vcf )
+ {
+ bcf_hrec_t *hrec = bcf_hdr_get_hrec(args->files->readers[1].header, BCF_HL_FMT, "ID", key, NULL);
+ tmp.l = 0;
+ bcf_hrec_format(hrec, &tmp);
+ bcf_hdr_append(args->hdr_out, tmp.s);
+ bcf_hdr_sync(args->hdr_out);
+ }
int hdr_id = bcf_hdr_id2int(args->hdr_out, BCF_DT_ID, key);
+ if ( !bcf_hdr_idinfo_exists(args->hdr_out,BCF_HL_FMT,hdr_id) )
+ error("The tag \"%s\" is not defined in %s\n", str.s, args->targets_fname);
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = -1;
+ if ( !args->tgts_is_vcf )
+ {
+ col->icol = icol;
+ icol += bcf_hdr_nsamples(args->hdr_out) - 1;
+ }
+ else
+ col->icol = -1;
col->replace = replace;
col->hdr_key = strdup(key);
if ( !strcasecmp("GT",key) ) col->setter = vcf_setter_format_gt;
else
switch ( bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id) )
{
- case BCF_HT_INT: col->setter = vcf_setter_format_int; break;
- case BCF_HT_REAL: col->setter = vcf_setter_format_real; break;
- case BCF_HT_STR: col->setter = vcf_setter_format_str; has_fmt_str = 1; break;
+ case BCF_HT_INT: col->setter = args->tgts_is_vcf ? vcf_setter_format_int : setter_format_int; break;
+ case BCF_HT_REAL: col->setter = args->tgts_is_vcf ? vcf_setter_format_real : setter_format_real; break;
+ case BCF_HT_STR: col->setter = args->tgts_is_vcf ? vcf_setter_format_str : setter_format_str; has_fmt_str = 1; break;
default: error("The type of %s not recognised (%d)\n", str.s,bcf_hdr_id2type(args->hdr_out,BCF_HL_FMT,hdr_id));
}
}
@@ -1316,7 +1453,7 @@ static void init_columns(args_t *args)
args->ncols++; args->cols = (annot_col_t*) realloc(args->cols,sizeof(annot_col_t)*args->ncols);
annot_col_t *col = &args->cols[args->ncols-1];
- col->icol = i;
+ col->icol = icol;
col->replace = replace;
col->hdr_key = strdup(str.s);
col->number = bcf_hdr_id2length(args->hdr_out,BCF_HL_INFO,hdr_id);
@@ -1340,11 +1477,12 @@ static void init_columns(args_t *args)
if ( skip_fmt ) khash_str2int_destroy_free(skip_fmt);
if ( has_fmt_str )
{
- int n = bcf_hdr_nsamples(args->hdr_out) > bcf_hdr_nsamples(args->files->readers[1].header) ? bcf_hdr_nsamples(args->hdr_out) : bcf_hdr_nsamples(args->files->readers[1].header);
+ int n = bcf_hdr_nsamples(args->hdr_out);
+ if ( args->tgts_is_vcf && n<bcf_hdr_nsamples(args->files->readers[1].header) ) n = bcf_hdr_nsamples(args->files->readers[1].header);
args->tmpp = (char**)malloc(sizeof(char*)*n);
args->tmpp2 = (char**)malloc(sizeof(char*)*n);
}
- if ( force_samples>=0 )
+ if ( force_samples>=0 && args->tgts_is_vcf )
set_samples(args, args->files->readers[1].header, args->hdr, force_samples==REPLACE_ALL ? 0 : 1);
}
@@ -1421,7 +1559,7 @@ static void init_data(args_t *args)
args->mark_sites,args->mark_sites_logic==MARK_LISTED?"":"not ",args->mark_sites);
}
- bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_annotate");
if ( !args->drop_header )
{
if ( args->rename_chrs ) rename_chrs(args, args->rename_chrs);
@@ -1519,8 +1657,10 @@ static void buffer_annot_lines(args_t *args, bcf1_t *line, int start_pos, int en
}
if ( args->ref_idx != -1 )
{
- assert( args->ref_idx < tmp->ncols );
- assert( args->alt_idx < tmp->ncols );
+ if ( args->ref_idx >= tmp->ncols )
+ error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->ref_idx+1,tmp->ncols,args->tgts->line.s);
+ if ( args->alt_idx >= tmp->ncols )
+ error("Could not parse the line, expected %d+ columns, found %d:\n\t%s\n",args->alt_idx+1,tmp->ncols,args->tgts->line.s);
tmp->nals = 2;
hts_expand(char*,tmp->nals,tmp->mals,tmp->als);
tmp->als[0] = tmp->cols[args->ref_idx];
@@ -1617,28 +1757,29 @@ static void annotate(args_t *args, bcf1_t *line)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Annotate and edit VCF/BCF files.\n");
- fprintf(pysamerr, "Usage: bcftools annotate [options] <in.vcf.gz>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -a, --annotations <file> VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
- fprintf(pysamerr, " -c, --columns <list> list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
- fprintf(pysamerr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
- fprintf(pysamerr, " -h, --header-lines <file> lines which should be appended to the VCF header\n");
- fprintf(pysamerr, " -I, --set-id [+]<format> set ID column, see man pagee for details\n");
- fprintf(pysamerr, " -i, --include <expr> select sites for which the expression is true (see man pagee for details)\n");
- fprintf(pysamerr, " -m, --mark-sites [+-]<tag> add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " --rename-chrs <file> rename sequences according to map file: from\\tto\n");
- fprintf(pysamerr, " -s, --samples [^]<list> comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
- fprintf(pysamerr, " -S, --samples-file [^]<file> file of samples to annotate (or exclude with \"^\" prefix)\n");
- fprintf(pysamerr, " -x, --remove <list> list of annotations to remove (e.g. ID,INFO/DP,FORMAT/DP,FILTER). See man page for details\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Annotate and edit VCF/BCF files.\n");
+ fprintf(pysam_stderr, "Usage: bcftools annotate [options] <in.vcf.gz>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -a, --annotations <file> VCF file or tabix-indexed file with annotations: CHR\\tPOS[\\tVALUE]+\n");
+ fprintf(pysam_stderr, " -c, --columns <list> list of columns in the annotation file, e.g. CHROM,POS,REF,ALT,-,INFO/TAG. See man page for details\n");
+ fprintf(pysam_stderr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -h, --header-lines <file> lines which should be appended to the VCF header\n");
+ fprintf(pysam_stderr, " -I, --set-id [+]<format> set ID column, see man page for details\n");
+ fprintf(pysam_stderr, " -i, --include <expr> select sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -m, --mark-sites [+-]<tag> add INFO/tag flag to sites which are (\"+\") or are not (\"-\") listed in the -a file\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " --rename-chrs <file> rename sequences according to map file: from\\tto\n");
+ fprintf(pysam_stderr, " -s, --samples [^]<list> comma separated list of samples to annotate (or exclude with \"^\" prefix)\n");
+ fprintf(pysam_stderr, " -S, --samples-file [^]<file> file of samples to annotate (or exclude with \"^\" prefix)\n");
+ fprintf(pysam_stderr, " -x, --remove <list> list of annotations to remove (e.g. ID,INFO/DP,FORMAT/DP,FILTER). See man page for details\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -1651,6 +1792,7 @@ int main_vcfannotate(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->ref_idx = args->alt_idx = args->chr_idx = args->from_idx = args->to_idx = -1;
args->set_ids_replace = 1;
int regions_is_file = 0;
@@ -1673,6 +1815,7 @@ int main_vcfannotate(int argc, char *argv[])
{"header-lines",required_argument,NULL,'h'},
{"samples",required_argument,NULL,'s'},
{"samples-file",required_argument,NULL,'S'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "h:?o:O:r:R:a:x:c:i:e:S:s:I:m:",loptions,NULL)) >= 0)
@@ -1707,6 +1850,7 @@ int main_vcfannotate(int argc, char *argv[])
case 'h': args->header_fname = optarg; break;
case 1 : args->rename_chrs = optarg; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case '?': usage(args); break;
default: error("Unknown argument: %s\n", optarg);
}
diff --git a/bcftools/vcfcall.c b/bcftools/vcfcall.c
index a28caee..e5bbf11 100644
--- a/bcftools/vcfcall.c
+++ b/bcftools/vcfcall.c
@@ -1,6 +1,6 @@
/* vcfcall.c -- SNP/indel variant calling from VCF/BCF.
- Copyright (C) 2013-2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -68,7 +68,7 @@ void error(const char *format, ...);
typedef struct
{
int flag; // combination of CF_* flags above
- int output_type, n_threads;
+ int output_type, n_threads, record_cmd_line;
htsFile *bcf_in, *out_fh;
char *bcf_fname, *output_fname;
char **samples; // for subsampling and ploidy
@@ -175,6 +175,11 @@ static ploidy_predef_t ploidy_predefs[] =
"* * * M 1\n"
"* * * F 0\n"
},
+ { .alias = "1",
+ .about = "Treat all samples as haploid",
+ .ploidy =
+ "* * * * 1\n"
+ },
{
.alias = NULL,
.about = NULL,
@@ -381,7 +386,7 @@ static void init_data(args_t *args)
if ( args->regions )
{
if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
- error("Failed to read the targets: %s\n", args->regions);
+ error("Failed to read the regions: %s\n", args->regions);
}
if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open %s: %s\n", args->bcf_fname,bcf_sr_strerror(args->aux.srs->errnum));
@@ -396,9 +401,21 @@ static void init_data(args_t *args)
if ( 3*args->aux.nfams!=args->nsamples ) error("Expected only trios in %s, sorry!\n", args->samples_fname);
fprintf(stderr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
}
+ }
+ if ( args->ploidy )
+ {
args->nsex = ploidy_nsex(args->ploidy);
args->sex2ploidy = (int*) calloc(args->nsex,sizeof(int));
args->sex2ploidy_prev = (int*) calloc(args->nsex,sizeof(int));
+ if ( !args->nsamples )
+ {
+ args->nsamples = bcf_hdr_nsamples(args->aux.hdr);
+ args->sample2sex = (int*) malloc(sizeof(int)*args->nsamples);
+ for (i=0; i<args->nsamples; i++) args->sample2sex[i] = 0;
+ }
+ }
+ if ( args->nsamples )
+ {
args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
@@ -418,9 +435,12 @@ static void init_data(args_t *args)
else
{
args->aux.hdr = bcf_hdr_dup(bcf_sr_get_header(args->aux.srs,0));
- for (i=0; i<args->nsamples; i++)
- if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
- error("No such sample: %s\n", args->samples[i]);
+ if ( args->samples )
+ {
+ for (i=0; i<args->nsamples; i++)
+ if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
+ error("No such sample: %s\n", args->samples[i]);
+ }
}
args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
@@ -439,7 +459,7 @@ static void init_data(args_t *args)
bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");
- bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
bcf_hdr_write(args->out_fh, args->aux.hdr);
if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
@@ -451,7 +471,10 @@ static void destroy_data(args_t *args)
else if ( args->flag & CF_MCALL ) mcall_destroy(&args->aux);
else if ( args->flag & CF_QCALL ) qcall_destroy(&args->aux);
int i;
- for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+ if ( args->samples )
+ {
+ for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+ }
if ( args->aux.fams )
{
for (i=0; i<args->aux.nfams; i++) free(args->aux.fams[i].name);
@@ -579,6 +602,7 @@ static void usage(args_t *args)
fprintf(stderr, "Usage: bcftools call [options] <in.vcf.gz>\n");
fprintf(stderr, "\n");
fprintf(stderr, "File format options:\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
fprintf(stderr, " --ploidy <assembly>[?] predefined ploidy, 'list' to print available settings, append '?' for details\n");
@@ -634,6 +658,7 @@ int main_vcfcall(int argc, char *argv[])
args.output_fname = "-";
args.output_type = FT_VCF;
args.n_threads = 0;
+ args.record_cmd_line = 1;
args.aux.trio_Pm_SNPs = 1 - 1e-8;
args.aux.trio_Pm_ins = args.aux.trio_Pm_del = 1 - 1e-9;
@@ -668,6 +693,7 @@ int main_vcfcall(int argc, char *argv[])
{"ploidy-file",required_argument,NULL,2},
{"chromosome-X",no_argument,NULL,'X'},
{"chromosome-Y",no_argument,NULL,'Y'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
@@ -727,6 +753,7 @@ int main_vcfcall(int argc, char *argv[])
case 's': args.samples_fname = optarg; break;
case 'S': args.samples_fname = optarg; args.samples_is_file = 1; break;
case 9 : args.n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args.record_cmd_line = 0; break;
default: usage(&args);
}
}
diff --git a/bcftools/vcfcall.c.pysam.c b/bcftools/vcfcall.c.pysam.c
index 9e8c1bb..8e59fd9 100644
--- a/bcftools/vcfcall.c.pysam.c
+++ b/bcftools/vcfcall.c.pysam.c
@@ -2,7 +2,7 @@
/* vcfcall.c -- SNP/indel variant calling from VCF/BCF.
- Copyright (C) 2013-2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -70,7 +70,7 @@ void error(const char *format, ...);
typedef struct
{
int flag; // combination of CF_* flags above
- int output_type, n_threads;
+ int output_type, n_threads, record_cmd_line;
htsFile *bcf_in, *out_fh;
char *bcf_fname, *output_fname;
char **samples; // for subsampling and ploidy
@@ -177,6 +177,11 @@ static ploidy_predef_t ploidy_predefs[] =
"* * * M 1\n"
"* * * F 0\n"
},
+ { .alias = "1",
+ .about = "Treat all samples as haploid",
+ .ploidy =
+ "* * * * 1\n"
+ },
{
.alias = NULL,
.about = NULL,
@@ -290,7 +295,7 @@ static void set_samples(args_t *args, const char *fn, int is_file)
char x = *se, *xptr = se; *se = 0;
int ismpl = bcf_hdr_id2int(args->aux.hdr, BCF_DT_SAMPLE, ss);
- if ( ismpl < 0 ) { fprintf(pysamerr,"Warning: No such sample in the VCF: %s\n",ss); continue; }
+ if ( ismpl < 0 ) { fprintf(pysam_stderr,"Warning: No such sample in the VCF: %s\n",ss); continue; }
ss = se+1;
while ( *ss && isspace(*ss) ) ss++;
@@ -383,7 +388,7 @@ static void init_data(args_t *args)
if ( args->regions )
{
if ( bcf_sr_set_regions(args->aux.srs, args->regions, args->regions_is_file)<0 )
- error("Failed to read the targets: %s\n", args->regions);
+ error("Failed to read the regions: %s\n", args->regions);
}
if ( !bcf_sr_add_reader(args->aux.srs, args->bcf_fname) ) error("Failed to open %s: %s\n", args->bcf_fname,bcf_sr_strerror(args->aux.srs->errnum));
@@ -396,11 +401,23 @@ static void init_data(args_t *args)
if ( args->aux.flag&CALL_CONSTR_TRIO )
{
if ( 3*args->aux.nfams!=args->nsamples ) error("Expected only trios in %s, sorry!\n", args->samples_fname);
- fprintf(pysamerr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
+ fprintf(pysam_stderr,"Detected %d samples in %d trio families\n", args->nsamples,args->aux.nfams);
}
+ }
+ if ( args->ploidy )
+ {
args->nsex = ploidy_nsex(args->ploidy);
args->sex2ploidy = (int*) calloc(args->nsex,sizeof(int));
args->sex2ploidy_prev = (int*) calloc(args->nsex,sizeof(int));
+ if ( !args->nsamples )
+ {
+ args->nsamples = bcf_hdr_nsamples(args->aux.hdr);
+ args->sample2sex = (int*) malloc(sizeof(int)*args->nsamples);
+ for (i=0; i<args->nsamples; i++) args->sample2sex[i] = 0;
+ }
+ }
+ if ( args->nsamples )
+ {
args->aux.ploidy = (uint8_t*) malloc(args->nsamples);
for (i=0; i<args->nsamples; i++) args->aux.ploidy[i] = 2;
for (i=0; i<args->nsex; i++) args->sex2ploidy_prev[i] = 2;
@@ -420,9 +437,12 @@ static void init_data(args_t *args)
else
{
args->aux.hdr = bcf_hdr_dup(bcf_sr_get_header(args->aux.srs,0));
- for (i=0; i<args->nsamples; i++)
- if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
- error("No such sample: %s\n", args->samples[i]);
+ if ( args->samples )
+ {
+ for (i=0; i<args->nsamples; i++)
+ if ( bcf_hdr_id2int(args->aux.hdr,BCF_DT_SAMPLE,args->samples[i])<0 )
+ error("No such sample: %s\n", args->samples[i]);
+ }
}
args->out_fh = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
@@ -441,7 +461,7 @@ static void init_data(args_t *args)
bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "QS");
bcf_hdr_remove(args->aux.hdr, BCF_HL_INFO, "I16");
- bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->aux.hdr, args->argc, args->argv, "bcftools_call");
bcf_hdr_write(args->out_fh, args->aux.hdr);
if ( args->flag&CF_INS_MISSED ) init_missed_line(args);
@@ -453,7 +473,10 @@ static void destroy_data(args_t *args)
else if ( args->flag & CF_MCALL ) mcall_destroy(&args->aux);
else if ( args->flag & CF_QCALL ) qcall_destroy(&args->aux);
int i;
- for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+ if ( args->samples )
+ {
+ for (i=0; i<args->nsamples; i++) free(args->samples[i]);
+ }
if ( args->aux.fams )
{
for (i=0; i<args->aux.nfams; i++) free(args->aux.fams[i].name);
@@ -507,7 +530,7 @@ static int parse_format_flag(const char *str)
else if ( !strncasecmp(ss,"GP",se-ss) ) flag |= CALL_FMT_GP;
else
{
- fprintf(pysamerr,"Could not parse \"%s\"\n", str);
+ fprintf(pysam_stderr,"Could not parse \"%s\"\n", str);
exit(1);
}
if ( !*se ) break;
@@ -548,23 +571,23 @@ ploidy_t *init_ploidy(char *alias)
if ( !pld->alias )
{
- fprintf(pysamerr,"Predefined ploidies:\n");
+ fprintf(pysam_stderr,"Predefined ploidies:\n");
pld = ploidy_predefs;
while ( pld->alias )
{
- fprintf(pysamerr,"%s\n .. %s\n\n", pld->alias,pld->about);
+ fprintf(pysam_stderr,"%s\n .. %s\n\n", pld->alias,pld->about);
if ( detailed )
- fprintf(pysamerr,"%s\n", pld->ploidy);
+ fprintf(pysam_stderr,"%s\n", pld->ploidy);
pld++;
}
- fprintf(pysamerr,"Run as --ploidy <alias> (e.g. --ploidy GRCh37).\n");
- fprintf(pysamerr,"To see the detailed ploidy definition, append a question mark (e.g. --ploidy GRCh37?).\n");
- fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr,"Run as --ploidy <alias> (e.g. --ploidy GRCh37).\n");
+ fprintf(pysam_stderr,"To see the detailed ploidy definition, append a question mark (e.g. --ploidy GRCh37?).\n");
+ fprintf(pysam_stderr,"\n");
exit(-1);
}
else if ( detailed )
{
- fprintf(pysamerr,"%s", pld->ploidy);
+ fprintf(pysam_stderr,"%s", pld->ploidy);
exit(-1);
}
return ploidy_init_string(pld->ploidy,2);
@@ -572,51 +595,52 @@ ploidy_t *init_ploidy(char *alias)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: SNP/indel variant calling from VCF/BCF. To be used in conjunction with samtools mpileup.\n");
- fprintf(pysamerr, " This command replaces the former \"bcftools view\" caller. Some of the original\n");
- fprintf(pysamerr, " functionality has been temporarily lost in the process of transition to htslib,\n");
- fprintf(pysamerr, " but will be added back on popular demand. The original calling model can be\n");
- fprintf(pysamerr, " invoked with the -c option.\n");
- fprintf(pysamerr, "Usage: bcftools call [options] <in.vcf.gz>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "File format options:\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
- fprintf(pysamerr, " --ploidy <assembly>[?] predefined ploidy, 'list' to print available settings, append '?' for details\n");
- fprintf(pysamerr, " --ploidy-file <file> space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --samples <list> list of samples to include [all samples]\n");
- fprintf(pysamerr, " -S, --samples-file <file> PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Input/output options:\n");
- fprintf(pysamerr, " -A, --keep-alts keep all possible alternate alleles at variant sites\n");
- fprintf(pysamerr, " -f, --format-fields <list> output format fields: GQ,GP (lowercase allowed) []\n");
- fprintf(pysamerr, " -g, --gvcf <int>,[...] group non-variant sites into gVCF blocks by minimum per-sample DP\n");
- fprintf(pysamerr, " -i, --insert-missed output also sites missed by mpileup but present in -T\n");
- fprintf(pysamerr, " -M, --keep-masked-ref keep sites with masked reference allele (REF=N)\n");
- fprintf(pysamerr, " -V, --skip-variants <type> skip indels/snps\n");
- fprintf(pysamerr, " -v, --variants-only output variant sites only\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Consensus/variant calling options:\n");
- fprintf(pysamerr, " -c, --consensus-caller the original calling method (conflicts with -m)\n");
- fprintf(pysamerr, " -C, --constrain <str> one of: alleles, trio (see manual)\n");
- fprintf(pysamerr, " -m, --multiallelic-caller alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
- fprintf(pysamerr, " -n, --novel-rate <float>,[...] likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
- fprintf(pysamerr, " -p, --pval-threshold <float> variant if P(ref|D)<FLOAT with -c [0.5]\n");
- fprintf(pysamerr, " -P, --prior <float> mutation rate (use bigger for greater sensitivity) [1.1e-3]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: SNP/indel variant calling from VCF/BCF. To be used in conjunction with samtools mpileup.\n");
+ fprintf(pysam_stderr, " This command replaces the former \"bcftools view\" caller. Some of the original\n");
+ fprintf(pysam_stderr, " functionality has been temporarily lost in the process of transition to htslib,\n");
+ fprintf(pysam_stderr, " but will be added back on popular demand. The original calling model can be\n");
+ fprintf(pysam_stderr, " invoked with the -c option.\n");
+ fprintf(pysam_stderr, "Usage: bcftools call [options] <in.vcf.gz>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "File format options:\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> output type: 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " --ploidy <assembly>[?] predefined ploidy, 'list' to print available settings, append '?' for details\n");
+ fprintf(pysam_stderr, " --ploidy-file <file> space/tab-delimited list of CHROM,FROM,TO,SEX,PLOIDY\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --samples <list> list of samples to include [all samples]\n");
+ fprintf(pysam_stderr, " -S, --samples-file <file> PED file or a file with an optional column with sex (see man page for details) [all samples]\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Input/output options:\n");
+ fprintf(pysam_stderr, " -A, --keep-alts keep all possible alternate alleles at variant sites\n");
+ fprintf(pysam_stderr, " -f, --format-fields <list> output format fields: GQ,GP (lowercase allowed) []\n");
+ fprintf(pysam_stderr, " -g, --gvcf <int>,[...] group non-variant sites into gVCF blocks by minimum per-sample DP\n");
+ fprintf(pysam_stderr, " -i, --insert-missed output also sites missed by mpileup but present in -T\n");
+ fprintf(pysam_stderr, " -M, --keep-masked-ref keep sites with masked reference allele (REF=N)\n");
+ fprintf(pysam_stderr, " -V, --skip-variants <type> skip indels/snps\n");
+ fprintf(pysam_stderr, " -v, --variants-only output variant sites only\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Consensus/variant calling options:\n");
+ fprintf(pysam_stderr, " -c, --consensus-caller the original calling method (conflicts with -m)\n");
+ fprintf(pysam_stderr, " -C, --constrain <str> one of: alleles, trio (see manual)\n");
+ fprintf(pysam_stderr, " -m, --multiallelic-caller alternative model for multiallelic and rare-variant calling (conflicts with -c)\n");
+ fprintf(pysam_stderr, " -n, --novel-rate <float>,[...] likelihood of novel mutation for constrained trio calling, see man page for details [1e-8,1e-9,1e-9]\n");
+ fprintf(pysam_stderr, " -p, --pval-threshold <float> variant if P(ref|D)<FLOAT with -c [0.5]\n");
+ fprintf(pysam_stderr, " -P, --prior <float> mutation rate (use bigger for greater sensitivity) [1.1e-3]\n");
// todo (and more)
- // fprintf(pysamerr, "\nContrast calling and association test options:\n");
- // fprintf(pysamerr, " -1 INT number of group-1 samples [0]\n");
- // fprintf(pysamerr, " -C FLOAT posterior constrast for LRT<FLOAT and P(ref|D)<0.5 [%g]\n", args->aux.min_lrt);
- // fprintf(pysamerr, " -U INT number of permutations for association testing (effective with -1) [0]\n");
- // fprintf(pysamerr, " -X FLOAT only perform permutations for P(chi^2)<FLOAT [%g]\n", args->aux.min_perm_p);
- fprintf(pysamerr, "\n");
+ // fprintf(pysam_stderr, "\nContrast calling and association test options:\n");
+ // fprintf(pysam_stderr, " -1 INT number of group-1 samples [0]\n");
+ // fprintf(pysam_stderr, " -C FLOAT posterior constrast for LRT<FLOAT and P(ref|D)<0.5 [%g]\n", args->aux.min_lrt);
+ // fprintf(pysam_stderr, " -U INT number of permutations for association testing (effective with -1) [0]\n");
+ // fprintf(pysam_stderr, " -X FLOAT only perform permutations for P(chi^2)<FLOAT [%g]\n", args->aux.min_perm_p);
+ fprintf(pysam_stderr, "\n");
exit(-1);
}
@@ -636,6 +660,7 @@ int main_vcfcall(int argc, char *argv[])
args.output_fname = "-";
args.output_type = FT_VCF;
args.n_threads = 0;
+ args.record_cmd_line = 1;
args.aux.trio_Pm_SNPs = 1 - 1e-8;
args.aux.trio_Pm_ins = args.aux.trio_Pm_del = 1 - 1e-9;
@@ -670,6 +695,7 @@ int main_vcfcall(int argc, char *argv[])
{"ploidy-file",required_argument,NULL,2},
{"chromosome-X",no_argument,NULL,'X'},
{"chromosome-Y",no_argument,NULL,'Y'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
@@ -680,8 +706,8 @@ int main_vcfcall(int argc, char *argv[])
{
case 2 : ploidy_fname = optarg; break;
case 1 : ploidy = optarg; break;
- case 'X': ploidy = "X"; fprintf(pysamerr,"Warning: -X will be deprecated, please use --ploidy instead.\n"); break;
- case 'Y': ploidy = "Y"; fprintf(pysamerr,"Warning: -Y will be deprecated, please use --ploidy instead.\n"); break;
+ case 'X': ploidy = "X"; fprintf(pysam_stderr,"Warning: -X will be deprecated, please use --ploidy instead.\n"); break;
+ case 'Y': ploidy = "Y"; fprintf(pysam_stderr,"Warning: -Y will be deprecated, please use --ploidy instead.\n"); break;
case 'f': args.aux.output_tags |= parse_format_flag(optarg); break;
case 'M': args.flag &= ~CF_ACGT_ONLY; break; // keep sites where REF is N
case 'N': args.flag |= CF_ACGT_ONLY; break; // omit sites where first base in REF is N (the new default)
@@ -729,6 +755,7 @@ int main_vcfcall(int argc, char *argv[])
case 's': args.samples_fname = optarg; break;
case 'S': args.samples_fname = optarg; args.samples_is_file = 1; break;
case 9 : args.n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args.record_cmd_line = 0; break;
default: usage(&args);
}
}
@@ -745,7 +772,7 @@ int main_vcfcall(int argc, char *argv[])
if ( !ploidy_fname && !ploidy )
{
- fprintf(pysamerr,"Note: Neither --ploidy nor --ploidy-file given, assuming all sites are diploid\n");
+ fprintf(pysam_stderr,"Note: Neither --ploidy nor --ploidy-file given, assuming all sites are diploid\n");
args.ploidy = ploidy_init_string("",2);
}
diff --git a/bcftools/vcfcnv.c.pysam.c b/bcftools/vcfcnv.c.pysam.c
index d8a1ca5..10a00b9 100644
--- a/bcftools/vcfcnv.c.pysam.c
+++ b/bcftools/vcfcnv.c.pysam.c
@@ -267,7 +267,7 @@ static void init_data(args_t *args)
args->hmm = hmm_init(args->nstates, args->tprob, 10000);
hmm_init_states(args->hmm, args->iprobs);
- args->summary_fh = stdout;
+ args->summary_fh = pysam_stdout;
if ( args->output_dir )
{
init_sample_files(&args->query_sample, args->output_dir);
@@ -306,7 +306,7 @@ static void py_plot_cnv(char *script, float th)
char *cmd = msprintf("python %s -p %f", script, th);
int ret = system(cmd);
- if ( ret) fprintf(pysamerr, "The command returned non-zero status %d: %s\n", ret, cmd);
+ if ( ret) fprintf(pysam_stderr, "The command returned non-zero status %d: %s\n", ret, cmd);
free(cmd);
}
@@ -641,7 +641,7 @@ static int set_observed_prob(args_t *args, sample_t *smpl, int isite)
cn3_baf /= norm;
#if DBG0
- if ( args->verbose ) fprintf(pysamerr,"%f\t%f %f %f\n", baf,cn1_baf,cn2_baf,cn3_baf);
+ if ( args->verbose ) fprintf(pysam_stderr,"%f\t%f %f %f\n", baf,cn1_baf,cn2_baf,cn3_baf);
#endif
double cn1_lrr = exp(-(lrr + 0.45)*(lrr + 0.45)/smpl->lrr_dev2);
@@ -866,7 +866,7 @@ static int update_sample_args(args_t *args, sample_t *smpl, int ismpl)
baf_AA_dev2 /= norm_baf_AA_dev2;
if ( baf_dev2 < baf_AA_dev2 ) baf_dev2 = baf_AA_dev2;
double max_mean_cn3 = 0.5 - sqrt(baf_dev2)*1.644854; // R: qnorm(0.95)=1.644854
- //fprintf(pysamerr,"dev=%f AA_dev=%f max_mean_cn3=%f mean_cn3=%f\n", baf_dev2,baf_AA_dev2,max_mean_cn3,mean_cn3);
+ //fprintf(pysam_stderr,"dev=%f AA_dev=%f max_mean_cn3=%f mean_cn3=%f\n", baf_dev2,baf_AA_dev2,max_mean_cn3,mean_cn3);
assert( max_mean_cn3>0 );
double new_frac = 1./mean_cn3 - 2;
@@ -936,13 +936,13 @@ static void cnv_flush_viterbi(args_t *args)
if ( args->optimize_frac )
{
int niter = 0;
- fprintf(pysamerr,"Attempting to estimate the fraction of aberrant cells (chr %s):\n", bcf_hdr_id2name(args->hdr,args->prev_rid));
+ fprintf(pysam_stderr,"Attempting to estimate the fraction of aberrant cells (chr %s):\n", bcf_hdr_id2name(args->hdr,args->prev_rid));
do
{
- fprintf(pysamerr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
+ fprintf(pysam_stderr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
if ( args->control_sample.name )
- fprintf(pysamerr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
- fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
+ fprintf(pysam_stderr,"\n");
set_emission_probs(args);
hmm_run_fwd_bwd(hmm, args->nsites, args->eprob, args->sites);
}
@@ -958,10 +958,10 @@ static void cnv_flush_viterbi(args_t *args)
if ( args->control_sample.name ) set_gauss_params(args, &args->control_sample);
}
- fprintf(pysamerr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
+ fprintf(pysam_stderr,"\t.. %f %f", args->query_sample.cell_frac,args->query_sample.baf_dev2);
if ( args->control_sample.name )
- fprintf(pysamerr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
- fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr,"\t.. %f %f", args->control_sample.cell_frac,args->control_sample.baf_dev2);
+ fprintf(pysam_stderr,"\n");
}
set_emission_probs(args);
@@ -971,7 +971,7 @@ static void cnv_flush_viterbi(args_t *args)
double ori_ii = avg_ii_prob(nstates,hmm_get_tprob(hmm));
hmm_run_baum_welch(hmm, args->nsites, args->eprob, args->sites);
double new_ii = avg_ii_prob(nstates,hmm_get_tprob(hmm));
- fprintf(pysamerr,"%e\t%e\t%e\n", ori_ii,new_ii,new_ii-ori_ii);
+ fprintf(pysam_stderr,"%e\t%e\t%e\n", ori_ii,new_ii,new_ii-ori_ii);
double *tprob = init_tprob_matrix(nstates, 1-new_ii, args->same_prob);
hmm_set_tprob(args->hmm, tprob, 10000);
double *tprob_arr = hmm_get_tprob(hmm);
@@ -983,9 +983,9 @@ static void cnv_flush_viterbi(args_t *args)
{
for (j=0; j<nstates; j++)
{
- printf(" %.15f", MAT(tprob_arr,nstates,j,i));
+ fprintf(pysam_stdout, " %.15f", MAT(tprob_arr,nstates,j,i));
}
- printf("\n");
+ fprintf(pysam_stdout, "\n");
}
break;
}
@@ -1176,33 +1176,33 @@ static void cnv_next_line(args_t *args, bcf1_t *line)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Copy number variation caller, requires Illumina's B-allele frequency (BAF) and Log R\n");
- fprintf(pysamerr, " Ratio intensity (LRR). The HMM considers the following copy number states: CN 2\n");
- fprintf(pysamerr, " (normal), 1 (single-copy loss), 0 (complete loss), 3 (single-copy gain)\n");
- fprintf(pysamerr, "Usage: bcftools cnv [OPTIONS] <file.vcf>\n");
- fprintf(pysamerr, "General Options:\n");
- fprintf(pysamerr, " -c, --control-sample <string> optional control sample name to highlight differences\n");
- fprintf(pysamerr, " -f, --AF-file <file> read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
- fprintf(pysamerr, " -o, --output-dir <path> \n");
- fprintf(pysamerr, " -p, --plot-threshold <float> plot aberrant chromosomes with quality at least 'float'\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --query-sample <string> query samply name\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, "HMM Options:\n");
- fprintf(pysamerr, " -a, --aberrant <float[,float]> fraction of aberrant cells in query and control [1.0,1.0]\n");
- fprintf(pysamerr, " -b, --BAF-weight <float> relative contribution from BAF [1]\n");
- fprintf(pysamerr, " -d, --BAF-dev <float[,float]> expected BAF deviation in query and control [0.04,0.04]\n"); // experimental
- fprintf(pysamerr, " -e, --err-prob <float> uniform error probability [1e-4]\n");
- fprintf(pysamerr, " -k, --LRR-dev <float[,float]> expected LRR deviation [0.2,0.2]\n"); // experimental
- fprintf(pysamerr, " -l, --LRR-weight <float> relative contribution from LRR [0.2]\n");
- fprintf(pysamerr, " -L, --LRR-smooth-win <int> window of LRR moving average smoothing [10]\n");
- fprintf(pysamerr, " -O, --optimize <float> estimate fraction of aberrant cells down to <float> [1.0]\n");
- fprintf(pysamerr, " -P, --same-prob <float> prior probability of -s/-c being the same [0.5]\n");
- fprintf(pysamerr, " -x, --xy-prob <float> P(x|y) transition probability [1e-9]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Copy number variation caller, requires Illumina's B-allele frequency (BAF) and Log R\n");
+ fprintf(pysam_stderr, " Ratio intensity (LRR). The HMM considers the following copy number states: CN 2\n");
+ fprintf(pysam_stderr, " (normal), 1 (single-copy loss), 0 (complete loss), 3 (single-copy gain)\n");
+ fprintf(pysam_stderr, "Usage: bcftools cnv [OPTIONS] <file.vcf>\n");
+ fprintf(pysam_stderr, "General Options:\n");
+ fprintf(pysam_stderr, " -c, --control-sample <string> optional control sample name to highlight differences\n");
+ fprintf(pysam_stderr, " -f, --AF-file <file> read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
+ fprintf(pysam_stderr, " -o, --output-dir <path> \n");
+ fprintf(pysam_stderr, " -p, --plot-threshold <float> plot aberrant chromosomes with quality at least 'float'\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --query-sample <string> query samply name\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, "HMM Options:\n");
+ fprintf(pysam_stderr, " -a, --aberrant <float[,float]> fraction of aberrant cells in query and control [1.0,1.0]\n");
+ fprintf(pysam_stderr, " -b, --BAF-weight <float> relative contribution from BAF [1]\n");
+ fprintf(pysam_stderr, " -d, --BAF-dev <float[,float]> expected BAF deviation in query and control [0.04,0.04]\n"); // experimental
+ fprintf(pysam_stderr, " -e, --err-prob <float> uniform error probability [1e-4]\n");
+ fprintf(pysam_stderr, " -k, --LRR-dev <float[,float]> expected LRR deviation [0.2,0.2]\n"); // experimental
+ fprintf(pysam_stderr, " -l, --LRR-weight <float> relative contribution from LRR [0.2]\n");
+ fprintf(pysam_stderr, " -L, --LRR-smooth-win <int> window of LRR moving average smoothing [10]\n");
+ fprintf(pysam_stderr, " -O, --optimize <float> estimate fraction of aberrant cells down to <float> [1.0]\n");
+ fprintf(pysam_stderr, " -P, --same-prob <float> prior probability of -s/-c being the same [0.5]\n");
+ fprintf(pysam_stderr, " -x, --xy-prob <float> P(x|y) transition probability [1e-9]\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -1379,7 +1379,7 @@ int main_vcfcnv(int argc, char *argv[])
}
cnv_next_line(args, NULL);
create_plots(args);
- fprintf(pysamerr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
+ fprintf(pysam_stderr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
destroy_data(args);
free(args);
return 0;
diff --git a/bcftools/vcfconcat.c b/bcftools/vcfconcat.c
index cfec7c0..bd6a00a 100644
--- a/bcftools/vcfconcat.c
+++ b/bcftools/vcfconcat.c
@@ -31,13 +31,15 @@ THE SOFTWARE. */
#include <htslib/vcf.h>
#include <htslib/synced_bcf_reader.h>
#include <htslib/kseq.h>
+#include <htslib/bgzf.h>
+#include <htslib/tbx.h> // for hts_get_bgzfp()
#include "bcftools.h"
typedef struct _args_t
{
bcf_srs_t *files;
htsFile *out_fh;
- int output_type, n_threads;
+ int output_type, n_threads, record_cmd_line;
bcf_hdr_t *out_hdr;
int *seen_seq;
@@ -50,7 +52,7 @@ typedef struct _args_t
char **argv, *output_fname, *file_list, **fnames, *remove_dups, *regions_list;
int argc, nfnames, allow_overlaps, phased_concat, regions_is_file;
- int compact_PS, phase_set_changed;
+ int compact_PS, phase_set_changed, naive_concat;
}
args_t;
@@ -106,7 +108,7 @@ static void init_data(args_t *args)
bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PQ,Number=1,Type=Integer,Description=\"Phasing Quality (bigger is better)\">");
bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase Set\">");
}
- bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
@@ -176,8 +178,11 @@ static void destroy_data(args_t *args)
for (i=0; i<args->nfnames; i++) free(args->fnames[i]);
free(args->fnames);
if ( args->files ) bcf_sr_destroy(args->files);
- if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
- bcf_hdr_destroy(args->out_hdr);
+ if ( args->out_fh )
+ {
+ if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
+ }
+ if ( args->out_hdr ) bcf_hdr_destroy(args->out_hdr);
free(args->seen_seq);
free(args->start_pos);
free(args->swap_phase);
@@ -550,6 +555,108 @@ static void concat(args_t *args)
}
}
+static void naive_concat(args_t *args)
+{
+ // only compressed BCF atm
+ BGZF *bgzf_out = bgzf_open(args->output_fname,"w");;
+
+ const size_t page_size = 32768;
+ char *buf = (char*) malloc(page_size);
+ kstring_t tmp = {0,0,0};
+ int i;
+ for (i=0; i<args->nfnames; i++)
+ {
+ htsFile *hts_fp = hts_open(args->fnames[i],"r");
+ if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
+ htsFormat type = *hts_get_format(hts_fp);
+
+ if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+ if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+
+ BGZF *fp = hts_get_bgzfp(hts_fp);
+ if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
+ error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));
+
+ uint8_t magic[5];
+ if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+ if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
+
+ if ( bgzf_read(fp, &tmp.l, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+ hts_expand(char,tmp.l,tmp.m,tmp.s);
+ if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+
+ // write only the first header
+ if ( i==0 )
+ {
+ if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
+ if ( bgzf_write(bgzf_out, &tmp.l, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
+ if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", tmp.l,args->output_fname);
+ }
+
+ // Output all non-header data that were read together with the header block
+ int nskip = fp->block_offset;
+ if ( fp->block_length - nskip > 0 )
+ {
+ if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",fp->errcode);
+ }
+ if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);
+
+
+ // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
+ ssize_t nread, ncached = 0, nwr;
+ const int neof = 28;
+ char cached[neof];
+ while (1)
+ {
+ nread = bgzf_raw_read(fp, buf, page_size);
+
+ // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
+ if ( nread<=0 ) break;
+ if ( nread<=neof ) // last block
+ {
+ if ( ncached )
+ {
+ // flush the part of the cache that won't be needed
+ nwr = bgzf_raw_write(bgzf_out, cached, nread);
+ if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
+
+ // make space in the cache so that we can append to the end
+ if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
+ }
+
+ // fill the cache and check for eof outside this loop
+ memcpy(cached+neof-nread,buf,nread);
+ break;
+ }
+
+ // not the last block, flush the cache if full
+ if ( ncached )
+ {
+ nwr = bgzf_raw_write(bgzf_out, cached, ncached);
+ if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)ncached);
+ ncached = 0;
+ }
+
+ // fill the cache
+ nread -= neof;
+ memcpy(cached,buf+nread,neof);
+ ncached = neof;
+
+ nwr = bgzf_raw_write(bgzf_out, buf, nread);
+ if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
+ }
+ if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
+ {
+ nwr = bgzf_raw_write(bgzf_out, cached, neof);
+ if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)neof);
+ }
+ if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
+ }
+ free(buf);
+ free(tmp.s);
+ if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
+}
+
static void usage(args_t *args)
{
fprintf(stderr, "\n");
@@ -558,7 +665,9 @@ static void usage(args_t *args)
fprintf(stderr, " concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel\n");
fprintf(stderr, " VCF into one. The input files must be sorted by chr and position. The files\n");
fprintf(stderr, " must be given in the correct order to produce sorted VCF on output unless\n");
- fprintf(stderr, " the -a, --allow-overlaps option is specified.\n");
+ fprintf(stderr, " the -a, --allow-overlaps option is specified. With the --naive option, the files\n");
+ fprintf(stderr, " are concatenated without being recompressed, which is very fast but dangerous\n");
+ fprintf(stderr, " if the BCF headers differ.\n");
fprintf(stderr, "Usage: bcftools concat [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Options:\n");
@@ -568,6 +677,8 @@ static void usage(args_t *args)
fprintf(stderr, " -D, --remove-duplicates Alias for -d none\n");
fprintf(stderr, " -f, --file-list <file> Read the list of files from a file.\n");
fprintf(stderr, " -l, --ligate Ligate phased VCFs by matching phase at overlapping haplotypes\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(stderr, " -n, --naive Concatenate BCF files without recompression (dangerous, use with caution)\n");
fprintf(stderr, " -o, --output <file> Write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
fprintf(stderr, " -q, --min-PQ <int> Break phase set if phasing quality is lower than <int> [30]\n");
@@ -586,10 +697,12 @@ int main_vcfconcat(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->min_PQ = 30;
static struct option loptions[] =
{
+ {"naive",no_argument,NULL,'n'},
{"compact-PS",no_argument,NULL,'c'},
{"regions",required_argument,NULL,'r'},
{"regions-file",required_argument,NULL,'R'},
@@ -602,10 +715,11 @@ int main_vcfconcat(int argc, char *argv[])
{"threads",required_argument,NULL,9},
{"file-list",required_argument,NULL,'f'},
{"min-PQ",required_argument,NULL,'q'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
- while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:c",loptions,NULL)) >= 0)
+ while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:cn",loptions,NULL)) >= 0)
{
switch (c) {
case 'c': args->compact_PS = 1; break;
@@ -617,6 +731,7 @@ int main_vcfconcat(int argc, char *argv[])
args->min_PQ = strtol(optarg,&tmp,10);
if ( *tmp ) error("Could not parse argument: --min-PQ %s\n", optarg);
break;
+ case 'n': args->naive_concat = 1; break;
case 'a': args->allow_overlaps = 1; break;
case 'l': args->phased_concat = 1; break;
case 'f': args->file_list = optarg; break;
@@ -631,6 +746,7 @@ int main_vcfconcat(int argc, char *argv[])
};
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage(args); break;
default: error("Unknown argument: %s\n", optarg);
@@ -654,6 +770,15 @@ int main_vcfconcat(int argc, char *argv[])
if ( !args->nfnames ) usage(args);
if ( args->remove_dups && !args->allow_overlaps ) error("The -D option is supported only with -a\n");
if ( args->regions_list && !args->allow_overlaps ) error("The -r/-R option is supported only with -a\n");
+ if ( args->naive_concat )
+ {
+ if ( args->allow_overlaps ) error("The option --naive cannot be combined with --allow-overlaps\n");
+ if ( args->phased_concat ) error("The option --naive cannot be combined with --ligate\n");
+ naive_concat(args);
+ destroy_data(args);
+ free(args);
+ return 0;
+ }
init_data(args);
concat(args);
destroy_data(args);
diff --git a/bcftools/vcfconcat.c.pysam.c b/bcftools/vcfconcat.c.pysam.c
index 40db3f7..be2d6a2 100644
--- a/bcftools/vcfconcat.c.pysam.c
+++ b/bcftools/vcfconcat.c.pysam.c
@@ -33,13 +33,15 @@ THE SOFTWARE. */
#include <htslib/vcf.h>
#include <htslib/synced_bcf_reader.h>
#include <htslib/kseq.h>
+#include <htslib/bgzf.h>
+#include <htslib/tbx.h> // for hts_get_bgzfp()
#include "bcftools.h"
typedef struct _args_t
{
bcf_srs_t *files;
htsFile *out_fh;
- int output_type, n_threads;
+ int output_type, n_threads, record_cmd_line;
bcf_hdr_t *out_hdr;
int *seen_seq;
@@ -52,7 +54,7 @@ typedef struct _args_t
char **argv, *output_fname, *file_list, **fnames, *remove_dups, *regions_list;
int argc, nfnames, allow_overlaps, phased_concat, regions_is_file;
- int compact_PS, phase_set_changed;
+ int compact_PS, phase_set_changed, naive_concat;
}
args_t;
@@ -108,7 +110,7 @@ static void init_data(args_t *args)
bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PQ,Number=1,Type=Integer,Description=\"Phasing Quality (bigger is better)\">");
bcf_hdr_append(args->out_hdr,"##FORMAT=<ID=PS,Number=1,Type=Integer,Description=\"Phase Set\">");
}
- bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_concat");
args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
if ( args->n_threads ) hts_set_threads(args->out_fh, args->n_threads);
@@ -178,8 +180,11 @@ static void destroy_data(args_t *args)
for (i=0; i<args->nfnames; i++) free(args->fnames[i]);
free(args->fnames);
if ( args->files ) bcf_sr_destroy(args->files);
- if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
- bcf_hdr_destroy(args->out_hdr);
+ if ( args->out_fh )
+ {
+ if ( hts_close(args->out_fh)!=0 ) error("hts_close error\n");
+ }
+ if ( args->out_hdr ) bcf_hdr_destroy(args->out_hdr);
free(args->seen_seq);
free(args->start_pos);
free(args->swap_phase);
@@ -231,7 +236,7 @@ static void phased_flush(args_t *args)
{
if ( !gt_absent_warned )
{
- fprintf(pysamerr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(ahdr,arec), arec->pos+1);
+ fprintf(pysam_stderr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(ahdr,arec), arec->pos+1);
gt_absent_warned = 1;
}
continue;
@@ -242,7 +247,7 @@ static void phased_flush(args_t *args)
{
if ( !gt_absent_warned )
{
- fprintf(pysamerr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(bhdr,brec), brec->pos+1);
+ fprintf(pysam_stderr,"GT is not present at %s:%d. (This warning is printed only once.)\n", bcf_seqname(bhdr,brec), brec->pos+1);
gt_absent_warned = 1;
}
continue;
@@ -552,31 +557,137 @@ static void concat(args_t *args)
}
}
+static void naive_concat(args_t *args)
+{
+ // only compressed BCF atm
+ BGZF *bgzf_out = bgzf_open(args->output_fname,"w");;
+
+ const size_t page_size = 32768;
+ char *buf = (char*) malloc(page_size);
+ kstring_t tmp = {0,0,0};
+ int i;
+ for (i=0; i<args->nfnames; i++)
+ {
+ htsFile *hts_fp = hts_open(args->fnames[i],"r");
+ if ( !hts_fp ) error("Failed to open: %s\n", args->fnames[i]);
+ htsFormat type = *hts_get_format(hts_fp);
+
+ if ( type.format==vcf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+ if ( type.compression!=bgzf ) error("The --naive option currently works only for compressed BCFs, sorry :-/\n");
+
+ BGZF *fp = hts_get_bgzfp(hts_fp);
+ if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length )
+ error("Failed to read %s: %s\n", args->fnames[i], strerror(errno));
+
+ uint8_t magic[5];
+ if ( bgzf_read(fp, magic, 5) != 5 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+ if (strncmp((char*)magic, "BCF\2\2", 5) != 0) error("Invalid BCF magic string in %s\n", args->fnames[i]);
+
+ if ( bgzf_read(fp, &tmp.l, 4) != 4 ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+ hts_expand(char,tmp.l,tmp.m,tmp.s);
+ if ( bgzf_read(fp, tmp.s, tmp.l) != tmp.l ) error("Failed to read the BCF header in %s\n", args->fnames[i]);
+
+ // write only the first header
+ if ( i==0 )
+ {
+ if ( bgzf_write(bgzf_out, "BCF\2\2", 5) !=5 ) error("Failed to write %d bytes to %s\n", 5,args->output_fname);
+ if ( bgzf_write(bgzf_out, &tmp.l, 4) !=4 ) error("Failed to write %d bytes to %s\n", 4,args->output_fname);
+ if ( bgzf_write(bgzf_out, tmp.s, tmp.l) != tmp.l) error("Failed to write %d bytes to %s\n", tmp.l,args->output_fname);
+ }
+
+ // Output all non-header data that were read together with the header block
+ int nskip = fp->block_offset;
+ if ( fp->block_length - nskip > 0 )
+ {
+ if ( bgzf_write(bgzf_out, fp->uncompressed_block+nskip, fp->block_length-nskip)<0 ) error("Error: %d\n",fp->errcode);
+ }
+ if ( bgzf_flush(bgzf_out)<0 ) error("Error: %d\n",bgzf_out->errcode);
+
+
+ // Stream the rest of the file as it is, without recompressing, but remove BGZF EOF blocks
+ ssize_t nread, ncached = 0, nwr;
+ const int neof = 28;
+ char cached[neof];
+ while (1)
+ {
+ nread = bgzf_raw_read(fp, buf, page_size);
+
+ // page_size boundary may occur in the middle of the EOF block, so we need to cache the blocks' ends
+ if ( nread<=0 ) break;
+ if ( nread<=neof ) // last block
+ {
+ if ( ncached )
+ {
+ // flush the part of the cache that won't be needed
+ nwr = bgzf_raw_write(bgzf_out, cached, nread);
+ if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
+
+ // make space in the cache so that we can append to the end
+ if ( nread!=neof ) memmove(cached,cached+nread,neof-nread);
+ }
+
+ // fill the cache and check for eof outside this loop
+ memcpy(cached+neof-nread,buf,nread);
+ break;
+ }
+
+ // not the last block, flush the cache if full
+ if ( ncached )
+ {
+ nwr = bgzf_raw_write(bgzf_out, cached, ncached);
+ if (nwr != ncached) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)ncached);
+ ncached = 0;
+ }
+
+ // fill the cache
+ nread -= neof;
+ memcpy(cached,buf+nread,neof);
+ ncached = neof;
+
+ nwr = bgzf_raw_write(bgzf_out, buf, nread);
+ if (nwr != nread) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)nread);
+ }
+ if ( ncached && memcmp(cached,"\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0",neof) )
+ {
+ nwr = bgzf_raw_write(bgzf_out, cached, neof);
+ if (nwr != neof) error("Write failed, wrote %d instead of %d bytes.\n", nwr,(int)neof);
+ }
+ if (hts_close(hts_fp)) error("Close failed: %s\n",args->fnames[i]);
+ }
+ free(buf);
+ free(tmp.s);
+ if (bgzf_close(bgzf_out) < 0) error("Error: %d\n",bgzf_out->errcode);
+}
+
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Concatenate or combine VCF/BCF files. All source files must have the same sample\n");
- fprintf(pysamerr, " columns appearing in the same order. The program can be used, for example, to\n");
- fprintf(pysamerr, " concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel\n");
- fprintf(pysamerr, " VCF into one. The input files must be sorted by chr and position. The files\n");
- fprintf(pysamerr, " must be given in the correct order to produce sorted VCF on output unless\n");
- fprintf(pysamerr, " the -a, --allow-overlaps option is specified.\n");
- fprintf(pysamerr, "Usage: bcftools concat [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -a, --allow-overlaps First coordinate of the next file can precede last record of the current file.\n");
- fprintf(pysamerr, " -c, --compact-PS Do not output PS tag at each site, only at the start of a new phase set block.\n");
- fprintf(pysamerr, " -d, --rm-dups <string> Output duplicate records present in multiple files only once: <snps|indels|both|all|none>\n");
- fprintf(pysamerr, " -D, --remove-duplicates Alias for -d none\n");
- fprintf(pysamerr, " -f, --file-list <file> Read the list of files from a file.\n");
- fprintf(pysamerr, " -l, --ligate Ligate phased VCFs by matching phase at overlapping haplotypes\n");
- fprintf(pysamerr, " -o, --output <file> Write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
- fprintf(pysamerr, " -q, --min-PQ <int> Break phase set if phasing quality is lower than <int> [30]\n");
- fprintf(pysamerr, " -r, --regions <region> Restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> Restrict to regions listed in a file\n");
- fprintf(pysamerr, " --threads <int> Number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Concatenate or combine VCF/BCF files. All source files must have the same sample\n");
+ fprintf(pysam_stderr, " columns appearing in the same order. The program can be used, for example, to\n");
+ fprintf(pysam_stderr, " concatenate chromosome VCFs into one VCF, or combine a SNP VCF and an indel\n");
+ fprintf(pysam_stderr, " VCF into one. The input files must be sorted by chr and position. The files\n");
+ fprintf(pysam_stderr, " must be given in the correct order to produce sorted VCF on output unless\n");
+ fprintf(pysam_stderr, " the -a, --allow-overlaps option is specified. With the --naive option, the files\n");
+ fprintf(pysam_stderr, " are concatenated without being recompressed, which is very fast but dangerous\n");
+ fprintf(pysam_stderr, " if the BCF headers differ.\n");
+ fprintf(pysam_stderr, "Usage: bcftools concat [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -a, --allow-overlaps First coordinate of the next file can precede last record of the current file.\n");
+ fprintf(pysam_stderr, " -c, --compact-PS Do not output PS tag at each site, only at the start of a new phase set block.\n");
+ fprintf(pysam_stderr, " -d, --rm-dups <string> Output duplicate records present in multiple files only once: <snps|indels|both|all|none>\n");
+ fprintf(pysam_stderr, " -D, --remove-duplicates Alias for -d none\n");
+ fprintf(pysam_stderr, " -f, --file-list <file> Read the list of files from a file.\n");
+ fprintf(pysam_stderr, " -l, --ligate Ligate phased VCFs by matching phase at overlapping haplotypes\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -n, --naive Concatenate BCF files without recompression (dangerous, use with caution)\n");
+ fprintf(pysam_stderr, " -o, --output <file> Write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " -q, --min-PQ <int> Break phase set if phasing quality is lower than <int> [30]\n");
+ fprintf(pysam_stderr, " -r, --regions <region> Restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> Restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " --threads <int> Number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -588,10 +699,12 @@ int main_vcfconcat(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->min_PQ = 30;
static struct option loptions[] =
{
+ {"naive",no_argument,NULL,'n'},
{"compact-PS",no_argument,NULL,'c'},
{"regions",required_argument,NULL,'r'},
{"regions-file",required_argument,NULL,'R'},
@@ -604,10 +717,11 @@ int main_vcfconcat(int argc, char *argv[])
{"threads",required_argument,NULL,9},
{"file-list",required_argument,NULL,'f'},
{"min-PQ",required_argument,NULL,'q'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
- while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:c",loptions,NULL)) >= 0)
+ while ((c = getopt_long(argc, argv, "h:?o:O:f:alq:Dd:r:R:cn",loptions,NULL)) >= 0)
{
switch (c) {
case 'c': args->compact_PS = 1; break;
@@ -619,6 +733,7 @@ int main_vcfconcat(int argc, char *argv[])
args->min_PQ = strtol(optarg,&tmp,10);
if ( *tmp ) error("Could not parse argument: --min-PQ %s\n", optarg);
break;
+ case 'n': args->naive_concat = 1; break;
case 'a': args->allow_overlaps = 1; break;
case 'l': args->phased_concat = 1; break;
case 'f': args->file_list = optarg; break;
@@ -633,6 +748,7 @@ int main_vcfconcat(int argc, char *argv[])
};
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage(args); break;
default: error("Unknown argument: %s\n", optarg);
@@ -656,6 +772,15 @@ int main_vcfconcat(int argc, char *argv[])
if ( !args->nfnames ) usage(args);
if ( args->remove_dups && !args->allow_overlaps ) error("The -D option is supported only with -a\n");
if ( args->regions_list && !args->allow_overlaps ) error("The -r/-R option is supported only with -a\n");
+ if ( args->naive_concat )
+ {
+ if ( args->allow_overlaps ) error("The option --naive cannot be combined with --allow-overlaps\n");
+ if ( args->phased_concat ) error("The option --naive cannot be combined with --ligate\n");
+ naive_concat(args);
+ destroy_data(args);
+ free(args);
+ return 0;
+ }
init_data(args);
concat(args);
destroy_data(args);
diff --git a/bcftools/vcfconvert.c b/bcftools/vcfconvert.c
index 26166df..1e60d30 100644
--- a/bcftools/vcfconvert.c
+++ b/bcftools/vcfconvert.c
@@ -66,7 +66,7 @@ struct _args_t
int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
char *outfname, *infname, *ref_fname;
- int argc, n_threads;
+ int argc, n_threads, record_cmd_line;
};
static void destroy_data(args_t *args)
@@ -369,7 +369,7 @@ static void gensample_to_vcf(args_t *args)
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
bcf_hdr_append(args->header, "##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Genotype Probabilities\">");
bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff); // MAX_CSI_COOR
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, nsamples;
char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -489,7 +489,7 @@ static void haplegendsample_to_vcf(args_t *args)
bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff); // MAX_CSI_COOR
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, nrows, nsamples;
char **samples = hts_readlist(sample_fname, 1, &nrows);
@@ -606,7 +606,7 @@ static void hapsample_to_vcf(args_t *args)
bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff); // MAX_CSI_COOR
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, nsamples;
char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -1143,7 +1143,7 @@ static void tsv_to_vcf(args_t *args)
args->header = bcf_hdr_init("w");
bcf_hdr_set_chrs(args->header, args->ref);
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, n;
char **smpls = hts_readlist(args->sample_list, args->sample_is_file, &n);
@@ -1241,7 +1241,7 @@ static void gvcf_to_vcf(args_t *args)
if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
bcf_hdr_t *hdr = bcf_sr_get_header(args->files,0);
- bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
bcf_hdr_write(out_fh,hdr);
int32_t *itmp = NULL, nitmp = 0;
@@ -1304,11 +1304,12 @@ static void usage(void)
fprintf(stderr, " -S, --samples-file <file> file of samples to include\n");
fprintf(stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
fprintf(stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(stderr, " --threads <int> number of extra output compression threads [0]\n");
fprintf(stderr, "\n");
fprintf(stderr, "VCF output options:\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -o, --output <file> output file name [stdout]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(stderr, " --threads <int> number of extra output compression threads [0]\n");
fprintf(stderr, "\n");
fprintf(stderr, "GEN/SAMPLE conversion (input/output from IMPUTE2):\n");
fprintf(stderr, " -G, --gensample2vcf <...> <prefix>|<gen-file>,<sample-file>\n");
@@ -1359,6 +1360,7 @@ int main_vcfconvert(int argc, char *argv[])
args->outfname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
static struct option loptions[] =
{
@@ -1387,6 +1389,7 @@ int main_vcfconvert(int argc, char *argv[])
{"haplegendsample2vcf",required_argument,NULL,'H'},
{"columns",required_argument,NULL,'c'},
{"fasta-ref",required_argument,NULL,'f'},
+ {"no-version",no_argument,NULL,10},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "?h:r:R:s:S:t:T:i:e:g:G:o:O:c:f:H:",loptions,NULL)) >= 0) {
@@ -1424,6 +1427,7 @@ int main_vcfconvert(int argc, char *argv[])
break;
case 'h': args->convert_func = vcf_to_haplegendsample; args->outfname = optarg; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 10 : args->record_cmd_line = 0; break;
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
}
diff --git a/bcftools/vcfconvert.c.pysam.c b/bcftools/vcfconvert.c.pysam.c
index 03b24b4..12333cc 100644
--- a/bcftools/vcfconvert.c.pysam.c
+++ b/bcftools/vcfconvert.c.pysam.c
@@ -68,7 +68,7 @@ struct _args_t
int nsamples, *samples, sample_is_file, targets_is_file, regions_is_file, output_type;
char **argv, *sample_list, *targets_list, *regions_list, *tag, *columns;
char *outfname, *infname, *ref_fname;
- int argc, n_threads;
+ int argc, n_threads, record_cmd_line;
};
static void destroy_data(args_t *args)
@@ -211,13 +211,13 @@ static int tsv_setter_gt_gp(tsv_t *tsv, bcf1_t *rec, void *usr)
{
float aa,ab,bb;
aa = strtod(tsv->ss, &tsv->se);
- if ( tsv->ss==tsv->se ) { fprintf(pysamerr,"Could not parse first value of %d-th sample\n", i+1); return -1; }
+ if ( tsv->ss==tsv->se ) { fprintf(pysam_stderr,"Could not parse first value of %d-th sample\n", i+1); return -1; }
tsv->ss = tsv->se+1;
ab = strtod(tsv->ss, &tsv->se);
- if ( tsv->ss==tsv->se ) { fprintf(pysamerr,"Could not parse second value of %d-th sample\n", i+1); return -1; }
+ if ( tsv->ss==tsv->se ) { fprintf(pysam_stderr,"Could not parse second value of %d-th sample\n", i+1); return -1; }
tsv->ss = tsv->se+1;
bb = strtod(tsv->ss, &tsv->se);
- if ( tsv->ss==tsv->se ) { fprintf(pysamerr,"Could not parse third value of %d-th sample\n", i+1); return -1; }
+ if ( tsv->ss==tsv->se ) { fprintf(pysam_stderr,"Could not parse third value of %d-th sample\n", i+1); return -1; }
tsv->ss = tsv->se+1;
if ( args->rev_als ) { float tmp = bb; bb = aa; aa = tmp; }
@@ -263,7 +263,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
if ( !ss[0] || !ss[1] || !ss[2] ||
(up && (!ss[3] || !ss[4]) ) )
{
- fprintf(pysamerr,"Wrong number of fields at %d-th sample ([%c][%c][%c]). ",i+1,ss[0],ss[1],ss[2]);
+ fprintf(pysam_stderr,"Wrong number of fields at %d-th sample ([%c][%c][%c]). ",i+1,ss[0],ss[1],ss[2]);
return -1;
}
@@ -282,7 +282,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
args->gts[2*i+all] = bcf_int32_vector_end;
break;
default :
- fprintf(pysamerr,"Could not parse: [%c][%s]\n", ss[all*2+up],tsv->ss);
+ fprintf(pysam_stderr,"Could not parse: [%c][%s]\n", ss[all*2+up],tsv->ss);
return -1;
}
if( ss[all*2+up+1]=='*' ) up = up + 1;
@@ -290,7 +290,7 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
if(up && up != 2)
{
- fprintf(pysamerr,"Missing unphased marker '*': [%c][%s]", ss[2+up], tsv->ss);
+ fprintf(pysam_stderr,"Missing unphased marker '*': [%c][%s]", ss[2+up], tsv->ss);
return -1;
}
@@ -304,8 +304,8 @@ static int tsv_setter_haps(tsv_t *tsv, bcf1_t *rec, void *usr)
}
if ( tsv->ss[(nsamples-1)*4+3+nup] )
{
- fprintf(pysamerr,"nup: %d", nup);
- fprintf(pysamerr,"Wrong number of fields (%d-th column = [%c]). ", nsamples*2,tsv->ss[(nsamples-1)*4+nup]);
+ fprintf(pysam_stderr,"nup: %d", nup);
+ fprintf(pysam_stderr,"Wrong number of fields (%d-th column = [%c]). ", nsamples*2,tsv->ss[(nsamples-1)*4+nup]);
return -1;
}
@@ -371,7 +371,7 @@ static void gensample_to_vcf(args_t *args)
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
bcf_hdr_append(args->header, "##FORMAT=<ID=GP,Number=G,Type=Float,Description=\"Genotype Probabilities\">");
bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff); // MAX_CSI_COOR
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, nsamples;
char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -417,7 +417,7 @@ static void gensample_to_vcf(args_t *args)
free(args->flt);
tsv_destroy(tsv);
- fprintf(pysamerr,"Number of processed rows: \t%d\n", args->n.total);
+ fprintf(pysam_stderr,"Number of processed rows: \t%d\n", args->n.total);
}
static void haplegendsample_to_vcf(args_t *args)
@@ -491,7 +491,7 @@ static void haplegendsample_to_vcf(args_t *args)
bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff); // MAX_CSI_COOR
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, nrows, nsamples;
char **samples = hts_readlist(sample_fname, 1, &nrows);
@@ -554,7 +554,7 @@ static void haplegendsample_to_vcf(args_t *args)
tsv_destroy(hap_tsv);
tsv_destroy(leg_tsv);
- fprintf(pysamerr,"Number of processed rows: \t%d\n", args->n.total);
+ fprintf(pysam_stderr,"Number of processed rows: \t%d\n", args->n.total);
}
static void hapsample_to_vcf(args_t *args)
@@ -608,7 +608,7 @@ static void hapsample_to_vcf(args_t *args)
bcf_hdr_append(args->header, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">");
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
bcf_hdr_printf(args->header, "##contig=<ID=%s,length=%d>", args->str.s,0x7fffffff); // MAX_CSI_COOR
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, nsamples;
char **samples = hts_readlist(sample_fname, 1, &nsamples);
@@ -653,7 +653,7 @@ static void hapsample_to_vcf(args_t *args)
free(args->gts);
tsv_destroy(tsv);
- fprintf(pysamerr,"Number of processed rows: \t%d\n", args->n.total);
+ fprintf(pysam_stderr,"Number of processed rows: \t%d\n", args->n.total);
}
static void vcf_to_gensample(args_t *args)
@@ -710,8 +710,8 @@ static void vcf_to_gensample(args_t *args)
if ( gen_fname && (strlen(gen_fname)<3 || strcasecmp(".gz",gen_fname+strlen(gen_fname)-3)) ) gen_compressed = 0;
if ( sample_fname && strlen(sample_fname)>3 && strcasecmp(".gz",sample_fname+strlen(sample_fname)-3)==0 ) sample_compressed = 0;
- if (gen_fname) fprintf(pysamerr, "Gen file: %s\n", gen_fname);
- if (sample_fname) fprintf(pysamerr, "Sample file: %s\n", sample_fname);
+ if (gen_fname) fprintf(pysam_stderr, "Gen file: %s\n", gen_fname);
+ if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
// write samples file
if (sample_fname) {
@@ -755,7 +755,7 @@ static void vcf_to_gensample(args_t *args)
// biallelic required
if ( line->n_allele>2 ) {
if (!non_biallelic)
- fprintf(pysamerr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
+ fprintf(pysam_stderr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
non_biallelic++;
continue;
}
@@ -774,7 +774,7 @@ static void vcf_to_gensample(args_t *args)
nok++;
}
}
- fprintf(pysamerr, "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n",
+ fprintf(pysam_stderr, "%d records written, %d skipped: %d/%d/%d/%d no-ALT/non-biallelic/filtered/duplicated\n",
nok, no_alt+non_biallelic+filtered+ndup, no_alt, non_biallelic, filtered, ndup);
if ( str.m ) free(str.s);
@@ -826,9 +826,9 @@ static void vcf_to_haplegendsample(args_t *args)
if ( legend_fname && (strlen(legend_fname)<3 || strcasecmp(".gz",legend_fname+strlen(legend_fname)-3)) ) legend_compressed = 0;
if ( sample_fname && strlen(sample_fname)>3 && strcasecmp(".gz",sample_fname+strlen(sample_fname)-3)==0 ) sample_compressed = 0;
- if (hap_fname) fprintf(pysamerr, "Haps file: %s\n", hap_fname);
- if (legend_fname) fprintf(pysamerr, "Legend file: %s\n", legend_fname);
- if (sample_fname) fprintf(pysamerr, "Sample file: %s\n", sample_fname);
+ if (hap_fname) fprintf(pysam_stderr, "Haps file: %s\n", hap_fname);
+ if (legend_fname) fprintf(pysam_stderr, "Legend file: %s\n", legend_fname);
+ if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
// write samples file
if (sample_fname) {
@@ -879,7 +879,7 @@ static void vcf_to_haplegendsample(args_t *args)
// biallelic required
if ( line->n_allele>2 ) {
if (!non_biallelic)
- fprintf(pysamerr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
+ fprintf(pysam_stderr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
non_biallelic++;
continue;
}
@@ -906,7 +906,7 @@ static void vcf_to_haplegendsample(args_t *args)
}
nok++;
}
- fprintf(pysamerr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok,no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
+ fprintf(pysam_stderr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok,no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
if ( str.m ) free(str.s);
if ( hout && bgzf_close(hout)!=0 ) error("Error closing %s: %s\n", hap_fname, strerror(errno));
if ( lout && bgzf_close(lout)!=0 ) error("Error closing %s: %s\n", legend_fname, strerror(errno));
@@ -968,8 +968,8 @@ static void vcf_to_hapsample(args_t *args)
if ( hap_fname && (strlen(hap_fname)<3 || strcasecmp(".gz",hap_fname+strlen(hap_fname)-3)) ) hap_compressed = 0;
if ( sample_fname && strlen(sample_fname)>3 && strcasecmp(".gz",sample_fname+strlen(sample_fname)-3)==0 ) sample_compressed = 0;
- if (hap_fname) fprintf(pysamerr, "Haps file: %s\n", hap_fname);
- if (sample_fname) fprintf(pysamerr, "Sample file: %s\n", sample_fname);
+ if (hap_fname) fprintf(pysam_stderr, "Haps file: %s\n", hap_fname);
+ if (sample_fname) fprintf(pysam_stderr, "Sample file: %s\n", sample_fname);
// write samples file
if (sample_fname) {
@@ -1013,7 +1013,7 @@ static void vcf_to_hapsample(args_t *args)
// biallelic required
if ( line->n_allele>2 ) {
if (!non_biallelic)
- fprintf(pysamerr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
+ fprintf(pysam_stderr, "Warning: non-biallelic records are skipped. Consider splitting multi-allelic records into biallelic records using 'bcftools norm -m-'.\n");
non_biallelic++;
continue;
}
@@ -1029,7 +1029,7 @@ static void vcf_to_hapsample(args_t *args)
}
nok++;
}
- fprintf(pysamerr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok, no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
+ fprintf(pysam_stderr, "%d records written, %d skipped: %d/%d/%d no-ALT/non-biallelic/filtered\n", nok, no_alt+non_biallelic+filtered, no_alt, non_biallelic, filtered);
if ( str.m ) free(str.s);
if ( hout && bgzf_close(hout)!=0 ) error("Error closing %s: %s\n", hap_fname, strerror(errno));
if (hap_fname) free(hap_fname);
@@ -1145,7 +1145,7 @@ static void tsv_to_vcf(args_t *args)
args->header = bcf_hdr_init("w");
bcf_hdr_set_chrs(args->header, args->ref);
bcf_hdr_append(args->header, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
- bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->header, args->argc, args->argv, "bcftools_convert");
int i, n;
char **smpls = hts_readlist(args->sample_list, args->sample_is_file, &n);
@@ -1197,13 +1197,13 @@ static void tsv_to_vcf(args_t *args)
free(args->str.s);
free(args->gts);
- fprintf(pysamerr,"Rows total: \t%d\n", args->n.total);
- fprintf(pysamerr,"Rows skipped: \t%d\n", args->n.skipped);
- fprintf(pysamerr,"Missing GTs: \t%d\n", args->n.missing);
- fprintf(pysamerr,"Hom RR: \t%d\n", args->n.hom_rr);
- fprintf(pysamerr,"Het RA: \t%d\n", args->n.het_ra);
- fprintf(pysamerr,"Hom AA: \t%d\n", args->n.hom_aa);
- fprintf(pysamerr,"Het AA: \t%d\n", args->n.het_aa);
+ fprintf(pysam_stderr,"Rows total: \t%d\n", args->n.total);
+ fprintf(pysam_stderr,"Rows skipped: \t%d\n", args->n.skipped);
+ fprintf(pysam_stderr,"Missing GTs: \t%d\n", args->n.missing);
+ fprintf(pysam_stderr,"Hom RR: \t%d\n", args->n.hom_rr);
+ fprintf(pysam_stderr,"Het RA: \t%d\n", args->n.het_ra);
+ fprintf(pysam_stderr,"Hom AA: \t%d\n", args->n.hom_aa);
+ fprintf(pysam_stderr,"Het AA: \t%d\n", args->n.het_aa);
}
static void vcf_to_vcf(args_t *args)
@@ -1243,7 +1243,7 @@ static void gvcf_to_vcf(args_t *args)
if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
bcf_hdr_t *hdr = bcf_sr_get_header(args->files,0);
- bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
+ if (args->record_cmd_line) bcf_hdr_append_version(hdr, args->argc, args->argv, "bcftools_convert");
bcf_hdr_write(out_fh,hdr);
int32_t *itmp = NULL, nitmp = 0;
@@ -1291,65 +1291,66 @@ static void gvcf_to_vcf(args_t *args)
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Converts VCF/BCF to other formats and back. See man page for file\n");
- fprintf(pysamerr, " formats details. When specifying output files explicitly instead\n");
- fprintf(pysamerr, " of with <prefix>, one can use '-' for stdout and '.' to suppress.\n");
- fprintf(pysamerr, "Usage: bcftools convert [OPTIONS] <input_file>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "VCF input options:\n");
- fprintf(pysamerr, " -e, --exclude <expr> exclude sites for which the expression is true\n");
- fprintf(pysamerr, " -i, --include <expr> select sites for which the expression is true\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --samples <list> list of samples to include\n");
- fprintf(pysamerr, " -S, --samples-file <file> file of samples to include\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "VCF output options:\n");
- fprintf(pysamerr, " -o, --output <file> output file name [stdout]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "GEN/SAMPLE conversion (input/output from IMPUTE2):\n");
- fprintf(pysamerr, " -G, --gensample2vcf <...> <prefix>|<gen-file>,<sample-file>\n");
- fprintf(pysamerr, " -g, --gensample <...> <prefix>|<gen-file>,<sample-file>\n");
- fprintf(pysamerr, " --tag <string> tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
- fprintf(pysamerr, " --chrom output chromosome in first column instead of CHROM:POS_REF_ALT\n");
- fprintf(pysamerr, " --vcf-ids output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "gVCF conversion:\n");
- fprintf(pysamerr, " --gvcf2vcf expand gVCF reference blocks\n");
- fprintf(pysamerr, " -f, --fasta-ref <file> reference sequence in fasta format\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "HAP/SAMPLE conversion (output from SHAPEIT):\n");
- fprintf(pysamerr, " --hapsample2vcf <...> <prefix>|<haps-file>,<sample-file>\n");
- fprintf(pysamerr, " --hapsample <...> <prefix>|<haps-file>,<sample-file>\n");
- fprintf(pysamerr, " --haploid2diploid convert haploid genotypes to diploid homozygotes\n");
- fprintf(pysamerr, " --vcf-ids output VCF IDs instead of CHROM:POS_REF_ALT\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "HAP/LEGEND/SAMPLE conversion:\n");
- fprintf(pysamerr, " -H, --haplegendsample2vcf <...> <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
- fprintf(pysamerr, " -h, --haplegendsample <...> <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
- fprintf(pysamerr, " --haploid2diploid convert haploid genotypes to diploid homozygotes\n");
- fprintf(pysamerr, " --vcf-ids output VCF IDs instead of CHROM:POS_REF_ALT\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "TSV conversion:\n");
- fprintf(pysamerr, " --tsv2vcf <file> \n");
- fprintf(pysamerr, " -c, --columns <string> columns of the input tsv file [ID,CHROM,POS,AA]\n");
- fprintf(pysamerr, " -f, --fasta-ref <file> reference sequence in fasta format\n");
- fprintf(pysamerr, " -s, --samples <list> list of sample names\n");
- fprintf(pysamerr, " -S, --samples-file <file> file of sample names\n");
- fprintf(pysamerr, "\n");
- // fprintf(pysamerr, "PLINK options:\n");
- // fprintf(pysamerr, " -p, --plink <prefix>|<ped>,<map>,<fam>|<bed>,<bim>,<fam>|<tped>,<tfam>\n");
- // fprintf(pysamerr, " --tped make tped file instead\n");
- // fprintf(pysamerr, " --bin make binary bed/fam/bim files\n");
- // fprintf(pysamerr, "\n");
- // fprintf(pysamerr, "PBWT options:\n");
- // fprintf(pysamerr, " -b, --pbwt <prefix> or <pbwt>,<sites>,<sample>,<missing>\n");
- // fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Converts VCF/BCF to other formats and back. See man page for file\n");
+ fprintf(pysam_stderr, " formats details. When specifying output files explicitly instead\n");
+ fprintf(pysam_stderr, " of with <prefix>, one can use '-' for pysam_stdout and '.' to suppress.\n");
+ fprintf(pysam_stderr, "Usage: bcftools convert [OPTIONS] <input_file>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "VCF input options:\n");
+ fprintf(pysam_stderr, " -e, --exclude <expr> exclude sites for which the expression is true\n");
+ fprintf(pysam_stderr, " -i, --include <expr> select sites for which the expression is true\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --samples <list> list of samples to include\n");
+ fprintf(pysam_stderr, " -S, --samples-file <file> file of samples to include\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "VCF output options:\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -o, --output <file> output file name [pysam_stdout]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "GEN/SAMPLE conversion (input/output from IMPUTE2):\n");
+ fprintf(pysam_stderr, " -G, --gensample2vcf <...> <prefix>|<gen-file>,<sample-file>\n");
+ fprintf(pysam_stderr, " -g, --gensample <...> <prefix>|<gen-file>,<sample-file>\n");
+ fprintf(pysam_stderr, " --tag <string> tag to take values for .gen file: GT,PL,GL,GP [GT]\n");
+ fprintf(pysam_stderr, " --chrom output chromosome in first column instead of CHROM:POS_REF_ALT\n");
+ fprintf(pysam_stderr, " --vcf-ids output VCF IDs in second column instead of CHROM:POS_REF_ALT\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "gVCF conversion:\n");
+ fprintf(pysam_stderr, " --gvcf2vcf expand gVCF reference blocks\n");
+ fprintf(pysam_stderr, " -f, --fasta-ref <file> reference sequence in fasta format\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "HAP/SAMPLE conversion (output from SHAPEIT):\n");
+ fprintf(pysam_stderr, " --hapsample2vcf <...> <prefix>|<haps-file>,<sample-file>\n");
+ fprintf(pysam_stderr, " --hapsample <...> <prefix>|<haps-file>,<sample-file>\n");
+ fprintf(pysam_stderr, " --haploid2diploid convert haploid genotypes to diploid homozygotes\n");
+ fprintf(pysam_stderr, " --vcf-ids output VCF IDs instead of CHROM:POS_REF_ALT\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "HAP/LEGEND/SAMPLE conversion:\n");
+ fprintf(pysam_stderr, " -H, --haplegendsample2vcf <...> <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
+ fprintf(pysam_stderr, " -h, --haplegendsample <...> <prefix>|<hap-file>,<legend-file>,<sample-file>\n");
+ fprintf(pysam_stderr, " --haploid2diploid convert haploid genotypes to diploid homozygotes\n");
+ fprintf(pysam_stderr, " --vcf-ids output VCF IDs instead of CHROM:POS_REF_ALT\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "TSV conversion:\n");
+ fprintf(pysam_stderr, " --tsv2vcf <file> \n");
+ fprintf(pysam_stderr, " -c, --columns <string> columns of the input tsv file [ID,CHROM,POS,AA]\n");
+ fprintf(pysam_stderr, " -f, --fasta-ref <file> reference sequence in fasta format\n");
+ fprintf(pysam_stderr, " -s, --samples <list> list of sample names\n");
+ fprintf(pysam_stderr, " -S, --samples-file <file> file of sample names\n");
+ fprintf(pysam_stderr, "\n");
+ // fprintf(pysam_stderr, "PLINK options:\n");
+ // fprintf(pysam_stderr, " -p, --plink <prefix>|<ped>,<map>,<fam>|<bed>,<bim>,<fam>|<tped>,<tfam>\n");
+ // fprintf(pysam_stderr, " --tped make tped file instead\n");
+ // fprintf(pysam_stderr, " --bin make binary bed/fam/bim files\n");
+ // fprintf(pysam_stderr, "\n");
+ // fprintf(pysam_stderr, "PBWT options:\n");
+ // fprintf(pysam_stderr, " -b, --pbwt <prefix> or <pbwt>,<sites>,<sample>,<missing>\n");
+ // fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -1361,6 +1362,7 @@ int main_vcfconvert(int argc, char *argv[])
args->outfname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
static struct option loptions[] =
{
@@ -1389,6 +1391,7 @@ int main_vcfconvert(int argc, char *argv[])
{"haplegendsample2vcf",required_argument,NULL,'H'},
{"columns",required_argument,NULL,'c'},
{"fasta-ref",required_argument,NULL,'f'},
+ {"no-version",no_argument,NULL,10},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "?h:r:R:s:S:t:T:i:e:g:G:o:O:c:f:H:",loptions,NULL)) >= 0) {
@@ -1426,6 +1429,7 @@ int main_vcfconvert(int argc, char *argv[])
break;
case 'h': args->convert_func = vcf_to_haplegendsample; args->outfname = optarg; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 10 : args->record_cmd_line = 0; break;
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
}
diff --git a/bcftools/vcffilter.c b/bcftools/vcffilter.c
index ac4c3a3..f979d77 100644
--- a/bcftools/vcffilter.c
+++ b/bcftools/vcffilter.c
@@ -71,7 +71,7 @@ typedef struct _args_t
int output_type, n_threads;
char **argv, *output_fname, *targets_list, *regions_list;
- int argc;
+ int argc, record_cmd_line;
}
args_t;
@@ -149,7 +149,7 @@ static void init_data(args_t *args)
}
}
- bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
if ( args->filter_str )
args->filter = filter_init(args->hdr, args->filter_str);
@@ -408,6 +408,7 @@ static void usage(args_t *args)
fprintf(stderr, " -G, --IndelGap <int> filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
fprintf(stderr, " -i, --include <expr> include only sites for which the expression is true (see man page for details\n");
fprintf(stderr, " -m, --mode [+x] \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
fprintf(stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
@@ -430,6 +431,7 @@ int main_vcffilter(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
int regions_is_file = 0, targets_is_file = 0;
static struct option loptions[] =
@@ -448,6 +450,7 @@ int main_vcffilter(int argc, char *argv[])
{"threads",required_argument,NULL,9},
{"SnpGap",required_argument,NULL,'g'},
{"IndelGap",required_argument,NULL,'G'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
@@ -488,6 +491,7 @@ int main_vcffilter(int argc, char *argv[])
else error("The argument to -S not recognised: %s\n", optarg);
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage(args);
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcffilter.c.pysam.c b/bcftools/vcffilter.c.pysam.c
index c731ba3..58193da 100644
--- a/bcftools/vcffilter.c.pysam.c
+++ b/bcftools/vcffilter.c.pysam.c
@@ -73,7 +73,7 @@ typedef struct _args_t
int output_type, n_threads;
char **argv, *output_fname, *targets_list, *regions_list;
- int argc;
+ int argc, record_cmd_line;
}
args_t;
@@ -131,7 +131,7 @@ static void init_data(args_t *args)
if ( tmp.s ) kputs(" and ", &tmp);
kputs("\"IndelGap\"", &tmp);
}
- fprintf(pysamerr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
+ fprintf(pysam_stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
free(tmp.s);
}
@@ -151,7 +151,7 @@ static void init_data(args_t *args)
}
}
- bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
if ( args->filter_str )
args->filter = filter_init(args->hdr, args->filter_str);
@@ -400,26 +400,27 @@ static void set_genotypes(args_t *args, bcf1_t *line, int pass_site)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Apply fixed-threshold filters.\n");
- fprintf(pysamerr, "Usage: bcftools filter [options] <in.vcf.gz>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
- fprintf(pysamerr, " -g, --SnpGap <int> filter SNPs within <int> base pairs of an indel\n");
- fprintf(pysamerr, " -G, --IndelGap <int> filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
- fprintf(pysamerr, " -i, --include <expr> include only sites for which the expression is true (see man page for details\n");
- fprintf(pysamerr, " -m, --mode [+x] \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --soft-filter <string> annotate FILTER column with <string> or unique filter name (\"Filter%%d\") made up by the program (\"+\")\n");
- fprintf(pysamerr, " -S, --set-GTs <.|0> set genotypes of failed samples to missing (.) or ref (0)\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Apply fixed-threshold filters.\n");
+ fprintf(pysam_stderr, "Usage: bcftools filter [options] <in.vcf.gz>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -g, --SnpGap <int> filter SNPs within <int> base pairs of an indel\n");
+ fprintf(pysam_stderr, " -G, --IndelGap <int> filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
+ fprintf(pysam_stderr, " -i, --include <expr> include only sites for which the expression is true (see man page for details\n");
+ fprintf(pysam_stderr, " -m, --mode [+x] \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --soft-filter <string> annotate FILTER column with <string> or unique filter name (\"Filter%%d\") made up by the program (\"+\")\n");
+ fprintf(pysam_stderr, " -S, --set-GTs <.|0> set genotypes of failed samples to missing (.) or ref (0)\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -432,6 +433,7 @@ int main_vcffilter(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
int regions_is_file = 0, targets_is_file = 0;
static struct option loptions[] =
@@ -450,6 +452,7 @@ int main_vcffilter(int argc, char *argv[])
{"threads",required_argument,NULL,9},
{"SnpGap",required_argument,NULL,'g'},
{"IndelGap",required_argument,NULL,'G'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
@@ -490,6 +493,7 @@ int main_vcffilter(int argc, char *argv[])
else error("The argument to -S not recognised: %s\n", optarg);
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage(args);
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfgtcheck.c.pysam.c b/bcftools/vcfgtcheck.c.pysam.c
index 161ca3c..2f0a288 100644
--- a/bcftools/vcfgtcheck.c.pysam.c
+++ b/bcftools/vcfgtcheck.c.pysam.c
@@ -62,7 +62,7 @@ void py_plot(char *script)
int len = strlen(script);
char *cmd = !strcmp(".py",script+len-3) ? msprintf("python %s", script) : msprintf("python %s.py", script);
int ret = system(cmd);
- if ( ret ) fprintf(pysamerr, "The command returned non-zero status %d: %s\n", ret, cmd);
+ if ( ret ) fprintf(pysam_stderr, "The command returned non-zero status %d: %s\n", ret, cmd);
free(cmd);
}
@@ -272,7 +272,7 @@ static int init_gt2ipl(args_t *args, bcf1_t *gt_line, bcf1_t *sm_line, int *gt2i
gt2ipl[ bcf_ij2G(j,i) ] = k<=l ? bcf_ij2G(k,l) : bcf_ij2G(l,k);
}
}
- //for (i=0; i<n_gt2ipl; i++) printf("%d .. %d\n", i,gt2ipl[i]);
+ //for (i=0; i<n_gt2ipl; i++) fprintf(pysam_stdout, "%d .. %d\n", i,gt2ipl[i]);
return 1;
}
@@ -353,11 +353,11 @@ static void check_gt(args_t *args)
if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
if ( !args->no_PLs )
- fprintf(pysamerr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
+ fprintf(pysam_stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
fake_pls = 1;
}
- FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : stdout;
+ FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : pysam_stdout;
print_header(args, fp);
int tgt_isample = -1, query_isample = 0;
@@ -370,7 +370,7 @@ static void check_gt(args_t *args)
{
if ( tgt_isample==-1 )
{
- fprintf(pysamerr,"No target sample selected for comparison, using the first sample in %s: %s\n", args->gt_fname,args->gt_hdr->samples[0]);
+ fprintf(pysam_stderr,"No target sample selected for comparison, using the first sample in %s: %s\n", args->gt_fname,args->gt_hdr->samples[0]);
tgt_isample = 0;
}
}
@@ -556,12 +556,12 @@ static void cross_check_gts(args_t *args)
if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "GT")<0 )
error("[E::%s] Neither PL nor GT present in the header of %s\n", __func__, args->files->readers[0].fname);
if ( !args->no_PLs )
- fprintf(pysamerr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
+ fprintf(pysam_stderr,"Warning: PL not present in the header of %s, using GT instead\n", args->files->readers[0].fname);
fake_pls = 1;
}
if ( bcf_hdr_id2int(args->sm_hdr, BCF_DT_ID, "DP")<0 ) ignore_dp = 1;
- FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : stdout;
+ FILE *fp = args->plot ? open_file(NULL, "w", "%s.tab", args->plot) : pysam_stdout;
print_header(args, fp);
if ( args->all_sites ) fprintf(fp,"# [1]SD, Average Site Discordance\t[2]Chromosome\t[3]Position\t[4]Number of available pairs\t[5]Average discordance\n");
@@ -640,8 +640,8 @@ static void cross_check_gts(args_t *args)
if ( args->tmp_arr ) free(args->tmp_arr);
if ( is_hom ) free(is_hom);
- if ( pl_warned ) fprintf(pysamerr, "[W::%s] PL was not found at %d site(s)\n", __func__, pl_warned);
- if ( dp_warned ) fprintf(pysamerr, "[W::%s] DP was not found at %d site(s)\n", __func__, dp_warned);
+ if ( pl_warned ) fprintf(pysam_stderr, "[W::%s] PL was not found at %d site(s)\n", __func__, pl_warned);
+ if ( dp_warned ) fprintf(pysam_stderr, "[W::%s] DP was not found at %d site(s)\n", __func__, dp_warned);
// Output samples sorted by average discordance
double *score = (double*) calloc(nsamples,sizeof(double));
@@ -709,23 +709,23 @@ static char *init_prefix(char *prefix)
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Check sample identity. With no -g BCF given, multi-sample cross-check is performed.\n");
- fprintf(pysamerr, "Usage: bcftools gtcheck [options] [-g <genotypes.vcf.gz>] <query.vcf.gz>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -a, --all-sites output comparison for all sites\n");
- fprintf(pysamerr, " -g, --genotypes <file> genotypes to compare against\n");
- fprintf(pysamerr, " -G, --GTs-only <int> use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
- fprintf(pysamerr, " -H, --homs-only homozygous genotypes only (useful for low coverage data)\n");
- fprintf(pysamerr, " -p, --plot <prefix> plot\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --query-sample <string> query sample (by default the first sample is checked)\n");
- fprintf(pysamerr, " -S, --target-sample <string> target sample in the -g file (used only for plotting)\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Check sample identity. With no -g BCF given, multi-sample cross-check is performed.\n");
+ fprintf(pysam_stderr, "Usage: bcftools gtcheck [options] [-g <genotypes.vcf.gz>] <query.vcf.gz>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -a, --all-sites output comparison for all sites\n");
+ fprintf(pysam_stderr, " -g, --genotypes <file> genotypes to compare against\n");
+ fprintf(pysam_stderr, " -G, --GTs-only <int> use GTs, ignore PLs, using <int> for unseen genotypes [99]\n");
+ fprintf(pysam_stderr, " -H, --homs-only homozygous genotypes only (useful for low coverage data)\n");
+ fprintf(pysam_stderr, " -p, --plot <prefix> plot\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --query-sample <string> query sample (by default the first sample is checked)\n");
+ fprintf(pysam_stderr, " -S, --target-sample <string> target sample in the -g file (used only for plotting)\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
diff --git a/bcftools/vcfindex.c b/bcftools/vcfindex.c
index e40fab5..d1e9179 100644
--- a/bcftools/vcfindex.c
+++ b/bcftools/vcfindex.c
@@ -1,7 +1,7 @@
/* vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
- Copyright (C) 2014 Genome Research Ltd.
+ Copyright (C) 2014-2016 Genome Research Ltd.
Author: Shane McCarthy <sm15 at sanger.ac.uk>
@@ -177,6 +177,7 @@ int main_vcfindex(int argc, char *argv[])
if (stats) return vcf_index_stats(fname, stats);
htsFile *fp = hts_open(fname,"r");
+ if ( !fp ) error("Failed to read %s\n", fname);
htsFormat type = *hts_get_format(fp);
hts_close(fp);
diff --git a/bcftools/vcfindex.c.pysam.c b/bcftools/vcfindex.c.pysam.c
index 1cfde16..479fc57 100644
--- a/bcftools/vcfindex.c.pysam.c
+++ b/bcftools/vcfindex.c.pysam.c
@@ -3,7 +3,7 @@
/* vcfindex.c -- Index bgzip compressed VCF/BCF files for random access.
- Copyright (C) 2014 Genome Research Ltd.
+ Copyright (C) 2014-2016 Genome Research Ltd.
Author: Shane McCarthy <sm15 at sanger.ac.uk>
@@ -40,20 +40,20 @@ DEALINGS IN THE SOFTWARE. */
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Index bgzip compressed VCF/BCF files for random access.\n");
- fprintf(pysamerr, "Usage: bcftools index [options] <in.bcf>|<in.vcf.gz>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Indexing options:\n");
- fprintf(pysamerr, " -c, --csi generate CSI-format index for VCF/BCF files [default]\n");
- fprintf(pysamerr, " -f, --force overwrite index if it already exists\n");
- fprintf(pysamerr, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n");
- fprintf(pysamerr, " -t, --tbi generate TBI-format index for VCF files\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Stats options:\n");
- fprintf(pysamerr, " -n, --nrecords print number of records based on existing index file\n");
- fprintf(pysamerr, " -s, --stats print per contig stats based on existing index file\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Index bgzip compressed VCF/BCF files for random access.\n");
+ fprintf(pysam_stderr, "Usage: bcftools index [options] <in.bcf>|<in.vcf.gz>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Indexing options:\n");
+ fprintf(pysam_stderr, " -c, --csi generate CSI-format index for VCF/BCF files [default]\n");
+ fprintf(pysam_stderr, " -f, --force overwrite index if it already exists\n");
+ fprintf(pysam_stderr, " -m, --min-shift INT set minimal interval size for CSI indices to 2^INT [14]\n");
+ fprintf(pysam_stderr, " -t, --tbi generate TBI-format index for VCF files\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Stats options:\n");
+ fprintf(pysam_stderr, " -n, --nrecords print number of records based on existing index file\n");
+ fprintf(pysam_stderr, " -s, --stats print per contig stats based on existing index file\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -61,7 +61,7 @@ int vcf_index_stats(char *fname, int stats)
{
char *fn_out = NULL;
FILE *out;
- out = fn_out ? fopen(fn_out, "w") : stdout;
+ out = fn_out ? fopen(fn_out, "w") : pysam_stdout;
const char **seq;
int i, nseq;
@@ -69,23 +69,23 @@ int vcf_index_stats(char *fname, int stats)
hts_idx_t *idx = NULL;
htsFile *fp = hts_open(fname,"r");
- if ( !fp ) { fprintf(pysamerr,"Could not read %s\n", fname); return 1; }
+ if ( !fp ) { fprintf(pysam_stderr,"Could not read %s\n", fname); return 1; }
bcf_hdr_t *hdr = bcf_hdr_read(fp);
- if ( !hdr ) { fprintf(pysamerr,"Could not read the header: %s\n", fname); return 1; }
+ if ( !hdr ) { fprintf(pysam_stderr,"Could not read the header: %s\n", fname); return 1; }
if ( hts_get_format(fp)->format==vcf )
{
tbx = tbx_index_load(fname);
- if ( !tbx ) { fprintf(pysamerr,"Could not load TBI index: %s\n", fname); return 1; }
+ if ( !tbx ) { fprintf(pysam_stderr,"Could not load TBI index: %s\n", fname); return 1; }
}
else if ( hts_get_format(fp)->format==bcf )
{
idx = bcf_index_load(fname);
- if ( !idx ) { fprintf(pysamerr,"Could not load CSI index: %s\n", fname); return 1; }
+ if ( !idx ) { fprintf(pysam_stderr,"Could not load CSI index: %s\n", fname); return 1; }
}
else
{
- fprintf(pysamerr,"Could not detect the file type as VCF or BCF: %s\n", fname);
+ fprintf(pysam_stderr,"Could not detect the file type as VCF or BCF: %s\n", fname);
return 1;
}
@@ -108,7 +108,7 @@ int vcf_index_stats(char *fname, int stats)
bcf1_t *rec = bcf_init1();
if (bcf_read1(fp, hdr, rec) >= 0)
{
- fprintf(pysamerr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
+ fprintf(pysam_stderr,"%s index of %s does not contain any count metadata. Please re-index with a newer version of bcftools or tabix.\n", tbx ? "TBI" : "CSI", fname);
return 1;
}
bcf_destroy1(rec);
@@ -161,17 +161,17 @@ int main_vcfindex(int argc, char *argv[])
if ( optind==argc ) usage();
if (stats>2)
{
- fprintf(pysamerr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
+ fprintf(pysam_stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__);
return 1;
}
if (tbi && min_shift>0)
{
- fprintf(pysamerr, "[E::%s] min-shift option only expected for CSI indices \n", __func__);
+ fprintf(pysam_stderr, "[E::%s] min-shift option only expected for CSI indices \n", __func__);
return 1;
}
if (min_shift < 0 || min_shift > 30)
{
- fprintf(pysamerr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift);
+ fprintf(pysam_stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift);
return 1;
}
@@ -179,29 +179,30 @@ int main_vcfindex(int argc, char *argv[])
if (stats) return vcf_index_stats(fname, stats);
htsFile *fp = hts_open(fname,"r");
+ if ( !fp ) error("Failed to read %s\n", fname);
htsFormat type = *hts_get_format(fp);
hts_close(fp);
if ( (type.format!=bcf && type.format!=vcf) || type.compression!=bgzf )
{
- fprintf(pysamerr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
+ fprintf(pysam_stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__);
if ( type.compression!=bgzf )
- fprintf(pysamerr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
+ fprintf(pysam_stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__);
return 1;
}
if (tbi && type.format==bcf)
{
- fprintf(pysamerr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
+ fprintf(pysam_stderr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n");
tbi = 0; min_shift = BCF_LIDX_SHIFT;
}
if (min_shift == 0 && type.format==bcf)
{
- fprintf(pysamerr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
+ fprintf(pysam_stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__);
return 1;
}
if (!tbi && type.format==vcf && min_shift == 0)
{
- fprintf(pysamerr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
+ fprintf(pysam_stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n");
tbi = 1;
}
@@ -216,7 +217,7 @@ int main_vcfindex(int argc, char *argv[])
stat(fname, &stat_file);
if ( stat_file.st_mtime <= stat_tbi.st_mtime )
{
- fprintf(pysamerr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
+ fprintf(pysam_stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__);
return 1;
}
}
@@ -226,7 +227,7 @@ int main_vcfindex(int argc, char *argv[])
{
if ( bcf_index_build(fname, min_shift) != 0 )
{
- fprintf(pysamerr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
+ fprintf(pysam_stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname);
return 1;
}
}
@@ -234,7 +235,7 @@ int main_vcfindex(int argc, char *argv[])
{
if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 )
{
- fprintf(pysamerr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
+ fprintf(pysam_stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname);
return 1;
}
}
diff --git a/bcftools/vcfisec.c b/bcftools/vcfisec.c
index 6115146..9afe620 100644
--- a/bcftools/vcfisec.c
+++ b/bcftools/vcfisec.c
@@ -58,7 +58,7 @@ typedef struct
htsFile **fh_out;
char **argv, *prefix, *output_fname, **fnames, *write_files, *targets_list, *regions_list;
char *isec_exact;
- int argc;
+ int argc, record_cmd_line;
}
args_t;
@@ -143,7 +143,7 @@ void isec_vcf(args_t *args)
out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
if ( out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
- bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
+ if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
bcf_hdr_write(out_fh, files->readers[args->iwrite].header);
}
if ( !args->nwrite && !out_std && !args->prefix )
@@ -351,7 +351,7 @@ static void init_data(args_t *args)
args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode(args->output_type)); \
if ( !args->fh_out[i] ) error("Could not open %s\n", args->fnames[i]); \
if ( args->n_threads ) hts_set_threads(args->fh_out[i], args->n_threads); \
- bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
+ if (args->record_cmd_line) bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
bcf_hdr_write(args->fh_out[i], args->files->readers[j].header); \
}
if ( !args->nwrite || args->write[0] )
@@ -456,6 +456,7 @@ static void usage(void)
fprintf(stderr, " -e, --exclude <expr> exclude sites for which the expression is true\n");
fprintf(stderr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
fprintf(stderr, " -i, --include <expr> include only sites for which the expression is true\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -n, --nfiles [+-=~]<int> output positions present in this many (=), this many or more (+), this many or fewer (-), the exact (~) files\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
@@ -464,8 +465,8 @@ static void usage(void)
fprintf(stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
fprintf(stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
fprintf(stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(stderr, " -w, --write <list> list of files to write with -p given as 1-based indexes. By default, all files are written\n");
fprintf(stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(stderr, " -w, --write <list> list of files to write with -p given as 1-based indexes. By default, all files are written\n");
fprintf(stderr, "\n");
fprintf(stderr, "Examples:\n");
fprintf(stderr, " # Create intersection and complements of two sets saving the output in dir/*\n");
@@ -492,6 +493,7 @@ int main_vcfisec(int argc, char *argv[])
args->output_fname = NULL;
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
int targets_is_file = 0, regions_is_file = 0;
static struct option loptions[] =
@@ -512,6 +514,7 @@ int main_vcfisec(int argc, char *argv[])
{"output",required_argument,NULL,'o'},
{"output-type",required_argument,NULL,'O'},
{"threads",required_argument,NULL,9},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "hc:r:R:p:n:w:t:T:Cf:o:O:i:e:",loptions,NULL)) >= 0) {
@@ -560,6 +563,7 @@ int main_vcfisec(int argc, char *argv[])
}
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfisec.c.pysam.c b/bcftools/vcfisec.c.pysam.c
index 2418895..758d475 100644
--- a/bcftools/vcfisec.c.pysam.c
+++ b/bcftools/vcfisec.c.pysam.c
@@ -60,7 +60,7 @@ typedef struct
htsFile **fh_out;
char **argv, *prefix, *output_fname, **fnames, *write_files, *targets_list, *regions_list;
char *isec_exact;
- int argc;
+ int argc, record_cmd_line;
}
args_t;
@@ -136,7 +136,7 @@ void isec_vcf(args_t *args)
kstring_t str = {0,0,0};
htsFile *out_fh = NULL;
- // When only one VCF is output, print VCF to stdout or -o file
+ // When only one VCF is output, print VCF to pysam_stdout or -o file
int out_std = 0;
if ( args->nwrite==1 && !args->prefix ) out_std = 1;
if ( args->targets_list && files->nreaders==1 ) out_std = 1;
@@ -145,11 +145,11 @@ void isec_vcf(args_t *args)
out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
if ( out_fh == NULL ) error("Can't write to %s: %s\n", args->output_fname? args->output_fname : "standard output", strerror(errno));
if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
- bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
+ if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
bcf_hdr_write(out_fh, files->readers[args->iwrite].header);
}
if ( !args->nwrite && !out_std && !args->prefix )
- fprintf(pysamerr,"Note: -w option not given, printing list of sites...\n");
+ fprintf(pysam_stderr,"Note: -w option not given, printing list of sites...\n");
int n;
while ( (n=bcf_sr_next_line(files)) )
@@ -353,7 +353,7 @@ static void init_data(args_t *args)
args->fh_out[i] = hts_open(args->fnames[i], hts_bcf_wmode(args->output_type)); \
if ( !args->fh_out[i] ) error("Could not open %s\n", args->fnames[i]); \
if ( args->n_threads ) hts_set_threads(args->fh_out[i], args->n_threads); \
- bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
+ if (args->record_cmd_line) bcf_hdr_append_version(args->files->readers[j].header,args->argc,args->argv,"bcftools_isec"); \
bcf_hdr_write(args->fh_out[i], args->files->readers[j].header); \
}
if ( !args->nwrite || args->write[0] )
@@ -402,7 +402,7 @@ static void init_data(args_t *args)
if ( args->fh_sites == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
}
else
- args->fh_sites = stdout;
+ args->fh_sites = pysam_stdout;
}
}
@@ -448,40 +448,41 @@ static void destroy_data(args_t *args)
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Create intersections, unions and complements of VCF files.\n");
- fprintf(pysamerr, "Usage: bcftools isec [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -c, --collapse <string> treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
- fprintf(pysamerr, " -C, --complement output positions present only in the first file but missing in the others\n");
- fprintf(pysamerr, " -e, --exclude <expr> exclude sites for which the expression is true\n");
- fprintf(pysamerr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
- fprintf(pysamerr, " -i, --include <expr> include only sites for which the expression is true\n");
- fprintf(pysamerr, " -n, --nfiles [+-=~]<int> output positions present in this many (=), this many or more (+), this many or fewer (-), the exact (~) files\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
- fprintf(pysamerr, " -p, --prefix <dir> if given, subset each of the input files accordingly, see also -w\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, " -w, --write <list> list of files to write with -p given as 1-based indexes. By default, all files are written\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Examples:\n");
- fprintf(pysamerr, " # Create intersection and complements of two sets saving the output in dir/*\n");
- fprintf(pysamerr, " bcftools isec A.vcf.gz B.vcf.gz -p dir\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, " # Filter sites in A and B (but not in C) and create intersection\n");
- fprintf(pysamerr, " bcftools isec -e'MAF<0.01' -i'dbSNP=1' -e - A.vcf.gz B.vcf.gz C.vcf.gz -p dir\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, " # Extract and write records from A shared by both A and B using exact allele match\n");
- fprintf(pysamerr, " bcftools isec A.vcf.gz B.vcf.gz -p dir -n =2 -w 1\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, " # Extract records private to A or B comparing by position only\n");
- fprintf(pysamerr, " bcftools isec A.vcf.gz B.vcf.gz -p dir -n -1 -c all\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Create intersections, unions and complements of VCF files.\n");
+ fprintf(pysam_stderr, "Usage: bcftools isec [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -c, --collapse <string> treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
+ fprintf(pysam_stderr, " -C, --complement output positions present only in the first file but missing in the others\n");
+ fprintf(pysam_stderr, " -e, --exclude <expr> exclude sites for which the expression is true\n");
+ fprintf(pysam_stderr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+ fprintf(pysam_stderr, " -i, --include <expr> include only sites for which the expression is true\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -n, --nfiles [+-=~]<int> output positions present in this many (=), this many or more (+), this many or fewer (-), the exact (~) files\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " -p, --prefix <dir> if given, subset each of the input files accordingly, see also -w\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, " -w, --write <list> list of files to write with -p given as 1-based indexes. By default, all files are written\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Examples:\n");
+ fprintf(pysam_stderr, " # Create intersection and complements of two sets saving the output in dir/*\n");
+ fprintf(pysam_stderr, " bcftools isec A.vcf.gz B.vcf.gz -p dir\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, " # Filter sites in A and B (but not in C) and create intersection\n");
+ fprintf(pysam_stderr, " bcftools isec -e'MAF<0.01' -i'dbSNP=1' -e - A.vcf.gz B.vcf.gz C.vcf.gz -p dir\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, " # Extract and write records from A shared by both A and B using exact allele match\n");
+ fprintf(pysam_stderr, " bcftools isec A.vcf.gz B.vcf.gz -p dir -n =2 -w 1\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, " # Extract records private to A or B comparing by position only\n");
+ fprintf(pysam_stderr, " bcftools isec A.vcf.gz B.vcf.gz -p dir -n -1 -c all\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -494,6 +495,7 @@ int main_vcfisec(int argc, char *argv[])
args->output_fname = NULL;
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
int targets_is_file = 0, regions_is_file = 0;
static struct option loptions[] =
@@ -514,6 +516,7 @@ int main_vcfisec(int argc, char *argv[])
{"output",required_argument,NULL,'o'},
{"output-type",required_argument,NULL,'O'},
{"threads",required_argument,NULL,9},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "hc:r:R:p:n:w:t:T:Cf:o:O:i:e:",loptions,NULL)) >= 0) {
@@ -562,6 +565,7 @@ int main_vcfisec(int argc, char *argv[])
}
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfmerge.c b/bcftools/vcfmerge.c
index 0517bd5..02fac6b 100644
--- a/bcftools/vcfmerge.c
+++ b/bcftools/vcfmerge.c
@@ -118,7 +118,7 @@ typedef struct
htsFile *out_fh;
bcf_hdr_t *out_hdr;
char **argv;
- int argc, n_threads;
+ int argc, n_threads, record_cmd_line;
}
args_t;
@@ -858,7 +858,7 @@ int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst
}
if ( ith_src!=isrc ) return -1; // requested field not found
int end_src = start_src;
- while ( end_src<src_len && src[end_src]!=',' ) end_src++;
+ while ( end_src<src_len && src[end_src] && src[end_src]!=',' ) end_src++;
int nsrc_cpy = end_src - start_src;
if ( nsrc_cpy==1 && src[start_src]=='.' ) return 0; // don't write missing values, dst is already initialized
@@ -1913,7 +1913,7 @@ void merge_vcf(args_t *args)
char buf[10]; snprintf(buf,10,"%d",i+1);
merge_headers(args->out_hdr, args->files->readers[i].header,buf,args->force_samples);
}
- bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
bcf_hdr_sync(args->out_hdr);
}
info_rules_init(args);
@@ -1962,6 +1962,7 @@ static void usage(void)
fprintf(stderr, " -i, --info-rules <tag:method,..> rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
fprintf(stderr, " -l, --file-list <file> read file names from the file\n");
fprintf(stderr, " -m, --merge <string> allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
fprintf(stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
@@ -1980,6 +1981,7 @@ int main_vcfmerge(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->collapse = COLLAPSE_BOTH;
int regions_is_file = 0;
@@ -1998,6 +2000,7 @@ int main_vcfmerge(int argc, char *argv[])
{"regions",required_argument,NULL,'r'},
{"regions-file",required_argument,NULL,'R'},
{"info-rules",required_argument,NULL,'i'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:",loptions,NULL)) >= 0) {
@@ -2032,6 +2035,7 @@ int main_vcfmerge(int argc, char *argv[])
case 2 : args->header_only = 1; break;
case 3 : args->force_samples = 1; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfmerge.c.pysam.c b/bcftools/vcfmerge.c.pysam.c
index 94b5252..daac458 100644
--- a/bcftools/vcfmerge.c.pysam.c
+++ b/bcftools/vcfmerge.c.pysam.c
@@ -120,7 +120,7 @@ typedef struct
htsFile *out_fh;
bcf_hdr_t *out_hdr;
char **argv;
- int argc, n_threads;
+ int argc, n_threads, record_cmd_line;
}
args_t;
@@ -451,8 +451,8 @@ void merge_headers(bcf_hdr_t *hw, const bcf_hdr_t *hr, const char *clash_prefix,
void debug_als(char **als, int nals)
{
- int k; for (k=0; k<nals; k++) fprintf(pysamerr,"%s ", als[k]);
- fprintf(pysamerr,"\n");
+ int k; for (k=0; k<nals; k++) fprintf(pysam_stderr,"%s ", als[k]);
+ fprintf(pysam_stderr,"\n");
}
/**
@@ -534,7 +534,7 @@ char **merge_alleles(char **a, int na, int *map, char **b, int *nb, int *mb)
{
if ( strncasecmp(a[0],b[0],rla<rlb?rla:rlb) )
{
- fprintf(pysamerr, "The REF prefixes differ: %s vs %s (%d,%d)\n", a[0],b[0],rla,rlb);
+ fprintf(pysam_stderr, "The REF prefixes differ: %s vs %s (%d,%d)\n", a[0],b[0],rla,rlb);
return NULL;
}
// Different case, change to uppercase
@@ -657,13 +657,13 @@ void maux_reset(maux_t *ma)
}
void maux_debug(maux_t *ma, int ir, int ib)
{
- printf("[%d,%d]\t", ir,ib);
+ fprintf(pysam_stdout, "[%d,%d]\t", ir,ib);
int i;
for (i=0; i<ma->nals; i++)
{
- printf(" %s [%d]", ma->als[i], ma->cnt[i]);
+ fprintf(pysam_stdout, " %s [%d]", ma->als[i], ma->cnt[i]);
}
- printf("\n");
+ fprintf(pysam_stdout, "\n");
}
void merge_chrom2qual(args_t *args, bcf1_t *out)
@@ -860,7 +860,7 @@ int copy_string_field(char *src, int isrc, int src_len, kstring_t *dst, int idst
}
if ( ith_src!=isrc ) return -1; // requested field not found
int end_src = start_src;
- while ( end_src<src_len && src[end_src]!=',' ) end_src++;
+ while ( end_src<src_len && src[end_src] && src[end_src]!=',' ) end_src++;
int nsrc_cpy = end_src - start_src;
if ( nsrc_cpy==1 && src[start_src]=='.' ) return 0; // don't write missing values, dst is already initialized
@@ -946,7 +946,7 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i
case BCF_BT_INT16: BRANCH(int16_t, *src==bcf_int16_missing, *src==bcf_int16_vector_end, int); break;
case BCF_BT_INT32: BRANCH(int32_t, *src==bcf_int32_missing, *src==bcf_int32_vector_end, int); break;
case BCF_BT_FLOAT: BRANCH(float, bcf_float_is_missing(*src), bcf_float_is_vector_end(*src), float); break;
- default: fprintf(pysamerr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
+ default: fprintf(pysam_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
}
#undef BRANCH
}
@@ -976,7 +976,7 @@ static void merge_AGR_info_tag(bcf_hdr_t *hdr, bcf1_t *line, bcf_info_t *info, i
case BCF_BT_INT16: BRANCH(int16_t, src[kori]==bcf_int16_missing, src[kori]==bcf_int16_vector_end, int); break;
case BCF_BT_INT32: BRANCH(int32_t, src[kori]==bcf_int32_missing, src[kori]==bcf_int32_vector_end, int); break;
case BCF_BT_FLOAT: BRANCH(float, bcf_float_is_missing(src[kori]), bcf_float_is_vector_end(src[kori]), float); break;
- default: fprintf(pysamerr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
+ default: fprintf(pysam_stderr,"TODO: %s:%d .. info->type=%d\n", __FILE__,__LINE__, info->type); exit(1);
}
#undef BRANCH
}
@@ -1556,7 +1556,7 @@ void shake_buffer(maux_t *maux, int ir, int pos)
if ( !reader->buffer ) return;
int i;
- // FILE *fp = stdout;
+ // FILE *fp = pysam_stdout;
// fprintf(fp,"<going to shake> nbuf=%d\t", reader->nbuffer); for (i=0; i<reader->nbuffer; i++) fprintf(fp," %d", skip[i]); fprintf(fp,"\n");
// debug_buffer(fp,reader);
// fprintf(fp,"--\n");
@@ -1641,43 +1641,43 @@ void debug_maux(args_t *args, int pos, int var_type)
maux_t *maux = args->maux;
int j,k,l;
- fprintf(pysamerr,"Alleles to merge at %d\n", pos+1);
+ fprintf(pysam_stderr,"Alleles to merge at %d\n", pos+1);
for (j=0; j<files->nreaders; j++)
{
bcf_sr_t *reader = &files->readers[j];
- fprintf(pysamerr," reader %d: ", j);
+ fprintf(pysam_stderr," reader %d: ", j);
for (k=0; k<=reader->nbuffer; k++)
{
if ( maux->d[j][k].skip==SKIP_DONE ) continue;
bcf1_t *line = reader->buffer[k];
if ( line->pos!=pos ) continue;
- fprintf(pysamerr,"\t");
- if ( maux->d[j][k].skip ) fprintf(pysamerr,"["); // this record will not be merged in this round
+ fprintf(pysam_stderr,"\t");
+ if ( maux->d[j][k].skip ) fprintf(pysam_stderr,"["); // this record will not be merged in this round
for (l=0; l<line->n_allele; l++)
- fprintf(pysamerr,"%s%s", l==0?"":",", line->d.allele[l]);
- if ( maux->d[j][k].skip ) fprintf(pysamerr,"]");
+ fprintf(pysam_stderr,"%s%s", l==0?"":",", line->d.allele[l]);
+ if ( maux->d[j][k].skip ) fprintf(pysam_stderr,"]");
}
- fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr,"\n");
}
- fprintf(pysamerr," counts: ");
- for (j=0; j<maux->nals; j++) fprintf(pysamerr,"%s %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]); fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr," counts: ");
+ for (j=0; j<maux->nals; j++) fprintf(pysam_stderr,"%s %dx %s", j==0?"":",",maux->cnt[j], maux->als[j]); fprintf(pysam_stderr,"\n");
for (j=0; j<files->nreaders; j++)
{
bcf_sr_t *reader = &files->readers[j];
- fprintf(pysamerr," out %d: ", j);
+ fprintf(pysam_stderr," out %d: ", j);
for (k=0; k<=reader->nbuffer; k++)
{
if ( maux->d[j][k].skip==SKIP_DONE ) continue;
bcf1_t *line = reader->buffer[k];
if ( line->pos!=pos ) continue;
if ( maux->d[j][k].skip ) continue;
- fprintf(pysamerr,"\t");
+ fprintf(pysam_stderr,"\t");
for (l=0; l<line->n_allele; l++)
- fprintf(pysamerr,"%s%s", l==0?"":",", maux->als[maux->d[j][k].map[l]]);
+ fprintf(pysam_stderr,"%s%s", l==0?"":",", maux->als[maux->d[j][k].map[l]]);
}
- fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr,"\n");
}
- fprintf(pysamerr,"\n");
+ fprintf(pysam_stderr,"\n");
}
// Determine which line should be merged from which reader: go through all
@@ -1915,7 +1915,7 @@ void merge_vcf(args_t *args)
char buf[10]; snprintf(buf,10,"%d",i+1);
merge_headers(args->out_hdr, args->files->readers[i].header,buf,args->force_samples);
}
- bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->out_hdr, args->argc, args->argv, "bcftools_merge");
bcf_hdr_sync(args->out_hdr);
}
info_rules_init(args);
@@ -1950,26 +1950,27 @@ void merge_vcf(args_t *args)
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Merge multiple VCF/BCF files from non-overlapping sample sets to create one multi-sample file.\n");
- fprintf(pysamerr, " Note that only records from different files can be merged, never from the same file. For\n");
- fprintf(pysamerr, " \"vertical\" merge take a look at \"bcftools norm\" instead.\n");
- fprintf(pysamerr, "Usage: bcftools merge [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " --force-samples resolve duplicate sample names\n");
- fprintf(pysamerr, " --print-header print only the merged header and exit\n");
- fprintf(pysamerr, " --use-header <file> use the provided header\n");
- fprintf(pysamerr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
- fprintf(pysamerr, " -i, --info-rules <tag:method,..> rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
- fprintf(pysamerr, " -l, --file-list <file> read file names from the file\n");
- fprintf(pysamerr, " -m, --merge <string> allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Merge multiple VCF/BCF files from non-overlapping sample sets to create one multi-sample file.\n");
+ fprintf(pysam_stderr, " Note that only records from different files can be merged, never from the same file. For\n");
+ fprintf(pysam_stderr, " \"vertical\" merge take a look at \"bcftools norm\" instead.\n");
+ fprintf(pysam_stderr, "Usage: bcftools merge [options] <A.vcf.gz> <B.vcf.gz> [...]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " --force-samples resolve duplicate sample names\n");
+ fprintf(pysam_stderr, " --print-header print only the merged header and exit\n");
+ fprintf(pysam_stderr, " --use-header <file> use the provided header\n");
+ fprintf(pysam_stderr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+ fprintf(pysam_stderr, " -i, --info-rules <tag:method,..> rules for merging INFO fields (method is one of sum,avg,min,max,join) or \"-\" to turn off the default [DP:sum,DP4:sum]\n");
+ fprintf(pysam_stderr, " -l, --file-list <file> read file names from the file\n");
+ fprintf(pysam_stderr, " -m, --merge <string> allow multiallelic records for <snps|indels|both|all|none|id>, see man page for details [both]\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -1982,6 +1983,7 @@ int main_vcfmerge(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->collapse = COLLAPSE_BOTH;
int regions_is_file = 0;
@@ -2000,6 +2002,7 @@ int main_vcfmerge(int argc, char *argv[])
{"regions",required_argument,NULL,'r'},
{"regions-file",required_argument,NULL,'R'},
{"info-rules",required_argument,NULL,'i'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "hm:f:r:R:o:O:i:l:",loptions,NULL)) >= 0) {
@@ -2034,6 +2037,7 @@ int main_vcfmerge(int argc, char *argv[])
case 2 : args->header_only = 1; break;
case 3 : args->force_samples = 1; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfnorm.c b/bcftools/vcfnorm.c
index 732eca9..781833c 100644
--- a/bcftools/vcfnorm.c
+++ b/bcftools/vcfnorm.c
@@ -1,6 +1,6 @@
/* vcfnorm.c -- Left-align and normalize indels.
- Copyright (C) 2013-2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -76,6 +76,7 @@ typedef struct
char **argv, *output_fname, *ref_fname, *vcf_fname, *region, *targets;
int argc, rmdup, output_type, n_threads, check_ref, strict_filter, do_indels;
int nchanged, nskipped, nsplit, ntotal, mrows_op, mrows_collapse, parsimonious;
+ int record_cmd_line;
}
args_t;
@@ -295,17 +296,19 @@ static int realign(args_t *args, bcf1_t *line)
if ( i>0 && als[i].l==als[0].l && !strcasecmp(als[0].s,als[i].s) ) return ERR_DUP_ALLELE;
}
-
// trim from right
int ori_pos = line->pos;
while (1)
{
// is the rightmost base identical in all alleles?
+ int min_len = als[0].l;
for (i=1; i<line->n_allele; i++)
{
if ( als[0].s[ als[0].l-1 ]!=als[i].s[ als[i].l-1 ] ) break;
+ if ( als[i].l < min_len ) min_len = als[i].l;
}
if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed
+ if ( min_len<=1 && line->pos==0 ) break;
int pad_from_left = 0;
for (i=0; i<line->n_allele; i++) // trim all alleles
@@ -343,7 +346,7 @@ static int realign(args_t *args, bcf1_t *line)
if ( als[0].s[ntrim_left]!=als[i].s[ntrim_left] ) break;
if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left;
}
- if ( i!=line->n_allele || min_len==1 ) break; // there are differences, cannot be trimmed
+ if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed
ntrim_left++;
}
if ( ntrim_left )
@@ -1287,7 +1290,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
{
kstring_t *tmp = &args->tmp_str[i];
kputsn(tmp->s,tmp->l,&str);
- for (j=tmp->l; j<max_len; j++) kputc(0,tmp);
+ for (j=tmp->l; j<max_len; j++) kputc('\0',&str);
}
args->ntmp_arr2 = str.m;
args->tmp_arr2 = (uint8_t*)str.s;
@@ -1581,7 +1584,7 @@ static void normalize_vcf(args_t *args)
htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
if ( args->n_threads ) hts_set_threads(out, args->n_threads);
- bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
bcf_hdr_write(out, args->hdr);
int prev_rid = -1, prev_pos = -1, prev_type = 0;
@@ -1641,7 +1644,6 @@ static void normalize_vcf(args_t *args)
if ( args->lines[ilast]->pos - args->lines[i]->pos < args->buf_win ) break;
j++;
}
- if ( args->rbuf.n==args->rbuf.m ) j = 1;
if ( j>0 ) flush_buffer(args, out, j);
}
flush_buffer(args, out, args->rbuf.n);
@@ -1666,6 +1668,7 @@ static void usage(void)
fprintf(stderr, " -d, --rm-dup <type> remove duplicate snps|indels|both|any\n");
fprintf(stderr, " -f, --fasta-ref <file> reference sequence\n");
fprintf(stderr, " -m, --multiallelics <-|+>[type] split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -N, --do-not-normalize do not normalize indels (with -m or -c s)\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <type> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
@@ -1674,8 +1677,8 @@ static void usage(void)
fprintf(stderr, " -s, --strict-filter when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
fprintf(stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
fprintf(stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(stderr, " -w, --site-win <int> buffer for sorting lines which changed position during realignment [1000]\n");
fprintf(stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(stderr, " -w, --site-win <int> buffer for sorting lines which changed position during realignment [1000]\n");
fprintf(stderr, "\n");
exit(1);
}
@@ -1689,6 +1692,7 @@ int main_vcfnorm(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->aln_win = 100;
args->buf_win = 1000;
args->mrows_collapse = COLLAPSE_BOTH;
@@ -1714,6 +1718,7 @@ int main_vcfnorm(int argc, char *argv[])
{"threads",required_argument,NULL,9},
{"check-ref",required_argument,NULL,'c'},
{"strict-filter",no_argument,NULL,'s'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
@@ -1771,6 +1776,7 @@ int main_vcfnorm(int argc, char *argv[])
if ( *tmp ) error("Could not parse argument: --site-win %s\n", optarg);
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfnorm.c.pysam.c b/bcftools/vcfnorm.c.pysam.c
index 2cdf399..200ce79 100644
--- a/bcftools/vcfnorm.c.pysam.c
+++ b/bcftools/vcfnorm.c.pysam.c
@@ -2,7 +2,7 @@
/* vcfnorm.c -- Left-align and normalize indels.
- Copyright (C) 2013-2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Petr Danecek <pd3 at sanger.ac.uk>
@@ -78,6 +78,7 @@ typedef struct
char **argv, *output_fname, *ref_fname, *vcf_fname, *region, *targets;
int argc, rmdup, output_type, n_threads, check_ref, strict_filter, do_indels;
int nchanged, nskipped, nsplit, ntotal, mrows_op, mrows_collapse, parsimonious;
+ int record_cmd_line;
}
args_t;
@@ -275,7 +276,7 @@ static int realign(args_t *args, bcf1_t *line)
if ( args->check_ref==CHECK_REF_EXIT )
error("Reference allele mismatch at %s:%d .. REF_SEQ:'%s' vs VCF:'%s'\n", bcf_seqname(args->hdr,line),line->pos+1,ref,line->d.allele[0]);
if ( args->check_ref & CHECK_REF_WARN )
- fprintf(pysamerr,"REF_MISMATCH\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[0]);
+ fprintf(pysam_stderr,"REF_MISMATCH\t%s\t%d\t%s\n", bcf_seqname(args->hdr,line),line->pos+1,line->d.allele[0]);
free(ref);
return ERR_REF_MISMATCH;
}
@@ -297,17 +298,19 @@ static int realign(args_t *args, bcf1_t *line)
if ( i>0 && als[i].l==als[0].l && !strcasecmp(als[0].s,als[i].s) ) return ERR_DUP_ALLELE;
}
-
// trim from right
int ori_pos = line->pos;
while (1)
{
// is the rightmost base identical in all alleles?
+ int min_len = als[0].l;
for (i=1; i<line->n_allele; i++)
{
if ( als[0].s[ als[0].l-1 ]!=als[i].s[ als[i].l-1 ] ) break;
+ if ( als[i].l < min_len ) min_len = als[i].l;
}
if ( i!=line->n_allele ) break; // there are differences, cannot be trimmed
+ if ( min_len<=1 && line->pos==0 ) break;
int pad_from_left = 0;
for (i=0; i<line->n_allele; i++) // trim all alleles
@@ -345,7 +348,7 @@ static int realign(args_t *args, bcf1_t *line)
if ( als[0].s[ntrim_left]!=als[i].s[ntrim_left] ) break;
if ( min_len > als[i].l - ntrim_left ) min_len = als[i].l - ntrim_left;
}
- if ( i!=line->n_allele || min_len==1 ) break; // there are differences, cannot be trimmed
+ if ( i!=line->n_allele || min_len<=1 ) break; // there are differences, cannot be trimmed
ntrim_left++;
}
if ( ntrim_left )
@@ -855,7 +858,7 @@ static void merge_info_numeric(args_t *args, bcf1_t **lines, int nlines, bcf_inf
{ \
/* expecting diploid gt in INFO */ \
if (nvals_ori!=lines[0]->n_allele*(lines[0]->n_allele+1)/2) { \
- fprintf(pysamerr, "todo: merge Number=G INFO fields for haploid sites\n"); \
+ fprintf(pysam_stderr, "todo: merge Number=G INFO fields for haploid sites\n"); \
error("vcfnorm: number of fields in first record at position %s:%d for INFO tag %s not as expected [found: %d vs expected:%d]\n", bcf_seqname(args->hdr,lines[0]),lines[0]->pos+1, tag, nvals_ori, lines[0]->n_allele*(lines[0]->n_allele+1)/2); \
} \
int nvals = dst->n_allele*(dst->n_allele+1)/2; \
@@ -1289,7 +1292,7 @@ static void merge_format_string(args_t *args, bcf1_t **lines, int nlines, bcf_fm
{
kstring_t *tmp = &args->tmp_str[i];
kputsn(tmp->s,tmp->l,&str);
- for (j=tmp->l; j<max_len; j++) kputc(0,tmp);
+ for (j=tmp->l; j<max_len; j++) kputc('\0',&str);
}
args->ntmp_arr2 = str.m;
args->tmp_arr2 = (uint8_t*)str.s;
@@ -1560,7 +1563,7 @@ static void normalize_line(args_t *args, bcf1_t **line_ptr)
else if ( args->check_ref==CHECK_REF_EXIT )
error("Duplicate alleles at %s:%d; run with -cw to turn the error into warning or with -cs to fix.\n", bcf_seqname(args->hdr,line),line->pos+1);
else if ( args->check_ref & CHECK_REF_WARN )
- fprintf(pysamerr,"ALT_DUP\t%s\t%d\n", bcf_seqname(args->hdr,line),line->pos+1);
+ fprintf(pysam_stderr,"ALT_DUP\t%s\t%d\n", bcf_seqname(args->hdr,line),line->pos+1);
}
}
}
@@ -1583,7 +1586,7 @@ static void normalize_vcf(args_t *args)
htsFile *out = hts_open(args->output_fname, hts_bcf_wmode(args->output_type));
if ( out == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
if ( args->n_threads ) hts_set_threads(out, args->n_threads);
- bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_norm");
bcf_hdr_write(out, args->hdr);
int prev_rid = -1, prev_pos = -1, prev_type = 0;
@@ -1643,42 +1646,42 @@ static void normalize_vcf(args_t *args)
if ( args->lines[ilast]->pos - args->lines[i]->pos < args->buf_win ) break;
j++;
}
- if ( args->rbuf.n==args->rbuf.m ) j = 1;
if ( j>0 ) flush_buffer(args, out, j);
}
flush_buffer(args, out, args->rbuf.n);
hts_close(out);
- fprintf(pysamerr,"Lines total/split/realigned/skipped:\t%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->nchanged,args->nskipped);
+ fprintf(pysam_stderr,"Lines total/split/realigned/skipped:\t%d/%d/%d/%d\n", args->ntotal,args->nsplit,args->nchanged,args->nskipped);
if ( args->check_ref & CHECK_REF_FIX )
- fprintf(pysamerr,"REF/ALT total/modified/added: \t%d/%d/%d\n", args->nref.tot,args->nref.swap,args->nref.set);
+ fprintf(pysam_stderr,"REF/ALT total/modified/added: \t%d/%d/%d\n", args->nref.tot,args->nref.swap,args->nref.set);
}
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Left-align and normalize indels; check if REF alleles match the reference;\n");
- fprintf(pysamerr, " split multiallelic sites into multiple rows; recover multiallelics from\n");
- fprintf(pysamerr, " multiple rows.\n");
- fprintf(pysamerr, "Usage: bcftools norm [options] <in.vcf.gz>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -c, --check-ref <e|w|x|s> check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
- fprintf(pysamerr, " -D, --remove-duplicates remove duplicate lines of the same type.\n");
- fprintf(pysamerr, " -d, --rm-dup <type> remove duplicate snps|indels|both|any\n");
- fprintf(pysamerr, " -f, --fasta-ref <file> reference sequence\n");
- fprintf(pysamerr, " -m, --multiallelics <-|+>[type] split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
- fprintf(pysamerr, " -N, --do-not-normalize do not normalize indels (with -m or -c s)\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <type> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --strict-filter when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, " -w, --site-win <int> buffer for sorting lines which changed position during realignment [1000]\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Left-align and normalize indels; check if REF alleles match the reference;\n");
+ fprintf(pysam_stderr, " split multiallelic sites into multiple rows; recover multiallelics from\n");
+ fprintf(pysam_stderr, " multiple rows.\n");
+ fprintf(pysam_stderr, "Usage: bcftools norm [options] <in.vcf.gz>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -c, --check-ref <e|w|x|s> check REF alleles and exit (e), warn (w), exclude (x), or set (s) bad sites [e]\n");
+ fprintf(pysam_stderr, " -D, --remove-duplicates remove duplicate lines of the same type.\n");
+ fprintf(pysam_stderr, " -d, --rm-dup <type> remove duplicate snps|indels|both|any\n");
+ fprintf(pysam_stderr, " -f, --fasta-ref <file> reference sequence\n");
+ fprintf(pysam_stderr, " -m, --multiallelics <-|+>[type] split multiallelics (-) or join biallelics (+), type: snps|indels|both|any [both]\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -N, --do-not-normalize do not normalize indels (with -m or -c s)\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <type> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --strict-filter when merging (-m+), merged site is PASS only if all sites being merged PASS\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, " -w, --site-win <int> buffer for sorting lines which changed position during realignment [1000]\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -1691,6 +1694,7 @@ int main_vcfnorm(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->aln_win = 100;
args->buf_win = 1000;
args->mrows_collapse = COLLAPSE_BOTH;
@@ -1716,6 +1720,7 @@ int main_vcfnorm(int argc, char *argv[])
{"threads",required_argument,NULL,9},
{"check-ref",required_argument,NULL,'c'},
{"strict-filter",no_argument,NULL,'s'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
@@ -1759,7 +1764,7 @@ int main_vcfnorm(int argc, char *argv[])
break;
case 'o': args->output_fname = optarg; break;
case 'D':
- fprintf(pysamerr,"Warning: `-D` is functional but deprecated, replaced by `-d both`.\n");
+ fprintf(pysam_stderr,"Warning: `-D` is functional but deprecated, replaced by `-d both`.\n");
args->rmdup = COLLAPSE_NONE<<1;
break;
case 's': args->strict_filter = 1; break;
@@ -1773,6 +1778,7 @@ int main_vcfnorm(int argc, char *argv[])
if ( *tmp ) error("Could not parse argument: --site-win %s\n", optarg);
break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case 'h':
case '?': usage();
default: error("Unknown argument: %s\n", optarg);
diff --git a/bcftools/vcfplugin.c b/bcftools/vcfplugin.c
index e2ca04a..87a773f 100644
--- a/bcftools/vcfplugin.c
+++ b/bcftools/vcfplugin.c
@@ -140,7 +140,7 @@ typedef struct _args_t
char **plugin_paths;
char **argv, *output_fname, *regions_list, *targets_list;
- int argc, drop_header, verbose;
+ int argc, drop_header, verbose, record_cmd_line;
}
args_t;
@@ -239,13 +239,6 @@ static void print_plugin_usage_hint(void)
fprintf(stderr,
" in\n\tBCFTOOLS_PLUGINS=\"%s\".\n\n"
"- Is the plugin path correct?\n\n"
- "- Are all shared libraries, namely libhts.so, accessible? Verify with\n"
- " on Mac OS X: `otool -L your/plugin.so` and set DYLD_LIBRARY_PATH if they are not\n"
- " on Linux: `ldd your/plugin.so` and set LD_LIBRARY_PATH if they are not\n"
- "\n"
- "- If not installed systemwide, set the environment variable LD_LIBRARY_PATH (linux) or\n"
- "DYLD_LIBRARY_PATH (mac) to include directory where *libhts.so* is located.\n"
- "\n"
"- Run \"bcftools plugin -lv\" for more detailed error output.\n"
"\n",
getenv("BCFTOOLS_PLUGINS")
@@ -418,7 +411,7 @@ static void init_data(args_t *args)
if ( args->filter_str )
args->filter = filter_init(args->hdr, args->filter_str);
- bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
if ( !args->drop_header )
{
args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
@@ -460,6 +453,7 @@ static void usage(args_t *args)
fprintf(stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
fprintf(stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
fprintf(stderr, "VCF output options:\n");
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
fprintf(stderr, " -O, --output-type <type> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
fprintf(stderr, " --threads <int> number of extra output compression threads [0]\n");
@@ -480,12 +474,27 @@ int main_plugin(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->nplugin_paths = -1;
int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0;
if ( argc==1 ) usage(args);
+
char *plugin_name = NULL;
- if ( argv[1][0]!='-' ) { plugin_name = argv[1]; argc--; argv++; }
+ if ( argv[1][0]!='-' )
+ {
+ plugin_name = argv[1];
+ argc--;
+ argv++;
+ load_plugin(args, plugin_name, 1, &args->plugin);
+ if ( args->plugin.run )
+ {
+ int ret = args->plugin.run(argc, argv);
+ destroy_data(args);
+ free(args);
+ return ret;
+ }
+ }
static struct option loptions[] =
{
@@ -502,6 +511,7 @@ int main_plugin(int argc, char *argv[])
{"regions-file",required_argument,NULL,'R'},
{"targets",required_argument,NULL,'t'},
{"targets-file",required_argument,NULL,'T'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "h?o:O:r:R:t:T:li:e:vV",loptions,NULL)) >= 0)
@@ -527,6 +537,7 @@ int main_plugin(int argc, char *argv[])
case 'T': args->targets_list = optarg; targets_is_file = 1; break;
case 'l': plist_only = 1; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case '?':
case 'h': usage_only = 1; break;
default: error("Unknown argument: %s\n", optarg);
@@ -535,7 +546,6 @@ int main_plugin(int argc, char *argv[])
if ( plist_only ) return list_plugins(args);
if ( usage_only && ! plugin_name ) usage(args);
- load_plugin(args, plugin_name, 1, &args->plugin);
if ( version_only )
{
const char *bver, *hver;
@@ -554,15 +564,6 @@ int main_plugin(int argc, char *argv[])
return 0;
}
- if ( args->plugin.run )
- {
- int iopt = optind; optind = 0;
- int ret = args->plugin.run(argc-iopt, argv+iopt);
- destroy_data(args);
- free(args);
- return ret;
- }
-
char *fname = NULL;
if ( optind>=argc || argv[optind][0]=='-' )
{
diff --git a/bcftools/vcfplugin.c.pysam.c b/bcftools/vcfplugin.c.pysam.c
index 5c29993..8365f7e 100644
--- a/bcftools/vcfplugin.c.pysam.c
+++ b/bcftools/vcfplugin.c.pysam.c
@@ -142,7 +142,7 @@ typedef struct _args_t
char **plugin_paths;
char **argv, *output_fname, *regions_list, *targets_list;
- int argc, drop_header, verbose;
+ int argc, drop_header, verbose, record_cmd_line;
}
args_t;
@@ -172,11 +172,11 @@ static void add_plugin_paths(args_t *args, const char *path)
args->plugin_paths = (char**) realloc(args->plugin_paths,sizeof(char*)*(args->nplugin_paths+1));
args->plugin_paths[args->nplugin_paths] = dir;
args->nplugin_paths++;
- if ( args->verbose ) fprintf(pysamerr, "plugin directory %s .. ok\n", dir);
+ if ( args->verbose ) fprintf(pysam_stderr, "plugin directory %s .. ok\n", dir);
}
else
{
- if ( args->verbose ) fprintf(pysamerr, "plugin directory %s .. %s\n", dir, strerror(errno));
+ if ( args->verbose ) fprintf(pysam_stderr, "plugin directory %s .. %s\n", dir, strerror(errno));
free(dir);
}
@@ -214,8 +214,8 @@ static void *dlopen_plugin(args_t *args, const char *fname)
handle = dlopen(tmp, RTLD_NOW); // valgrind complains about unfreed memory, not our problem though
if ( args->verbose )
{
- if ( !handle ) fprintf(pysamerr,"%s:\n\tdlopen .. %s\n", tmp,dlerror());
- else fprintf(pysamerr,"%s:\n\tdlopen .. ok\n", tmp);
+ if ( !handle ) fprintf(pysam_stderr,"%s:\n\tdlopen .. %s\n", tmp,dlerror());
+ else fprintf(pysam_stderr,"%s:\n\tdlopen .. ok\n", tmp);
}
free(tmp);
if ( handle ) return handle;
@@ -225,8 +225,8 @@ static void *dlopen_plugin(args_t *args, const char *fname)
handle = dlopen(fname, RTLD_NOW);
if ( args->verbose )
{
- if ( !handle ) fprintf(pysamerr,"%s:\n\tdlopen .. %s\n", fname,dlerror());
- else fprintf(pysamerr,"%s:\n\tdlopen .. ok\n", fname);
+ if ( !handle ) fprintf(pysam_stderr,"%s:\n\tdlopen .. %s\n", fname,dlerror());
+ else fprintf(pysam_stderr,"%s:\n\tdlopen .. ok\n", fname);
}
return handle;
@@ -234,20 +234,13 @@ static void *dlopen_plugin(args_t *args, const char *fname)
static void print_plugin_usage_hint(void)
{
- fprintf(pysamerr, "\nNo functional bcftools plugins were found");
+ fprintf(pysam_stderr, "\nNo functional bcftools plugins were found");
if ( !getenv("BCFTOOLS_PLUGINS") )
- fprintf(pysamerr,". The environment variable BCFTOOLS_PLUGINS is not set.\n\n");
+ fprintf(pysam_stderr,". The environment variable BCFTOOLS_PLUGINS is not set.\n\n");
else
- fprintf(pysamerr,
+ fprintf(pysam_stderr,
" in\n\tBCFTOOLS_PLUGINS=\"%s\".\n\n"
"- Is the plugin path correct?\n\n"
- "- Are all shared libraries, namely libhts.so, accessible? Verify with\n"
- " on Mac OS X: `otool -L your/plugin.so` and set DYLD_LIBRARY_PATH if they are not\n"
- " on Linux: `ldd your/plugin.so` and set LD_LIBRARY_PATH if they are not\n"
- "\n"
- "- If not installed systemwide, set the environment variable LD_LIBRARY_PATH (linux) or\n"
- "DYLD_LIBRARY_PATH (mac) to include directory where *libhts.so* is located.\n"
- "\n"
"- Run \"bcftools plugin -lv\" for more detailed error output.\n"
"\n",
getenv("BCFTOOLS_PLUGINS")
@@ -275,19 +268,19 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
if ( ret )
plugin->init = NULL;
else
- if ( args->verbose ) fprintf(pysamerr,"\tinit .. ok\n");
+ if ( args->verbose ) fprintf(pysam_stderr,"\tinit .. ok\n");
plugin->run = (dl_run_f) dlsym(plugin->handle, "run");
ret = dlerror();
if ( ret )
plugin->run = NULL;
else
- if ( args->verbose ) fprintf(pysamerr,"\trun .. ok\n");
+ if ( args->verbose ) fprintf(pysam_stderr,"\trun .. ok\n");
if ( !plugin->init && !plugin->run )
{
if ( exit_on_error ) error("Could not initialize %s, neither run or init found \n", plugin->name);
- else if ( args->verbose ) fprintf(pysamerr,"\tinit/run .. not found\n");
+ else if ( args->verbose ) fprintf(pysam_stderr,"\tinit/run .. not found\n");
return -1;
}
@@ -296,7 +289,7 @@ static int load_plugin(args_t *args, const char *fname, int exit_on_error, plugi
if ( ret )
{
if ( exit_on_error ) error("Could not initialize %s, version string not found\n", plugin->name);
- else if ( args->verbose ) fprintf(pysamerr,"\tversion .. not found\n");
+ else if ( args->verbose ) fprintf(pysam_stderr,"\tversion .. not found\n");
return -1;
}
@@ -344,12 +337,12 @@ static void init_plugin(args_t *args)
args->plugin.version(&bver, &hver);
if ( strcmp(bver,bcftools_version()) && !warned_bcftools )
{
- fprintf(pysamerr,"WARNING: bcftools version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", bcftools_version(),args->plugin.name,bver);
+ fprintf(pysam_stderr,"WARNING: bcftools version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", bcftools_version(),args->plugin.name,bver);
warned_bcftools = 1;
}
if ( strcmp(hver,hts_version()) && !warned_htslib )
{
- fprintf(pysamerr,"WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", hts_version(),args->plugin.name,hver);
+ fprintf(pysam_stderr,"WARNING: htslib version mismatch .. bcftools at %s, the plugin \"%s\" at %s\n", hts_version(),args->plugin.name,hver);
warned_htslib = 1;
}
args->drop_header += ret;
@@ -401,8 +394,8 @@ static int list_plugins(args_t *args)
qsort(plugins, nplugins, sizeof(plugins[0]), cmp_plugin_name);
for (i=0; i<nplugins; i++)
- printf("\n-- %s --\n%s", plugins[i].name, plugins[i].about());
- printf("\n");
+ fprintf(pysam_stdout, "\n-- %s --\n%s", plugins[i].name, plugins[i].about());
+ fprintf(pysam_stdout, "\n");
}
else
print_plugin_usage_hint();
@@ -420,7 +413,7 @@ static void init_data(args_t *args)
if ( args->filter_str )
args->filter = filter_init(args->hdr, args->filter_str);
- bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr_out, args->argc, args->argv, "bcftools_plugin");
if ( !args->drop_header )
{
args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
@@ -449,28 +442,29 @@ static void destroy_data(args_t *args)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Run user defined plugin\n");
- fprintf(pysamerr, "Usage: bcftools plugin <name> [OPTIONS] <file> [-- PLUGIN_OPTIONS]\n");
- fprintf(pysamerr, " bcftools +name [OPTIONS] <file> [-- PLUGIN_OPTIONS]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "VCF input options:\n");
- fprintf(pysamerr, " -e, --exclude <expr> exclude sites for which the expression is true\n");
- fprintf(pysamerr, " -i, --include <expr> select sites for which the expression is true\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, "VCF output options:\n");
- fprintf(pysamerr, " -o, --output <file> write output to a file [standard output]\n");
- fprintf(pysamerr, " -O, --output-type <type> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "Plugin options:\n");
- fprintf(pysamerr, " -h, --help list plugin's options\n");
- fprintf(pysamerr, " -l, --list-plugins list available plugins. See BCFTOOLS_PLUGINS environment variable and man page for details\n");
- fprintf(pysamerr, " -v, --verbose print debugging information on plugin failure\n");
- fprintf(pysamerr, " -V, --version print version string and exit\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Run user defined plugin\n");
+ fprintf(pysam_stderr, "Usage: bcftools plugin <name> [OPTIONS] <file> [-- PLUGIN_OPTIONS]\n");
+ fprintf(pysam_stderr, " bcftools +name [OPTIONS] <file> [-- PLUGIN_OPTIONS]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "VCF input options:\n");
+ fprintf(pysam_stderr, " -e, --exclude <expr> exclude sites for which the expression is true\n");
+ fprintf(pysam_stderr, " -i, --include <expr> select sites for which the expression is true\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, "VCF output options:\n");
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(pysam_stderr, " -O, --output-type <type> 'b' compressed BCF; 'u' uncompressed BCF; 'z' compressed VCF; 'v' uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "Plugin options:\n");
+ fprintf(pysam_stderr, " -h, --help list plugin's options\n");
+ fprintf(pysam_stderr, " -l, --list-plugins list available plugins. See BCFTOOLS_PLUGINS environment variable and man page for details\n");
+ fprintf(pysam_stderr, " -v, --verbose print debugging information on plugin failure\n");
+ fprintf(pysam_stderr, " -V, --version print version string and exit\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -482,12 +476,27 @@ int main_plugin(int argc, char *argv[])
args->output_fname = "-";
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
args->nplugin_paths = -1;
int regions_is_file = 0, targets_is_file = 0, plist_only = 0, usage_only = 0, version_only = 0;
if ( argc==1 ) usage(args);
+
char *plugin_name = NULL;
- if ( argv[1][0]!='-' ) { plugin_name = argv[1]; argc--; argv++; }
+ if ( argv[1][0]!='-' )
+ {
+ plugin_name = argv[1];
+ argc--;
+ argv++;
+ load_plugin(args, plugin_name, 1, &args->plugin);
+ if ( args->plugin.run )
+ {
+ int ret = args->plugin.run(argc, argv);
+ destroy_data(args);
+ free(args);
+ return ret;
+ }
+ }
static struct option loptions[] =
{
@@ -504,6 +513,7 @@ int main_plugin(int argc, char *argv[])
{"regions-file",required_argument,NULL,'R'},
{"targets",required_argument,NULL,'t'},
{"targets-file",required_argument,NULL,'T'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
while ((c = getopt_long(argc, argv, "h?o:O:r:R:t:T:li:e:vV",loptions,NULL)) >= 0)
@@ -529,6 +539,7 @@ int main_plugin(int argc, char *argv[])
case 'T': args->targets_list = optarg; targets_is_file = 1; break;
case 'l': plist_only = 1; break;
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case '?':
case 'h': usage_only = 1; break;
default: error("Unknown argument: %s\n", optarg);
@@ -537,34 +548,24 @@ int main_plugin(int argc, char *argv[])
if ( plist_only ) return list_plugins(args);
if ( usage_only && ! plugin_name ) usage(args);
- load_plugin(args, plugin_name, 1, &args->plugin);
if ( version_only )
{
const char *bver, *hver;
args->plugin.version(&bver, &hver);
- printf("bcftools %s using htslib %s\n", bcftools_version(), hts_version());
- printf("plugin at %s using htslib %s\n\n", bver, hver);
+ fprintf(pysam_stdout, "bcftools %s using htslib %s\n", bcftools_version(), hts_version());
+ fprintf(pysam_stdout, "plugin at %s using htslib %s\n\n", bver, hver);
return 0;
}
if ( usage_only )
{
if ( args->plugin.usage )
- fprintf(pysamerr,"%s",args->plugin.usage());
+ fprintf(pysam_stderr,"%s",args->plugin.usage());
else
- fprintf(pysamerr,"Usage: bcftools +%s [General Options] -- [Plugin Options]\n",plugin_name);
+ fprintf(pysam_stderr,"Usage: bcftools +%s [General Options] -- [Plugin Options]\n",plugin_name);
return 0;
}
- if ( args->plugin.run )
- {
- int iopt = optind; optind = 0;
- int ret = args->plugin.run(argc-iopt, argv+iopt);
- destroy_data(args);
- free(args);
- return ret;
- }
-
char *fname = NULL;
if ( optind>=argc || argv[optind][0]=='-' )
{
diff --git a/bcftools/vcfquery.c.pysam.c b/bcftools/vcfquery.c.pysam.c
index 1265b57..10f56f1 100644
--- a/bcftools/vcfquery.c.pysam.c
+++ b/bcftools/vcfquery.c.pysam.c
@@ -156,7 +156,7 @@ static void list_columns(args_t *args)
int i;
bcf_sr_t *reader = &args->files->readers[0];
for (i=0; i<bcf_hdr_nsamples(reader->header); i++)
- printf("%s\n", reader->header->samples[i]);
+ fprintf(pysam_stdout, "%s\n", reader->header->samples[i]);
}
static char **copy_header(bcf_hdr_t *hdr, char **src, int nsrc)
@@ -178,30 +178,30 @@ static int compare_header(bcf_hdr_t *hdr, char **a, int na, char **b, int nb)
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Extracts fields from VCF/BCF file and prints them in user-defined format\n");
- fprintf(pysamerr, "Usage: bcftools query [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -c, --collapse <string> collapse lines with duplicate positions for <snps|indels|both|all|some|none>, see man page [none]\n");
- fprintf(pysamerr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
- fprintf(pysamerr, " -f, --format <string> see man page for details\n");
- fprintf(pysamerr, " -H, --print-header print header\n");
- fprintf(pysamerr, " -i, --include <expr> select sites for which the expression is true (see man page for details)\n");
- fprintf(pysamerr, " -l, --list-samples print the list of samples and exit\n");
- fprintf(pysamerr, " -o, --output-file <file> output file name [stdout]\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --samples <list> list of samples to include\n");
- fprintf(pysamerr, " -S, --samples-file <file> file of samples to include\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, " -u, --allow-undef-tags print \".\" for undefined tags\n");
- fprintf(pysamerr, " -v, --vcf-list <file> process multiple VCFs listed in the file\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Examples:\n");
- fprintf(pysamerr, "\tbcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT[\\t%%SAMPLE=%%GT]\\n' file.vcf.gz\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Extracts fields from VCF/BCF file and prints them in user-defined format\n");
+ fprintf(pysam_stderr, "Usage: bcftools query [options] <A.vcf.gz> [<B.vcf.gz> [...]]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -c, --collapse <string> collapse lines with duplicate positions for <snps|indels|both|all|some|none>, see man page [none]\n");
+ fprintf(pysam_stderr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -f, --format <string> see man page for details\n");
+ fprintf(pysam_stderr, " -H, --print-header print header\n");
+ fprintf(pysam_stderr, " -i, --include <expr> select sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -l, --list-samples print the list of samples and exit\n");
+ fprintf(pysam_stderr, " -o, --output-file <file> output file name [pysam_stdout]\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --samples <list> list of samples to include\n");
+ fprintf(pysam_stderr, " -S, --samples-file <file> file of samples to include\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -u, --allow-undef-tags print \".\" for undefined tags\n");
+ fprintf(pysam_stderr, " -v, --vcf-list <file> process multiple VCFs listed in the file\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Examples:\n");
+ fprintf(pysam_stderr, "\tbcftools query -f '%%CHROM\\t%%POS\\t%%REF\\t%%ALT[\\t%%SAMPLE=%%GT]\\n' file.vcf.gz\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -300,7 +300,7 @@ int main_vcfquery(int argc, char *argv[])
}
if ( !args->format_str ) usage();
- args->out = args->fn_out ? fopen(args->fn_out, "w") : stdout;
+ args->out = args->fn_out ? fopen(args->fn_out, "w") : pysam_stdout;
if ( !args->out ) error("%s: %s\n", args->fn_out,strerror(errno));
if ( !args->vcf_list )
diff --git a/bcftools/vcfroh.c b/bcftools/vcfroh.c
index fa64b79..9560559 100644
--- a/bcftools/vcfroh.c
+++ b/bcftools/vcfroh.c
@@ -368,14 +368,31 @@ static void flush_viterbi(args_t *args)
}
}
- // update the transition matrix tprob
+ // update the transition matrix
+ int n = 1;
for (i=0; i<2; i++)
{
- int n = 0;
for (j=0; j<2; j++) n += MAT(tcounts,2,i,j);
- if ( !n) error("fixme: state %d not observed\n", i+1);
- for (j=0; j<2; j++) MAT(tcounts,2,i,j) /= n;
}
+ for (i=0; i<2; i++)
+ {
+ for (j=0; j<2; j++)
+ {
+ // no transition to i-th state was observed, set to a small number
+ if ( !MAT(tcounts,2,i,j) ) MAT(tcounts,2,i,j) = 0.1/n;
+ else MAT(tcounts,2,i,j) /= n;
+ }
+ }
+
+ // normalize
+ for (i=0; i<2; i++)
+ {
+ double norm = 0;
+ for (j=0; j<2; j++) norm += MAT(tcounts,2,j,i);
+ assert( norm!=0 );
+ for (j=0; j<2; j++) MAT(tcounts,2,j,i) /= norm;
+ }
+
if ( args->genmap_fname || args->rec_rate > 0 )
hmm_set_tprob(args->hmm, tcounts, 0);
else
@@ -385,14 +402,16 @@ static void flush_viterbi(args_t *args)
deltaz = fabs(MAT(tprob_arr,2,1,0)-t2az_prev);
delthw = fabs(MAT(tprob_arr,2,0,1)-t2hw_prev);
niter++;
-
- fprintf(stderr,"%d: %f %f\n", niter,deltaz,delthw);
+ fprintf(stderr,"Viterbi training, iteration %d: dAZ=%e dHW=%e\tP(HW|HW)=%e P(AZ|HW)=%e P(AZ|AZ)=%e P(HW|AZ)=%e\n",
+ niter,deltaz,delthw,
+ MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+ MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
}
while ( deltaz > 0.0 || delthw > 0.0 );
- fprintf(stderr, "Viterbi training converged in %d iterations to", niter);
double *tprob_arr = hmm_get_tprob(args->hmm);
- for (i=0; i<2; i++) for (j=0; j<2; j++) fprintf(stderr, " %f", MAT(tprob_arr,2,i,j));
- fprintf(stderr, "\n");
+ fprintf(stderr, "Viterbi training converged in %d iterations to P(HW|HW)=%e P(AZ|HW)=%e P(AZ|AZ)=%e P(HW|AZ)=%e\n", niter,
+ MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+ MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
// output the results
for (i=0; i<args->nrids; i++)
@@ -400,12 +419,16 @@ static void flush_viterbi(args_t *args)
int ioff = args->rid_offs[i];
int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
+ hmm_run_fwd_bwd(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
+ double *fwd = hmm_get_fwd_bwd_prob(args->hmm);
const char *chr = bcf_hdr_id2name(args->hdr,args->rids[i]);
for (j=0; j<nsites; j++)
{
- printf("%s\t%d\t%d\t..\n", chr,args->sites[ioff+j]+1,vpath[j*2]==STATE_AZ ? 1 : 0);
+ int state = vpath[j*2];
+ double pval = fwd[j*2 + state];
+ printf("%s\t%d\t%d\t%e\n", chr,args->sites[ioff+j]+1,state==STATE_AZ ? 1 : 0, pval);
}
}
}
diff --git a/bcftools/vcfroh.c.pysam.c b/bcftools/vcfroh.c.pysam.c
index 92a9a4f..66ddc17 100644
--- a/bcftools/vcfroh.c.pysam.c
+++ b/bcftools/vcfroh.c.pysam.c
@@ -167,12 +167,12 @@ static void init_data(args_t *args)
args->hmm = hmm_init(2, tprob, 10000);
// print header
- printf("# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
- printf("# The command line was:\tbcftools %s", args->argv[0]);
+ fprintf(pysam_stdout, "# This file was produced by: bcftools roh(%s+htslib-%s)\n", bcftools_version(),hts_version());
+ fprintf(pysam_stdout, "# The command line was:\tbcftools %s", args->argv[0]);
for (i=1; i<args->argc; i++)
- printf(" %s",args->argv[i]);
- printf("\n#\n");
- printf("# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
+ fprintf(pysam_stdout, " %s",args->argv[i]);
+ fprintf(pysam_stdout, "\n#\n");
+ fprintf(pysam_stdout, "# [1]Chromosome\t[2]Position\t[3]State (0:HW, 1:AZ)\t[4]Quality\n");
}
static void destroy_data(args_t *args)
@@ -336,7 +336,7 @@ static void flush_viterbi(args_t *args)
{
int state = vpath[i*2]==STATE_AZ ? 1 : 0;
double *pval = fwd + i*2;
- printf("%s\t%d\t%d\t%.1f\n", chr,args->sites[i]+1, state, phred_score(1.0-pval[state]));
+ fprintf(pysam_stdout, "%s\t%d\t%d\t%.1f\n", chr,args->sites[i]+1, state, phred_score(1.0-pval[state]));
}
return;
}
@@ -370,14 +370,31 @@ static void flush_viterbi(args_t *args)
}
}
- // update the transition matrix tprob
+ // update the transition matrix
+ int n = 1;
for (i=0; i<2; i++)
{
- int n = 0;
for (j=0; j<2; j++) n += MAT(tcounts,2,i,j);
- if ( !n) error("fixme: state %d not observed\n", i+1);
- for (j=0; j<2; j++) MAT(tcounts,2,i,j) /= n;
}
+ for (i=0; i<2; i++)
+ {
+ for (j=0; j<2; j++)
+ {
+ // no transition to i-th state was observed, set to a small number
+ if ( !MAT(tcounts,2,i,j) ) MAT(tcounts,2,i,j) = 0.1/n;
+ else MAT(tcounts,2,i,j) /= n;
+ }
+ }
+
+ // normalize
+ for (i=0; i<2; i++)
+ {
+ double norm = 0;
+ for (j=0; j<2; j++) norm += MAT(tcounts,2,j,i);
+ assert( norm!=0 );
+ for (j=0; j<2; j++) MAT(tcounts,2,j,i) /= norm;
+ }
+
if ( args->genmap_fname || args->rec_rate > 0 )
hmm_set_tprob(args->hmm, tcounts, 0);
else
@@ -387,14 +404,16 @@ static void flush_viterbi(args_t *args)
deltaz = fabs(MAT(tprob_arr,2,1,0)-t2az_prev);
delthw = fabs(MAT(tprob_arr,2,0,1)-t2hw_prev);
niter++;
-
- fprintf(pysamerr,"%d: %f %f\n", niter,deltaz,delthw);
+ fprintf(pysam_stderr,"Viterbi training, iteration %d: dAZ=%e dHW=%e\tP(HW|HW)=%e P(AZ|HW)=%e P(AZ|AZ)=%e P(HW|AZ)=%e\n",
+ niter,deltaz,delthw,
+ MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+ MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
}
while ( deltaz > 0.0 || delthw > 0.0 );
- fprintf(pysamerr, "Viterbi training converged in %d iterations to", niter);
double *tprob_arr = hmm_get_tprob(args->hmm);
- for (i=0; i<2; i++) for (j=0; j<2; j++) fprintf(pysamerr, " %f", MAT(tprob_arr,2,i,j));
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "Viterbi training converged in %d iterations to P(HW|HW)=%e P(AZ|HW)=%e P(AZ|AZ)=%e P(HW|AZ)=%e\n", niter,
+ MAT(tprob_arr,2,STATE_HW,STATE_HW),MAT(tprob_arr,2,STATE_AZ,STATE_HW),
+ MAT(tprob_arr,2,STATE_AZ,STATE_AZ),MAT(tprob_arr,2,STATE_HW,STATE_AZ));
// output the results
for (i=0; i<args->nrids; i++)
@@ -402,12 +421,16 @@ static void flush_viterbi(args_t *args)
int ioff = args->rid_offs[i];
int nsites = (i+1==args->nrids ? args->nsites : args->rid_offs[i+1]) - ioff;
hmm_run_viterbi(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
+ hmm_run_fwd_bwd(args->hmm, nsites, args->eprob+ioff*2, args->sites+ioff);
uint8_t *vpath = hmm_get_viterbi_path(args->hmm);
+ double *fwd = hmm_get_fwd_bwd_prob(args->hmm);
const char *chr = bcf_hdr_id2name(args->hdr,args->rids[i]);
for (j=0; j<nsites; j++)
{
- printf("%s\t%d\t%d\t..\n", chr,args->sites[ioff+j]+1,vpath[j*2]==STATE_AZ ? 1 : 0);
+ int state = vpath[j*2];
+ double pval = fwd[j*2 + state];
+ fprintf(pysam_stdout, "%s\t%d\t%d\t%e\n", chr,args->sites[ioff+j]+1,state==STATE_AZ ? 1 : 0, pval);
}
}
}
@@ -624,7 +647,7 @@ static void vcfroh(args_t *args, bcf1_t *line)
if ( skip_rid )
{
- fprintf(pysamerr,"Skipping the sequence, no genmap for %s\n", bcf_seqname(args->hdr,line));
+ fprintf(pysam_stderr,"Skipping the sequence, no genmap for %s\n", bcf_seqname(args->hdr,line));
args->skip_rid = line->rid;
return;
}
@@ -657,30 +680,30 @@ static void vcfroh(args_t *args, bcf1_t *line)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: HMM model for detecting runs of autozygosity.\n");
- fprintf(pysamerr, "Usage: bcftools roh [options] <in.vcf.gz>\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "General Options:\n");
- fprintf(pysamerr, " --AF-dflt <float> if AF is not known, use this allele frequency [skip]\n");
- fprintf(pysamerr, " --AF-tag <TAG> use TAG for allele frequency\n");
- fprintf(pysamerr, " --AF-file <file> read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
- fprintf(pysamerr, " -e, --estimate-AF <file> calculate AC,AN counts on the fly, using either all samples (\"-\") or samples listed in <file>\n");
- fprintf(pysamerr, " -G, --GTs-only <float> use GTs, ignore PLs, use <float> for PL of unseen genotypes. Safe value to use is 30 to account for GT errors.\n");
- fprintf(pysamerr, " -I, --skip-indels skip indels as their genotypes are enriched for errors\n");
- fprintf(pysamerr, " -m, --genetic-map <file> genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\" is replaced with chromosome name\n");
- fprintf(pysamerr, " -M, --rec-rate <float> constant recombination rate per bp\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --sample <sample> sample to analyze\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "HMM Options:\n");
- fprintf(pysamerr, " -a, --hw-to-az <float> P(AZ|HW) transition probability from HW (Hardy-Weinberg) to AZ (autozygous) state [6.7e-8]\n");
- fprintf(pysamerr, " -H, --az-to-hw <float> P(HW|AZ) transition probability from AZ to HW state [5e-9]\n");
- fprintf(pysamerr, " -V, --viterbi-training perform Viterbi training to estimate transition probabilities\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: HMM model for detecting runs of autozygosity.\n");
+ fprintf(pysam_stderr, "Usage: bcftools roh [options] <in.vcf.gz>\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "General Options:\n");
+ fprintf(pysam_stderr, " --AF-dflt <float> if AF is not known, use this allele frequency [skip]\n");
+ fprintf(pysam_stderr, " --AF-tag <TAG> use TAG for allele frequency\n");
+ fprintf(pysam_stderr, " --AF-file <file> read allele frequencies from file (CHR\\tPOS\\tREF,ALT\\tAF)\n");
+ fprintf(pysam_stderr, " -e, --estimate-AF <file> calculate AC,AN counts on the fly, using either all samples (\"-\") or samples listed in <file>\n");
+ fprintf(pysam_stderr, " -G, --GTs-only <float> use GTs, ignore PLs, use <float> for PL of unseen genotypes. Safe value to use is 30 to account for GT errors.\n");
+ fprintf(pysam_stderr, " -I, --skip-indels skip indels as their genotypes are enriched for errors\n");
+ fprintf(pysam_stderr, " -m, --genetic-map <file> genetic map in IMPUTE2 format, single file or mask, where string \"{CHROM}\" is replaced with chromosome name\n");
+ fprintf(pysam_stderr, " -M, --rec-rate <float> constant recombination rate per bp\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --sample <sample> sample to analyze\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "HMM Options:\n");
+ fprintf(pysam_stderr, " -a, --hw-to-az <float> P(AZ|HW) transition probability from HW (Hardy-Weinberg) to AZ (autozygous) state [6.7e-8]\n");
+ fprintf(pysam_stderr, " -H, --az-to-hw <float> P(HW|AZ) transition probability from AZ to HW state [5e-9]\n");
+ fprintf(pysam_stderr, " -V, --viterbi-training perform Viterbi training to estimate transition probabilities\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -787,7 +810,7 @@ int main_vcfroh(int argc, char *argv[])
vcfroh(args, args->files->readers[0].buffer[0]);
}
vcfroh(args, NULL);
- fprintf(pysamerr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
+ fprintf(pysam_stderr,"Number of lines: total/processed: %d/%d\n", args->ntot,args->nused);
destroy_data(args);
free(args);
return 0;
diff --git a/bcftools/vcfsom.c.pysam.c b/bcftools/vcfsom.c.pysam.c
index 32e7213..58875f6 100644
--- a/bcftools/vcfsom.c.pysam.c
+++ b/bcftools/vcfsom.c.pysam.c
@@ -104,7 +104,7 @@ char *msprintf(const char *fmt, ...)
/*
* char *t, *p = str;
* t = column_next(p, '\t');
- * if ( strlen("<something>")==t-p && !strncmp(p,"<something>",t-p) ) printf("found!\n");
+ * if ( strlen("<something>")==t-p && !strncmp(p,"<something>",t-p) ) fprintf(pysam_stdout, "found!\n");
*
* char *t;
* t = column_next(str, '\t'); if ( !*t ) error("expected field\n", str);
@@ -574,7 +574,7 @@ static void do_train(args_t *args)
fprintf(fp,"%e\t%f\t%f\n", prev_score, (float)igood/ngood, (float)ibad/nbad);
if ( !printed && (float)igood/ngood > 0.9 )
{
- printf("%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
+ fprintf(pysam_stdout, "%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
printed = 1;
}
@@ -582,7 +582,7 @@ static void do_train(args_t *args)
else if ( igood<ngood ) prev_score = good[igood];
else prev_score = bad[ibad];
}
- if ( !printed ) printf("%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
+ if ( !printed ) fprintf(pysam_stdout, "%.2f\t%.2f\t%e\t# %% of bad [1] and good [2] sites at a cutoff [3]\n", 100.*ibad/nbad,100.*igood/ngood,prev_score);
if ( fp )
{
if ( fclose(fp) ) error("%s.eval: fclose failed: %s\n",args->prefix,strerror(errno));
@@ -607,36 +607,36 @@ static void do_classify(args_t *args)
case MERGE_MAX: score = get_max_score(args, -1); break;
case MERGE_AVG: score = get_avg_score(args, -1); break;
}
- printf("%e\n", 1.0 - score/max_score);
+ fprintf(pysam_stdout, "%e\n", 1.0 - score/max_score);
}
annots_reader_close(args);
}
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: SOM (Self-Organizing Map) filtering.\n");
- fprintf(pysamerr, "Usage: bcftools som --train [options] <annots.tab.gz>\n");
- fprintf(pysamerr, " bcftools som --classify [options]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Model training options:\n");
- fprintf(pysamerr, " -f, --nfold <int> n-fold cross-validation (number of maps) [5]\n");
- fprintf(pysamerr, " -p, --prefix <string> prefix of output files\n");
- fprintf(pysamerr, " -s, --size <int> map size [20]\n");
- fprintf(pysamerr, " -t, --train \n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Classifying options:\n");
- fprintf(pysamerr, " -c, --classify \n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Experimental training options (no reason to change):\n");
- fprintf(pysamerr, " -b, --bmu-threshold <float> threshold for selection of best-matching unit [0.9]\n");
- fprintf(pysamerr, " -d, --som-dimension <int> SOM dimension [2]\n");
- fprintf(pysamerr, " -e, --exclude-bad exclude bad sites from training, use for evaluation only\n");
- fprintf(pysamerr, " -l, --learning-rate <float> learning rate [1.0]\n");
- fprintf(pysamerr, " -m, --merge <min|max|avg> -f merge algorithm [avg]\n");
- fprintf(pysamerr, " -n, --ntrain-sites <int> effective number of training sites [number of good sites]\n");
- fprintf(pysamerr, " -r, --random-seed <int> random seed, 0 for time() [1]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: SOM (Self-Organizing Map) filtering.\n");
+ fprintf(pysam_stderr, "Usage: bcftools som --train [options] <annots.tab.gz>\n");
+ fprintf(pysam_stderr, " bcftools som --classify [options]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Model training options:\n");
+ fprintf(pysam_stderr, " -f, --nfold <int> n-fold cross-validation (number of maps) [5]\n");
+ fprintf(pysam_stderr, " -p, --prefix <string> prefix of output files\n");
+ fprintf(pysam_stderr, " -s, --size <int> map size [20]\n");
+ fprintf(pysam_stderr, " -t, --train \n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Classifying options:\n");
+ fprintf(pysam_stderr, " -c, --classify \n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Experimental training options (no reason to change):\n");
+ fprintf(pysam_stderr, " -b, --bmu-threshold <float> threshold for selection of best-matching unit [0.9]\n");
+ fprintf(pysam_stderr, " -d, --som-dimension <int> SOM dimension [2]\n");
+ fprintf(pysam_stderr, " -e, --exclude-bad exclude bad sites from training, use for evaluation only\n");
+ fprintf(pysam_stderr, " -l, --learning-rate <float> learning rate [1.0]\n");
+ fprintf(pysam_stderr, " -m, --merge <min|max|avg> -f merge algorithm [avg]\n");
+ fprintf(pysam_stderr, " -n, --ntrain-sites <int> effective number of training sites [number of good sites]\n");
+ fprintf(pysam_stderr, " -r, --random-seed <int> random seed, 0 for time() [1]\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -692,7 +692,7 @@ int main_vcfsom(int argc, char *argv[])
case 'd':
args->ndim = atoi(optarg);
if ( args->ndim<2 ) error("Expected -d >=2, got %d\n", args->ndim);
- if ( args->ndim>3 ) fprintf(pysamerr,"Warning: This will take a long time and is not going to make the results better: -d %d\n", args->ndim);
+ if ( args->ndim>3 ) fprintf(pysam_stderr,"Warning: This will take a long time and is not going to make the results better: -d %d\n", args->ndim);
break;
case 't': args->action = SOM_TRAIN; break;
case 'c': args->action = SOM_CLASSIFY; break;
diff --git a/bcftools/vcfstats.c.pysam.c b/bcftools/vcfstats.c.pysam.c
index fcbc15b..5653760 100644
--- a/bcftools/vcfstats.c.pysam.c
+++ b/bcftools/vcfstats.c.pysam.c
@@ -195,17 +195,17 @@ static inline int idist_i2bin(idist_t *d, int i)
static void _indel_ctx_print1(_idc1_t *idc)
{
int i;
- fprintf(stdout, "%d\t", idc->cnt);
+ fprintf(pysam_stdout, "%d\t", idc->cnt);
for (i=0; i<idc->len; i++)
- fputc(idc->seq[i], stdout);
- fputc('\n', stdout);
+ fputc(idc->seq[i], pysam_stdout);
+ fputc('\n', pysam_stdout);
}
static void _indel_ctx_print(indel_ctx_t *ctx)
{
int i;
for (i=0; i<ctx->ndat; i++)
_indel_ctx_print1(&ctx->dat[i]);
- fputc('\n',stdout);
+ fputc('\n',pysam_stdout);
}
#endif
static int _indel_ctx_lookup(indel_ctx_t *ctx, char *seq, int seq_len, int *hit)
@@ -317,9 +317,9 @@ int indel_ctx_type(indel_ctx_t *ctx, char *chr, int pos, char *ref, char *alt, i
}
#if IC_DBG
- fprintf(stdout,"ref: %s\n", ref);
- fprintf(stdout,"alt: %s\n", alt);
- fprintf(stdout,"ctx: %s\n", fai_ref);
+ fprintf(pysam_stdout,"ref: %s\n", ref);
+ fprintf(pysam_stdout,"alt: %s\n", alt);
+ fprintf(pysam_stdout,"ctx: %s\n", fai_ref);
_indel_ctx_print(ctx);
#endif
@@ -900,7 +900,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_missing, bcf_int8_vector_end); break;
case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
- default: fprintf(pysamerr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); exit(1); break;
+ default: fprintf(pysam_stderr, "[E::%s] todo: %d\n", __func__, fmt_ptr->type); exit(1); break;
}
#undef BRANCH_INT
}
@@ -1010,7 +1010,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
{
nmm++;
bcf_sr_t *reader = &files->readers[0];
- printf("DBG\t%s\t%d\t%s\t%d\t%d\n",reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,files->samples[is],gt,gt2);
+ fprintf(pysam_stdout, "DBG\t%s\t%d\t%s\t%d\t%d\n",reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,files->samples[is],gt,gt2);
}
else
{
@@ -1019,7 +1019,7 @@ static void do_sample_stats(args_t *args, stats_t *stats, bcf_sr_t *reader, int
}
}
float nrd = nrefm+nmm ? 100.*nmm/(nrefm+nmm) : 0;
- printf("PSD\t%s\t%d\t%d\t%d\t%f\n", reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,nm,nmm,nrd);
+ fprintf(pysam_stdout, "PSD\t%s\t%d\t%d\t%d\t%f\n", reader->header->id[BCF_DT_CTG][reader->buffer[0]->rid].key,reader->buffer[0]->pos+1,nm,nmm,nrd);
}
}
}
@@ -1089,38 +1089,38 @@ static void do_vcf_stats(args_t *args)
static void print_header(args_t *args)
{
int i;
- printf("# This file was produced by bcftools stats (%s+htslib-%s) and can be plotted using plot-vcfstats.\n", bcftools_version(),hts_version());
- printf("# The command line was:\tbcftools %s ", args->argv[0]);
+ fprintf(pysam_stdout, "# This file was produced by bcftools stats (%s+htslib-%s) and can be plotted using plot-vcfstats.\n", bcftools_version(),hts_version());
+ fprintf(pysam_stdout, "# The command line was:\tbcftools %s ", args->argv[0]);
for (i=1; i<args->argc; i++)
- printf(" %s",args->argv[i]);
- printf("\n#\n");
+ fprintf(pysam_stdout, " %s",args->argv[i]);
+ fprintf(pysam_stdout, "\n#\n");
- printf("# Definition of sets:\n# ID\t[2]id\t[3]tab-separated file names\n");
+ fprintf(pysam_stdout, "# Definition of sets:\n# ID\t[2]id\t[3]tab-separated file names\n");
if ( args->files->nreaders==1 )
{
const char *fname = strcmp("-",args->files->readers[0].fname) ? args->files->readers[0].fname : "<STDIN>";
if ( args->split_by_id )
{
- printf("ID\t0\t%s:known (sites with ID different from \".\")\n", fname);
- printf("ID\t1\t%s:novel (sites where ID column is \".\")\n", fname);
+ fprintf(pysam_stdout, "ID\t0\t%s:known (sites with ID different from \".\")\n", fname);
+ fprintf(pysam_stdout, "ID\t1\t%s:novel (sites where ID column is \".\")\n", fname);
}
else
- printf("ID\t0\t%s\n", fname);
+ fprintf(pysam_stdout, "ID\t0\t%s\n", fname);
}
else
{
const char *fname0 = strcmp("-",args->files->readers[0].fname) ? args->files->readers[0].fname : "<STDIN>";
const char *fname1 = strcmp("-",args->files->readers[1].fname) ? args->files->readers[1].fname : "<STDIN>";
- printf("ID\t0\t%s\n", fname0);
- printf("ID\t1\t%s\n", fname1);
- printf("ID\t2\t%s\t%s\n", fname0,fname1);
+ fprintf(pysam_stdout, "ID\t0\t%s\n", fname0);
+ fprintf(pysam_stdout, "ID\t1\t%s\n", fname1);
+ fprintf(pysam_stdout, "ID\t2\t%s\t%s\n", fname0,fname1);
if ( args->verbose_sites )
{
- printf(
+ fprintf(pysam_stdout,
"# Verbose per-site discordance output.\n"
"# PSD\t[2]CHROM\t[3]POS\t[4]Number of matches\t[5]Number of mismatches\t[6]NRD\n");
- printf(
+ fprintf(pysam_stdout,
"# Verbose per-site and per-sample output. Genotype codes: %d:HomRefRef, %d:HomAltAlt, %d:HetAltRef, %d:HetAltAlt, %d:haploidRef, %d:haploidAlt\n"
"# DBG\t[2]CHROM\t[3]POS\t[4]Sample\t[5]GT in %s\t[6]GT in %s\n",
GT_HOM_RR, GT_HOM_AA, GT_HET_RA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A, fname0,fname1);
@@ -1132,42 +1132,42 @@ static void print_header(args_t *args)
static void print_stats(args_t *args)
{
int i, id;
- printf("# SN, Summary numbers:\n# SN\t[2]id\t[3]key\t[4]value\n");
+ fprintf(pysam_stdout, "# SN, Summary numbers:\n# SN\t[2]id\t[3]key\t[4]value\n");
for (id=0; id<args->files->nreaders; id++)
- printf("SN\t%d\tnumber of samples:\t%d\n", id, bcf_hdr_nsamples(args->files->readers[id].header));
+ fprintf(pysam_stdout, "SN\t%d\tnumber of samples:\t%d\n", id, bcf_hdr_nsamples(args->files->readers[id].header));
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
- printf("SN\t%d\tnumber of records:\t%d\n", id, stats->n_records);
- printf("SN\t%d\tnumber of no-ALTs:\t%d\n", id, stats->n_noalts);
- printf("SN\t%d\tnumber of SNPs:\t%d\n", id, stats->n_snps);
- printf("SN\t%d\tnumber of MNPs:\t%d\n", id, stats->n_mnps);
- printf("SN\t%d\tnumber of indels:\t%d\n", id, stats->n_indels);
- printf("SN\t%d\tnumber of others:\t%d\n", id, stats->n_others);
- printf("SN\t%d\tnumber of multiallelic sites:\t%d\n", id, stats->n_mals);
- printf("SN\t%d\tnumber of multiallelic SNP sites:\t%d\n", id, stats->n_snp_mals);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of records:\t%d\n", id, stats->n_records);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of no-ALTs:\t%d\n", id, stats->n_noalts);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of SNPs:\t%d\n", id, stats->n_snps);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of MNPs:\t%d\n", id, stats->n_mnps);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of indels:\t%d\n", id, stats->n_indels);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of others:\t%d\n", id, stats->n_others);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of multiallelic sites:\t%d\n", id, stats->n_mals);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of multiallelic SNP sites:\t%d\n", id, stats->n_snp_mals);
}
- printf("# TSTV, transitions/transversions:\n# TSTV\t[2]id\t[3]ts\t[4]tv\t[5]ts/tv\t[6]ts (1st ALT)\t[7]tv (1st ALT)\t[8]ts/tv (1st ALT)\n");
+ fprintf(pysam_stdout, "# TSTV, transitions/transversions:\n# TSTV\t[2]id\t[3]ts\t[4]tv\t[5]ts/tv\t[6]ts (1st ALT)\t[7]tv (1st ALT)\t[8]ts/tv (1st ALT)\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
int ts=0,tv=0;
for (i=0; i<args->m_af; i++) { ts += stats->af_ts[i]; tv += stats->af_tv[i]; }
- printf("TSTV\t%d\t%d\t%d\t%.2f\t%d\t%d\t%.2f\n", id,ts,tv,tv?(float)ts/tv:0, stats->ts_alt1,stats->tv_alt1,stats->tv_alt1?(float)stats->ts_alt1/stats->tv_alt1:0);
+ fprintf(pysam_stdout, "TSTV\t%d\t%d\t%d\t%.2f\t%d\t%d\t%.2f\n", id,ts,tv,tv?(float)ts/tv:0, stats->ts_alt1,stats->tv_alt1,stats->tv_alt1?(float)stats->ts_alt1/stats->tv_alt1:0);
}
if ( args->exons_fname )
{
- printf("# FS, Indel frameshifts:\n# FS\t[2]id\t[3]in-frame\t[4]out-frame\t[5]not applicable\t[6]out/(in+out) ratio\t[7]in-frame (1st ALT)\t[8]out-frame (1st ALT)\t[9]not applicable (1st ALT)\t[10]out/(in+out) ratio (1st ALT)\n");
+ fprintf(pysam_stdout, "# FS, Indel frameshifts:\n# FS\t[2]id\t[3]in-frame\t[4]out-frame\t[5]not applicable\t[6]out/(in+out) ratio\t[7]in-frame (1st ALT)\t[8]out-frame (1st ALT)\t[9]not applicable (1st ALT)\t[10]out/(in+out) ratio (1st ALT)\n");
for (id=0; id<args->nstats; id++)
{
int in=args->stats[id].in_frame, out=args->stats[id].out_frame, na=args->stats[id].na_frame;
int in1=args->stats[id].in_frame_alt1, out1=args->stats[id].out_frame_alt1, na1=args->stats[id].na_frame_alt1;
- printf("FS\t%d\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%.2f\n", id, in,out,na,out?(float)out/(in+out):0,in1,out1,na1,out1?(float)out1/(in1+out1):0);
+ fprintf(pysam_stdout, "FS\t%d\t%d\t%d\t%d\t%.2f\t%d\t%d\t%d\t%.2f\n", id, in,out,na,out?(float)out/(in+out):0,in1,out1,na1,out1?(float)out1/(in1+out1):0);
}
}
if ( args->indel_ctx )
{
- printf("# ICS, Indel context summary:\n# ICS\t[2]id\t[3]repeat-consistent\t[4]repeat-inconsistent\t[5]not applicable\t[6]c/(c+i) ratio\n");
+ fprintf(pysam_stdout, "# ICS, Indel context summary:\n# ICS\t[2]id\t[3]repeat-consistent\t[4]repeat-inconsistent\t[5]not applicable\t[6]c/(c+i) ratio\n");
for (id=0; id<args->nstats; id++)
{
int nc = 0, ni = 0, na = args->stats[id].n_repeat_na;
@@ -1176,25 +1176,25 @@ static void print_stats(args_t *args)
nc += args->stats[id].n_repeat[i][0] + args->stats[id].n_repeat[i][2];
ni += args->stats[id].n_repeat[i][1] + args->stats[id].n_repeat[i][3];
}
- printf("ICS\t%d\t%d\t%d\t%d\t%.4f\n", id, nc,ni,na,nc+ni ? (float)nc/(nc+ni) : 0.0);
+ fprintf(pysam_stdout, "ICS\t%d\t%d\t%d\t%d\t%.4f\n", id, nc,ni,na,nc+ni ? (float)nc/(nc+ni) : 0.0);
}
- printf("# ICL, Indel context by length:\n# ICL\t[2]id\t[3]length of repeat element\t[4]repeat-consistent deletions)\t[5]repeat-inconsistent deletions\t[6]consistent insertions\t[7]inconsistent insertions\t[8]c/(c+i) ratio\n");
+ fprintf(pysam_stdout, "# ICL, Indel context by length:\n# ICL\t[2]id\t[3]length of repeat element\t[4]repeat-consistent deletions)\t[5]repeat-inconsistent deletions\t[6]consistent insertions\t[7]inconsistent insertions\t[8]c/(c+i) ratio\n");
for (id=0; id<args->nstats; id++)
{
for (i=1; i<IRC_RLEN; i++)
{
int nc = args->stats[id].n_repeat[i][0]+args->stats[id].n_repeat[i][2], ni = args->stats[id].n_repeat[i][1]+args->stats[id].n_repeat[i][3];
- printf("ICL\t%d\t%d\t%d\t%d\t%d\t%d\t%.4f\n", id, i+1,
+ fprintf(pysam_stdout, "ICL\t%d\t%d\t%d\t%d\t%d\t%d\t%.4f\n", id, i+1,
args->stats[id].n_repeat[i][0],args->stats[id].n_repeat[i][1],args->stats[id].n_repeat[i][2],args->stats[id].n_repeat[i][3],
nc+ni ? (float)nc/(nc+ni) : 0.0);
}
}
}
- printf("# SiS, Singleton stats:\n# SiS\t[2]id\t[3]allele count\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
+ fprintf(pysam_stdout, "# SiS, Singleton stats:\n# SiS\t[2]id\t[3]allele count\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
- printf("SiS\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,1,stats->af_snps[0],stats->af_ts[0],stats->af_tv[0],
+ fprintf(pysam_stdout, "SiS\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,1,stats->af_snps[0],stats->af_ts[0],stats->af_tv[0],
stats->af_repeats[0][0]+stats->af_repeats[1][0]+stats->af_repeats[2][0],stats->af_repeats[0][0],stats->af_repeats[1][0],stats->af_repeats[2][0]);
// put the singletons stats into the first AF bin, note that not all of the stats is transferred (i.e. nrd mismatches)
stats->af_snps[1] += stats->af_snps[0];
@@ -1204,32 +1204,32 @@ static void print_stats(args_t *args)
stats->af_repeats[1][1] += stats->af_repeats[1][0];
stats->af_repeats[2][1] += stats->af_repeats[2][0];
}
- printf("# AF, Stats by non-reference allele frequency:\n# AF\t[2]id\t[3]allele frequency\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
+ fprintf(pysam_stdout, "# AF, Stats by non-reference allele frequency:\n# AF\t[2]id\t[3]allele frequency\t[4]number of SNPs\t[5]number of transitions\t[6]number of transversions\t[7]number of indels\t[8]repeat-consistent\t[9]repeat-inconsistent\t[10]not applicable\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
for (i=1; i<args->m_af; i++) // note that af[1] now contains also af[0], see SiS stats output above
{
if ( stats->af_snps[i]+stats->af_ts[i]+stats->af_tv[i]+stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i] == 0 ) continue;
- printf("AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,100.*(i-1)/(args->m_af-1),stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
+ fprintf(pysam_stdout, "AF\t%d\t%f\t%d\t%d\t%d\t%d\t%d\t%d\t%d\n", id,100.*(i-1)/(args->m_af-1),stats->af_snps[i],stats->af_ts[i],stats->af_tv[i],
stats->af_repeats[0][i]+stats->af_repeats[1][i]+stats->af_repeats[2][i],stats->af_repeats[0][i],stats->af_repeats[1][i],stats->af_repeats[2][i]);
}
}
#if QUAL_STATS
- printf("# QUAL, Stats by quality:\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
+ fprintf(pysam_stdout, "# QUAL, Stats by quality:\n# QUAL\t[2]id\t[3]Quality\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\t[7]number of indels\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
for (i=0; i<args->m_qual; i++)
{
if ( stats->qual_snps[i]+stats->qual_ts[i]+stats->qual_tv[i]+stats->qual_indels[i] == 0 ) continue;
- printf("QUAL\t%d\t%d\t%d\t%d\t%d\t%d\n", id,i,stats->qual_snps[i],stats->qual_ts[i],stats->qual_tv[i],stats->qual_indels[i]);
+ fprintf(pysam_stdout, "QUAL\t%d\t%d\t%d\t%d\t%d\t%d\n", id,i,stats->qual_snps[i],stats->qual_ts[i],stats->qual_tv[i],stats->qual_indels[i]);
}
}
#endif
for (i=0; i<args->nusr; i++)
{
- printf("# USR:%s, Stats by %s:\n# USR:%s\t[2]id\t[3]%s\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
+ fprintf(pysam_stdout, "# USR:%s, Stats by %s:\n# USR:%s\t[2]id\t[3]%s\t[4]number of SNPs\t[5]number of transitions (1st ALT)\t[6]number of transversions (1st ALT)\n",
args->usr[i].tag,args->usr[i].tag,args->usr[i].tag,args->usr[i].tag);
for (id=0; id<args->nstats; id++)
{
@@ -1240,32 +1240,32 @@ static void print_stats(args_t *args)
if ( usr->vals_ts[j]+usr->vals_tv[j] == 0 ) continue; // skip empty bins
float val = usr->min + (usr->max - usr->min)*j/(usr->nbins-1);
const char *fmt = usr->type==BCF_HT_REAL ? "USR:%s\t%d\t%e\t%d\t%d\t%d\n" : "USR:%s\t%d\t%.0f\t%d\t%d\t%d\n";
- printf(fmt,usr->tag,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
+ fprintf(pysam_stdout, fmt,usr->tag,id,val,usr->vals_ts[j]+usr->vals_tv[j],usr->vals_ts[j],usr->vals_tv[j]);
}
}
}
- printf("# IDD, InDel distribution:\n# IDD\t[2]id\t[3]length (deletions negative)\t[4]count\n");
+ fprintf(pysam_stdout, "# IDD, InDel distribution:\n# IDD\t[2]id\t[3]length (deletions negative)\t[4]count\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
for (i=stats->m_indel-1; i>=0; i--)
- if ( stats->deletions[i] ) printf("IDD\t%d\t%d\t%d\n", id,-i-1,stats->deletions[i]);
+ if ( stats->deletions[i] ) fprintf(pysam_stdout, "IDD\t%d\t%d\t%d\n", id,-i-1,stats->deletions[i]);
for (i=0; i<stats->m_indel; i++)
- if ( stats->insertions[i] ) printf("IDD\t%d\t%d\t%d\n", id,i+1,stats->insertions[i]);
+ if ( stats->insertions[i] ) fprintf(pysam_stdout, "IDD\t%d\t%d\t%d\n", id,i+1,stats->insertions[i]);
}
- printf("# ST, Substitution types:\n# ST\t[2]id\t[3]type\t[4]count\n");
+ fprintf(pysam_stdout, "# ST, Substitution types:\n# ST\t[2]id\t[3]type\t[4]count\n");
for (id=0; id<args->nstats; id++)
{
int t;
for (t=0; t<15; t++)
{
if ( t>>2 == (t&3) ) continue;
- printf("ST\t%d\t%c>%c\t%d\n", id, bcf_int2acgt(t>>2),bcf_int2acgt(t&3),args->stats[id].subst[t]);
+ fprintf(pysam_stdout, "ST\t%d\t%c>%c\t%d\n", id, bcf_int2acgt(t>>2),bcf_int2acgt(t&3),args->stats[id].subst[t]);
}
}
if ( args->files->nreaders>1 && args->files->n_smpl )
{
- printf("SN\t%d\tnumber of samples:\t%d\n", 2, args->files->n_smpl);
+ fprintf(pysam_stdout, "SN\t%d\tnumber of samples:\t%d\n", 2, args->files->n_smpl);
int x;
for (x=0; x<2; x++)
@@ -1273,12 +1273,12 @@ static void print_stats(args_t *args)
gtcmp_t *stats;
if ( x==0 )
{
- printf("# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+ fprintf(pysam_stdout, "# GCsAF, Genotype concordance by non-reference allele frequency (SNPs)\n# GCsAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
stats = args->af_gts_snps;
}
else
{
- printf("# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
+ fprintf(pysam_stdout, "# GCiAF, Genotype concordance by non-reference allele frequency (indels)\n# GCiAF\t[2]id\t[3]allele frequency\t[4]RR Hom matches\t[5]RA Het matches\t[6]AA Hom matches\t[7]RR Hom mismatches\t[8]RA Het mismatches\t[9]AA Hom mismatches\t[10]dosage r-squared\t[11]number of sites\n");
stats = args->af_gts_indels;
}
uint64_t nrd_m[3] = {0,0,0}, nrd_mm[3] = {0,0,0};
@@ -1292,28 +1292,28 @@ static void print_stats(args_t *args)
nrd_mm[j] += stats[i].mm[j];
}
if ( !i || !n ) continue; // skip singleton stats and empty bins
- printf("GC%cAF\t2\t%f", x==0 ? 's' : 'i', 100.*(i-1)/(args->m_af-1));
- printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
- printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
- printf("\t%f\t%"PRId32"\n", stats[i].r2n ? stats[i].r2sum/stats[i].r2n : -1.0, stats[i].r2n);
+ fprintf(pysam_stdout, "GC%cAF\t2\t%f", x==0 ? 's' : 'i', 100.*(i-1)/(args->m_af-1));
+ fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
+ fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
+ fprintf(pysam_stdout, "\t%f\t%"PRId32"\n", stats[i].r2n ? stats[i].r2sum/stats[i].r2n : -1.0, stats[i].r2n);
}
if ( x==0 )
{
- printf("# NRD and discordance is calculated as follows:\n");
- printf("# m .. number of matches\n");
- printf("# x .. number of mismatches\n");
- printf("# NRD = (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
- printf("# RR discordance = xRR / (xRR + mRR)\n");
- printf("# RA discordance = xRA / (xRA + mRA)\n");
- printf("# AA discordance = xAA / (xAA + mAA)\n");
- printf("# Non-Reference Discordance (NRD), SNPs\n# NRDs\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
+ fprintf(pysam_stdout, "# NRD and discordance is calculated as follows:\n");
+ fprintf(pysam_stdout, "# m .. number of matches\n");
+ fprintf(pysam_stdout, "# x .. number of mismatches\n");
+ fprintf(pysam_stdout, "# NRD = (xRR + xRA + xAA) / (xRR + xRA + xAA + mRA + mAA)\n");
+ fprintf(pysam_stdout, "# RR discordance = xRR / (xRR + mRR)\n");
+ fprintf(pysam_stdout, "# RA discordance = xRA / (xRA + mRA)\n");
+ fprintf(pysam_stdout, "# AA discordance = xAA / (xAA + mAA)\n");
+ fprintf(pysam_stdout, "# Non-Reference Discordance (NRD), SNPs\n# NRDs\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
}
else
- printf("# Non-Reference Discordance (NRD), indels\n# NRDi\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
+ fprintf(pysam_stdout, "# Non-Reference Discordance (NRD), indels\n# NRDi\t[2]id\t[3]NRD\t[4]Ref/Ref discordance\t[5]Ref/Alt discordance\t[6]Alt/Alt discordance\n");
uint64_t m = nrd_m[T2S(GT_HET_RA)] + nrd_m[T2S(GT_HOM_AA)];
uint64_t mm = nrd_mm[T2S(GT_HOM_RR)] + nrd_mm[T2S(GT_HET_RA)] + nrd_mm[T2S(GT_HOM_AA)];
- printf("NRD%c\t2\t%f\t%f\t%f\t%f\n", x==0 ? 's' : 'i',
+ fprintf(pysam_stdout, "NRD%c\t2\t%f\t%f\t%f\t%f\n", x==0 ? 's' : 'i',
m+mm ? mm*100.0/(m+mm) : 0,
nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)] ? nrd_mm[T2S(GT_HOM_RR)]*100.0/(nrd_m[T2S(GT_HOM_RR)]+nrd_mm[T2S(GT_HOM_RR)]) : 0,
nrd_m[T2S(GT_HET_RA)]+nrd_mm[T2S(GT_HET_RA)] ? nrd_mm[T2S(GT_HET_RA)]*100.0/(nrd_m[T2S(GT_HET_RA)]+nrd_mm[T2S(GT_HET_RA)]) : 0,
@@ -1327,13 +1327,13 @@ static void print_stats(args_t *args)
smpl_r_t *smpl_r_array;
if ( x==0 )
{
- printf("# GCsS, Genotype concordance by sample (SNPs)\n# GCsS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
+ fprintf(pysam_stdout, "# GCsS, Genotype concordance by sample (SNPs)\n# GCsS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
stats = args->smpl_gts_snps;
smpl_r_array = args->smpl_r_snps;
}
else
{
- printf("# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
+ fprintf(pysam_stdout, "# GCiS, Genotype concordance by sample (indels)\n# GCiS\t[2]id\t[3]sample\t[4]non-reference discordance rate\t[5]RR Hom matches\t[6]RA Het matches\t[7]AA Hom matches\t[8]RR Hom mismatches\t[9]RA Het mismatches\t[10]AA Hom mismatches\t[11]dosage r-squared\n");
stats = args->smpl_gts_indels;
smpl_r_array = args->smpl_r_indels;
}
@@ -1350,16 +1350,16 @@ static void print_stats(args_t *args)
double y2_yy = smpl_r->y2-(smpl_r->y*smpl_r->y)/smpl_r->n;
r = (sum_crossprod)/sqrt(x2_xx*y2_yy);
}
- printf("GC%cS\t2\t%s\t%.3f", x==0 ? 's' : 'i', args->files->samples[i], m+mm ? mm*100.0/(m+mm) : 0);
- printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
- printf("\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
- if (smpl_r->n && !isnan(r)) printf("\t%f\n", r*r);
- else printf("\t"NA_STRING"\n");
+ fprintf(pysam_stdout, "GC%cS\t2\t%s\t%.3f", x==0 ? 's' : 'i', args->files->samples[i], m+mm ? mm*100.0/(m+mm) : 0);
+ fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].m[T2S(GT_HOM_RR)],stats[i].m[T2S(GT_HET_RA)],stats[i].m[T2S(GT_HOM_AA)]);
+ fprintf(pysam_stdout, "\t%"PRId64"\t%"PRId64"\t%"PRId64"", stats[i].mm[T2S(GT_HOM_RR)],stats[i].mm[T2S(GT_HET_RA)],stats[i].mm[T2S(GT_HOM_AA)]);
+ if (smpl_r->n && !isnan(r)) fprintf(pysam_stdout, "\t%f\n", r*r);
+ else fprintf(pysam_stdout, "\t"NA_STRING"\n");
}
}
}
- printf("# DP, Depth distribution\n# DP\t[2]id\t[3]bin\t[4]number of genotypes\t[5]fraction of genotypes (%%)\t[6]number of sites\t[7]fraction of sites (%%)\n");
+ fprintf(pysam_stdout, "# DP, Depth distribution\n# DP\t[2]id\t[3]bin\t[4]number of genotypes\t[5]fraction of genotypes (%%)\t[6]number of sites\t[7]fraction of sites (%%)\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
@@ -1368,32 +1368,32 @@ static void print_stats(args_t *args)
for (i=0; i<stats->dp.m_vals; i++)
{
if ( stats->dp.vals[i]==0 && stats->dp_sites.vals[i]==0 ) continue;
- printf("DP\t%d\t", id);
- if ( i==0 ) printf("<%d", stats->dp.min);
- else if ( i+1==stats->dp.m_vals ) printf(">%d", stats->dp.max);
- else printf("%d", idist_i2bin(&stats->dp,i));
- printf("\t%"PRId64"\t%f", stats->dp.vals[i], sum ? stats->dp.vals[i]*100./sum : 0);
- printf("\t%"PRId64"\t%f\n", stats->dp_sites.vals[i], sum_sites ? stats->dp_sites.vals[i]*100./sum_sites : 0);
+ fprintf(pysam_stdout, "DP\t%d\t", id);
+ if ( i==0 ) fprintf(pysam_stdout, "<%d", stats->dp.min);
+ else if ( i+1==stats->dp.m_vals ) fprintf(pysam_stdout, ">%d", stats->dp.max);
+ else fprintf(pysam_stdout, "%d", idist_i2bin(&stats->dp,i));
+ fprintf(pysam_stdout, "\t%"PRId64"\t%f", stats->dp.vals[i], sum ? stats->dp.vals[i]*100./sum : 0);
+ fprintf(pysam_stdout, "\t%"PRId64"\t%f\n", stats->dp_sites.vals[i], sum_sites ? stats->dp_sites.vals[i]*100./sum_sites : 0);
}
}
if ( args->files->n_smpl )
{
- printf("# PSC, Per-sample counts\n# PSC\t[2]id\t[3]sample\t[4]nRefHom\t[5]nNonRefHom\t[6]nHets\t[7]nTransitions\t[8]nTransversions\t[9]nIndels\t[10]average depth\t[11]nSingletons\n");
+ fprintf(pysam_stdout, "# PSC, Per-sample counts\n# PSC\t[2]id\t[3]sample\t[4]nRefHom\t[5]nNonRefHom\t[6]nHets\t[7]nTransitions\t[8]nTransversions\t[9]nIndels\t[10]average depth\t[11]nSingletons\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
for (i=0; i<args->files->n_smpl; i++)
{
float dp = stats->smpl_ndp[i] ? stats->smpl_dp[i]/(float)stats->smpl_ndp[i] : 0;
- printf("PSC\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%.1f\t%d\n", id,args->files->samples[i],
+ fprintf(pysam_stdout, "PSC\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%.1f\t%d\n", id,args->files->samples[i],
stats->smpl_homRR[i], stats->smpl_homAA[i], stats->smpl_hets[i], stats->smpl_ts[i],
stats->smpl_tv[i], stats->smpl_indels[i],dp, stats->smpl_sngl[i]);
}
}
- printf("# PSI, Per-Sample Indels\n# PSI\t[2]id\t[3]sample\t[4]in-frame\t[5]out-frame\t[6]not applicable\t[7]out/(in+out) ratio\t[8]nHets\t[9]nAA\n");
+ fprintf(pysam_stdout, "# PSI, Per-Sample Indels\n# PSI\t[2]id\t[3]sample\t[4]in-frame\t[5]out-frame\t[6]not applicable\t[7]out/(in+out) ratio\t[8]nHets\t[9]nAA\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
@@ -1408,12 +1408,12 @@ static void print_stats(args_t *args)
}
int nhom = stats->smpl_indel_homs[i];
int nhet = stats->smpl_indel_hets[i];
- printf("PSI\t%d\t%s\t%d\t%d\t%d\t%.2f\t%d\t%d\n", id,args->files->samples[i], in,out,na,in+out?1.0*out/(in+out):0,nhet,nhom);
+ fprintf(pysam_stdout, "PSI\t%d\t%s\t%d\t%d\t%d\t%.2f\t%d\t%d\n", id,args->files->samples[i], in,out,na,in+out?1.0*out/(in+out):0,nhet,nhom);
}
}
#ifdef HWE_STATS
- printf("# HWE\n# HWE\t[2]id\t[3]1st ALT allele frequency\t[4]Number of observations\t[5]25th percentile\t[6]median\t[7]75th percentile\n");
+ fprintf(pysam_stdout, "# HWE\n# HWE\t[2]id\t[3]1st ALT allele frequency\t[4]Number of observations\t[5]25th percentile\t[6]median\t[7]75th percentile\n");
for (id=0; id<args->nstats; id++)
{
stats_t *stats = &args->stats[id];
@@ -1426,28 +1426,28 @@ static void print_stats(args_t *args)
if ( !sum_tot ) continue;
int nprn = 3;
- printf("HWE\t%d\t%f\t%d",id,100.*(i-1)/(args->m_af-1),sum_tot);
+ fprintf(pysam_stdout, "HWE\t%d\t%f\t%d",id,100.*(i-1)/(args->m_af-1),sum_tot);
for (j=0; j<args->naf_hwe; j++)
{
sum_tmp += ptr[j];
float frac = (float)sum_tmp/sum_tot;
if ( frac >= 0.75 )
{
- while (nprn>0) { printf("\t%f", (float)j/args->naf_hwe); nprn--; }
+ while (nprn>0) { fprintf(pysam_stdout, "\t%f", (float)j/args->naf_hwe); nprn--; }
break;
}
if ( frac >= 0.5 )
{
- while (nprn>1) { printf("\t%f", (float)j/args->naf_hwe); nprn--; }
+ while (nprn>1) { fprintf(pysam_stdout, "\t%f", (float)j/args->naf_hwe); nprn--; }
continue;
}
if ( frac >= 0.25 )
{
- while (nprn>2) { printf("\t%f", (float)j/args->naf_hwe); nprn--; }
+ while (nprn>2) { fprintf(pysam_stdout, "\t%f", (float)j/args->naf_hwe); nprn--; }
}
}
assert(nprn==0);
- printf("\n");
+ fprintf(pysam_stdout, "\n");
}
}
#endif
@@ -1456,32 +1456,32 @@ static void print_stats(args_t *args)
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats.\n");
- fprintf(pysamerr, " When two files are given, the program generates separate stats for intersection\n");
- fprintf(pysamerr, " and the complements. By default only sites are compared, -s/-S must given to include\n");
- fprintf(pysamerr, " also sample columns.\n");
- fprintf(pysamerr, "Usage: bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -1, --1st-allele-only include only 1st allele at multiallelic sites\n");
- fprintf(pysamerr, " -c, --collapse <string> treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
- fprintf(pysamerr, " -d, --depth <int,int,int> depth distribution: min,max,bin size [0,500,1]\n");
- fprintf(pysamerr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
- fprintf(pysamerr, " -E, --exons <file.gz> tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)\n");
- fprintf(pysamerr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
- fprintf(pysamerr, " -F, --fasta-ref <file> faidx indexed reference sequence file to determine INDEL context\n");
- fprintf(pysamerr, " -i, --include <expr> select sites for which the expression is true (see man page for details)\n");
- fprintf(pysamerr, " -I, --split-by-ID collect stats for sites with ID separately (known vs novel)\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -s, --samples <list> list of samples for sample stats, \"-\" to include all samples\n");
- fprintf(pysamerr, " -S, --samples-file <file> file of samples to include\n");
- fprintf(pysamerr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
- fprintf(pysamerr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
- fprintf(pysamerr, " -u, --user-tstv <TAG[:min:max:n]> collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
- fprintf(pysamerr, " -v, --verbose produce verbose per-site and per-sample output\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Parses VCF or BCF and produces stats which can be plotted using plot-vcfstats.\n");
+ fprintf(pysam_stderr, " When two files are given, the program generates separate stats for intersection\n");
+ fprintf(pysam_stderr, " and the complements. By default only sites are compared, -s/-S must given to include\n");
+ fprintf(pysam_stderr, " also sample columns.\n");
+ fprintf(pysam_stderr, "Usage: bcftools stats [options] <A.vcf.gz> [<B.vcf.gz>]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -1, --1st-allele-only include only 1st allele at multiallelic sites\n");
+ fprintf(pysam_stderr, " -c, --collapse <string> treat as identical records with <snps|indels|both|all|some|none>, see man page for details [none]\n");
+ fprintf(pysam_stderr, " -d, --depth <int,int,int> depth distribution: min,max,bin size [0,500,1]\n");
+ fprintf(pysam_stderr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -E, --exons <file.gz> tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed)\n");
+ fprintf(pysam_stderr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+ fprintf(pysam_stderr, " -F, --fasta-ref <file> faidx indexed reference sequence file to determine INDEL context\n");
+ fprintf(pysam_stderr, " -i, --include <expr> select sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -I, --split-by-ID collect stats for sites with ID separately (known vs novel)\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -s, --samples <list> list of samples for sample stats, \"-\" to include all samples\n");
+ fprintf(pysam_stderr, " -S, --samples-file <file> file of samples to include\n");
+ fprintf(pysam_stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(pysam_stderr, " -u, --user-tstv <TAG[:min:max:n]> collect Ts/Tv stats for any tag using the given binning [0:1:100]\n");
+ fprintf(pysam_stderr, " -v, --verbose produce verbose per-site and per-sample output\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
diff --git a/bcftools/vcfview.c b/bcftools/vcfview.c
index ed41595..c14075d 100644
--- a/bcftools/vcfview.c
+++ b/bcftools/vcfview.c
@@ -72,6 +72,7 @@ typedef struct _args_t
int sample_is_file, force_samples;
char *include_types, *exclude_types;
int include, exclude;
+ int record_cmd_line;
htsFile *out;
}
args_t;
@@ -86,7 +87,8 @@ static void init_data(args_t *args)
bcf_hdr_append(args->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes\">");
bcf_hdr_append(args->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
}
- bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+ else bcf_hdr_sync(args->hdr);
// setup sample data
if (args->sample_names)
@@ -485,6 +487,7 @@ static void usage(args_t *args)
fprintf(stderr, " -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set)\n");
fprintf(stderr, " -h/H, --header-only/--no-header print the header only/suppress the header in VCF output\n");
fprintf(stderr, " -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
+ fprintf(stderr, " --no-version do not append version and command line to the header\n");
fprintf(stderr, " -o, --output-file <file> output file name [stdout]\n");
fprintf(stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
fprintf(stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
@@ -529,6 +532,7 @@ int main_vcfview(int argc, char *argv[])
args->update_info = 1;
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
int targets_is_file = 0, regions_is_file = 0;
static struct option loptions[] =
@@ -569,6 +573,7 @@ int main_vcfview(int argc, char *argv[])
{"max-af",required_argument,NULL,'Q'},
{"phased",no_argument,NULL,'p'},
{"exclude-phased",no_argument,NULL,'P'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
@@ -678,6 +683,7 @@ int main_vcfview(int argc, char *argv[])
break;
}
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case '?': usage(args);
default: error("Unknown argument: %s\n", optarg);
}
diff --git a/bcftools/vcfview.c.pysam.c b/bcftools/vcfview.c.pysam.c
index a6a0cc0..53b7c53 100644
--- a/bcftools/vcfview.c.pysam.c
+++ b/bcftools/vcfview.c.pysam.c
@@ -74,6 +74,7 @@ typedef struct _args_t
int sample_is_file, force_samples;
char *include_types, *exclude_types;
int include, exclude;
+ int record_cmd_line;
htsFile *out;
}
args_t;
@@ -88,7 +89,8 @@ static void init_data(args_t *args)
bcf_hdr_append(args->hdr,"##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Allele count in genotypes\">");
bcf_hdr_append(args->hdr,"##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">");
}
- bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+ if (args->record_cmd_line) bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_view");
+ else bcf_hdr_sync(args->hdr);
// setup sample data
if (args->sample_names)
@@ -112,7 +114,7 @@ static void init_data(args_t *args)
for (i=0; i<nsmpl; i++) {
if (!khash_str2int_has_key(hdr_samples,smpl[i])) {
if (args->force_samples) {
- fprintf(pysamerr, "Warn: exclude called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
+ fprintf(pysam_stderr, "Warn: exclude called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
} else {
error("Error: exclude called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]);
}
@@ -133,7 +135,7 @@ static void init_data(args_t *args)
for (i=0; i<nsmpl; i++) {
if (!khash_str2int_has_key(hdr_samples,smpl[i])) {
if (args->force_samples) {
- fprintf(pysamerr, "Warn: subset called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
+ fprintf(pysam_stderr, "Warn: subset called for sample that does not exist in header: \"%s\"... skipping\n", smpl[i]);
continue;
} else {
error("Error: subset called for sample that does not exist in header: \"%s\". Use \"--force-samples\" to ignore this error.\n", smpl[i]);
@@ -147,7 +149,7 @@ static void init_data(args_t *args)
free(smpl);
khash_str2int_destroy(hdr_samples);
if (args->n_samples == 0) {
- fprintf(pysamerr, "Warn: subsetting has removed all samples\n");
+ fprintf(pysam_stderr, "Warn: subsetting has removed all samples\n");
args->sites_only = 1;
}
}
@@ -158,7 +160,7 @@ static void init_data(args_t *args)
// determine variant types to include/exclude
if (args->include_types || args->exclude_types) {
if (args->include_types && args->exclude_types) {
- fprintf(pysamerr, "Error: only supply one of --include-types, --exclude-types options\n");
+ fprintf(pysam_stderr, "Error: only supply one of --include-types, --exclude-types options\n");
exit(1);
}
char **type_list = 0;
@@ -186,8 +188,8 @@ static void init_data(args_t *args)
else if (strcmp(type_list[i], "mnps") == 0) args->include |= VCF_MNP;
else if (strcmp(type_list[i], "other") == 0) args->include |= VCF_OTHER;
else {
- fprintf(pysamerr, "[E::%s] unknown type\n", type_list[i]);
- fprintf(pysamerr, "Accepted types are snps, indels, mnps, other\n");
+ fprintf(pysam_stderr, "[E::%s] unknown type\n", type_list[i]);
+ fprintf(pysam_stderr, "Accepted types are snps, indels, mnps, other\n");
exit(1);
}
}
@@ -200,8 +202,8 @@ static void init_data(args_t *args)
else if (strcmp(type_list[i], "mnps") == 0) args->exclude |= VCF_MNP;
else if (strcmp(type_list[i], "other") == 0) args->exclude |= VCF_OTHER;
else {
- fprintf(pysamerr, "[E::%s] unknown type\n", type_list[i]);
- fprintf(pysamerr, "Accepted types are snps, indels, mnps, other\n");
+ fprintf(pysam_stderr, "[E::%s] unknown type\n", type_list[i]);
+ fprintf(pysam_stderr, "Accepted types are snps, indels, mnps, other\n");
exit(1);
}
}
@@ -290,7 +292,7 @@ int bcf_all_phased(const bcf_hdr_t *header, bcf1_t *line)
case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_vector_end); break;
case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_vector_end); break;
case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_vector_end); break;
- default: fprintf(pysamerr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
+ default: fprintf(pysam_stderr, "[E::%s] todo: fmt_type %d\n", __func__, fmt_ptr->type); exit(1); break;
}
#undef BRANCH_INT
if (!sample_phased) {
@@ -479,44 +481,45 @@ void set_allele_type (int *atype, char *atype_string)
static void usage(args_t *args)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: VCF/BCF conversion, view, subset and filter VCF/BCF files.\n");
- fprintf(pysamerr, "Usage: bcftools view [options] <in.vcf.gz> [region1 [...]]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Output options:\n");
- fprintf(pysamerr, " -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set)\n");
- fprintf(pysamerr, " -h/H, --header-only/--no-header print the header only/suppress the header in VCF output\n");
- fprintf(pysamerr, " -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
- fprintf(pysamerr, " -o, --output-file <file> output file name [stdout]\n");
- fprintf(pysamerr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
- fprintf(pysamerr, " -r, --regions <region> restrict to comma-separated list of regions\n");
- fprintf(pysamerr, " -R, --regions-file <file> restrict to regions listed in a file\n");
- fprintf(pysamerr, " -t, --targets [^]<region> similar to -r but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
- fprintf(pysamerr, " -T, --targets-file [^]<file> similar to -R but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
- fprintf(pysamerr, " --threads <int> number of extra output compression threads [0]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Subset options:\n");
- fprintf(pysamerr, " -a, --trim-alt-alleles trim alternate alleles not seen in the subset\n");
- fprintf(pysamerr, " -I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)\n");
- fprintf(pysamerr, " -s, --samples [^]<list> comma separated list of samples to include (or exclude with \"^\" prefix)\n");
- fprintf(pysamerr, " -S, --samples-file [^]<file> file of samples to include (or exclude with \"^\" prefix)\n");
- fprintf(pysamerr, " --force-samples only warn about unknown subset samples\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Filter options:\n");
- fprintf(pysamerr, " -c/C, --min-ac/--max-ac <int>[:<type>] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent\n");
- fprintf(pysamerr, " (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
- fprintf(pysamerr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
- fprintf(pysamerr, " -g, --genotype [^]<hom|het|miss> require one or more hom/het/missing genotype or, if prefixed with \"^\", exclude sites with hom/het/missing genotypes\n");
- fprintf(pysamerr, " -i/e, --include/--exclude <expr> select/exclude sites for which the expression is true (see man page for details)\n");
- fprintf(pysamerr, " -k/n, --known/--novel select known/novel sites only (ID is not/is '.')\n");
- fprintf(pysamerr, " -m/M, --min-alleles/--max-alleles <int> minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)\n");
- fprintf(pysamerr, " -p/P, --phased/--exclude-phased select/exclude sites where all samples are phased\n");
- fprintf(pysamerr, " -q/Q, --min-af/--max-af <float>[:<type>] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent\n");
- fprintf(pysamerr, " (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
- fprintf(pysamerr, " -u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype\n");
- fprintf(pysamerr, " -v/V, --types/--exclude-types <list> select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]\n");
- fprintf(pysamerr, " -x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: VCF/BCF conversion, view, subset and filter VCF/BCF files.\n");
+ fprintf(pysam_stderr, "Usage: bcftools view [options] <in.vcf.gz> [region1 [...]]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Output options:\n");
+ fprintf(pysam_stderr, " -G, --drop-genotypes drop individual genotype information (after subsetting if -s option set)\n");
+ fprintf(pysam_stderr, " -h/H, --header-only/--no-header print the header only/suppress the header in VCF output\n");
+ fprintf(pysam_stderr, " -l, --compression-level [0-9] compression level: 0 uncompressed, 1 best speed, 9 best compression [%d]\n", args->clevel);
+ fprintf(pysam_stderr, " --no-version do not append version and command line to the header\n");
+ fprintf(pysam_stderr, " -o, --output-file <file> output file name [pysam_stdout]\n");
+ fprintf(pysam_stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(pysam_stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(pysam_stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(pysam_stderr, " -t, --targets [^]<region> similar to -r but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
+ fprintf(pysam_stderr, " -T, --targets-file [^]<file> similar to -R but streams rather than index-jumps. Exclude regions with \"^\" prefix\n");
+ fprintf(pysam_stderr, " --threads <int> number of extra output compression threads [0]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Subset options:\n");
+ fprintf(pysam_stderr, " -a, --trim-alt-alleles trim alternate alleles not seen in the subset\n");
+ fprintf(pysam_stderr, " -I, --no-update do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)\n");
+ fprintf(pysam_stderr, " -s, --samples [^]<list> comma separated list of samples to include (or exclude with \"^\" prefix)\n");
+ fprintf(pysam_stderr, " -S, --samples-file [^]<file> file of samples to include (or exclude with \"^\" prefix)\n");
+ fprintf(pysam_stderr, " --force-samples only warn about unknown subset samples\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Filter options:\n");
+ fprintf(pysam_stderr, " -c/C, --min-ac/--max-ac <int>[:<type>] minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent\n");
+ fprintf(pysam_stderr, " (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
+ fprintf(pysam_stderr, " -f, --apply-filters <list> require at least one of the listed FILTER strings (e.g. \"PASS,.\")\n");
+ fprintf(pysam_stderr, " -g, --genotype [^]<hom|het|miss> require one or more hom/het/missing genotype or, if prefixed with \"^\", exclude sites with hom/het/missing genotypes\n");
+ fprintf(pysam_stderr, " -i/e, --include/--exclude <expr> select/exclude sites for which the expression is true (see man page for details)\n");
+ fprintf(pysam_stderr, " -k/n, --known/--novel select known/novel sites only (ID is not/is '.')\n");
+ fprintf(pysam_stderr, " -m/M, --min-alleles/--max-alleles <int> minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)\n");
+ fprintf(pysam_stderr, " -p/P, --phased/--exclude-phased select/exclude sites where all samples are phased\n");
+ fprintf(pysam_stderr, " -q/Q, --min-af/--max-af <float>[:<type>] minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent\n");
+ fprintf(pysam_stderr, " (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]\n");
+ fprintf(pysam_stderr, " -u/U, --uncalled/--exclude-uncalled select/exclude sites without a called genotype\n");
+ fprintf(pysam_stderr, " -v/V, --types/--exclude-types <list> select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]\n");
+ fprintf(pysam_stderr, " -x/X, --private/--exclude-private select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples\n");
+ fprintf(pysam_stderr, "\n");
exit(1);
}
@@ -531,6 +534,7 @@ int main_vcfview(int argc, char *argv[])
args->update_info = 1;
args->output_type = FT_VCF;
args->n_threads = 0;
+ args->record_cmd_line = 1;
int targets_is_file = 0, regions_is_file = 0;
static struct option loptions[] =
@@ -571,6 +575,7 @@ int main_vcfview(int argc, char *argv[])
{"max-af",required_argument,NULL,'Q'},
{"phased",no_argument,NULL,'p'},
{"exclude-phased",no_argument,NULL,'P'},
+ {"no-version",no_argument,NULL,8},
{NULL,0,NULL,0}
};
char *tmp;
@@ -680,6 +685,7 @@ int main_vcfview(int argc, char *argv[])
break;
}
case 9 : args->n_threads = strtol(optarg, 0, 0); break;
+ case 8 : args->record_cmd_line = 0; break;
case '?': usage(args);
default: error("Unknown argument: %s\n", optarg);
}
diff --git a/bcftools/version.c.pysam.c b/bcftools/version.c.pysam.c
index 1fd0d4e..af54532 100644
--- a/bcftools/version.c.pysam.c
+++ b/bcftools/version.c.pysam.c
@@ -41,7 +41,7 @@ void error(const char *format, ...)
{
va_list ap;
va_start(ap, format);
- vfprintf(pysamerr, format, ap);
+ vfprintf(pysam_stderr, format, ap);
va_end(ap);
exit(-1);
}
diff --git a/bcftools/version.h b/bcftools/version.h
index 70d4f93..05929f5 100644
--- a/bcftools/version.h
+++ b/bcftools/version.h
@@ -1 +1 @@
-#define BCFTOOLS_VERSION "1.3"
+#define BCFTOOLS_VERSION "1.3.1"
diff --git a/doc/faq.rst b/doc/faq.rst
index 1f45981..d5d84c4 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -10,7 +10,7 @@ use the github URL: https://github.com/pysam-developers/pysam.
As pysam is a wrapper around htslib and the samtools package, I
suggest cite `Li et al (2009) <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`.
-Is pysam thread-save?
+Is pysam thread-safe?
=====================
Pysam is a mix of python and C code. Instructions within python are
diff --git a/doc/glossary.rst b/doc/glossary.rst
index f40bcfb..e35a537 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -81,7 +81,8 @@ Glossary
In alignments with soft clipping part of the query sequence
are not aligned. The unaligned query sequence is still part
- of the alignment record. This is in difference to hard clipped reads.
+ of the alignment record. This is in contrast to
+ :term:`hard clipped` reads.
hard clipping
hard clipped
diff --git a/doc/installation.rst b/doc/installation.rst
index a3fa2a2..2dbf2a4 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -58,8 +58,3 @@ python 2.7 contains pre-built C-files and cython needs not be present
during installation. However, when installing the source tarball on
python 3 or building from the repository, these pre-built C-files are
not present and cython needs to be installed beforehand.
-
-
-
-
-
diff --git a/doc/release.rst b/doc/release.rst
index 802c6e5..f49b8f0 100644
--- a/doc/release.rst
+++ b/doc/release.rst
@@ -2,6 +2,26 @@
Release notes
=============
+Release 0.9.1
+=============
+
+This is a bugfix release addressing some installation problems
+in pysam 0.9.0, in particular:
+
+* patch included htslib to work with older libcurl versions, fixes #262.
+* do not require cython for python 3 install, fixes #260
+* FastaFile does not accept filepath_index any more, see #270
+* add AlignedSegment.get_cigar_stats method.
+* py3 bugfix in VariantFile.subset_samples, fixes #272
+* add missing sysconfig import, fixes #278
+* do not redirect stdout, but instead write to a separately
+ created file. This should resolve issues when pysam is used
+ in notebooks or other environments that redirect stdout.
+* wrap htslib-1.3.1, samtools-1.3.1 and bcftools-1.3.1
+* use bgzf throughout instead of gzip
+* allow specifying a fasta reference for CRAM file when opening
+ for both read and write, fixes #280
+
Release 0.9.0
=============
diff --git a/import.py b/import.py
index 4018698..12d2016 100644
--- a/import.py
+++ b/import.py
@@ -12,16 +12,49 @@
# For samtools, type:
# rm -rf samtools
# python import.py samtools download/samtools
+#
# Manually, then:
# modify config.h to set compatibility flags
-# change bamtk.c.pysam.c/main to bamtk.c.pysam.c/samtools_main
#
# For bcftools, type:
# rm -rf bcftools
# python import.py bcftools download/bcftools
+# rm -rf bcftools/test bcftools/plugins
+
+import fnmatch
import os
+import re
+import shutil
import sys
-import fnmatch
+import hashlib
+
+
+EXCLUDE = {
+ "samtools": (
+ "razip.c", "bgzip.c", "main.c",
+ "calDepth.c", "bam2bed.c", "wgsim.c",
+ "md5fa.c", "md5sum-lite.c", "maq2sam.c",
+ "bamcheck.c", "chk_indel.c", "vcf-miniview.c",
+ "htslib-1.3", # do not import twice
+ "hfile_irods.c", # requires irods library
+ ),
+ "bcftools": (
+ "test", "plugins", "peakfit.c",
+ "peakfit.h",
+ # needs to be renamed: name conflicts with samtools reheader
+ "reheader.c",
+ "polysomy.c"),
+ "htslib": (
+ 'htslib/tabix.c', 'htslib/bgzip.c',
+ 'htslib/htsfile.c', 'htslib/hfile_irods.c'),
+}
+
+
+MAIN = {
+ "samtools": "bamtk",
+ "bcftools": "main"
+}
+
def locate(pattern, root=os.curdir):
@@ -35,20 +68,57 @@ def locate(pattern, root=os.curdir):
def _update_pysam_files(cf, destdir):
'''update pysam files applying redirection of output'''
+ basename = os.path.basename(destdir)
for filename in cf:
if not filename:
continue
dest = filename + ".pysam.c"
with open(filename) as infile:
+ lines = "".join(infile.readlines())
with open(dest, "w") as outfile:
outfile.write('#include "pysam.h"\n\n')
- outfile.write(
- re.sub("stderr", "pysamerr", "".join(infile.readlines())))
+ subname, _ = os.path.splitext(os.path.basename(filename))
+ if subname in MAIN.get(basename, []):
+ lines = re.sub("int main\(", "int {}_main(".format(
+ basename), lines)
+ else:
+ lines = re.sub("int main\(", "int {}_{}_main(".format(
+ basename, subname), lines)
+ lines = re.sub("stderr", "pysam_stderr", lines)
+ lines = re.sub("stdout", "pysam_stdout", lines)
+ lines = re.sub(" printf\(", " fprintf(pysam_stdout, ", lines)
+ lines = re.sub("([^kf])puts\(([^)]+)\)",
+ r"\1fputs(\2, pysam_stdout) & fputc('\\n', pysam_stdout)",
+ lines)
+ lines = re.sub("putchar\(([^)]+)\)",
+ r"fputc(\1, pysam_stdout)", lines)
+
+ fn = os.path.basename(filename)
+ # some specific fixes:
+ SPECIFIC_SUBSTITUTIONS = {
+ "bam_md.c": (
+ 'sam_open_format("-", mode_w',
+ 'sam_open_format(pysam_stdout_fn, mode_w'),
+ "phase.c": (
+ 'putc("ACGT"[f->seq[j] == 1? (c&3, pysam_stdout) : (c>>16&3)]);',
+ 'putc("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)], pysam_stdout);'),
+ "cut_target.c": (
+ 'putc(33 + (cns[j]>>8>>2, pysam_stdout));',
+ 'putc(33 + (cns[j]>>8>>2), pysam_stdout);')
+ }
+ if fn in SPECIFIC_SUBSTITUTIONS:
+ lines = lines.replace(
+ SPECIFIC_SUBSTITUTIONS[fn][0],
+ SPECIFIC_SUBSTITUTIONS[fn][1])
+ outfile.write(lines)
+
with open(os.path.join(destdir, "pysam.h"), "w")as outfile:
outfile.write("""#ifndef PYSAM_H
#define PYSAM_H
#include "stdio.h"
-extern FILE * pysamerr;
+extern FILE * pysam_stderr;
+extern FILE * pysam_stdout;
+extern const char * pysam_stdout_fn;
#endif
""")
@@ -57,7 +127,7 @@ if len(sys.argv) >= 1:
if len(sys.argv) != 3:
raise ValueError("import requires dest src")
- dest, srcdir = sys.argv[2:4]
+ dest, srcdir = sys.argv[1:3]
if dest not in EXCLUDE:
raise ValueError("import expected one of %s" %
",".join(EXCLUDE.keys()))
diff --git a/pysam/__init__.py b/pysam/__init__.py
index cd32bf5..d1b5d41 100644
--- a/pysam/__init__.py
+++ b/pysam/__init__.py
@@ -1,5 +1,6 @@
import os
import sys
+import sysconfig
from pysam.libchtslib import *
from pysam.cutils import *
@@ -23,6 +24,7 @@ import pysam.Pileup as Pileup
from pysam.samtools import *
import pysam.config
+
# export all the symbols from separate modules
__all__ = \
libchtslib.__all__ +\
diff --git a/pysam/calignedsegment.pyx b/pysam/calignedsegment.pyx
index 0a2b94f..f4e0750 100644
--- a/pysam/calignedsegment.pyx
+++ b/pysam/calignedsegment.pyx
@@ -63,6 +63,7 @@ from cpython cimport array as c_array
from cpython.version cimport PY_MAJOR_VERSION
from cpython cimport PyErr_SetString, PyBytes_FromStringAndSize
from libc.string cimport strchr
+from cpython cimport array as c_array
from pysam.cutils cimport force_bytes, force_str, \
charptr_to_str, charptr_to_bytes
@@ -76,14 +77,15 @@ cdef char * parray_types = 'bBhHiIf'
# translation tables
# cigar code to character and vice versa
-cdef char* CODE2CIGAR= "MIDNSHP=X"
+cdef char* CODE2CIGAR= "MIDNSHP=XB"
+cdef int NCIGAR_CODES = 10
if PY_MAJOR_VERSION >= 3:
CIGAR2CODE = dict([y, x] for x, y in enumerate(CODE2CIGAR))
else:
CIGAR2CODE = dict([ord(y), x] for x, y in enumerate(CODE2CIGAR))
-CIGAR_REGEX = re.compile("(\d+)([MIDNSHP=X])")
+CIGAR_REGEX = re.compile("(\d+)([MIDNSHP=XB])")
#####################################################################
# typecode guessing
@@ -93,16 +95,16 @@ cdef inline char map_typecode_htslib_to_python(uint8_t s):
# map type from htslib to python array
cdef char * f = strchr(htslib_types, s)
+
if f == NULL:
- raise ValueError("unknown htslib tag typecode '%s'" % chr(s))
+ return 0
return parray_types[f - htslib_types]
cdef inline uint8_t map_typecode_python_to_htslib(char s):
"""determine value type from type code of array"""
cdef char * f = strchr(parray_types, s)
if f == NULL:
- raise ValueError(
- "unknown conversion for array typecode '%s'" % s)
+ return 0
return htslib_types[f - parray_types]
# optional tag data manipulation
@@ -229,6 +231,8 @@ cdef inline packTags(tags):
"""
fmts, args = ["<"], []
+ cdef char array_typecode
+
datatype2format = {
b'c': ('b', 1),
b'C': ('B', 1),
@@ -273,9 +277,14 @@ cdef inline packTags(tags):
elif isinstance(value, array.array):
# binary tags from arrays
if valuetype is None:
- valuetype = force_bytes(chr(
- map_typecode_python_to_htslib(ord(value.typecode))))
+ array_typecode = map_typecode_python_to_htslib(ord(value.typecode))
+
+ if array_typecode == 0:
+ raise ValueError("unsupported type code '{}'"
+ .format(value.typecode))
+ valuetype = force_bytes(chr(array_typecode))
+
if valuetype not in datatype2format:
raise ValueError("invalid value type '%s' (%s)" %
(valuetype, type(valuetype)))
@@ -501,6 +510,13 @@ cdef inline bytes build_alignment_sequence(bam1_t * src):
with the cigar string to reconstitute the query or the reference
sequence.
+ Positions corresponding to `N` (skipped region from the reference)
+ in the CIGAR string will not appear in the returned sequence. The
+ MD should correspondingly not contain these. Thus proper tags are::
+
+ Deletion from the reference: cigar=5M1D5M MD=5^C5
+ Skipped region from reference: cigar=5M1N5M MD=10
+
Returns
-------
@@ -542,10 +558,12 @@ cdef inline bytes build_alignment_sequence(bam1_t * src):
s[s_idx] = read_sequence[r_idx]
r_idx += 1
s_idx += 1
- elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+ elif op == BAM_CDEL:
for i from 0 <= i < l:
s[s_idx] = '-'
s_idx += 1
+ elif op == BAM_CREF_SKIP:
+ pass
elif op == BAM_CINS:
for i from 0 <= i < l:
# encode insertions into reference as lowercase
@@ -1409,10 +1427,12 @@ cdef class AlignedSegment:
for i from 0 <= i < l:
result.append(ref_seq[r_idx])
r_idx += 1
- elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+ elif op == BAM_CDEL:
for i from 0 <= i < l:
result.append(ref_seq[r_idx])
r_idx += 1
+ elif op == BAM_CREF_SKIP:
+ pass
elif op == BAM_CINS:
r_idx += l
elif op == BAM_CSOFT_CLIP:
@@ -1426,7 +1446,6 @@ cdef class AlignedSegment:
return "".join(result)
-
def get_aligned_pairs(self, matches_only=False, with_seq=False):
"""a list of aligned read (query) and reference positions.
@@ -1505,7 +1524,7 @@ cdef class AlignedSegment:
else:
qpos += l
- elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+ elif op == BAM_CDEL:
if not _matches_only:
if _with_seq:
for i from pos <= i < pos + l:
@@ -1519,6 +1538,17 @@ cdef class AlignedSegment:
elif op == BAM_CHARD_CLIP:
pass # advances neither
+ elif op == BAM_CREF_SKIP:
+ if not _matches_only:
+ if _with_seq:
+ for i from pos <= i < pos + l:
+ result.append((None, i, None))
+ else:
+ for i from pos <= i < pos + l:
+ result.append((None, i))
+
+ pos += l
+
elif op == BAM_CPAD:
raise NotImplementedError(
"Padding (BAM_CPAD, 6) is currently not supported. "
@@ -1597,6 +1627,81 @@ cdef class AlignedSegment:
return overlap
+ def get_cigar_stats(self):
+ """summary of operations in cigar string.
+
+ The output order in the array is "MIDNSHP=XB" followed by a
+ field for the NM tag. If the NM tag is not present, this
+ field will always be 0.
+
+ +-----+--------------+-----+
+ |M |BAM_CMATCH |0 |
+ +-----+--------------+-----+
+ |I |BAM_CINS |1 |
+ +-----+--------------+-----+
+ |D |BAM_CDEL |2 |
+ +-----+--------------+-----+
+ |N |BAM_CREF_SKIP |3 |
+ +-----+--------------+-----+
+ |S |BAM_CSOFT_CLIP|4 |
+ +-----+--------------+-----+
+ |H |BAM_CHARD_CLIP|5 |
+ +-----+--------------+-----+
+ |P |BAM_CPAD |6 |
+ +-----+--------------+-----+
+ |= |BAM_CEQUAL |7 |
+ +-----+--------------+-----+
+ |X |BAM_CDIFF |8 |
+ +-----+--------------+-----+
+ |B |BAM_CBACK |9 |
+ +-----+--------------+-----+
+ |NM |NM tag |10 |
+ +-----+--------------+-----+
+
+ If no cigar string is present, None is returned.
+
+ Parameters
+ ----------
+
+ Returns
+ -------
+
+ arrays : two arrays. The first contains the nucleotide counts within
+ each cigar operation, the second contains the number of blocks for
+ each cigar operation.
+
+ """
+
+ cdef int nfields = NCIGAR_CODES + 1
+
+ cdef c_array.array base_counts = array.array(
+ "I",
+ [0] * nfields)
+ cdef uint32_t [:] base_view = base_counts
+ cdef c_array.array block_counts = array.array(
+ "I",
+ [0] * nfields)
+ cdef uint32_t [:] block_view = block_counts
+
+ cdef bam1_t * src = self._delegate
+ cdef int op
+ cdef uint32_t l
+ cdef int32_t k
+ cdef uint32_t * cigar_p = pysam_bam_get_cigar(src)
+
+ if cigar_p == NULL:
+ return None
+
+ for k from 0 <= k < pysam_get_n_cigar(src):
+ op = cigar_p[k] & BAM_CIGAR_MASK
+ l = cigar_p[k] >> BAM_CIGAR_SHIFT
+ base_view[op] += l
+ block_view[op] += 1
+
+ cdef uint8_t * v = bam_aux_get(src, 'NM')
+ if v != NULL:
+ base_view[nfields - 1] = <int32_t>bam_aux2i(v)
+
+ return base_counts, block_counts
+
#####################################################
## Unsorted as yet
# TODO: capture in CIGAR object
diff --git a/pysam/calignmentfile.pxd b/pysam/calignmentfile.pxd
index a7e956d..3384e7e 100644
--- a/pysam/calignmentfile.pxd
+++ b/pysam/calignmentfile.pxd
@@ -39,6 +39,7 @@ ctypedef struct __iterdata:
cdef class AlignmentFile:
cdef object _filename
+ cdef object _reference_filename
# pointer to htsFile structure
cdef htsFile * htsfile
diff --git a/pysam/calignmentfile.pyx b/pysam/calignmentfile.pyx
index f258a66..6473220 100644
--- a/pysam/calignmentfile.pyx
+++ b/pysam/calignmentfile.pyx
@@ -112,7 +112,8 @@ VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "GO"),
"UR", "SP"),
"RG" : ("ID", "SM", "LB", "DS",
"PU", "PI", "CN", "DT",
- "PL", "FO", "KS", "PG"),
+ "PL", "FO", "KS", "PG",
+ "PM"),
"PG" : ("PN", "ID", "VN", "CL",
"PP"),}
@@ -218,7 +219,7 @@ cdef class AlignmentFile:
"""AlignmentFile(filepath_or_object, mode=None, template=None,
reference_names=None, reference_lengths=None, text=NULL,
header=None, add_sq_text=False, check_header=True, check_sq=True,
- filename=None)
+ reference_filename=None, filename=None)
A :term:`SAM`/:term:`BAM` formatted file.
@@ -248,8 +249,8 @@ cdef class AlignmentFile:
4. The names (`reference_names`) and lengths
(`reference_lengths`) are supplied directly as lists.
- For writing a CRAM file, the filename of the reference can be
- added through a fasta formatted file (`reference_filename`)
+ When reading or writing a CRAM file, the filename of a FASTA-formatted
+ reference can be specified with `reference_filename`.
By default, if a file is opened in mode 'r', it is checked
for a valid header (`check_header` = True) and a definition of
@@ -311,6 +312,12 @@ cdef class AlignmentFile:
when reading, check if SQ entries are present in header
(default=True)
+ reference_filename : string
+ Path to a FASTA-formatted reference file. Valid only for CRAM files.
+ When reading a CRAM file, this overrides both ``$REF_PATH`` and the URL
+ specified in the header (``UR`` tag), which are normally used to find
+ the reference.
+
filename : string
Alternative to filepath_or_object. Filename of the file
to be opened.
@@ -390,6 +397,7 @@ cdef class AlignmentFile:
will be closed and a new file will be opened.
'''
cdef char *cfilename
+ cdef char *creference_filename
cdef char *cindexname
cdef char *cmode
@@ -433,6 +441,8 @@ cdef class AlignmentFile:
cdef bytes bmode = mode.encode('ascii')
self._filename = filename = encode_filename(filename)
+ self._reference_filename = reference_filename = encode_filename(
+ reference_filename)
# FIXME: Use htsFormat when it is available
self.is_stream = filename == b"-"
@@ -515,10 +525,8 @@ cdef class AlignmentFile:
# is given, the CRAM reference arrays will be built from
# the @SQ header in the header
if self.is_cram and reference_filename:
- # note that fn_aux takes ownership, so create
- # a copy
- fn = encode_filename(reference_filename)
- self.htsfile.fn_aux = strdup(fn)
+ # note that fn_aux takes ownership, so create a copy
+ self.htsfile.fn_aux = strdup(self._reference_filename)
# write header to htsfile
if self.is_bam or self.is_cram or "h" in mode:
@@ -570,6 +578,13 @@ cdef class AlignmentFile:
"- is it SAM format?" % mode )
# self.header.ignore_sam_err = True
+ # set filename with reference sequences
+ if self.is_cram and reference_filename:
+ creference_filename = self._reference_filename
+ hts_set_opt(self.htsfile,
+ CRAM_OPT_REFERENCE,
+ creference_filename)
+
if check_sq and self.header.n_targets == 0:
raise ValueError(
("file has no sequences defined (mode='%s') - "
@@ -854,7 +869,7 @@ cdef class AlignmentFile:
multiple_iterators : bool
- If `multiple_iterators` is True (default) multiple
+ If `multiple_iterators` is True, multiple
iterators on the same file can be used at the same time. The
iterator returned will receive its own copy of a filehandle to
the file effectively re-opening the file. Re-opening a file
@@ -1665,6 +1680,7 @@ cdef class IteratorRow:
def __init__(self, AlignmentFile samfile, int multiple_iterators=False):
cdef char *cfilename
+ cdef char *creference_filename
if not samfile.is_open():
raise ValueError("I/O operation on closed file")
@@ -1686,6 +1702,13 @@ cdef class IteratorRow:
self.header = sam_hdr_read(self.htsfile)
assert self.header != NULL
self.owns_samfile = True
+ # options specific to CRAM files
+ if samfile.is_cram and samfile._reference_filename:
+ creference_filename = samfile._reference_filename
+ hts_set_opt(self.htsfile,
+ CRAM_OPT_REFERENCE,
+ creference_filename)
+
else:
self.htsfile = self.samfile.htsfile
self.owns_samfile = False
diff --git a/pysam/cbcf.pyx b/pysam/cbcf.pyx
index 2a19850..41fd44f 100644
--- a/pysam/cbcf.pyx
+++ b/pysam/cbcf.pyx
@@ -1807,7 +1807,7 @@ cdef class VariantHeader(object):
'missing {:d} requested samples'.format(
len(missing_samples)))
- keep_samples = force_bytes(b','.join(keep_samples))
+ keep_samples = force_bytes(','.join(keep_samples))
cdef char *keep = <char *>keep_samples if keep_samples else NULL
cdef ret = bcf_hdr_set_samples(self.ptr, keep, 0)
diff --git a/pysam/cfaidx.pxd b/pysam/cfaidx.pxd
index d3aff09..7749274 100644
--- a/pysam/cfaidx.pxd
+++ b/pysam/cfaidx.pxd
@@ -6,7 +6,7 @@ from libc.stdio cimport FILE, printf
cimport cython
from cpython cimport array
-from pysam.chtslib cimport faidx_t, gzFile, kstring_t
+from pysam.chtslib cimport faidx_t, kstring_t, BGZF
# These functions are put here and not in chtslib.pxd in order
# to avoid warnings for unused functions.
@@ -21,13 +21,10 @@ cdef extern from "pysam_stream.h" nogil:
kstring_t seq
kstring_t qual
- gzFile gzopen(char *, char *)
- kseq_t *kseq_init(gzFile)
+ kseq_t *kseq_init(BGZF *)
int kseq_read(kseq_t *)
void kseq_destroy(kseq_t *)
- int gzclose(gzFile)
-
- kstream_t *ks_init(gzFile)
+ kstream_t *ks_init(BGZF *)
void ks_destroy(kstream_t *)
# Retrieve characters from stream until delimiter
@@ -62,9 +59,10 @@ cdef class PersistentFastqProxy:
cdef class FastxFile:
cdef object _filename
- cdef gzFile fastqfile
+ cdef BGZF * fastqfile
cdef kseq_t * entry
cdef bint persist
+ cdef bint is_remote
cdef kseq_t * getCurrent(self)
cdef int cnext(self)
diff --git a/pysam/cfaidx.pyx b/pysam/cfaidx.pyx
index 4db754e..78f9aac 100644
--- a/pysam/cfaidx.pyx
+++ b/pysam/cfaidx.pyx
@@ -60,7 +60,8 @@ from cpython.version cimport PY_MAJOR_VERSION
from pysam.chtslib cimport \
faidx_nseq, fai_load, fai_destroy, fai_fetch, \
faidx_seq_len, \
- faidx_fetch_seq, gzopen, gzclose, hisremote
+ faidx_fetch_seq, hisremote, \
+ bgzf_open, bgzf_close
from pysam.cutils cimport force_bytes, force_str, charptr_to_str
from pysam.cutils cimport encode_filename, from_string_and_size
@@ -136,6 +137,11 @@ cdef class FastaFile:
cdef char *cfilename = self._filename
self.is_remote = hisremote(cfilename)
+ if filepath_index is not None:
+ raise NotImplementedError(
+ "setting an explicit path for the index "
+ "is not implemented")
+
# open file for reading
if (self._filename != b"-"
and not self.is_remote
@@ -171,7 +177,9 @@ cdef class FastaFile:
self.fastafile = NULL
def __dealloc__(self):
- self.close()
+ if self.fastafile != NULL:
+ fai_destroy(self.fastafile)
+ self.fastafile = NULL
# context manager interface
def __enter__(self):
@@ -464,30 +472,40 @@ cdef class FastxFile:
on the file continues.
'''
- self.close()
+ if self.fastqfile != NULL:
+ self.close()
- if not os.path.exists(filename):
- raise IOError("no such file or directory: %s" % filename)
+ self._filename = encode_filename(filename)
+ cdef char *cfilename = self._filename
+ self.is_remote = hisremote(cfilename)
+
+ # open file for reading
+ if (self._filename != b"-"
+ and not self.is_remote
+ and not os.path.exists(filename)):
+ raise IOError("file `%s` not found" % filename)
self.persist = persist
- self._filename = encode_filename(filename)
- cdef char *cfilename = self._filename
with nogil:
- self.fastqfile = gzopen(cfilename, "r")
+ self.fastqfile = bgzf_open(cfilename, "r")
self.entry = kseq_init(self.fastqfile)
self._filename = filename
def close(self):
'''close the file.'''
+ if self.fastqfile != NULL:
+ bgzf_close(self.fastqfile)
+ self.fastqfile = NULL
if self.entry != NULL:
- gzclose(self.fastqfile)
- if self.entry:
- kseq_destroy(self.entry)
- self.entry = NULL
+ kseq_destroy(self.entry)
+ self.entry = NULL
def __dealloc__(self):
- self.close()
+ if self.fastqfile != NULL:
+ bgzf_close(self.fastqfile)
+ if self.entry:
+ kseq_destroy(self.entry)
# context manager interface
def __enter__(self):
diff --git a/pysam/chtslib.pxd b/pysam/chtslib.pxd
index 0cee075..33c1559 100644
--- a/pysam/chtslib.pxd
+++ b/pysam/chtslib.pxd
@@ -9,20 +9,6 @@ cdef extern from "Python.h":
FILE* PyFile_AsFile(object)
-cdef extern from "zlib.h" nogil:
- ctypedef void * gzFile
- ctypedef int64_t z_off_t
-
- int gzclose(gzFile fp)
- int gzread(gzFile fp, void *buf, unsigned int n)
- char *gzerror(gzFile fp, int *errnum)
-
- gzFile gzopen( char *path, char *mode)
- gzFile gzdopen (int fd, char *mode)
- char * gzgets(gzFile file, char *buf, int len)
- int gzeof(gzFile file)
-
-
cdef extern from "htslib/kstring.h" nogil:
ctypedef struct kstring_t:
size_t l, m
@@ -398,6 +384,29 @@ cdef extern from "htslib/hts.h" nogil:
no_compression, gzip, bgzf, custom
compression_maximum
+ enum hts_fmt_option:
+ CRAM_OPT_DECODE_MD,
+ CRAM_OPT_PREFIX,
+ CRAM_OPT_VERBOSITY,
+ CRAM_OPT_SEQS_PER_SLICE,
+ CRAM_OPT_SLICES_PER_CONTAINER,
+ CRAM_OPT_RANGE,
+ CRAM_OPT_VERSION,
+ CRAM_OPT_EMBED_REF,
+ CRAM_OPT_IGNORE_MD5,
+ CRAM_OPT_REFERENCE,
+ CRAM_OPT_MULTI_SEQ_PER_SLICE,
+ CRAM_OPT_NO_REF,
+ CRAM_OPT_USE_BZIP2,
+ CRAM_OPT_SHARED_REF,
+ CRAM_OPT_NTHREADS,
+ CRAM_OPT_THREAD_POOL,
+ CRAM_OPT_USE_LZMA,
+ CRAM_OPT_USE_RANS,
+ CRAM_OPT_REQUIRED_FIELDS,
+ HTS_OPT_COMPRESSION_LEVEL,
+ HTS_OPT_NTHREADS,
+
ctypedef struct htsVersion:
short major, minor
@@ -519,7 +528,7 @@ cdef extern from "htslib/hts.h" nogil:
# @param opt The CRAM_OPT_* option.
# @param ... Optional arguments, dependent on the option used.
# @return 0 for success, or negative if an error occurred.
- #int hts_set_opt(htsFile *fp, hts_fmt_option opt, ...)
+ int hts_set_opt(htsFile *fp, hts_fmt_option opt, ...)
int hts_getline(htsFile *fp, int delimiter, kstring_t *str)
char **hts_readlines(const char *fn, int *_n)
diff --git a/pysam/ctabix.pxd b/pysam/ctabix.pxd
index 39eed77..028090e 100644
--- a/pysam/ctabix.pxd
+++ b/pysam/ctabix.pxd
@@ -14,7 +14,7 @@ cdef extern from "unistd.h" nogil:
int close(int fd)
from pysam.chtslib cimport hts_idx_t, hts_itr_t, htsFile, \
- gzFile, tbx_t, kstring_t
+ tbx_t, kstring_t, BGZF
# These functions are put here and not in chtslib.pxd in order
# to avoid warnings for unused functions.
@@ -29,13 +29,10 @@ cdef extern from "pysam_stream.h" nogil:
kstring_t seq
kstring_t qual
- gzFile gzopen(char *, char *)
- kseq_t *kseq_init(gzFile)
+ kseq_t *kseq_init(BGZF *)
int kseq_read(kseq_t *)
void kseq_destroy(kseq_t *)
- int gzclose(gzFile)
-
- kstream_t *ks_init(gzFile)
+ kstream_t *ks_init(BGZF *)
void ks_destroy(kstream_t *)
# Retrieve characters from stream until delimiter
@@ -47,7 +44,7 @@ cdef extern from "pysam_stream.h" nogil:
cdef class tabix_file_iterator:
- cdef gzFile fh
+ cdef BGZF * fh
cdef kstream_t * kstream
cdef kstring_t buffer
cdef size_t size
@@ -104,7 +101,7 @@ cdef class TabixIteratorParsed(TabixIterator):
cdef class GZIterator:
cdef object _filename
- cdef gzFile gzipfile
+ cdef BGZF * gzipfile
cdef kstream_t * kstream
cdef kstring_t buffer
cdef int __cnext__(self)
diff --git a/pysam/ctabix.pyx b/pysam/ctabix.pyx
index 0bb1284..a23fa87 100644
--- a/pysam/ctabix.pyx
+++ b/pysam/ctabix.pyx
@@ -69,10 +69,10 @@ from cpython.version cimport PY_MAJOR_VERSION
cimport pysam.ctabixproxies as ctabixproxies
from pysam.chtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\
- BGZF, bgzf_open, bgzf_close, bgzf_write, gzFile, \
+ BGZF, bgzf_open, bgzf_dopen, bgzf_close, bgzf_write, \
tbx_index_build, tbx_index_load, tbx_itr_queryi, tbx_itr_querys, \
tbx_conf_t, tbx_seqnames, tbx_itr_next, tbx_itr_destroy, \
- tbx_destroy, gzopen, gzclose, gzerror, gzdopen, hisremote
+ tbx_destroy, hisremote
from pysam.cutils cimport force_bytes, force_str, charptr_to_str
from pysam.cutils cimport encode_filename, from_string_and_size
@@ -532,7 +532,7 @@ cdef class TabixFile:
cdef int x
result = []
for x from 0 <= x < nsequences:
- result.append(sequences[x])
+ result.append(force_str(sequences[x]))
# htslib instructions:
# only free container, not the sequences themselves
@@ -681,7 +681,7 @@ cdef class GZIterator:
filename = encode_filename(filename)
cdef char *cfilename = filename
with nogil:
- self.gzipfile = gzopen(cfilename, "r")
+ self.gzipfile = bgzf_open(cfilename, "r")
self._filename = filename
self.kstream = ks_init(self.gzipfile)
self.encoding = encoding
@@ -693,11 +693,12 @@ cdef class GZIterator:
def __dealloc__(self):
'''close file.'''
if self.gzipfile != NULL:
- gzclose(self.gzipfile)
+ bgzf_close(self.gzipfile)
self.gzipfile = NULL
if self.buffer.s != NULL:
free(self.buffer.s)
- ks_destroy(self.kstream)
+ if self.kstream != NULL:
+ ks_destroy(self.kstream)
def __iter__(self):
return self
@@ -1003,10 +1004,10 @@ def tabix_index( filename,
#########################################################
## Iterators for parsing through unindexed files.
#########################################################
-cdef buildGzipError(void *gzfp):
- cdef int errnum = 0
- cdef char *s = gzerror(gzfp, &errnum)
- return "error (%d): %s (%d: %s)" % (errno, strerror(errno), errnum, s)
+# cdef buildGzipError(void *gzfp):
+# cdef int errnum = 0
+# cdef char *s = gzerror(gzfp, &errnum)
+# return "error (%d): %s (%d: %s)" % (errno, strerror(errno), errnum, s)
cdef class tabix_file_iterator:
@@ -1034,7 +1035,7 @@ cdef class tabix_file_iterator:
# in this case gzread will directly read from the file without decompression.
# When reading, this will be detected automatically by looking
# for the magic two-byte gzip header.
- self.fh = gzdopen(self.duplicated_fd, 'r')
+ self.fh = bgzf_dopen(self.duplicated_fd, 'r')
if self.fh == NULL:
raise IOError('%s' % strerror(errno))
@@ -1076,14 +1077,14 @@ cdef class tabix_file_iterator:
# gzgets terminates at \n, no need to test
# parser creates a copy
- return self.parser.parse( b, self.buffer.l)
+ return self.parser.parse(b, self.buffer.l)
raise StopIteration
def __dealloc__(self):
free(self.buffer.s)
ks_destroy(self.kstream)
- gzclose(self.fh)
+ bgzf_close(self.fh)
def __next__(self):
return self.__cnext__()
diff --git a/pysam/ctabixproxies.pyx b/pysam/ctabixproxies.pyx
index d72f082..f5288cc 100644
--- a/pysam/ctabixproxies.pyx
+++ b/pysam/ctabixproxies.pyx
@@ -8,6 +8,8 @@ from libc.stdlib cimport atoi, atol, atof
from pysam.cutils cimport force_bytes, force_str, charptr_to_str
from pysam.cutils cimport encode_filename, from_string_and_size
+import collections
+
cdef char *StrOrEmpty(char * buffer):
if buffer == NULL:
return ""
@@ -88,7 +90,8 @@ cdef class TupleProxy:
elif op == 3: # != operator
return self.compare(other) != 0
else:
- return NotImplemented
+ err_msg = "op {0} isn't implemented yet".format(op)
+ raise NotImplementedError(err_msg)
cdef take(self, char * buffer, size_t nbytes):
'''start presenting buffer.
@@ -390,6 +393,8 @@ cdef class GTFProxy(TupleProxy):
def __get__(self):
return self._getindex(1)
def __set__(self, value):
+ if value is None:
+ value = "."
self._setindex(1, value)
property feature:
@@ -397,6 +402,8 @@ cdef class GTFProxy(TupleProxy):
def __get__(self):
return self._getindex(2)
def __set__(self, value):
+ if value is None:
+ value = "."
self._setindex(2, value)
property start:
@@ -423,29 +430,40 @@ cdef class GTFProxy(TupleProxy):
return float(v)
def __set__(self, value):
- self._setindex(5, value)
+ if value is None:
+ value = "."
+ self._setindex(5, str(value))
property strand:
'''feature strand.'''
- def __get__(self ):
- return self._getindex(6)
+ def __get__(self):
+ return self._getindex(6)
def __set__(self, value ):
+ if value is None:
+ value = "."
self._setindex(6, value)
property frame:
'''feature frame.'''
def __get__(self):
- return self._getindex(7)
+ v = self._getindex(7)
+ if v == "" or v[0] == '.':
+ return v
+ else:
+ return int(v)
+
def __set__(self, value):
- self._setindex(7, value)
+ if value is None:
+ value = "."
+ self._setindex(7, str(value))
property attributes:
'''feature attributes (as a string).'''
def __get__(self):
if self.hasOwnAttributes:
- return self._attributes
+ return force_str(self._attributes)
else:
- return self._getindex(8)
+ return force_str(self._getindex(8))
def __set__( self, value):
if self.hasOwnAttributes:
free(self._attributes)
@@ -481,7 +499,7 @@ cdef class GTFProxy(TupleProxy):
# Remove white space to prevent a last empty field.
fields = [x.strip() for x in attributes.strip().split("; ")]
- result = {}
+ result = collections.OrderedDict()
for f in fields:
@@ -529,7 +547,7 @@ cdef class GTFProxy(TupleProxy):
else:
aa.append( '%s %s' % (k,str(v)) )
- a = "; ".join( aa ) + ";"
+ a = force_bytes("; ".join(aa) + ";")
p = a
l = len(a)
self._attributes = <char *>calloc(l + 1, sizeof(char))
@@ -552,9 +570,9 @@ cdef class GTFProxy(TupleProxy):
str(self.start+1),
str(self.end),
toDot(self.score),
- self.strand,
- self.frame,
- self.attributes ) )
+ toDot(self.strand),
+ toDot(self.frame),
+ self.attributes))
else:
return TupleProxy.__str__(self)
@@ -638,6 +656,26 @@ cdef class GTFProxy(TupleProxy):
r[name] = value
self.fromDict(r)
+ def __cmp__(self, other):
+ return (self.contig, self.strand, self.start) < \
+ (other.contig, other.strand, other.start)
+
+ # python 3 compatibility
+ def __richcmp__(GTFProxy self, GTFProxy other, int op):
+ if op == 0:
+ return (self.contig, self.strand, self.start) < \
+ (other.contig, other.strand, other.start)
+ elif op == 1:
+ return (self.contig, self.strand, self.start) <= \
+ (other.contig, other.strand, other.start)
+ elif op == 2:
+ return self.compare(other) == 0
+ elif op == 3:
+ return self.compare(other) != 0
+ else:
+ err_msg = "op {0} isn't implemented yet".format(op)
+ raise NotImplementedError(err_msg)
+
cdef class NamedTupleProxy(TupleProxy):
@@ -705,8 +743,8 @@ cdef class BedProxy(NamedTupleProxy):
# do automatic conversion
self.contig = self.fields[0]
- self.start = atoi( self.fields[1] )
- self.end = atoi( self.fields[2] )
+ self.start = atoi(self.fields[1])
+ self.end = atoi(self.fields[2])
# __setattr__ in base class seems to take precedence
# hence implement setters in __setattr__
diff --git a/pysam/cutils.pxd b/pysam/cutils.pxd
index 36fe554..81e544a 100644
--- a/pysam/cutils.pxd
+++ b/pysam/cutils.pxd
@@ -32,4 +32,7 @@ cdef extern from "pysam_util.h":
int bcftools_main(int argc, char *argv[])
void pysam_set_stderr(int fd)
void pysam_unset_stderr()
+ void pysam_set_stdout(int fd)
+ void pysam_set_stdout_fn(const char *)
+ void pysam_unset_stdout()
void set_optind(int)
diff --git a/pysam/cutils.pyx b/pysam/cutils.pyx
index 482db89..7510727 100644
--- a/pysam/cutils.pyx
+++ b/pysam/cutils.pyx
@@ -14,6 +14,7 @@ from libc.stdlib cimport calloc, free
from libc.string cimport strncpy
from libc.stdio cimport fprintf, stderr, fflush
from libc.stdio cimport stdout as c_stdout
+from posix.fcntl cimport open as c_open, O_WRONLY
#####################################################################
# hard-coded constants
@@ -227,129 +228,75 @@ cpdef parse_region(reference=None,
return force_bytes(reference), rstart, rend
- at contextmanager
-def stdout_redirector(to=os.devnull):
- '''
- import os
-
- with stdout_redirected(to=filename):
- print("from Python")
- os.system("echo non-Python applications are also supported")
-
- see http://stackoverflow.com/questions/5081657/how-do-i-prevent-a-c-shared-library-to-print-on-stdout-in-python/17954769#17954769
- '''
- fd = sys.stdout.fileno()
-
- def _redirect_stdout(to):
- # flush C-level stdout
- try:
- fflush(c_stdout)
- sys.stdout.close()
- except (OSError, IOError):
- # some tools close stdout
- # Py3: OSError
- # Py2: IOError
- pass
-
- # fd writes to 'to' file
- os.dup2(to.fileno(), fd)
- # Python writes to fd
- if IS_PYTHON3:
- sys.stdout = io.TextIOWrapper(
- os.fdopen(fd, 'wb'))
- else:
- sys.stdout = os.fdopen(fd, 'w')
-
- with os.fdopen(os.dup(fd), 'w') as old_stdout:
- _redirect_stdout(to)
- try:
- yield # allow code to be run with the redirected stdout
- finally:
- _redirect_stdout(old_stdout)
- # restore stdout.
- # buffering and flags may be different
-
-# def stdout_redirector(stream):
-# """
-# See discussion in:
-
-# http://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
-# """
-
-# # The original fd stdout points to. Usually 1 on POSIX systems.
-# original_stdout_fd = sys.stdout.fileno()
-# print ("original_fd=", original_stdout_fd)
-# def _redirect_stdout(to_fd):
-# """Redirect stdout to the given file descriptor."""
-# # Flush the C-level buffer stdout
-# fflush(c_stdout)
-# # Flush and close sys.stdout - also closes the file descriptor
-# # (fd)
-# sys.stdout.close()
-# # Make original_stdout_fd point to the same file as to_fd
-# os.dup2(to_fd, original_stdout_fd)
-# # Create a new sys.stdout that points to the redirected fd
-# if IS_PYTHON3:
-# sys.stdout = io.TextIOWrapper(
-# os.fdopen(original_stdout_fd, 'wb'))
-
-# # Save a copy of the original stdout fd in saved_stdout_fd
-# saved_stdout_fd = os.dup(original_stdout_fd)
-# try:
-# # Create a temporary file and redirect stdout to it
-# tfile = tempfile.TemporaryFile(mode='w+b')
-# _redirect_stdout(tfile.fileno())
-# # Yield to caller, then redirect stdout back to the saved fd
-# yield
-# _redirect_stdout(saved_stdout_fd)
-# # Copy contents of temporary file to the given stream
-# tfile.flush()
-# tfile.seek(0, io.SEEK_SET)
-# stream.write(tfile.read())
-# finally:
-# tfile.close()
-# os.close(saved_stdout_fd)
-
-
def _pysam_dispatch(collection,
method,
- args=(),
- catch_stdout=True):
+ args=None,
+ catch_stdout=True,
+ save_stdout=None):
'''call ``method`` in samtools/bcftools providing arguments in args.
- .. note::
- This method redirects stdout to capture it
- from samtools. If for some reason stdout disappears
- the reason might be in this method.
-
- .. note::
- This method captures stdout and stderr using temporary files,
- which are then read into memory in their entirety. This method
- is slow and might cause large memory overhead.
-
- Catching of stdout can be turned of by setting *catch_stdout* to
+ Catching of stdout can be turned off by setting *catch_stdout* to
False.
- See http://bytes.com/topic/c/answers/487231-how-capture-stdout-temporarily
- on the topic of redirecting stderr/stdout.
-
'''
- # note that debugging this module can be a problem
- # as stdout/stderr will not appear on the terminal
- # some special cases
if method == "index":
if not os.path.exists(args[0]):
raise IOError("No such file or directory: '%s'" % args[0])
+
+ if args is None:
+ args = []
+ else:
+ args = list(args)
- # redirect stderr and stdout to file
+ # redirect stderr to file
stderr_h, stderr_f = tempfile.mkstemp()
pysam_set_stderr(stderr_h)
+ # redirect stdout to file
+ if save_stdout:
+ stdout_f = save_stdout
+ stdout_h = c_open(force_bytes(stdout_f),
+ O_WRONLY)
+ if stdout_h == -1:
+ raise OSError("error while opening {} for writing".format(stdout_f))
+
+ pysam_set_stdout_fn(force_bytes(stdout_f))
+ pysam_set_stdout(stdout_h)
+ elif catch_stdout:
+ stdout_h, stdout_f = tempfile.mkstemp()
+
+ MAP_STDOUT_OPTIONS = {
+ "samtools": {
+ "view": "-o {}",
+ "mpileup": "-o {}",
+ "depad": "-o {}",
+ "calmd": "", # uses pysam_stdout_fn
+ },
+ "bcftools": {}
+ }
+
+ stdout_option = None
+ if collection == "bcftools":
+ # in bcftools, most methods accept -o, the exceptions
+ # are below:
+ if method not in ("index", "roh", "stats"):
+ stdout_option = "-o {}"
+ elif method in MAP_STDOUT_OPTIONS[collection]:
+ stdout_option = MAP_STDOUT_OPTIONS[collection][method]
+
+ if stdout_option is not None:
+ os.close(stdout_h)
+ pysam_set_stdout_fn(force_bytes(stdout_f))
+ args.extend(stdout_option.format(stdout_f).split(" "))
+ else:
+ pysam_set_stdout(stdout_h)
+ else:
+ pysam_set_stdout_fn("-")
+
# setup the function call to samtools/bcftools main
cdef char ** cargs
cdef int i, n, retval, l
-
n = len(args)
method = force_bytes(method)
collection = force_bytes(collection)
@@ -381,41 +328,40 @@ def _pysam_dispatch(collection,
set_optind(0)
# call samtools/bcftools
- if catch_stdout:
- with tempfile.TemporaryFile(mode='w+b') as tfile:
- with stdout_redirector(tfile):
- if collection == b"samtools":
- retval = samtools_main(n + 2, cargs)
- elif collection == b"bcftools":
- retval = bcftools_main(n + 2, cargs)
- tfile.flush()
- tfile.seek(0)
- # do not force str, as output might be binary,
- # for example BAM, VCF.gz, etc.
- out_stdout = tfile.read()
- else:
- if collection == b"samtools":
- retval = samtools_main(n + 2, cargs)
- elif collection == b"bcftools":
- retval = bcftools_main(n + 2, cargs)
- out_stdout = None
+ if collection == b"samtools":
+ retval = samtools_main(n + 2, cargs)
+ elif collection == b"bcftools":
+ retval = bcftools_main(n + 2, cargs)
for i from 0 <= i < n:
free(cargs[i + 2])
free(cargs)
# get error messages
+ def _collect(fn):
+ out = []
+ try:
+ with open(fn, "r") as inf:
+ out = inf.read()
+ except UnicodeDecodeError:
+ with open(fn, "rb") as inf:
+ # read binary output
+ out = inf.read()
+ finally:
+ os.remove(fn)
+ return out
+
pysam_unset_stderr()
- out_stderr = []
- try:
- with open(stderr_f, "r") as inf:
- out_stderr = inf.readlines()
- except UnicodeDecodeError:
- with open( stderr_f, "rb") as inf:
- # read binary output
- out_stderr = inf.read()
- finally:
- os.remove(stderr_f)
+ out_stderr = _collect(stderr_f)
+
+ if save_stdout:
+ pysam_unset_stdout()
+ out_stdout = None
+ elif catch_stdout:
+ pysam_unset_stdout()
+ out_stdout = _collect(stdout_f)
+ else:
+ out_stdout = None
return retval, out_stderr, out_stdout
diff --git a/pysam/cvcf.pyx b/pysam/cvcf.pyx
index 83d3663..5e2fda2 100644
--- a/pysam/cvcf.pyx
+++ b/pysam/cvcf.pyx
@@ -114,6 +114,7 @@ cdef class VCFRecord( ctabixproxies.TupleProxy):
def __init__(self, vcf):
self.vcf = vcf
self.encoding = vcf.encoding
+
# if len(data) != len(self.vcf._samples):
# self.vcf.error(str(data),
# self.BAD_NUMBER_OF_COLUMNS,
@@ -133,7 +134,7 @@ cdef class VCFRecord( ctabixproxies.TupleProxy):
def error(self, line, error, opt=None):
'''raise error.'''
# pass to vcf file for error handling
- return self.vcf.error( line, error, opt )
+ return self.vcf.error(line, error, opt)
cdef update(self, char * buffer, size_t nbytes):
'''update internal data.
@@ -349,6 +350,7 @@ class VCF(object):
if leftalign: self._leftalign = leftalign
self._lines = lines
self.encoding = "ascii"
+ self.tabixfile = None
def error(self,line,error,opt=None):
if error in self._ignored_errors: return
@@ -1047,6 +1049,15 @@ class VCF(object):
self.tabixfile = pysam.Tabixfile(filename, encoding=encoding)
self._parse_header(self.tabixfile.header)
+ def __del__(self):
+ self.close()
+ self.tabixfile = None
+
+ def close(self):
+ if self.tabixfile:
+ self.tabixfile.close()
+ self.tabixfile = None
+
def fetch(self,
reference=None,
start=None,
diff --git a/pysam/pysam_stream.h b/pysam/pysam_stream.h
index 3e93e29..3a4eb16 100644
--- a/pysam/pysam_stream.h
+++ b/pysam/pysam_stream.h
@@ -5,7 +5,8 @@
// #######################################################
// fastq parsing
-KSEQ_INIT(gzFile, gzread)
+// KSEQ_INIT(gzFile, gzread)
+KSEQ_INIT(BGZF *, bgzf_read)
//KSTREAM_INIT( gzFile, gzread, 16384)
diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c
index e669e1d..94717c8 100644
--- a/pysam/pysam_util.c
+++ b/pysam/pysam_util.c
@@ -1,6 +1,7 @@
#include <ctype.h>
#include <assert.h>
#include <unistd.h>
+#include <stdio.h>
#include "bam.h"
#include "bam_endian.h"
#include "htslib/khash.h"
@@ -8,23 +9,52 @@
#include "htslib/knetfile.h"
#include "pysam_util.h"
-// Definition of pysamerr
-#include "stdio.h"
-FILE * pysamerr = NULL;
+
+FILE * pysam_stderr = NULL;
+FILE * pysam_stdout = NULL;
+const char * pysam_stdout_fn = NULL;
+int PYSAM_STDOUT_FILENO = STDOUT_FILENO;
+
FILE * pysam_set_stderr(int fd)
{
- if (pysamerr != NULL)
- fclose(pysamerr);
- pysamerr = fdopen(fd, "w");
- return pysamerr;
+ if (pysam_stderr != NULL)
+ fclose(pysam_stderr);
+ pysam_stderr = fdopen(fd, "w");
+ return pysam_stderr;
}
void pysam_unset_stderr(void)
{
- if (pysamerr != NULL)
- fclose(pysamerr);
- pysamerr = fopen("/dev/null", "w");
+ if (pysam_stderr != NULL)
+ fclose(pysam_stderr);
+ pysam_stderr = fopen("/dev/null", "w");
+}
+
+FILE * pysam_set_stdout(int fd)
+{
+ if (pysam_stdout != NULL)
+ fclose(pysam_stdout);
+ pysam_stdout = fdopen(fd, "w");
+ if (pysam_stdout == NULL)
+ {
+ fprintf(pysam_stderr, "could not set stdout to fd %i", fd);
+ }
+ PYSAM_STDOUT_FILENO = fd;
+ return pysam_stdout;
+}
+
+void pysam_set_stdout_fn(const char *fn)
+{
+ pysam_stdout_fn = fn;
+}
+
+void pysam_unset_stdout(void)
+{
+ if (pysam_stdout != NULL)
+ fclose(pysam_stdout);
+ pysam_stdout = fopen("/dev/null", "w");
+ PYSAM_STDOUT_FILENO = STDOUT_FILENO;
}
void set_optind(int val)
diff --git a/pysam/pysam_util.h b/pysam/pysam_util.h
index 5f2359f..a30808f 100644
--- a/pysam/pysam_util.h
+++ b/pysam/pysam_util.h
@@ -1,20 +1,35 @@
#ifndef PYSAM_UTIL_H
#define PYSAM_UTIL_H
-//////////////////////////////////////////////////////////////////
/*! set pysam standard error to point to file descriptor
Setting the stderr will close the previous stderr.
*/
FILE * pysam_set_stderr(int fd);
-//////////////////////////////////////////////////////////////////
+/*! set pysam standard output to point to file descriptor
+
+ Setting the stdout will close the previous stdout.
+ */
+FILE * pysam_set_stdout(int fd);
+
+/*! set pysam standard output to point to filename
+
+ */
+void pysam_set_stdout_fn(const char * fn);
+
/*! set pysam standard error to /dev/null.
Unsetting the stderr will close the previous stderr.
*/
void pysam_unset_stderr(void);
+/*! set pysam standard output to /dev/null.
+
+ Unsetting the stdout will close the previous stdout.
+ */
+void pysam_unset_stdout(void);
+
int pysam_dispatch(int argc, char *argv[]);
void set_optind(int);
diff --git a/pysam/tabix_util.c b/pysam/tabix_util.c
index f94b09d..bff140e 100644
--- a/pysam/tabix_util.c
+++ b/pysam/tabix_util.c
@@ -1,8 +1,6 @@
-// Definition of pysamerr
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
-FILE * pysamerr = NULL;
#if !(_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)
/*
diff --git a/pysam/utils.py b/pysam/utils.py
index 0e49d54..c5bb539 100644
--- a/pysam/utils.py
+++ b/pysam/utils.py
@@ -46,14 +46,24 @@ class PysamDispatcher(object):
'''execute a samtools command.
Keyword arguments:
- catch_stdout -- redirect stdout from the samtools command and return as variable (default True)
+ catch_stdout -- redirect stdout from the samtools command and
+ return as variable (default True)
+ save_stdout -- redirect stdout to a filename.
raw -- ignore any parsers associated with this samtools command.
+ split_lines -- return stdout (if catch_stdout is True) and stderr
+ as lists of strings.
'''
retval, stderr, stdout = _pysam_dispatch(
self.collection,
self.dispatch,
args,
- catch_stdout=kwargs.get("catch_stdout", True))
+ catch_stdout=kwargs.get("catch_stdout", True),
+ save_stdout=kwargs.get("save_stdout", None))
+
+ if kwargs.get("split_lines", False):
+ stdout = stdout.splitlines()
+ if stderr:
+ stderr = stderr.splitlines()
if retval:
raise SamtoolsError(
@@ -61,8 +71,8 @@ class PysamDispatcher(object):
"stdout=%s, stderr=%s" %
(self.collection,
retval,
- "\n".join(stdout),
- "\n".join(stderr)))
+ stdout,
+ stderr))
self.stderr = stderr
@@ -84,5 +94,5 @@ class PysamDispatcher(object):
'''return the samtools usage information for this command'''
retval, stderr, stdout = csamtools._samtools_dispatch(
self.dispatch)
- return "".join(stderr)
+ return stderr
diff --git a/pysam/version.py b/pysam/version.py
index 815e4b9..15cefc4 100644
--- a/pysam/version.py
+++ b/pysam/version.py
@@ -1,7 +1,7 @@
# pysam versioning information
-__version__ = "0.9.0"
+__version__ = "0.9.1"
-__samtools_version__ = "1.3"
+__samtools_version__ = "1.3.1"
-__htslib_version__ = "1.3"
+__htslib_version__ = "1.3.1"
diff --git a/run_tests_travis.sh b/run_tests_travis.sh
index d2d9988..f1fcdce 100755
--- a/run_tests_travis.sh
+++ b/run_tests_travis.sh
@@ -34,21 +34,21 @@ mkdir -p $WORKDIR/external-tools
# install htslib
cd $WORKDIR/external-tools
-curl -L https://github.com/samtools/htslib/releases/download/1.3/htslib-1.3.tar.bz2 > htslib-1.3.tar.bz2
-tar xjvf htslib-1.3.tar.bz2
-cd htslib-1.3
+curl -L https://github.com/samtools/htslib/releases/download/1.3.1/htslib-1.3.1.tar.bz2 > htslib-1.3.1.tar.bz2
+tar xjvf htslib-1.3.1.tar.bz2
+cd htslib-1.3.1
make
-PATH=$PATH:$WORKDIR/external-tools/htslib-1.3
-LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$WORKDIR/external-tools/htslib-1.3
+PATH=$PATH:$WORKDIR/external-tools/htslib-1.3.1
+LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$WORKDIR/external-tools/htslib-1.3.1
# install samtools, compile against htslib
cd $WORKDIR/external-tools
-curl -L http://downloads.sourceforge.net/project/samtools/samtools/1.3/samtools-1.3.tar.bz2 > samtools-1.3.tar.bz2
-tar xjvf samtools-1.3.tar.bz2
-cd samtools-1.3
-./configure --with-htslib=../htslib-1.3
+curl -L http://downloads.sourceforge.net/project/samtools/samtools/1.3.1/samtools-1.3.1.tar.bz2 > samtools-1.3.1.tar.bz2
+tar xjvf samtools-1.3.1.tar.bz2
+cd samtools-1.3.1
+./configure --with-htslib=../htslib-1.3.1
make
-PATH=$PATH:$WORKDIR/external-tools/samtools-1.3
+PATH=$PATH:$WORKDIR/external-tools/samtools-1.3.1
echo "installed samtools"
samtools --version
@@ -59,12 +59,12 @@ fi
# install bcftools
cd $WORKDIR/external-tools
-curl -L https://github.com/samtools/bcftools/releases/download/1.3/bcftools-1.3.tar.bz2 > bcftools-1.3.tar.bz2
-tar xjf bcftools-1.3.tar.bz2
-cd bcftools-1.3
-./configure --with-htslib=../htslib-1.3
+curl -L https://github.com/samtools/bcftools/releases/download/1.3.1/bcftools-1.3.1.tar.bz2 > bcftools-1.3.1.tar.bz2
+tar xjf bcftools-1.3.1.tar.bz2
+cd bcftools-1.3.1
+./configure --with-htslib=../htslib-1.3.1
make
-PATH=$PATH:$WORKDIR/external-tools/bcftools-1.3
+PATH=$PATH:$WORKDIR/external-tools/bcftools-1.3.1
echo "installed bcftools"
bcftools --version
diff --git a/samtools/bam.c b/samtools/bam.c
index afab668..4965e24 100644
--- a/samtools/bam.c
+++ b/samtools/bam.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
@@ -34,15 +36,22 @@ char *bam_format1(const bam_header_t *header, const bam1_t *b)
{
kstring_t str;
str.l = str.m = 0; str.s = NULL;
- sam_format1(header, b, &str);
+ if (sam_format1(header, b, &str) < 0) {
+ free(str.s);
+ str.s = NULL;
+ return NULL;
+ }
return str.s;
}
-void bam_view1(const bam_header_t *header, const bam1_t *b)
+int bam_view1(const bam_header_t *header, const bam1_t *b)
{
char *s = bam_format1(header, b);
- puts(s);
+ int ret = -1;
+ if (!s) return -1;
+ if (puts(s) != EOF) ret = 0;
free(s);
+ return ret;
}
int bam_validate1(const bam_header_t *header, const bam1_t *b)
@@ -103,6 +112,9 @@ const char *bam_get_library(bam_header_t *h, const bam1_t *b)
last = *cp++;
}
+ if (!ID || !LB)
+ continue;
+
// Check it's the correct ID
if (strncmp(rg, ID, strlen(rg)) != 0 || ID[strlen(rg)] != '\t')
continue;
diff --git a/samtools/bam.c.pysam.c b/samtools/bam.c.pysam.c
index a9da5b9..188fe8c 100644
--- a/samtools/bam.c.pysam.c
+++ b/samtools/bam.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
@@ -36,15 +38,22 @@ char *bam_format1(const bam_header_t *header, const bam1_t *b)
{
kstring_t str;
str.l = str.m = 0; str.s = NULL;
- sam_format1(header, b, &str);
+ if (sam_format1(header, b, &str) < 0) {
+ free(str.s);
+ str.s = NULL;
+ return NULL;
+ }
return str.s;
}
-void bam_view1(const bam_header_t *header, const bam1_t *b)
+int bam_view1(const bam_header_t *header, const bam1_t *b)
{
char *s = bam_format1(header, b);
- puts(s);
+ int ret = -1; /* stays -1 unless both the record and the newline are written */
+ if (!s) return -1; /* bam_format1() failed; nothing to print */
+ if (fputs(s, pysam_stdout) != EOF && fputc('\n', pysam_stdout) != EOF) ret = 0; /* test each write; && also fixes the order */
free(s);
+ return ret;
}
int bam_validate1(const bam_header_t *header, const bam1_t *b)
@@ -105,6 +114,9 @@ const char *bam_get_library(bam_header_t *h, const bam1_t *b)
last = *cp++;
}
+ if (!ID || !LB)
+ continue;
+
// Check it's the correct ID
if (strncmp(rg, ID, strlen(rg)) != 0 || ID[strlen(rg)] != '\t')
continue;
diff --git a/samtools/bam.h b/samtools/bam.h
index 57aa044..e928ce4 100644
--- a/samtools/bam.h
+++ b/samtools/bam.h
@@ -38,7 +38,7 @@ DEALINGS IN THE SOFTWARE. */
@copyright Genome Research Ltd.
*/
-#define BAM_VERSION "1.3"
+#define BAM_VERSION "1.3.1"
#include <stdint.h>
#include <stdlib.h>
@@ -322,8 +322,11 @@ extern "C" {
*/
char *bam_format1(const bam_header_t *header, const bam1_t *b);
- /*! @abstract Formats a BAM record and writes it and \n to stdout */
- void bam_view1(const bam_header_t *header, const bam1_t *b);
+ /*!
+ @abstract Formats a BAM record and writes it and \n to stdout
+ @return 0 if successful, -1 on error
+ */
+ int bam_view1(const bam_header_t *header, const bam1_t *b);
/*!
@abstract Check whether a BAM record is plausibly valid
diff --git a/samtools/bam2bcf.c b/samtools/bam2bcf.c
index ed433b1..85ce307 100644
--- a/samtools/bam2bcf.c
+++ b/samtools/bam2bcf.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include <stdint.h>
#include <assert.h>
diff --git a/samtools/bam2bcf.c.pysam.c b/samtools/bam2bcf.c.pysam.c
index be3876d..6938ec0 100644
--- a/samtools/bam2bcf.c.pysam.c
+++ b/samtools/bam2bcf.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include <stdint.h>
#include <assert.h>
@@ -108,7 +110,7 @@ static int get_position(const bam_pileup1_t *p, int *len)
if ( cig==BAM_CHARD_CLIP ) continue;
if ( cig==BAM_CPAD ) continue;
if ( cig==BAM_CREF_SKIP ) continue;
- fprintf(pysamerr,"todo: cigar %d\n", cig);
+ fprintf(pysam_stderr,"todo: cigar %d\n", cig);
assert(0);
}
*len = n_tot_bases;
@@ -479,7 +481,7 @@ void calc_SegBias(const bcf_callret1_t *bcr, bcf_call_t *call)
double sum = 0;
const double log2 = log(2.0);
- // fprintf(pysamerr,"M=%.1f p=%e q=%e f=%f dp=%d\n",M,p,q,f,avg_dp);
+ // fprintf(pysam_stderr,"M=%.1f p=%e q=%e f=%f dp=%d\n",M,p,q,f,avg_dp);
int i;
for (i=0; i<call->n; i++)
{
@@ -494,7 +496,7 @@ void calc_SegBias(const bcf_callret1_t *bcr, bcf_call_t *call)
else
tmp = log(2*f*(1-f)*exp(-q) + f*f*exp(-2*q) + (1-f)*(1-f)) + p;
sum += tmp;
- // fprintf(pysamerr,"oi=%d %e\n", oi,tmp);
+ // fprintf(pysam_stderr,"oi=%d %e\n", oi,tmp);
}
call->seg_bias = sum;
}
@@ -658,7 +660,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
}
}
-// if (ref_base < 0) fprintf(pysamerr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen);
+// if (ref_base < 0) fprintf(pysam_stderr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen);
call->shift = (int)(sum_min + .499);
}
// combine annotations
diff --git a/samtools/bam2bcf_indel.c b/samtools/bam2bcf_indel.c
index e1c45c4..5b353fc 100644
--- a/samtools/bam2bcf_indel.c
+++ b/samtools/bam2bcf_indel.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <assert.h>
#include <ctype.h>
#include <string.h>
diff --git a/samtools/bam2bcf_indel.c.pysam.c b/samtools/bam2bcf_indel.c.pysam.c
index 45e1101..21cbb03 100644
--- a/samtools/bam2bcf_indel.c.pysam.c
+++ b/samtools/bam2bcf_indel.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <assert.h>
#include <ctype.h>
#include <string.h>
@@ -225,7 +227,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
free(aux);
// TODO revisit how/whether to control printing this warning
if (hts_verbose >= 2)
- fprintf(pysamerr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
+ fprintf(pysam_stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
return -1;
}
types = (int*)calloc(n_types, sizeof(int));
@@ -298,7 +300,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7) max2_i = -1;
if (max_i >= 0) r[max_i] = 15;
if (max2_i >= 0) r[max2_i] = 15;
- //for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], pysamerr); fputc('\n', pysamerr);
+ //for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], pysam_stderr); fputc('\n', pysam_stderr);
}
free(ref0); free(cns);
}
@@ -366,7 +368,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
else ir = est_indelreg(pos, ref, -types[t], 0);
if (ir > bca->indelreg) bca->indelreg = ir;
-// fprintf(pysamerr, "%d, %d, %d\n", pos, types[t], ir);
+// fprintf(pysam_stderr, "%d, %d, %d\n", pos, types[t], ir);
// realignment
for (s = K = 0; s < n; ++s) {
// write ref2
@@ -428,11 +430,11 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
}
/*
for (l = 0; l < tend - tbeg + abs(types[t]); ++l)
- fputc("ACGTN"[(int)ref2[tbeg-left+l]], pysamerr);
- fputc('\n', pysamerr);
- for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], pysamerr);
- fputc('\n', pysamerr);
- fprintf(pysamerr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam1_qname(p->b), qbeg, tbeg, sc);
+ fputc("ACGTN"[(int)ref2[tbeg-left+l]], pysam_stderr);
+ fputc('\n', pysam_stderr);
+ for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], pysam_stderr);
+ fputc('\n', pysam_stderr);
+ fprintf(pysam_stderr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam1_qname(p->b), qbeg, tbeg, sc);
*/
}
}
@@ -488,7 +490,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
if (seqQ > 255) seqQ = 255;
p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
-// fprintf(pysamerr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
+// fprintf(pysam_stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
}
}
// determine bca->indel_types[] and bca->inscns
@@ -520,7 +522,7 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
if (x == bca->indel_types[j]) break;
p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
if ((p->aux>>16&0x3f) > 0) ++n_alt;
- //fprintf(pysamerr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
+ //fprintf(pysam_stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d seqQ=%d indelQ=%d\n", pos, s, i, bam1_qname(p->b), (p->aux>>16)&0x3f, bca->indel_types[(p->aux>>16)&0x3f], (p->aux>>8)&0xff, p->aux&0xff);
}
}
}
diff --git a/samtools/bam2depth.c b/samtools/bam2depth.c
index f109447..21220f1 100644
--- a/samtools/bam2depth.c
+++ b/samtools/bam2depth.c
@@ -30,6 +30,8 @@ DEALINGS IN THE SOFTWARE. */
* gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -lhts -lz
*/
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
diff --git a/samtools/bam2depth.c.pysam.c b/samtools/bam2depth.c.pysam.c
index 6549949..9d9dc40 100644
--- a/samtools/bam2depth.c.pysam.c
+++ b/samtools/bam2depth.c.pysam.c
@@ -32,6 +32,8 @@ DEALINGS IN THE SOFTWARE. */
* gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -lhts -lz
*/
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -73,26 +75,26 @@ static int read_bam(void *data, bam1_t *b) // read level filters better go here
int read_file_list(const char *file_list,int *n,char **argv[]);
static int usage() {
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -a output all positions (including zero depth)\n");
- fprintf(pysamerr, " -a -a (or -aa) output absolutely all positions, including unused ref. sequences\n");
- fprintf(pysamerr, " -b <bed> list of positions or regions\n");
- fprintf(pysamerr, " -f <list> list of input BAM filenames, one per line [null]\n");
- fprintf(pysamerr, " -l <int> read length threshold (ignore reads shorter than <int>)\n");
- fprintf(pysamerr, " -d/-m <int> maximum coverage depth [8000]\n"); // the htslib's default
- fprintf(pysamerr, " -q <int> base quality threshold\n");
- fprintf(pysamerr, " -Q <int> mapping quality threshold\n");
- fprintf(pysamerr, " -r <chr:from-to> region\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -a output all positions (including zero depth)\n");
+ fprintf(pysam_stderr, " -a -a (or -aa) output absolutely all positions, including unused ref. sequences\n");
+ fprintf(pysam_stderr, " -b <bed> list of positions or regions\n");
+ fprintf(pysam_stderr, " -f <list> list of input BAM filenames, one per line [null]\n");
+ fprintf(pysam_stderr, " -l <int> read length threshold (ignore reads shorter than <int>)\n");
+ fprintf(pysam_stderr, " -d/-m <int> maximum coverage depth [8000]\n"); // the htslib's default
+ fprintf(pysam_stderr, " -q <int> base quality threshold\n");
+ fprintf(pysam_stderr, " -Q <int> mapping quality threshold\n");
+ fprintf(pysam_stderr, " -r <chr:from-to> region\n");
- sam_global_opt_help(pysamerr, "-.--.");
+ sam_global_opt_help(pysam_stderr, "-.--.");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "The output is a simple tab-separated table with three columns: reference name,\n");
- fprintf(pysamerr, "position, and coverage depth. Note that positions with zero coverage may be\n");
- fprintf(pysamerr, "omitted by default; see the -a option.\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "The output is a simple tab-separated table with three columns: reference name,\n");
+ fprintf(pysam_stderr, "position, and coverage depth. Note that positions with zero coverage may be\n");
+ fprintf(pysam_stderr, "omitted by default; see the -a option.\n");
+ fprintf(pysam_stderr, "\n");
return 1;
}
@@ -162,18 +164,18 @@ int main_depth(int argc, char *argv[])
rf = SAM_FLAG | SAM_RNAME | SAM_POS | SAM_MAPQ | SAM_CIGAR | SAM_SEQ;
if (baseQ) rf |= SAM_QUAL;
if (hts_set_opt(data[i]->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
- fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+ fprintf(pysam_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
return 1;
}
if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
- fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+ fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
return 1;
}
data[i]->min_mapQ = mapQ; // set the mapQ filter
data[i]->min_len = min_len; // set the qlen filter
data[i]->hdr = sam_hdr_read(data[i]->fp); // read the BAM header
if (data[i]->hdr == NULL) {
- fprintf(pysamerr, "Couldn't read header for \"%s\"\n",
+ fprintf(pysam_stderr, "Couldn't read header for \"%s\"\n",
argv[optind+i]);
status = EXIT_FAILURE;
goto depth_end;
@@ -218,10 +220,10 @@ int main_depth(int argc, char *argv[])
while (++last_pos < h->target_len[last_tid]) {
if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
continue;
- fputs(h->target_name[last_tid], stdout); printf("\t%d", last_pos+1);
+ fputs(h->target_name[last_tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", last_pos+1);
for (i = 0; i < n; i++)
- putchar('\t'), putchar('0');
- putchar('\n');
+ fputc('\t', pysam_stdout), fputc('0', pysam_stdout);
+ fputc('\n', pysam_stdout);
}
}
last_tid++;
@@ -233,16 +235,16 @@ int main_depth(int argc, char *argv[])
if (last_pos < beg) continue; // out of range; skip
if (bed && bed_overlap(bed, h->target_name[tid], last_pos, last_pos + 1) == 0)
continue;
- fputs(h->target_name[tid], stdout); printf("\t%d", last_pos+1);
+ fputs(h->target_name[tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", last_pos+1);
for (i = 0; i < n; i++)
- putchar('\t'), putchar('0');
- putchar('\n');
+ fputc('\t', pysam_stdout), fputc('0', pysam_stdout);
+ fputc('\n', pysam_stdout);
}
last_tid = tid;
last_pos = pos;
}
- fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster
+ fputs(h->target_name[tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", pos+1); // a customized fprintf(pysam_stdout, ) would be faster
for (i = 0; i < n; ++i) { // base level filters have to go here
int j, m = 0;
for (j = 0; j < n_plp[i]; ++j) {
@@ -250,9 +252,9 @@ int main_depth(int argc, char *argv[])
if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
else if (bam_get_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
}
- printf("\t%d", n_plp[i] - m); // this the depth to output
+ fprintf(pysam_stdout, "\t%d", n_plp[i] - m); // this the depth to output
}
- putchar('\n');
+ fputc('\n', pysam_stdout);
}
if (ret < 0) status = EXIT_FAILURE;
free(n_plp); free(plp);
@@ -265,10 +267,10 @@ int main_depth(int argc, char *argv[])
if (last_pos >= end) break;
if (bed && bed_overlap(bed, h->target_name[last_tid], last_pos, last_pos + 1) == 0)
continue;
- fputs(h->target_name[last_tid], stdout); printf("\t%d", last_pos+1);
+ fputs(h->target_name[last_tid], pysam_stdout); fprintf(pysam_stdout, "\t%d", last_pos+1);
for (i = 0; i < n; i++)
- putchar('\t'), putchar('0');
- putchar('\n');
+ fputc('\t', pysam_stdout), fputc('0', pysam_stdout);
+ fputc('\n', pysam_stdout);
}
last_tid++;
last_pos = -1;
@@ -296,7 +298,7 @@ depth_end:
}
#ifdef _MAIN_BAM2DEPTH
-int main(int argc, char *argv[])
+int samtools_bam2depth_main(int argc, char *argv[])
{
return main_depth(argc, argv);
}
diff --git a/samtools/bam_addrprg.c b/samtools/bam_addrprg.c
index 2b4939f..f7bbfab 100644
--- a/samtools/bam_addrprg.c
+++ b/samtools/bam_addrprg.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/sam.h>
#include <htslib/kstring.h>
#include "samtools.h"
diff --git a/samtools/bam_addrprg.c.pysam.c b/samtools/bam_addrprg.c.pysam.c
index 91fa9cd..2ddd1b1 100644
--- a/samtools/bam_addrprg.c.pysam.c
+++ b/samtools/bam_addrprg.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/sam.h>
#include <htslib/kstring.h>
#include "samtools.h"
@@ -95,7 +97,7 @@ static char* basic_unescape(const char* in)
if (*in == '\\') {
++in;
if (*in == '\0') {
- fprintf(pysamerr, "[%s] Unterminated escape sequence.\n", __func__);
+ fprintf(pysam_stderr, "[%s] Unterminated escape sequence.\n", __func__);
free(out);
return NULL;
}
@@ -107,11 +109,11 @@ static char* basic_unescape(const char* in)
*ptr = '\t';
break;
case 'n':
- fprintf(pysamerr, "[%s] \\n in escape sequence is not supported.\n", __func__);
+ fprintf(pysam_stderr, "[%s] \\n in escape sequence is not supported.\n", __func__);
free(out);
return NULL;
default:
- fprintf(pysamerr, "[%s] Unsupported escape sequence.\n", __func__);
+ fprintf(pysam_stderr, "[%s] Unsupported escape sequence.\n", __func__);
free(out);
return NULL;
}
@@ -226,7 +228,7 @@ static void usage(FILE *fp)
"\n"
"Options:\n"
" -m MODE Set the mode of operation from one of overwrite_all, orphan_only [overwrite_all]\n"
- " -o FILE Where to write output to [stdout]\n"
+ " -o FILE Where to write output to [pysam_stdout]\n"
" -r STRING @RG line text\n"
" -R STRING ID of @RG line in existing header to use\n"
);
@@ -238,11 +240,11 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
*opts = NULL;
int n;
- if (argc == 1) { usage(stdout); return true; }
+ if (argc == 1) { usage(pysam_stdout); return true; }
parsed_opts_t* retval = calloc(1, sizeof(parsed_opts_t));
if (! retval ) {
- fprintf(pysamerr, "[%s] Out of memory allocating parsed_opts_t\n", __func__);
+ fprintf(pysam_stderr, "[%s] Out of memory allocating parsed_opts_t\n", __func__);
return false;
}
// Set defaults
@@ -276,7 +278,7 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
} else if (strcmp(optarg, "orphan_only") == 0) {
retval->mode = orphan_only;
} else {
- usage(pysamerr);
+ usage(pysam_stderr);
return false;
}
break;
@@ -285,17 +287,17 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
retval->output_name = strdup(optarg);
break;
case 'h':
- usage(stdout);
+ usage(pysam_stdout);
free(retval);
return true;
case '?':
- usage(pysamerr);
+ usage(pysam_stderr);
free(retval);
return false;
case 'O':
default:
if (parse_sam_global_opt(n, optarg, lopts, &retval->ga) == 0) break;
- usage(pysamerr);
+ usage(pysam_stderr);
free(retval);
return false;
}
@@ -303,13 +305,13 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
retval->rg_line = ks_release(&rg_line);
if (argc-optind < 1) {
- fprintf(pysamerr, "You must specify an input file.\n");
- usage(pysamerr);
+ fprintf(pysam_stderr, "You must specify an input file.\n");
+ usage(pysam_stderr);
cleanup_opts(retval);
return false;
}
if (retval->rg_id && retval->rg_line) {
- fprintf(pysamerr, "The options -r and -R are mutually exclusive.\n");
+ fprintf(pysam_stderr, "The options -r and -R are mutually exclusive.\n");
cleanup_opts(retval);
return false;
}
@@ -319,7 +321,7 @@ static bool parse_args(int argc, char** argv, parsed_opts_t** opts)
char* tmp = basic_unescape(retval->rg_line);
if ((retval->rg_id = get_rg_id(tmp)) == NULL) {
- fprintf(pysamerr, "[%s] The supplied RG line lacks an ID tag.\n", __func__);
+ fprintf(pysam_stderr, "[%s] The supplied RG line lacks an ID tag.\n", __func__);
free(tmp);
cleanup_opts(retval);
return false;
@@ -361,7 +363,7 @@ static void orphan_only_func(const state_t* state, bam1_t* file_read)
static bool init(const parsed_opts_t* opts, state_t** state_out) {
state_t* retval = (state_t*) calloc(1, sizeof(state_t));
if (retval == NULL) {
- fprintf(pysamerr, "[init] Out of memory allocating state struct.\n");
+ fprintf(pysam_stderr, "[init] Out of memory allocating state struct.\n");
return false;
}
*state_out = retval;
@@ -369,7 +371,7 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
// Open files
retval->input_file = sam_open_format(opts->input_name, "r", &opts->ga.in);
if (retval->input_file == NULL) {
- fprintf(pysamerr, "[init] Could not open input file: %s\n", opts->input_name);
+ fprintf(pysam_stderr, "[init] Could not open input file: %s\n", opts->input_name);
return false;
}
retval->input_header = sam_hdr_read(retval->input_file);
@@ -386,14 +388,14 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
// Append new RG line to header.
// Check does not already exist
if ( confirm_rg(retval->output_header, opts->rg_id) ) {
- fprintf(pysamerr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
+ fprintf(pysam_stderr, "[init] ID of new RG line specified conflicts with that of an existing header RG line. Overwrite not yet implemented.\n");
return false;
}
retval->rg_id = strdup(opts->rg_id);
size_t new_len = strlen( retval->output_header->text ) + strlen( opts->rg_line ) + 2;
char* new_header = malloc(new_len);
if (!new_header) {
- fprintf(pysamerr, "[init] Out of memory whilst writing new header.\n");
+ fprintf(pysam_stderr, "[init] Out of memory whilst writing new header.\n");
return false;
}
sprintf(new_header,"%s%s\n", retval->output_header->text, opts->rg_line);
@@ -404,13 +406,13 @@ static bool init(const parsed_opts_t* opts, state_t** state_out) {
if (opts->rg_id) {
// Confirm what has been supplied exists
if ( !confirm_rg(retval->output_header, opts->rg_id) ) {
- fprintf(pysamerr, "RG ID supplied does not exist in header. Supply full @RG line with -r instead?\n");
+ fprintf(pysam_stderr, "RG ID supplied does not exist in header. Supply full @RG line with -r instead?\n");
return false;
}
retval->rg_id = strdup(opts->rg_id);
} else {
if ((retval->rg_id = get_first_rgid(retval->output_header)) == NULL ) {
- fprintf(pysamerr, "No RG specified on command line or in existing header.\n");
+ fprintf(pysam_stderr, "No RG specified on command line or in existing header.\n");
return false;
}
}
diff --git a/samtools/bam_aux.c b/samtools/bam_aux.c
index 7a67de8..d90b4a8 100644
--- a/samtools/bam_aux.c
+++ b/samtools/bam_aux.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include <limits.h>
#include "bam.h"
diff --git a/samtools/bam_aux.c.pysam.c b/samtools/bam_aux.c.pysam.c
index 475c772..c6bd0aa 100644
--- a/samtools/bam_aux.c.pysam.c
+++ b/samtools/bam_aux.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include <limits.h>
#include "bam.h"
diff --git a/samtools/bam_cat.c b/samtools/bam_cat.c
index 83cc0fb..5c303d1 100644
--- a/samtools/bam_cat.c
+++ b/samtools/bam_cat.c
@@ -1,6 +1,6 @@
/* bam_cat.c -- efficiently concatenates bam files.
- Copyright (C) 2008-2009, 2011-2013 Genome Research Ltd.
+ Copyright (C) 2008-2009, 2011-2013, 2015-2016 Genome Research Ltd.
Modified SAMtools work copyright (C) 2010 Illumina, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -34,6 +34,8 @@ and modified to perform concatenation by Chris Saunders on behalf of
Illumina.
*/
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -43,6 +45,7 @@ Illumina.
#include "htslib/sam.h"
#include "htslib/cram.h"
#include "htslib/khash.h"
+#include "samtools.h"
KHASH_MAP_INIT_STR(s2i, int)
@@ -195,7 +198,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
in = sam_open(fn[i], "rc");
if (in == 0) {
- fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+ print_error_errno("cat", "fail to open file '%s'", fn[i]);
return NULL;
}
in_c = in->fp.cram;
@@ -302,15 +305,18 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
sprintf(vers, "%d.%d", vers_maj, vers_min);
out = sam_open(outcram, "wc");
if (out == 0) {
- fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outcram);
- return 1;
+ print_error_errno("cat", "fail to open output file '%s'", outcram);
+ return -1;
}
out_c = out->fp.cram;
cram_set_option(out_c, CRAM_OPT_VERSION, vers);
//fprintf(stderr, "Creating cram vers %s\n", vers);
cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text, new_h->l_text)); // needed?
- sam_hdr_write(out, new_h);
+ if (sam_hdr_write(out, new_h) < 0) {
+ print_error_errno("cat", "Couldn't write header");
+ return -1;
+ }
for (i = 0; i < nfn; ++i) {
samFile *in;
@@ -321,7 +327,7 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
in = sam_open(fn[i], "rc");
if (in == 0) {
- fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+ print_error_errno("cat", "fail to open file '%s'", fn[i]);
return -1;
}
in_c = in->fp.cram;
@@ -414,29 +420,37 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
{
- BGZF *fp;
- uint8_t *buf;
+ BGZF *fp, *in = NULL;
+ uint8_t *buf = NULL;
uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
const int es=BGZF_EMPTY_BLOCK_SIZE;
int i;
fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
if (fp == 0) {
- fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
- return 1;
+ print_error_errno("cat", "fail to open output file '%s'", outbam);
+ return -1;
+ }
+ if (h) {
+ if (bam_hdr_write(fp, h) < 0) {
+ print_error_errno("cat", "Couldn't write header");
+ goto fail;
+ }
}
- if (h) bam_hdr_write(fp, h);
buf = (uint8_t*) malloc(BUF_SIZE);
+ if (!buf) {
+ fprintf(stderr, "[%s] Couldn't allocate buffer\n", __func__);
+ goto fail;
+ }
for(i = 0; i < nfn; ++i){
- BGZF *in;
bam_hdr_t *old;
int len,j;
in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
if (in == 0) {
- fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
- return -1;
+ print_error_errno("cat", "fail to open file '%s'", fn[i]);
+ goto fail;
}
if (in->is_write) return -1;
@@ -444,14 +458,18 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
if (old == NULL) {
fprintf(stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
__func__, fn[i]);
- bgzf_close(in);
- return -1;
+ goto fail;
+ }
+ if (h == 0 && i == 0) {
+ if (bam_hdr_write(fp, old) < 0) {
+ print_error_errno("cat", "Couldn't write header");
+ goto fail;
+ }
}
- if (h == 0 && i == 0) bam_hdr_write(fp, old);
if (in->block_offset < in->block_length) {
- bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
- bgzf_flush(fp);
+ if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+ if (bgzf_flush(fp) != 0) goto write_fail;
}
j=0;
@@ -460,16 +478,19 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
int diff=es-len;
if(j==0) {
fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
- return -1;
+ goto fail;
}
- bgzf_raw_write(fp, ebuf, len);
+ if (bgzf_raw_write(fp, ebuf, len) < 0) goto write_fail;
+
memcpy(ebuf,ebuf+len,diff);
memcpy(ebuf+diff,buf,len);
} else {
- if(j!=0) bgzf_raw_write(fp, ebuf, es);
+ if(j!=0) {
+ if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
+ }
len-= es;
memcpy(ebuf,buf+len,es);
- bgzf_raw_write(fp, buf, len);
+ if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
}
j=1;
}
@@ -482,15 +503,27 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
fprintf(stderr, " Possible output corruption.\n");
- bgzf_raw_write(fp, ebuf, es);
+ if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
}
}
bam_hdr_destroy(old);
bgzf_close(in);
+ in = NULL;
}
free(buf);
- bgzf_close(fp);
+ if (bgzf_close(fp) < 0) {
+ fprintf(stderr, "[%s] Error on closing '%s'.\n", __func__, outbam);
+ return -1;
+ }
return 0;
+
+ write_fail:
+ fprintf(stderr, "[%s] Error writing to '%s'.\n", __func__, outbam);
+ fail:
+ if (in) bgzf_close(in);
+ if (fp) bgzf_close(fp);
+ free(buf);
+ return -1;
}
@@ -498,7 +531,7 @@ int main_cat(int argc, char *argv[])
{
bam_hdr_t *h = 0;
char *outfn = 0;
- int c, ret;
+ int c, ret = 0;
samFile *in;
while ((c = getopt(argc, argv, "h:o:")) >= 0) {
@@ -529,19 +562,21 @@ int main_cat(int argc, char *argv[])
in = sam_open(argv[optind], "r");
if (!in) {
- fprintf(stderr, "[%s] ERROR: failed to open file '%s'.\n", __func__, argv[optind]);
+ print_error_errno("cat", "failed to open file '%s'", argv[optind]);
return 1;
}
switch (hts_get_format(in)->format) {
case bam:
sam_close(in);
- ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+ if (bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+ ret = 1;
break;
case cram:
sam_close(in);
- ret = cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+ if (cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+ ret = 1;
break;
default:
diff --git a/samtools/bam_cat.c.pysam.c b/samtools/bam_cat.c.pysam.c
index 004911a..daa0454 100644
--- a/samtools/bam_cat.c.pysam.c
+++ b/samtools/bam_cat.c.pysam.c
@@ -2,7 +2,7 @@
/* bam_cat.c -- efficiently concatenates bam files.
- Copyright (C) 2008-2009, 2011-2013 Genome Research Ltd.
+ Copyright (C) 2008-2009, 2011-2013, 2015-2016 Genome Research Ltd.
Modified SAMtools work copyright (C) 2010 Illumina, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -36,6 +36,8 @@ and modified to perform concatenation by Chris Saunders on behalf of
Illumina.
*/
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -45,6 +47,7 @@ Illumina.
#include "htslib/sam.h"
#include "htslib/cram.h"
#include "htslib/khash.h"
+#include "samtools.h"
KHASH_MAP_INIT_STR(s2i, int)
@@ -197,7 +200,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
in = sam_open(fn[i], "rc");
if (in == 0) {
- fprintf(pysamerr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+ print_error_errno("cat", "fail to open file '%s'", fn[i]);
return NULL;
}
in_c = in->fp.cram;
@@ -206,7 +209,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
int vmin = cram_minor_vers(in_c);
if ((vers_maj != -1 && vers_maj != vmaj) ||
(vers_min != -1 && vers_min != vmin)) {
- fprintf(pysamerr, "[%s] ERROR: input files have differing version numbers.\n",
+ fprintf(pysam_stderr, "[%s] ERROR: input files have differing version numbers.\n",
__func__);
return NULL;
}
@@ -226,7 +229,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
int added;
new_rg = hash_s2i_inc(*rg2id, rg2id_in->id[ki], rg2id_in->line[ki], &added);
- //fprintf(pysamerr, "RG %s: #%d -> #%d\n",
+ //fprintf(pysam_stderr, "RG %s: #%d -> #%d\n",
// rg2id_in->id[ki], ki, new_rg);
if (added) {
@@ -242,7 +245,7 @@ static bam_hdr_t *cram_cat_check_hdr(int nfn, char * const *fn, const bam_hdr_t
}
if (new_rg != ki && rg2id_in->n_id > 1) {
- fprintf(pysamerr, "[%s] ERROR: Same size @RG lists but differing order / contents\n",
+ fprintf(pysam_stderr, "[%s] ERROR: Same size @RG lists but differing order / contents\n",
__func__);
return NULL;
}
@@ -304,15 +307,18 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
sprintf(vers, "%d.%d", vers_maj, vers_min);
out = sam_open(outcram, "wc");
if (out == 0) {
- fprintf(pysamerr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outcram);
- return 1;
+ print_error_errno("cat", "fail to open output file '%s'", outcram);
+ return -1;
}
out_c = out->fp.cram;
cram_set_option(out_c, CRAM_OPT_VERSION, vers);
- //fprintf(pysamerr, "Creating cram vers %s\n", vers);
+ //fprintf(pysam_stderr, "Creating cram vers %s\n", vers);
cram_fd_set_header(out_c, sam_hdr_parse_(new_h->text, new_h->l_text)); // needed?
- sam_hdr_write(out, new_h);
+ if (sam_hdr_write(out, new_h) < 0) {
+ print_error_errno("cat", "Couldn't write header");
+ return -1;
+ }
for (i = 0; i < nfn; ++i) {
samFile *in;
@@ -323,7 +329,7 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
in = sam_open(fn[i], "rc");
if (in == 0) {
- fprintf(pysamerr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
+ print_error_errno("cat", "fail to open file '%s'", fn[i]);
return -1;
}
in_c = in->fp.cram;
@@ -367,7 +373,7 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
// we need to edit the compression header. IF WE CAN.
if (new_rg) {
int zero = 0;
- //fprintf(pysamerr, "Transcode RG %d to %d\n", 0, new_rg);
+ //fprintf(pysam_stderr, "Transcode RG %d to %d\n", 0, new_rg);
cram_transcode_rg(in_c, out_c, c, 1, &zero, &new_rg);
} else {
int32_t num_slices;
@@ -416,44 +422,56 @@ int cram_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outcram)
int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
{
- BGZF *fp;
- uint8_t *buf;
+ BGZF *fp, *in = NULL;
+ uint8_t *buf = NULL;
uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
const int es=BGZF_EMPTY_BLOCK_SIZE;
int i;
- fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
+ fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(pysam_stdout), "w");
if (fp == 0) {
- fprintf(pysamerr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
- return 1;
+ print_error_errno("cat", "fail to open output file '%s'", outbam);
+ return -1;
+ }
+ if (h) {
+ if (bam_hdr_write(fp, h) < 0) {
+ print_error_errno("cat", "Couldn't write header");
+ goto fail;
+ }
}
- if (h) bam_hdr_write(fp, h);
buf = (uint8_t*) malloc(BUF_SIZE);
+ if (!buf) {
+ fprintf(pysam_stderr, "[%s] Couldn't allocate buffer\n", __func__);
+ goto fail;
+ }
for(i = 0; i < nfn; ++i){
- BGZF *in;
bam_hdr_t *old;
int len,j;
in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r");
if (in == 0) {
- fprintf(pysamerr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
- return -1;
+ print_error_errno("cat", "fail to open file '%s'", fn[i]);
+ goto fail;
}
if (in->is_write) return -1;
old = bam_hdr_read(in);
if (old == NULL) {
- fprintf(pysamerr, "[%s] ERROR: couldn't read header for '%s'.\n",
+ fprintf(pysam_stderr, "[%s] ERROR: couldn't read header for '%s'.\n",
__func__, fn[i]);
- bgzf_close(in);
- return -1;
+ goto fail;
+ }
+ if (h == 0 && i == 0) {
+ if (bam_hdr_write(fp, old) < 0) {
+ print_error_errno("cat", "Couldn't write header");
+ goto fail;
+ }
}
- if (h == 0 && i == 0) bam_hdr_write(fp, old);
if (in->block_offset < in->block_length) {
- bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
- bgzf_flush(fp);
+ if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+ if (bgzf_flush(fp) != 0) goto write_fail;
}
j=0;
@@ -461,17 +479,20 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
if(len<es){
int diff=es-len;
if(j==0) {
- fprintf(pysamerr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
- return -1;
+ fprintf(pysam_stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
+ goto fail;
}
- bgzf_raw_write(fp, ebuf, len);
+ if (bgzf_raw_write(fp, ebuf, len) < 0) goto write_fail;
+
memcpy(ebuf,ebuf+len,diff);
memcpy(ebuf+diff,buf,len);
} else {
- if(j!=0) bgzf_raw_write(fp, ebuf, es);
+ if(j!=0) {
+ if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
+ }
len-= es;
memcpy(ebuf,buf+len,es);
- bgzf_raw_write(fp, buf, len);
+ if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
}
j=1;
}
@@ -482,17 +503,29 @@ int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam)
const uint8_t gzip2=ebuf[1];
const uint32_t isize=*((uint32_t*)(ebuf+es-4));
if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
- fprintf(pysamerr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
- fprintf(pysamerr, " Possible output corruption.\n");
- bgzf_raw_write(fp, ebuf, es);
+ fprintf(pysam_stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
+ fprintf(pysam_stderr, " Possible output corruption.\n");
+ if (bgzf_raw_write(fp, ebuf, es) < 0) goto write_fail;
}
}
bam_hdr_destroy(old);
bgzf_close(in);
+ in = NULL;
}
free(buf);
- bgzf_close(fp);
+ if (bgzf_close(fp) < 0) {
+ fprintf(pysam_stderr, "[%s] Error on closing '%s'.\n", __func__, outbam);
+ return -1;
+ }
return 0;
+
+ write_fail:
+ fprintf(pysam_stderr, "[%s] Error writing to '%s'.\n", __func__, outbam);
+ fail:
+ if (in) bgzf_close(in);
+ if (fp) bgzf_close(fp);
+ free(buf);
+ return -1;
}
@@ -500,7 +533,7 @@ int main_cat(int argc, char *argv[])
{
bam_hdr_t *h = 0;
char *outfn = 0;
- int c, ret;
+ int c, ret = 0;
samFile *in;
while ((c = getopt(argc, argv, "h:o:")) >= 0) {
@@ -508,12 +541,12 @@ int main_cat(int argc, char *argv[])
case 'h': {
samFile *fph = sam_open(optarg, "r");
if (fph == 0) {
- fprintf(pysamerr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
+ fprintf(pysam_stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
return 1;
}
h = sam_hdr_read(fph);
if (h == NULL) {
- fprintf(pysamerr,
+ fprintf(pysam_stderr,
"[%s] ERROR: failed to read the header for '%s'.\n",
__func__, argv[1]);
return 1;
@@ -525,30 +558,32 @@ int main_cat(int argc, char *argv[])
}
}
if (argc - optind < 1) {
- fprintf(pysamerr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> [...]\n");
+ fprintf(pysam_stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> [...]\n");
return 1;
}
in = sam_open(argv[optind], "r");
if (!in) {
- fprintf(pysamerr, "[%s] ERROR: failed to open file '%s'.\n", __func__, argv[optind]);
+ print_error_errno("cat", "failed to open file '%s'", argv[optind]);
return 1;
}
switch (hts_get_format(in)->format) {
case bam:
sam_close(in);
- ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+ if (bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+ ret = 1;
break;
case cram:
sam_close(in);
- ret = cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
+ if (cram_cat(argc - optind, argv + optind, h, outfn? outfn : "-") < 0)
+ ret = 1;
break;
default:
sam_close(in);
- fprintf(pysamerr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
+ fprintf(pysam_stderr, "[%s] ERROR: input is not BAM or CRAM\n", __func__);
return 1;
}
free(outfn);
diff --git a/samtools/bam_color.c b/samtools/bam_color.c
index 3983c44..bee19b9 100644
--- a/samtools/bam_color.c
+++ b/samtools/bam_color.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include "bam.h"
diff --git a/samtools/bam_color.c.pysam.c b/samtools/bam_color.c.pysam.c
index 78d8510..6bd12c4 100644
--- a/samtools/bam_color.c.pysam.c
+++ b/samtools/bam_color.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include "bam.h"
diff --git a/samtools/bam_flags.c b/samtools/bam_flags.c
index ddc7b11..11a82b6 100644
--- a/samtools/bam_flags.c
+++ b/samtools/bam_flags.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
diff --git a/samtools/bam_flags.c.pysam.c b/samtools/bam_flags.c.pysam.c
index f4df057..4895f9a 100644
--- a/samtools/bam_flags.c.pysam.c
+++ b/samtools/bam_flags.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
@@ -35,24 +37,24 @@ DEALINGS IN THE SOFTWARE. */
static void usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Convert between textual and numeric flag representation\n");
- fprintf(pysamerr, "Usage: samtools flags INT|STR[,...]\n");
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Flags:\n");
- fprintf(pysamerr, "\t0x%x\tPAIRED .. paired-end (or multiple-segment) sequencing technology\n", BAM_FPAIRED);
- fprintf(pysamerr, "\t0x%x\tPROPER_PAIR .. each segment properly aligned according to the aligner\n", BAM_FPROPER_PAIR);
- fprintf(pysamerr, "\t0x%x\tUNMAP .. segment unmapped\n", BAM_FUNMAP);
- fprintf(pysamerr, "\t0x%x\tMUNMAP .. next segment in the template unmapped\n", BAM_FMUNMAP);
- fprintf(pysamerr, "\t0x%x\tREVERSE .. SEQ is reverse complemented\n", BAM_FREVERSE);
- fprintf(pysamerr, "\t0x%x\tMREVERSE .. SEQ of the next segment in the template is reversed\n", BAM_FMREVERSE);
- fprintf(pysamerr, "\t0x%x\tREAD1 .. the first segment in the template\n", BAM_FREAD1);
- fprintf(pysamerr, "\t0x%x\tREAD2 .. the last segment in the template\n", BAM_FREAD2);
- fprintf(pysamerr, "\t0x%x\tSECONDARY .. secondary alignment\n", BAM_FSECONDARY);
- fprintf(pysamerr, "\t0x%x\tQCFAIL .. not passing quality controls\n", BAM_FQCFAIL);
- fprintf(pysamerr, "\t0x%x\tDUP .. PCR or optical duplicate\n", BAM_FDUP);
- fprintf(pysamerr, "\t0x%x\tSUPPLEMENTARY .. supplementary alignment\n", BAM_FSUPPLEMENTARY);
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Convert between textual and numeric flag representation\n");
+ fprintf(pysam_stderr, "Usage: samtools flags INT|STR[,...]\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Flags:\n");
+ fprintf(pysam_stderr, "\t0x%x\tPAIRED .. paired-end (or multiple-segment) sequencing technology\n", BAM_FPAIRED);
+ fprintf(pysam_stderr, "\t0x%x\tPROPER_PAIR .. each segment properly aligned according to the aligner\n", BAM_FPROPER_PAIR);
+ fprintf(pysam_stderr, "\t0x%x\tUNMAP .. segment unmapped\n", BAM_FUNMAP);
+ fprintf(pysam_stderr, "\t0x%x\tMUNMAP .. next segment in the template unmapped\n", BAM_FMUNMAP);
+ fprintf(pysam_stderr, "\t0x%x\tREVERSE .. SEQ is reverse complemented\n", BAM_FREVERSE);
+ fprintf(pysam_stderr, "\t0x%x\tMREVERSE .. SEQ of the next segment in the template is reversed\n", BAM_FMREVERSE);
+ fprintf(pysam_stderr, "\t0x%x\tREAD1 .. the first segment in the template\n", BAM_FREAD1);
+ fprintf(pysam_stderr, "\t0x%x\tREAD2 .. the last segment in the template\n", BAM_FREAD2);
+ fprintf(pysam_stderr, "\t0x%x\tSECONDARY .. secondary alignment\n", BAM_FSECONDARY);
+ fprintf(pysam_stderr, "\t0x%x\tQCFAIL .. not passing quality controls\n", BAM_FQCFAIL);
+ fprintf(pysam_stderr, "\t0x%x\tDUP .. PCR or optical duplicate\n", BAM_FDUP);
+ fprintf(pysam_stderr, "\t0x%x\tSUPPLEMENTARY .. supplementary alignment\n", BAM_FSUPPLEMENTARY);
+ fprintf(pysam_stderr, "\n");
}
@@ -62,8 +64,8 @@ int main_flags(int argc, char *argv[])
else
{
int mask = bam_str2flag(argv[1]);
- if ( mask<0 ) { fprintf(pysamerr,"Error: Could not parse \"%s\"\n", argv[1]); usage(); return 1; }
- printf("0x%x\t%d\t%s\n", mask, mask, bam_flag2str(mask));
+ if ( mask<0 ) { fprintf(pysam_stderr,"Error: Could not parse \"%s\"\n", argv[1]); usage(); return 1; }
+ fprintf(pysam_stdout, "0x%x\t%d\t%s\n", mask, mask, bam_flag2str(mask));
}
return 0;
}
diff --git a/samtools/bam_import.c b/samtools/bam_import.c
index d959d0e..96f8158 100644
--- a/samtools/bam_import.c
+++ b/samtools/bam_import.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <zlib.h>
#include <stdio.h>
#include <string.h>
diff --git a/samtools/bam_import.c.pysam.c b/samtools/bam_import.c.pysam.c
index c2854f4..3b5dd4a 100644
--- a/samtools/bam_import.c.pysam.c
+++ b/samtools/bam_import.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <zlib.h>
#include <stdio.h>
#include <string.h>
@@ -60,6 +62,6 @@ bam_header_t *sam_header_read2(const char *fn)
free(str->s); free(str);
header = sam_hdr_parse(samstr.l, samstr.s? samstr.s : "");
free(samstr.s);
- fprintf(pysamerr, "[sam_header_read2] %d sequences loaded.\n", n_targets);
+ fprintf(pysam_stderr, "[sam_header_read2] %d sequences loaded.\n", n_targets);
return header;
}
diff --git a/samtools/bam_index.c b/samtools/bam_index.c
index 83a855d..3a5acf6 100644
--- a/samtools/bam_index.c
+++ b/samtools/bam_index.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/hts.h>
#include <htslib/sam.h>
#include <htslib/khash.h>
diff --git a/samtools/bam_index.c.pysam.c b/samtools/bam_index.c.pysam.c
index ed902c5..6c0efdc 100644
--- a/samtools/bam_index.c.pysam.c
+++ b/samtools/bam_index.c.pysam.c
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/hts.h>
#include <htslib/sam.h>
#include <htslib/khash.h>
@@ -61,12 +63,12 @@ int bam_index(int argc, char *argv[])
case 'c': csi = 1; break;
case 'm': csi = 1; min_shift = atoi(optarg); break;
default:
- index_usage(pysamerr);
+ index_usage(pysam_stderr);
return 1;
}
if (optind == argc) {
- index_usage(stdout);
+ index_usage(pysam_stdout);
return 1;
}
@@ -91,31 +93,31 @@ int bam_idxstats(int argc, char *argv[])
samFile* fp;
if (argc < 2) {
- fprintf(pysamerr, "Usage: samtools idxstats <in.bam>\n");
+ fprintf(pysam_stderr, "Usage: samtools idxstats <in.bam>\n");
return 1;
}
fp = sam_open(argv[1], "r");
- if (fp == NULL) { fprintf(pysamerr, "[%s] fail to open BAM.\n", __func__); return 1; }
+ if (fp == NULL) { fprintf(pysam_stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
header = sam_hdr_read(fp);
if (header == NULL) {
- fprintf(pysamerr, "[%s] failed to read header for '%s'.\n",
+ fprintf(pysam_stderr, "[%s] failed to read header for '%s'.\n",
__func__, argv[1]);
return 1;
}
idx = sam_index_load(fp, argv[1]);
- if (idx == NULL) { fprintf(pysamerr, "[%s] fail to load the index.\n", __func__); return 1; }
+ if (idx == NULL) { fprintf(pysam_stderr, "[%s] fail to load the index.\n", __func__); return 1; }
int i;
for (i = 0; i < header->n_targets; ++i) {
// Print out contig name and length
- printf("%s\t%d", header->target_name[i], header->target_len[i]);
+ fprintf(pysam_stdout, "%s\t%d", header->target_name[i], header->target_len[i]);
// Now fetch info about it from the meta bin
uint64_t u, v;
hts_idx_get_stat(idx, i, &u, &v);
- printf("\t%" PRIu64 "\t%" PRIu64 "\n", u, v);
+ fprintf(pysam_stdout, "\t%" PRIu64 "\t%" PRIu64 "\n", u, v);
}
// Dump information about unmapped reads
- printf("*\t0\t0\t%" PRIu64 "\n", hts_idx_get_n_no_coor(idx));
+ fprintf(pysam_stdout, "*\t0\t0\t%" PRIu64 "\n", hts_idx_get_n_no_coor(idx));
bam_hdr_destroy(header);
hts_idx_destroy(idx);
sam_close(fp);
diff --git a/samtools/bam_lpileup.c b/samtools/bam_lpileup.c
index 0cee701..e20cc92 100644
--- a/samtools/bam_lpileup.c
+++ b/samtools/bam_lpileup.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
diff --git a/samtools/bam_lpileup.c.pysam.c b/samtools/bam_lpileup.c.pysam.c
index bdf4348..9f7f063 100644
--- a/samtools/bam_lpileup.c.pysam.c
+++ b/samtools/bam_lpileup.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
@@ -179,14 +181,14 @@ static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl
}
tv->n_pre = l;
/*
- fprintf(pysamerr, "%d\t", pos+1);
+ fprintf(pysam_stderr, "%d\t", pos+1);
for (i = 0; i < n; ++i) {
const bam_pileup1_t *p = pl + i;
- if (p->is_head) fprintf(pysamerr, "^");
- if (p->is_tail) fprintf(pysamerr, "$");
- fprintf(pysamerr, "%d,", p->level);
+ if (p->is_head) fprintf(pysam_stderr, "^");
+ if (p->is_tail) fprintf(pysam_stderr, "$");
+ fprintf(pysam_stderr, "%d,", p->level);
}
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
*/
return 0;
}
diff --git a/samtools/bam_mate.c b/samtools/bam_mate.c
index 54c3ed3..5b13b2e 100644
--- a/samtools/bam_mate.c
+++ b/samtools/bam_mate.c
@@ -1,6 +1,6 @@
/* bam_mate.c -- fix mate pairing information and clean up flags.
- Copyright (C) 2009, 2011-2014 Genome Research Ltd.
+ Copyright (C) 2009, 2011-2016 Genome Research Ltd.
Portions copyright (C) 2011 Broad Institute.
Portions copyright (C) 2012 Peter Cock, The James Hutton Institute.
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE. */
#include "sam_opts.h"
#include "htslib/kstring.h"
#include "htslib/sam.h"
+#include "samtools.h"
/*
* This function calculates ct tag for two bams, it assumes they are from the same template and
@@ -177,10 +180,10 @@ static void sync_mate(bam1_t* a, bam1_t* b)
}
// currently, this function ONLY works if each read has one hit
-static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
+static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
{
bam_hdr_t *header;
- bam1_t *b[2];
+ bam1_t *b[2] = { NULL, NULL };
int curr, has_prev, pre_end = 0, cur_end = 0;
kstring_t str;
@@ -188,7 +191,7 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
header = sam_hdr_read(in);
if (header == NULL) {
fprintf(stderr, "[bam_mating_core] ERROR: Couldn't read header\n");
- exit(1);
+ return 1;
}
// Accept unknown, unsorted, or queryname sort order, but error on coordinate sorted.
if ((header->l_text > 3) && (strncmp(header->text, "@HD", 3) == 0)) {
@@ -199,10 +202,10 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
// (e.g. must ignore in a @CO comment line later in header)
if ((p != 0) && (p < q)) {
fprintf(stderr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
- exit(1);
+ goto fail;
}
}
- sam_hdr_write(out, header);
+ if (sam_hdr_write(out, header) < 0) goto write_fail;
b[0] = bam_init1();
b[1] = bam_init1();
@@ -211,12 +214,14 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
bam1_t *cur = b[curr], *pre = b[1-curr];
if (cur->core.flag & BAM_FSECONDARY)
{
- if ( !remove_reads ) sam_write1(out, header, cur);
+ if ( !remove_reads ) {
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
+ }
continue; // skip secondary alignments
}
if (cur->core.flag & BAM_FSUPPLEMENTARY)
{
- sam_write1(out, header, cur);
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
continue; // pass supplementary alignments through unchanged (TODO:make them match read they came from)
}
if (cur->core.tid < 0 || cur->core.pos < 0) // If unmapped set the flag
@@ -253,14 +258,18 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
// Write out result
if ( !remove_reads ) {
- sam_write1(out, header, pre);
- sam_write1(out, header, cur);
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
} else {
// If we have to remove reads make sure we do it in a way that doesn't create orphans with bad flags
if(pre->core.flag&BAM_FUNMAP) cur->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
if(cur->core.flag&BAM_FUNMAP) pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
- if(!(pre->core.flag&BAM_FUNMAP)) sam_write1(out, header, pre);
- if(!(cur->core.flag&BAM_FUNMAP)) sam_write1(out, header, cur);
+ if(!(pre->core.flag&BAM_FUNMAP)) {
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
+ }
+ if(!(cur->core.flag&BAM_FUNMAP)) {
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
+ }
}
has_prev = 0;
} else { // unpaired? clear bad info and write it out
@@ -271,7 +280,9 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
}
pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
- if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) sam_write1(out, header, pre);
+ if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) {
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
+ }
}
} else has_prev = 1;
curr = 1 - curr;
@@ -287,12 +298,21 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
- sam_write1(out, header, pre);
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
}
bam_hdr_destroy(header);
bam_destroy1(b[0]);
bam_destroy1(b[1]);
free(str.s);
+ return 0;
+
+ write_fail:
+ print_error_errno("fixmate", "Couldn't write to output file");
+ fail:
+ bam_hdr_destroy(header);
+ bam_destroy1(b[0]);
+ bam_destroy1(b[1]);
+ return 1;
}
void usage(FILE* where)
@@ -315,8 +335,8 @@ void usage(FILE* where)
int bam_mating(int argc, char *argv[])
{
- samFile *in, *out;
- int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0;
+ samFile *in = NULL, *out = NULL;
+ int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
char wmode[3] = {'w', 'b', 0};
static const struct option lopts[] = {
@@ -333,30 +353,40 @@ int bam_mating(int argc, char *argv[])
case 'c': add_ct = 1; break;
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
/* else fall-through */
- case '?': usage(stderr); return 1;
+ case '?': usage(stderr); goto fail;
}
}
- if (optind+1 >= argc) { usage(stderr); return 1; }
+ if (optind+1 >= argc) { usage(stderr); goto fail; }
// init
if ((in = sam_open_format(argv[optind], "rb", &ga.in)) == NULL) {
- fprintf(stderr, "[bam_mating] cannot open input file\n");
- return 1;
+ print_error_errno("fixmate", "cannot open input file");
+ goto fail;
}
sam_open_mode(wmode+1, argv[optind+1], NULL);
if ((out = sam_open_format(argv[optind+1], wmode, &ga.out)) == NULL) {
- fprintf(stderr, "[bam_mating] cannot open output file\n");
- return 1;
+ print_error_errno("fixmate", "cannot open output file");
+ goto fail;
}
// run
- bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
+ res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
// cleanup
- sam_close(in); sam_close(out);
+ sam_close(in);
+ if (sam_close(out) < 0) {
+ fprintf(stderr, "[bam_mating] error while closing output file\n");
+ res = 1;
+ }
+
sam_global_args_free(&ga);
+ return res;
- return 0;
+ fail:
+ if (in) sam_close(in);
+ if (out) sam_close(out);
+ sam_global_args_free(&ga);
+ return 1;
}
diff --git a/samtools/bam_mate.c.pysam.c b/samtools/bam_mate.c.pysam.c
index c7900a1..a416d07 100644
--- a/samtools/bam_mate.c.pysam.c
+++ b/samtools/bam_mate.c.pysam.c
@@ -2,7 +2,7 @@
/* bam_mate.c -- fix mate pairing information and clean up flags.
- Copyright (C) 2009, 2011-2014 Genome Research Ltd.
+ Copyright (C) 2009, 2011-2016 Genome Research Ltd.
Portions copyright (C) 2011 Broad Institute.
Portions copyright (C) 2012 Peter Cock, The James Hutton Institute.
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
@@ -34,6 +36,7 @@ DEALINGS IN THE SOFTWARE. */
#include "sam_opts.h"
#include "htslib/kstring.h"
#include "htslib/sam.h"
+#include "samtools.h"
/*
* This function calculates ct tag for two bams, it assumes they are from the same template and
@@ -179,18 +182,18 @@ static void sync_mate(bam1_t* a, bam1_t* b)
}
// currently, this function ONLY works if each read has one hit
-static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
+static int bam_mating_core(samFile* in, samFile* out, int remove_reads, int proper_pair_check, int add_ct)
{
bam_hdr_t *header;
- bam1_t *b[2];
+ bam1_t *b[2] = { NULL, NULL };
int curr, has_prev, pre_end = 0, cur_end = 0;
kstring_t str;
str.l = str.m = 0; str.s = 0;
header = sam_hdr_read(in);
if (header == NULL) {
- fprintf(pysamerr, "[bam_mating_core] ERROR: Couldn't read header\n");
- exit(1);
+ fprintf(pysam_stderr, "[bam_mating_core] ERROR: Couldn't read header\n");
+ return 1;
}
// Accept unknown, unsorted, or queryname sort order, but error on coordinate sorted.
if ((header->l_text > 3) && (strncmp(header->text, "@HD", 3) == 0)) {
@@ -200,11 +203,11 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
// Looking for SO:coordinate within the @HD line only
// (e.g. must ignore in a @CO comment line later in header)
if ((p != 0) && (p < q)) {
- fprintf(pysamerr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
- exit(1);
+ fprintf(pysam_stderr, "[bam_mating_core] ERROR: Coordinate sorted, require grouped/sorted by queryname.\n");
+ goto fail;
}
}
- sam_hdr_write(out, header);
+ if (sam_hdr_write(out, header) < 0) goto write_fail;
b[0] = bam_init1();
b[1] = bam_init1();
@@ -213,12 +216,14 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
bam1_t *cur = b[curr], *pre = b[1-curr];
if (cur->core.flag & BAM_FSECONDARY)
{
- if ( !remove_reads ) sam_write1(out, header, cur);
+ if ( !remove_reads ) {
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
+ }
continue; // skip secondary alignments
}
if (cur->core.flag & BAM_FSUPPLEMENTARY)
{
- sam_write1(out, header, cur);
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
continue; // pass supplementary alignments through unchanged (TODO:make them match read they came from)
}
if (cur->core.tid < 0 || cur->core.pos < 0) // If unmapped set the flag
@@ -255,14 +260,18 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
// Write out result
if ( !remove_reads ) {
- sam_write1(out, header, pre);
- sam_write1(out, header, cur);
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
} else {
// If we have to remove reads make sure we do it in a way that doesn't create orphans with bad flags
if(pre->core.flag&BAM_FUNMAP) cur->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
if(cur->core.flag&BAM_FUNMAP) pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
- if(!(pre->core.flag&BAM_FUNMAP)) sam_write1(out, header, pre);
- if(!(cur->core.flag&BAM_FUNMAP)) sam_write1(out, header, cur);
+ if(!(pre->core.flag&BAM_FUNMAP)) {
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
+ }
+ if(!(cur->core.flag&BAM_FUNMAP)) {
+ if (sam_write1(out, header, cur) < 0) goto write_fail;
+ }
}
has_prev = 0;
} else { // unpaired? clear bad info and write it out
@@ -273,7 +282,9 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
}
pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
- if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) sam_write1(out, header, pre);
+ if ( !remove_reads || !(pre->core.flag&BAM_FUNMAP) ) {
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
+ }
}
} else has_prev = 1;
curr = 1 - curr;
@@ -289,12 +300,21 @@ static void bam_mating_core(samFile* in, samFile* out, int remove_reads, int pro
pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
pre->core.flag &= ~(BAM_FPAIRED|BAM_FMREVERSE|BAM_FPROPER_PAIR);
- sam_write1(out, header, pre);
+ if (sam_write1(out, header, pre) < 0) goto write_fail;
}
bam_hdr_destroy(header);
bam_destroy1(b[0]);
bam_destroy1(b[1]);
free(str.s);
+ return 0;
+
+ write_fail:
+ print_error_errno("fixmate", "Couldn't write to output file");
+ fail:
+ bam_hdr_destroy(header);
+ bam_destroy1(b[0]);
+ bam_destroy1(b[1]);
+ return 1;
}
void usage(FILE* where)
@@ -310,15 +330,15 @@ void usage(FILE* where)
fprintf(where,
"\n"
-"As elsewhere in samtools, use '-' as the filename for stdin/stdout. The input\n"
+"As elsewhere in samtools, use '-' as the filename for stdin/pysam_stdout. The input\n"
"file must be grouped by read name (e.g. sorted by name). Coordinated sorted\n"
"input is not accepted.\n");
}
int bam_mating(int argc, char *argv[])
{
- samFile *in, *out;
- int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0;
+ samFile *in = NULL, *out = NULL;
+ int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0, res = 1;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
char wmode[3] = {'w', 'b', 0};
static const struct option lopts[] = {
@@ -327,7 +347,7 @@ int bam_mating(int argc, char *argv[])
};
// parse args
- if (argc == 1) { usage(stdout); return 0; }
+ if (argc == 1) { usage(pysam_stdout); return 0; }
while ((c = getopt_long(argc, argv, "rpcO:", lopts, NULL)) >= 0) {
switch (c) {
case 'r': remove_reads = 1; break;
@@ -335,30 +355,40 @@ int bam_mating(int argc, char *argv[])
case 'c': add_ct = 1; break;
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
/* else fall-through */
- case '?': usage(pysamerr); return 1;
+ case '?': usage(pysam_stderr); goto fail;
}
}
- if (optind+1 >= argc) { usage(pysamerr); return 1; }
+ if (optind+1 >= argc) { usage(pysam_stderr); goto fail; }
// init
if ((in = sam_open_format(argv[optind], "rb", &ga.in)) == NULL) {
- fprintf(pysamerr, "[bam_mating] cannot open input file\n");
- return 1;
+ print_error_errno("fixmate", "cannot open input file");
+ goto fail;
}
sam_open_mode(wmode+1, argv[optind+1], NULL);
if ((out = sam_open_format(argv[optind+1], wmode, &ga.out)) == NULL) {
- fprintf(pysamerr, "[bam_mating] cannot open output file\n");
- return 1;
+ print_error_errno("fixmate", "cannot open output file");
+ goto fail;
}
// run
- bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
+ res = bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct);
// cleanup
- sam_close(in); sam_close(out);
+ sam_close(in);
+ if (sam_close(out) < 0) {
+ fprintf(pysam_stderr, "[bam_mating] error while closing output file\n");
+ res = 1;
+ }
+
sam_global_args_free(&ga);
+ return res;
- return 0;
+ fail:
+ if (in) sam_close(in);
+ if (out) sam_close(out);
+ sam_global_args_free(&ga);
+ return 1;
}
diff --git a/samtools/bam_md.c b/samtools/bam_md.c
index 30f3243..71206cd 100644
--- a/samtools/bam_md.c
+++ b/samtools/bam_md.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <string.h>
#include <ctype.h>
@@ -33,6 +35,7 @@ DEALINGS IN THE SOFTWARE. */
#include "htslib/kstring.h"
#include "kprobaln.h"
#include "sam_opts.h"
+#include "samtools.h"
#define USE_EQUAL 1
#define DROP_TAG 2
@@ -349,11 +352,11 @@ int calmd_usage() {
int bam_fillmd(int argc, char *argv[])
{
int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
- samFile *fp, *fpout = 0;
- bam_hdr_t *header;
- faidx_t *fai;
- char *ref = 0, mode_w[8], *ref_file;
- bam1_t *b;
+ samFile *fp = NULL, *fpout = NULL;
+ bam_hdr_t *header = NULL;
+ faidx_t *fai = NULL;
+ char *ref = NULL, mode_w[8], *ref_file;
+ bam1_t *b = NULL;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
static const struct option lopts[] = {
@@ -391,35 +394,51 @@ int bam_fillmd(int argc, char *argv[])
if (optind + (ga.reference == NULL) >= argc)
return calmd_usage();
fp = sam_open_format(argv[optind], "r", &ga.in);
- if (fp == 0) return 1;
+ if (fp == NULL) {
+ print_error_errno("calmd", "Failed to open input file '%s'", argv[optind]);
+ return 1;
+ }
header = sam_hdr_read(fp);
if (header == NULL || header->n_targets == 0) {
fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
- return 1;
+ goto fail;
}
fpout = sam_open_format("-", mode_w, &ga.out);
- sam_hdr_write(fpout, header);
+ if (fpout == NULL) {
+ print_error_errno("calmd", "Failed to open output");
+ goto fail;
+ }
+ if (sam_hdr_write(fpout, header) < 0) {
+ print_error_errno("calmd", "Failed to write sam header");
+ goto fail;
+ }
ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
fai = fai_load(ref_file);
if (!fai) {
- perror(ref_file);
- return 1;
+ print_error_errno("calmd", "Failed to open reference file '%s'", ref_file);
+ goto fail;
}
b = bam_init1();
+ if (!b) {
+ fprintf(stderr, "[bam_fillmd] Failed to allocate bam struct\n");
+ goto fail;
+ }
while ((ret = sam_read1(fp, header, b)) >= 0) {
if (b->core.tid >= 0) {
if (tid != b->core.tid) {
free(ref);
ref = fai_fetch(fai, header->target_name[b->core.tid], &len);
tid = b->core.tid;
- if (ref == 0)
+ if (ref == 0) { // FIXME: Should this always be fatal?
fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
header->target_name[tid]);
+ if (is_realn || capQ > 10) goto fail; // Would otherwise crash
+ }
}
if (is_realn) bam_prob_realn_core(b, ref, len, baq_flag);
if (capQ > 10) {
@@ -428,7 +447,14 @@ int bam_fillmd(int argc, char *argv[])
}
if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
}
- sam_write1(fpout, header, b);
+ if (sam_write1(fpout, header, b) < 0) {
+ print_error_errno("calmd", "failed to write to output file");
+ goto fail;
+ }
+ }
+ if (ret < -1) {
+ fprintf(stderr, "[bam_fillmd] Error reading input.\n");
+ goto fail;
}
bam_destroy1(b);
bam_hdr_destroy(header);
@@ -436,6 +462,18 @@ int bam_fillmd(int argc, char *argv[])
free(ref);
fai_destroy(fai);
sam_close(fp);
- sam_close(fpout);
+ if (sam_close(fpout) < 0) {
+ fprintf(stderr, "[bam_fillmd] error when closing output file\n");
+ return 1;
+ }
return 0;
+
+ fail:
+ free(ref);
+ if (b) bam_destroy1(b);
+ if (header) bam_hdr_destroy(header);
+ if (fai) fai_destroy(fai);
+ if (fp) sam_close(fp);
+ if (fpout) sam_close(fpout);
+ return 1;
}
diff --git a/samtools/bam_md.c.pysam.c b/samtools/bam_md.c.pysam.c
index 070f9cd..d00c01d 100644
--- a/samtools/bam_md.c.pysam.c
+++ b/samtools/bam_md.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <string.h>
#include <ctype.h>
@@ -35,6 +37,7 @@ DEALINGS IN THE SOFTWARE. */
#include "htslib/kstring.h"
#include "kprobaln.h"
#include "sam_opts.h"
+#include "samtools.h"
#define USE_EQUAL 1
#define DROP_TAG 2
@@ -115,7 +118,7 @@ void bam_fillmd1_core(bam1_t *b, char *ref, int ref_len, int flag, int max_nm)
if (old_nm) old_nm_i = bam_aux2i(old_nm);
if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
else if (nm != old_nm_i) {
- fprintf(pysamerr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
+ fprintf(pysam_stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam_get_qname(b), old_nm_i, nm);
bam_aux_del(b, old_nm);
bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
}
@@ -133,7 +136,7 @@ void bam_fillmd1_core(bam1_t *b, char *ref, int ref_len, int flag, int max_nm)
if (i < str->l) is_diff = 1;
} else is_diff = 1;
if (is_diff) {
- fprintf(pysamerr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
+ fprintf(pysam_stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam_get_qname(b), old_md+1, str->s);
bam_aux_del(b, old_md);
bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
}
@@ -207,7 +210,7 @@ int bam_cap_mapQ(bam1_t *b, char *ref, int ref_len, int thres)
if (t > thres) return -1;
if (t < 0) t = 0;
t = sqrt((thres - t) / thres) * thres;
-// fprintf(pysamerr, "%s %lf %d\n", bam_get_qname(b), t, q);
+// fprintf(pysam_stderr, "%s %lf %d\n", bam_get_qname(b), t, q);
return (int)(t + .499);
}
@@ -333,7 +336,7 @@ int bam_prob_realn(bam1_t *b, const char *ref)
}
int calmd_usage() {
- fprintf(pysamerr,
+ fprintf(pysam_stderr,
"Usage: samtools calmd [-eubrAES] <aln.bam> <ref.fasta>\n"
"Options:\n"
" -e change identical bases to '='\n"
@@ -344,18 +347,18 @@ int calmd_usage() {
" -r compute the BQ tag (without -A) or cap baseQ by BAQ (with -A)\n"
" -E extended BAQ for better sensitivity but lower specificity\n");
- sam_global_opt_help(pysamerr, "-....");
+ sam_global_opt_help(pysam_stderr, "-....");
return 1;
}
int bam_fillmd(int argc, char *argv[])
{
int c, flt_flag, tid = -2, ret, len, is_bam_out, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
- samFile *fp, *fpout = 0;
- bam_hdr_t *header;
- faidx_t *fai;
- char *ref = 0, mode_w[8], *ref_file;
- bam1_t *b;
+ samFile *fp = NULL, *fpout = NULL;
+ bam_hdr_t *header = NULL;
+ faidx_t *fai = NULL;
+ char *ref = NULL, mode_w[8], *ref_file;
+ bam1_t *b = NULL;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
static const struct option lopts[] = {
@@ -382,7 +385,7 @@ int bam_fillmd(int argc, char *argv[])
case 'A': baq_flag |= 1; break;
case 'E': baq_flag |= 2; break;
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
- fprintf(pysamerr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
+ fprintf(pysam_stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
/* else fall-through */
case '?': return calmd_usage();
}
@@ -393,35 +396,51 @@ int bam_fillmd(int argc, char *argv[])
if (optind + (ga.reference == NULL) >= argc)
return calmd_usage();
fp = sam_open_format(argv[optind], "r", &ga.in);
- if (fp == 0) return 1;
+ if (fp == NULL) {
+ print_error_errno("calmd", "Failed to open input file '%s'", argv[optind]);
+ return 1;
+ }
header = sam_hdr_read(fp);
if (header == NULL || header->n_targets == 0) {
- fprintf(pysamerr, "[bam_fillmd] input SAM does not have header. Abort!\n");
- return 1;
+ fprintf(pysam_stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
+ goto fail;
+ }
+
+ fpout = sam_open_format(pysam_stdout_fn, mode_w, &ga.out);
+ if (fpout == NULL) {
+ print_error_errno("calmd", "Failed to open output");
+ goto fail;
+ }
+ if (sam_hdr_write(fpout, header) < 0) {
+ print_error_errno("calmd", "Failed to write sam header");
+ goto fail;
}
-
- fpout = sam_open_format("-", mode_w, &ga.out);
- sam_hdr_write(fpout, header);
ref_file = argc > optind + 1 ? argv[optind+1] : ga.reference;
fai = fai_load(ref_file);
if (!fai) {
- perror(ref_file);
- return 1;
+ print_error_errno("calmd", "Failed to open reference file '%s'", ref_file);
+ goto fail;
}
b = bam_init1();
+ if (!b) {
+ fprintf(pysam_stderr, "[bam_fillmd] Failed to allocate bam struct\n");
+ goto fail;
+ }
while ((ret = sam_read1(fp, header, b)) >= 0) {
if (b->core.tid >= 0) {
if (tid != b->core.tid) {
free(ref);
ref = fai_fetch(fai, header->target_name[b->core.tid], &len);
tid = b->core.tid;
- if (ref == 0)
- fprintf(pysamerr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
+ if (ref == 0) { // FIXME: Should this always be fatal?
+ fprintf(pysam_stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
header->target_name[tid]);
+ if (is_realn || capQ > 10) goto fail; // Would otherwise crash
+ }
}
if (is_realn) bam_prob_realn_core(b, ref, len, baq_flag);
if (capQ > 10) {
@@ -430,7 +449,14 @@ int bam_fillmd(int argc, char *argv[])
}
if (ref) bam_fillmd1_core(b, ref, len, flt_flag, max_nm);
}
- sam_write1(fpout, header, b);
+ if (sam_write1(fpout, header, b) < 0) {
+ print_error_errno("calmd", "failed to write to output file");
+ goto fail;
+ }
+ }
+ if (ret < -1) {
+ fprintf(pysam_stderr, "[bam_fillmd] Error reading input.\n");
+ goto fail;
}
bam_destroy1(b);
bam_hdr_destroy(header);
@@ -438,6 +464,18 @@ int bam_fillmd(int argc, char *argv[])
free(ref);
fai_destroy(fai);
sam_close(fp);
- sam_close(fpout);
+ if (sam_close(fpout) < 0) {
+ fprintf(pysam_stderr, "[bam_fillmd] error when closing output file\n");
+ return 1;
+ }
return 0;
+
+ fail:
+ free(ref);
+ if (b) bam_destroy1(b);
+ if (header) bam_hdr_destroy(header);
+ if (fai) fai_destroy(fai);
+ if (fp) sam_close(fp);
+ if (fpout) sam_close(fpout);
+ return 1;
}
diff --git a/samtools/bam_plbuf.c b/samtools/bam_plbuf.c
index a579b77..12ea250 100644
--- a/samtools/bam_plbuf.c
+++ b/samtools/bam_plbuf.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
diff --git a/samtools/bam_plbuf.c.pysam.c b/samtools/bam_plbuf.c.pysam.c
index 5b8dda0..76c1ac1 100644
--- a/samtools/bam_plbuf.c.pysam.c
+++ b/samtools/bam_plbuf.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
diff --git a/samtools/bam_plcmd.c b/samtools/bam_plcmd.c
index 9e00836..dc12bf3 100644
--- a/samtools/bam_plcmd.c
+++ b/samtools/bam_plcmd.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -785,7 +787,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -b, --bam-list FILE list of input BAM filenames, one per line\n"
" -B, --no-BAQ disable BAQ (per-Base Alignment Quality)\n"
" -C, --adjust-MQ INT adjust mapping quality; recommended:50, disable:0 [0]\n"
-" -d, --max-depth INT max per-BAM depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+" -d, --max-depth INT max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
fprintf(fp,
" -E, --redo-BAQ recalculate BAQ on the fly, ignore existing BQs\n"
" -f, --fasta-ref FILE faidx indexed reference sequence file\n"
@@ -826,7 +828,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -h, --tandem-qual INT coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
fprintf(fp,
" -I, --skip-indels do not perform indel calling\n"
-" -L, --max-idepth INT maximum per-sample depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+" -L, --max-idepth INT maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
fprintf(fp,
" -m, --min-ireads INT minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
fprintf(fp,
diff --git a/samtools/bam_plcmd.c.pysam.c b/samtools/bam_plcmd.c.pysam.c
index bafbb92..650e818 100644
--- a/samtools/bam_plcmd.c.pysam.c
+++ b/samtools/bam_plcmd.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
@@ -248,7 +250,7 @@ static int mplp_func(void *data, bam1_t *b)
if (ma->conf->fai && b->core.tid >= 0) {
has_ref = mplp_get_ref(ma, b->core.tid, &ref, &ref_len);
if (has_ref && ref_len <= b->core.pos) { // exclude reads outside of the reference sequence
- fprintf(pysamerr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
+ fprintf(pysam_stderr,"[%s] Skipping because %d is outside of %d [ref:%d]\n",
__func__, b->core.pos, ref_len, b->core.tid);
skip = 1;
continue;
@@ -285,7 +287,7 @@ static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf,
if (id < 0) id = bam_smpl_rg2smid(sm, fn[i], 0, buf);
if (id < 0 || id >= m->n) {
assert(q); // otherwise a bug
- fprintf(pysamerr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
+ fprintf(pysam_stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
exit(EXIT_FAILURE);
}
if (m->n_plp[id] == m->m_plp[id]) {
@@ -336,7 +338,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
sm = bam_smpl_init();
if (n == 0) {
- fprintf(pysamerr,"[%s] no input file/data given\n", __func__);
+ fprintf(pysam_stderr,"[%s] no input file/data given\n", __func__);
exit(EXIT_FAILURE);
}
@@ -347,15 +349,15 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
data[i]->fp = sam_open_format(fn[i], "rb", &conf->ga.in);
if ( !data[i]->fp )
{
- fprintf(pysamerr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
+ fprintf(pysam_stderr, "[%s] failed to open %s: %s\n", __func__, fn[i], strerror(errno));
exit(EXIT_FAILURE);
}
if (hts_set_opt(data[i]->fp, CRAM_OPT_DECODE_MD, 0)) {
- fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+ fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
exit(EXIT_FAILURE);
}
if (conf->fai_fname && hts_set_fai_filename(data[i]->fp, conf->fai_fname) != 0) {
- fprintf(pysamerr, "[%s] failed to process %s: %s\n",
+ fprintf(pysam_stderr, "[%s] failed to process %s: %s\n",
__func__, conf->fai_fname, strerror(errno));
exit(EXIT_FAILURE);
}
@@ -363,7 +365,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
data[i]->ref = &mp_ref;
h_tmp = sam_hdr_read(data[i]->fp);
if ( !h_tmp ) {
- fprintf(pysamerr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
+ fprintf(pysam_stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
exit(EXIT_FAILURE);
}
bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text);
@@ -372,11 +374,11 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
if (conf->reg) {
hts_idx_t *idx = sam_index_load(data[i]->fp, fn[i]);
if (idx == NULL) {
- fprintf(pysamerr, "[%s] fail to load index for %s\n", __func__, fn[i]);
+ fprintf(pysam_stderr, "[%s] fail to load index for %s\n", __func__, fn[i]);
exit(EXIT_FAILURE);
}
if ( (data[i]->iter=sam_itr_querys(idx, h_tmp, conf->reg)) == 0) {
- fprintf(pysamerr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
+ fprintf(pysam_stderr, "[E::%s] fail to parse region '%s' with %s\n", __func__, conf->reg, fn[i]);
exit(EXIT_FAILURE);
}
if (i == 0) beg0 = data[i]->iter->beg, end0 = data[i]->iter->end;
@@ -401,7 +403,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
gplp.m_plp = calloc(sm->n, sizeof(int));
gplp.plp = calloc(sm->n, sizeof(bam_pileup1_t*));
- fprintf(pysamerr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
+ fprintf(pysam_stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
// write the VCF header
if (conf->flag & MPLP_BCF)
{
@@ -413,7 +415,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
bcf_fp = bcf_open(conf->output_fname? conf->output_fname : "-", mode);
if (bcf_fp == NULL) {
- fprintf(pysamerr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
+ fprintf(pysam_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname? conf->output_fname : "standard output", strerror(errno));
exit(EXIT_FAILURE);
}
@@ -529,10 +531,10 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
}
}
else {
- pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : stdout;
+ pileup_fp = conf->output_fname? fopen(conf->output_fname, "w") : pysam_stdout;
if (pileup_fp == NULL) {
- fprintf(pysamerr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
+ fprintf(pysam_stderr, "[%s] failed to write to %s: %s\n", __func__, conf->output_fname, strerror(errno));
exit(EXIT_FAILURE);
}
}
@@ -542,10 +544,10 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
if ( conf->flag & MPLP_SMART_OVERLAPS ) bam_mplp_init_overlaps(iter);
max_depth = conf->max_depth;
if (max_depth * sm->n > 1<<20)
- fprintf(pysamerr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
+ fprintf(pysam_stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
if (max_depth * sm->n < 8000) {
max_depth = 8000 / sm->n;
- fprintf(pysamerr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
+ fprintf(pysam_stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
}
max_indel_depth = conf->max_indel_depth * sm->n;
bam_mplp_set_maxcnt(iter, max_depth);
@@ -639,7 +641,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn)
if ( c < conf->min_baseQ ) continue;
if (last++) putc(',', pileup_fp);
- fprintf(pileup_fp, "%d", plp[i][j].qpos + 1); // FIXME: printf() is very slow...
+ fprintf(pileup_fp, "%d", plp[i][j].qpos + 1); // FIXME: fprintf(pysam_stdout, ) is very slow...
}
}
}
@@ -695,7 +697,7 @@ int read_file_list(const char *file_list,int *n,char **argv[])
FILE *fh = fopen(file_list,"r");
if ( !fh )
{
- fprintf(pysamerr,"%s: %s\n", file_list,strerror(errno));
+ fprintf(pysam_stderr,"%s: %s\n", file_list,strerror(errno));
return 1;
}
@@ -717,9 +719,9 @@ int read_file_list(const char *file_list,int *n,char **argv[])
for (i=0; i<len; i++)
if (!isprint(buf[i])) { safe_to_print = 0; break; }
if ( safe_to_print )
- fprintf(pysamerr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
+ fprintf(pysam_stderr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
else
- fprintf(pysamerr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
+ fprintf(pysam_stderr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
return 1;
}
@@ -730,7 +732,7 @@ int read_file_list(const char *file_list,int *n,char **argv[])
fclose(fh);
if ( !nfiles )
{
- fprintf(pysamerr,"No files read from %s\n", file_list);
+ fprintf(pysam_stderr,"No files read from %s\n", file_list);
return 1;
}
*argv = files;
@@ -746,11 +748,11 @@ int parse_format_flag(const char *str)
for(i=0; i<n_tags; i++)
{
if ( !strcasecmp(tags[i],"DP") ) flag |= B2B_FMT_DP;
- else if ( !strcasecmp(tags[i],"DV") ) { flag |= B2B_FMT_DV; fprintf(pysamerr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
+ else if ( !strcasecmp(tags[i],"DV") ) { flag |= B2B_FMT_DV; fprintf(pysam_stderr, "[warning] tag DV functional, but deprecated. Please switch to `AD` in future.\n"); }
else if ( !strcasecmp(tags[i],"SP") ) flag |= B2B_FMT_SP;
- else if ( !strcasecmp(tags[i],"DP4") ) { flag |= B2B_FMT_DP4; fprintf(pysamerr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
- else if ( !strcasecmp(tags[i],"DPR") ) { flag |= B2B_FMT_DPR; fprintf(pysamerr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
- else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(pysamerr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
+ else if ( !strcasecmp(tags[i],"DP4") ) { flag |= B2B_FMT_DP4; fprintf(pysam_stderr, "[warning] tag DP4 functional, but deprecated. Please switch to `ADF` and `ADR` in future.\n"); }
+ else if ( !strcasecmp(tags[i],"DPR") ) { flag |= B2B_FMT_DPR; fprintf(pysam_stderr, "[warning] tag DPR functional, but deprecated. Please switch to `AD` in future.\n"); }
+ else if ( !strcasecmp(tags[i],"INFO/DPR") ) { flag |= B2B_INFO_DPR; fprintf(pysam_stderr, "[warning] tag INFO/DPR functional, but deprecated. Please switch to `INFO/AD` in future.\n"); }
else if ( !strcasecmp(tags[i],"AD") ) flag |= B2B_FMT_AD;
else if ( !strcasecmp(tags[i],"ADF") ) flag |= B2B_FMT_ADF;
else if ( !strcasecmp(tags[i],"ADR") ) flag |= B2B_FMT_ADR;
@@ -759,7 +761,7 @@ int parse_format_flag(const char *str)
else if ( !strcasecmp(tags[i],"INFO/ADR") ) flag |= B2B_INFO_ADR;
else
{
- fprintf(pysamerr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
+ fprintf(pysam_stderr,"Could not parse tag \"%s\" in \"%s\"\n", tags[i], str);
exit(EXIT_FAILURE);
}
free(tags[i]);
@@ -787,7 +789,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -b, --bam-list FILE list of input BAM filenames, one per line\n"
" -B, --no-BAQ disable BAQ (per-Base Alignment Quality)\n"
" -C, --adjust-MQ INT adjust mapping quality; recommended:50, disable:0 [0]\n"
-" -d, --max-depth INT max per-BAM depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
+" -d, --max-depth INT max per-file depth; avoids excessive memory usage [%d]\n", mplp->max_depth);
fprintf(fp,
" -E, --redo-BAQ recalculate BAQ on the fly, ignore existing BQs\n"
" -f, --fasta-ref FILE faidx indexed reference sequence file\n"
@@ -828,7 +830,7 @@ static void print_usage(FILE *fp, const mplp_conf_t *mplp)
" -h, --tandem-qual INT coefficient for homopolymer errors [%d]\n", mplp->tandemQ);
fprintf(fp,
" -I, --skip-indels do not perform indel calling\n"
-" -L, --max-idepth INT maximum per-sample depth for INDEL calling [%d]\n", mplp->max_indel_depth);
+" -L, --max-idepth INT maximum per-file depth for INDEL calling [%d]\n", mplp->max_indel_depth);
fprintf(fp,
" -m, --min-ireads INT minimum number gapped reads for indel candidates [%d]\n", mplp->min_support);
fprintf(fp,
@@ -921,11 +923,11 @@ int bam_mpileup(int argc, char *argv[])
case 'x': mplp.flag &= ~MPLP_SMART_OVERLAPS; break;
case 1 :
mplp.rflag_require = bam_str2flag(optarg);
- if ( mplp.rflag_require<0 ) { fprintf(pysamerr,"Could not parse --rf %s\n", optarg); return 1; }
+ if ( mplp.rflag_require<0 ) { fprintf(pysam_stderr,"Could not parse --rf %s\n", optarg); return 1; }
break;
case 2 :
mplp.rflag_filter = bam_str2flag(optarg);
- if ( mplp.rflag_filter<0 ) { fprintf(pysamerr,"Could not parse --ff %s\n", optarg); return 1; }
+ if ( mplp.rflag_filter<0 ) { fprintf(pysam_stderr,"Could not parse --ff %s\n", optarg); return 1; }
break;
case 3 : mplp.output_fname = optarg; break;
case 4 : mplp.openQ = atoi(optarg); break;
@@ -949,9 +951,9 @@ int bam_mpileup(int argc, char *argv[])
case 'v': mplp.flag |= MPLP_BCF | MPLP_VCF; break;
case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_BCF; break;
case 'B': mplp.flag &= ~MPLP_REALN; break;
- case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(pysamerr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break;
- case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(pysamerr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break;
- case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(pysamerr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break;
+ case 'D': mplp.fmt_flag |= B2B_FMT_DP; fprintf(pysam_stderr, "[warning] samtools mpileup option `-D` is functional, but deprecated. Please switch to `-t DP` in future.\n"); break;
+ case 'S': mplp.fmt_flag |= B2B_FMT_SP; fprintf(pysam_stderr, "[warning] samtools mpileup option `-S` is functional, but deprecated. Please switch to `-t SP` in future.\n"); break;
+ case 'V': mplp.fmt_flag |= B2B_FMT_DV; fprintf(pysam_stderr, "[warning] samtools mpileup option `-V` is functional, but deprecated. Please switch to `-t DV` in future.\n"); break;
case 'I': mplp.flag |= MPLP_NO_INDEL; break;
case 'E': mplp.flag |= MPLP_REDO_BAQ; break;
case '6': mplp.flag |= MPLP_ILLUMINA13; break;
@@ -981,7 +983,7 @@ int bam_mpileup(int argc, char *argv[])
char buf[1024];
mplp.rghash = khash_str2int_init();
if ((fp_rg = fopen(optarg, "r")) == NULL)
- fprintf(pysamerr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
+ fprintf(pysam_stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
khash_str2int_inc(mplp.rghash, strdup(buf));
fclose(fp_rg);
@@ -992,7 +994,7 @@ int bam_mpileup(int argc, char *argv[])
if (parse_sam_global_opt(c, optarg, lopts, &mplp.ga) == 0) break;
/* else fall-through */
case '?':
- print_usage(pysamerr, &mplp);
+ print_usage(pysam_stderr, &mplp);
return 1;
}
}
@@ -1004,13 +1006,13 @@ int bam_mpileup(int argc, char *argv[])
if ( !(mplp.flag&MPLP_REALN) && mplp.flag&MPLP_REDO_BAQ )
{
- fprintf(pysamerr,"Error: The -B option cannot be combined with -E\n");
+ fprintf(pysam_stderr,"Error: The -B option cannot be combined with -E\n");
return 1;
}
if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
if (argc == 1)
{
- print_usage(pysamerr, &mplp);
+ print_usage(pysam_stderr, &mplp);
return 1;
}
int ret;
diff --git a/samtools/bam_quickcheck.c b/samtools/bam_quickcheck.c
index 8d1e7ef..6c3c664 100644
--- a/samtools/bam_quickcheck.c
+++ b/samtools/bam_quickcheck.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/hts.h>
#include <htslib/sam.h>
#include <htslib/bgzf.h>
@@ -36,6 +38,21 @@ static void usage_quickcheck(FILE *write_to)
"Options:\n"
" -v verbose output (repeat for more verbosity)\n"
"\n"
+"Notes:\n"
+"\n"
+"1. In order to use this command effectively, you should check its exit status;\n"
+" without any -v options it will NOT print any output, even when some files\n"
+" fail the check. One way to use quickcheck might be as a check that all\n"
+" BAM files in a directory are okay:\n"
+"\n"
+"\tsamtools quickcheck *.bam && echo 'all ok' \\\n"
+"\t || echo 'fail!'\n"
+"\n"
+" To also determine which files have failed, use the -v option:\n"
+"\n"
+"\tsamtools quickcheck -v *.bam > bad_bams.fofn \\\n"
+"\t && echo 'all ok' \\\n"
+"\t || echo 'some files failed check, see bad_bams.fofn'\n"
);
}
@@ -121,7 +138,10 @@ int main_quickcheck(int argc, char** argv)
}
}
- hts_close(hts_fp);
+ if (hts_close(hts_fp) < 0) {
+ file_state |= 32;
+ if (verbose >= 2) fprintf(stderr, "%s did not close cleanly\n", fn);
+ }
}
if (file_state > 0 && verbose >= 1) {
diff --git a/samtools/bam_quickcheck.c.pysam.c b/samtools/bam_quickcheck.c.pysam.c
index b589d46..26dbeb9 100644
--- a/samtools/bam_quickcheck.c.pysam.c
+++ b/samtools/bam_quickcheck.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/hts.h>
#include <htslib/sam.h>
#include <htslib/bgzf.h>
@@ -38,6 +40,21 @@ static void usage_quickcheck(FILE *write_to)
"Options:\n"
" -v verbose output (repeat for more verbosity)\n"
"\n"
+"Notes:\n"
+"\n"
+"1. In order to use this command effectively, you should check its exit status;\n"
+" without any -v options it will NOT print any output, even when some files\n"
+" fail the check. One way to use quickcheck might be as a check that all\n"
+" BAM files in a directory are okay:\n"
+"\n"
+"\tsamtools quickcheck *.bam && echo 'all ok' \\\n"
+"\t || echo 'fail!'\n"
+"\n"
+" To also determine which files have failed, use the -v option:\n"
+"\n"
+"\tsamtools quickcheck -v *.bam > bad_bams.fofn \\\n"
+"\t && echo 'all ok' \\\n"
+"\t || echo 'some files failed check, see bad_bams.fofn'\n"
);
}
@@ -54,7 +71,7 @@ int main_quickcheck(int argc, char** argv)
verbose++;
break;
default:
- usage_quickcheck(pysamerr);
+ usage_quickcheck(pysam_stderr);
return 1;
}
}
@@ -63,12 +80,12 @@ int main_quickcheck(int argc, char** argv)
argv += optind;
if (argc < 1) {
- usage_quickcheck(stdout);
+ usage_quickcheck(pysam_stdout);
return 1;
}
if (verbose >= 2) {
- fprintf(pysamerr, "verbosity set to %d\n", verbose);
+ fprintf(pysam_stderr, "verbosity set to %d\n", verbose);
}
if (verbose >= 4) {
@@ -82,52 +99,55 @@ int main_quickcheck(int argc, char** argv)
char* fn = argv[i];
int file_state = 0;
- if (verbose >= 3) fprintf(pysamerr, "checking %s\n", fn);
+ if (verbose >= 3) fprintf(pysam_stderr, "checking %s\n", fn);
// attempt to open
htsFile *hts_fp = hts_open(fn, "r");
if (hts_fp == NULL) {
- if (verbose >= 2) fprintf(pysamerr, "%s could not be opened for reading\n", fn);
+ if (verbose >= 2) fprintf(pysam_stderr, "%s could not be opened for reading\n", fn);
file_state |= 2;
}
else {
- if (verbose >= 3) fprintf(pysamerr, "opened %s\n", fn);
+ if (verbose >= 3) fprintf(pysam_stderr, "opened %s\n", fn);
// make sure we have sequence data
const htsFormat *fmt = hts_get_format(hts_fp);
if (fmt->category != sequence_data ) {
- if (verbose >= 2) fprintf(pysamerr, "%s was not identified as sequence data\n", fn);
+ if (verbose >= 2) fprintf(pysam_stderr, "%s was not identified as sequence data\n", fn);
file_state |= 4;
}
else {
- if (verbose >= 3) fprintf(pysamerr, "%s is sequence data\n", fn);
+ if (verbose >= 3) fprintf(pysam_stderr, "%s is sequence data\n", fn);
// check header
bam_hdr_t *header = sam_hdr_read(hts_fp);
if (header->n_targets <= 0) {
- if (verbose >= 2) fprintf(pysamerr, "%s had no targets in header\n", fn);
+ if (verbose >= 2) fprintf(pysam_stderr, "%s had no targets in header\n", fn);
file_state |= 8;
}
else {
- if (verbose >= 3) fprintf(pysamerr, "%s has %d targets in header\n", fn, header->n_targets);
+ if (verbose >= 3) fprintf(pysam_stderr, "%s has %d targets in header\n", fn, header->n_targets);
}
// only check EOF on BAM for now
// TODO implement and use hts_check_EOF() to include CRAM support
if (fmt->format == bam) {
if (bgzf_check_EOF(hts_fp->fp.bgzf) <= 0) {
- if (verbose >= 2) fprintf(pysamerr, "%s was missing EOF block\n", fn);
+ if (verbose >= 2) fprintf(pysam_stderr, "%s was missing EOF block\n", fn);
file_state |= 16;
}
else {
- if (verbose >= 3) fprintf(pysamerr, "%s has good EOF block\n", fn);
+ if (verbose >= 3) fprintf(pysam_stderr, "%s has good EOF block\n", fn);
}
}
}
- hts_close(hts_fp);
+ if (hts_close(hts_fp) < 0) {
+ file_state |= 32;
+ if (verbose >= 2) fprintf(pysam_stderr, "%s did not close cleanly\n", fn);
+ }
}
if (file_state > 0 && verbose >= 1) {
- fprintf(stdout, "%s\n", fn);
+ fprintf(pysam_stdout, "%s\n", fn);
}
ret |= file_state;
}
diff --git a/samtools/bam_reheader.c b/samtools/bam_reheader.c
index dc43807..0469c06 100644
--- a/samtools/bam_reheader.c
+++ b/samtools/bam_reheader.c
@@ -1,7 +1,7 @@
/* bam_reheader.c -- reheader subcommand.
Copyright (C) 2010 Broad Institute.
- Copyright (C) 2012, 2013 Genome Research Ltd.
+ Copyright (C) 2012-2015 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@@ -43,47 +45,77 @@ DEALINGS IN THE SOFTWARE. */
int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
const char *arg_list, int add_PG)
{
- BGZF *fp;
+ BGZF *fp = NULL;
ssize_t len;
- uint8_t *buf;
+ uint8_t *buf = NULL;
+ SAM_hdr *sh = NULL;
if (in->is_write) return -1;
buf = malloc(BUF_SIZE);
+ if (!buf) {
+ fprintf(stderr, "Out of memory\n");
+ return -1;
+ }
if (bam_hdr_read(in) == NULL) {
fprintf(stderr, "Couldn't read header\n");
- free(buf);
- return -1;
+ goto fail;
}
fp = bgzf_fdopen(fd, "w");
+ if (!fp) {
+ print_error_errno("reheader", "Couldn't open output file");
+ goto fail;
+ }
if (add_PG) {
// Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
- SAM_hdr *sh = sam_hdr_parse_(h->text, h->l_text);
+ sh = sam_hdr_parse_(h->text, h->l_text);
+ if (!sh)
+ goto fail;
if (sam_hdr_add_PG(sh, "samtools",
"VN", samtools_version(),
arg_list ? "CL": NULL,
arg_list ? arg_list : NULL,
NULL) != 0)
- return -1;
+ goto fail;
free(h->text);
h->text = strdup(sam_hdr_str(sh));
h->l_text = sam_hdr_length(sh);
if (!h->text)
- return -1;
+ goto fail;
sam_hdr_free(sh);
+ sh = NULL;
}
- bam_hdr_write(fp, h);
+ if (bam_hdr_write(fp, h) < 0) {
+ print_error_errno("reheader", "Couldn't write header");
+ goto fail;
+ }
if (in->block_offset < in->block_length) {
- bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
- bgzf_flush(fp);
+ if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+ if (bgzf_flush(fp) < 0) goto write_fail;
+ }
+ while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
+ if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
+ }
+ if (len < 0) {
+ fprintf(stderr, "[%s] Error reading input file\n", __func__);
+ goto fail;
}
- while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
- bgzf_raw_write(fp, buf, len);
free(buf);
fp->block_offset = in->block_offset = 0;
- bgzf_close(fp);
+ if (bgzf_close(fp) < 0) {
+ fprintf(stderr, "[%s] Error closing output file\n", __func__);
+ return -1;
+ }
return 0;
+
+ write_fail:
+ print_error_errno("reheader", "Error writing to output file");
+ fail:
+ bgzf_close(fp);
+ free(buf);
+ sam_hdr_free(sh);
+ return -1;
}
/*
@@ -445,7 +477,7 @@ int main_reheader(int argc, char *argv[])
{ // read the header
samFile *fph = sam_open(argv[optind], "r");
if (fph == 0) {
- fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[optind]);
+ print_error_errno("reheader", "fail to read the header from '%s'", argv[optind]);
return 1;
}
h = sam_hdr_read(fph);
@@ -458,7 +490,7 @@ int main_reheader(int argc, char *argv[])
}
in = sam_open(argv[optind+1], inplace?"r+":"r");
if (in == 0) {
- fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[optind+1]);
+ print_error_errno("reheader", "fail to open file '%s'", argv[optind+1]);
return 1;
}
if (hts_get_format(in)->format == bam) {
diff --git a/samtools/bam_reheader.c.pysam.c b/samtools/bam_reheader.c.pysam.c
index 0519137..16990e6 100644
--- a/samtools/bam_reheader.c.pysam.c
+++ b/samtools/bam_reheader.c.pysam.c
@@ -3,7 +3,7 @@
/* bam_reheader.c -- reheader subcommand.
Copyright (C) 2010 Broad Institute.
- Copyright (C) 2012, 2013 Genome Research Ltd.
+ Copyright (C) 2012-2015 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
@@ -45,51 +47,81 @@ DEALINGS IN THE SOFTWARE. */
int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
const char *arg_list, int add_PG)
{
- BGZF *fp;
+ BGZF *fp = NULL;
ssize_t len;
- uint8_t *buf;
+ uint8_t *buf = NULL;
+ SAM_hdr *sh = NULL;
if (in->is_write) return -1;
buf = malloc(BUF_SIZE);
- if (bam_hdr_read(in) == NULL) {
- fprintf(pysamerr, "Couldn't read header\n");
- free(buf);
+ if (!buf) {
+ fprintf(pysam_stderr, "Out of memory\n");
return -1;
}
+ if (bam_hdr_read(in) == NULL) {
+ fprintf(pysam_stderr, "Couldn't read header\n");
+ goto fail;
+ }
fp = bgzf_fdopen(fd, "w");
+ if (!fp) {
+ print_error_errno("reheader", "Couldn't open output file");
+ goto fail;
+ }
if (add_PG) {
// Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
- SAM_hdr *sh = sam_hdr_parse_(h->text, h->l_text);
+ sh = sam_hdr_parse_(h->text, h->l_text);
+ if (!sh)
+ goto fail;
if (sam_hdr_add_PG(sh, "samtools",
"VN", samtools_version(),
arg_list ? "CL": NULL,
arg_list ? arg_list : NULL,
NULL) != 0)
- return -1;
+ goto fail;
free(h->text);
h->text = strdup(sam_hdr_str(sh));
h->l_text = sam_hdr_length(sh);
if (!h->text)
- return -1;
+ goto fail;
sam_hdr_free(sh);
+ sh = NULL;
}
- bam_hdr_write(fp, h);
+ if (bam_hdr_write(fp, h) < 0) {
+ print_error_errno("reheader", "Couldn't write header");
+ goto fail;
+ }
if (in->block_offset < in->block_length) {
- bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
- bgzf_flush(fp);
+ if (bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset) < 0) goto write_fail;
+ if (bgzf_flush(fp) < 0) goto write_fail;
+ }
+ while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
+ if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
+ }
+ if (len < 0) {
+ fprintf(pysam_stderr, "[%s] Error reading input file\n", __func__);
+ goto fail;
}
- while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0)
- bgzf_raw_write(fp, buf, len);
free(buf);
fp->block_offset = in->block_offset = 0;
- bgzf_close(fp);
+ if (bgzf_close(fp) < 0) {
+ fprintf(pysam_stderr, "[%s] Error closing output file\n", __func__);
+ return -1;
+ }
return 0;
+
+ write_fail:
+ print_error_errno("reheader", "Error writing to output file");
+ fail:
+ bgzf_close(fp);
+ free(buf);
+ sam_hdr_free(sh);
+ return -1;
}
/*
- * Reads a file and outputs a new CRAM file to stdout with 'h'
+ * Reads a file and outputs a new CRAM file to pysam_stdout with 'h'
* replaced as the header. No checks are made to the validity.
*
* FIXME: error checking
@@ -173,7 +205,7 @@ int cram_reheader_inplace2(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
if (cram_major_vers(fd) < 2 ||
cram_major_vers(fd) > 3) {
- fprintf(pysamerr, "[%s] unsupported CRAM version %d\n", __func__,
+ fprintf(pysam_stderr, "[%s] unsupported CRAM version %d\n", __func__,
cram_major_vers(fd));
goto err;
}
@@ -206,7 +238,7 @@ int cram_reheader_inplace2(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
goto err;
if (cram_block_get_uncomp_size(b) < header_len+4) {
- fprintf(pysamerr, "New header will not fit. Use non-inplace version (%d > %d)\n",
+ fprintf(pysam_stderr, "New header will not fit. Use non-inplace version (%d > %d)\n",
header_len+4, cram_block_get_uncomp_size(b));
ret = -2;
goto err;
@@ -269,7 +301,7 @@ int cram_reheader_inplace3(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
if (cram_major_vers(fd) < 2 ||
cram_major_vers(fd) > 3) {
- fprintf(pysamerr, "[%s] unsupported CRAM version %d\n", __func__,
+ fprintf(pysam_stderr, "[%s] unsupported CRAM version %d\n", __func__,
cram_major_vers(fd));
goto err;
}
@@ -341,7 +373,7 @@ int cram_reheader_inplace3(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
goto err;
if (old_container_sz != container_sz) {
- fprintf(pysamerr, "Quirk of fate makes this troublesome! "
+ fprintf(pysam_stderr, "Quirk of fate makes this troublesome! "
"Please use non-inplace version.\n");
goto err;
}
@@ -360,7 +392,7 @@ int cram_reheader_inplace3(cram_fd *fd, const bam_hdr_t *h, const char *arg_list
goto err;
if (cram_block_size(b) > cram_container_get_length(c)) {
- fprintf(pysamerr, "New header will not fit. Use non-inplace version"
+ fprintf(pysam_stderr, "New header will not fit. Use non-inplace version"
" (%d > %d)\n",
(int)cram_block_size(b), cram_container_get_length(c));
ret = -2;
@@ -398,7 +430,7 @@ int cram_reheader_inplace(cram_fd *fd, const bam_hdr_t *h, const char *arg_list,
case 2: return cram_reheader_inplace2(fd, h, arg_list, add_PG);
case 3: return cram_reheader_inplace3(fd, h, arg_list, add_PG);
default:
- fprintf(pysamerr, "[%s] unsupported CRAM version %d\n", __func__,
+ fprintf(pysam_stderr, "[%s] unsupported CRAM version %d\n", __func__,
cram_major_vers(fd));
return -1;
}
@@ -412,7 +444,7 @@ static void usage(FILE *fp, int ret) {
"Options:\n"
" -P, --no-PG Do not generate an @PG header line.\n"
" -i, --in-place Modify the bam/cram file directly.\n"
- " (Defaults to outputting to stdout.)\n");
+ " (Defaults to outputting to pysam_stdout.)\n");
exit(ret);
}
@@ -431,41 +463,40 @@ int main_reheader(int argc, char *argv[])
};
while ((c = getopt_long(argc, argv, "hiP", lopts, NULL)) >= 0) {
- fprintf(stderr, " %i %c %s\n", optind, c, argv[optind-1]);
switch (c) {
case 'P': add_PG = 0; break;
case 'i': inplace = 1; break;
- case 'h': usage(stdout, 0); break;
+ case 'h': usage(pysam_stdout, 0); break;
default:
- fprintf(pysamerr, "Invalid option '%c'\n", c);
- usage(pysamerr, 1);
+ fprintf(pysam_stderr, "Invalid option '%c'\n", c);
+ usage(pysam_stderr, 1);
}
}
if (argc - optind != 2)
- usage(pysamerr, 1);
+ usage(pysam_stderr, 1);
{ // read the header
samFile *fph = sam_open(argv[optind], "r");
if (fph == 0) {
- fprintf(pysamerr, "[%s] fail to read the header from %s.\n", __func__, argv[optind]);
+ print_error_errno("reheader", "fail to read the header from '%s'", argv[optind]);
return 1;
}
h = sam_hdr_read(fph);
sam_close(fph);
if (h == NULL) {
- fprintf(pysamerr, "[%s] failed to read the header for '%s'.\n",
+ fprintf(pysam_stderr, "[%s] failed to read the header for '%s'.\n",
__func__, argv[1]);
return 1;
}
}
in = sam_open(argv[optind+1], inplace?"r+":"r");
if (in == 0) {
- fprintf(pysamerr, "[%s] fail to open file %s.\n", __func__, argv[optind+1]);
+ print_error_errno("reheader", "fail to open file '%s'", argv[optind+1]);
return 1;
}
if (hts_get_format(in)->format == bam) {
- r = bam_reheader(in->fp.bgzf, h, fileno(stdout), arg_list, add_PG);
+ r = bam_reheader(in->fp.bgzf, h, fileno(pysam_stdout), arg_list, add_PG);
} else {
if (inplace)
r = cram_reheader_inplace(in->fp.cram, h, arg_list, add_PG);
diff --git a/samtools/bam_rmdup.c b/samtools/bam_rmdup.c
index cdca878..57612b4 100644
--- a/samtools/bam_rmdup.c
+++ b/samtools/bam_rmdup.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -30,6 +32,7 @@ DEALINGS IN THE SOFTWARE. */
#include <unistd.h>
#include "htslib/sam.h"
#include "sam_opts.h"
+#include "samtools.h"
#include "bam.h" // for bam_get_library
typedef bam1_t *bam1_p;
@@ -60,14 +63,24 @@ static inline void stack_insert(tmp_stack_t *stack, bam1_t *b)
stack->a[stack->n++] = b;
}
-static inline void dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
+static inline int dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
{
int i;
for (i = 0; i != stack->n; ++i) {
- sam_write1(out, hdr, stack->a[i]);
+ if (sam_write1(out, hdr, stack->a[i]) < 0) return -1;
bam_destroy1(stack->a[i]);
+ stack->a[i] = NULL;
}
stack->n = 0;
+ return 0;
+}
+
+static inline void clear_stack(tmp_stack_t *stack) {
+ int i;
+ if (!stack->a) return;
+ for (i = 0; i != stack->n; ++i) {
+ bam_destroy1(stack->a[i]);
+ }
}
static void clear_del_set(khash_t(name) *del_set)
@@ -114,25 +127,29 @@ static inline int sum_qual(const bam1_t *b)
return q;
}
-void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
+int bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
{
- bam1_t *b;
- int last_tid = -1, last_pos = -1;
+ bam1_t *b = NULL;
+ int last_tid = -1, last_pos = -1, r;
tmp_stack_t stack;
khint_t k;
- khash_t(lib) *aux;
- khash_t(name) *del_set;
+ khash_t(lib) *aux = NULL;
+ khash_t(name) *del_set = NULL;
+ memset(&stack, 0, sizeof(tmp_stack_t));
aux = kh_init(lib);
del_set = kh_init(name);
b = bam_init1();
- memset(&stack, 0, sizeof(tmp_stack_t));
+ if (!aux || !del_set || !b) {
+ perror(__func__);
+ goto fail;
+ }
kh_resize(name, del_set, 4 * BUFFER_SIZE);
- while (sam_read1(in, hdr, b) >= 0) {
+ while ((r = sam_read1(in, hdr, b)) >= 0) {
bam1_core_t *c = &b->core;
if (c->tid != last_tid || last_pos != c->pos) {
- dump_best(&stack, out, hdr); // write the result
+ if (dump_best(&stack, out, hdr) < 0) goto write_fail; // write the result
clear_best(aux, BUFFER_SIZE);
if (c->tid != last_tid) {
clear_best(aux, 0);
@@ -141,8 +158,10 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
clear_del_set(del_set);
}
if ((int)c->tid == -1) { // append unmapped reads
- sam_write1(out, hdr, b);
- while (sam_read1(in, hdr, b) >= 0) sam_write1(out, hdr, b);
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
+ while ((r = sam_read1(in, hdr, b)) >= 0) {
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
+ }
break;
}
last_tid = c->tid;
@@ -150,7 +169,7 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
}
}
if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) {
- sam_write1(out, hdr, b);
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
} else if (c->isize > 0) { // paired, head
uint64_t key = (uint64_t)c->pos<<32 | c->isize;
const char *lib;
@@ -178,19 +197,26 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
if (k != kh_end(del_set)) {
free((char*)kh_key(del_set, k));
kh_del(name, del_set, k);
- } else sam_write1(out, hdr, b);
+ } else {
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
+ }
}
last_pos = c->pos;
}
+ if (r < -1) {
+ fprintf(stderr, "[%s] failed to read input file\n", __func__);
+ goto fail;
+ }
for (k = kh_begin(aux); k != kh_end(aux); ++k) {
if (kh_exist(aux, k)) {
lib_aux_t *q = &kh_val(aux, k);
- dump_best(&stack, out, hdr);
+ if (dump_best(&stack, out, hdr) < 0) goto write_fail;
fprintf(stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
(long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
kh_destroy(pos, q->best_hash);
free((char*)kh_key(aux, k));
+ kh_del(lib, aux, k);
}
}
kh_destroy(lib, aux);
@@ -199,9 +225,32 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
kh_destroy(name, del_set);
free(stack.a);
bam_destroy1(b);
+ return 0;
+
+ write_fail:
+ print_error_errno("rmdup", "failed to write record");
+ fail:
+ clear_stack(&stack);
+ free(stack.a);
+ if (aux) {
+ for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+ if (kh_exist(aux, k)) {
+ lib_aux_t *q = &kh_val(aux, k);
+ kh_destroy(pos, q->best_hash);
+ free((char*)kh_key(aux, k));
+ }
+ }
+ kh_destroy(lib, aux);
+ }
+ if (del_set) {
+ clear_del_set(del_set);
+ kh_destroy(name, del_set);
+ }
+ bam_destroy1(b);
+ return 1;
}
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
static int rmdup_usage(void) {
fprintf(stderr, "\n");
@@ -215,7 +264,7 @@ static int rmdup_usage(void) {
int bam_rmdup(int argc, char *argv[])
{
- int c, is_se = 0, force_se = 0;
+ int c, ret, is_se = 0, force_se = 0;
samFile *in, *out;
bam_hdr_t *header;
char wmode[3] = {'w', 'b', 0};
@@ -239,6 +288,10 @@ int bam_rmdup(int argc, char *argv[])
return rmdup_usage();
in = sam_open_format(argv[optind], "r", &ga.in);
+ if (!in) {
+ print_error_errno("rmdup", "failed to open \"%s\" for input", argv[optind]);
+ return 1;
+ }
header = sam_hdr_read(in);
if (header == NULL || header->n_targets == 0) {
fprintf(stderr, "[bam_rmdup] input SAM does not have header. Abort!\n");
@@ -247,15 +300,23 @@ int bam_rmdup(int argc, char *argv[])
sam_open_mode(wmode+1, argv[optind+1], NULL);
out = sam_open_format(argv[optind+1], wmode, &ga.out);
- if (in == 0 || out == 0) {
- fprintf(stderr, "[bam_rmdup] fail to read/write input files\n");
+ if (!out) {
+ print_error_errno("rmdup", "failed to open \"%s\" for output", argv[optind+1]);
+ return 1;
+ }
+ if (sam_hdr_write(out, header) < 0) {
+ print_error_errno("rmdup", "failed to write header");
return 1;
}
- sam_hdr_write(out, header);
- if (is_se) bam_rmdupse_core(in, header, out, force_se);
- else bam_rmdup_core(in, header, out);
+ if (is_se) ret = bam_rmdupse_core(in, header, out, force_se);
+ else ret = bam_rmdup_core(in, header, out);
+
bam_hdr_destroy(header);
- sam_close(in); sam_close(out);
- return 0;
+ sam_close(in);
+ if (sam_close(out) < 0) {
+ fprintf(stderr, "[bam_rmdup] error closing output file\n");
+ ret = 1;
+ }
+ return ret;
}
diff --git a/samtools/bam_rmdup.c.pysam.c b/samtools/bam_rmdup.c.pysam.c
index 4ece6f2..3c16025 100644
--- a/samtools/bam_rmdup.c.pysam.c
+++ b/samtools/bam_rmdup.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE. */
#include <unistd.h>
#include "htslib/sam.h"
#include "sam_opts.h"
+#include "samtools.h"
#include "bam.h" // for bam_get_library
typedef bam1_t *bam1_p;
@@ -62,14 +65,24 @@ static inline void stack_insert(tmp_stack_t *stack, bam1_t *b)
stack->a[stack->n++] = b;
}
-static inline void dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
+static inline int dump_best(tmp_stack_t *stack, samFile *out, bam_hdr_t *hdr)
{
int i;
for (i = 0; i != stack->n; ++i) {
- sam_write1(out, hdr, stack->a[i]);
+ if (sam_write1(out, hdr, stack->a[i]) < 0) return -1;
bam_destroy1(stack->a[i]);
+ stack->a[i] = NULL;
}
stack->n = 0;
+ return 0;
+}
+
+static inline void clear_stack(tmp_stack_t *stack) {
+ int i;
+ if (!stack->a) return;
+ for (i = 0; i != stack->n; ++i) {
+ bam_destroy1(stack->a[i]);
+ }
}
static void clear_del_set(khash_t(name) *del_set)
@@ -116,43 +129,49 @@ static inline int sum_qual(const bam1_t *b)
return q;
}
-void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
+int bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
{
- bam1_t *b;
- int last_tid = -1, last_pos = -1;
+ bam1_t *b = NULL;
+ int last_tid = -1, last_pos = -1, r;
tmp_stack_t stack;
khint_t k;
- khash_t(lib) *aux;
- khash_t(name) *del_set;
+ khash_t(lib) *aux = NULL;
+ khash_t(name) *del_set = NULL;
+ memset(&stack, 0, sizeof(tmp_stack_t));
aux = kh_init(lib);
del_set = kh_init(name);
b = bam_init1();
- memset(&stack, 0, sizeof(tmp_stack_t));
+ if (!aux || !del_set || !b) {
+ perror(__func__);
+ goto fail;
+ }
kh_resize(name, del_set, 4 * BUFFER_SIZE);
- while (sam_read1(in, hdr, b) >= 0) {
+ while ((r = sam_read1(in, hdr, b)) >= 0) {
bam1_core_t *c = &b->core;
if (c->tid != last_tid || last_pos != c->pos) {
- dump_best(&stack, out, hdr); // write the result
+ if (dump_best(&stack, out, hdr) < 0) goto write_fail; // write the result
clear_best(aux, BUFFER_SIZE);
if (c->tid != last_tid) {
clear_best(aux, 0);
if (kh_size(del_set)) { // check
- fprintf(pysamerr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set));
+ fprintf(pysam_stderr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set));
clear_del_set(del_set);
}
if ((int)c->tid == -1) { // append unmapped reads
- sam_write1(out, hdr, b);
- while (sam_read1(in, hdr, b) >= 0) sam_write1(out, hdr, b);
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
+ while ((r = sam_read1(in, hdr, b)) >= 0) {
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
+ }
break;
}
last_tid = c->tid;
- fprintf(pysamerr, "[bam_rmdup_core] processing reference %s...\n", hdr->target_name[c->tid]);
+ fprintf(pysam_stderr, "[bam_rmdup_core] processing reference %s...\n", hdr->target_name[c->tid]);
}
}
if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) {
- sam_write1(out, hdr, b);
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
} else if (c->isize > 0) { // paired, head
uint64_t key = (uint64_t)c->pos<<32 | c->isize;
const char *lib;
@@ -170,7 +189,7 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
bam_copy1(p, b); // replaced as b
} else kh_put(name, del_set, strdup(bam_get_qname(b)), &ret); // b will be removed
if (ret == 0)
- fprintf(pysamerr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. Continue anyway.\n", bam_get_qname(b));
+ fprintf(pysam_stderr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. Continue anyway.\n", bam_get_qname(b));
} else { // not found in best_hash
kh_val(q->best_hash, k) = bam_dup1(b);
stack_insert(&stack, kh_val(q->best_hash, k));
@@ -180,19 +199,26 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
if (k != kh_end(del_set)) {
free((char*)kh_key(del_set, k));
kh_del(name, del_set, k);
- } else sam_write1(out, hdr, b);
+ } else {
+ if (sam_write1(out, hdr, b) < 0) goto write_fail;
+ }
}
last_pos = c->pos;
}
+ if (r < -1) {
+ fprintf(pysam_stderr, "[%s] failed to read input file\n", __func__);
+ goto fail;
+ }
for (k = kh_begin(aux); k != kh_end(aux); ++k) {
if (kh_exist(aux, k)) {
lib_aux_t *q = &kh_val(aux, k);
- dump_best(&stack, out, hdr);
- fprintf(pysamerr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
+ if (dump_best(&stack, out, hdr) < 0) goto write_fail;
+ fprintf(pysam_stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
(long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
kh_destroy(pos, q->best_hash);
free((char*)kh_key(aux, k));
+ kh_del(lib, aux, k);
}
}
kh_destroy(lib, aux);
@@ -201,23 +227,46 @@ void bam_rmdup_core(samFile *in, bam_hdr_t *hdr, samFile *out)
kh_destroy(name, del_set);
free(stack.a);
bam_destroy1(b);
+ return 0;
+
+ write_fail:
+ print_error_errno("rmdup", "failed to write record");
+ fail:
+ clear_stack(&stack);
+ free(stack.a);
+ if (aux) {
+ for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+ if (kh_exist(aux, k)) {
+ lib_aux_t *q = &kh_val(aux, k);
+ kh_destroy(pos, q->best_hash);
+ free((char*)kh_key(aux, k));
+ }
+ }
+ kh_destroy(lib, aux);
+ }
+ if (del_set) {
+ clear_del_set(del_set);
+ kh_destroy(name, del_set);
+ }
+ bam_destroy1(b);
+ return 1;
}
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se);
static int rmdup_usage(void) {
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Usage: samtools rmdup [-sS] <input.srt.bam> <output.bam>\n\n");
- fprintf(pysamerr, "Option: -s rmdup for SE reads\n");
- fprintf(pysamerr, " -S treat PE reads as SE in rmdup (force -s)\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Usage: samtools rmdup [-sS] <input.srt.bam> <output.bam>\n\n");
+ fprintf(pysam_stderr, "Option: -s rmdup for SE reads\n");
+ fprintf(pysam_stderr, " -S treat PE reads as SE in rmdup (force -s)\n");
- sam_global_opt_help(pysamerr, "-....");
+ sam_global_opt_help(pysam_stderr, "-....");
return 1;
}
int bam_rmdup(int argc, char *argv[])
{
- int c, is_se = 0, force_se = 0;
+ int c, ret, is_se = 0, force_se = 0;
samFile *in, *out;
bam_hdr_t *header;
char wmode[3] = {'w', 'b', 0};
@@ -241,23 +290,35 @@ int bam_rmdup(int argc, char *argv[])
return rmdup_usage();
in = sam_open_format(argv[optind], "r", &ga.in);
+ if (!in) {
+ print_error_errno("rmdup", "failed to open \"%s\" for input", argv[optind]);
+ return 1;
+ }
header = sam_hdr_read(in);
if (header == NULL || header->n_targets == 0) {
- fprintf(pysamerr, "[bam_rmdup] input SAM does not have header. Abort!\n");
+ fprintf(pysam_stderr, "[bam_rmdup] input SAM does not have header. Abort!\n");
return 1;
}
sam_open_mode(wmode+1, argv[optind+1], NULL);
out = sam_open_format(argv[optind+1], wmode, &ga.out);
- if (in == 0 || out == 0) {
- fprintf(pysamerr, "[bam_rmdup] fail to read/write input files\n");
+ if (!out) {
+ print_error_errno("rmdup", "failed to open \"%s\" for output", argv[optind+1]);
+ return 1;
+ }
+ if (sam_hdr_write(out, header) < 0) {
+ print_error_errno("rmdup", "failed to write header");
return 1;
}
- sam_hdr_write(out, header);
- if (is_se) bam_rmdupse_core(in, header, out, force_se);
- else bam_rmdup_core(in, header, out);
+ if (is_se) ret = bam_rmdupse_core(in, header, out, force_se);
+ else ret = bam_rmdup_core(in, header, out);
+
bam_hdr_destroy(header);
- sam_close(in); sam_close(out);
- return 0;
+ sam_close(in);
+ if (sam_close(out) < 0) {
+ fprintf(pysam_stderr, "[bam_rmdup] error closing output file\n");
+ ret = 1;
+ }
+ return ret;
}
diff --git a/samtools/bam_rmdupse.c b/samtools/bam_rmdupse.c
index d17f6f5..f6baef0 100644
--- a/samtools/bam_rmdupse.c
+++ b/samtools/bam_rmdupse.c
@@ -23,12 +23,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include <stdio.h>
#include "bam.h" // for bam_get_library
#include "htslib/sam.h"
#include "htslib/khash.h"
#include "htslib/klist.h"
+#include "samtools.h"
#define QUEUE_CLEAR_SIZE 0x100000
#define MAX_POS 0x7fffffff
@@ -93,8 +96,8 @@ static void clear_besthash(besthash_t *h, int32_t pos)
kh_del(best, h, k);
}
-static void dump_alignment(samFile *out, bam_hdr_t *hdr,
- queue_t *queue, int32_t pos, khash_t(lib) *h)
+static int dump_alignment(samFile *out, bam_hdr_t *hdr,
+ queue_t *queue, int32_t pos, khash_t(lib) *h)
{
if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) {
khint_t k;
@@ -108,7 +111,7 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
continue;
}
if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break;
- sam_write1(out, hdr, q->b);
+ if (sam_write1(out, hdr, q->b) < 0) return -1;
q->b->l_data = 0;
kl_shift(q, queue, 0);
}
@@ -119,28 +122,40 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
}
}
}
+ return 0;
}
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
{
- bam1_t *b;
- queue_t *queue;
+ bam1_t *b = NULL;
+ queue_t *queue = NULL;
khint_t k;
- int last_tid = -2;
- khash_t(lib) *aux;
+ int last_tid = -2, r;
+ khash_t(lib) *aux = NULL;
aux = kh_init(lib);
b = bam_init1();
queue = kl_init(q);
- while (sam_read1(in, hdr, b) >= 0) {
+ if (!aux || !b || !queue) {
+ perror(__func__);
+ goto fail;
+ }
+
+ while ((r = sam_read1(in, hdr, b)) >= 0) {
bam1_core_t *c = &b->core;
int endpos = bam_endpos(b);
int score = sum_qual(b);
if (last_tid != c->tid) {
- if (last_tid >= 0) dump_alignment(out, hdr, queue, MAX_POS, aux);
+ if (last_tid >= 0) {
+ if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0)
+ goto write_fail;
+ }
last_tid = c->tid;
- } else dump_alignment(out, hdr, queue, c->pos, aux);
+ } else {
+ if (dump_alignment(out, hdr, queue, c->pos, aux) < 0)
+ goto write_fail;
+ }
if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) {
push_queue(queue, b, endpos, score);
} else {
@@ -170,7 +185,12 @@ void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
} else kh_val(h, k) = push_queue(queue, b, endpos, score);
}
}
- dump_alignment(out, hdr, queue, MAX_POS, aux);
+ if (r < -1) {
+ fprintf(stderr, "[%s] error reading input file\n", __func__);
+ goto fail;
+ }
+
+ if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0) goto write_fail;
for (k = kh_begin(aux); k != kh_end(aux); ++k) {
if (kh_exist(aux, k)) {
@@ -179,9 +199,29 @@ void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
(long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
kh_destroy(best, q->left); kh_destroy(best, q->rght);
free((char*)kh_key(aux, k));
+ kh_del(lib, aux, k);
}
}
kh_destroy(lib, aux);
bam_destroy1(b);
kl_destroy(q, queue);
+ return 0;
+
+ write_fail:
+ print_error_errno("rmdup", "failed to write record");
+ fail:
+ if (aux) {
+ for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+ if (kh_exist(aux, k)) {
+ lib_aux_t *q = &kh_val(aux, k);
+ kh_destroy(best, q->left);
+ kh_destroy(best, q->rght);
+ free((char*)kh_key(aux, k));
+ }
+ }
+ kh_destroy(lib, aux);
+ }
+ bam_destroy1(b);
+ kl_destroy(q, queue);
+ return 1;
}
diff --git a/samtools/bam_rmdupse.c.pysam.c b/samtools/bam_rmdupse.c.pysam.c
index 06895a8..3a3d0d0 100644
--- a/samtools/bam_rmdupse.c.pysam.c
+++ b/samtools/bam_rmdupse.c.pysam.c
@@ -25,12 +25,15 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include <stdio.h>
#include "bam.h" // for bam_get_library
#include "htslib/sam.h"
#include "htslib/khash.h"
#include "htslib/klist.h"
+#include "samtools.h"
#define QUEUE_CLEAR_SIZE 0x100000
#define MAX_POS 0x7fffffff
@@ -95,8 +98,8 @@ static void clear_besthash(besthash_t *h, int32_t pos)
kh_del(best, h, k);
}
-static void dump_alignment(samFile *out, bam_hdr_t *hdr,
- queue_t *queue, int32_t pos, khash_t(lib) *h)
+static int dump_alignment(samFile *out, bam_hdr_t *hdr,
+ queue_t *queue, int32_t pos, khash_t(lib) *h)
{
if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) {
khint_t k;
@@ -110,7 +113,7 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
continue;
}
if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break;
- sam_write1(out, hdr, q->b);
+ if (sam_write1(out, hdr, q->b) < 0) return -1;
q->b->l_data = 0;
kl_shift(q, queue, 0);
}
@@ -121,28 +124,40 @@ static void dump_alignment(samFile *out, bam_hdr_t *hdr,
}
}
}
+ return 0;
}
-void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
+int bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
{
- bam1_t *b;
- queue_t *queue;
+ bam1_t *b = NULL;
+ queue_t *queue = NULL;
khint_t k;
- int last_tid = -2;
- khash_t(lib) *aux;
+ int last_tid = -2, r;
+ khash_t(lib) *aux = NULL;
aux = kh_init(lib);
b = bam_init1();
queue = kl_init(q);
- while (sam_read1(in, hdr, b) >= 0) {
+ if (!aux || !b || !queue) {
+ perror(__func__);
+ goto fail;
+ }
+
+ while ((r = sam_read1(in, hdr, b)) >= 0) {
bam1_core_t *c = &b->core;
int endpos = bam_endpos(b);
int score = sum_qual(b);
if (last_tid != c->tid) {
- if (last_tid >= 0) dump_alignment(out, hdr, queue, MAX_POS, aux);
+ if (last_tid >= 0) {
+ if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0)
+ goto write_fail;
+ }
last_tid = c->tid;
- } else dump_alignment(out, hdr, queue, c->pos, aux);
+ } else {
+ if (dump_alignment(out, hdr, queue, c->pos, aux) < 0)
+ goto write_fail;
+ }
if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) {
push_queue(queue, b, endpos, score);
} else {
@@ -172,18 +187,43 @@ void bam_rmdupse_core(samFile *in, bam_hdr_t *hdr, samFile *out, int force_se)
} else kh_val(h, k) = push_queue(queue, b, endpos, score);
}
}
- dump_alignment(out, hdr, queue, MAX_POS, aux);
+ if (r < -1) {
+ fprintf(pysam_stderr, "[%s] error reading input file\n", __func__);
+ goto fail;
+ }
+
+ if (dump_alignment(out, hdr, queue, MAX_POS, aux) < 0) goto write_fail;
for (k = kh_begin(aux); k != kh_end(aux); ++k) {
if (kh_exist(aux, k)) {
lib_aux_t *q = &kh_val(aux, k);
- fprintf(pysamerr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
+ fprintf(pysam_stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
(long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
kh_destroy(best, q->left); kh_destroy(best, q->rght);
free((char*)kh_key(aux, k));
+ kh_del(lib, aux, k);
}
}
kh_destroy(lib, aux);
bam_destroy1(b);
kl_destroy(q, queue);
+ return 0;
+
+ write_fail:
+ print_error_errno("rmdup", "failed to write record");
+ fail:
+ if (aux) {
+ for (k = kh_begin(aux); k != kh_end(aux); ++k) {
+ if (kh_exist(aux, k)) {
+ lib_aux_t *q = &kh_val(aux, k);
+ kh_destroy(best, q->left);
+ kh_destroy(best, q->rght);
+ free((char*)kh_key(aux, k));
+ }
+ }
+ kh_destroy(lib, aux);
+ }
+ bam_destroy1(b);
+ kl_destroy(q, queue);
+ return 1;
}
diff --git a/samtools/bam_sort.c b/samtools/bam_sort.c
index 7a441ae..4955dcc 100644
--- a/samtools/bam_sort.c
+++ b/samtools/bam_sort.c
@@ -1,6 +1,6 @@
/* bam_sort.c -- sorting and merging.
- Copyright (C) 2008-2015 Genome Research Ltd.
+ Copyright (C) 2008-2016 Genome Research Ltd.
Portions copyright (C) 2009-2012 Broad Institute.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
@@ -31,6 +33,7 @@ DEALINGS IN THE SOFTWARE. */
#include <stdio.h>
#include <string.h>
#include <time.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>
@@ -404,7 +407,7 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
hdr_match_t *new_sq_matches = NULL;
char *text;
hdr_match_t matches[2];
- int32_t i, missing;
+ int32_t i;
int32_t old_n_targets = merged_hdr->n_targets;
khiter_t iter;
int min_tid = -1;
@@ -502,20 +505,20 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
text += matches[0].rm_eo;
}
- // Check if any new targets have been missed
- missing = 0;
+ // Copy the @SQ headers found and recreate any missing from binary header.
for (i = 0; i < merged_hdr->n_targets - old_n_targets; i++) {
if (new_sq_matches[i].rm_so >= 0) {
if (match_to_ks(translate->text, &new_sq_matches[i], out_text))
goto memfail;
if (kputc('\n', out_text) == EOF) goto memfail;
} else {
- fprintf(stderr, "[E::%s] @SQ SN (%s) found in binary header but not text header.\n",
- __func__, merged_hdr->target_name[i + old_n_targets]);
- missing++;
+ if (kputs("@SQ\tSN:", out_text) == EOF ||
+ kputs(merged_hdr->target_name[i + old_n_targets], out_text) == EOF ||
+ kputs("\tLN:", out_text) == EOF ||
+ kputuw(merged_hdr->target_len[i + old_n_targets], out_text) == EOF ||
+ kputc('\n', out_text) == EOF) goto memfail;
}
}
- if (missing) goto fail;
free(new_sq_matches);
return 0;
@@ -775,7 +778,7 @@ static int finish_rg_pg(bool is_rg, klist_t(hdrln) *hdr_lines,
static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
trans_tbl_t* tbl, bool merge_rg, bool merge_pg,
- char* rg_override)
+ bool copy_co, char* rg_override)
{
klist_t(hdrln) *rg_list = NULL;
klist_t(hdrln) *pg_list = NULL;
@@ -817,20 +820,22 @@ static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
kl_destroy(hdrln, rg_list); rg_list = NULL;
kl_destroy(hdrln, pg_list); pg_list = NULL;
- // Just append @CO headers without translation
- const char *line, *end_pointer;
- for (line = translate->text; *line; line = end_pointer + 1) {
- end_pointer = strchr(line, '\n');
- if (strncmp(line, "@CO", 3) == 0) {
- if (end_pointer) {
- if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
- goto memfail;
- } else { // Last line with no trailing '\n'
- if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
- if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+ if (copy_co) {
+ // Just append @CO headers without translation
+ const char *line, *end_pointer;
+ for (line = translate->text; *line; line = end_pointer + 1) {
+ end_pointer = strchr(line, '\n');
+ if (strncmp(line, "@CO", 3) == 0) {
+ if (end_pointer) {
+ if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
+ goto memfail;
+ } else { // Last line with no trailing '\n'
+ if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
+ if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+ }
}
+ if (end_pointer == NULL) break;
}
- if (end_pointer == NULL) break;
}
return 0;
@@ -1036,6 +1041,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
// Create reverse translation table for tids
int* rtrans = (int*)malloc(sizeof(int32_t)*n*n_targets);
const int32_t NOTID = INT32_MIN;
+ if (!rtrans) return NULL;
memset_pattern4((void*)rtrans, &NOTID, sizeof(int32_t)*n*n_targets);
int i;
for (i = 0; i < n; ++i) {
@@ -1056,6 +1062,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
#define MERGE_FORCE 8 // Overwrite output BAM if it exists
#define MERGE_COMBINE_RG 16 // Combine RG tags frather than redefining them
#define MERGE_COMBINE_PG 32 // Combine PG tags frather than redefining them
+#define MERGE_FIRST_CO 64 // Use only first file's @CO headers (sort cmd only)
/*
* How merging is handled
@@ -1101,8 +1108,8 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
const char *reg, int n_threads,
const htsFormat *in_fmt, const htsFormat *out_fmt)
{
- samFile *fpout, **fp;
- heap1_t *heap;
+ samFile *fpout, **fp = NULL;
+ heap1_t *heap = NULL;
bam_hdr_t *hout = NULL;
bam_hdr_t *hin = NULL;
int i, j, *RG_len = NULL;
@@ -1111,6 +1118,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
hts_itr_t **iter = NULL;
bam_hdr_t **hdr = NULL;
trans_tbl_t *translation_tbl = NULL;
+ int *rtrans = NULL;
merged_header_t *merged_hdr = init_merged_header();
if (!merged_hdr) return -1;
@@ -1127,20 +1135,36 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
if (hin == NULL) {
fprintf(stderr, "[bam_merge_core] couldn't read headers for '%s'\n",
headers);
- return -1;
+ goto mem_fail;
+ }
+ } else {
+ hout = bam_hdr_init();
+ if (!hout) {
+ fprintf(stderr, "[bam_merge_core] couldn't allocate bam header\n");
+ goto mem_fail;
}
+ hout->text = strdup("");
+ if (!hout->text) goto mem_fail;
}
g_is_by_qname = by_qname;
fp = (samFile**)calloc(n, sizeof(samFile*));
+ if (!fp) goto mem_fail;
heap = (heap1_t*)calloc(n, sizeof(heap1_t));
+ if (!heap) goto mem_fail;
iter = (hts_itr_t**)calloc(n, sizeof(hts_itr_t*));
+ if (!iter) goto mem_fail;
hdr = (bam_hdr_t**)calloc(n, sizeof(bam_hdr_t*));
+ if (!hdr) goto mem_fail;
translation_tbl = (trans_tbl_t*)calloc(n, sizeof(trans_tbl_t));
+ if (!translation_tbl) goto mem_fail;
RG = (char**)calloc(n, sizeof(char*));
+ if (!RG) goto mem_fail;
+
// prepare RG tag from file names
if (flag & MERGE_RG) {
RG_len = (int*)calloc(n, sizeof(int));
+ if (!RG_len) goto mem_fail;
for (i = 0; i != n; ++i) {
int l = strlen(fn[i]);
const char *s = fn[i];
@@ -1149,6 +1173,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
++j; l -= j;
RG[i] = (char*)calloc(l + 1, 1);
+ if (!RG[i]) goto mem_fail;
RG_len[i] = l;
strncpy(RG[i], s + j, l);
}
@@ -1159,7 +1184,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
trans_tbl_t dummy;
int res;
res = trans_tbl_init(merged_hdr, hin, &dummy, flag & MERGE_COMBINE_RG,
- flag & MERGE_COMBINE_PG, NULL);
+ flag & MERGE_COMBINE_PG, true, NULL);
trans_tbl_destroy(&dummy);
if (res) return -1; // FIXME: memory leak
}
@@ -1169,31 +1194,19 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
bam_hdr_t *hin;
fp[i] = sam_open_format(fn[i], "r", in_fmt);
if (fp[i] == NULL) {
- int j;
fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
- for (j = 0; j < i; ++j) {
- bam_hdr_destroy(hdr[i]);
- sam_close(fp[j]);
- }
- free(fp); free(heap);
- // FIXME: possible memory leak
- return -1;
+ goto fail;
}
hin = sam_hdr_read(fp[i]);
if (hin == NULL) {
fprintf(stderr, "[bam_merge_core] failed to read header for '%s'\n",
fn[i]);
- for (j = 0; j < i; ++j) {
- bam_hdr_destroy(hdr[i]);
- sam_close(fp[j]);
- }
- free(fp); free(heap);
- // FIXME: possible memory leak
- return -1;
+ goto fail;
}
if (trans_tbl_init(merged_hdr, hin, translation_tbl+i,
flag & MERGE_COMBINE_RG, flag & MERGE_COMBINE_PG,
+ (flag & MERGE_FIRST_CO)? (i == 0) : true,
RG[i]))
return -1; // FIXME: memory leak
@@ -1224,12 +1237,16 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
// If we're only merging a specified region move our iters to start at that point
if (reg) {
- int* rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
-
int tid, beg, end;
- const char *name_lim = hts_parse_reg(reg, &beg, &end);
+ const char *name_lim;
+
+ rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
+ if (!rtrans) goto mem_fail;
+
+ name_lim = hts_parse_reg(reg, &beg, &end);
if (name_lim) {
char *name = malloc(name_lim - reg + 1);
+ if (!name) goto mem_fail;
memcpy(name, reg, name_lim - reg);
name[name_lim - reg] = '\0';
tid = bam_name2id(hout, name);
@@ -1244,7 +1261,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
if (tid < 0) {
if (name_lim) fprintf(stderr, "[%s] Region \"%s\" specifies an unknown reference name\n", __func__, reg);
else fprintf(stderr, "[%s] Badly formatted region: \"%s\"\n", __func__, reg);
- return -1;
+ goto fail;
}
for (i = 0; i < n; ++i) {
hts_idx_t *idx = sam_index_load(fp[i], fn[i]);
@@ -1253,7 +1270,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
if (idx == NULL) {
fprintf(stderr, "[%s] failed to load index for %s. Random alignment retrieval only works for indexed BAM or CRAM files.\n",
__func__, fn[i]);
- return -1;
+ goto fail;
}
if (mapped_tid != INT32_MIN) {
iter[i] = sam_itr_queryi(idx, mapped_tid, beg, end);
@@ -1261,47 +1278,70 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
iter[i] = sam_itr_queryi(idx, HTS_IDX_NONE, 0, 0);
}
hts_idx_destroy(idx);
- if (iter[i] == NULL) break;
+ if (iter[i] == NULL) {
+ if (mapped_tid != INT32_MIN) {
+ fprintf(stderr,
+ "[%s] failed to get iterator over "
+ "{%s, %d, %d, %d}\n",
+ __func__, fn[i], mapped_tid, beg, end);
+ } else {
+ fprintf(stderr,
+ "[%s] failed to get iterator over "
+ "{%s, HTS_IDX_NONE, 0, 0}\n",
+ __func__, fn[i]);
+ }
+ goto fail;
+ }
}
free(rtrans);
+ rtrans = NULL;
} else {
for (i = 0; i < n; ++i) {
if (hdr[i] == NULL) {
iter[i] = sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0);
- if (iter[i] == NULL) break;
+ if (iter[i] == NULL) {
+ fprintf(stderr, "[%s] failed to get iterator\n", __func__);
+ goto fail;
+ }
}
else iter[i] = NULL;
}
}
- if (i < n) {
- fprintf(stderr, "[%s] Memory allocation failed\n", __func__);
- return -1;
- }
-
// Load the first read from each file into the heap
for (i = 0; i < n; ++i) {
heap1_t *h = heap + i;
+ int res;
h->i = i;
h->b = bam_init1();
- if ((iter[i]? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b)) >= 0) {
+ if (!h->b) goto mem_fail;
+ res = iter[i] ? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b);
+ if (res >= 0) {
bam_translate(h->b, translation_tbl + i);
h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam_is_rev(h->b);
h->idx = idx++;
}
- else {
+ else if (res == -1 && (!iter[i] || iter[i]->finished)) {
h->pos = HEAP_EMPTY;
bam_destroy1(h->b);
h->b = NULL;
+ } else {
+ fprintf(stderr, "[%s] failed to read first record from %s\n",
+ __func__, fn[i]);
+ goto fail;
}
}
// Open output file and write header
if ((fpout = sam_open_format(out, mode, out_fmt)) == 0) {
- fprintf(stderr, "[%s] fail to create the output file.\n", __func__);
+ fprintf(stderr, "[%s] failed to create \"%s\": %s\n", __func__, out, strerror(errno));
+ return -1;
+ }
+ if (sam_hdr_write(fpout, hout) != 0) {
+ fprintf(stderr, "[%s] failed to write header.\n", __func__);
+ sam_close(fpout);
return -1;
}
- sam_hdr_write(fpout, hout);
if (!(flag & MERGE_UNCOMP)) hts_set_threads(fpout, n_threads);
// Begin the actual merge
@@ -1313,16 +1353,24 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
if (rg) bam_aux_del(b, rg);
bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
}
- sam_write1(fpout, hout, b);
+ if (sam_write1(fpout, hout, b) < 0) {
+ fprintf(stderr, "[%s] failed to write to output file.\n", __func__);
+ sam_close(fpout);
+ return -1;
+ }
if ((j = (iter[heap->i]? sam_itr_next(fp[heap->i], iter[heap->i], b) : sam_read1(fp[heap->i], hdr[heap->i], b))) >= 0) {
bam_translate(b, translation_tbl + heap->i);
heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam_is_rev(b);
heap->idx = idx++;
- } else if (j == -1) {
+ } else if (j == -1 && (!iter[heap->i] || iter[heap->i]->finished)) {
heap->pos = HEAP_EMPTY;
bam_destroy1(heap->b);
heap->b = NULL;
- } else fprintf(stderr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
+ } else {
+ fprintf(stderr, "[bam_merge_core] error: '%s' is truncated.\n",
+ fn[heap->i]);
+ goto fail;
+ }
ks_heapadjust(heap, 0, n, heap);
}
@@ -1340,9 +1388,39 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
bam_hdr_destroy(hin);
bam_hdr_destroy(hout);
free_merged_header(merged_hdr);
- sam_close(fpout);
free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
+ if (sam_close(fpout) < 0) {
+ fprintf(stderr, "[bam_merge_core] error closing output file\n");
+ return -1;
+ }
return 0;
+
+ mem_fail:
+ fprintf(stderr, "[bam_merge_core] Out of memory\n");
+
+ fail:
+ if (flag & MERGE_RG) {
+ if (RG) {
+ for (i = 0; i != n; ++i) free(RG[i]);
+ }
+ free(RG_len);
+ }
+ for (i = 0; i < n; ++i) {
+ if (translation_tbl && translation_tbl[i].tid_trans) trans_tbl_destroy(translation_tbl + i);
+ if (iter && iter[i]) hts_itr_destroy(iter[i]);
+ if (hdr && hdr[i]) bam_hdr_destroy(hdr[i]);
+ if (fp && fp[i]) sam_close(fp[i]);
+ if (heap && heap[i].b) bam_destroy1(heap[i].b);
+ }
+ if (hout) bam_hdr_destroy(hout);
+ free(RG);
+ free(translation_tbl);
+ free(hdr);
+ free(iter);
+ free(heap);
+ free(fp);
+ free(rtrans);
+ return -1;
}
// Unused here but may be used by legacy samtools-using third-party code
@@ -1361,7 +1439,7 @@ static void merge_usage(FILE *to)
"Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>]\n"
"\n"
"Options:\n"
-" -n Sort by read names\n"
+" -n Input files are sorted by read name\n"
" -r Attach RG tag (inferred from file names)\n"
" -u Uncompressed BAM output\n"
" -f Overwrite the output BAM if exist\n"
@@ -1541,29 +1619,40 @@ typedef struct {
bam1_p *buf;
const bam_hdr_t *h;
int index;
+ int error;
} worker_t;
-static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
+// Returns 0 for success
+// -1 for failure
+static int write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
{
size_t i;
samFile* fp;
fp = sam_open_format(fn, mode, fmt);
- if (fp == NULL) return;
- sam_hdr_write(fp, h);
+ if (fp == NULL) return -1;
+ if (sam_hdr_write(fp, h) != 0) goto fail;
if (n_threads > 1) hts_set_threads(fp, n_threads);
- for (i = 0; i < l; ++i)
- sam_write1(fp, h, buf[i]);
+ for (i = 0; i < l; ++i) {
+ if (sam_write1(fp, h, buf[i]) < 0) goto fail;
+ }
+ if (sam_close(fp) < 0) return -1;
+ return 0;
+ fail:
sam_close(fp);
+ return -1;
}
static void *worker(void *data)
{
worker_t *w = (worker_t*)data;
char *name;
+ w->error = 0;
ks_mergesort(sort, w->buf_len, w->buf, 0);
name = (char*)calloc(strlen(w->prefix) + 20, 1);
+ if (!name) { w->error = errno; return 0; }
sprintf(name, "%s.%.4d.bam", w->prefix, w->index);
- write_buffer(name, "wb1", w->buf_len, w->buf, w->h, 0, NULL);
+ if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
+ w->error = errno;
// Consider using CRAM temporary files if the final output is CRAM.
// Typically it is comparable speed while being smaller.
@@ -1572,7 +1661,8 @@ static void *worker(void *data)
// {"no_ref", CRAM_OPT_NO_REF, {1}, NULL}
// };
// opt[0].next = &opt[1];
-// write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt);
+// if (write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt) < 0)
+// w->error = errno;
free(name);
return 0;
@@ -1586,6 +1676,7 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
pthread_t *tid;
pthread_attr_t attr;
worker_t *w;
+ int n_failed = 0;
if (n_threads < 1) n_threads = 1;
if (k < n_threads * 64) n_threads = 1; // use a single thread if we only sort a small batch of records
@@ -1603,9 +1694,15 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
b += w[i].buf_len; rest -= w[i].buf_len;
pthread_create(&tid[i], &attr, worker, &w[i]);
}
- for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
+ for (i = 0; i < n_threads; ++i) {
+ pthread_join(tid[i], 0);
+ if (w[i].error != 0) {
+ fprintf(stderr, "[bam_sort_core] failed to create temporary file \"%s.%.4d.bam\": %s\n", prefix, w[i].index, strerror(w[i].error));
+ n_failed++;
+ }
+ }
free(tid); free(w);
- return n_files + n_threads;
+ return (n_failed == 0)? n_files + n_threads : -1;
}
/*!
@@ -1675,6 +1772,10 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
++k;
if (mem >= max_mem) {
n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
+ if (n_files < 0) {
+ ret = -1;
+ goto err;
+ }
mem = k = 0;
}
}
@@ -1687,10 +1788,18 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
// write the final output
if (n_files == 0) { // a single block
ks_mergesort(sort, k, buf, 0);
- write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt);
+ if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt) != 0) {
+ fprintf(stderr, "[bam_sort_core] failed to create \"%s\": %s\n", fnout, strerror(errno));
+ ret = -1;
+ goto err;
+ }
} else { // then merge
char **fns;
n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
+ if (n_files == -1) {
+ ret = -1;
+ goto err;
+ }
fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files);
fns = (char**)calloc(n_files, sizeof(char*));
for (i = 0; i < n_files; ++i) {
@@ -1698,8 +1807,8 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
sprintf(fns[i], "%s.%.4d.bam", prefix, i);
}
if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns,
- MERGE_COMBINE_RG|MERGE_COMBINE_PG, NULL, n_threads,
- in_fmt, out_fmt) < 0) {
+ MERGE_COMBINE_RG|MERGE_COMBINE_PG|MERGE_FIRST_CO,
+ NULL, n_threads, in_fmt, out_fmt) < 0) {
// Propagate bam_merge_core2() failure; it has already emitted a
// message explaining the failure, so no further message is needed.
goto err;
@@ -1754,6 +1863,7 @@ int bam_sort(int argc, char *argv[])
int c, nargs, is_by_qname = 0, ret, o_seen = 0, n_threads = 0, level = -1;
char *fnout = "-", modeout[12];
kstring_t tmpprefix = { 0, 0, NULL };
+ struct stat st;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
static const struct option lopts[] = {
@@ -1804,8 +1914,15 @@ int bam_sort(int argc, char *argv[])
sam_open_mode(modeout+1, fnout, NULL);
if (level >= 0) sprintf(strchr(modeout, '\0'), "%d", level < 9? level : 9);
- if (tmpprefix.l == 0)
- ksprintf(&tmpprefix, "%s.tmp", (nargs > 0)? argv[optind] : "STDIN");
+ if (tmpprefix.l == 0) {
+ if (strcmp(fnout, "-") != 0) ksprintf(&tmpprefix, "%s.tmp", fnout);
+ else kputc('.', &tmpprefix);
+ }
+ if (stat(tmpprefix.s, &st) == 0 && S_ISDIR(st.st_mode)) {
+ unsigned t = ((unsigned) time(NULL)) ^ ((unsigned) clock());
+ if (tmpprefix.s[tmpprefix.l-1] != '/') kputc('/', &tmpprefix);
+ ksprintf(&tmpprefix, "samtools.%d.%u.tmp", (int) getpid(), t % 10000);
+ }
ret = bam_sort_core_ext(is_by_qname, (nargs > 0)? argv[optind] : "-",
tmpprefix.s, fnout, modeout, max_mem, n_threads,
diff --git a/samtools/bam_sort.c.pysam.c b/samtools/bam_sort.c.pysam.c
index d486beb..b2b625d 100644
--- a/samtools/bam_sort.c.pysam.c
+++ b/samtools/bam_sort.c.pysam.c
@@ -2,7 +2,7 @@
/* bam_sort.c -- sorting and merging.
- Copyright (C) 2008-2015 Genome Research Ltd.
+ Copyright (C) 2008-2016 Genome Research Ltd.
Portions copyright (C) 2009-2012 Broad Institute.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdbool.h>
#include <stdlib.h>
#include <ctype.h>
@@ -33,6 +35,7 @@ DEALINGS IN THE SOFTWARE. */
#include <stdio.h>
#include <string.h>
#include <time.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include <assert.h>
@@ -406,7 +409,7 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
hdr_match_t *new_sq_matches = NULL;
char *text;
hdr_match_t matches[2];
- int32_t i, missing;
+ int32_t i;
int32_t old_n_targets = merged_hdr->n_targets;
khiter_t iter;
int min_tid = -1;
@@ -483,7 +486,7 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
if (iter == kh_end(sq_tids)) {
// Warn about this, but it's not really fatal.
- fprintf(pysamerr, "[W::%s] @SQ SN (%.*s) found in text header but not binary header.\n",
+ fprintf(pysam_stderr, "[W::%s] @SQ SN (%.*s) found in text header but not binary header.\n",
__func__,
(int) (matches[1].rm_eo - matches[1].rm_so),
text + matches[1].rm_so);
@@ -504,20 +507,20 @@ static int trans_tbl_add_sq(merged_header_t* merged_hdr, bam_hdr_t *translate,
text += matches[0].rm_eo;
}
- // Check if any new targets have been missed
- missing = 0;
+ // Copy the @SQ headers found and recreate any missing from binary header.
for (i = 0; i < merged_hdr->n_targets - old_n_targets; i++) {
if (new_sq_matches[i].rm_so >= 0) {
if (match_to_ks(translate->text, &new_sq_matches[i], out_text))
goto memfail;
if (kputc('\n', out_text) == EOF) goto memfail;
} else {
- fprintf(pysamerr, "[E::%s] @SQ SN (%s) found in binary header but not text header.\n",
- __func__, merged_hdr->target_name[i + old_n_targets]);
- missing++;
+ if (kputs("@SQ\tSN:", out_text) == EOF ||
+ kputs(merged_hdr->target_name[i + old_n_targets], out_text) == EOF ||
+ kputs("\tLN:", out_text) == EOF ||
+ kputuw(merged_hdr->target_len[i + old_n_targets], out_text) == EOF ||
+ kputc('\n', out_text) == EOF) goto memfail;
}
}
- if (missing) goto fail;
free(new_sq_matches);
return 0;
@@ -720,7 +723,7 @@ static int finish_rg_pg(bool is_rg, klist_t(hdrln) *hdr_lines,
idx = kh_get(c2c, pg_map, id);
if (idx == kh_end(pg_map)) {
// Not found, warn.
- fprintf(pysamerr, "[W::%s] Tag %s%s not found in @PG records\n",
+ fprintf(pysam_stderr, "[W::%s] Tag %s%s not found in @PG records\n",
__func__, search + 1, id);
} else {
// Remember new id and splice points on original string
@@ -777,7 +780,7 @@ static int finish_rg_pg(bool is_rg, klist_t(hdrln) *hdr_lines,
static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
trans_tbl_t* tbl, bool merge_rg, bool merge_pg,
- char* rg_override)
+ bool copy_co, char* rg_override)
{
klist_t(hdrln) *rg_list = NULL;
klist_t(hdrln) *pg_list = NULL;
@@ -819,20 +822,22 @@ static int trans_tbl_init(merged_header_t* merged_hdr, bam_hdr_t* translate,
kl_destroy(hdrln, rg_list); rg_list = NULL;
kl_destroy(hdrln, pg_list); pg_list = NULL;
- // Just append @CO headers without translation
- const char *line, *end_pointer;
- for (line = translate->text; *line; line = end_pointer + 1) {
- end_pointer = strchr(line, '\n');
- if (strncmp(line, "@CO", 3) == 0) {
- if (end_pointer) {
- if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
- goto memfail;
- } else { // Last line with no trailing '\n'
- if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
- if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+ if (copy_co) {
+ // Just append @CO headers without translation
+ const char *line, *end_pointer;
+ for (line = translate->text; *line; line = end_pointer + 1) {
+ end_pointer = strchr(line, '\n');
+ if (strncmp(line, "@CO", 3) == 0) {
+ if (end_pointer) {
+ if (kputsn(line, end_pointer - line + 1, &merged_hdr->out_co) == EOF)
+ goto memfail;
+ } else { // Last line with no trailing '\n'
+ if (kputs(line, &merged_hdr->out_co) == EOF) goto memfail;
+ if (kputc('\n', &merged_hdr->out_co) == EOF) goto memfail;
+ }
}
+ if (end_pointer == NULL) break;
}
- if (end_pointer == NULL) break;
}
return 0;
@@ -869,7 +874,7 @@ static bam_hdr_t * finish_merged_header(merged_header_t *merged_hdr) {
+ ks_len(&merged_hdr->out_pg)
+ ks_len(&merged_hdr->out_co));
if (txt_sz >= INT32_MAX) {
- fprintf(pysamerr, "[%s] Output header text too long\n", __func__);
+ fprintf(pysam_stderr, "[%s] Output header text too long\n", __func__);
return NULL;
}
@@ -986,7 +991,7 @@ static void bam_translate(bam1_t* b, trans_tbl_t* tbl)
}
} else {
char *tmp = strdup(decoded_rg);
- fprintf(pysamerr,
+ fprintf(pysam_stderr,
"[bam_translate] RG tag \"%s\" on read \"%s\" encountered "
"with no corresponding entry in header, tag lost. "
"Unknown tags are only reported once per input file for "
@@ -1016,7 +1021,7 @@ static void bam_translate(bam1_t* b, trans_tbl_t* tbl)
}
} else {
char *tmp = strdup(decoded_pg);
- fprintf(pysamerr,
+ fprintf(pysam_stderr,
"[bam_translate] PG tag \"%s\" on read \"%s\" encountered "
"with no corresponding entry in header, tag lost. "
"Unknown tags are only reported once per input file for "
@@ -1038,6 +1043,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
// Create reverse translation table for tids
int* rtrans = (int*)malloc(sizeof(int32_t)*n*n_targets);
const int32_t NOTID = INT32_MIN;
+ if (!rtrans) return NULL;
memset_pattern4((void*)rtrans, &NOTID, sizeof(int32_t)*n*n_targets);
int i;
for (i = 0; i < n; ++i) {
@@ -1058,6 +1064,7 @@ int* rtrans_build(int n, int n_targets, trans_tbl_t* translation_tbl)
#define MERGE_FORCE 8 // Overwrite output BAM if it exists
#define MERGE_COMBINE_RG 16 // Combine RG tags frather than redefining them
#define MERGE_COMBINE_PG 32 // Combine PG tags frather than redefining them
+#define MERGE_FIRST_CO 64 // Use only first file's @CO headers (sort cmd only)
/*
* How merging is handled
@@ -1103,8 +1110,8 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
const char *reg, int n_threads,
const htsFormat *in_fmt, const htsFormat *out_fmt)
{
- samFile *fpout, **fp;
- heap1_t *heap;
+ samFile *fpout, **fp = NULL;
+ heap1_t *heap = NULL;
bam_hdr_t *hout = NULL;
bam_hdr_t *hin = NULL;
int i, j, *RG_len = NULL;
@@ -1113,6 +1120,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
hts_itr_t **iter = NULL;
bam_hdr_t **hdr = NULL;
trans_tbl_t *translation_tbl = NULL;
+ int *rtrans = NULL;
merged_header_t *merged_hdr = init_merged_header();
if (!merged_hdr) return -1;
@@ -1121,28 +1129,44 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
samFile* fpheaders = sam_open(headers, "r");
if (fpheaders == NULL) {
const char *message = strerror(errno);
- fprintf(pysamerr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
+ fprintf(pysam_stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
return -1;
}
hin = sam_hdr_read(fpheaders);
sam_close(fpheaders);
if (hin == NULL) {
- fprintf(pysamerr, "[bam_merge_core] couldn't read headers for '%s'\n",
+ fprintf(pysam_stderr, "[bam_merge_core] couldn't read headers for '%s'\n",
headers);
- return -1;
+ goto mem_fail;
+ }
+ } else {
+ hout = bam_hdr_init();
+ if (!hout) {
+ fprintf(pysam_stderr, "[bam_merge_core] couldn't allocate bam header\n");
+ goto mem_fail;
}
+ hout->text = strdup("");
+ if (!hout->text) goto mem_fail;
}
g_is_by_qname = by_qname;
fp = (samFile**)calloc(n, sizeof(samFile*));
+ if (!fp) goto mem_fail;
heap = (heap1_t*)calloc(n, sizeof(heap1_t));
+ if (!heap) goto mem_fail;
iter = (hts_itr_t**)calloc(n, sizeof(hts_itr_t*));
+ if (!iter) goto mem_fail;
hdr = (bam_hdr_t**)calloc(n, sizeof(bam_hdr_t*));
+ if (!hdr) goto mem_fail;
translation_tbl = (trans_tbl_t*)calloc(n, sizeof(trans_tbl_t));
+ if (!translation_tbl) goto mem_fail;
RG = (char**)calloc(n, sizeof(char*));
+ if (!RG) goto mem_fail;
+
// prepare RG tag from file names
if (flag & MERGE_RG) {
RG_len = (int*)calloc(n, sizeof(int));
+ if (!RG_len) goto mem_fail;
for (i = 0; i != n; ++i) {
int l = strlen(fn[i]);
const char *s = fn[i];
@@ -1151,6 +1175,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
++j; l -= j;
RG[i] = (char*)calloc(l + 1, 1);
+ if (!RG[i]) goto mem_fail;
RG_len[i] = l;
strncpy(RG[i], s + j, l);
}
@@ -1161,7 +1186,7 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
trans_tbl_t dummy;
int res;
res = trans_tbl_init(merged_hdr, hin, &dummy, flag & MERGE_COMBINE_RG,
- flag & MERGE_COMBINE_PG, NULL);
+ flag & MERGE_COMBINE_PG, true, NULL);
trans_tbl_destroy(&dummy);
if (res) return -1; // FIXME: memory leak
}
@@ -1171,31 +1196,19 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
bam_hdr_t *hin;
fp[i] = sam_open_format(fn[i], "r", in_fmt);
if (fp[i] == NULL) {
- int j;
- fprintf(pysamerr, "[bam_merge_core] fail to open file %s\n", fn[i]);
- for (j = 0; j < i; ++j) {
- bam_hdr_destroy(hdr[i]);
- sam_close(fp[j]);
- }
- free(fp); free(heap);
- // FIXME: possible memory leak
- return -1;
+ fprintf(pysam_stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
+ goto fail;
}
hin = sam_hdr_read(fp[i]);
if (hin == NULL) {
- fprintf(pysamerr, "[bam_merge_core] failed to read header for '%s'\n",
+ fprintf(pysam_stderr, "[bam_merge_core] failed to read header for '%s'\n",
fn[i]);
- for (j = 0; j < i; ++j) {
- bam_hdr_destroy(hdr[i]);
- sam_close(fp[j]);
- }
- free(fp); free(heap);
- // FIXME: possible memory leak
- return -1;
+ goto fail;
}
if (trans_tbl_init(merged_hdr, hin, translation_tbl+i,
flag & MERGE_COMBINE_RG, flag & MERGE_COMBINE_PG,
+ (flag & MERGE_FIRST_CO)? (i == 0) : true,
RG[i]))
return -1; // FIXME: memory leak
@@ -1205,13 +1218,13 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
else { bam_hdr_destroy(hin); hdr[i] = NULL; }
if ((translation_tbl+i)->lost_coord_sort && !by_qname) {
- fprintf(pysamerr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
+ fprintf(pysam_stderr, "[bam_merge_core] Order of targets in file %s caused coordinate sort to be lost\n", fn[i]);
}
}
// Did we get an @HD line?
if (!merged_hdr->have_hd) {
- fprintf(pysamerr, "[W::%s] No @HD tag found.\n", __func__);
+ fprintf(pysam_stderr, "[W::%s] No @HD tag found.\n", __func__);
/* FIXME: Should we add an @HD line here, and if so what should
we put in it? Ideally we want a way of getting htslib to tell
us the SAM version number to assume given no @HD line. Is
@@ -1226,12 +1239,16 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
// If we're only merging a specified region move our iters to start at that point
if (reg) {
- int* rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
-
int tid, beg, end;
- const char *name_lim = hts_parse_reg(reg, &beg, &end);
+ const char *name_lim;
+
+ rtrans = rtrans_build(n, hout->n_targets, translation_tbl);
+ if (!rtrans) goto mem_fail;
+
+ name_lim = hts_parse_reg(reg, &beg, &end);
if (name_lim) {
char *name = malloc(name_lim - reg + 1);
+ if (!name) goto mem_fail;
memcpy(name, reg, name_lim - reg);
name[name_lim - reg] = '\0';
tid = bam_name2id(hout, name);
@@ -1244,18 +1261,18 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
end = INT_MAX;
}
if (tid < 0) {
- if (name_lim) fprintf(pysamerr, "[%s] Region \"%s\" specifies an unknown reference name\n", __func__, reg);
- else fprintf(pysamerr, "[%s] Badly formatted region: \"%s\"\n", __func__, reg);
- return -1;
+ if (name_lim) fprintf(pysam_stderr, "[%s] Region \"%s\" specifies an unknown reference name\n", __func__, reg);
+ else fprintf(pysam_stderr, "[%s] Badly formatted region: \"%s\"\n", __func__, reg);
+ goto fail;
}
for (i = 0; i < n; ++i) {
hts_idx_t *idx = sam_index_load(fp[i], fn[i]);
// (rtrans[i*n+tid]) Look up what hout tid translates to in input tid space
int mapped_tid = rtrans[i*hout->n_targets+tid];
if (idx == NULL) {
- fprintf(pysamerr, "[%s] failed to load index for %s. Random alignment retrieval only works for indexed BAM or CRAM files.\n",
+ fprintf(pysam_stderr, "[%s] failed to load index for %s. Random alignment retrieval only works for indexed BAM or CRAM files.\n",
__func__, fn[i]);
- return -1;
+ goto fail;
}
if (mapped_tid != INT32_MIN) {
iter[i] = sam_itr_queryi(idx, mapped_tid, beg, end);
@@ -1263,47 +1280,70 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
iter[i] = sam_itr_queryi(idx, HTS_IDX_NONE, 0, 0);
}
hts_idx_destroy(idx);
- if (iter[i] == NULL) break;
+ if (iter[i] == NULL) {
+ if (mapped_tid != INT32_MIN) {
+ fprintf(pysam_stderr,
+ "[%s] failed to get iterator over "
+ "{%s, %d, %d, %d}\n",
+ __func__, fn[i], mapped_tid, beg, end);
+ } else {
+ fprintf(pysam_stderr,
+ "[%s] failed to get iterator over "
+ "{%s, HTS_IDX_NONE, 0, 0}\n",
+ __func__, fn[i]);
+ }
+ goto fail;
+ }
}
free(rtrans);
+ rtrans = NULL;
} else {
for (i = 0; i < n; ++i) {
if (hdr[i] == NULL) {
iter[i] = sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0);
- if (iter[i] == NULL) break;
+ if (iter[i] == NULL) {
+ fprintf(pysam_stderr, "[%s] failed to get iterator\n", __func__);
+ goto fail;
+ }
}
else iter[i] = NULL;
}
}
- if (i < n) {
- fprintf(pysamerr, "[%s] Memory allocation failed\n", __func__);
- return -1;
- }
-
// Load the first read from each file into the heap
for (i = 0; i < n; ++i) {
heap1_t *h = heap + i;
+ int res;
h->i = i;
h->b = bam_init1();
- if ((iter[i]? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b)) >= 0) {
+ if (!h->b) goto mem_fail;
+ res = iter[i] ? sam_itr_next(fp[i], iter[i], h->b) : sam_read1(fp[i], hdr[i], h->b);
+ if (res >= 0) {
bam_translate(h->b, translation_tbl + i);
h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam_is_rev(h->b);
h->idx = idx++;
}
- else {
+ else if (res == -1 && (!iter[i] || iter[i]->finished)) {
h->pos = HEAP_EMPTY;
bam_destroy1(h->b);
h->b = NULL;
+ } else {
+ fprintf(pysam_stderr, "[%s] failed to read first record from %s\n",
+ __func__, fn[i]);
+ goto fail;
}
}
// Open output file and write header
if ((fpout = sam_open_format(out, mode, out_fmt)) == 0) {
- fprintf(pysamerr, "[%s] fail to create the output file.\n", __func__);
+ fprintf(pysam_stderr, "[%s] failed to create \"%s\": %s\n", __func__, out, strerror(errno));
+ return -1;
+ }
+ if (sam_hdr_write(fpout, hout) != 0) {
+ fprintf(pysam_stderr, "[%s] failed to write header.\n", __func__);
+ sam_close(fpout);
return -1;
}
- sam_hdr_write(fpout, hout);
if (!(flag & MERGE_UNCOMP)) hts_set_threads(fpout, n_threads);
// Begin the actual merge
@@ -1315,16 +1355,24 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
if (rg) bam_aux_del(b, rg);
bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
}
- sam_write1(fpout, hout, b);
+ if (sam_write1(fpout, hout, b) < 0) {
+ fprintf(pysam_stderr, "[%s] failed to write to output file.\n", __func__);
+ sam_close(fpout);
+ return -1;
+ }
if ((j = (iter[heap->i]? sam_itr_next(fp[heap->i], iter[heap->i], b) : sam_read1(fp[heap->i], hdr[heap->i], b))) >= 0) {
bam_translate(b, translation_tbl + heap->i);
heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam_is_rev(b);
heap->idx = idx++;
- } else if (j == -1) {
+ } else if (j == -1 && (!iter[heap->i] || iter[heap->i]->finished)) {
heap->pos = HEAP_EMPTY;
bam_destroy1(heap->b);
heap->b = NULL;
- } else fprintf(pysamerr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
+ } else {
+ fprintf(pysam_stderr, "[bam_merge_core] error: '%s' is truncated.\n",
+ fn[heap->i]);
+ goto fail;
+ }
ks_heapadjust(heap, 0, n, heap);
}
@@ -1342,9 +1390,39 @@ int bam_merge_core2(int by_qname, const char *out, const char *mode,
bam_hdr_destroy(hin);
bam_hdr_destroy(hout);
free_merged_header(merged_hdr);
- sam_close(fpout);
free(RG); free(translation_tbl); free(fp); free(heap); free(iter); free(hdr);
+ if (sam_close(fpout) < 0) {
+ fprintf(pysam_stderr, "[bam_merge_core] error closing output file\n");
+ return -1;
+ }
return 0;
+
+ mem_fail:
+ fprintf(pysam_stderr, "[bam_merge_core] Out of memory\n");
+
+ fail:
+ if (flag & MERGE_RG) {
+ if (RG) {
+ for (i = 0; i != n; ++i) free(RG[i]);
+ }
+ free(RG_len);
+ }
+ for (i = 0; i < n; ++i) {
+ if (translation_tbl && translation_tbl[i].tid_trans) trans_tbl_destroy(translation_tbl + i);
+ if (iter && iter[i]) hts_itr_destroy(iter[i]);
+ if (hdr && hdr[i]) bam_hdr_destroy(hdr[i]);
+ if (fp && fp[i]) sam_close(fp[i]);
+ if (heap && heap[i].b) bam_destroy1(heap[i].b);
+ }
+ if (hout) bam_hdr_destroy(hout);
+ free(RG);
+ free(translation_tbl);
+ free(hdr);
+ free(iter);
+ free(heap);
+ free(fp);
+ free(rtrans);
+ return -1;
}
// Unused here but may be used by legacy samtools-using third-party code
@@ -1363,7 +1441,7 @@ static void merge_usage(FILE *to)
"Usage: samtools merge [-nurlf] [-h inh.sam] [-b <bamlist.fofn>] <out.bam> <in1.bam> [<in2.bam> ... <inN.bam>]\n"
"\n"
"Options:\n"
-" -n Sort by read names\n"
+" -n Input files are sorted by read name\n"
" -r Attach RG tag (inferred from file names)\n"
" -u Uncompressed BAM output\n"
" -f Overwrite the output BAM if exist\n"
@@ -1396,7 +1474,7 @@ int bam_merge(int argc, char *argv[])
};
if (argc == 1) {
- merge_usage(stdout);
+ merge_usage(pysam_stdout);
return 0;
}
@@ -1426,7 +1504,7 @@ int bam_merge(int argc, char *argv[])
fn_size += nfiles;
}
else {
- fprintf(pysamerr, "[%s] Invalid file list \"%s\"\n", __func__, optarg);
+ fprintf(pysam_stderr, "[%s] Invalid file list \"%s\"\n", __func__, optarg);
ret = 1;
}
break;
@@ -1434,12 +1512,12 @@ int bam_merge(int argc, char *argv[])
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
/* else fall-through */
- case '?': merge_usage(pysamerr); return 1;
+ case '?': merge_usage(pysam_stderr); return 1;
}
}
if ( argc - optind < 1 ) {
- fprintf(pysamerr, "You must at least specify the output file.\n");
- merge_usage(pysamerr);
+ fprintf(pysam_stderr, "You must at least specify the output file.\n");
+ merge_usage(pysam_stderr);
return 1;
}
@@ -1448,7 +1526,7 @@ int bam_merge(int argc, char *argv[])
FILE *fp = fopen(argv[optind], "rb");
if (fp != NULL) {
fclose(fp);
- fprintf(pysamerr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
+ fprintf(pysam_stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
return 1;
}
}
@@ -1461,8 +1539,8 @@ int bam_merge(int argc, char *argv[])
memcpy(fn+fn_size, argv + (optind+1), nargcfiles * sizeof(char*));
}
if (fn_size+nargcfiles < 1) {
- fprintf(pysamerr, "You must specify at least one (and usually two or more) input files.\n");
- merge_usage(pysamerr);
+ fprintf(pysam_stderr, "You must specify at least one (and usually two or more) input files.\n");
+ merge_usage(pysam_stderr);
return 1;
}
strcpy(mode, "wb");
@@ -1543,29 +1621,40 @@ typedef struct {
bam1_p *buf;
const bam_hdr_t *h;
int index;
+ int error;
} worker_t;
-static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
+// Returns 0 for success
+// -1 for failure
+static int write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads, const htsFormat *fmt)
{
size_t i;
samFile* fp;
fp = sam_open_format(fn, mode, fmt);
- if (fp == NULL) return;
- sam_hdr_write(fp, h);
+ if (fp == NULL) return -1;
+ if (sam_hdr_write(fp, h) != 0) goto fail;
if (n_threads > 1) hts_set_threads(fp, n_threads);
- for (i = 0; i < l; ++i)
- sam_write1(fp, h, buf[i]);
+ for (i = 0; i < l; ++i) {
+ if (sam_write1(fp, h, buf[i]) < 0) goto fail;
+ }
+ if (sam_close(fp) < 0) return -1;
+ return 0;
+ fail:
sam_close(fp);
+ return -1;
}
static void *worker(void *data)
{
worker_t *w = (worker_t*)data;
char *name;
+ w->error = 0;
ks_mergesort(sort, w->buf_len, w->buf, 0);
name = (char*)calloc(strlen(w->prefix) + 20, 1);
+ if (!name) { w->error = errno; return 0; }
sprintf(name, "%s.%.4d.bam", w->prefix, w->index);
- write_buffer(name, "wb1", w->buf_len, w->buf, w->h, 0, NULL);
+ if (write_buffer(name, "wbx1", w->buf_len, w->buf, w->h, 0, NULL) < 0)
+ w->error = errno;
// Consider using CRAM temporary files if the final output is CRAM.
// Typically it is comparable speed while being smaller.
@@ -1574,7 +1663,8 @@ static void *worker(void *data)
// {"no_ref", CRAM_OPT_NO_REF, {1}, NULL}
// };
// opt[0].next = &opt[1];
-// write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt);
+// if (write_buffer(name, "wc1", w->buf_len, w->buf, w->h, 0, opt) < 0)
+// w->error = errno;
free(name);
return 0;
@@ -1588,6 +1678,7 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
pthread_t *tid;
pthread_attr_t attr;
worker_t *w;
+ int n_failed = 0;
if (n_threads < 1) n_threads = 1;
if (k < n_threads * 64) n_threads = 1; // use a single thread if we only sort a small batch of records
@@ -1605,9 +1696,15 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c
b += w[i].buf_len; rest -= w[i].buf_len;
pthread_create(&tid[i], &attr, worker, &w[i]);
}
- for (i = 0; i < n_threads; ++i) pthread_join(tid[i], 0);
+ for (i = 0; i < n_threads; ++i) {
+ pthread_join(tid[i], 0);
+ if (w[i].error != 0) {
+ fprintf(pysam_stderr, "[bam_sort_core] failed to create temporary file \"%s.%.4d.bam\": %s\n", prefix, w[i].index, strerror(w[i].error));
+ n_failed++;
+ }
+ }
free(tid); free(w);
- return n_files + n_threads;
+ return (n_failed == 0)? n_files + n_threads : -1;
}
/*!
@@ -1647,12 +1744,12 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
fp = sam_open_format(fn, "r", in_fmt);
if (fp == NULL) {
const char *message = strerror(errno);
- fprintf(pysamerr, "[bam_sort_core] fail to open '%s': %s\n", fn, message);
+ fprintf(pysam_stderr, "[bam_sort_core] fail to open '%s': %s\n", fn, message);
return -2;
}
header = sam_hdr_read(fp);
if (header == NULL) {
- fprintf(pysamerr, "[bam_sort_core] failed to read header for '%s'\n", fn);
+ fprintf(pysam_stderr, "[bam_sort_core] failed to read header for '%s'\n", fn);
goto err;
}
if (is_by_qname) change_SO(header, "queryname");
@@ -1677,11 +1774,15 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
++k;
if (mem >= max_mem) {
n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
+ if (n_files < 0) {
+ ret = -1;
+ goto err;
+ }
mem = k = 0;
}
}
if (ret != -1) {
- fprintf(pysamerr, "[bam_sort_core] truncated file. Aborting.\n");
+ fprintf(pysam_stderr, "[bam_sort_core] truncated file. Aborting.\n");
ret = -1;
goto err;
}
@@ -1689,19 +1790,27 @@ int bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix,
// write the final output
if (n_files == 0) { // a single block
ks_mergesort(sort, k, buf, 0);
- write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt);
+ if (write_buffer(fnout, modeout, k, buf, header, n_threads, out_fmt) != 0) {
+ fprintf(pysam_stderr, "[bam_sort_core] failed to create \"%s\": %s\n", fnout, strerror(errno));
+ ret = -1;
+ goto err;
+ }
} else { // then merge
char **fns;
n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads);
- fprintf(pysamerr, "[bam_sort_core] merging from %d files...\n", n_files);
+ if (n_files == -1) {
+ ret = -1;
+ goto err;
+ }
+ fprintf(pysam_stderr, "[bam_sort_core] merging from %d files...\n", n_files);
fns = (char**)calloc(n_files, sizeof(char*));
for (i = 0; i < n_files; ++i) {
fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
sprintf(fns[i], "%s.%.4d.bam", prefix, i);
}
if (bam_merge_core2(is_by_qname, fnout, modeout, NULL, n_files, fns,
- MERGE_COMBINE_RG|MERGE_COMBINE_PG, NULL, n_threads,
- in_fmt, out_fmt) < 0) {
+ MERGE_COMBINE_RG|MERGE_COMBINE_PG|MERGE_FIRST_CO,
+ NULL, n_threads, in_fmt, out_fmt) < 0) {
// Propagate bam_merge_core2() failure; it has already emitted a
// message explaining the failure, so no further message is needed.
goto err;
@@ -1756,6 +1865,7 @@ int bam_sort(int argc, char *argv[])
int c, nargs, is_by_qname = 0, ret, o_seen = 0, n_threads = 0, level = -1;
char *fnout = "-", modeout[12];
kstring_t tmpprefix = { 0, 0, NULL };
+ struct stat st;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
static const struct option lopts[] = {
@@ -1782,22 +1892,22 @@ int bam_sort(int argc, char *argv[])
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
/* else fall-through */
- case '?': sort_usage(pysamerr); ret = EXIT_FAILURE; goto sort_end;
+ case '?': sort_usage(pysam_stderr); ret = EXIT_FAILURE; goto sort_end;
}
}
nargs = argc - optind;
if (nargs == 0 && isatty(STDIN_FILENO)) {
- sort_usage(stdout);
+ sort_usage(pysam_stdout);
ret = EXIT_SUCCESS;
goto sort_end;
}
else if (nargs >= 2) {
// If exactly two, user probably tried to specify legacy <out.prefix>
if (nargs == 2)
- fprintf(pysamerr, "[bam_sort] Use -T PREFIX / -o FILE to specify temporary and final output files\n");
+ fprintf(pysam_stderr, "[bam_sort] Use -T PREFIX / -o FILE to specify temporary and final output files\n");
- sort_usage(pysamerr);
+ sort_usage(pysam_stderr);
ret = EXIT_FAILURE;
goto sort_end;
}
@@ -1806,8 +1916,15 @@ int bam_sort(int argc, char *argv[])
sam_open_mode(modeout+1, fnout, NULL);
if (level >= 0) sprintf(strchr(modeout, '\0'), "%d", level < 9? level : 9);
- if (tmpprefix.l == 0)
- ksprintf(&tmpprefix, "%s.tmp", (nargs > 0)? argv[optind] : "STDIN");
+ if (tmpprefix.l == 0) {
+ if (strcmp(fnout, "-") != 0) ksprintf(&tmpprefix, "%s.tmp", fnout);
+ else kputc('.', &tmpprefix);
+ }
+ if (stat(tmpprefix.s, &st) == 0 && S_ISDIR(st.st_mode)) {
+ unsigned t = ((unsigned) time(NULL)) ^ ((unsigned) clock());
+ if (tmpprefix.s[tmpprefix.l-1] != '/') kputc('/', &tmpprefix);
+ ksprintf(&tmpprefix, "samtools.%d.%u.tmp", (int) getpid(), t % 10000);
+ }
ret = bam_sort_core_ext(is_by_qname, (nargs > 0)? argv[optind] : "-",
tmpprefix.s, fnout, modeout, max_mem, n_threads,
@@ -1819,7 +1936,7 @@ int bam_sort(int argc, char *argv[])
// If we failed on opening the input file & it has no .bam/.cram/etc
// extension, the user probably tried legacy -o <infile> <out.prefix>
if (ret == -2 && o_seen && nargs > 0 && sam_open_mode(dummy, argv[optind], NULL) < 0)
- fprintf(pysamerr, "[bam_sort] Note the <out.prefix> argument has been replaced by -T/-o options\n");
+ fprintf(pysam_stderr, "[bam_sort] Note the <out.prefix> argument has been replaced by -T/-o options\n");
ret = EXIT_FAILURE;
}
diff --git a/samtools/bam_split.c b/samtools/bam_split.c
index e44acc0..9a2998a 100644
--- a/samtools/bam_split.c
+++ b/samtools/bam_split.c
@@ -1,6 +1,6 @@
/* bam_split.c -- split subcommand.
- Copyright (C) 2013, 2014 Genome Research Ltd.
+ Copyright (C) 2013-2015 Genome Research Ltd.
Author: Martin Pollard <mp15 at sanger.ac.uk>
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/sam.h>
#include <string.h>
#include <stdio.h>
@@ -55,6 +57,7 @@ struct state {
bam_hdr_t* unaccounted_header;
size_t output_count;
char** rg_id;
+ char **rg_output_file_name;
samFile** rg_output_file;
bam_hdr_t** rg_output_header;
kh_c2i_t* rg_hash;
@@ -62,7 +65,7 @@ struct state {
typedef struct state state_t;
-static int cleanup_state(state_t* status);
+static int cleanup_state(state_t* status, bool check_close);
static void cleanup_opts(parsed_opts_t* opts);
static void usage(FILE *write_to)
@@ -334,7 +337,7 @@ static state_t* init(parsed_opts_t* opts)
if (retval->merged_input_header == NULL) {
fprintf(stderr, "Could not read header for file '%s'\n",
opts->merged_input_name);
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
@@ -343,14 +346,14 @@ static state_t* init(parsed_opts_t* opts)
samFile* hdr_load = sam_open_format(opts->unaccounted_header_name, "r", &opts->ga.in);
if (!hdr_load) {
fprintf(stderr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
retval->unaccounted_header = sam_hdr_read(hdr_load);
if (retval->unaccounted_header == NULL) {
fprintf(stderr, "Could not read header for file '%s'\n",
opts->unaccounted_header_name);
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
sam_close(hdr_load);
@@ -361,7 +364,7 @@ static state_t* init(parsed_opts_t* opts)
retval->unaccounted_file = sam_open_format(opts->unaccounted_name, "wb", &opts->ga.out);
if (retval->unaccounted_file == NULL) {
fprintf(stderr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
}
@@ -370,12 +373,13 @@ static state_t* init(parsed_opts_t* opts)
if (!count_RG(retval->merged_input_header, &retval->output_count, &retval->rg_id)) return NULL;
if (opts->verbose) fprintf(stderr, "@RG's found %zu\n",retval->output_count);
+ retval->rg_output_file_name = (char **)calloc(retval->output_count, sizeof(char *));
retval->rg_output_file = (samFile**)calloc(retval->output_count, sizeof(samFile*));
retval->rg_output_header = (bam_hdr_t**)calloc(retval->output_count, sizeof(bam_hdr_t*));
retval->rg_hash = kh_init_c2i();
- if (!retval->rg_output_file || !retval->rg_output_header) {
+ if (!retval->rg_output_file_name || !retval->rg_output_file || !retval->rg_output_header || !retval->rg_hash) {
fprintf(stderr, "Could not allocate memory for output file array. Out of memory?");
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
@@ -383,7 +387,7 @@ static state_t* init(parsed_opts_t* opts)
char* input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
if (!input_base_name) {
fprintf(stderr, "Out of memory\n");
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
char* extension = strrchr(input_base_name, '.');
@@ -399,16 +403,17 @@ static state_t* init(parsed_opts_t* opts)
&opts->ga.out);
if ( output_filename == NULL ) {
- fprintf(stderr, "Error expanding output filename format string.\r\n");
- cleanup_state(retval);
+ fprintf(stderr, "Error expanding output filename format string.\n");
+ cleanup_state(retval, false);
free(input_base_name);
return NULL;
}
+ retval->rg_output_file_name[i] = output_filename;
retval->rg_output_file[i] = sam_open_format(output_filename, "wb", &opts->ga.out);
if (retval->rg_output_file[i] == NULL) {
- fprintf(stderr, "Could not open output file: %s\r\n", output_filename);
- cleanup_state(retval);
+ fprintf(stderr, "Could not open output file: %s\n", output_filename);
+ cleanup_state(retval, false);
free(input_base_name);
return NULL;
}
@@ -421,13 +426,11 @@ static state_t* init(parsed_opts_t* opts)
// Set and edit header
retval->rg_output_header[i] = bam_hdr_dup(retval->merged_input_header);
if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i]) ) {
- fprintf(stderr, "Could not rewrite header for file: %s\r\n", output_filename);
- cleanup_state(retval);
- free(output_filename);
+ fprintf(stderr, "Could not rewrite header for file: %s\n", output_filename);
+ cleanup_state(retval, false);
free(input_base_name);
return NULL;
}
- free(output_filename);
}
free(input_base_name);
@@ -444,7 +447,8 @@ static bool split(state_t* state)
size_t i;
for (i = 0; i < state->output_count; i++) {
if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
- fprintf(stderr, "Could not write output file header\n");
+ fprintf(stderr, "Could not write output file header for '%s'\n",
+ state->rg_output_file_name[i]);
return false;
}
}
@@ -457,7 +461,7 @@ static bool split(state_t* state)
bam_destroy1(file_read);
file_read = NULL;
if (r < -1) {
- fprintf(stderr, "Could not write read sequence\n");
+ fprintf(stderr, "Could not read first input record\n");
return false;
}
}
@@ -478,7 +482,9 @@ static bool split(state_t* state)
// if found write to the appropriate untangled bam
int i = kh_val(state->rg_hash,iter);
if (sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read) < 0) {
- fprintf(stderr, "Could not write sequence\n");
+ fprintf(stderr, "Could not write to output file '%s'\n",
+ state->rg_output_file_name[i]);
+ bam_destroy1(file_read);
return false;
}
} else {
@@ -493,7 +499,8 @@ static bool split(state_t* state)
return false;
} else {
if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
- fprintf(stderr, "Could not write sequence\n");
+ fprintf(stderr, "Could not write to unaccounted output file\n");
+ bam_destroy1(file_read);
return false;
}
}
@@ -505,7 +512,7 @@ static bool split(state_t* state)
bam_destroy1(file_read);
file_read = NULL;
if (r < -1) {
- fprintf(stderr, "Could not write read sequence\n");
+ fprintf(stderr, "Could not read input record\n");
return false;
}
}
@@ -514,23 +521,38 @@ static bool split(state_t* state)
return true;
}
-static int cleanup_state(state_t* status)
+static int cleanup_state(state_t* status, bool check_close)
{
int ret = 0;
if (!status) return 0;
if (status->unaccounted_header) bam_hdr_destroy(status->unaccounted_header);
- if (status->unaccounted_file) ret |= sam_close(status->unaccounted_file);
+ if (status->unaccounted_file) {
+ if (sam_close(status->unaccounted_file) < 0 && check_close) {
+ fprintf(stderr, "Error on closing unaccounted file\n");
+ ret = -1;
+ }
+ }
sam_close(status->merged_input_file);
size_t i;
for (i = 0; i < status->output_count; i++) {
- bam_hdr_destroy(status->rg_output_header[i]);
- ret |= sam_close(status->rg_output_file[i]);
- free(status->rg_id[i]);
+ if (status->rg_output_header && status->rg_output_header[i])
+ bam_hdr_destroy(status->rg_output_header[i]);
+ if (status->rg_output_file && status->rg_output_file[i]) {
+ if (sam_close(status->rg_output_file[i]) < 0 && check_close) {
+ fprintf(stderr, "Error on closing output file '%s'\n",
+ status->rg_output_file_name[i]);
+ ret = -1;
+ }
+ }
+ if (status->rg_id) free(status->rg_id[i]);
+ if (status->rg_output_file_name) free(status->rg_output_file_name[i]);
}
- bam_hdr_destroy(status->merged_input_header);
+ if (status->merged_input_header)
+ bam_hdr_destroy(status->merged_input_header);
free(status->rg_output_header);
free(status->rg_output_file);
+ free(status->rg_output_file_name);
kh_destroy_c2i(status->rg_hash);
free(status->rg_id);
free(status);
@@ -553,13 +575,17 @@ int main_split(int argc, char** argv)
{
int ret = 1;
parsed_opts_t* opts = parse_args(argc, argv);
- if (!opts ) goto cleanup_opts;
+ if (!opts) goto cleanup_opts;
state_t* status = init(opts);
if (!status) goto cleanup_opts;
- if (split(status)) ret = 0;
+ if (!split(status)) {
+ cleanup_state(status, false);
+ goto cleanup_opts;
+ }
+
+ ret = cleanup_state(status, true);
- ret |= (cleanup_state(status) != 0);
cleanup_opts:
cleanup_opts(opts);
diff --git a/samtools/bam_split.c.pysam.c b/samtools/bam_split.c.pysam.c
index 329556f..2348f48 100644
--- a/samtools/bam_split.c.pysam.c
+++ b/samtools/bam_split.c.pysam.c
@@ -2,7 +2,7 @@
/* bam_split.c -- split subcommand.
- Copyright (C) 2013, 2014 Genome Research Ltd.
+ Copyright (C) 2013-2015 Genome Research Ltd.
Author: Martin Pollard <mp15 at sanger.ac.uk>
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <htslib/sam.h>
#include <string.h>
#include <stdio.h>
@@ -57,6 +59,7 @@ struct state {
bam_hdr_t* unaccounted_header;
size_t output_count;
char** rg_id;
+ char **rg_output_file_name;
samFile** rg_output_file;
bam_hdr_t** rg_output_header;
kh_c2i_t* rg_hash;
@@ -64,7 +67,7 @@ struct state {
typedef struct state state_t;
-static int cleanup_state(state_t* status);
+static int cleanup_state(state_t* status, bool check_close);
static void cleanup_opts(parsed_opts_t* opts);
static void usage(FILE *write_to)
@@ -92,7 +95,7 @@ static void usage(FILE *write_to)
// Takes the command line options and turns them into something we can understand
static parsed_opts_t* parse_args(int argc, char** argv)
{
- if (argc == 1) { usage(stdout); return NULL; }
+ if (argc == 1) { usage(pysam_stdout); return NULL; }
const char* optstring = "vf:u:";
char* delim;
@@ -130,7 +133,7 @@ static parsed_opts_t* parse_args(int argc, char** argv)
if (parse_sam_global_opt(opt, optarg, lopts, &retval->ga) == 0) break;
/* else fall-through */
case '?':
- usage(stdout);
+ usage(pysam_stdout);
free(retval);
return NULL;
}
@@ -142,8 +145,8 @@ static parsed_opts_t* parse_args(int argc, char** argv)
argv += optind;
if (argc != 1) {
- fprintf(pysamerr, "Invalid number of arguments: %d\n", argc);
- usage(pysamerr);
+ fprintf(pysam_stderr, "Invalid number of arguments: %d\n", argc);
+ usage(pysam_stderr);
free(retval);
return NULL;
}
@@ -184,11 +187,11 @@ static char* expand_format_string(const char* format_string, const char* basenam
kputs("bam", &str);
break;
case '\0':
- // Error is: fprintf(pysamerr, "bad format string, trailing %%\n");
+ // Error is: fprintf(pysam_stderr, "bad format string, trailing %%\n");
free(str.s);
return NULL;
default:
- // Error is: fprintf(pysamerr, "bad format string, unknown format specifier\n");
+ // Error is: fprintf(pysam_stderr, "bad format string, unknown format specifier\n");
free(str.s);
return NULL;
}
@@ -322,21 +325,21 @@ static state_t* init(parsed_opts_t* opts)
{
state_t* retval = calloc(sizeof(state_t), 1);
if (!retval) {
- fprintf(pysamerr, "Out of memory");
+ fprintf(pysam_stderr, "Out of memory");
return NULL;
}
retval->merged_input_file = sam_open_format(opts->merged_input_name, "rb", &opts->ga.in);
if (!retval->merged_input_file) {
- fprintf(pysamerr, "Could not open input file (%s)\n", opts->merged_input_name);
+ fprintf(pysam_stderr, "Could not open input file (%s)\n", opts->merged_input_name);
free(retval);
return NULL;
}
retval->merged_input_header = sam_hdr_read(retval->merged_input_file);
if (retval->merged_input_header == NULL) {
- fprintf(pysamerr, "Could not read header for file '%s'\n",
+ fprintf(pysam_stderr, "Could not read header for file '%s'\n",
opts->merged_input_name);
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
@@ -344,15 +347,15 @@ static state_t* init(parsed_opts_t* opts)
if (opts->unaccounted_header_name) {
samFile* hdr_load = sam_open_format(opts->unaccounted_header_name, "r", &opts->ga.in);
if (!hdr_load) {
- fprintf(pysamerr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
- cleanup_state(retval);
+ fprintf(pysam_stderr, "Could not open unaccounted header file (%s)\n", opts->unaccounted_header_name);
+ cleanup_state(retval, false);
return NULL;
}
retval->unaccounted_header = sam_hdr_read(hdr_load);
if (retval->unaccounted_header == NULL) {
- fprintf(pysamerr, "Could not read header for file '%s'\n",
+ fprintf(pysam_stderr, "Could not read header for file '%s'\n",
opts->unaccounted_header_name);
- cleanup_state(retval);
+ cleanup_state(retval, false);
return NULL;
}
sam_close(hdr_load);
@@ -362,30 +365,31 @@ static state_t* init(parsed_opts_t* opts)
retval->unaccounted_file = sam_open_format(opts->unaccounted_name, "wb", &opts->ga.out);
if (retval->unaccounted_file == NULL) {
- fprintf(pysamerr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
- cleanup_state(retval);
+ fprintf(pysam_stderr, "Could not open unaccounted output file: %s\n", opts->unaccounted_name);
+ cleanup_state(retval, false);
return NULL;
}
}
// Open output files for RGs
if (!count_RG(retval->merged_input_header, &retval->output_count, &retval->rg_id)) return NULL;
- if (opts->verbose) fprintf(pysamerr, "@RG's found %zu\n",retval->output_count);
+ if (opts->verbose) fprintf(pysam_stderr, "@RG's found %zu\n",retval->output_count);
+ retval->rg_output_file_name = (char **)calloc(retval->output_count, sizeof(char *));
retval->rg_output_file = (samFile**)calloc(retval->output_count, sizeof(samFile*));
retval->rg_output_header = (bam_hdr_t**)calloc(retval->output_count, sizeof(bam_hdr_t*));
retval->rg_hash = kh_init_c2i();
- if (!retval->rg_output_file || !retval->rg_output_header) {
- fprintf(pysamerr, "Could not allocate memory for output file array. Out of memory?");
- cleanup_state(retval);
+ if (!retval->rg_output_file_name || !retval->rg_output_file || !retval->rg_output_header || !retval->rg_hash) {
+ fprintf(pysam_stderr, "Could not allocate memory for output file array. Out of memory?");
+ cleanup_state(retval, false);
return NULL;
}
char* dirsep = strrchr(opts->merged_input_name, '/');
char* input_base_name = strdup(dirsep? dirsep+1 : opts->merged_input_name);
if (!input_base_name) {
- fprintf(pysamerr, "Out of memory\n");
- cleanup_state(retval);
+ fprintf(pysam_stderr, "Out of memory\n");
+ cleanup_state(retval, false);
return NULL;
}
char* extension = strrchr(input_base_name, '.');
@@ -401,16 +405,17 @@ static state_t* init(parsed_opts_t* opts)
&opts->ga.out);
if ( output_filename == NULL ) {
- fprintf(pysamerr, "Error expanding output filename format string.\r\n");
- cleanup_state(retval);
+ fprintf(pysam_stderr, "Error expanding output filename format string.\n");
+ cleanup_state(retval, false);
free(input_base_name);
return NULL;
}
+ retval->rg_output_file_name[i] = output_filename;
retval->rg_output_file[i] = sam_open_format(output_filename, "wb", &opts->ga.out);
if (retval->rg_output_file[i] == NULL) {
- fprintf(pysamerr, "Could not open output file: %s\r\n", output_filename);
- cleanup_state(retval);
+ fprintf(pysam_stderr, "Could not open output file: %s\n", output_filename);
+ cleanup_state(retval, false);
free(input_base_name);
return NULL;
}
@@ -423,13 +428,11 @@ static state_t* init(parsed_opts_t* opts)
// Set and edit header
retval->rg_output_header[i] = bam_hdr_dup(retval->merged_input_header);
if ( !filter_header_rg(retval->rg_output_header[i], retval->rg_id[i]) ) {
- fprintf(pysamerr, "Could not rewrite header for file: %s\r\n", output_filename);
- cleanup_state(retval);
- free(output_filename);
+ fprintf(pysam_stderr, "Could not rewrite header for file: %s\n", output_filename);
+ cleanup_state(retval, false);
free(input_base_name);
return NULL;
}
- free(output_filename);
}
free(input_base_name);
@@ -440,13 +443,14 @@ static state_t* init(parsed_opts_t* opts)
static bool split(state_t* state)
{
if (state->unaccounted_file && sam_hdr_write(state->unaccounted_file, state->unaccounted_header) != 0) {
- fprintf(pysamerr, "Could not write output file header\n");
+ fprintf(pysam_stderr, "Could not write output file header\n");
return false;
}
size_t i;
for (i = 0; i < state->output_count; i++) {
if (sam_hdr_write(state->rg_output_file[i], state->rg_output_header[i]) != 0) {
- fprintf(pysamerr, "Could not write output file header\n");
+ fprintf(pysam_stderr, "Could not write output file header for '%s'\n",
+ state->rg_output_file_name[i]);
return false;
}
}
@@ -459,7 +463,7 @@ static bool split(state_t* state)
bam_destroy1(file_read);
file_read = NULL;
if (r < -1) {
- fprintf(pysamerr, "Could not write read sequence\n");
+ fprintf(pysam_stderr, "Could not read first input record\n");
return false;
}
}
@@ -480,22 +484,25 @@ static bool split(state_t* state)
// if found write to the appropriate untangled bam
int i = kh_val(state->rg_hash,iter);
if (sam_write1(state->rg_output_file[i], state->rg_output_header[i], file_read) < 0) {
- fprintf(pysamerr, "Could not write sequence\n");
+ fprintf(pysam_stderr, "Could not write to output file '%s'\n",
+ state->rg_output_file_name[i]);
+ bam_destroy1(file_read);
return false;
}
} else {
// otherwise write to the unaccounted bam if there is one or fail
if (state->unaccounted_file == NULL) {
if (tag) {
- fprintf(pysamerr, "Read \"%s\" with unaccounted for tag \"%s\".\n", bam_get_qname(file_read), bam_aux2Z(tag));
+ fprintf(pysam_stderr, "Read \"%s\" with unaccounted for tag \"%s\".\n", bam_get_qname(file_read), bam_aux2Z(tag));
} else {
- fprintf(pysamerr, "Read \"%s\" has no RG tag.\n", bam_get_qname(file_read));
+ fprintf(pysam_stderr, "Read \"%s\" has no RG tag.\n", bam_get_qname(file_read));
}
bam_destroy1(file_read);
return false;
} else {
if (sam_write1(state->unaccounted_file, state->unaccounted_header, file_read) < 0) {
- fprintf(pysamerr, "Could not write sequence\n");
+ fprintf(pysam_stderr, "Could not write to unaccounted output file\n");
+ bam_destroy1(file_read);
return false;
}
}
@@ -507,7 +514,7 @@ static bool split(state_t* state)
bam_destroy1(file_read);
file_read = NULL;
if (r < -1) {
- fprintf(pysamerr, "Could not write read sequence\n");
+ fprintf(pysam_stderr, "Could not read input record\n");
return false;
}
}
@@ -516,23 +523,38 @@ static bool split(state_t* state)
return true;
}
-static int cleanup_state(state_t* status)
+static int cleanup_state(state_t* status, bool check_close)
{
int ret = 0;
if (!status) return 0;
if (status->unaccounted_header) bam_hdr_destroy(status->unaccounted_header);
- if (status->unaccounted_file) ret |= sam_close(status->unaccounted_file);
+ if (status->unaccounted_file) {
+ if (sam_close(status->unaccounted_file) < 0 && check_close) {
+ fprintf(pysam_stderr, "Error on closing unaccounted file\n");
+ ret = -1;
+ }
+ }
sam_close(status->merged_input_file);
size_t i;
for (i = 0; i < status->output_count; i++) {
- bam_hdr_destroy(status->rg_output_header[i]);
- ret |= sam_close(status->rg_output_file[i]);
- free(status->rg_id[i]);
+ if (status->rg_output_header && status->rg_output_header[i])
+ bam_hdr_destroy(status->rg_output_header[i]);
+ if (status->rg_output_file && status->rg_output_file[i]) {
+ if (sam_close(status->rg_output_file[i]) < 0 && check_close) {
+ fprintf(pysam_stderr, "Error on closing output file '%s'\n",
+ status->rg_output_file_name[i]);
+ ret = -1;
+ }
+ }
+ if (status->rg_id) free(status->rg_id[i]);
+ if (status->rg_output_file_name) free(status->rg_output_file_name[i]);
}
- bam_hdr_destroy(status->merged_input_header);
+ if (status->merged_input_header)
+ bam_hdr_destroy(status->merged_input_header);
free(status->rg_output_header);
free(status->rg_output_file);
+ free(status->rg_output_file_name);
kh_destroy_c2i(status->rg_hash);
free(status->rg_id);
free(status);
@@ -555,13 +577,17 @@ int main_split(int argc, char** argv)
{
int ret = 1;
parsed_opts_t* opts = parse_args(argc, argv);
- if (!opts ) goto cleanup_opts;
+ if (!opts) goto cleanup_opts;
state_t* status = init(opts);
if (!status) goto cleanup_opts;
- if (split(status)) ret = 0;
+ if (!split(status)) {
+ cleanup_state(status, false);
+ goto cleanup_opts;
+ }
+
+ ret = cleanup_state(status, true);
- ret |= (cleanup_state(status) != 0);
cleanup_opts:
cleanup_opts(opts);
diff --git a/samtools/bam_stat.c b/samtools/bam_stat.c
index 5cb3235..f6cf1d5 100644
--- a/samtools/bam_stat.c
+++ b/samtools/bam_stat.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
diff --git a/samtools/bam_stat.c.pysam.c b/samtools/bam_stat.c.pysam.c
index a519312..cdca4dd 100644
--- a/samtools/bam_stat.c.pysam.c
+++ b/samtools/bam_stat.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <stdint.h>
#include <stdlib.h>
@@ -81,7 +83,7 @@ bam_flagstat_t *bam_flagstat_core(samFile *fp, bam_hdr_t *h)
flagstat_loop(s, c);
bam_destroy1(b);
if (ret != -1)
- fprintf(pysamerr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
+ fprintf(pysam_stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
return s;
}
@@ -120,16 +122,16 @@ int bam_flagstat(int argc, char *argv[])
switch (c) {
case INPUT_FMT_OPTION:
if (hts_opt_add(&in_opts, optarg) < 0)
- usage_exit(pysamerr, EXIT_FAILURE);
+ usage_exit(pysam_stderr, EXIT_FAILURE);
break;
default:
- usage_exit(pysamerr, EXIT_FAILURE);
+ usage_exit(pysam_stderr, EXIT_FAILURE);
}
}
if (argc != optind+1) {
- if (argc == optind) usage_exit(stdout, EXIT_SUCCESS);
- else usage_exit(pysamerr, EXIT_FAILURE);
+ if (argc == optind) usage_exit(pysam_stdout, EXIT_SUCCESS);
+ else usage_exit(pysam_stderr, EXIT_FAILURE);
}
fp = sam_open(argv[optind], "r");
if (fp == NULL) {
@@ -137,40 +139,40 @@ int bam_flagstat(int argc, char *argv[])
return 1;
}
if (hts_opt_apply(fp, in_opts)) {
- fprintf(pysamerr, "Failed to apply input-fmt-options\n");
+ fprintf(pysam_stderr, "Failed to apply input-fmt-options\n");
return 1;
}
if (hts_set_opt(fp, CRAM_OPT_REQUIRED_FIELDS,
SAM_FLAG | SAM_MAPQ | SAM_RNEXT)) {
- fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+ fprintf(pysam_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
return 1;
}
if (hts_set_opt(fp, CRAM_OPT_DECODE_MD, 0)) {
- fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+ fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
return 1;
}
header = sam_hdr_read(fp);
if (header == NULL) {
- fprintf(pysamerr, "Failed to read header for \"%s\"\n", argv[optind]);
+ fprintf(pysam_stderr, "Failed to read header for \"%s\"\n", argv[optind]);
return 1;
}
s = bam_flagstat_core(fp, header);
- printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
- printf("%lld + %lld secondary\n", s->n_secondary[0], s->n_secondary[1]);
- printf("%lld + %lld supplementary\n", s->n_supp[0], s->n_supp[1]);
- printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
- printf("%lld + %lld mapped (%s : %s)\n", s->n_mapped[0], s->n_mapped[1], percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
- printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
- printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
- printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
- printf("%lld + %lld properly paired (%s : %s)\n", s->n_pair_good[0], s->n_pair_good[1], percent(b0, s->n_pair_good[0], s->n_pair_all[0]), percent(b1, s->n_pair_good[1], s->n_pair_all[1]));
- printf("%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
- printf("%lld + %lld singletons (%s : %s)\n", s->n_sgltn[0], s->n_sgltn[1], percent(b0, s->n_sgltn[0], s->n_pair_all[0]), percent(b1, s->n_sgltn[1], s->n_pair_all[1]));
- printf("%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
- printf("%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
+ fprintf(pysam_stdout, "%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
+ fprintf(pysam_stdout, "%lld + %lld secondary\n", s->n_secondary[0], s->n_secondary[1]);
+ fprintf(pysam_stdout, "%lld + %lld supplementary\n", s->n_supp[0], s->n_supp[1]);
+ fprintf(pysam_stdout, "%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
+ fprintf(pysam_stdout, "%lld + %lld mapped (%s : %s)\n", s->n_mapped[0], s->n_mapped[1], percent(b0, s->n_mapped[0], s->n_reads[0]), percent(b1, s->n_mapped[1], s->n_reads[1]));
+ fprintf(pysam_stdout, "%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
+ fprintf(pysam_stdout, "%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
+ fprintf(pysam_stdout, "%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
+ fprintf(pysam_stdout, "%lld + %lld properly paired (%s : %s)\n", s->n_pair_good[0], s->n_pair_good[1], percent(b0, s->n_pair_good[0], s->n_pair_all[0]), percent(b1, s->n_pair_good[1], s->n_pair_all[1]));
+ fprintf(pysam_stdout, "%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
+ fprintf(pysam_stdout, "%lld + %lld singletons (%s : %s)\n", s->n_sgltn[0], s->n_sgltn[1], percent(b0, s->n_sgltn[0], s->n_pair_all[0]), percent(b1, s->n_sgltn[1], s->n_pair_all[1]));
+ fprintf(pysam_stdout, "%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
+ fprintf(pysam_stdout, "%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
free(s);
bam_hdr_destroy(header);
sam_close(fp);
diff --git a/samtools/bam_tview.c b/samtools/bam_tview.c
index f86ae43..f1f0cc7 100644
--- a/samtools/bam_tview.c
+++ b/samtools/bam_tview.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <regex.h>
#include <assert.h>
#include "bam_tview.h"
diff --git a/samtools/bam_tview.c.pysam.c b/samtools/bam_tview.c.pysam.c
index 736b588..a47bced 100644
--- a/samtools/bam_tview.c.pysam.c
+++ b/samtools/bam_tview.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <regex.h>
#include <assert.h>
#include "bam_tview.h"
@@ -68,7 +70,7 @@ int base_tv_init(tview_t* tv, const char *fn, const char *fn_fa,
tv->fp = sam_open_format(fn, "r", fmt);
if(tv->fp == NULL)
{
- fprintf(pysamerr,"sam_open %s. %s\n", fn,fn_fa);
+ fprintf(pysam_stderr,"sam_open %s. %s\n", fn,fn_fa);
exit(EXIT_FAILURE);
}
// TODO bgzf_set_cache_size(tv->fp->fp.bgzf, 8 * 1024 *1024);
@@ -77,13 +79,13 @@ int base_tv_init(tview_t* tv, const char *fn, const char *fn_fa,
tv->header = sam_hdr_read(tv->fp);
if(tv->header == NULL)
{
- fprintf(pysamerr,"Cannot read '%s'.\n", fn);
+ fprintf(pysam_stderr,"Cannot read '%s'.\n", fn);
exit(EXIT_FAILURE);
}
tv->idx = sam_index_load(tv->fp, fn);
if (tv->idx == NULL)
{
- fprintf(pysamerr,"Cannot read index for '%s'.\n", fn);
+ fprintf(pysam_stderr,"Cannot read index for '%s'.\n", fn);
exit(EXIT_FAILURE);
}
tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
@@ -297,7 +299,7 @@ int base_draw_aln(tview_t *tv, int tid, int pos)
free(str);
if ( !tv->ref )
{
- fprintf(pysamerr,"Could not read the reference sequence. Is it seekable (plain text or compressed + .gzi indexed with bgzip)?\n");
+ fprintf(pysam_stderr,"Could not read the reference sequence. Is it seekable (plain text or compressed + .gzi indexed with bgzip)?\n");
exit(1);
}
}
@@ -326,19 +328,19 @@ static void error(const char *format, ...)
{
if ( !format )
{
- fprintf(pysamerr,
+ fprintf(pysam_stderr,
"Usage: samtools tview [options] <aln.bam> [ref.fasta]\n"
"Options:\n"
" -d display output as (H)tml or (C)urses or (T)ext \n"
" -p chr:pos go directly to this position\n"
" -s STR display only reads from this sample or group\n");
- sam_global_opt_help(pysamerr, "-.--.");
+ sam_global_opt_help(pysam_stderr, "-.--.");
}
else
{
va_list ap;
va_start(ap, format);
- vfprintf(pysamerr, format, ap);
+ vfprintf(pysam_stderr, format, ap);
va_end(ap);
}
exit(-1);
@@ -428,7 +430,7 @@ int bam_tview_main(int argc, char *argv[])
}
if ( i==tv->header->n_targets )
{
- fprintf(pysamerr,"None of the BAM sequence names present in the fasta file\n");
+ fprintf(pysam_stderr,"None of the BAM sequence names present in the fasta file\n");
exit(EXIT_FAILURE);
}
tv->curr_tid = i;
diff --git a/samtools/bam_tview_curses.c.pysam.c b/samtools/bam_tview_curses.c.pysam.c
index bbeedf8..90a8335 100644
--- a/samtools/bam_tview_curses.c.pysam.c
+++ b/samtools/bam_tview_curses.c.pysam.c
@@ -304,7 +304,7 @@ tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples,
tview_t* base=(tview_t*)tv;
if(tv==0)
{
- fprintf(pysamerr,"Calloc failed\n");
+ fprintf(pysam_stderr,"Calloc failed\n");
return 0;
}
diff --git a/samtools/bam_tview_html.c b/samtools/bam_tview_html.c
index 9db8fce..e3aecda 100644
--- a/samtools/bam_tview_html.c
+++ b/samtools/bam_tview_html.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include "bam_tview.h"
diff --git a/samtools/bam_tview_html.c.pysam.c b/samtools/bam_tview_html.c.pysam.c
index b42c737..164e33d 100644
--- a/samtools/bam_tview_html.c.pysam.c
+++ b/samtools/bam_tview_html.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include "bam_tview.h"
@@ -183,7 +185,7 @@ static int html_drawaln(struct AbstractTview* tv, int tid, int pos)
fprintf(ptr->out,"<span");
while(css<32)
{
- //if(y>1) fprintf(pysamerr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
+ //if(y>1) fprintf(pysam_stderr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
{
@@ -322,12 +324,12 @@ tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples,
tview_t* base=(tview_t*)tv;
if(tv==0)
{
- fprintf(pysamerr,"Calloc failed\n");
+ fprintf(pysam_stderr,"Calloc failed\n");
return 0;
}
tv->row_count=0;
tv->screen=NULL;
- tv->out=stdout;
+ tv->out=pysam_stdout;
tv->attributes=0;
base_tv_init(base,fn,fn_fa,samples,fmt);
/* initialize callbacks */
diff --git a/samtools/bamshuf.c b/samtools/bamshuf.c
index ac97bb8..044bc4e 100644
--- a/samtools/bamshuf.c
+++ b/samtools/bamshuf.c
@@ -1,7 +1,7 @@
/* bamshuf.c -- collate subcommand.
Copyright (C) 2012 Broad Institute.
- Copyright (C) 2013 Genome Research Ltd.
+ Copyright (C) 2013, 2015 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -77,14 +79,16 @@ KSORT_INIT(bamshuf, elem_t, elem_lt)
static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
int is_stdout, sam_global_args *ga)
{
- samFile *fp, *fpw, **fpt;
- char **fnt, modew[8];
- bam1_t *b;
- int i, l;
- bam_hdr_t *h;
- int64_t *cnt;
+ samFile *fp, *fpw = NULL, **fpt = NULL;
+ char **fnt = NULL, modew[8];
+ bam1_t *b = NULL;
+ int i, l, r;
+ bam_hdr_t *h = NULL;
+ int64_t j, max_cnt = 0, *cnt = NULL;
+ elem_t *a = NULL;
- // split
+ // Read input, distribute reads pseudo-randomly into n_files temporary
+ // files.
fp = sam_open_format(fn, "r", &ga->in);
if (fp == NULL) {
print_error_errno("collate", "Cannot open input file \"%s\"", fn);
@@ -94,39 +98,69 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
h = sam_hdr_read(fp);
if (h == NULL) {
fprintf(stderr, "Couldn't read header for '%s'\n", fn);
- return 1;
+ goto fail;
}
fnt = (char**)calloc(n_files, sizeof(char*));
+ if (!fnt) goto mem_fail;
fpt = (samFile**)calloc(n_files, sizeof(samFile*));
+ if (!fpt) goto mem_fail;
cnt = (int64_t*)calloc(n_files, 8);
+ if (!cnt) goto mem_fail;
+
l = strlen(pre);
for (i = 0; i < n_files; ++i) {
fnt[i] = (char*)calloc(l + 10, 1);
+ if (!fnt[i]) goto mem_fail;
sprintf(fnt[i], "%s.%.4d.bam", pre, i);
fpt[i] = sam_open(fnt[i], "wb1");
if (fpt[i] == NULL) {
print_error_errno("collate", "Cannot open intermediate file \"%s\"", fnt[i]);
- return 1;
+ goto fail;
+ }
+ if (sam_hdr_write(fpt[i], h) < 0) {
+ print_error_errno("collate", "Couldn't write header to intermediate file \"%s\"", fnt[i]);
+ goto fail;
}
- sam_hdr_write(fpt[i], h);
}
b = bam_init1();
- while (sam_read1(fp, h, b) >= 0) {
+ if (!b) goto mem_fail;
+ while ((r = sam_read1(fp, h, b)) >= 0) {
uint32_t x;
x = hash_X31_Wang(bam_get_qname(b)) % n_files;
- sam_write1(fpt[x], h, b);
+ if (sam_write1(fpt[x], h, b) < 0) {
+ print_error_errno("collate", "Couldn't write to intermediate file \"%s\"", fnt[x]);
+ goto fail;
+ }
++cnt[x];
}
bam_destroy1(b);
- for (i = 0; i < n_files; ++i) sam_close(fpt[i]);
+ b = NULL;
+ if (r < -1) {
+ fprintf(stderr, "Error reading input file\n");
+ goto fail;
+ }
+ for (i = 0; i < n_files; ++i) {
+ // Close split output
+ r = sam_close(fpt[i]);
+ fpt[i] = NULL;
+ if (r < 0) {
+ fprintf(stderr, "Error on closing '%s'\n", fnt[i]);
+ return 1;
+ }
+
+ // Find biggest count
+ if (max_cnt < cnt[i]) max_cnt = cnt[i];
+ }
free(fpt);
+ fpt = NULL;
sam_close(fp);
-
+ fp = NULL;
// merge
sprintf(modew, "wb%d", (clevel >= 0 && clevel <= 9)? clevel : DEF_CLEVEL);
if (!is_stdout) { // output to a file
char *fnw = (char*)calloc(l + 5, 1);
+ if (!fnw) goto mem_fail;
if (ga->out.format == unknown_format)
sprintf(fnw, "%s.bam", pre); // "wb" above makes BAM the default
else
@@ -137,37 +171,86 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
if (fpw == NULL) {
if (is_stdout) print_error_errno("collate", "Cannot open standard output");
else print_error_errno("collate", "Cannot open output file \"%s.bam\"", pre);
- return 1;
+ goto fail;
+ }
+
+ if (sam_hdr_write(fpw, h) < 0) {
+ print_error_errno("collate", "Couldn't write header");
+ goto fail;
+ }
+
+ a = malloc(max_cnt * sizeof(elem_t));
+ if (!a) goto mem_fail;
+ for (j = 0; j < max_cnt; ++j) {
+ a[j].b = bam_init1();
+ if (!a[j].b) { max_cnt = j; goto mem_fail; }
}
- sam_hdr_write(fpw, h);
for (i = 0; i < n_files; ++i) {
- int64_t j, c = cnt[i];
- elem_t *a;
+ int64_t c = cnt[i];
fp = sam_open_format(fnt[i], "r", &ga->in);
- bam_hdr_destroy(sam_hdr_read(fp));
- a = (elem_t*)calloc(c, sizeof(elem_t));
+ if (NULL == fp) {
+ print_error_errno("collate", "Couldn't open \"%s\"", fnt[i]);
+ goto fail;
+ }
+ bam_hdr_destroy(sam_hdr_read(fp)); // Skip over header
+
+ // Slurp in one of the split files
for (j = 0; j < c; ++j) {
- a[j].b = bam_init1();
- sam_read1(fp, h, a[j].b);
+ if (sam_read1(fp, h, a[j].b) < 0) {
+ fprintf(stderr, "Error reading '%s'\n", fnt[i]);
+ goto fail;
+ }
a[j].key = hash_X31_Wang(bam_get_qname(a[j].b));
}
sam_close(fp);
unlink(fnt[i]);
free(fnt[i]);
- ks_introsort(bamshuf, c, a);
+ fnt[i] = NULL;
+
+ ks_introsort(bamshuf, c, a); // Shuffle all the reads
+
+ // Write them out again
for (j = 0; j < c; ++j) {
- sam_write1(fpw, h, a[j].b);
- bam_destroy1(a[j].b);
+ if (sam_write1(fpw, h, a[j].b) < 0) {
+ print_error_errno("collate", "Error writing to output");
+ goto fail;
+ }
}
- free(a);
}
- sam_close(fpw);
+
bam_hdr_destroy(h);
- free(fnt); free(cnt);
+ for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+ free(a); free(fnt); free(cnt);
sam_global_args_free(ga);
+ if (sam_close(fpw) < 0) {
+ fprintf(stderr, "Error on closing output\n");
+ return 1;
+ }
return 0;
+
+ mem_fail:
+ fprintf(stderr, "Out of memory\n");
+
+ fail:
+ if (fp) sam_close(fp);
+ if (fpw) sam_close(fpw);
+ if (h) bam_hdr_destroy(h);
+ if (b) bam_destroy1(b);
+ for (i = 0; i < n_files; ++i) {
+ if (fnt) free(fnt[i]);
+ if (fpt && fpt[i]) sam_close(fpt[i]);
+ }
+ if (a) {
+ for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+ free(a);
+ }
+ free(fnt);
+ free(fpt);
+ free(cnt);
+ sam_global_args_free(ga);
+ return 1;
}
static int usage(FILE *fp, int n_files) {
diff --git a/samtools/bamshuf.c.pysam.c b/samtools/bamshuf.c.pysam.c
index d17cf9b..fb1a5ac 100644
--- a/samtools/bamshuf.c.pysam.c
+++ b/samtools/bamshuf.c.pysam.c
@@ -3,7 +3,7 @@
/* bamshuf.c -- collate subcommand.
Copyright (C) 2012 Broad Institute.
- Copyright (C) 2013 Genome Research Ltd.
+ Copyright (C) 2013, 2015 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
@@ -77,16 +79,18 @@ static inline int elem_lt(elem_t x, elem_t y)
KSORT_INIT(bamshuf, elem_t, elem_lt)
static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
- int is_stdout, sam_global_args *ga)
+ int is_pysam_stdout, sam_global_args *ga)
{
- samFile *fp, *fpw, **fpt;
- char **fnt, modew[8];
- bam1_t *b;
- int i, l;
- bam_hdr_t *h;
- int64_t *cnt;
+ samFile *fp, *fpw = NULL, **fpt = NULL;
+ char **fnt = NULL, modew[8];
+ bam1_t *b = NULL;
+ int i, l, r;
+ bam_hdr_t *h = NULL;
+ int64_t j, max_cnt = 0, *cnt = NULL;
+ elem_t *a = NULL;
- // split
+ // Read input, distribute reads pseudo-randomly into n_files temporary
+ // files.
fp = sam_open_format(fn, "r", &ga->in);
if (fp == NULL) {
print_error_errno("collate", "Cannot open input file \"%s\"", fn);
@@ -95,88 +99,167 @@ static int bamshuf(const char *fn, int n_files, const char *pre, int clevel,
h = sam_hdr_read(fp);
if (h == NULL) {
- fprintf(pysamerr, "Couldn't read header for '%s'\n", fn);
- return 1;
+ fprintf(pysam_stderr, "Couldn't read header for '%s'\n", fn);
+ goto fail;
}
fnt = (char**)calloc(n_files, sizeof(char*));
+ if (!fnt) goto mem_fail;
fpt = (samFile**)calloc(n_files, sizeof(samFile*));
+ if (!fpt) goto mem_fail;
cnt = (int64_t*)calloc(n_files, 8);
+ if (!cnt) goto mem_fail;
+
l = strlen(pre);
for (i = 0; i < n_files; ++i) {
fnt[i] = (char*)calloc(l + 10, 1);
+ if (!fnt[i]) goto mem_fail;
sprintf(fnt[i], "%s.%.4d.bam", pre, i);
fpt[i] = sam_open(fnt[i], "wb1");
if (fpt[i] == NULL) {
print_error_errno("collate", "Cannot open intermediate file \"%s\"", fnt[i]);
- return 1;
+ goto fail;
+ }
+ if (sam_hdr_write(fpt[i], h) < 0) {
+ print_error_errno("collate", "Couldn't write header to intermediate file \"%s\"", fnt[i]);
+ goto fail;
}
- sam_hdr_write(fpt[i], h);
}
b = bam_init1();
- while (sam_read1(fp, h, b) >= 0) {
+ if (!b) goto mem_fail;
+ while ((r = sam_read1(fp, h, b)) >= 0) {
uint32_t x;
x = hash_X31_Wang(bam_get_qname(b)) % n_files;
- sam_write1(fpt[x], h, b);
+ if (sam_write1(fpt[x], h, b) < 0) {
+ print_error_errno("collate", "Couldn't write to intermediate file \"%s\"", fnt[x]);
+ goto fail;
+ }
++cnt[x];
}
bam_destroy1(b);
- for (i = 0; i < n_files; ++i) sam_close(fpt[i]);
+ b = NULL;
+ if (r < -1) {
+ fprintf(pysam_stderr, "Error reading input file\n");
+ goto fail;
+ }
+ for (i = 0; i < n_files; ++i) {
+ // Close split output
+ r = sam_close(fpt[i]);
+ fpt[i] = NULL;
+ if (r < 0) {
+ fprintf(pysam_stderr, "Error on closing '%s'\n", fnt[i]);
+ return 1;
+ }
+
+ // Find biggest count
+ if (max_cnt < cnt[i]) max_cnt = cnt[i];
+ }
free(fpt);
+ fpt = NULL;
sam_close(fp);
-
+ fp = NULL;
// merge
sprintf(modew, "wb%d", (clevel >= 0 && clevel <= 9)? clevel : DEF_CLEVEL);
- if (!is_stdout) { // output to a file
+ if (!is_pysam_stdout) { // output to a file
char *fnw = (char*)calloc(l + 5, 1);
+ if (!fnw) goto mem_fail;
if (ga->out.format == unknown_format)
sprintf(fnw, "%s.bam", pre); // "wb" above makes BAM the default
else
sprintf(fnw, "%s.%s", pre, hts_format_file_extension(&ga->out));
fpw = sam_open_format(fnw, modew, &ga->out);
free(fnw);
- } else fpw = sam_open_format("-", modew, &ga->out); // output to stdout
+ } else fpw = sam_open_format("-", modew, &ga->out); // output to pysam_stdout
if (fpw == NULL) {
- if (is_stdout) print_error_errno("collate", "Cannot open standard output");
+ if (is_pysam_stdout) print_error_errno("collate", "Cannot open standard output");
else print_error_errno("collate", "Cannot open output file \"%s.bam\"", pre);
- return 1;
+ goto fail;
+ }
+
+ if (sam_hdr_write(fpw, h) < 0) {
+ print_error_errno("collate", "Couldn't write header");
+ goto fail;
+ }
+
+ a = malloc(max_cnt * sizeof(elem_t));
+ if (!a) goto mem_fail;
+ for (j = 0; j < max_cnt; ++j) {
+ a[j].b = bam_init1();
+ if (!a[j].b) { max_cnt = j; goto mem_fail; }
}
- sam_hdr_write(fpw, h);
for (i = 0; i < n_files; ++i) {
- int64_t j, c = cnt[i];
- elem_t *a;
+ int64_t c = cnt[i];
fp = sam_open_format(fnt[i], "r", &ga->in);
- bam_hdr_destroy(sam_hdr_read(fp));
- a = (elem_t*)calloc(c, sizeof(elem_t));
+ if (NULL == fp) {
+ print_error_errno("collate", "Couldn't open \"%s\"", fnt[i]);
+ goto fail;
+ }
+ bam_hdr_destroy(sam_hdr_read(fp)); // Skip over header
+
+ // Slurp in one of the split files
for (j = 0; j < c; ++j) {
- a[j].b = bam_init1();
- sam_read1(fp, h, a[j].b);
+ if (sam_read1(fp, h, a[j].b) < 0) {
+ fprintf(pysam_stderr, "Error reading '%s'\n", fnt[i]);
+ goto fail;
+ }
a[j].key = hash_X31_Wang(bam_get_qname(a[j].b));
}
sam_close(fp);
unlink(fnt[i]);
free(fnt[i]);
- ks_introsort(bamshuf, c, a);
+ fnt[i] = NULL;
+
+ ks_introsort(bamshuf, c, a); // Shuffle all the reads
+
+ // Write them out again
for (j = 0; j < c; ++j) {
- sam_write1(fpw, h, a[j].b);
- bam_destroy1(a[j].b);
+ if (sam_write1(fpw, h, a[j].b) < 0) {
+ print_error_errno("collate", "Error writing to output");
+ goto fail;
+ }
}
- free(a);
}
- sam_close(fpw);
+
bam_hdr_destroy(h);
- free(fnt); free(cnt);
+ for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+ free(a); free(fnt); free(cnt);
sam_global_args_free(ga);
+ if (sam_close(fpw) < 0) {
+ fprintf(pysam_stderr, "Error on closing output\n");
+ return 1;
+ }
return 0;
+
+ mem_fail:
+ fprintf(pysam_stderr, "Out of memory\n");
+
+ fail:
+ if (fp) sam_close(fp);
+ if (fpw) sam_close(fpw);
+ if (h) bam_hdr_destroy(h);
+ if (b) bam_destroy1(b);
+ for (i = 0; i < n_files; ++i) {
+ if (fnt) free(fnt[i]);
+ if (fpt && fpt[i]) sam_close(fpt[i]);
+ }
+ if (a) {
+ for (j = 0; j < max_cnt; ++j) bam_destroy1(a[j].b);
+ free(a);
+ }
+ free(fnt);
+ free(fpt);
+ free(cnt);
+ sam_global_args_free(ga);
+ return 1;
}
static int usage(FILE *fp, int n_files) {
fprintf(fp,
"Usage: samtools collate [-Ou] [-n nFiles] [-c cLevel] <in.bam> <out.prefix>\n\n"
"Options:\n"
- " -O output to stdout\n"
+ " -O output to pysam_stdout\n"
" -u uncompressed BAM output\n"
" -l INT compression level [%d]\n" // DEF_CLEVEL
" -n INT number of temporary files [%d]\n", // n_files
@@ -189,7 +272,7 @@ static int usage(FILE *fp, int n_files) {
int main_bamshuf(int argc, char *argv[])
{
- int c, n_files = 64, clevel = DEF_CLEVEL, is_stdout = 0, is_un = 0;
+ int c, n_files = 64, clevel = DEF_CLEVEL, is_pysam_stdout = 0, is_un = 0;
sam_global_args ga = SAM_GLOBAL_ARGS_INIT;
static const struct option lopts[] = {
SAM_OPT_GLOBAL_OPTIONS('-', 0, 0, 0, 0),
@@ -201,15 +284,15 @@ int main_bamshuf(int argc, char *argv[])
case 'n': n_files = atoi(optarg); break;
case 'l': clevel = atoi(optarg); break;
case 'u': is_un = 1; break;
- case 'O': is_stdout = 1; break;
+ case 'O': is_pysam_stdout = 1; break;
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
/* else fall-through */
- case '?': return usage(pysamerr, n_files);
+ case '?': return usage(pysam_stderr, n_files);
}
}
if (is_un) clevel = 0;
if (optind + 2 > argc)
- return usage(pysamerr, n_files);
+ return usage(pysam_stderr, n_files);
- return bamshuf(argv[optind], n_files, argv[optind+1], clevel, is_stdout, &ga);
+ return bamshuf(argv[optind], n_files, argv[optind+1], clevel, is_pysam_stdout, &ga);
}
diff --git a/samtools/bamtk.c b/samtools/bamtk.c
index 4b4df77..5c1c60d 100644
--- a/samtools/bamtk.c
+++ b/samtools/bamtk.c
@@ -1,6 +1,6 @@
/* bamtk.c -- main samtools command front-end.
- Copyright (C) 2008-2015 Genome Research Ltd.
+ Copyright (C) 2008-2016 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
@@ -213,7 +215,7 @@ int main(int argc, char *argv[])
printf(
"samtools %s\n"
"Using htslib %s\n"
-"Copyright (C) 2015 Genome Research Ltd.\n",
+"Copyright (C) 2016 Genome Research Ltd.\n",
samtools_version(), hts_version());
}
else if (strcmp(argv[1], "--version-only") == 0) {
diff --git a/samtools/bamtk.c.pysam.c b/samtools/bamtk.c.pysam.c
index a369810..1f3d938 100644
--- a/samtools/bamtk.c.pysam.c
+++ b/samtools/bamtk.c.pysam.c
@@ -2,7 +2,7 @@
/* bamtk.c -- main samtools command front-end.
- Copyright (C) 2008-2015 Genome Research Ltd.
+ Copyright (C) 2008-2016 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
@@ -71,13 +73,13 @@ const char *samtools_version()
static void vprint_error_core(const char *subcommand, const char *format, va_list args, const char *extra)
{
- fflush(stdout);
- if (subcommand && *subcommand) fprintf(pysamerr, "samtools %s: ", subcommand);
- else fprintf(pysamerr, "samtools: ");
- vfprintf(pysamerr, format, args);
- if (extra) fprintf(pysamerr, ": %s\n", extra);
- else fprintf(pysamerr, "\n");
- fflush(pysamerr);
+ fflush(pysam_stdout);
+ if (subcommand && *subcommand) fprintf(pysam_stderr, "samtools %s: ", subcommand);
+ else fprintf(pysam_stderr, "samtools: ");
+ vfprintf(pysam_stderr, format, args);
+ if (extra) fprintf(pysam_stderr, ": %s\n", extra);
+ else fprintf(pysam_stderr, "\n");
+ fflush(pysam_stderr);
}
void print_error(const char *subcommand, const char *format, ...)
@@ -158,14 +160,13 @@ static void usage(FILE *fp)
int samtools_main(int argc, char *argv[])
{
#ifdef _WIN32
- setmode(fileno(stdout), O_BINARY);
+ setmode(fileno(pysam_stdout), O_BINARY);
setmode(fileno(stdin), O_BINARY);
#endif
+ if (argc < 2) { usage(pysam_stderr); return 1; }
- if (argc < 2) { usage(pysamerr); return 1; }
-
if (strcmp(argv[1], "help") == 0 || strcmp(argv[1], "--help") == 0) {
- if (argc == 2) { usage(stdout); return 0; }
+ if (argc == 2) { usage(pysam_stdout); return 0; }
// Otherwise change "samtools help COMMAND [...]" to "samtools COMMAND";
// main_xyz() functions by convention display the subcommand's usage
@@ -173,6 +174,7 @@ int samtools_main(int argc, char *argv[])
argv++;
argc = 2;
}
+
int ret = 0;
if (strcmp(argv[1], "view") == 0) ret = main_samview(argc-1, argv+1);
else if (strcmp(argv[1], "import") == 0) ret = main_import(argc-1, argv+1);
@@ -207,22 +209,22 @@ int samtools_main(int argc, char *argv[])
else if (strcmp(argv[1], "quickcheck") == 0) ret = main_quickcheck(argc-1, argv+1);
else if (strcmp(argv[1], "addreplacerg") == 0) ret = main_addreplacerg(argc-1, argv+1);
else if (strcmp(argv[1], "pileup") == 0) {
- fprintf(pysamerr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
+ fprintf(pysam_stderr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
return 1;
}
else if (strcmp(argv[1], "tview") == 0) ret = bam_tview_main(argc-1, argv+1);
else if (strcmp(argv[1], "--version") == 0) {
- printf(
+ fprintf(pysam_stdout,
"samtools %s\n"
"Using htslib %s\n"
-"Copyright (C) 2015 Genome Research Ltd.\n",
+"Copyright (C) 2016 Genome Research Ltd.\n",
samtools_version(), hts_version());
}
else if (strcmp(argv[1], "--version-only") == 0) {
- printf("%s+htslib-%s\n", samtools_version(), hts_version());
+ fprintf(pysam_stdout, "%s+htslib-%s\n", samtools_version(), hts_version());
}
else {
- fprintf(pysamerr, "[main] unrecognized command '%s'\n", argv[1]);
+ fprintf(pysam_stderr, "[main] unrecognized command '%s'\n", argv[1]);
return 1;
}
return ret;
diff --git a/samtools/bedcov.c b/samtools/bedcov.c
index e2f0db8..d4dceee 100644
--- a/samtools/bedcov.c
+++ b/samtools/bedcov.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <zlib.h>
#include <stdio.h>
#include <ctype.h>
diff --git a/samtools/bedcov.c.pysam.c b/samtools/bedcov.c.pysam.c
index 6faa7bf..25fdffc 100644
--- a/samtools/bedcov.c.pysam.c
+++ b/samtools/bedcov.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <zlib.h>
#include <stdio.h>
#include <ctype.h>
@@ -88,9 +90,9 @@ int main_bedcov(int argc, char *argv[])
if (usage) break;
}
if (usage || optind + 2 > argc) {
- fprintf(pysamerr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n");
- fprintf(pysamerr, " -Q INT Only count bases of at least INT quality [0]\n");
- sam_global_opt_help(pysamerr, "-.--.");
+ fprintf(pysam_stderr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n");
+ fprintf(pysam_stderr, " -Q INT Only count bases of at least INT quality [0]\n");
+ sam_global_opt_help(pysam_stderr, "-.--.");
return 1;
}
memset(&str, 0, sizeof(kstring_t));
@@ -104,13 +106,13 @@ int main_bedcov(int argc, char *argv[])
if (aux[i]->fp)
idx[i] = sam_index_load(aux[i]->fp, argv[i+optind+1]);
if (aux[i]->fp == 0 || idx[i] == 0) {
- fprintf(pysamerr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
+ fprintf(pysam_stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]);
return 2;
}
// TODO bgzf_set_cache_size(aux[i]->fp, 20);
aux[i]->header = sam_hdr_read(aux[i]->fp);
if (aux[i]->header == NULL) {
- fprintf(pysamerr, "ERROR: failed to read header for '%s'\n",
+ fprintf(pysam_stderr, "ERROR: failed to read header for '%s'\n",
argv[i+optind+1]);
return 2;
}
@@ -153,12 +155,12 @@ int main_bedcov(int argc, char *argv[])
kputc('\t', &str);
kputl(cnt[i], &str);
}
- puts(str.s);
+ fputs(str.s, pysam_stdout) & fputc('\n', pysam_stdout);
bam_mplp_destroy(mplp);
continue;
bed_error:
- fprintf(pysamerr, "Errors in BED line '%s'\n", str.s);
+ fprintf(pysam_stderr, "Errors in BED line '%s'\n", str.s);
}
free(n_plp); free(plp);
ks_destroy(ks);
diff --git a/samtools/bedidx.c b/samtools/bedidx.c
index 627783e..c1954ad 100644
--- a/samtools/bedidx.c
+++ b/samtools/bedidx.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
diff --git a/samtools/bedidx.c.pysam.c b/samtools/bedidx.c.pysam.c
index 716aee5..5b7df0c 100644
--- a/samtools/bedidx.c.pysam.c
+++ b/samtools/bedidx.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
@@ -199,7 +201,7 @@ void *bed_read(const char *fn)
// has called their reference "browser" or "track".
if (0 == strcmp(ref, "browser")) continue;
if (0 == strcmp(ref, "track")) continue;
- fprintf(pysamerr, "[bed_read] Parse error reading %s at line %u\n",
+ fprintf(pysam_stderr, "[bed_read] Parse error reading %s at line %u\n",
fn, line);
goto fail_no_msg;
}
@@ -236,7 +238,7 @@ void *bed_read(const char *fn)
bed_index(h);
return h;
fail:
- fprintf(pysamerr, "[bed_read] Error reading %s : %s\n", fn, strerror(errno));
+ fprintf(pysam_stderr, "[bed_read] Error reading %s : %s\n", fn, strerror(errno));
fail_no_msg:
if (ks) ks_destroy(ks);
if (fp) gzclose(fp);
diff --git a/samtools/cut_target.c b/samtools/cut_target.c
index 56ec9f9..71a6c85 100644
--- a/samtools/cut_target.c
+++ b/samtools/cut_target.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
diff --git a/samtools/cut_target.c.pysam.c b/samtools/cut_target.c.pysam.c
index 92b15a0..82a4c4c 100644
--- a/samtools/cut_target.c.pysam.c
+++ b/samtools/cut_target.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
@@ -126,18 +128,18 @@ static void process_cns(bam_hdr_t *h, int tid, int l, uint16_t *cns)
if (i == l || ((b[i]>>2&3) == 0 && s >= 0)) {
if (s >= 0) {
int j;
- printf("%s:%d-%d\t0\t%s\t%d\t60\t%dM\t*\t0\t0\t", h->target_name[tid], s+1, i, h->target_name[tid], s+1, i-s);
+ fprintf(pysam_stdout, "%s:%d-%d\t0\t%s\t%d\t60\t%dM\t*\t0\t0\t", h->target_name[tid], s+1, i, h->target_name[tid], s+1, i-s);
for (j = s; j < i; ++j) {
int c = cns[j]>>8;
- if (c == 0) putchar('N');
- else putchar("ACGT"[c&3]);
+ if (c == 0) fputc('N', pysam_stdout);
+ else fputc("ACGT"[c&3], pysam_stdout);
}
- putchar('\t');
+ fputc('\t', pysam_stdout);
for (j = s; j < i; ++j)
- putchar(33 + (cns[j]>>8>>2));
- putchar('\n');
+ fputc(33 + (cns[j]>>8>>2), pysam_stdout);
+ fputc('\n', pysam_stdout);
}
- //if (s >= 0) printf("%s\t%d\t%d\t%d\n", h->target_name[tid], s, i, i - s);
+ //if (s >= 0) fprintf(pysam_stdout, "%s\t%d\t%d\t%d\n", h->target_name[tid], s, i, i - s);
s = -1;
} else if ((b[i]>>2&3) && s < 0) s = i;
}
@@ -197,18 +199,18 @@ int main_cut_target(int argc, char *argv[])
}
if (ga.reference) {
g.fai = fai_load(ga.reference);
- if (g.fai == 0) fprintf(pysamerr, "[%s] fail to load the fasta index.\n", __func__);
+ if (g.fai == 0) fprintf(pysam_stderr, "[%s] fail to load the fasta index.\n", __func__);
}
if (usage || argc == optind) {
- fprintf(pysamerr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] <in.bam>\n");
- sam_global_opt_help(pysamerr, "-.--f");
+ fprintf(pysam_stderr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] <in.bam>\n");
+ sam_global_opt_help(pysam_stderr, "-.--f");
return 1;
}
l = max_l = 0; cns = 0;
g.fp = sam_open_format(argv[optind], "r", &ga.in);
g.h = sam_hdr_read(g.fp);
if (g.h == NULL) {
- fprintf(pysamerr, "Couldn't read header for '%s'\n", argv[optind]);
+ fprintf(pysam_stderr, "Couldn't read header for '%s'\n", argv[optind]);
sam_close(g.fp);
return 1;
}
diff --git a/samtools/dict.c b/samtools/dict.c
index 241d119..fa64a16 100644
--- a/samtools/dict.c
+++ b/samtools/dict.c
@@ -22,7 +22,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
+#include <unistd.h>
#include <zlib.h>
#include <getopt.h>
#include "htslib/kseq.h"
@@ -140,7 +143,7 @@ int dict_main(int argc, char *argv[])
char *fname = NULL;
if ( optind>=argc )
{
- if ( !isatty(fileno((FILE *)stdin)) ) fname = "-"; // reading from stdin
+ if ( !isatty(STDIN_FILENO) ) fname = "-"; // reading from stdin
else return dict_usage();
}
else fname = argv[optind];
diff --git a/samtools/dict.c.pysam.c b/samtools/dict.c.pysam.c
index 6b4a25a..5368851 100644
--- a/samtools/dict.c.pysam.c
+++ b/samtools/dict.c.pysam.c
@@ -24,7 +24,10 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
+#include <unistd.h>
#include <zlib.h>
#include <getopt.h>
#include "htslib/kseq.h"
@@ -51,14 +54,14 @@ static void write_dict(const char *fn, args_t *args)
fp = strcmp(fn, "-") ? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
if (fp == 0) {
- fprintf(pysamerr, "dict: %s: No such file or directory\n", fn);
+ fprintf(pysam_stderr, "dict: %s: No such file or directory\n", fn);
exit(1);
}
- FILE *out = stdout;
+ FILE *out = pysam_stdout;
if (args->output_fname) {
out = fopen(args->output_fname, "w");
if (out == NULL) {
- fprintf(pysamerr, "dict: %s: Cannot open file for writing\n", args->output_fname);
+ fprintf(pysam_stderr, "dict: %s: Cannot open file for writing\n", args->output_fname);
exit(1);
}
}
@@ -97,15 +100,15 @@ static void write_dict(const char *fn, args_t *args)
static int dict_usage(void)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "About: Create a sequence dictionary file from a fasta file\n");
- fprintf(pysamerr, "Usage: samtools dict [options] <file.fa|file.fa.gz>\n\n");
- fprintf(pysamerr, "Options: -a, --assembly STR assembly\n");
- fprintf(pysamerr, " -H, --no-header do not print @HD line\n");
- fprintf(pysamerr, " -o, --output STR file to write out dict file [stdout]\n");
- fprintf(pysamerr, " -s, --species STR species\n");
- fprintf(pysamerr, " -u, --uri STR URI [file:///abs/path/to/file.fa]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "About: Create a sequence dictionary file from a fasta file\n");
+ fprintf(pysam_stderr, "Usage: samtools dict [options] <file.fa|file.fa.gz>\n\n");
+ fprintf(pysam_stderr, "Options: -a, --assembly STR assembly\n");
+ fprintf(pysam_stderr, " -H, --no-header do not print @HD line\n");
+ fprintf(pysam_stderr, " -o, --output STR file to write out dict file [pysam_stdout]\n");
+ fprintf(pysam_stderr, " -s, --species STR species\n");
+ fprintf(pysam_stderr, " -u, --uri STR URI [file:///abs/path/to/file.fa]\n");
+ fprintf(pysam_stderr, "\n");
return 1;
}
@@ -142,7 +145,7 @@ int dict_main(int argc, char *argv[])
char *fname = NULL;
if ( optind>=argc )
{
- if ( !isatty(fileno((FILE *)stdin)) ) fname = "-"; // reading from stdin
+ if ( !isatty(STDIN_FILENO) ) fname = "-"; // reading from stdin
else return dict_usage();
}
else fname = argv[optind];
diff --git a/samtools/errmod.c b/samtools/errmod.c
index f8b5aa7..c37c6d1 100644
--- a/samtools/errmod.c
+++ b/samtools/errmod.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include "errmod.h"
#include "htslib/ksort.h"
diff --git a/samtools/errmod.c.pysam.c b/samtools/errmod.c.pysam.c
index fce3042..12176cf 100644
--- a/samtools/errmod.c.pysam.c
+++ b/samtools/errmod.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <math.h>
#include "errmod.h"
#include "htslib/ksort.h"
diff --git a/samtools/faidx.c b/samtools/faidx.c
index dcc1041..336bde5 100644
--- a/samtools/faidx.c
+++ b/samtools/faidx.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
@@ -67,7 +69,9 @@ int faidx_main(int argc, char *argv[])
error(NULL);
if ( argc==2 )
{
- fai_build(argv[optind]);
+ if (fai_build(argv[optind]) != 0) {
+ error("Could not build fai index %s.fai\n", argv[optind]);
+ }
return 0;
}
diff --git a/samtools/faidx.c.pysam.c b/samtools/faidx.c.pysam.c
index 971db3b..ac06647 100644
--- a/samtools/faidx.c.pysam.c
+++ b/samtools/faidx.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
@@ -40,14 +42,14 @@ static void error(const char *format, ...)
{
va_list ap;
va_start(ap, format);
- vfprintf(pysamerr, format, ap);
+ vfprintf(pysam_stderr, format, ap);
va_end(ap);
}
else
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Usage: samtools faidx <file.fa|file.fa.gz> [<reg> [...]]\n");
+ fprintf(pysam_stderr, "\n");
}
exit(-1);
}
@@ -69,7 +71,9 @@ int faidx_main(int argc, char *argv[])
error(NULL);
if ( argc==2 )
{
- fai_build(argv[optind]);
+ if (fai_build(argv[optind]) != 0) {
+ error("Could not build fai index %s.fai\n", argv[optind]);
+ }
return 0;
}
@@ -78,15 +82,15 @@ int faidx_main(int argc, char *argv[])
while ( ++optind<argc )
{
- printf(">%s\n", argv[optind]);
+ fprintf(pysam_stdout, ">%s\n", argv[optind]);
int i, j, seq_len;
char *seq = fai_fetch(fai, argv[optind], &seq_len);
if ( seq_len < 0 ) error("Failed to fetch sequence in %s\n", argv[optind]);
for (i=0; i<seq_len; i+=60)
{
for (j=0; j<60 && i+j<seq_len; j++)
- putchar(seq[i+j]);
- putchar('\n');
+ fputc(seq[i+j], pysam_stdout);
+ fputc('\n', pysam_stdout);
}
free(seq);
}
diff --git a/samtools/kprobaln.c b/samtools/kprobaln.c
index c746803..e319708 100644
--- a/samtools/kprobaln.c
+++ b/samtools/kprobaln.c
@@ -23,6 +23,8 @@
SOFTWARE.
*/
+#include <config.h>
+
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
diff --git a/samtools/kprobaln.c.pysam.c b/samtools/kprobaln.c.pysam.c
index 63dad4c..630b730 100644
--- a/samtools/kprobaln.c.pysam.c
+++ b/samtools/kprobaln.c.pysam.c
@@ -25,6 +25,8 @@
SOFTWARE.
*/
+#include <config.h>
+
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -144,7 +146,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
fi[u+1] = EI * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]);
fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2];
sum += fi[u] + fi[u+1] + fi[u+2];
-// fprintf(pysamerr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
+// fprintf(pysam_stderr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
}
// rescale
s[i] = sum;
@@ -199,7 +201,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e.
bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
-// fprintf(pysamerr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
+// fprintf(pysam_stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
}
// rescale
set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
@@ -236,7 +238,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
if (state) state[i-1] = max_k;
if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k;
#ifdef _MAIN
- fprintf(pysamerr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
+ fprintf(pysam_stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
"ACGT"[query[i]], "ACGT"[ref[(max_k>>2)+1]], max_k&3, max); // DEBUG
#endif
}
@@ -250,7 +252,7 @@ int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_quer
#ifdef _MAIN
#include <unistd.h>
-int main(int argc, char *argv[])
+int samtools_kprobaln_main(int argc, char *argv[])
{
uint8_t conv[256], *iqual, *ref, *query;
int c, l_ref, l_query, i, q = 30, b = 10, P;
@@ -261,7 +263,7 @@ int main(int argc, char *argv[])
}
}
if (optind + 2 > argc) {
- fprintf(pysamerr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
+ fprintf(pysam_stderr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
return 1;
}
memset(conv, 4, 256);
@@ -275,7 +277,7 @@ int main(int argc, char *argv[])
memset(iqual, q, l_query);
kpa_par_def.bw = b;
P = kpa_glocal(ref, l_ref, query, l_query, iqual, &kpa_par_alt, 0, 0);
- fprintf(pysamerr, "%d\n", P);
+ fprintf(pysam_stderr, "%d\n", P);
free(iqual);
return 0;
}
diff --git a/samtools/misc/ace2sam.c b/samtools/misc/ace2sam.c
index 24b6933..77b9993 100644
--- a/samtools/misc/ace2sam.c
+++ b/samtools/misc/ace2sam.c
@@ -23,6 +23,8 @@
SOFTWARE.
*/
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/samtools/misc/ace2sam.c.pysam.c b/samtools/misc/ace2sam.c.pysam.c
index a7f92e2..a663399 100644
--- a/samtools/misc/ace2sam.c.pysam.c
+++ b/samtools/misc/ace2sam.c.pysam.c
@@ -25,6 +25,8 @@
SOFTWARE.
*/
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -49,7 +51,7 @@ KSTREAM_INIT(gzFile, gzread, 16384)
// a fatal error
static void fatal(const char *msg)
{
- fprintf(pysamerr, "E %s\n", msg);
+ fprintf(pysam_stderr, "E %s\n", msg);
exit(1);
}
// remove pads
@@ -64,7 +66,7 @@ static void remove_pads(const kstring_t *src, kstring_t *dst)
dst->l = j;
}
-int main(int argc, char *argv[])
+int samtools_ace2sam_main(int argc, char *argv[])
{
gzFile fp;
kstream_t *ks;
@@ -80,13 +82,13 @@ int main(int argc, char *argv[])
}
}
if (argc == optind) {
- fprintf(pysamerr, "\nUsage: ace2sam [-pc] <in.ace>\n\n");
- fprintf(pysamerr, "Options: -p output padded SAM\n");
- fprintf(pysamerr, " -c write the contig sequence in SAM\n\n");
- fprintf(pysamerr, "Notes: 1. Fields must appear in the following order: (CO->[BQ]->(AF)->(RD->QA))\n");
- fprintf(pysamerr, " 2. The order of reads in AF and in RD must be identical\n");
- fprintf(pysamerr, " 3. Except in BQ, words and numbers must be separated by a single SPACE or TAB\n");
- fprintf(pysamerr, " 4. This program writes the headerless SAM to stdout and header to pysamerr\n\n");
+ fprintf(pysam_stderr, "\nUsage: ace2sam [-pc] <in.ace>\n\n");
+ fprintf(pysam_stderr, "Options: -p output padded SAM\n");
+ fprintf(pysam_stderr, " -c write the contig sequence in SAM\n\n");
+ fprintf(pysam_stderr, "Notes: 1. Fields must appear in the following order: (CO->[BQ]->(AF)->(RD->QA))\n");
+ fprintf(pysam_stderr, " 2. The order of reads in AF and in RD must be identical\n");
+ fprintf(pysam_stderr, " 3. Except in BQ, words and numbers must be separated by a single SPACE or TAB\n");
+ fprintf(pysam_stderr, " 4. This program writes the headerless SAM to pysam_stdout and header to pysam_stderr\n\n");
return 1;
}
@@ -111,14 +113,14 @@ int main(int argc, char *argv[])
if (t[1].s[i] != '*') ++k;
}
// write out the SAM header and contig sequences
- fprintf(pysamerr, "H @SQ\tSN:%s\tLN:%llu\n", t[0].s, (unsigned long long)(t[is_padded?1:2].l)); // The SAM header line
+ fprintf(pysam_stderr, "H @SQ\tSN:%s\tLN:%llu\n", t[0].s, (unsigned long long)(t[is_padded?1:2].l)); // The SAM header line
cns = &t[is_padded?1:2];
- fprintf(pysamerr, "S >%s\n", t[0].s);
+ fprintf(pysam_stderr, "S >%s\n", t[0].s);
for (i = 0; i < cns->l; i += LINE_LEN) {
- fputs("S ", pysamerr);
+ fputs("S ", pysam_stderr);
for (k = 0; k < LINE_LEN && i + k < cns->l; ++k)
- fputc(cns->s[i + k], pysamerr);
- fputc('\n', pysamerr);
+ fputc(cns->s[i + k], pysam_stderr);
+ fputc('\n', pysam_stderr);
}
#define __padded2cigar(sp) do { \
@@ -152,7 +154,7 @@ int main(int argc, char *argv[])
if (write_cns) t[4].s[--t[4].l] = 0; // remove the trailing "*"
for (i = 0; i < t[2].l; ++i) { // read the consensus quality
int q;
- if (ks_getuntil(ks, 0, &s, &dret) < 0) fprintf(pysamerr, "E truncated contig quality\n");
+ if (ks_getuntil(ks, 0, &s, &dret) < 0) fprintf(pysam_stderr, "E truncated contig quality\n");
if (s.l) {
q = atoi(s.s) + 33;
if (q > 126) q = 126;
@@ -161,12 +163,12 @@ int main(int argc, char *argv[])
}
if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
ks_getuntil(ks, '\n', &s, &dret); // skip the empty line
- if (write_cns) puts(t[4].s); t[4].l = 0;
+ if (write_cns) fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout); t[4].l = 0;
} else if (strcmp(s.s, "AF") == 0) { // padded read position
int reversed, neg, pos;
if (t[0].l == 0) fatal("come to 'AF' before reading 'CO'");
if (write_cns) {
- if (t[4].l) puts(t[4].s);
+ if (t[4].l) fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout);
t[4].l = 0;
}
ks_getuntil(ks, 0, &s, &dret); // read name
@@ -239,7 +241,7 @@ int main(int argc, char *argv[])
kputs("\t*\t0\t0\t", &t[4]); // empty MRNM, MPOS and TLEN
kputsn(t[3].s, t[3].l, &t[4]); // unpadded SEQ
kputs("\t*", &t[4]); // QUAL
- puts(t[4].s); // print to stdout
+ fputs(t[4].s, pysam_stdout) & fputc('\n', pysam_stdout); // print to pysam_stdout
++af_i;
} else if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
}
diff --git a/samtools/padding.c b/samtools/padding.c
index 436d716..cea79cf 100644
--- a/samtools/padding.c
+++ b/samtools/padding.c
@@ -1,7 +1,7 @@
/* padding.c -- depad subcommand.
Copyright (C) 2011, 2012 Broad Institute.
- Copyright (C) 2014, 2015 Genome Research Ltd.
+ Copyright (C) 2014-2016 Genome Research Ltd.
Portions copyright (C) 2012, 2013 Peter Cock, The James Hutton Institute.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <string.h>
#include <assert.h>
#include <unistd.h>
@@ -32,6 +34,7 @@ DEALINGS IN THE SOFTWARE. */
#include <htslib/faidx.h>
#include "sam_header.h"
#include "sam_opts.h"
+#include "samtools.h"
#define bam_reg2bin(b,e) hts_reg2bin((b),(e), 14, 5)
@@ -191,6 +194,10 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
int ret = 0, n2 = 0, m2 = 0, *posmap = 0;
b = bam_init1();
+ if (!b) {
+ fprintf(stderr, "[depad] Couldn't allocate bam struct\n");
+ return -1;
+ }
r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
int read_ret;
while ((read_ret = sam_read1(in, h, b)) >= 0) { // read one alignment from `in'
@@ -357,7 +364,10 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
b->core.bin = bam_reg2bin(b->core.pos, bam_endpos(b));
next_seq:
- sam_write1(out, h, b);
+ if (sam_write1(out, h, b) < 0) {
+ print_error_errno("depad", "error writing to output");
+ return -1;
+ }
}
if (read_ret < -1) {
fprintf(stderr, "[depad] truncated file.\n");
@@ -525,7 +535,7 @@ int main_pad2unpad(int argc, char *argv[])
}
// open file handlers
if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
- fprintf(stderr, "[depad] failed to open \"%s\" for reading.\n", argv[optind]);
+ print_error_errno("depad", "failed to open \"%s\" for reading", argv[optind]);
ret = 1;
goto depad_end;
}
@@ -548,7 +558,7 @@ int main_pad2unpad(int argc, char *argv[])
char wmode[2];
strcat(out_mode, sam_open_mode(wmode, fn_out, NULL)==0 ? wmode : "b");
if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
- fprintf(stderr, "[depad] failed to open \"%s\" for writing.\n", fn_out? fn_out : "standard output");
+ print_error_errno("depad", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
ret = 1;
goto depad_end;
}
@@ -565,14 +575,17 @@ int main_pad2unpad(int argc, char *argv[])
}
// Do the depad
- ret = bam_pad2unpad(in, out, h, fai);
+ if (bam_pad2unpad(in, out, h, fai) != 0) ret = 1;
depad_end:
// close files, free and return
if (fai) fai_destroy(fai);
if (h) bam_hdr_destroy(h);
- sam_close(in);
- sam_close(out);
+ if (in) sam_close(in);
+ if (out && sam_close(out) < 0) {
+ fprintf(stderr, "[depad] error on closing output file.\n");
+ ret = 1;
+ }
free(fn_list); free(fn_out);
return ret;
}
@@ -593,12 +606,13 @@ static int usage(int is_long_help)
sam_global_opt_help(stderr, "-...-");
if (is_long_help)
- fprintf(stderr, "Notes:\n\
-\n\
- 1. Requires embedded reference sequences (before the reads for that reference),\n\
- or ideally a FASTA file of the padded reference sequences (via the -T argument).\n\
-\n\
- 2. The input padded alignment read's CIGAR strings must not use P or I operators.\n\
-\n");
+ fprintf(stderr,
+"Notes:\n"
+"\n"
+"1. Requires embedded reference sequences (before the reads for that reference),\n"
+" or ideally a FASTA file of the padded reference sequences (via a -T option).\n"
+"\n"
+"2. Input padded alignment reads' CIGAR strings must not use P or I operators.\n"
+"\n");
return 1;
}
diff --git a/samtools/padding.c.pysam.c b/samtools/padding.c.pysam.c
index fd889f3..9f85c95 100644
--- a/samtools/padding.c.pysam.c
+++ b/samtools/padding.c.pysam.c
@@ -3,7 +3,7 @@
/* padding.c -- depad subcommand.
Copyright (C) 2011, 2012 Broad Institute.
- Copyright (C) 2014, 2015 Genome Research Ltd.
+ Copyright (C) 2014-2016 Genome Research Ltd.
Portions copyright (C) 2012, 2013 Peter Cock, The James Hutton Institute.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -26,6 +26,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <string.h>
#include <assert.h>
#include <unistd.h>
@@ -34,6 +36,7 @@ DEALINGS IN THE SOFTWARE. */
#include <htslib/faidx.h>
#include "sam_header.h"
#include "sam_opts.h"
+#include "samtools.h"
#define bam_reg2bin(b,e) hts_reg2bin((b),(e), 14, 5)
@@ -96,10 +99,10 @@ static int unpad_seq(bam1_t *b, kstring_t *s)
for (i = 0; i < ol; ++i) s->s[s->l++] = 0;
if (0 == cigar_n_warning) {
cigar_n_warning = -1;
- fprintf(pysamerr, "[depad] WARNING: CIGAR op N treated as op D in read %s\n", bam_get_qname(b));
+ fprintf(pysam_stderr, "[depad] WARNING: CIGAR op N treated as op D in read %s\n", bam_get_qname(b));
}
} else {
- fprintf(pysamerr, "[depad] ERROR: Didn't expect CIGAR op %c in read %s\n", BAM_CIGAR_STR[op], bam_get_qname(b));
+ fprintf(pysam_stderr, "[depad] ERROR: Didn't expect CIGAR op %c in read %s\n", BAM_CIGAR_STR[op], bam_get_qname(b));
return -1;
}
}
@@ -114,7 +117,7 @@ int load_unpadded_ref(faidx_t *fai, char *ref_name, int ref_len, kstring_t *seq)
fai_ref = fai_fetch(fai, ref_name, &fai_ref_len);
if (fai_ref_len != ref_len) {
- fprintf(pysamerr, "[depad] ERROR: FASTA sequence %s length %i, expected %i\n", ref_name, fai_ref_len, ref_len);
+ fprintf(pysam_stderr, "[depad] ERROR: FASTA sequence %s length %i, expected %i\n", ref_name, fai_ref_len, ref_len);
free(fai_ref);
return -1;
}
@@ -128,7 +131,7 @@ int load_unpadded_ref(faidx_t *fai, char *ref_name, int ref_len, kstring_t *seq)
} else {
int i = seq_nt16_table[(int)base];
if (i == 0 || i==16) { // Equals maps to 0, anything unexpected to 16
- fprintf(pysamerr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence %s\n", base, (int)base, ref_name);
+ fprintf(pysam_stderr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence %s\n", base, (int)base, ref_name);
free(fai_ref);
return -1;
}
@@ -149,19 +152,19 @@ int get_unpadded_len(faidx_t *fai, char *ref_name, int padded_len)
fai_ref = fai_fetch(fai, ref_name, &fai_ref_len);
if (fai_ref_len != padded_len) {
- fprintf(pysamerr, "[depad] ERROR: FASTA sequence '%s' length %i, expected %i\n", ref_name, fai_ref_len, padded_len);
+ fprintf(pysam_stderr, "[depad] ERROR: FASTA sequence '%s' length %i, expected %i\n", ref_name, fai_ref_len, padded_len);
free(fai_ref);
return -1;
}
for (k = 0; k < padded_len; ++k) {
- //fprintf(pysamerr, "[depad] checking base %i of %i or %i\n", k+1, ref_len, strlen(fai_ref));
+ //fprintf(pysam_stderr, "[depad] checking base %i of %i or %i\n", k+1, ref_len, strlen(fai_ref));
base = fai_ref[k];
if (base == '-' || base == '*') {
gaps += 1;
} else {
int i = seq_nt16_table[(int)base];
if (i == 0 || i==16) { // Equals maps to 0, anything unexpected to 16
- fprintf(pysamerr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence '%s'\n", base, (int)base, ref_name);
+ fprintf(pysam_stderr, "[depad] ERROR: Invalid character %c (ASCII %i) in FASTA sequence '%s'\n", base, (int)base, ref_name);
free(fai_ref);
return -1;
}
@@ -193,6 +196,10 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
int ret = 0, n2 = 0, m2 = 0, *posmap = 0;
b = bam_init1();
+ if (!b) {
+ fprintf(pysam_stderr, "[depad] Couldn't allocate bam struct\n");
+ return -1;
+ }
r.l = r.m = q.l = q.m = 0; r.s = q.s = 0;
int read_ret;
while ((read_ret = sam_read1(in, h, b)) >= 0) { // read one alignment from `in'
@@ -203,20 +210,20 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
uint32_t *cigar = bam_get_cigar(b);
n2 = 0;
if (b->core.pos == 0 && b->core.tid >= 0 && strcmp(bam_get_qname(b), h->target_name[b->core.tid]) == 0) {
- // fprintf(pysamerr, "[depad] Found embedded reference '%s'\n", bam_get_qname(b));
+ // fprintf(pysam_stderr, "[depad] Found embedded reference '%s'\n", bam_get_qname(b));
r_tid = b->core.tid;
if (0!=unpad_seq(b, &r)) {
- fprintf(pysamerr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in reference %s\n", bam_get_qname(b));
+ fprintf(pysam_stderr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in reference %s\n", bam_get_qname(b));
return -1;
};
if (h->target_len[r_tid] != r.l) {
- fprintf(pysamerr, "[depad] ERROR: (Padded) length of '%s' is %u in BAM header, but %llu in embedded reference\n", bam_get_qname(b), h->target_len[r_tid], (unsigned long long)(r.l));
+ fprintf(pysam_stderr, "[depad] ERROR: (Padded) length of '%s' is %u in BAM header, but %llu in embedded reference\n", bam_get_qname(b), h->target_len[r_tid], (unsigned long long)(r.l));
return -1;
}
if (fai) {
// Check the embedded reference matches the FASTA file
if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &q)) {
- fprintf(pysamerr, "[depad] ERROR: Failed to load embedded reference '%s' from FASTA\n", h->target_name[b->core.tid]);
+ fprintf(pysam_stderr, "[depad] ERROR: Failed to load embedded reference '%s' from FASTA\n", h->target_name[b->core.tid]);
return -1;
}
assert(r.l == q.l);
@@ -224,7 +231,7 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
for (i = 0; i < r.l; ++i) {
if (r.s[i] != q.s[i]) {
// Show gaps as ASCII 45
- fprintf(pysamerr, "[depad] ERROR: Embedded sequence and reference FASTA don't match for %s base %i, '%c' vs '%c'\n",
+ fprintf(pysam_stderr, "[depad] ERROR: Embedded sequence and reference FASTA don't match for %s base %i, '%c' vs '%c'\n",
h->target_name[b->core.tid], i+1,
r.s[i] ? seq_nt16_str[(int)r.s[i]] : 45,
q.s[i] ? seq_nt16_str[(int)q.s[i]] : 45);
@@ -238,25 +245,25 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
} else if (b->core.n_cigar > 0) {
int i, k, op;
if (b->core.tid < 0) {
- fprintf(pysamerr, "[depad] ERROR: Read '%s' has CIGAR but no RNAME\n", bam_get_qname(b));
+ fprintf(pysam_stderr, "[depad] ERROR: Read '%s' has CIGAR but no RNAME\n", bam_get_qname(b));
return -1;
} else if (b->core.tid == r_tid) {
; // good case, reference available
- //fprintf(pysamerr, "[depad] Have ref '%s' for read '%s'\n", h->target_name[b->core.tid], bam_get_qname(b));
+ //fprintf(pysam_stderr, "[depad] Have ref '%s' for read '%s'\n", h->target_name[b->core.tid], bam_get_qname(b));
} else if (fai) {
if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) {
- fprintf(pysamerr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
+ fprintf(pysam_stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
return -1;
}
posmap = update_posmap(posmap, r);
r_tid = b->core.tid;
- // fprintf(pysamerr, "[depad] Loaded %s from FASTA file\n", h->target_name[b->core.tid]);
+ // fprintf(pysam_stderr, "[depad] Loaded %s from FASTA file\n", h->target_name[b->core.tid]);
} else {
- fprintf(pysamerr, "[depad] ERROR: Missing %s embedded reference sequence (and no FASTA file)\n", h->target_name[b->core.tid]);
+ fprintf(pysam_stderr, "[depad] ERROR: Missing %s embedded reference sequence (and no FASTA file)\n", h->target_name[b->core.tid]);
return -1;
}
if (0!=unpad_seq(b, &q)) {
- fprintf(pysamerr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in read %s\n", bam_get_qname(b));
+ fprintf(pysam_stderr, "[depad] ERROR: Problem parsing SEQ and/or CIGAR in read %s\n", bam_get_qname(b));
return -1;
};
if (bam_cigar_op(cigar[0]) == BAM_CSOFT_CLIP) {
@@ -325,32 +332,32 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
if (b->core.pos != -1) b->core.pos = posmap[b->core.pos];
if (b->core.mtid < 0 || b->core.mpos < 0) {
/* Nice case, no mate to worry about*/
- // fprintf(pysamerr, "[depad] Read '%s' mate not mapped\n", bam_get_qname(b));
+ // fprintf(pysam_stderr, "[depad] Read '%s' mate not mapped\n", bam_get_qname(b));
/* TODO - Warning if FLAG says mate should be mapped? */
/* Clean up funny input where mate position is given but mate reference is missing: */
b->core.mtid = -1;
b->core.mpos = -1;
} else if (b->core.mtid == b->core.tid) {
/* Nice case, same reference */
- // fprintf(pysamerr, "[depad] Read '%s' mate mapped to same ref\n", bam_get_qname(b));
+ // fprintf(pysam_stderr, "[depad] Read '%s' mate mapped to same ref\n", bam_get_qname(b));
b->core.mpos = posmap[b->core.mpos];
} else {
/* Nasty case, Must load alternative posmap */
- // fprintf(pysamerr, "[depad] Loading reference '%s' temporarily\n", h->target_name[b->core.mtid]);
+ // fprintf(pysam_stderr, "[depad] Loading reference '%s' temporarily\n", h->target_name[b->core.mtid]);
if (!fai) {
- fprintf(pysamerr, "[depad] ERROR: Needed reference %s sequence for mate (and no FASTA file)\n", h->target_name[b->core.mtid]);
+ fprintf(pysam_stderr, "[depad] ERROR: Needed reference %s sequence for mate (and no FASTA file)\n", h->target_name[b->core.mtid]);
return -1;
}
/* Temporarily load the other reference sequence */
if (load_unpadded_ref(fai, h->target_name[b->core.mtid], h->target_len[b->core.mtid], &r)) {
- fprintf(pysamerr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.mtid]);
+ fprintf(pysam_stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.mtid]);
return -1;
}
posmap = update_posmap(posmap, r);
b->core.mpos = posmap[b->core.mpos];
/* Restore the reference and posmap*/
if (load_unpadded_ref(fai, h->target_name[b->core.tid], h->target_len[b->core.tid], &r)) {
- fprintf(pysamerr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
+ fprintf(pysam_stderr, "[depad] ERROR: Failed to load '%s' from reference FASTA\n", h->target_name[b->core.tid]);
return -1;
}
posmap = update_posmap(posmap, r);
@@ -359,10 +366,13 @@ int bam_pad2unpad(samFile *in, samFile *out, bam_hdr_t *h, faidx_t *fai)
b->core.bin = bam_reg2bin(b->core.pos, bam_endpos(b));
next_seq:
- sam_write1(out, h, b);
+ if (sam_write1(out, h, b) < 0) {
+ print_error_errno("depad", "error writing to output");
+ return -1;
+ }
}
if (read_ret < -1) {
- fprintf(pysamerr, "[depad] truncated file.\n");
+ fprintf(pysam_stderr, "[depad] truncated file.\n");
ret = 1;
}
free(r.s); free(q.s); free(posmap);
@@ -379,10 +389,10 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
for (i = 0; i < old->n_targets; ++i) {
unpadded_len = get_unpadded_len(fai, old->target_name[i], old->target_len[i]);
if (unpadded_len < 0) {
- fprintf(pysamerr, "[depad] ERROR getting unpadded length of '%s', padded length %i\n", old->target_name[i], old->target_len[i]);
+ fprintf(pysam_stderr, "[depad] ERROR getting unpadded length of '%s', padded length %i\n", old->target_name[i], old->target_len[i]);
} else {
header->target_len[i] = unpadded_len;
- //fprintf(pysamerr, "[depad] Recalculating '%s' length %i -> %i\n", old->target_name[i], old->target_len[i], header->target_len[i]);
+ //fprintf(pysam_stderr, "[depad] Recalculating '%s' length %i -> %i\n", old->target_name[i], old->target_len[i], header->target_len[i]);
}
}
/* Duplicating the header allocated new buffer for header string */
@@ -404,7 +414,7 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
char *name = strstr(text, "\tSN:");
char *name_end;
if (!name) {
- fprintf(pysamerr, "Unable to find SN: header field\n");
+ fprintf(pysam_stderr, "Unable to find SN: header field\n");
return NULL;
}
name += 4;
@@ -458,7 +468,7 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
/* Check we didn't overflow the buffer */
assert (strlen(header->text) <= strlen(old->text));
if (strlen(header->text) < header->l_text) {
- //fprintf(pysamerr, "[depad] Reallocating header buffer\n");
+ //fprintf(pysam_stderr, "[depad] Reallocating header buffer\n");
assert (newtext == header->text);
newtext = malloc(strlen(header->text) + 1);
strcpy(newtext, header->text);
@@ -466,7 +476,7 @@ bam_hdr_t * fix_header(bam_hdr_t *old, faidx_t *fai)
header->text = newtext;
header->l_text = strlen(newtext);
}
- //fprintf(pysamerr, "[depad] Here is the new header (pending @SQ lines),\n\n%s\n(end)\n", header->text);
+ //fprintf(pysam_stderr, "[depad] Here is the new header (pending @SQ lines),\n\n%s\n(end)\n", header->text);
return header;
}
@@ -507,7 +517,7 @@ int main_pad2unpad(int argc, char *argv[])
break;
case '?': is_long_help = 1; break;
default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break;
- fprintf(pysamerr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
+ fprintf(pysam_stderr, "[bam_fillmd] unrecognized option '-%c'\n\n", c);
return usage(is_long_help);
}
}
@@ -527,30 +537,30 @@ int main_pad2unpad(int argc, char *argv[])
}
// open file handlers
if ((in = sam_open_format(argv[optind], in_mode, &ga.in)) == 0) {
- fprintf(pysamerr, "[depad] failed to open \"%s\" for reading.\n", argv[optind]);
+ print_error_errno("depad", "failed to open \"%s\" for reading", argv[optind]);
ret = 1;
goto depad_end;
}
if (fn_list && hts_set_fai_filename(in, fn_list) != 0) {
- fprintf(pysamerr, "[depad] failed to load reference file \"%s\".\n", fn_list);
+ fprintf(pysam_stderr, "[depad] failed to load reference file \"%s\".\n", fn_list);
ret = 1;
goto depad_end;
}
if ((h = sam_hdr_read(in)) == 0) {
- fprintf(pysamerr, "[depad] failed to read the header from \"%s\".\n", argv[optind]);
+ fprintf(pysam_stderr, "[depad] failed to read the header from \"%s\".\n", argv[optind]);
ret = 1;
goto depad_end;
}
if (fai) {
h_fix = fix_header(h, fai);
} else {
- fprintf(pysamerr, "[depad] Warning - reference lengths will not be corrected without FASTA reference\n");
+ fprintf(pysam_stderr, "[depad] Warning - reference lengths will not be corrected without FASTA reference\n");
h_fix = h;
}
char wmode[2];
strcat(out_mode, sam_open_mode(wmode, fn_out, NULL)==0 ? wmode : "b");
if ((out = sam_open_format(fn_out? fn_out : "-", out_mode, &ga.out)) == 0) {
- fprintf(pysamerr, "[depad] failed to open \"%s\" for writing.\n", fn_out? fn_out : "standard output");
+ print_error_errno("depad", "failed to open \"%s\" for writing", fn_out? fn_out : "standard output");
ret = 1;
goto depad_end;
}
@@ -561,46 +571,50 @@ int main_pad2unpad(int argc, char *argv[])
hts_set_opt(out, CRAM_OPT_NO_REF, 1);
if (sam_hdr_write(out, h_fix) != 0) {
- fprintf(pysamerr, "[depad] failed to write header.\n");
+ fprintf(pysam_stderr, "[depad] failed to write header.\n");
ret = 1;
goto depad_end;
}
// Do the depad
- ret = bam_pad2unpad(in, out, h, fai);
+ if (bam_pad2unpad(in, out, h, fai) != 0) ret = 1;
depad_end:
// close files, free and return
if (fai) fai_destroy(fai);
if (h) bam_hdr_destroy(h);
- sam_close(in);
- sam_close(out);
+ if (in) sam_close(in);
+ if (out && sam_close(out) < 0) {
+ fprintf(pysam_stderr, "[depad] error on closing output file.\n");
+ ret = 1;
+ }
free(fn_list); free(fn_out);
return ret;
}
static int usage(int is_long_help)
{
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Usage: samtools depad <in.bam>\n\n");
- fprintf(pysamerr, "Options:\n");
- fprintf(pysamerr, " -s Output is SAM (default is BAM)\n");
- fprintf(pysamerr, " -S Input is SAM (default is BAM)\n");
- fprintf(pysamerr, " -u Uncompressed BAM output (can't use with -s)\n");
- fprintf(pysamerr, " -1 Fast compression BAM output (can't use with -s)\n");
- fprintf(pysamerr, " -T, --reference FILE\n");
- fprintf(pysamerr, " Padded reference sequence file [null]\n");
- fprintf(pysamerr, " -o FILE Output file name [stdout]\n");
- fprintf(pysamerr, " -? Longer help\n");
- sam_global_opt_help(pysamerr, "-...-");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Usage: samtools depad <in.bam>\n\n");
+ fprintf(pysam_stderr, "Options:\n");
+ fprintf(pysam_stderr, " -s Output is SAM (default is BAM)\n");
+ fprintf(pysam_stderr, " -S Input is SAM (default is BAM)\n");
+ fprintf(pysam_stderr, " -u Uncompressed BAM output (can't use with -s)\n");
+ fprintf(pysam_stderr, " -1 Fast compression BAM output (can't use with -s)\n");
+ fprintf(pysam_stderr, " -T, --reference FILE\n");
+ fprintf(pysam_stderr, " Padded reference sequence file [null]\n");
+ fprintf(pysam_stderr, " -o FILE Output file name [pysam_stdout]\n");
+ fprintf(pysam_stderr, " -? Longer help\n");
+ sam_global_opt_help(pysam_stderr, "-...-");
if (is_long_help)
- fprintf(pysamerr, "Notes:\n\
-\n\
- 1. Requires embedded reference sequences (before the reads for that reference),\n\
- or ideally a FASTA file of the padded reference sequences (via the -T argument).\n\
-\n\
- 2. The input padded alignment read's CIGAR strings must not use P or I operators.\n\
-\n");
+ fprintf(pysam_stderr,
+"Notes:\n"
+"\n"
+"1. Requires embedded reference sequences (before the reads for that reference),\n"
+" or ideally a FASTA file of the padded reference sequences (via a -T option).\n"
+"\n"
+"2. Input padded alignment reads' CIGAR strings must not use P or I operators.\n"
+"\n");
return 1;
}
diff --git a/samtools/phase.c b/samtools/phase.c
index 0667ea5..6909912 100644
--- a/samtools/phase.c
+++ b/samtools/phase.c
@@ -1,7 +1,7 @@
/* phase.c -- phase subcommand.
Copyright (C) 2011 Broad Institute.
- Copyright (C) 2013, 2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -30,8 +32,10 @@ DEALINGS IN THE SOFTWARE. */
#include <math.h>
#include <zlib.h>
#include "htslib/sam.h"
+#include "htslib/kstring.h"
#include "errmod.h"
#include "sam_opts.h"
+#include "samtools.h"
#include "htslib/kseq.h"
KSTREAM_INIT(gzFile, gzread, 16384)
@@ -53,6 +57,7 @@ typedef struct {
samFile* fp;
bam_hdr_t* fp_hdr;
char *pre;
+ char *out_name[3];
samFile* out[3];
bam_hdr_t* out_hdr[3];
// alignment queue
@@ -333,7 +338,7 @@ static int clean_seqs(int vpos, nseq_t *hash)
return ret;
}
-static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
+static int dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
{
int i, is_flip, drop_ambi;
drop_ambi = g->flag & FLAG_DROP_AMBI;
@@ -361,12 +366,16 @@ static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
if (which < 2 && is_flip) which = 1 - which; // increase the randomness
}
if (which == 3) which = (drand48() < 0.5);
- sam_write1(g->out[which], g->out_hdr[which], b);
+ if (sam_write1(g->out[which], g->out_hdr[which], b) < 0) {
+ print_error_errno("phase", "error writing to '%s'", g->out_name[which]);
+ return -1;
+ }
bam_destroy1(b);
g->b[i] = 0;
}
memmove(g->b, g->b + i, (g->n - i) * sizeof(void*));
g->n -= i;
+ return 0;
}
static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *hash)
@@ -393,7 +402,7 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
else f->phased = 1, f->phase = f->seq[0] - 1;
}
}
- dump_aln(g, min_pos, hash);
+ if (dump_aln(g, min_pos, hash) < 0) return -1;
++g->vpos_shift;
return 1;
}
@@ -451,7 +460,7 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
printf("//\n");
fflush(stdout);
g->vpos_shift += vpos;
- dump_aln(g, min_pos, hash);
+ if (dump_aln(g, min_pos, hash) < 0) return -1;
return vpos;
}
@@ -536,6 +545,26 @@ static int gl2cns(float q[16])
return (min_ij>>2&3) == (min_ij&3)? 0 : 1<<18 | (min_ij>>2&3)<<16 | (min_ij&3) | (int)(min2 - min + .499) << 2;
}
+static int start_output(phaseg_t *g, int c, const char *middle, const htsFormat *fmt)
+{
+ kstring_t s = { 0, 0, NULL };
+ ksprintf(&s, "%s.%s.%s", g->pre, middle, hts_format_file_extension(fmt));
+ g->out_name[c] = ks_release(&s);
+ g->out[c] = sam_open_format(g->out_name[c], "wb", fmt);
+ if (! g->out[c]) {
+ print_error_errno("phase", "Failed to open output file '%s'", g->out_name[c]);
+ return -1;
+ }
+
+ g->out_hdr[c] = bam_hdr_dup(g->fp_hdr);
+ if (sam_hdr_write(g->out[c], g->out_hdr[c]) < 0) {
+ print_error_errno("phase", "Failed to write header for '%s'", g->out_name[c]);
+ return -1;
+ }
+
+ return 0;
+}
+
int main_phase(int argc, char *argv[])
{
int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0, usage = 0;
@@ -555,6 +584,8 @@ int main_phase(int argc, char *argv[])
{ NULL, 0, NULL, 0 }
};
+ // FIXME Leaks galore in the case of error returns
+
memset(&g, 0, sizeof(phaseg_t));
g.flag = FLAG_FIX_CHIMERA;
g.min_varLOD = 37; g.k = 13; g.min_baseQ = 13; g.max_depth = 256;
@@ -594,9 +625,14 @@ int main_phase(int argc, char *argv[])
return 1;
}
g.fp = sam_open_format(argv[optind], "r", &ga.in);
+ if (!g.fp) {
+ print_error_errno("phase", "Couldn't open '%s'", argv[optind]);
+ return 1;
+ }
g.fp_hdr = sam_hdr_read(g.fp);
if (g.fp_hdr == NULL) {
- fprintf(stderr, "Failed to read header for '%s'\n", argv[optind]);
+ fprintf(stderr, "[%s] Failed to read header for '%s'\n",
+ __func__, argv[optind]);
return 1;
}
if (fn_list) { // read the list of sites to phase
@@ -604,20 +640,13 @@ int main_phase(int argc, char *argv[])
free(fn_list);
} else g.flag &= ~FLAG_LIST_EXCL;
if (g.pre) { // open BAMs to write
- char *s = (char*)malloc(strlen(g.pre) + 20);
if (ga.out.format == unknown_format)
ga.out.format = bam; // default via "wb".
- strcpy(s, g.pre); strcat(s, ".0."); strcat(s, hts_format_file_extension(&ga.out));
- g.out[0] = sam_open_format(s, "wb", &ga.out);
- strcpy(s, g.pre); strcat(s, ".1."); strcat(s, hts_format_file_extension(&ga.out));
- g.out[1] = sam_open_format(s, "wb", &ga.out);
- strcpy(s, g.pre); strcat(s, ".chimera."); strcat(s, hts_format_file_extension(&ga.out));
- g.out[2] = sam_open_format(s, "wb", &ga.out);
- for (c = 0; c <= 2; ++c) {
- g.out_hdr[c] = bam_hdr_dup(g.fp_hdr);
- sam_hdr_write(g.out[c], g.out_hdr[c]);
- }
- free(s);
+
+ // Open each output file g.out[0..2], dupping and writing the header
+ if (start_output(&g, 0, "0", &ga.out) < 0 ||
+ start_output(&g, 1, "1", &ga.out) < 0 ||
+ start_output(&g, 2, "chimera", &ga.out) < 0) return 1;
}
iter = bam_plp_init(readaln, &g);
@@ -647,7 +676,10 @@ int main_phase(int argc, char *argv[])
g.vpos_shift = 0;
if (lasttid >= 0) {
seqs = shrink_hash(seqs);
- phase(&g, g.fp_hdr->target_name[lasttid], vpos, cns, seqs);
+ if (phase(&g, g.fp_hdr->target_name[lasttid],
+ vpos, cns, seqs) < 0) {
+ return 1;
+ }
update_vpos(0x7fffffff, seqs);
}
lasttid = tid;
@@ -716,14 +748,20 @@ int main_phase(int argc, char *argv[])
}
if (dophase) {
seqs = shrink_hash(seqs);
- phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+ if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+ return 1;
+ }
update_vpos(vpos, seqs);
cns[0] = cns[vpos];
vpos = 0;
}
++vpos;
}
- if (tid >= 0) phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+ if (tid >= 0) {
+ if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+ return 1;
+ }
+ }
bam_hdr_destroy(g.fp_hdr);
bam_plp_destroy(iter);
sam_close(g.fp);
@@ -733,11 +771,18 @@ int main_phase(int argc, char *argv[])
errmod_destroy(em);
free(bases);
if (g.pre) {
+ int res = 0;
for (c = 0; c <= 2; ++c) {
- sam_close(g.out[c]);
+ if (sam_close(g.out[c]) < 0) {
+ fprintf(stderr, "[%s] error on closing '%s'\n",
+ __func__, g.out_name[c]);
+ res = 1;
+ }
bam_hdr_destroy(g.out_hdr[c]);
+ free(g.out_name[c]);
}
free(g.pre); free(g.b);
+ if (res) return 1;
}
sam_global_args_free(&ga);
return 0;
diff --git a/samtools/phase.c.pysam.c b/samtools/phase.c.pysam.c
index bc1d455..3babd37 100644
--- a/samtools/phase.c.pysam.c
+++ b/samtools/phase.c.pysam.c
@@ -3,7 +3,7 @@
/* phase.c -- phase subcommand.
Copyright (C) 2011 Broad Institute.
- Copyright (C) 2013, 2014 Genome Research Ltd.
+ Copyright (C) 2013-2016 Genome Research Ltd.
Author: Heng Li <lh3 at sanger.ac.uk>
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -32,8 +34,10 @@ DEALINGS IN THE SOFTWARE. */
#include <math.h>
#include <zlib.h>
#include "htslib/sam.h"
+#include "htslib/kstring.h"
#include "errmod.h"
#include "sam_opts.h"
+#include "samtools.h"
#include "htslib/kseq.h"
KSTREAM_INIT(gzFile, gzread, 16384)
@@ -55,6 +59,7 @@ typedef struct {
samFile* fp;
bam_hdr_t* fp_hdr;
char *pre;
+ char *out_name[3];
samFile* out[3];
bam_hdr_t* out_hdr[3];
// alignment queue
@@ -335,7 +340,7 @@ static int clean_seqs(int vpos, nseq_t *hash)
return ret;
}
-static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
+static int dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
{
int i, is_flip, drop_ambi;
drop_ambi = g->flag & FLAG_DROP_AMBI;
@@ -363,12 +368,16 @@ static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
if (which < 2 && is_flip) which = 1 - which; // increase the randomness
}
if (which == 3) which = (drand48() < 0.5);
- sam_write1(g->out[which], g->out_hdr[which], b);
+ if (sam_write1(g->out[which], g->out_hdr[which], b) < 0) {
+ print_error_errno("phase", "error writing to '%s'", g->out_name[which]);
+ return -1;
+ }
bam_destroy1(b);
g->b[i] = 0;
}
memmove(g->b, g->b + i, (g->n - i) * sizeof(void*));
g->n -= i;
+ return 0;
}
static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *hash)
@@ -383,8 +392,8 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
i = clean_seqs(vpos, hash); // i is true if hash has an element with its vpos >= vpos
min_pos = i? cns[vpos]>>32 : 0x7fffffff;
if (vpos == 1) {
- printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1);
- printf("M0\t%s\t%d\t%d\t%c\t%c\t%d\t0\t0\t0\t0\n//\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1,
+ fprintf(pysam_stdout, "PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1);
+ fprintf(pysam_stdout, "M0\t%s\t%d\t%d\t%c\t%c\t%d\t0\t0\t0\t0\n//\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1,
"ACGTX"[cns[0]&3], "ACGTX"[cns[0]>>16&3], g->vpos_shift + 1);
for (k = 0; k < kh_end(hash); ++k) {
if (kh_exist(hash, k)) {
@@ -395,14 +404,14 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
else f->phased = 1, f->phase = f->seq[0] - 1;
}
}
- dump_aln(g, min_pos, hash);
+ if (dump_aln(g, min_pos, hash) < 0) return -1;
++g->vpos_shift;
return 1;
}
{ // phase
int **cnt;
uint64_t *mask;
- printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[vpos-1]>>32) + 1);
+ fprintf(pysam_stdout, "PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[vpos-1]>>32) + 1);
sitemask = calloc(vpos, 1);
cnt = count_all(g->k, vpos, hash);
path = dynaprog(g->k, vpos, cnt);
@@ -423,13 +432,13 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
}
}
for (i = 0; i < n_masked; ++i)
- printf("FL\t%s\t%d\t%d\n", chr, (int)(regmask[i]>>32) + 1, (int)regmask[i] + 1);
+ fprintf(pysam_stdout, "FL\t%s\t%d\t%d\n", chr, (int)(regmask[i]>>32) + 1, (int)regmask[i] + 1);
for (i = 0; i < vpos; ++i) {
uint64_t x = pcnt[i];
int8_t c[2];
c[0] = (cns[i]&0xffff)>>2 == 0? 4 : (cns[i]&3);
c[1] = (cns[i]>>16&0xffff)>>2 == 0? 4 : (cns[i]>>16&3);
- printf("M%d\t%s\t%d\t%d\t%c\t%c\t%d\t%d\t%d\t%d\t%d\n", sitemask[i]+1, chr, (int)(cns[0]>>32) + 1, (int)(cns[i]>>32) + 1, "ACGTX"[c[path[i]]], "ACGTX"[c[1-path[i]]],
+ fprintf(pysam_stdout, "M%d\t%s\t%d\t%d\t%c\t%c\t%d\t%d\t%d\t%d\t%d\n", sitemask[i]+1, chr, (int)(cns[0]>>32) + 1, (int)(cns[i]>>32) + 1, "ACGTX"[c[path[i]]], "ACGTX"[c[1-path[i]]],
i + g->vpos_shift + 1, (int)(x&0xffff), (int)(x>>16&0xffff), (int)(x>>32&0xffff), (int)(x>>48&0xffff));
}
free(path); free(pcnt); free(regmask); free(sitemask);
@@ -441,19 +450,19 @@ static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *
ks_introsort_rseq(n_seqs, seqs);
for (i = 0; i < n_seqs; ++i) {
frag_t *f = seqs[i];
- printf("EV\t0\t%s\t%d\t40\t%dM\t*\t0\t0\t", chr, f->vpos + 1 + g->vpos_shift, f->vlen);
+ fprintf(pysam_stdout, "EV\t0\t%s\t%d\t40\t%dM\t*\t0\t0\t", chr, f->vpos + 1 + g->vpos_shift, f->vlen);
for (j = 0; j < f->vlen; ++j) {
uint32_t c = cns[f->vpos + j];
- if (f->seq[j] == 0) putchar('N');
- else putchar("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)]);
+ if (f->seq[j] == 0) fputc('N', pysam_stdout);
+ else fputc("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)], pysam_stdout);
}
- printf("\t*\tYP:i:%d\tYF:i:%d\tYI:i:%d\tYO:i:%d\tYS:i:%d\n", f->phase, f->flip, f->in, f->out, f->beg+1);
+ fprintf(pysam_stdout, "\t*\tYP:i:%d\tYF:i:%d\tYI:i:%d\tYO:i:%d\tYS:i:%d\n", f->phase, f->flip, f->in, f->out, f->beg+1);
}
free(seqs);
- printf("//\n");
- fflush(stdout);
+ fprintf(pysam_stdout, "//\n");
+ fflush(pysam_stdout);
g->vpos_shift += vpos;
- dump_aln(g, min_pos, hash);
+ if (dump_aln(g, min_pos, hash) < 0) return -1;
return vpos;
}
@@ -538,6 +547,26 @@ static int gl2cns(float q[16])
return (min_ij>>2&3) == (min_ij&3)? 0 : 1<<18 | (min_ij>>2&3)<<16 | (min_ij&3) | (int)(min2 - min + .499) << 2;
}
+static int start_output(phaseg_t *g, int c, const char *middle, const htsFormat *fmt)
+{
+ kstring_t s = { 0, 0, NULL };
+ ksprintf(&s, "%s.%s.%s", g->pre, middle, hts_format_file_extension(fmt));
+ g->out_name[c] = ks_release(&s);
+ g->out[c] = sam_open_format(g->out_name[c], "wb", fmt);
+ if (! g->out[c]) {
+ print_error_errno("phase", "Failed to open output file '%s'", g->out_name[c]);
+ return -1;
+ }
+
+ g->out_hdr[c] = bam_hdr_dup(g->fp_hdr);
+ if (sam_hdr_write(g->out[c], g->out_hdr[c]) < 0) {
+ print_error_errno("phase", "Failed to write header for '%s'", g->out_name[c]);
+ return -1;
+ }
+
+ return 0;
+}
+
int main_phase(int argc, char *argv[])
{
int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0, usage = 0;
@@ -557,6 +586,8 @@ int main_phase(int argc, char *argv[])
{ NULL, 0, NULL, 0 }
};
+ // FIXME Leaks galore in the case of error returns
+
memset(&g, 0, sizeof(phaseg_t));
g.flag = FLAG_FIX_CHIMERA;
g.min_varLOD = 37; g.k = 13; g.min_baseQ = 13; g.max_depth = 256;
@@ -578,27 +609,32 @@ int main_phase(int argc, char *argv[])
if (usage) break;
}
if (usage || argc == optind) {
- fprintf(pysamerr, "\n");
- fprintf(pysamerr, "Usage: samtools phase [options] <in.bam>\n\n");
- fprintf(pysamerr, "Options: -k INT block length [%d]\n", g.k);
- fprintf(pysamerr, " -b STR prefix of BAMs to output [null]\n");
- fprintf(pysamerr, " -q INT min het phred-LOD [%d]\n", g.min_varLOD);
- fprintf(pysamerr, " -Q INT min base quality in het calling [%d]\n", g.min_baseQ);
- fprintf(pysamerr, " -D INT max read depth [%d]\n", g.max_depth);
-// fprintf(pysamerr, " -l FILE list of sites to phase [null]\n");
- fprintf(pysamerr, " -F do not attempt to fix chimeras\n");
- fprintf(pysamerr, " -A drop reads with ambiguous phase\n");
-// fprintf(pysamerr, " -e do not discover SNPs (effective with -l)\n");
- fprintf(pysamerr, "\n");
-
- sam_global_opt_help(pysamerr, "-....");
+ fprintf(pysam_stderr, "\n");
+ fprintf(pysam_stderr, "Usage: samtools phase [options] <in.bam>\n\n");
+ fprintf(pysam_stderr, "Options: -k INT block length [%d]\n", g.k);
+ fprintf(pysam_stderr, " -b STR prefix of BAMs to output [null]\n");
+ fprintf(pysam_stderr, " -q INT min het phred-LOD [%d]\n", g.min_varLOD);
+ fprintf(pysam_stderr, " -Q INT min base quality in het calling [%d]\n", g.min_baseQ);
+ fprintf(pysam_stderr, " -D INT max read depth [%d]\n", g.max_depth);
+// fprintf(pysam_stderr, " -l FILE list of sites to phase [null]\n");
+ fprintf(pysam_stderr, " -F do not attempt to fix chimeras\n");
+ fprintf(pysam_stderr, " -A drop reads with ambiguous phase\n");
+// fprintf(pysam_stderr, " -e do not discover SNPs (effective with -l)\n");
+ fprintf(pysam_stderr, "\n");
+
+ sam_global_opt_help(pysam_stderr, "-....");
return 1;
}
g.fp = sam_open_format(argv[optind], "r", &ga.in);
+ if (!g.fp) {
+ print_error_errno("phase", "Couldn't open '%s'", argv[optind]);
+ return 1;
+ }
g.fp_hdr = sam_hdr_read(g.fp);
if (g.fp_hdr == NULL) {
- fprintf(pysamerr, "Failed to read header for '%s'\n", argv[optind]);
+ fprintf(pysam_stderr, "[%s] Failed to read header for '%s'\n",
+ __func__, argv[optind]);
return 1;
}
if (fn_list) { // read the list of sites to phase
@@ -606,20 +642,13 @@ int main_phase(int argc, char *argv[])
free(fn_list);
} else g.flag &= ~FLAG_LIST_EXCL;
if (g.pre) { // open BAMs to write
- char *s = (char*)malloc(strlen(g.pre) + 20);
if (ga.out.format == unknown_format)
ga.out.format = bam; // default via "wb".
- strcpy(s, g.pre); strcat(s, ".0."); strcat(s, hts_format_file_extension(&ga.out));
- g.out[0] = sam_open_format(s, "wb", &ga.out);
- strcpy(s, g.pre); strcat(s, ".1."); strcat(s, hts_format_file_extension(&ga.out));
- g.out[1] = sam_open_format(s, "wb", &ga.out);
- strcpy(s, g.pre); strcat(s, ".chimera."); strcat(s, hts_format_file_extension(&ga.out));
- g.out[2] = sam_open_format(s, "wb", &ga.out);
- for (c = 0; c <= 2; ++c) {
- g.out_hdr[c] = bam_hdr_dup(g.fp_hdr);
- sam_hdr_write(g.out[c], g.out_hdr[c]);
- }
- free(s);
+
+ // Open each output file g.out[0..2], dupping and writing the header
+ if (start_output(&g, 0, "0", &ga.out) < 0 ||
+ start_output(&g, 1, "1", &ga.out) < 0 ||
+ start_output(&g, 2, "chimera", &ga.out) < 0) return 1;
}
iter = bam_plp_init(readaln, &g);
@@ -627,20 +656,20 @@ int main_phase(int argc, char *argv[])
seqs = kh_init(64);
em = errmod_init(1. - 0.83);
bases = calloc(g.max_depth, 2);
- printf("CC\n");
- printf("CC\tDescriptions:\nCC\n");
- printf("CC\t CC comments\n");
- printf("CC\t PS start of a phase set\n");
- printf("CC\t FL filtered region\n");
- printf("CC\t M[012] markers; 0 for singletons, 1 for phased and 2 for filtered\n");
- printf("CC\t EV supporting reads; SAM format\n");
- printf("CC\t // end of a phase set\nCC\n");
- printf("CC\tFormats of PS, FL and M[012] lines (1-based coordinates):\nCC\n");
- printf("CC\t PS chr phaseSetStart phaseSetEnd\n");
- printf("CC\t FL chr filterStart filterEnd\n");
- printf("CC\t M? chr PS pos allele0 allele1 hetIndex #supports0 #errors0 #supp1 #err1\n");
- printf("CC\nCC\n");
- fflush(stdout);
+ fprintf(pysam_stdout, "CC\n");
+ fprintf(pysam_stdout, "CC\tDescriptions:\nCC\n");
+ fprintf(pysam_stdout, "CC\t CC comments\n");
+ fprintf(pysam_stdout, "CC\t PS start of a phase set\n");
+ fprintf(pysam_stdout, "CC\t FL filtered region\n");
+ fprintf(pysam_stdout, "CC\t M[012] markers; 0 for singletons, 1 for phased and 2 for filtered\n");
+ fprintf(pysam_stdout, "CC\t EV supporting reads; SAM format\n");
+ fprintf(pysam_stdout, "CC\t // end of a phase set\nCC\n");
+ fprintf(pysam_stdout, "CC\tFormats of PS, FL and M[012] lines (1-based coordinates):\nCC\n");
+ fprintf(pysam_stdout, "CC\t PS chr phaseSetStart phaseSetEnd\n");
+ fprintf(pysam_stdout, "CC\t FL chr filterStart filterEnd\n");
+ fprintf(pysam_stdout, "CC\t M? chr PS pos allele0 allele1 hetIndex #supports0 #errors0 #supp1 #err1\n");
+ fprintf(pysam_stdout, "CC\nCC\n");
+ fflush(pysam_stdout);
while ((plp = bam_plp_auto(iter, &tid, &pos, &n)) != 0) {
int i, k, c, tmp, dophase = 1, in_set = 0;
float q[16];
@@ -649,7 +678,10 @@ int main_phase(int argc, char *argv[])
g.vpos_shift = 0;
if (lasttid >= 0) {
seqs = shrink_hash(seqs);
- phase(&g, g.fp_hdr->target_name[lasttid], vpos, cns, seqs);
+ if (phase(&g, g.fp_hdr->target_name[lasttid],
+ vpos, cns, seqs) < 0) {
+ return 1;
+ }
update_vpos(0x7fffffff, seqs);
}
lasttid = tid;
@@ -718,14 +750,20 @@ int main_phase(int argc, char *argv[])
}
if (dophase) {
seqs = shrink_hash(seqs);
- phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+ if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+ return 1;
+ }
update_vpos(vpos, seqs);
cns[0] = cns[vpos];
vpos = 0;
}
++vpos;
}
- if (tid >= 0) phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs);
+ if (tid >= 0) {
+ if (phase(&g, g.fp_hdr->target_name[tid], vpos, cns, seqs) < 0) {
+ return 1;
+ }
+ }
bam_hdr_destroy(g.fp_hdr);
bam_plp_destroy(iter);
sam_close(g.fp);
@@ -735,11 +773,18 @@ int main_phase(int argc, char *argv[])
errmod_destroy(em);
free(bases);
if (g.pre) {
+ int res = 0;
for (c = 0; c <= 2; ++c) {
- sam_close(g.out[c]);
+ if (sam_close(g.out[c]) < 0) {
+ fprintf(pysam_stderr, "[%s] error on closing '%s'\n",
+ __func__, g.out_name[c]);
+ res = 1;
+ }
bam_hdr_destroy(g.out_hdr[c]);
+ free(g.out_name[c]);
}
free(g.pre); free(g.b);
+ if (res) return 1;
}
sam_global_args_free(&ga);
return 0;
diff --git a/samtools/pysam.h b/samtools/pysam.h
index 008cbbd..b0fc4fb 100644
--- a/samtools/pysam.h
+++ b/samtools/pysam.h
@@ -1,5 +1,7 @@
#ifndef PYSAM_H
#define PYSAM_H
#include "stdio.h"
-extern FILE * pysamerr;
+extern FILE * pysam_stderr;
+extern FILE * pysam_stdout;
+extern const char * pysam_stdout_fn;
#endif
diff --git a/samtools/sam.c b/samtools/sam.c
index d6cc9f6..237c3e8 100644
--- a/samtools/sam.c
+++ b/samtools/sam.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <string.h>
#include <unistd.h>
#include "htslib/faidx.h"
@@ -31,7 +33,7 @@ DEALINGS IN THE SOFTWARE. */
int samthreads(samfile_t *fp, int n_threads, int n_sub_blks)
{
if (hts_get_format(fp->file)->format != bam || !fp->is_write) return -1;
- bgzf_mt(fp->x.bam, n_threads, n_sub_blks);
+ if (bgzf_mt(fp->x.bam, n_threads, n_sub_blks) < 0) return -1;
return 0;
}
@@ -42,6 +44,10 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
if (hts_fp == NULL) return NULL;
samfile_t *fp = malloc(sizeof (samfile_t));
+ if (!fp) {
+ sam_close(hts_fp);
+ return NULL;
+ }
fp->file = hts_fp;
fp->x.bam = hts_fp->fp.bgzf;
if (strchr(mode, 'r')) {
@@ -66,7 +72,15 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
enum htsExactFormat fmt = hts_get_format(fp->file)->format;
fp->header = (bam_hdr_t *)aux; // For writing, we won't free it
fp->is_write = 1;
- if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) sam_hdr_write(fp->file, fp->header);
+ if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) {
+ if (sam_hdr_write(fp->file, fp->header) < 0) {
+ if (bam_verbose >= 1)
+ fprintf(stderr, "[samopen] Couldn't write header\n");
+ sam_close(hts_fp);
+ free(fp);
+ return NULL;
+ }
+ }
}
return fp;
diff --git a/samtools/sam.c.pysam.c b/samtools/sam.c.pysam.c
index e7c4cac..f7db820 100644
--- a/samtools/sam.c.pysam.c
+++ b/samtools/sam.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <string.h>
#include <unistd.h>
#include "htslib/faidx.h"
@@ -33,7 +35,7 @@ DEALINGS IN THE SOFTWARE. */
int samthreads(samfile_t *fp, int n_threads, int n_sub_blks)
{
if (hts_get_format(fp->file)->format != bam || !fp->is_write) return -1;
- bgzf_mt(fp->x.bam, n_threads, n_sub_blks);
+ if (bgzf_mt(fp->x.bam, n_threads, n_sub_blks) < 0) return -1;
return 0;
}
@@ -44,6 +46,10 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
if (hts_fp == NULL) return NULL;
samfile_t *fp = malloc(sizeof (samfile_t));
+ if (!fp) {
+ sam_close(hts_fp);
+ return NULL;
+ }
fp->file = hts_fp;
fp->x.bam = hts_fp->fp.bgzf;
if (strchr(mode, 'r')) {
@@ -62,13 +68,21 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux)
}
fp->is_write = 0;
if (fp->header->n_targets == 0 && bam_verbose >= 1)
- fprintf(pysamerr, "[samopen] no @SQ lines in the header.\n");
+ fprintf(pysam_stderr, "[samopen] no @SQ lines in the header.\n");
}
else {
enum htsExactFormat fmt = hts_get_format(fp->file)->format;
fp->header = (bam_hdr_t *)aux; // For writing, we won't free it
fp->is_write = 1;
- if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) sam_hdr_write(fp->file, fp->header);
+ if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) {
+ if (sam_hdr_write(fp->file, fp->header) < 0) {
+ if (bam_verbose >= 1)
+ fprintf(pysam_stderr, "[samopen] Couldn't write header\n");
+ sam_close(hts_fp);
+ free(fp);
+ return NULL;
+ }
+ }
}
return fp;
@@ -122,11 +136,11 @@ char *samfaipath(const char *fn_ref)
strcat(strcpy(fn_list, fn_ref), ".fai");
if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
if (access(fn_ref, R_OK) == -1) {
- fprintf(pysamerr, "[samfaipath] fail to read file %s.\n", fn_ref);
+ fprintf(pysam_stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
} else {
- if (bam_verbose >= 3) fprintf(pysamerr, "[samfaipath] build FASTA index...\n");
+ if (bam_verbose >= 3) fprintf(pysam_stderr, "[samfaipath] build FASTA index...\n");
if (fai_build(fn_ref) == -1) {
- fprintf(pysamerr, "[samfaipath] fail to build FASTA index.\n");
+ fprintf(pysam_stderr, "[samfaipath] fail to build FASTA index.\n");
free(fn_list); fn_list = 0;
}
}
diff --git a/samtools/sam_header.c b/samtools/sam_header.c
index 75ca724..64da68f 100644
--- a/samtools/sam_header.c
+++ b/samtools/sam_header.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "sam_header.h"
#include <stdio.h>
#include <string.h>
diff --git a/samtools/sam_header.c.pysam.c b/samtools/sam_header.c.pysam.c
index ecf937c..e39807d 100644
--- a/samtools/sam_header.c.pysam.c
+++ b/samtools/sam_header.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "sam_header.h"
#include <stdio.h>
#include <string.h>
@@ -81,7 +83,7 @@ static void debug(const char *format, ...)
{
va_list ap;
va_start(ap, format);
- vfprintf(pysamerr, format, ap);
+ vfprintf(pysam_stderr, format, ap);
va_end(ap);
}
@@ -775,8 +777,8 @@ void *sam_header_merge(int n, const void **_dicts)
if ( status==2 )
{
- print_header_line(pysamerr,tmpl_hlines->data);
- print_header_line(pysamerr,out_hlines->data);
+ print_header_line(pysam_stderr,tmpl_hlines->data);
+ print_header_line(pysam_stderr,out_hlines->data);
debug("Conflicting lines, cannot merge the headers.\n");
return 0;
}
diff --git a/samtools/sam_opts.c b/samtools/sam_opts.c
index 0ed197e..9369145 100644
--- a/samtools/sam_opts.c
+++ b/samtools/sam_opts.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/samtools/sam_opts.c.pysam.c b/samtools/sam_opts.c.pysam.c
index c976438..d0b56a3 100644
--- a/samtools/sam_opts.c.pysam.c
+++ b/samtools/sam_opts.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -79,7 +81,7 @@ int parse_sam_global_opt(int c, const char *optarg, const struct option *lopt,
}
if (!lopt->name) {
- fprintf(pysamerr, "Unexpected global option: %s\n", lopt->name);
+ fprintf(pysam_stderr, "Unexpected global option: %s\n", lopt->name);
return -1;
}
diff --git a/samtools/sam_view.c b/samtools/sam_view.c
index 4358a1c..402e1d3 100644
--- a/samtools/sam_view.c
+++ b/samtools/sam_view.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -96,7 +98,7 @@ static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settin
}
if (settings->library) {
const char *p = bam_get_library((bam_hdr_t*)h, b);
- if (p && strcmp(p, settings->library) != 0) return 1;
+ if (!p || strcmp(p, settings->library) != 0) return 1;
}
if (settings->remove_aux_len) {
size_t i;
@@ -400,18 +402,18 @@ int main_samview(int argc, char *argv[])
}
}
if (fn_un_out) {
- if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
+ if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
ret = 1;
goto view_end;
}
- if (fn_list) {
- if (hts_set_fai_filename(un_out, fn_list) != 0) {
- fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
- ret = 1;
- goto view_end;
- }
+ if (fn_list) {
+ if (hts_set_fai_filename(un_out, fn_list) != 0) {
+ fprintf(stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+ ret = 1;
+ goto view_end;
}
+ }
if (*out_format || is_header ||
out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
(ga.out.format != sam && ga.out.format != unknown_format)) {
@@ -556,35 +558,37 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
fprintf(fp,
"Notes:\n"
"\n"
-" 1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
-" Further control over the CRAM format can be specified by using the\n"
-" --output-fmt-option, e.g. to specify the number of sequences per slice\n"
-" and to use avoid reference based compression:\n"
-" `samtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
-" --output-fmt-option no_ref -o out.cram in.bam'\n"
+"1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
+" Further control over the CRAM format can be specified by using the\n"
+" --output-fmt-option, e.g. to specify the number of sequences per slice\n"
+" and to use avoid reference based compression:\n"
"\n"
-" Options can also be specified as a comma separated list within the\n"
-" --output-fmt value too. For example this is equivalent to the above\n"
-" `samtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
-" -o out.cram in.bam'\n"
+"\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
+"\t --output-fmt-option no_ref -o out.cram in.bam\n"
"\n"
-" 2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
-" two fields of each line consisting of the reference name and the\n"
-" corresponding sequence length. The `.fai' file generated by \n"
-" `samtools faidx' is suitable for use as this file. This may be an\n"
-" empty file if reads are unaligned.\n"
+" Options can also be specified as a comma separated list within the\n"
+" --output-fmt value too. For example this is equivalent to the above\n"
"\n"
-" 3. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz'.\n"
+"\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
+"\t -o out.cram in.bam\n"
"\n"
-" 4. BAM->SAM conversion: `samtools view -h in.bam'.\n"
+"2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
+" two fields of each line consisting of the reference name and the\n"
+" corresponding sequence length. The `.fai' file generated by \n"
+" `samtools faidx' is suitable for use as this file. This may be an\n"
+" empty file if reads are unaligned.\n"
"\n"
-" 5. A region should be presented in one of the following formats:\n"
-" `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
-" specified, the input alignment file must be a sorted and indexed\n"
-" alignment (BAM/CRAM) file.\n"
+"3. SAM->BAM conversion: samtools view -bT ref.fa in.sam.gz\n"
"\n"
-" 6. Option `-u' is preferred over `-b' when the output is piped to\n"
-" another samtools command.\n"
+"4. BAM->SAM conversion: samtools view -h in.bam\n"
+"\n"
+"5. A region should be presented in one of the following formats:\n"
+" `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
+" specified, the input alignment file must be a sorted and indexed\n"
+" alignment (BAM/CRAM) file.\n"
+"\n"
+"6. Option `-u' is preferred over `-b' when the output is piped to\n"
+" another samtools command.\n"
"\n");
return exit_status;
@@ -611,6 +615,7 @@ static const char *copied_tags[] = { "RG", "BC", "QT", NULL };
static void bam2fq_usage(FILE *to, const char *command)
{
+ int fq = strcasecmp("fastq", command) == 0 || strcasecmp("bam2fq", command) == 0;
fprintf(to,
"Usage: samtools %s [options...] <in.bam>\n", command);
fprintf(to,
@@ -620,10 +625,14 @@ static void bam2fq_usage(FILE *to, const char *command)
" -2 FILE write paired reads flagged READ2 to FILE\n"
" -f INT only include reads with all bits set in INT set in FLAG [0]\n"
" -F INT only include reads with none of the bits set in INT set in FLAG [0]\n"
-" -n don't append /1 and /2 to the read name\n"
-" -O output quality in the OQ tag if present\n"
+" -n don't append /1 and /2 to the read name\n");
+ if (fq) fprintf(to,
+" -O output quality in the OQ tag if present\n");
+ fprintf(to,
" -s FILE write singleton reads to FILE [assume single-end]\n"
-" -t copy RG, BC and QT tags to the FASTQ header line\n"
+" -t copy RG, BC and QT tags to the %s header line\n",
+ fq ? "FASTQ" : "FASTA");
+ if (fq) fprintf(to,
" -v INT default quality score if not given in file [1]\n");
sam_global_opt_help(to, "-.--.");
}
@@ -673,7 +682,10 @@ static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t
uint8_t *seq;
uint8_t *qual = bam_get_qual(b);
const uint8_t *oq = NULL;
- if (state->use_oq) oq = bam_aux_get(b, "OQ") + 1;
+ if (state->use_oq) {
+ oq = bam_aux_get(b, "OQ");
+ if (oq) oq++; // skip tag type
+ }
bool has_qual = (qual[0] != 0xff || (state->use_oq && oq)); // test if there is quality
linebuf->l = 0;
@@ -921,7 +933,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
bool valid = true;
while (true) {
- at_eof = sam_read1(state->fp, state->h, b);
+ at_eof = sam_read1(state->fp, state->h, b) < 0;
if (!at_eof && filter_it_out(b, state)) continue;
if (!at_eof) ++n_reads;
diff --git a/samtools/sam_view.c.pysam.c b/samtools/sam_view.c.pysam.c
index dfc8065..3d5ffa5 100644
--- a/samtools/sam_view.c.pysam.c
+++ b/samtools/sam_view.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@@ -98,7 +100,7 @@ static int process_aln(const bam_hdr_t *h, bam1_t *b, samview_settings_t* settin
}
if (settings->library) {
const char *p = bam_get_library((bam_hdr_t*)h, b);
- if (p && strcmp(p, settings->library) != 0) return 1;
+ if (!p || strcmp(p, settings->library) != 0) return 1;
}
if (settings->remove_aux_len) {
size_t i;
@@ -317,8 +319,8 @@ int main_samview(int argc, char *argv[])
case 'x':
{
if (strlen(optarg) != 2) {
- fprintf(pysamerr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
- return usage(pysamerr, EXIT_FAILURE, is_long_help);
+ fprintf(pysam_stderr, "main_samview: Error parsing -x auxiliary tags should be exactly two characters long.\n");
+ return usage(pysam_stderr, EXIT_FAILURE, is_long_help);
}
settings.remove_aux = (char**)realloc(settings.remove_aux, sizeof(char*) * (++settings.remove_aux_len));
settings.remove_aux[settings.remove_aux_len-1] = optarg;
@@ -327,7 +329,7 @@ int main_samview(int argc, char *argv[])
default:
if (parse_sam_global_opt(c, optarg, lopts, &ga) != 0)
- return usage(pysamerr, EXIT_FAILURE, is_long_help);
+ return usage(pysam_stderr, EXIT_FAILURE, is_long_help);
break;
}
}
@@ -347,7 +349,7 @@ int main_samview(int argc, char *argv[])
strcat(out_mode, tmp);
strcat(out_un_mode, tmp);
}
- if (argc == optind && isatty(STDIN_FILENO)) return usage(stdout, EXIT_SUCCESS, is_long_help); // potential memory leak...
+ if (argc == optind && isatty(STDIN_FILENO)) return usage(pysam_stdout, EXIT_SUCCESS, is_long_help); // potential memory leak...
fn_in = (optind < argc)? argv[optind] : "-";
// generate the fn_list if necessary
@@ -361,13 +363,13 @@ int main_samview(int argc, char *argv[])
if (fn_list) {
if (hts_set_fai_filename(in, fn_list) != 0) {
- fprintf(pysamerr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+ fprintf(pysam_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
ret = 1;
goto view_end;
}
}
if ((header = sam_hdr_read(in)) == 0) {
- fprintf(pysamerr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
+ fprintf(pysam_stderr, "[main_samview] fail to read the header from \"%s\".\n", fn_in);
ret = 1;
goto view_end;
}
@@ -387,7 +389,7 @@ int main_samview(int argc, char *argv[])
}
if (fn_list) {
if (hts_set_fai_filename(out, fn_list) != 0) {
- fprintf(pysamerr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+ fprintf(pysam_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
ret = 1;
goto view_end;
}
@@ -396,29 +398,29 @@ int main_samview(int argc, char *argv[])
out_mode[1] == 'b' || out_mode[1] == 'c' ||
(ga.out.format != sam && ga.out.format != unknown_format)) {
if (sam_hdr_write(out, header) != 0) {
- fprintf(pysamerr, "[main_samview] failed to write the SAM header\n");
+ fprintf(pysam_stderr, "[main_samview] failed to write the SAM header\n");
ret = 1;
goto view_end;
}
}
if (fn_un_out) {
- if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
+ if ((un_out = sam_open_format(fn_un_out, out_un_mode, &ga.out)) == 0) {
print_error_errno("view", "failed to open \"%s\" for writing", fn_un_out);
ret = 1;
goto view_end;
}
- if (fn_list) {
- if (hts_set_fai_filename(un_out, fn_list) != 0) {
- fprintf(pysamerr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
- ret = 1;
- goto view_end;
- }
+ if (fn_list) {
+ if (hts_set_fai_filename(un_out, fn_list) != 0) {
+ fprintf(pysam_stderr, "[main_samview] failed to use reference \"%s\".\n", fn_list);
+ ret = 1;
+ goto view_end;
}
+ }
if (*out_format || is_header ||
out_un_mode[1] == 'b' || out_un_mode[1] == 'c' ||
(ga.out.format != sam && ga.out.format != unknown_format)) {
if (sam_hdr_write(un_out, header) != 0) {
- fprintf(pysamerr, "[main_samview] failed to write the SAM header\n");
+ fprintf(pysam_stderr, "[main_samview] failed to write the SAM header\n");
ret = 1;
goto view_end;
}
@@ -441,7 +443,7 @@ int main_samview(int argc, char *argv[])
}
}
if (r < -1) {
- fprintf(pysamerr, "[main_samview] truncated file.\n");
+ fprintf(pysam_stderr, "[main_samview] truncated file.\n");
ret = 1;
}
bam_destroy1(b);
@@ -450,7 +452,7 @@ int main_samview(int argc, char *argv[])
bam1_t *b;
hts_idx_t *idx = sam_index_load(in, fn_in); // load index
if (idx == 0) { // index is unavailable
- fprintf(pysamerr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
+ fprintf(pysam_stderr, "[main_samview] random alignment retrieval only works for indexed BAM or CRAM files.\n");
ret = 1;
goto view_end;
}
@@ -461,9 +463,9 @@ int main_samview(int argc, char *argv[])
if (iter == NULL) { // region invalid or reference name not found
int beg, end;
if (hts_parse_reg(argv[i], &beg, &end))
- fprintf(pysamerr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
+ fprintf(pysam_stderr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
else
- fprintf(pysamerr, "[main_samview] region \"%s\" could not be parsed. Continue anyway.\n", argv[i]);
+ fprintf(pysam_stderr, "[main_samview] region \"%s\" could not be parsed. Continue anyway.\n", argv[i]);
continue;
}
// fetch alignments
@@ -477,7 +479,7 @@ int main_samview(int argc, char *argv[])
}
hts_itr_destroy(iter);
if (result < -1) {
- fprintf(pysamerr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
+ fprintf(pysam_stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
ret = 1;
break;
}
@@ -488,7 +490,7 @@ int main_samview(int argc, char *argv[])
view_end:
if (is_count && ret == 0)
- printf("%" PRId64 "\n", count);
+ fprintf(pysam_stdout, "%" PRId64 "\n", count);
// close files, free and return
if (in) check_sam_close("view", in, fn_in, "standard input", &ret);
@@ -526,7 +528,7 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
" -h include header in SAM output\n"
" -H print SAM header only (no alignments)\n"
" -c print only the count of matching records\n"
-" -o FILE output file name [stdout]\n"
+" -o FILE output file name [pysam_stdout]\n"
" -U FILE output reads not selected by filters to FILE [null]\n"
// extra input
" -t FILE FILE listing reference names and lengths (see long help) [null]\n"
@@ -558,35 +560,37 @@ static int usage(FILE *fp, int exit_status, int is_long_help)
fprintf(fp,
"Notes:\n"
"\n"
-" 1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
-" Further control over the CRAM format can be specified by using the\n"
-" --output-fmt-option, e.g. to specify the number of sequences per slice\n"
-" and to use avoid reference based compression:\n"
-" `samtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
-" --output-fmt-option no_ref -o out.cram in.bam'\n"
+"1. This command now auto-detects the input format (BAM/CRAM/SAM).\n"
+" Further control over the CRAM format can be specified by using the\n"
+" --output-fmt-option, e.g. to specify the number of sequences per slice\n"
+" and to use avoid reference based compression:\n"
"\n"
-" Options can also be specified as a comma separated list within the\n"
-" --output-fmt value too. For example this is equivalent to the above\n"
-" `samtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
-" -o out.cram in.bam'\n"
+"\tsamtools view -C --output-fmt-option seqs_per_slice=5000 \\\n"
+"\t --output-fmt-option no_ref -o out.cram in.bam\n"
"\n"
-" 2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
-" two fields of each line consisting of the reference name and the\n"
-" corresponding sequence length. The `.fai' file generated by \n"
-" `samtools faidx' is suitable for use as this file. This may be an\n"
-" empty file if reads are unaligned.\n"
+" Options can also be specified as a comma separated list within the\n"
+" --output-fmt value too. For example this is equivalent to the above\n"
"\n"
-" 3. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz'.\n"
+"\tsamtools view --output-fmt cram,seqs_per_slice=5000,no_ref \\\n"
+"\t -o out.cram in.bam\n"
"\n"
-" 4. BAM->SAM conversion: `samtools view -h in.bam'.\n"
+"2. The file supplied with `-t' is SPACE/TAB delimited with the first\n"
+" two fields of each line consisting of the reference name and the\n"
+" corresponding sequence length. The `.fai' file generated by \n"
+" `samtools faidx' is suitable for use as this file. This may be an\n"
+" empty file if reads are unaligned.\n"
"\n"
-" 5. A region should be presented in one of the following formats:\n"
-" `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
-" specified, the input alignment file must be a sorted and indexed\n"
-" alignment (BAM/CRAM) file.\n"
+"3. SAM->BAM conversion: samtools view -bT ref.fa in.sam.gz\n"
"\n"
-" 6. Option `-u' is preferred over `-b' when the output is piped to\n"
-" another samtools command.\n"
+"4. BAM->SAM conversion: samtools view -h in.bam\n"
+"\n"
+"5. A region should be presented in one of the following formats:\n"
+" `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n"
+" specified, the input alignment file must be a sorted and indexed\n"
+" alignment (BAM/CRAM) file.\n"
+"\n"
+"6. Option `-u' is preferred over `-b' when the output is piped to\n"
+" another samtools command.\n"
"\n");
return exit_status;
@@ -597,7 +601,7 @@ int main_import(int argc, char *argv[])
int argc2, ret;
char **argv2;
if (argc != 4) {
- fprintf(pysamerr, "Usage: samtools import <in.ref_list> <in.sam> <out.bam>\n");
+ fprintf(pysam_stderr, "Usage: samtools import <in.ref_list> <in.sam> <out.bam>\n");
return 1;
}
argc2 = 6;
@@ -613,6 +617,7 @@ static const char *copied_tags[] = { "RG", "BC", "QT", NULL };
static void bam2fq_usage(FILE *to, const char *command)
{
+ int fq = strcasecmp("fastq", command) == 0 || strcasecmp("bam2fq", command) == 0;
fprintf(to,
"Usage: samtools %s [options...] <in.bam>\n", command);
fprintf(to,
@@ -622,10 +627,14 @@ static void bam2fq_usage(FILE *to, const char *command)
" -2 FILE write paired reads flagged READ2 to FILE\n"
" -f INT only include reads with all bits set in INT set in FLAG [0]\n"
" -F INT only include reads with none of the bits set in INT set in FLAG [0]\n"
-" -n don't append /1 and /2 to the read name\n"
-" -O output quality in the OQ tag if present\n"
+" -n don't append /1 and /2 to the read name\n");
+ if (fq) fprintf(to,
+" -O output quality in the OQ tag if present\n");
+ fprintf(to,
" -s FILE write singleton reads to FILE [assume single-end]\n"
-" -t copy RG, BC and QT tags to the FASTQ header line\n"
+" -t copy RG, BC and QT tags to the %s header line\n",
+ fq ? "FASTQ" : "FASTA");
+ if (fq) fprintf(to,
" -v INT default quality score if not given in file [1]\n");
sam_global_opt_help(to, "-.--.");
}
@@ -675,7 +684,10 @@ static bool bam1_to_fq(const bam1_t *b, kstring_t *linebuf, const bam2fq_state_t
uint8_t *seq;
uint8_t *qual = bam_get_qual(b);
const uint8_t *oq = NULL;
- if (state->use_oq) oq = bam_aux_get(b, "OQ") + 1;
+ if (state->use_oq) {
+ oq = bam_aux_get(b, "OQ");
+ if (oq) oq++; // skip tag type
+ }
bool has_qual = (qual[0] != 0xff || (state->use_oq && oq)); // test if there is quality
linebuf->l = 0;
@@ -776,10 +788,10 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
case 's': opts->fnse = optarg; break;
case 't': opts->copy_tags = true; break;
case 'v': opts->def_qual = atoi(optarg); break;
- case '?': bam2fq_usage(pysamerr, argv[0]); free(opts); return false;
+ case '?': bam2fq_usage(pysam_stderr, argv[0]); free(opts); return false;
default:
if (parse_sam_global_opt(c, optarg, lopts, &opts->ga) != 0) {
- bam2fq_usage(pysamerr, argv[0]); free(opts); return false;
+ bam2fq_usage(pysam_stderr, argv[0]); free(opts); return false;
}
break;
}
@@ -788,8 +800,8 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
if (opts->fnr[1] || opts->fnr[2]) opts->has12 = false;
if (opts->def_qual < 0 || 93 < opts->def_qual) {
- fprintf(pysamerr, "Invalid -v default quality %i, allowed range 0 to 93\n", opts->def_qual);
- bam2fq_usage(pysamerr, argv[0]);
+ fprintf(pysam_stderr, "Invalid -v default quality %i, allowed range 0 to 93\n", opts->def_qual);
+ bam2fq_usage(pysam_stderr, argv[0]);
free(opts);
return true;
}
@@ -801,20 +813,20 @@ static bool parse_opts(int argc, char *argv[], bam2fq_opts_t** opts_out)
opts->filetype = FASTA;
} else {
print_error("bam2fq", "Unrecognised type call \"%s\", this should be impossible... but you managed it!", type_str);
- bam2fq_usage(pysamerr, argv[0]);
+ bam2fq_usage(pysam_stderr, argv[0]);
free(opts);
return false;
}
if ((argc - (optind)) == 0) {
- bam2fq_usage(stdout, argv[0]);
+ bam2fq_usage(pysam_stdout, argv[0]);
free(opts);
return false;
}
if ((argc - (optind)) != 1) {
- fprintf(pysamerr, "Too many arguments.\n");
- bam2fq_usage(pysamerr, argv[0]);
+ fprintf(pysam_stderr, "Too many arguments.\n");
+ bam2fq_usage(pysam_stderr, argv[0]);
free(opts);
return false;
}
@@ -843,12 +855,12 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
uint32_t rf = SAM_QNAME | SAM_FLAG | SAM_SEQ | SAM_QUAL;
if (opts->use_oq) rf |= SAM_AUX;
if (hts_set_opt(state->fp, CRAM_OPT_REQUIRED_FIELDS, rf)) {
- fprintf(pysamerr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
+ fprintf(pysam_stderr, "Failed to set CRAM_OPT_REQUIRED_FIELDS value\n");
free(state);
return false;
}
if (hts_set_opt(state->fp, CRAM_OPT_DECODE_MD, 0)) {
- fprintf(pysamerr, "Failed to set CRAM_OPT_DECODE_MD value\n");
+ fprintf(pysam_stderr, "Failed to set CRAM_OPT_DECODE_MD value\n");
free(state);
return false;
}
@@ -871,13 +883,13 @@ static bool init_state(const bam2fq_opts_t* opts, bam2fq_state_t** state_out)
return false;
}
} else {
- state->fpr[i] = stdout;
+ state->fpr[i] = pysam_stdout;
}
}
state->h = sam_hdr_read(state->fp);
if (state->h == NULL) {
- fprintf(pysamerr, "Failed to read header for \"%s\"\n", opts->fn_input);
+ fprintf(pysam_stderr, "Failed to read header for \"%s\"\n", opts->fn_input);
free(state);
return false;
}
@@ -894,7 +906,7 @@ static bool destroy_state(const bam2fq_opts_t *opts, bam2fq_state_t *state, int*
if (state->fpse && fclose(state->fpse)) { print_error_errno("bam2fq", "Error closing singleton file \"%s\"", opts->fnse); valid = false; }
int i;
for (i = 0; i < 3; ++i) {
- if (state->fpr[i] != stdout && fclose(state->fpr[i])) { print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]); valid = false; }
+ if (state->fpr[i] != pysam_stdout && fclose(state->fpr[i])) { print_error_errno("bam2fq", "Error closing r%d file \"%s\"", i, opts->fnr[i]); valid = false; }
}
free(state);
return valid;
@@ -923,7 +935,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
bool valid = true;
while (true) {
- at_eof = sam_read1(state->fp, state->h, b);
+ at_eof = sam_read1(state->fp, state->h, b) < 0;
if (!at_eof && filter_it_out(b, state)) continue;
if (!at_eof) ++n_reads;
@@ -960,7 +972,7 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
int b_score = bam_get_qual(b)[0] != 0xff? 2 : 1;
if (b_score > score[which_readpart(b)]) {
if(!bam1_to_fq(b, &linebuf[which_readpart(b)], state)) {
- fprintf(pysamerr, "[%s] Error converting read to FASTA/Q\n", __func__);
+ fprintf(pysam_stderr, "[%s] Error converting read to FASTA/Q\n", __func__);
return false;
}
score[which_readpart(b)] = b_score;
@@ -975,8 +987,8 @@ static bool bam2fq_mainloop_singletontrack(bam2fq_state_t *state)
free(linebuf[0].s);
free(linebuf[1].s);
free(linebuf[2].s);
- fprintf(pysamerr, "[M::%s] discarded %" PRId64 " singletons\n", __func__, n_singletons);
- fprintf(pysamerr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
+ fprintf(pysam_stderr, "[M::%s] discarded %" PRId64 " singletons\n", __func__, n_singletons);
+ fprintf(pysam_stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
return valid;
}
@@ -1003,7 +1015,7 @@ static bool bam2fq_mainloop(bam2fq_state_t *state)
free(linebuf.s);
bam_destroy1(b);
- fprintf(pysamerr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
+ fprintf(pysam_stderr, "[M::%s] processed %" PRId64 " reads\n", __func__, n_reads);
return true;
}
diff --git a/samtools/sample.c b/samtools/sample.c
index aa38132..4cc89ce 100644
--- a/samtools/sample.c
+++ b/samtools/sample.c
@@ -23,6 +23,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include "sample.h"
diff --git a/samtools/sample.c.pysam.c b/samtools/sample.c.pysam.c
index 73ec01f..dff8188 100644
--- a/samtools/sample.c.pysam.c
+++ b/samtools/sample.c.pysam.c
@@ -25,6 +25,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdlib.h>
#include <string.h>
#include "sample.h"
diff --git a/samtools/stats.c b/samtools/stats.c
index 512df1d..eb6bb52 100644
--- a/samtools/stats.c
+++ b/samtools/stats.c
@@ -37,6 +37,8 @@ DEALINGS IN THE SOFTWARE. */
*/
+#include <config.h>
+
#include <unistd.h> // for isatty()
#include <stdio.h>
#include <stdlib.h>
diff --git a/samtools/stats.c.pysam.c b/samtools/stats.c.pysam.c
index e30b2ad..da187ac 100644
--- a/samtools/stats.c.pysam.c
+++ b/samtools/stats.c.pysam.c
@@ -39,6 +39,8 @@ DEALINGS IN THE SOFTWARE. */
*/
+#include <config.h>
+
#include <unistd.h> // for isatty()
#include <stdio.h>
#include <stdlib.h>
@@ -1240,7 +1242,7 @@ void init_regions(stats_t *stats, const char *file)
if ( tid < 0 )
{
if ( !warned )
- fprintf(pysamerr,"Warning: Some sequences not present in the BAM, e.g. \"%s\". This message is printed only once.\n", line.s);
+ fprintf(pysam_stderr,"Warning: Some sequences not present in the BAM, e.g. \"%s\". This message is printed only once.\n", line.s);
warned = 1;
continue;
}
@@ -1334,7 +1336,7 @@ void init_group_id(stats_t *stats, const char *id)
{
khiter_t k = kh_get(kh_rg, stats->rg_hash, key);
if ( k != kh_end(stats->rg_hash) )
- fprintf(pysamerr, "[init_group_id] The group ID not unique: \"%s\"\n", key);
+ fprintf(pysam_stderr, "[init_group_id] The group ID not unique: \"%s\"\n", key);
int ret;
k = kh_put(kh_rg, stats->rg_hash, key, &ret);
kh_value(stats->rg_hash, k) = val;
@@ -1344,7 +1346,7 @@ void init_group_id(stats_t *stats, const char *id)
if ( !n )
error("The sample or read group \"%s\" not present.\n", id);
#else
- fprintf(pysamerr, "Samtools-htslib: init_group_id() header parsing not yet implemented\n");
+ fprintf(pysam_stderr, "Samtools-htslib: init_group_id() header parsing not yet implemented\n");
abort();
#endif
}
@@ -1354,35 +1356,35 @@ static void error(const char *format, ...)
{
if ( !format )
{
- printf("About: The program collects statistics from BAM files. The output can be visualized using plot-bamstats.\n");
- printf("Usage: samtools stats [OPTIONS] file.bam\n");
- printf(" samtools stats [OPTIONS] file.bam chr:from-to\n");
- printf("Options:\n");
- printf(" -c, --coverage <int>,<int>,<int> Coverage distribution min,max,step [1,1000,1]\n");
- printf(" -d, --remove-dups Exclude from statistics reads marked as duplicates\n");
- printf(" -f, --required-flag <str|int> Required flag, 0 for unset. See also `samtools flags` [0]\n");
- printf(" -F, --filtering-flag <str|int> Filtering flag, 0 for unset. See also `samtools flags` [0]\n");
- printf(" --GC-depth <float> the size of GC-depth bins (decreasing bin size increases memory requirement) [2e4]\n");
- printf(" -h, --help This help message\n");
- printf(" -i, --insert-size <int> Maximum insert size [8000]\n");
- printf(" -I, --id <string> Include only listed read group or sample name\n");
- printf(" -l, --read-length <int> Include in the statistics only reads with the given read length []\n");
- printf(" -m, --most-inserts <float> Report only the main part of inserts [0.99]\n");
- printf(" -P, --split-prefix <str> Path or string prefix for filepaths output by -S (default is input filename)\n");
- printf(" -q, --trim-quality <int> The BWA trimming parameter [0]\n");
- printf(" -r, --ref-seq <file> Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n");
- printf(" -s, --sam Ignored (input format is auto-detected).\n");
- printf(" -S, --split <tag> Also write statistics to separate files split by tagged field.\n");
- printf(" -t, --target-regions <file> Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n");
- printf(" -x, --sparse Suppress outputting IS rows where there are no insertions.\n");
- sam_global_opt_help(stdout, "-.--.");
- printf("\n");
+ fprintf(pysam_stdout, "About: The program collects statistics from BAM files. The output can be visualized using plot-bamstats.\n");
+ fprintf(pysam_stdout, "Usage: samtools stats [OPTIONS] file.bam\n");
+ fprintf(pysam_stdout, " samtools stats [OPTIONS] file.bam chr:from-to\n");
+ fprintf(pysam_stdout, "Options:\n");
+ fprintf(pysam_stdout, " -c, --coverage <int>,<int>,<int> Coverage distribution min,max,step [1,1000,1]\n");
+ fprintf(pysam_stdout, " -d, --remove-dups Exclude from statistics reads marked as duplicates\n");
+ fprintf(pysam_stdout, " -f, --required-flag <str|int> Required flag, 0 for unset. See also `samtools flags` [0]\n");
+ fprintf(pysam_stdout, " -F, --filtering-flag <str|int> Filtering flag, 0 for unset. See also `samtools flags` [0]\n");
+ fprintf(pysam_stdout, " --GC-depth <float> the size of GC-depth bins (decreasing bin size increases memory requirement) [2e4]\n");
+ fprintf(pysam_stdout, " -h, --help This help message\n");
+ fprintf(pysam_stdout, " -i, --insert-size <int> Maximum insert size [8000]\n");
+ fprintf(pysam_stdout, " -I, --id <string> Include only listed read group or sample name\n");
+ fprintf(pysam_stdout, " -l, --read-length <int> Include in the statistics only reads with the given read length []\n");
+ fprintf(pysam_stdout, " -m, --most-inserts <float> Report only the main part of inserts [0.99]\n");
+ fprintf(pysam_stdout, " -P, --split-prefix <str> Path or string prefix for filepaths output by -S (default is input filename)\n");
+ fprintf(pysam_stdout, " -q, --trim-quality <int> The BWA trimming parameter [0]\n");
+ fprintf(pysam_stdout, " -r, --ref-seq <file> Reference sequence (required for GC-depth and mismatches-per-cycle calculation).\n");
+ fprintf(pysam_stdout, " -s, --sam Ignored (input format is auto-detected).\n");
+ fprintf(pysam_stdout, " -S, --split <tag> Also write statistics to separate files split by tagged field.\n");
+ fprintf(pysam_stdout, " -t, --target-regions <file> Do stats in these regions only. Tab-delimited file chr,from,to, 1-based, inclusive.\n");
+ fprintf(pysam_stdout, " -x, --sparse Suppress outputting IS rows where there are no insertions.\n");
+ sam_global_opt_help(pysam_stdout, "-.--.");
+ fprintf(pysam_stdout, "\n");
}
else
{
va_list ap;
va_start(ap, format);
- vfprintf(pysamerr, format, ap);
+ vfprintf(pysam_stderr, format, ap);
va_end(ap);
}
exit(1);
@@ -1708,13 +1710,13 @@ int main_stats(int argc, char *argv[])
}
if (ret < -1) {
- fprintf(pysamerr, "Failure while decoding file\n");
+ fprintf(pysam_stderr, "Failure while decoding file\n");
return 1;
}
}
round_buffer_flush(all_stats, -1);
- output_stats(stdout, all_stats, sparse);
+ output_stats(pysam_stdout, all_stats, sparse);
if (info->split_tag)
output_split_stats(split_hash, bam_fname, sparse);
diff --git a/samtools/stats_isize.c b/samtools/stats_isize.c
index e6b9dc1..3aa9c20 100644
--- a/samtools/stats_isize.c
+++ b/samtools/stats_isize.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include "stats_isize.h"
#include <htslib/khash.h>
diff --git a/samtools/stats_isize.c.pysam.c b/samtools/stats_isize.c.pysam.c
index a25e4d7..6ae9088 100644
--- a/samtools/stats_isize.c.pysam.c
+++ b/samtools/stats_isize.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <stdio.h>
#include "stats_isize.h"
#include <htslib/khash.h>
@@ -94,7 +96,7 @@ static void sparse_set_f(isize_data_t data, int at, isize_insert_t field, uint64
kh_value(h, it) = rec;
a->max = max(at, a->max);
} else {
- fprintf(pysamerr, "%s\n", "Failed to allocate memory for isize_sparse_record_t");
+ fprintf(pysam_stderr, "%s\n", "Failed to allocate memory for isize_sparse_record_t");
exit(11);
}
} else {
diff --git a/samtools/test/merge/test_bam_translate.c b/samtools/test/merge/test_bam_translate.c
index 854779b..6ed561e 100644
--- a/samtools/test/merge/test_bam_translate.c
+++ b/samtools/test/merge/test_bam_translate.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_sort.c"
#include "../test.h"
#include <stdio.h>
diff --git a/samtools/test/merge/test_bam_translate.c.pysam.c b/samtools/test/merge/test_bam_translate.c.pysam.c
index d11fbf8..193954d 100644
--- a/samtools/test/merge/test_bam_translate.c.pysam.c
+++ b/samtools/test/merge/test_bam_translate.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_sort.c"
#include "../test.h"
#include <stdio.h>
@@ -33,40 +35,40 @@ DEALINGS IN THE SOFTWARE. */
#include <unistd.h>
void dump_read(bam1_t* b) {
- printf("->core.tid:(%d)\n", b->core.tid);
- printf("->core.pos:(%d)\n", b->core.pos);
- printf("->core.bin:(%d)\n", b->core.bin);
- printf("->core.qual:(%d)\n", b->core.qual);
- printf("->core.l_qname:(%d)\n", b->core.l_qname);
- printf("->core.flag:(%d)\n", b->core.flag);
- printf("->core.n_cigar:(%d)\n", b->core.n_cigar);
- printf("->core.l_qseq:(%d)\n", b->core.l_qseq);
- printf("->core.mtid:(%d)\n", b->core.mtid);
- printf("->core.mpos:(%d)\n", b->core.mpos);
- printf("->core.isize:(%d)\n", b->core.isize);
+ fprintf(pysam_stdout, "->core.tid:(%d)\n", b->core.tid);
+ fprintf(pysam_stdout, "->core.pos:(%d)\n", b->core.pos);
+ fprintf(pysam_stdout, "->core.bin:(%d)\n", b->core.bin);
+ fprintf(pysam_stdout, "->core.qual:(%d)\n", b->core.qual);
+ fprintf(pysam_stdout, "->core.l_qname:(%d)\n", b->core.l_qname);
+ fprintf(pysam_stdout, "->core.flag:(%d)\n", b->core.flag);
+ fprintf(pysam_stdout, "->core.n_cigar:(%d)\n", b->core.n_cigar);
+ fprintf(pysam_stdout, "->core.l_qseq:(%d)\n", b->core.l_qseq);
+ fprintf(pysam_stdout, "->core.mtid:(%d)\n", b->core.mtid);
+ fprintf(pysam_stdout, "->core.mpos:(%d)\n", b->core.mpos);
+ fprintf(pysam_stdout, "->core.isize:(%d)\n", b->core.isize);
if (b->data) {
- printf("->data:");
+ fprintf(pysam_stdout, "->data:");
int i;
for (i = 0; i < b->l_data; ++i) {
- printf("%x ", b->data[i]);
+ fprintf(pysam_stdout, "%x ", b->data[i]);
}
- printf("\n");
+ fprintf(pysam_stdout, "\n");
}
if (b->core.l_qname) {
- printf("qname: %s\n",bam_get_qname(b));
+ fprintf(pysam_stdout, "qname: %s\n",bam_get_qname(b));
}
if (b->core.l_qseq) {
- printf("qseq:");
+ fprintf(pysam_stdout, "qseq:");
int i;
for (i = 0; i < b->core.l_qseq; ++i) {
- printf("%c",seq_nt16_str[seq_nt16_table[bam_seqi(bam_get_seq(b),i)]]);
+ fprintf(pysam_stdout, "%c",seq_nt16_str[seq_nt16_table[bam_seqi(bam_get_seq(b),i)]]);
}
- printf("\n");
- printf("qual:");
+ fprintf(pysam_stdout, "\n");
+ fprintf(pysam_stdout, "qual:");
for (i = 0; i < b->core.l_qseq; ++i) {
- printf("%c",bam_get_qual(b)[i]);
+ fprintf(pysam_stdout, "%c",bam_get_qual(b)[i]);
}
- printf("\n");
+ fprintf(pysam_stdout, "\n");
}
@@ -75,18 +77,18 @@ void dump_read(bam1_t* b) {
uint8_t* aux = bam_get_aux(b);
while (i < bam_get_l_aux(b)) {
- printf("%.2s:%c:",aux+i,*(aux+i+2));
+ fprintf(pysam_stdout, "%.2s:%c:",aux+i,*(aux+i+2));
i += 2;
switch (*(aux+i)) {
case 'Z':
- while (*(aux+1+i) != '\0') { putc(*(aux+1+i), stdout); ++i; }
+ while (*(aux+1+i) != '\0') { putc(*(aux+1+i), pysam_stdout); ++i; }
break;
}
- putc('\n',stdout);
+ putc('\n',pysam_stdout);
++i;++i;
}
}
- printf("\n");
+ fprintf(pysam_stdout, "\n");
}
void trans_tbl_test_init(trans_tbl_t* tbl, int32_t n_targets)
@@ -334,7 +336,7 @@ void setup_test_6(bam1_t** b_in, trans_tbl_t* tbl) {
}
-int main(int argc, char**argv)
+int samtools_test_bam_translate_main(int argc, char**argv)
{
// test state
const int NUM_TESTS = 6;
@@ -355,30 +357,30 @@ int main(int argc, char**argv)
bam1_t* b;
- // Setup pysamerr redirect
+ // Setup pysam_stderr redirect
kstring_t res = { 0, 0, NULL };
- FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+ FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
char* tempfname = (optind < argc)? argv[optind] : "test_bam_translate.tmp";
FILE* check = NULL;
// setup
- if (verbose) printf("BEGIN test 1\n"); // TID test
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n"); // TID test
trans_tbl_t tbl1;
setup_test_1(&b,&tbl1);
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
- if (verbose) printf("RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bam_translate(b, &tbl1);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
@@ -390,33 +392,33 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 1\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
}
fclose(check);
// teardown
bam_destroy1(b);
trans_tbl_destroy(&tbl1);
- if (verbose) printf("END test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END test 1\n");
// setup
- if (verbose) printf("BEGIN test 2\n"); // RG exists and translate test
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 2\n"); // RG exists and translate test
trans_tbl_t tbl2;
setup_test_2(&b,&tbl2);
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
- if (verbose) printf("RUN test 2\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 2\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bam_translate(b, &tbl2);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 2\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 2\n");
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
@@ -428,33 +430,33 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 2\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 2\n");
}
fclose(check);
// teardown
bam_destroy1(b);
trans_tbl_destroy(&tbl2);
- if (verbose) printf("END test 2\n");
+ if (verbose) fprintf(pysam_stdout, "END test 2\n");
- if (verbose) printf("BEGIN test 3\n"); // PG exists and translate test
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 3\n"); // PG exists and translate test
// setup
trans_tbl_t tbl3;
setup_test_3(&b,&tbl3);
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
- if (verbose) printf("RUN test 3\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 3\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bam_translate(b, &tbl3);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 3\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 3\n");
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
@@ -466,33 +468,33 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 3\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 3\n");
}
fclose(check);
// teardown
bam_destroy1(b);
trans_tbl_destroy(&tbl3);
- if (verbose) printf("END test 3\n");
+ if (verbose) fprintf(pysam_stdout, "END test 3\n");
- if (verbose) printf("BEGIN test 4\n"); // RG test non-existent
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 4\n"); // RG test non-existent
// setup
trans_tbl_t tbl4;
setup_test_4(&b,&tbl4);
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
- if (verbose) printf("RUN test 4\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 4\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bam_translate(b, &tbl4);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 4\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 4\n");
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
// check result
@@ -503,32 +505,32 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 4\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 4\n");
}
fclose(check);
// teardown
bam_destroy1(b);
trans_tbl_destroy(&tbl4);
- if (verbose) printf("END test 4\n");
+ if (verbose) fprintf(pysam_stdout, "END test 4\n");
- if (verbose) printf("BEGIN test 5\n"); // PG test non-existent
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 5\n"); // PG test non-existent
// setup
trans_tbl_t tbl5;
setup_test_5(&b,&tbl5);
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
- printf("RUN test 5\n");
+ fprintf(pysam_stdout, "RUN test 5\n");
}
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bam_translate(b, &tbl5);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 5\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 5\n");
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
@@ -540,33 +542,33 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 5\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 5\n");
}
fclose(check);
// teardown
bam_destroy1(b);
trans_tbl_destroy(&tbl5);
- if (verbose) printf("END test 5\n");
+ if (verbose) fprintf(pysam_stdout, "END test 5\n");
- if (verbose) printf("BEGIN test 6\n"); // RG and PG exists and translate test
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 6\n"); // RG and PG exists and translate test
// setup
trans_tbl_t tbl6;
setup_test_6(&b,&tbl6);
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
- if (verbose) printf("RUN test 6\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 6\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bam_translate(b, &tbl6);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 6\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 6\n");
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_read(b);
}
@@ -578,21 +580,21 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 6\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 6\n");
}
fclose(check);
// teardown
bam_destroy1(b);
trans_tbl_destroy(&tbl6);
- if (verbose) printf("END test 6\n");
+ if (verbose) fprintf(pysam_stdout, "END test 6\n");
// Cleanup
free(res.s);
remove(tempfname);
if (failure > 0)
- fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
- fclose(orig_pysamerr);
+ fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+ fclose(orig_pysam_stderr);
return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
}
diff --git a/samtools/test/merge/test_rtrans_build.c b/samtools/test/merge/test_rtrans_build.c
index df50921..0f23b48 100644
--- a/samtools/test/merge/test_rtrans_build.c
+++ b/samtools/test/merge/test_rtrans_build.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_sort.c"
void dump_rtrans(int* rtrans, int n, int n_targets) {
diff --git a/samtools/test/merge/test_rtrans_build.c.pysam.c b/samtools/test/merge/test_rtrans_build.c.pysam.c
index fcbc458..0ac1367 100644
--- a/samtools/test/merge/test_rtrans_build.c.pysam.c
+++ b/samtools/test/merge/test_rtrans_build.c.pysam.c
@@ -24,16 +24,18 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_sort.c"
void dump_rtrans(int* rtrans, int n, int n_targets) {
- printf("->n_targets:(%d)\n", n_targets);
+ fprintf(pysam_stdout, "->n_targets:(%d)\n", n_targets);
int i, j;
for (i = 0; i < n; ++i) {
- fprintf(pysamerr, "%d",rtrans[i*n_targets+0]);
+ fprintf(pysam_stderr, "%d",rtrans[i*n_targets+0]);
for (j = 1; j < n_targets; ++j)
- fprintf(pysamerr, "\t%d",rtrans[i*n_targets+j]);
- fprintf(pysamerr, "\n");
+ fprintf(pysam_stderr, "\t%d",rtrans[i*n_targets+j]);
+ fprintf(pysam_stderr, "\n");
}
}
@@ -62,7 +64,7 @@ bool check_test_1(trans_tbl_t* tbl, int* rtrans) {
}
-int main(int argc, char**argv)
+int samtools_test_rtrans_build_main(int argc, char**argv)
{
const int NUM_TESTS = 1;
int verbose = 0;
@@ -81,7 +83,7 @@ int main(int argc, char**argv)
const long GIMMICK_SEED = 0x1234330e;
srand48(GIMMICK_SEED);
- if (verbose) printf("BEGIN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");
// setup
trans_tbl_t tbl_1[2];
int n_targets_1 = 3;
@@ -92,29 +94,29 @@ int main(int argc, char**argv)
if (verbose > 1) {
// dump_trans_tid
}
- if (verbose) printf("RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
rtrans_1 = rtrans_build(n_1, n_targets_1, &tbl_1[0]);
- if (verbose) printf("END RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
if (verbose > 1) {
- printf("rtrans\n");
+ fprintf(pysam_stdout, "rtrans\n");
dump_rtrans(rtrans_1, n_1, n_targets_1);
}
if (check_test_1(&tbl_1[0], rtrans_1)) {
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 1\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
}
// teardown
trans_tbl_destroy(&tbl_1[0]);
trans_tbl_destroy(&tbl_1[1]);
free(rtrans_1);
- if (verbose) printf("END test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END test 1\n");
if (success == NUM_TESTS) {
return 0;
} else {
- fprintf(pysamerr, "%d failures %d successes\n", failure, success);
+ fprintf(pysam_stderr, "%d failures %d successes\n", failure, success);
return 1;
}
}
diff --git a/samtools/test/merge/test_trans_tbl_init.c b/samtools/test/merge/test_trans_tbl_init.c
index b1164a3..d557932 100644
--- a/samtools/test/merge/test_trans_tbl_init.c
+++ b/samtools/test/merge/test_trans_tbl_init.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_sort.c"
#include <assert.h>
#include <regex.h>
@@ -47,7 +49,7 @@ void dump_header(bam_hdr_t* hdr) {
static int populate_merged_header(bam_hdr_t *hdr, merged_header_t *merged_hdr) {
trans_tbl_t dummy;
int res;
- res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, NULL);
+ res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, 1, NULL);
trans_tbl_destroy(&dummy);
return res;
}
@@ -359,7 +361,7 @@ int main(int argc, char**argv)
dump_header(translate);
}
if (verbose) printf("RUN test 1\n");
- trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, NULL);
+ trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
if (verbose) printf("END RUN test 1\n");
@@ -396,7 +398,7 @@ int main(int argc, char**argv)
dump_header(translate);
}
if (verbose) printf("RUN test 2\n");
- trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, NULL);
+ trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
if (verbose) printf("END RUN test 2\n");
@@ -432,7 +434,7 @@ int main(int argc, char**argv)
dump_header(translate);
}
if (verbose) printf("RUN test 3\n");
- trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, NULL);
+ trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
if (verbose) printf("END RUN test 3\n");
@@ -468,7 +470,7 @@ int main(int argc, char**argv)
dump_header(translate);
}
if (verbose) printf("RUN test 4\n");
- trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, NULL);
+ trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
if (verbose) printf("END RUN test 4\n");
@@ -505,7 +507,7 @@ int main(int argc, char**argv)
dump_header(translate);
}
if (verbose) printf("RUN test 5\n");
- trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, NULL);
+ trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
if (verbose) printf("END RUN test 5\n");
@@ -541,7 +543,7 @@ int main(int argc, char**argv)
dump_header(translate);
}
if (verbose) printf("RUN test 6\n");
- trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, "filename");
+ trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, true, "filename");
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
if (verbose) printf("END RUN test 6\n");
diff --git a/samtools/test/merge/test_trans_tbl_init.c.pysam.c b/samtools/test/merge/test_trans_tbl_init.c.pysam.c
index 0f54989..af8af43 100644
--- a/samtools/test/merge/test_trans_tbl_init.c.pysam.c
+++ b/samtools/test/merge/test_trans_tbl_init.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_sort.c"
#include <assert.h>
#include <regex.h>
@@ -34,22 +36,22 @@ typedef struct refseq_info {
} refseq_info_t;
void dump_header(bam_hdr_t* hdr) {
- printf("->n_targets:(%d)\n", hdr->n_targets);
+ fprintf(pysam_stdout, "->n_targets:(%d)\n", hdr->n_targets);
int i;
for (i = 0; i < hdr->n_targets; ++i) {
- printf("->target_name[%d]:(%s)\n",i,hdr->target_name[i]);
- printf("->target_len[%d]:(%d)\n",i,hdr->target_len[i]);
+ fprintf(pysam_stdout, "->target_name[%d]:(%s)\n",i,hdr->target_name[i]);
+ fprintf(pysam_stdout, "->target_len[%d]:(%d)\n",i,hdr->target_len[i]);
}
- printf("->text:(");
- fwrite((void*)hdr->text, (size_t) hdr->l_text, 1, stdout);
- printf(")\n");
+ fprintf(pysam_stdout, "->text:(");
+ fwrite((void*)hdr->text, (size_t) hdr->l_text, 1, pysam_stdout);
+ fprintf(pysam_stdout, ")\n");
}
static int populate_merged_header(bam_hdr_t *hdr, merged_header_t *merged_hdr) {
trans_tbl_t dummy;
int res;
- res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, NULL);
+ res = trans_tbl_init(merged_hdr, hdr, &dummy, 0, 0, 1, NULL);
trans_tbl_destroy(&dummy);
return res;
}
@@ -325,7 +327,7 @@ bool check_test_6(bam_hdr_t* translate, bam_hdr_t* out, trans_tbl_t* tbl) {
return true;
}
-int main(int argc, char**argv)
+int samtools_test_trans_tbl_init_main(int argc, char**argv)
{
const int NUM_TESTS = 6;
int verbose = 0;
@@ -349,7 +351,7 @@ int main(int argc, char**argv)
bam_hdr_t* out;
bam_hdr_t* translate;
- if (verbose) printf("BEGIN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n");
// setup
trans_tbl_t tbl_1;
merged_header_t *merged_hdr = init_merged_header();
@@ -357,36 +359,36 @@ int main(int argc, char**argv)
assert(translate);
// test
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
}
- if (verbose) printf("RUN test 1\n");
- trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, NULL);
+ if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
+ trans_tbl_init(merged_hdr, translate, &tbl_1, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
- if (verbose) printf("END RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
- printf("out\n");
+ fprintf(pysam_stdout, "out\n");
dump_header(out);
}
if (check_test_1(translate, out, &tbl_1)) {
- if (verbose) printf("Test 1 : PASS\n");
+ if (verbose) fprintf(pysam_stdout, "Test 1 : PASS\n");
++success;
} else {
- if (verbose) printf("Test 1 : FAIL\n");
- fprintf(pysamerr, "Test 1 : FAIL\n");
+ if (verbose) fprintf(pysam_stdout, "Test 1 : FAIL\n");
+ fprintf(pysam_stderr, "Test 1 : FAIL\n");
++failure;
}
// teardown
bam_hdr_destroy(translate);
bam_hdr_destroy(out);
trans_tbl_destroy(&tbl_1);
- if (verbose) printf("END test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END test 1\n");
// test
- if (verbose) printf("BEGIN test 2\n");
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 2\n");
// reinit
trans_tbl_t tbl_2;
@@ -394,108 +396,108 @@ int main(int argc, char**argv)
translate = setup_test_2(merged_hdr);
assert(translate);
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
}
- if (verbose) printf("RUN test 2\n");
- trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, NULL);
+ if (verbose) fprintf(pysam_stdout, "RUN test 2\n");
+ trans_tbl_init(merged_hdr, translate, &tbl_2, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
- if (verbose) printf("END RUN test 2\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 2\n");
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
- printf("out\n");
+ fprintf(pysam_stdout, "out\n");
dump_header(out);
}
if (check_test_2(translate, out, &tbl_2)) {
- if (verbose) printf("Test 2 : PASS\n");
+ if (verbose) fprintf(pysam_stdout, "Test 2 : PASS\n");
++success;
} else {
- if (verbose) printf("Test 2 : FAIL\n");
- fprintf(pysamerr, "Test 2 : FAIL\n");
+ if (verbose) fprintf(pysam_stdout, "Test 2 : FAIL\n");
+ fprintf(pysam_stderr, "Test 2 : FAIL\n");
++failure;
}
// teardown
bam_hdr_destroy(translate);
bam_hdr_destroy(out);
trans_tbl_destroy(&tbl_2);
- if (verbose) printf("END test 2\n");
+ if (verbose) fprintf(pysam_stdout, "END test 2\n");
// test
- if (verbose) printf("BEGIN test 3\n");
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 3\n");
// reinit
trans_tbl_t tbl_3;
merged_hdr = init_merged_header();
translate = setup_test_3(merged_hdr);
assert(translate);
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
}
- if (verbose) printf("RUN test 3\n");
- trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, NULL);
+ if (verbose) fprintf(pysam_stdout, "RUN test 3\n");
+ trans_tbl_init(merged_hdr, translate, &tbl_3, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
- if (verbose) printf("END RUN test 3\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 3\n");
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
- printf("out\n");
+ fprintf(pysam_stdout, "out\n");
dump_header(out);
}
if (check_test_3(translate, out, &tbl_3)) {
- if (verbose) printf("Test 3 : PASS\n");
+ if (verbose) fprintf(pysam_stdout, "Test 3 : PASS\n");
++success;
} else {
- if (verbose) printf("Test 3 : FAIL\n");
- fprintf(pysamerr, "Test 3 : FAIL\n");
+ if (verbose) fprintf(pysam_stdout, "Test 3 : FAIL\n");
+ fprintf(pysam_stderr, "Test 3 : FAIL\n");
++failure;
}
// teardown
bam_hdr_destroy(translate);
bam_hdr_destroy(out);
trans_tbl_destroy(&tbl_3);
- if (verbose) printf("END test 3\n");
+ if (verbose) fprintf(pysam_stdout, "END test 3\n");
// test
- if (verbose) printf("BEGIN test 4\n");
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 4\n");
// reinit
trans_tbl_t tbl_4;
merged_hdr = init_merged_header();
translate = setup_test_4(merged_hdr);
assert(translate);
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
}
- if (verbose) printf("RUN test 4\n");
- trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, NULL);
+ if (verbose) fprintf(pysam_stdout, "RUN test 4\n");
+ trans_tbl_init(merged_hdr, translate, &tbl_4, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
- if (verbose) printf("END RUN test 4\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 4\n");
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
- printf("out\n");
+ fprintf(pysam_stdout, "out\n");
dump_header(out);
}
if (check_test_4(translate, out, &tbl_4)) {
- if (verbose) printf("Test 4 : PASS\n");
+ if (verbose) fprintf(pysam_stdout, "Test 4 : PASS\n");
++success;
} else {
- if (verbose) printf("Test 4 : FAIL\n");
- fprintf(pysamerr, "Test 4 : FAIL\n");
+ if (verbose) fprintf(pysam_stdout, "Test 4 : FAIL\n");
+ fprintf(pysam_stderr, "Test 4 : FAIL\n");
++failure;
}
// teardown
bam_hdr_destroy(translate);
bam_hdr_destroy(out);
trans_tbl_destroy(&tbl_4);
- if (verbose) printf("END test 4\n");
+ if (verbose) fprintf(pysam_stdout, "END test 4\n");
// test
- if (verbose) printf("BEGIN test 5\n");
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 5\n");
// reinit
trans_tbl_t tbl_5;
merged_hdr = init_merged_header();
@@ -503,74 +505,74 @@ int main(int argc, char**argv)
assert(translate);
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
}
- if (verbose) printf("RUN test 5\n");
- trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, NULL);
+ if (verbose) fprintf(pysam_stdout, "RUN test 5\n");
+ trans_tbl_init(merged_hdr, translate, &tbl_5, false, false, true, NULL);
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
- if (verbose) printf("END RUN test 5\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 5\n");
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
- printf("out\n");
+ fprintf(pysam_stdout, "out\n");
dump_header(out);
}
if (check_test_5(translate, out, &tbl_5)) {
- if (verbose) printf("Test 5 : PASS\n");
+ if (verbose) fprintf(pysam_stdout, "Test 5 : PASS\n");
++success;
} else {
- if (verbose) printf("Test 5 : FAIL\n");
- fprintf(pysamerr, "Test 5 : FAIL\n");
+ if (verbose) fprintf(pysam_stdout, "Test 5 : FAIL\n");
+ fprintf(pysam_stderr, "Test 5 : FAIL\n");
++failure;
}
// teardown
bam_hdr_destroy(translate);
bam_hdr_destroy(out);
trans_tbl_destroy(&tbl_5);
- if (verbose) printf("END test 5\n");
+ if (verbose) fprintf(pysam_stdout, "END test 5\n");
// test
- if (verbose) printf("BEGIN test 6\n");
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 6\n");
// reinit
trans_tbl_t tbl_6;
merged_hdr = init_merged_header();
translate = setup_test_6(merged_hdr);
assert(translate);
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
}
- if (verbose) printf("RUN test 6\n");
- trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, "filename");
+ if (verbose) fprintf(pysam_stdout, "RUN test 6\n");
+ trans_tbl_init(merged_hdr, translate, &tbl_6, false, false, true, "filename");
out = finish_merged_header(merged_hdr);
free_merged_header(merged_hdr);
- if (verbose) printf("END RUN test 6\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 6\n");
if (verbose > 1) {
- printf("translate\n");
+ fprintf(pysam_stdout, "translate\n");
dump_header(translate);
- printf("out\n");
+ fprintf(pysam_stdout, "out\n");
dump_header(out);
}
if (check_test_6(translate, out, &tbl_6)) {
- if (verbose) printf("Test 6 : PASS\n");
+ if (verbose) fprintf(pysam_stdout, "Test 6 : PASS\n");
++success;
} else {
- if (verbose) printf("Test 6 : FAIL\n");
- fprintf(pysamerr, "Test 6 : FAIL\n");
+ if (verbose) fprintf(pysam_stdout, "Test 6 : FAIL\n");
+ fprintf(pysam_stderr, "Test 6 : FAIL\n");
++failure;
}
// teardown
bam_hdr_destroy(translate);
bam_hdr_destroy(out);
trans_tbl_destroy(&tbl_6);
- if (verbose) printf("END test 6\n");
+ if (verbose) fprintf(pysam_stdout, "END test 6\n");
if (success == NUM_TESTS) {
return 0;
} else {
- fprintf(pysamerr, "%d failures %d successes\n", failure, success);
+ fprintf(pysam_stderr, "%d failures %d successes\n", failure, success);
return 1;
}
}
diff --git a/samtools/test/split/test_count_rg.c b/samtools/test/split/test_count_rg.c
index 97512a8..4038f97 100644
--- a/samtools/test/split/test_count_rg.c
+++ b/samtools/test/split/test_count_rg.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <stdlib.h>
diff --git a/samtools/test/split/test_count_rg.c.pysam.c b/samtools/test/split/test_count_rg.c.pysam.c
index eda8abb..25131a8 100644
--- a/samtools/test/split/test_count_rg.c.pysam.c
+++ b/samtools/test/split/test_count_rg.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <stdlib.h>
@@ -40,7 +42,7 @@ void setup_test_1(bam_hdr_t** hdr_in)
(*hdr_in)->l_text = strlen(test1);
}
-int main(int argc, char**argv)
+int samtools_test_count_rg_main(int argc, char**argv)
{
// test state
const int NUM_TESTS = 1;
@@ -55,7 +57,7 @@ int main(int argc, char**argv)
++verbose;
break;
default:
- printf(
+ fprintf(pysam_stdout,
"usage: test_count_rg [-v]\n\n"
" -v verbose output\n"
);
@@ -64,32 +66,32 @@ int main(int argc, char**argv)
}
- // Setup pysamerr redirect
+ // Setup pysam_stderr redirect
kstring_t res = { 0, 0, NULL };
- FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+ FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
char* tempfname = (optind < argc)? argv[optind] : "test_count_rg.tmp";
FILE* check = NULL;
// setup
- if (verbose) printf("BEGIN test 1\n"); // TID test
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n"); // TID test
bam_hdr_t* hdr1;
size_t count;
char** output;
setup_test_1(&hdr1);
if (verbose > 1) {
- printf("hdr1\n");
+ fprintf(pysam_stdout, "hdr1\n");
dump_hdr(hdr1);
}
- if (verbose) printf("RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bool result_1 = count_RG(hdr1, &count, &output);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
if (verbose > 1) {
- printf("b\n");
+ fprintf(pysam_stdout, "b\n");
dump_hdr(hdr1);
}
@@ -101,7 +103,7 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 1\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
}
fclose(check);
@@ -112,14 +114,14 @@ int main(int argc, char**argv)
}
free(output);
bam_hdr_destroy(hdr1);
- if (verbose) printf("END test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END test 1\n");
// Cleanup
free(res.s);
remove(tempfname);
if (failure > 0)
- fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
- fclose(orig_pysamerr);
+ fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+ fclose(orig_pysam_stderr);
return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
}
diff --git a/samtools/test/split/test_expand_format_string.c b/samtools/test/split/test_expand_format_string.c
index ede7586..7c90b62 100644
--- a/samtools/test/split/test_expand_format_string.c
+++ b/samtools/test/split/test_expand_format_string.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <stdlib.h>
diff --git a/samtools/test/split/test_expand_format_string.c.pysam.c b/samtools/test/split/test_expand_format_string.c.pysam.c
index 94e7732..fe9a426 100644
--- a/samtools/test/split/test_expand_format_string.c.pysam.c
+++ b/samtools/test/split/test_expand_format_string.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <stdlib.h>
@@ -40,7 +42,7 @@ void setup_test_1(bam_hdr_t** hdr_in)
(*hdr_in)->l_text = strlen(test1);
}
-int main(int argc, char**argv)
+int samtools_test_expand_format_string_main(int argc, char**argv)
{
// test state
const int NUM_TESTS = 1;
@@ -55,7 +57,7 @@ int main(int argc, char**argv)
++verbose;
break;
default:
- printf(
+ fprintf(pysam_stdout,
"usage: test_expand_format_string [-v]\n\n"
" -v verbose output\n"
);
@@ -64,34 +66,34 @@ int main(int argc, char**argv)
}
- // Setup pysamerr redirect
+ // Setup pysam_stderr redirect
kstring_t res = { 0, 0, NULL };
- FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+ FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
char* tempfname = (optind < argc)? argv[optind] : "test_expand_format_string.tmp";
FILE* check = NULL;
// setup
- if (verbose) printf("BEGIN test 1\n"); // default format string test
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n"); // default format string test
const char* format_string_1 = "%*_%#.bam";
const char* basename_1 = "basename";
const char* rg_id_1 = "1#2.3";
const int rg_idx_1 = 4;
if (verbose > 1) {
- printf("format_string:%s\n"
+ fprintf(pysam_stdout, "format_string:%s\n"
"basename:%s\n"
"rg_id:%s\n"
"rg_idx:%d\n", format_string_1, basename_1, rg_id_1, rg_idx_1);
}
- if (verbose) printf("RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
char* output_1 = expand_format_string(format_string_1, basename_1, rg_id_1, rg_idx_1, NULL);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
if (verbose > 1) {
- printf("format_string:%s\n"
+ fprintf(pysam_stdout, "format_string:%s\n"
"basename:%s\n"
"rg_id:%s\n"
"rg_idx:%d\n", format_string_1, basename_1, rg_id_1, rg_idx_1);
@@ -106,20 +108,20 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 1\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
}
fclose(check);
// teardown
free(output_1);
- if (verbose) printf("END test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END test 1\n");
// Cleanup test harness
free(res.s);
remove(tempfname);
if (failure > 0)
- fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
- fclose(orig_pysamerr);
+ fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+ fclose(orig_pysam_stderr);
return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
}
diff --git a/samtools/test/split/test_filter_header_rg.c b/samtools/test/split/test_filter_header_rg.c
index f4e1266..d9505d6 100644
--- a/samtools/test/split/test_filter_header_rg.c
+++ b/samtools/test/split/test_filter_header_rg.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <unistd.h>
diff --git a/samtools/test/split/test_filter_header_rg.c.pysam.c b/samtools/test/split/test_filter_header_rg.c.pysam.c
index 4a5b6d5..97b3573 100644
--- a/samtools/test/split/test_filter_header_rg.c.pysam.c
+++ b/samtools/test/split/test_filter_header_rg.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <unistd.h>
@@ -73,7 +75,7 @@ bool check_test_2(const bam_hdr_t* hdr) {
return true;
}
-int main(int argc, char**argv)
+int samtools_test_filter_header_rg_main(int argc, char**argv)
{
// test state
const int NUM_TESTS = 2;
@@ -88,7 +90,7 @@ int main(int argc, char**argv)
++verbose;
break;
default:
- printf(
+ fprintf(pysam_stdout,
"usage: test_filter_header_rg [-v]\n\n"
" -v verbose output\n"
);
@@ -97,31 +99,31 @@ int main(int argc, char**argv)
}
- // Setup pysamerr redirect
+ // Setup pysam_stderr redirect
kstring_t res = { 0, 0, NULL };
- FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
+ FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
char* tempfname = (optind < argc)? argv[optind] : "test_count_rg.tmp";
FILE* check = NULL;
// setup
- if (verbose) printf("BEGIN test 1\n"); // test eliminating a tag that isn't there
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 1\n"); // test eliminating a tag that isn't there
bam_hdr_t* hdr1;
const char* id_to_keep_1 = "1#2.3";
setup_test_1(&hdr1);
if (verbose > 1) {
- printf("hdr1\n");
+ fprintf(pysam_stdout, "hdr1\n");
dump_hdr(hdr1);
}
- if (verbose) printf("RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 1\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bool result_1 = filter_header_rg(hdr1, id_to_keep_1);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 1\n");
if (verbose > 1) {
- printf("hdr1\n");
+ fprintf(pysam_stdout, "hdr1\n");
dump_hdr(hdr1);
}
@@ -135,32 +137,32 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 1\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 1\n");
}
fclose(check);
// teardown
bam_hdr_destroy(hdr1);
- if (verbose) printf("END test 1\n");
+ if (verbose) fprintf(pysam_stdout, "END test 1\n");
- if (verbose) printf("BEGIN test 2\n"); // test eliminating a tag that is there
+ if (verbose) fprintf(pysam_stdout, "BEGIN test 2\n"); // test eliminating a tag that is there
bam_hdr_t* hdr2;
const char* id_to_keep_2 = "fish";
setup_test_2(&hdr2);
if (verbose > 1) {
- printf("hdr2\n");
+ fprintf(pysam_stdout, "hdr2\n");
dump_hdr(hdr2);
}
- if (verbose) printf("RUN test 2\n");
+ if (verbose) fprintf(pysam_stdout, "RUN test 2\n");
// test
- xfreopen(tempfname, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname, "w", pysam_stderr); // Redirect pysam_stderr to pipe
bool result_2 = filter_header_rg(hdr2, id_to_keep_2);
- fclose(pysamerr);
+ fclose(pysam_stderr);
- if (verbose) printf("END RUN test 2\n");
+ if (verbose) fprintf(pysam_stdout, "END RUN test 2\n");
if (verbose > 1) {
- printf("hdr2\n");
+ fprintf(pysam_stdout, "hdr2\n");
dump_hdr(hdr2);
}
@@ -174,21 +176,21 @@ int main(int argc, char**argv)
++success;
} else {
++failure;
- if (verbose) printf("FAIL test 2\n");
+ if (verbose) fprintf(pysam_stdout, "FAIL test 2\n");
}
fclose(check);
// teardown
bam_hdr_destroy(hdr2);
- if (verbose) printf("END test 2\n");
+ if (verbose) fprintf(pysam_stdout, "END test 2\n");
// Cleanup
free(res.s);
remove(tempfname);
if (failure > 0)
- fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
- fclose(orig_pysamerr);
+ fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+ fclose(orig_pysam_stderr);
return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
}
diff --git a/samtools/test/split/test_parse_args.c b/samtools/test/split/test_parse_args.c
index 66c7c88..85a196a 100644
--- a/samtools/test/split/test_parse_args.c
+++ b/samtools/test/split/test_parse_args.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <stdlib.h>
diff --git a/samtools/test/split/test_parse_args.c.pysam.c b/samtools/test/split/test_parse_args.c.pysam.c
index 608ec7c..2c3e749 100644
--- a/samtools/test/split/test_parse_args.c.pysam.c
+++ b/samtools/test/split/test_parse_args.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_split.c"
#include "../test.h"
#include <stdlib.h>
@@ -65,7 +67,7 @@ bool check_test_2(const parsed_opts_t* opts) {
return true;
}
-int main(int argc, char**argv)
+int samtools_test_parse_args_main(int argc, char**argv)
{
// test state
const int NUM_TESTS = 2;
@@ -80,7 +82,7 @@ int main(int argc, char**argv)
++verbose;
break;
default:
- printf(
+ fprintf(pysam_stdout,
"usage: test_parse_args [-v]\n\n"
" -v verbose output\n"
);
@@ -88,58 +90,58 @@ int main(int argc, char**argv)
}
}
- // Setup stdout and pysamerr redirect
- kstring_t res_stdout = { 0, 0, NULL };
- kstring_t res_pysamerr = { 0, 0, NULL };
- FILE* orig_stdout = fdopen(dup(STDOUT_FILENO), "a"); // Save pysamerr
- FILE* orig_pysamerr = fdopen(dup(STDERR_FILENO), "a"); // Save pysamerr
- char* tempfname_stdout = (optind < argc)? argv[optind] : "test_parse_args.tmp.o";
- char* tempfname_pysamerr = (optind < argc)? argv[optind] : "test_parse_args.tmp.e";
- FILE* check_stdout = NULL;
- FILE* check_pysamerr = NULL;
+ // Setup pysam_stdout and pysam_stderr redirect
+ kstring_t res_pysam_stdout = { 0, 0, NULL };
+ kstring_t res_pysam_stderr = { 0, 0, NULL };
+ FILE* orig_pysam_stdout = fdopen(dup(STDOUT_FILENO), "a"); // Save pysam_stderr
+ FILE* orig_pysam_stderr = fdopen(dup(STDERR_FILENO), "a"); // Save pysam_stderr
+ char* tempfname_pysam_stdout = (optind < argc)? argv[optind] : "test_parse_args.tmp.o";
+ char* tempfname_pysam_stderr = (optind < argc)? argv[optind] : "test_parse_args.tmp.e";
+ FILE* check_pysam_stdout = NULL;
+ FILE* check_pysam_stderr = NULL;
// Cleanup getopt
optind = 1;
// setup
- if (verbose) fprintf(orig_stdout,"BEGIN test 1\n"); // test eliminating a tag that isn't there
+ if (verbose) fprintf(orig_pysam_stdout,"BEGIN test 1\n"); // test eliminating a tag that isn't there
int argc_1;
char** argv_1;
setup_test_1(&argc_1, &argv_1);
if (verbose > 1) {
- fprintf(orig_stdout, "argc: %d\n", argc_1);
+ fprintf(orig_pysam_stdout, "argc: %d\n", argc_1);
}
- if (verbose) fprintf(orig_stdout,"RUN test 1\n");
+ if (verbose) fprintf(orig_pysam_stdout,"RUN test 1\n");
// test
- xfreopen(tempfname_stdout, "w", stdout); // Redirect stdout to pipe
- xfreopen(tempfname_pysamerr, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname_pysam_stdout, "w", pysam_stdout); // Redirect pysam_stdout to pipe
+ xfreopen(tempfname_pysam_stderr, "w", pysam_stderr); // Redirect pysam_stderr to pipe
parsed_opts_t* result_1 = parse_args(argc_1, argv_1);
- fclose(stdout);
- fclose(pysamerr);
+ fclose(pysam_stdout);
+ fclose(pysam_stderr);
- if (verbose) fprintf(orig_stdout, "END RUN test 1\n");
+ if (verbose) fprintf(orig_pysam_stdout, "END RUN test 1\n");
if (verbose > 1) {
- fprintf(orig_stdout, "argc: %d\n", argc_1);
+ fprintf(orig_pysam_stdout, "argc: %d\n", argc_1);
}
// check result
- res_stdout.l = res_pysamerr.l = 0;
- check_stdout = fopen(tempfname_stdout, "r");
- check_pysamerr = fopen(tempfname_pysamerr, "r");
+ res_pysam_stdout.l = res_pysam_stderr.l = 0;
+ check_pysam_stdout = fopen(tempfname_pysam_stdout, "r");
+ check_pysam_stderr = fopen(tempfname_pysam_stderr, "r");
if ( !result_1
- && kgetline(&res_stdout, (kgets_func *)fgets, check_stdout) >= 0
- && !feof(check_stdout)
- && res_stdout.l > 0
- && kgetline(&res_pysamerr, (kgets_func *)fgets, check_pysamerr) < 0
- && (feof(check_pysamerr) || res_pysamerr.l == 0)) {
+ && kgetline(&res_pysam_stdout, (kgets_func *)fgets, check_pysam_stdout) >= 0
+ && !feof(check_pysam_stdout)
+ && res_pysam_stdout.l > 0
+ && kgetline(&res_pysam_stderr, (kgets_func *)fgets, check_pysam_stderr) < 0
+ && (feof(check_pysam_stderr) || res_pysam_stderr.l == 0)) {
++success;
} else {
++failure;
- if (verbose) fprintf(orig_stdout, "FAIL test 1\n");
+ if (verbose) fprintf(orig_pysam_stdout, "FAIL test 1\n");
}
- fclose(check_pysamerr);
- fclose(check_stdout);
+ fclose(check_pysam_stderr);
+ fclose(check_pysam_stdout);
// teardown
cleanup_opts(result_1);
@@ -148,49 +150,49 @@ int main(int argc, char**argv)
free(argv_1[i]);
}
free(argv_1);
- if (verbose) fprintf(orig_stdout, "END test 1\n");
+ if (verbose) fprintf(orig_pysam_stdout, "END test 1\n");
// Cleanup getopt
optind = 1;
- if (verbose) fprintf(orig_stdout, "BEGIN test 2\n"); // test eliminating a tag that is there
+ if (verbose) fprintf(orig_pysam_stdout, "BEGIN test 2\n"); // test eliminating a tag that is there
int argc_2;
char** argv_2;
setup_test_2(&argc_2, &argv_2);
if (verbose > 1) {
- fprintf(orig_stdout, "argc: %d\n", argc_2);
+ fprintf(orig_pysam_stdout, "argc: %d\n", argc_2);
}
- if (verbose) fprintf(orig_stdout, "RUN test 2\n");
+ if (verbose) fprintf(orig_pysam_stdout, "RUN test 2\n");
// test
- xfreopen(tempfname_stdout, "w", stdout); // Redirect stdout to pipe
- xfreopen(tempfname_pysamerr, "w", pysamerr); // Redirect pysamerr to pipe
+ xfreopen(tempfname_pysam_stdout, "w", pysam_stdout); // Redirect pysam_stdout to pipe
+ xfreopen(tempfname_pysam_stderr, "w", pysam_stderr); // Redirect pysam_stderr to pipe
parsed_opts_t* result_2 = parse_args(argc_2, argv_2);
- fclose(stdout);
- fclose(pysamerr);
+ fclose(pysam_stdout);
+ fclose(pysam_stderr);
- if (verbose) fprintf(orig_stdout, "END RUN test 2\n");
+ if (verbose) fprintf(orig_pysam_stdout, "END RUN test 2\n");
if (verbose > 1) {
- fprintf(orig_stdout, "argc: %d\n", argc_2);
+ fprintf(orig_pysam_stdout, "argc: %d\n", argc_2);
}
// check result
- res_stdout.l = res_pysamerr.l = 0;
- check_stdout = fopen(tempfname_stdout, "r");
- check_pysamerr = fopen(tempfname_pysamerr, "r");
+ res_pysam_stdout.l = res_pysam_stderr.l = 0;
+ check_pysam_stdout = fopen(tempfname_pysam_stdout, "r");
+ check_pysam_stderr = fopen(tempfname_pysam_stderr, "r");
if ( result_2
&& check_test_2(result_2)
- && kgetline(&res_stdout, (kgets_func *)fgets, check_stdout) < 0
- && (feof(check_stdout) || res_stdout.l == 0)
- && kgetline(&res_pysamerr, (kgets_func *)fgets, check_pysamerr) < 0
- && (feof(check_pysamerr) || res_pysamerr.l == 0)) {
+ && kgetline(&res_pysam_stdout, (kgets_func *)fgets, check_pysam_stdout) < 0
+ && (feof(check_pysam_stdout) || res_pysam_stdout.l == 0)
+ && kgetline(&res_pysam_stderr, (kgets_func *)fgets, check_pysam_stderr) < 0
+ && (feof(check_pysam_stderr) || res_pysam_stderr.l == 0)) {
++success;
} else {
++failure;
- if (verbose) fprintf(orig_stdout, "FAIL test 2\n");
+ if (verbose) fprintf(orig_pysam_stdout, "FAIL test 2\n");
}
- fclose(check_stdout);
- fclose(check_pysamerr);
+ fclose(check_pysam_stdout);
+ fclose(check_pysam_stderr);
// teardown
cleanup_opts(result_2);
@@ -200,18 +202,18 @@ int main(int argc, char**argv)
}
free(argv_2);
- if (verbose) fprintf(orig_stdout, "END test 2\n");
+ if (verbose) fprintf(orig_pysam_stdout, "END test 2\n");
// Cleanup
- free(res_stdout.s);
- free(res_pysamerr.s);
- remove(tempfname_stdout);
- remove(tempfname_pysamerr);
- fclose(orig_stdout);
+ free(res_pysam_stdout.s);
+ free(res_pysam_stderr.s);
+ remove(tempfname_pysam_stdout);
+ remove(tempfname_pysam_stderr);
+ fclose(orig_pysam_stdout);
if (failure > 0)
- fprintf(orig_pysamerr, "%d failures %d successes\n", failure, success);
- fclose(orig_pysamerr);
+ fprintf(orig_pysam_stderr, "%d failures %d successes\n", failure, success);
+ fclose(orig_pysam_stderr);
return (success == NUM_TESTS)? EXIT_SUCCESS : EXIT_FAILURE;
}
diff --git a/samtools/test/test.c b/samtools/test/test.c
index ef1d1f9..7ab38af 100644
--- a/samtools/test/test.c
+++ b/samtools/test/test.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/samtools/test/test.c.pysam.c b/samtools/test/test.c.pysam.c
index 735eb7b..a8295b5 100644
--- a/samtools/test/test.c.pysam.c
+++ b/samtools/test/test.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
@@ -35,7 +37,7 @@ DEALINGS IN THE SOFTWARE. */
void xfreopen(const char *path, const char *mode, FILE *stream)
{
if (freopen(path, mode, stream) == NULL) {
- fprintf(pysamerr, __FILE__": error reopening %s: %s\n",
+ fprintf(pysam_stderr, __FILE__": error reopening %s: %s\n",
path, strerror(errno));
exit(2);
}
@@ -43,13 +45,13 @@ void xfreopen(const char *path, const char *mode, FILE *stream)
void dump_hdr(const bam_hdr_t* hdr)
{
- printf("n_targets: %d\n", hdr->n_targets);
- printf("ignore_sam_err: %d\n", hdr->ignore_sam_err);
- printf("l_text: %u\n", hdr->l_text);
- printf("idx\ttarget_len\ttarget_name:\n");
+ fprintf(pysam_stdout, "n_targets: %d\n", hdr->n_targets);
+ fprintf(pysam_stdout, "ignore_sam_err: %d\n", hdr->ignore_sam_err);
+ fprintf(pysam_stdout, "l_text: %u\n", hdr->l_text);
+ fprintf(pysam_stdout, "idx\ttarget_len\ttarget_name:\n");
int32_t target;
for (target = 0; target < hdr->n_targets; ++target) {
- printf("%d\t%u\t\"%s\"\n", target, hdr->target_len[target], hdr->target_name[target]);
+ fprintf(pysam_stdout, "%d\t%u\t\"%s\"\n", target, hdr->target_len[target], hdr->target_name[target]);
}
- printf("text: \"%s\"\n", hdr->text);
+ fprintf(pysam_stdout, "text: \"%s\"\n", hdr->text);
}
diff --git a/samtools/test/tview/test_get_rg_sample.c b/samtools/test/tview/test_get_rg_sample.c
index c22ba9d..3db9da2 100644
--- a/samtools/test/tview/test_get_rg_sample.c
+++ b/samtools/test/tview/test_get_rg_sample.c
@@ -22,6 +22,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_tview.c"
#include <stdbool.h>
diff --git a/samtools/test/tview/test_get_rg_sample.c.pysam.c b/samtools/test/tview/test_get_rg_sample.c.pysam.c
index 99a217f..8c441f9 100644
--- a/samtools/test/tview/test_get_rg_sample.c.pysam.c
+++ b/samtools/test/tview/test_get_rg_sample.c.pysam.c
@@ -24,6 +24,8 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
+#include <config.h>
+
#include "../../bam_tview.c"
#include <stdbool.h>
@@ -59,7 +61,7 @@ void teardown_1(khash_t(kh_rg)* test_result, char* header)
free(header);
}
-int main(int argc, char** argv)
+int samtools_test_get_rg_sample_main(int argc, char** argv)
{
const int NUM_TESTS = 1;
int success = 0;
@@ -77,7 +79,7 @@ int main(int argc, char** argv)
if (success == NUM_TESTS) {
return 0;
} else {
- fprintf(pysamerr, "%d failures %d successes\n", failure, success);
+ fprintf(pysam_stderr, "%d failures %d successes\n", failure, success);
return 1;
}
}
diff --git a/samtools/version.h b/samtools/version.h
index abe052c..ec46e67 100644
--- a/samtools/version.h
+++ b/samtools/version.h
@@ -1 +1 @@
-#define SAMTOOLS_VERSION "1.3"
+#define SAMTOOLS_VERSION "1.3.1"
diff --git a/setup.py b/setup.py
index 7b59b69..080bc24 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@ This module provides a low-level wrapper around the htslib C-API as
using cython and a high-level API for convenient access to the data
within standard genomic file formats.
-The current version wraps htslib-1.3, samtools-1.3 and bcftools-1.3.
+The current version wraps htslib-1.3.1, samtools-1.3.1 and bcftools-1.3.1.
See:
http://www.htslib.org
@@ -155,7 +155,7 @@ if HTSLIB_MODE in ['shared', 'separate']:
outf.write(
"/* empty config.h created by pysam */\n")
outf.write(
- "/* conservative compilation options */")
+ "/* conservative compilation options */\n")
if HTSLIB_LIBRARY_DIR:
# linking against a shared, externally installed htslib version, no
@@ -259,6 +259,16 @@ if HTSLIB_SOURCE == "builtin":
"adding shared libcurl and libcrypto")
external_htslib_libraries.extend(["curl", "crypto"])
+# create empty config.h files if they have not been created automatically
+# or created by the user:
+for fn in "samtools/config.h", "htslib/config.h":
+ if not os.path.exists(fn):
+ with open(fn, "w") as outf:
+ outf.write(
+ "/* empty config.h created by pysam */\n")
+ outf.write(
+ "/* conservative compilation options */\n")
+
parts = ["samtools",
"bcftools",
"htslib",
@@ -271,15 +281,6 @@ parts = ["samtools",
"vcf",
"bcf"]
-# remove existing files to recompute
-# necessary to be both compatible for python 2.7 and 3.3
-if IS_PYTHON3:
- for part in parts:
- try:
- os.unlink("pysam/c%s.c" % part)
- except:
- pass
-
# Exit if there are no pre-compiled files and no cython available
fn = source_pattern % "htslib"
if not os.path.exists(fn):
@@ -449,7 +450,7 @@ ctabixproxies = Extension(
"pysam.ctabixproxies",
[source_pattern % "tabixproxies"] +
os_c_files,
- library_dirs=[],
+ library_dirs=htslib_library_dirs,
include_dirs=include_os,
libraries=external_htslib_libraries + internal_htslib_libraries,
language="c",
@@ -461,7 +462,7 @@ cvcf = Extension(
"pysam.cvcf",
[source_pattern % "vcf"] +
os_c_files,
- library_dirs=[],
+ library_dirs=htslib_library_dirs,
include_dirs=["htslib", "."] + include_os + htslib_include_dirs,
libraries=external_htslib_libraries + internal_htslib_libraries,
language="c",
diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py
index 5995faa..94b2eb3 100644
--- a/tests/AlignedSegment_test.py
+++ b/tests/AlignedSegment_test.py
@@ -3,6 +3,7 @@ import pysam
import unittest
import collections
import copy
+import array
from TestUtils import checkFieldEqual
@@ -319,7 +320,7 @@ class TestAlignedSegment(ReadTest):
(None, 25, 'T'), (None, 26, 'T'),
(5, 27, 'A'), (6, 28, 'A'), (7, 29, 'A'), (8, 30, 'A')]
)
-
+
a.cigarstring = "5M2D2I2M"
a.set_tag("MD", "4C^TT2")
self.assertEqual(
@@ -331,6 +332,34 @@ class TestAlignedSegment(ReadTest):
(7, 27, 'A'), (8, 28, 'A')]
)
+ def test_get_aligned_pairs_skip_reference(self):
+ a = self.buildRead()
+ a.query_sequence = "A" * 10
+ a.cigarstring = "5M1N5M"
+ a.set_tag("MD", "10")
+
+ self.assertEqual(
+ a.get_aligned_pairs(with_seq=True),
+ [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'),
+ (3, 23, 'A'), (4, 24, 'A'), (None, 25, None),
+ (5, 26, 'A'), (6, 27, 'A'), (7, 28, 'A'),
+ (8, 29, 'A'), (9, 30, 'A')])
+
+ self.assertEqual(
+ a.get_aligned_pairs(with_seq=False),
+ [(0, 20), (1, 21), (2, 22),
+ (3, 23), (4, 24), (None, 25),
+ (5, 26), (6, 27), (7, 28),
+ (8, 29), (9, 30)])
+
+ self.assertEqual(
+ a.get_aligned_pairs(matches_only=True, with_seq=False),
+ [(0, 20), (1, 21),
+ (2, 22), (3, 23),
+ (4, 24), (5, 26),
+ (6, 27), (7, 28),
+ (8, 29), (9, 30)])
+
def testNoSequence(self):
'''issue 176: retrieving length without query sequence
with soft-clipping.
@@ -347,13 +376,60 @@ class TestAlignedSegment(ReadTest):
self.assertEqual(a.query_alignment_length, 20)
+class TestCigarStats(ReadTest):
+
+ def testStats(self):
+
+ a = self.buildRead()
+
+ a.cigarstring = None
+ self.assertEqual(
+ [list(x) for x in a.get_cigar_stats()],
+ [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+ a.cigarstring = "10M"
+ self.assertEqual(
+ [list(x) for x in a.get_cigar_stats()],
+ [[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+ a.cigarstring = "10M2I2M"
+ self.assertEqual(
+ [list(x) for x in a.get_cigar_stats()],
+ [[12, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0],
+ [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+ for i, x in enumerate("MIDNSHP=X"):
+ a.cigarstring = "2{}".format(x)
+ expected = [[0] * 11, [0] * 11]
+ expected[0][i] = 2
+ expected[1][i] = 1
+ self.assertEqual(
+ [list(x) for x in a.get_cigar_stats()],
+ expected)
+
+ a.cigarstring = "10M"
+ a.set_tag("NM", 5)
+ self.assertEqual(
+ [list(x) for x in a.get_cigar_stats()],
+ [[10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
+ [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+ a.cigarstring = None
+ self.assertEqual(
+ [list(x) for x in a.get_cigar_stats()],
+ [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
+
+
class TestAlignedPairs(unittest.TestCase):
filename = os.path.join(DATADIR, "example_aligned_pairs.bam")
def testReferenceBases(self):
"""reference bases should always be the same nucleotide
"""
- reference_bases = collections.defaultdict(list)
+ reference_bases = collections.defaultdict(list)
with pysam.AlignmentFile(self.filename) as inf:
for c in inf.pileup():
for r in c.pileups:
@@ -389,7 +465,25 @@ class TestTags(ReadTest):
self.assertEqual(False, a.has_tag("NM"))
# check if deleting a non-existing tag is fine
a.set_tag("NM", None)
+ a.set_tag("NM", None)
+ def testArrayTags(self):
+ read = self.buildRead()
+ supported_dtypes = "bhBHf"
+ unsupported_dtypes = "lLd"
+
+ for dtype in supported_dtypes:
+ key = "F" + dtype
+ read.set_tag(key, array.array(dtype, range(10)))
+ ary = read.get_tag(key)
+
+ for dtype in unsupported_dtypes:
+ key = "F" + dtype
+ self.assertRaises(ValueError,
+ read.set_tag,
+ key,
+ array.array(dtype, range(10)))
+
def testAddTagsType(self):
a = self.buildRead()
a.tags = None
@@ -551,6 +645,23 @@ class TestTags(ReadTest):
"A" * 5 + "C" * 3 + "A" * 5,
a.get_reference_sequence())
+ def testMDTagRefSkipping(self):
+ a = self.buildRead()
+
+ a.cigarstring = "5M1N5M"
+ a.query_sequence = "A" * 10
+ a.set_tag('MD', "10")
+ self.assertEqual(
+ "A" * 10,
+ a.get_reference_sequence())
+
+ a.cigarstring = "5M3N5M"
+ a.query_sequence = "A" * 10
+ a.set_tag('MD', "10")
+ self.assertEqual(
+ "A" * 10,
+ a.get_reference_sequence())
+
def testMDTagSoftClipping(self):
a = self.buildRead()
@@ -561,7 +672,7 @@ class TestTags(ReadTest):
self.assertEqual(
"A" * 5 + "C" + "A" * 5,
a.get_reference_sequence())
-
+
# all together
a.cigarstring = "5S5M1D5M1I5M5S"
a.query_sequence = "G" * 5 + "A" * 16 + "G" * 5
@@ -579,7 +690,7 @@ class TestTags(ReadTest):
self.assertEqual(
"AAcAATCAAAAA",
a.get_reference_sequence())
-
+
a.cigarstring = "5S5M2D1I5M5S"
a.query_sequence = "G" * 5 + "A" * 11 + "G" * 5
a.set_tag('MD', "2C2^TC5")
@@ -606,7 +717,7 @@ class TestTags(ReadTest):
class TestCopy(ReadTest):
-
+
def testCopy(self):
a = self.buildRead()
b = copy.copy(a)
diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py
index c03e234..9a33722 100644
--- a/tests/AlignmentFile_test.py
+++ b/tests/AlignmentFile_test.py
@@ -23,7 +23,7 @@ from functools import partial
import pysam
import pysam.samtools
from TestUtils import checkBinaryEqual, checkURL, \
- checkSamtoolsViewEqual, checkFieldEqual, force_str
+ check_samtools_view_equal, checkFieldEqual, force_str
DATADIR = "pysam_data"
@@ -49,6 +49,9 @@ class BasicTestBAMFromFetch(unittest.TestCase):
"rb")
self.reads = list(self.samfile.fetch())
+ def tearDown(self):
+ self.samfile.close()
+
def testARqname(self):
self.assertEqual(
self.reads[0].query_name,
@@ -261,9 +264,6 @@ class BasicTestBAMFromFetch(unittest.TestCase):
self.assertEqual(self.reads[0].opt("XT"), "U")
self.assertEqual(self.reads[1].opt("XT"), "R")
- def tearDown(self):
- self.samfile.close()
-
class BasicTestSAMFromFetch(BasicTestBAMFromFetch):
@@ -426,42 +426,42 @@ class TestIO(unittest.TestCase):
The *checkf* is used to determine if the files are
equal.
'''
- infile = pysam.AlignmentFile(
- os.path.join(DATADIR, input_filename),
- input_mode)
-
- if "b" in input_mode:
- self.assertTrue(infile.is_bam)
- self.assertFalse(infile.is_cram)
- elif "c" in input_mode:
- self.assertFalse(infile.is_bam)
- self.assertTrue(infile.is_cram)
- else:
- self.assertFalse(infile.is_cram)
- self.assertFalse(infile.is_bam)
-
- if use_template:
- outfile = pysam.AlignmentFile(
- output_filename,
- output_mode,
- reference_filename=sequence_filename,
- template=infile)
- else:
- outfile = pysam.AlignmentFile(
- output_filename,
- output_mode,
- reference_names=infile.references,
- reference_lengths=infile.lengths,
- reference_filename=sequence_filename,
- add_sq_text=False)
- iter = infile.fetch()
+ with pysam.AlignmentFile(
+ os.path.join(DATADIR, input_filename),
+ input_mode) as infile:
+
+ if "b" in input_mode:
+ self.assertTrue(infile.is_bam)
+ self.assertFalse(infile.is_cram)
+ elif "c" in input_mode:
+ self.assertFalse(infile.is_bam)
+ self.assertTrue(infile.is_cram)
+ else:
+ self.assertFalse(infile.is_cram)
+ self.assertFalse(infile.is_bam)
+
+ if use_template:
+ outfile = pysam.AlignmentFile(
+ output_filename,
+ output_mode,
+ reference_filename=sequence_filename,
+ template=infile)
+ else:
+ outfile = pysam.AlignmentFile(
+ output_filename,
+ output_mode,
+ reference_names=infile.references,
+ reference_lengths=infile.lengths,
+ reference_filename=sequence_filename,
+ add_sq_text=False)
- for x in iter:
- outfile.write(x)
+ iter = infile.fetch()
- infile.close()
- outfile.close()
+ for x in iter:
+ outfile.write(x)
+
+ outfile.close()
self.assertTrue(checkf(
os.path.join(DATADIR, reference_filename),
@@ -490,7 +490,7 @@ class TestIO(unittest.TestCase):
"tmp_ex2.cram",
"rc", "wc",
sequence_filename="pysam_data/ex1.fa",
- checkf=checkSamtoolsViewEqual)
+ checkf=check_samtools_view_equal)
def testSAM2BAM(self):
self.checkEcho("ex2.sam",
@@ -512,7 +512,7 @@ class TestIO(unittest.TestCase):
"rb", "wc",
sequence_filename="pysam_data/ex1.fa",
checkf=partial(
- checkSamtoolsViewEqual,
+ check_samtools_view_equal,
without_header=True))
def testCRAM2BAM(self):
@@ -523,7 +523,7 @@ class TestIO(unittest.TestCase):
"rc", "wb",
sequence_filename="pysam_data/ex1.fa",
checkf=partial(
- checkSamtoolsViewEqual,
+ check_samtools_view_equal,
without_header=True))
def testSAM2CRAM(self):
@@ -533,7 +533,7 @@ class TestIO(unittest.TestCase):
"r", "wc",
sequence_filename="pysam_data/ex1.fa",
checkf=partial(
- checkSamtoolsViewEqual,
+ check_samtools_view_equal,
without_header=True))
def testCRAM2SAM(self):
@@ -543,7 +543,7 @@ class TestIO(unittest.TestCase):
"rc", "wh",
sequence_filename="pysam_data/ex1.fa",
checkf=partial(
- checkSamtoolsViewEqual,
+ check_samtools_view_equal,
without_header=True))
# Disabled - should work, files are not binary equal, but are
@@ -858,12 +858,18 @@ class TestIteratorRowBAM(unittest.TestCase):
filename = os.path.join(DATADIR, "ex2.bam")
mode = "rb"
+ reference_filename = None
def setUp(self):
self.samfile = pysam.AlignmentFile(
- self.filename, self.mode,
+ self.filename,
+ self.mode,
+ reference_filename=self.reference_filename,
)
+ def tearDown(self):
+ self.samfile.close()
+
def checkRange(self, rnge):
'''compare results from iterator with those from samtools.'''
ps = list(self.samfile.fetch(region=rnge))
@@ -911,9 +917,6 @@ class TestIteratorRowBAM(unittest.TestCase):
self.checkRange("%s:%i-%i" %
(contig, start, start + 90))
- def tearDown(self):
- self.samfile.close()
-
class TestIteratorRowAllBAM(unittest.TestCase):
@@ -1034,9 +1037,9 @@ class TestIteratorRowCRAM(TestIteratorRowBAM):
mode = "rc"
-class TestIteratorRowCRAM(TestIteratorRowBAM):
- filename = os.path.join(DATADIR, "ex2.cram")
- mode = "rc"
+class TestIteratorRowCRAMWithReferenceFilename(TestIteratorRowCRAM):
+ reference_filename = os.path.join(DATADIR, "ex1.fa")
+
##########################################################
##########################################################
@@ -1840,40 +1843,54 @@ class TestBTagBam(TestBTagSam):
filename = os.path.join(DATADIR, 'example_btag.bam')
-class TestDoubleFetch(unittest.TestCase):
-
+class TestDoubleFetchBAM(unittest.TestCase):
'''check if two iterators on the same bamfile are independent.'''
filename = os.path.join(DATADIR, 'ex1.bam')
+ mode = "rb"
def testDoubleFetch(self):
- samfile1 = pysam.AlignmentFile(self.filename, 'rb')
-
- for a, b in zip(samfile1.fetch(multiple_iterators=True),
- samfile1.fetch(multiple_iterators=True)):
- self.assertEqual(a.compare(b), 0)
+ with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
+ for a, b in zip(samfile1.fetch(multiple_iterators=True),
+ samfile1.fetch(multiple_iterators=True)):
+ self.assertEqual(a.compare(b), 0)
def testDoubleFetchWithRegion(self):
- samfile1 = pysam.AlignmentFile(self.filename, 'rb')
- chr, start, stop = 'chr1', 200, 3000000
- # just making sure the test has something to catch
- self.assertTrue(len(list(samfile1.fetch(chr, start, stop))) > 0)
+ with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
+ contig, start, stop = 'chr1', 200, 3000000
+ # just making sure the test has something to catch
+ self.assertTrue(len(list(samfile1.fetch(contig, start, stop))) > 0)
- for a, b in zip(samfile1.fetch(chr, start, stop),
- samfile1.fetch(chr, start, stop,
- multiple_iterators=True)):
- self.assertEqual(a.compare(b), 0)
+ # see Issue #293
+ # The following fails for CRAM files, but works for BAM
+ # files when the first is multiple_iterators=False:
+ for a, b in zip(samfile1.fetch(contig, start, stop,
+ multiple_iterators=True),
+ samfile1.fetch(contig, start, stop,
+ multiple_iterators=True)):
+ self.assertEqual(a.compare(b), 0)
def testDoubleFetchUntilEOF(self):
- samfile1 = pysam.AlignmentFile(self.filename, 'rb')
+ with pysam.AlignmentFile(self.filename, self.mode) as samfile1:
+
+ for a, b in zip(samfile1.fetch(until_eof=True),
+ samfile1.fetch(until_eof=True,
+ multiple_iterators=True)):
+ self.assertEqual(a.compare(b), 0)
+
+
+class TestDoubleFetchCRAM(TestDoubleFetchBAM):
+ filename = os.path.join(DATADIR, 'ex2.cram')
+ mode = "rc"
+
- for a, b in zip(samfile1.fetch(until_eof=True),
- samfile1.fetch(until_eof=True,
- multiple_iterators=True)):
- self.assertEqual(a.compare(b), 0)
+class TestDoubleFetchCRAMWithReference(TestDoubleFetchBAM):
+ filename = os.path.join(DATADIR, 'ex2.cram')
+ mode = "rc"
+ reference_filename = os.path.join(DATADIR, 'ex1.fa')
class TestRemoteFileFTP(unittest.TestCase):
@@ -1926,10 +1943,11 @@ class TestRemoteFileHTTP(unittest.TestCase):
if not checkURL(self.url):
return
- samfile = pysam.AlignmentFile(self.url, "rb")
- result = list(samfile.fetch(region=self.region))
- samfile_local = pysam.AlignmentFile(self.local, "rb")
- ref = list(samfile_local.fetch(region=self.region))
+ with pysam.AlignmentFile(self.url, "rb") as samfile:
+ result = list(samfile.fetch(region=self.region))
+
+ with pysam.AlignmentFile(self.local, "rb") as samfile_local:
+ ref = list(samfile_local.fetch(region=self.region))
self.assertEqual(len(ref), len(result))
for x, y in zip(result, ref):
@@ -1939,10 +1957,11 @@ class TestRemoteFileHTTP(unittest.TestCase):
if not checkURL(self.url):
return
- samfile = pysam.AlignmentFile(self.url, "rb")
- result = list(samfile.fetch())
- samfile_local = pysam.AlignmentFile(self.local, "rb")
- ref = list(samfile_local.fetch())
+ with pysam.AlignmentFile(self.url, "rb") as samfile:
+ result = list(samfile.fetch())
+
+ with pysam.AlignmentFile(self.local, "rb") as samfile_local:
+ ref = list(samfile_local.fetch())
self.assertEqual(len(ref), len(result))
for x, y in zip(result, ref):
@@ -2009,6 +2028,10 @@ class TestPileup(unittest.TestCase):
self.samfile = pysam.AlignmentFile(self.samfilename)
self.fastafile = pysam.Fastafile(self.fastafilename)
+ def tearDown(self):
+ self.samfile.close()
+ self.fastafile.close()
+
def checkEqual(self, references, iterator):
for x, column in enumerate(iterator):
@@ -2070,6 +2093,10 @@ class TestCountCoverage(unittest.TestCase):
samfile.close()
pysam.samtools.index("test_count_coverage_read_all.bam")
+ def tearDown(self):
+ self.samfile.close()
+ self.fastafile.close()
+
def count_coverage_python(self, bam, chrom, start, stop,
read_callback,
quality_threshold=15):
@@ -2161,23 +2188,26 @@ class TestCountCoverage(unittest.TestCase):
self.assertEqual(fast_counts[3], manual_counts[3])
def test_count_coverage_read_all(self):
- samfile = pysam.AlignmentFile("test_count_coverage_read_all.bam")
+
chrom = 'chr1'
start = 0
stop = 2000
def filter(read):
return not (read.flag & (0x4 | 0x100 | 0x200 | 0x400))
- fast_counts = samfile.count_coverage(
- chrom, start, stop,
- read_callback='all',
- #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
- quality_threshold=0)
- manual_counts = samfile.count_coverage(
- chrom, start, stop,
- read_callback=lambda read: not(
- read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
- quality_threshold=0)
+
+ with pysam.AlignmentFile("test_count_coverage_read_all.bam") as samfile:
+
+ fast_counts = samfile.count_coverage(
+ chrom, start, stop,
+ read_callback='all',
+ #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
+ quality_threshold=0)
+ manual_counts = samfile.count_coverage(
+ chrom, start, stop,
+ read_callback=lambda read: not(
+ read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
+ quality_threshold=0)
os.unlink("test_count_coverage_read_all.bam")
os.unlink("test_count_coverage_read_all.bam.bai")
@@ -2202,18 +2232,20 @@ class TestCountCoverage(unittest.TestCase):
samfile.write(read)
samfile.close()
pysam.samtools.index("test_count_coverage_nofilter.bam")
- samfile = pysam.AlignmentFile("test_count_coverage_nofilter.bam")
chr = 'chr1'
start = 0
stop = 2000
- fast_counts = samfile.count_coverage(chr, start, stop,
- read_callback='nofilter',
- quality_threshold=0)
- manual_counts = self.count_coverage_python(samfile, chr, start, stop,
- read_callback=lambda x: True,
- quality_threshold=0)
- samfile.close()
+ with pysam.AlignmentFile("test_count_coverage_nofilter.bam") as samfile:
+
+ fast_counts = samfile.count_coverage(chr, start, stop,
+ read_callback='nofilter',
+ quality_threshold=0)
+
+ manual_counts = self.count_coverage_python(samfile, chr, start, stop,
+ read_callback=lambda x: True,
+ quality_threshold=0)
+
os.unlink("test_count_coverage_nofilter.bam")
os.unlink("test_count_coverage_nofilter.bam.bai")
self.assertEqual(fast_counts[0], manual_counts[0])
@@ -2223,7 +2255,7 @@ class TestCountCoverage(unittest.TestCase):
class TestPileupQueryPosition(unittest.TestCase):
-
+
filename = "test_query_position.bam"
def testPileup(self):
@@ -2260,8 +2292,8 @@ class TestLogging(unittest.TestCase):
log_hand.setFormatter(formatter)
logger.addHandler(log_hand)
- bam = pysam.AlignmentFile(bamfile, 'rb')
- cols = bam.pileup()
+ with pysam.AlignmentFile(bamfile, 'rb') as bam:
+ cols = bam.pileup()
self.assertTrue(True)
def testFail1(self):
@@ -2292,40 +2324,41 @@ class TestAlignmentFileUtilityFunctions(unittest.TestCase):
def testCount(self):
- samfile = pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
- "rb")
+ with pysam.AlignmentFile(
+ os.path.join(DATADIR, "ex1.bam"),
+ "rb") as samfile:
- for contig in ("chr1", "chr2"):
- for start in range(0, 2000, 100):
- end = start + 1
- self.assertEqual(
- len(list(samfile.fetch(contig, start, end))),
- samfile.count(contig, start, end),
- 'number mismatch for %s:%i-%i %i != %i' % (
- contig, start, end,
+ for contig in ("chr1", "chr2"):
+ for start in range(0, 2000, 100):
+ end = start + 1
+ self.assertEqual(
len(list(samfile.fetch(contig, start, end))),
- samfile.count(contig, start, end)))
+ samfile.count(contig, start, end),
+ 'number mismatch for %s:%i-%i %i != %i' % (
+ contig, start, end,
+ len(list(samfile.fetch(contig, start, end))),
+ samfile.count(contig, start, end)))
- # test empty intervals
- self.assertEqual(
- len(list(samfile.fetch(contig, start, start))),
- samfile.count(contig, start, start),
- 'number mismatch for %s:%i-%i %i != %i' % (
- contig, start, start,
+ # test empty intervals
+ self.assertEqual(
len(list(samfile.fetch(contig, start, start))),
- samfile.count(contig, start, start)))
+ samfile.count(contig, start, start),
+ 'number mismatch for %s:%i-%i %i != %i' % (
+ contig, start, start,
+ len(list(samfile.fetch(contig, start, start))),
+ samfile.count(contig, start, start)))
- # test half empty intervals
- self.assertEqual(len(list(samfile.fetch(contig, start))),
- samfile.count(contig, start))
+ # test half empty intervals
+ self.assertEqual(len(list(samfile.fetch(contig, start))),
+ samfile.count(contig, start))
- self.assertEqual(
- len(list(samfile.fetch(contig, start))),
- samfile.count(contig, start),
- 'number mismatch for %s:%i %i != %i' % (
- contig, start,
+ self.assertEqual(
len(list(samfile.fetch(contig, start))),
- samfile.count(contig, start)))
+ samfile.count(contig, start),
+ 'number mismatch for %s:%i %i != %i' % (
+ contig, start,
+ len(list(samfile.fetch(contig, start))),
+ samfile.count(contig, start)))
def testMate(self):
'''test mate access.'''
@@ -2339,35 +2372,35 @@ class TestAlignmentFileUtilityFunctions(unittest.TestCase):
for x in readnames:
counts[x] += 1
- samfile = pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
- "rb")
+ with pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
+ "rb") as samfile:
- for read in samfile.fetch():
- if not read.is_paired:
- self.assertRaises(ValueError, samfile.mate, read)
- elif read.mate_is_unmapped:
- self.assertRaises(ValueError, samfile.mate, read)
- else:
- if counts[read.query_name] == 1:
+ for read in samfile.fetch():
+ if not read.is_paired:
+ self.assertRaises(ValueError, samfile.mate, read)
+ elif read.mate_is_unmapped:
self.assertRaises(ValueError, samfile.mate, read)
else:
- mate = samfile.mate(read)
- self.assertEqual(read.query_name, mate.query_name)
- self.assertEqual(read.is_read1, mate.is_read2)
- self.assertEqual(read.is_read2, mate.is_read1)
- self.assertEqual(
- read.reference_start, mate.next_reference_start)
- self.assertEqual(
- read.next_reference_start, mate.reference_start)
+ if counts[read.query_name] == 1:
+ self.assertRaises(ValueError, samfile.mate, read)
+ else:
+ mate = samfile.mate(read)
+ self.assertEqual(read.query_name, mate.query_name)
+ self.assertEqual(read.is_read1, mate.is_read2)
+ self.assertEqual(read.is_read2, mate.is_read1)
+ self.assertEqual(
+ read.reference_start, mate.next_reference_start)
+ self.assertEqual(
+ read.next_reference_start, mate.reference_start)
def testIndexStats(self):
'''test if total number of mapped/unmapped reads is correct.'''
- samfile = pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
- "rb")
- self.assertEqual(samfile.mapped, 3235)
- self.assertEqual(samfile.unmapped, 35)
- self.assertEqual(samfile.nocoordinate, 0)
+ with pysam.AlignmentFile(os.path.join(DATADIR, "ex1.bam"),
+ "rb") as samfile:
+ self.assertEqual(samfile.mapped, 3235)
+ self.assertEqual(samfile.unmapped, 35)
+ self.assertEqual(samfile.nocoordinate, 0)
class TestMappedUnmapped(unittest.TestCase):
@@ -2452,26 +2485,29 @@ class TestAlignmentFileIndex(unittest.TestCase):
class TestExplicitIndex(unittest.TestCase):
def testExplicitIndexBAM(self):
- samfile = pysam.AlignmentFile(
- os.path.join(DATADIR, "explicit_index.bam"),
- "rb",
- filepath_index=os.path.join(DATADIR, 'ex1.bam.bai'))
-
- samfile.fetch("chr1")
+ with pysam.AlignmentFile(
+ os.path.join(DATADIR, "explicit_index.bam"),
+ "rb",
+ filepath_index=os.path.join(DATADIR, 'ex1.bam.bai')) as samfile:
+ samfile.fetch("chr1")
def testExplicitIndexCRAM(self):
- samfile = pysam.AlignmentFile(
- os.path.join(DATADIR, "explicit_index.cram"),
- "rc",
- filepath_index=os.path.join(DATADIR, 'ex1.cram.crai'))
+ with pysam.AlignmentFile(
+ os.path.join(DATADIR, "explicit_index.cram"),
+ "rc",
+ filepath_index=os.path.join(DATADIR, 'ex1.cram.crai')) as samfile:
+ samfile.fetch("chr1")
def testRemoteExplicitIndexBAM(self):
- samfile = pysam.AlignmentFile(
- "http://genserv.anat.ox.ac.uk/downloads/pysam/test/noindex.bam",
- "rb",
- filepath_index=os.path.join(DATADIR, 'ex1.bam.bai'))
+ if not checkURL(
+ "http://genserv.anat.ox.ac.uk/downloads/pysam/test/noindex.bam"):
+ return
- samfile.fetch("chr1")
+ with pysam.AlignmentFile(
+ "http://genserv.anat.ox.ac.uk/downloads/pysam/test/noindex.bam",
+ "rb",
+ filepath_index=os.path.join(DATADIR, 'ex1.bam.bai')) as samfile:
+ samfile.fetch("chr1")
class TestVerbosity(unittest.TestCase):
diff --git a/tests/TestUtils.py b/tests/TestUtils.py
index efb2333..71ab22a 100644
--- a/tests/TestUtils.py
+++ b/tests/TestUtils.py
@@ -18,15 +18,28 @@ else:
if IS_PYTHON3:
def force_str(s):
- return s.decode('ascii')
+ try:
+ return s.decode('ascii')
+ except AttributeError:
+ return s
+ def force_bytes(s):
+ try:
+ return s.encode('ascii')
+ except AttributeError:
+ return s
else:
def force_str(s):
return s
+ def force_bytes(s):
+ return s
def openfile(fn):
if fn.endswith(".gz"):
- return gzip.open(fn)
+ try:
+ return gzip.open(fn, "rt", encoding="utf-8")
+ except TypeError:
+ return gzip.open(fn, "r")
else:
return open(fn)
@@ -59,8 +72,9 @@ def checkBinaryEqual(filename1, filename2):
return found
-def checkSamtoolsViewEqual(filename1, filename2,
- without_header=False):
+def check_samtools_view_equal(
+ filename1, filename2,
+ without_header=False):
'''return true if the two files are equal in their
content through samtools view.
'''
@@ -139,7 +153,7 @@ def checkFieldEqual(cls, read1, read2, exclude=[]):
(n, getattr(read1, n), getattr(read2, n)))
-def check_lines_equal(cls, a, b, sort=False, filter_f=None):
+def check_lines_equal(cls, a, b, sort=False, filter_f=None, msg=None):
"""check if contents of two files are equal comparing line-wise.
sort: bool
@@ -147,17 +161,17 @@ def check_lines_equal(cls, a, b, sort=False, filter_f=None):
filter_f:
remover lines in both a and b where expression is True
"""
-
aa = openfile(a).readlines()
bb = openfile(b).readlines()
if filter_f is not None:
- aa = [x for x in aa if not filter_f]
- bb = [x for x in bb if not filter_f]
+ aa = [x for x in aa if not filter_f(x)]
+ bb = [x for x in bb if not filter_f(x)]
+
if sort:
- cls.assertEqual(sorted(aa), sorted(bb))
+ cls.assertEqual(sorted(aa), sorted(bb), msg)
else:
- cls.assertEqual(aa, bb)
+ cls.assertEqual(aa, bb, msg)
def get_temp_filename(suffix=""):
diff --git a/tests/VariantFile_test.py b/tests/VariantFile_test.py
index a7e54ac..ef21245 100644
--- a/tests/VariantFile_test.py
+++ b/tests/VariantFile_test.py
@@ -2,6 +2,7 @@ import os
import unittest
import pysam
import gzip
+import subprocess
from TestUtils import get_temp_filename, check_lines_equal
DATADIR="cbcf_data"
@@ -9,7 +10,6 @@ from tabix_test import loadAndConvert
def read_header(filename):
-
data = []
if filename.endswith(".gz"):
for line in gzip.open(filename):
@@ -21,6 +21,7 @@ def read_header(filename):
for line in f:
if line.startswith("#"):
data.append(line)
+
return data
@@ -135,8 +136,9 @@ class TestOpening(unittest.TestCase):
self.assertEqual(len(list(inf.fetch())), 5)
def testDetectBCF(self):
- with pysam.VariantFile(os.path.join(DATADIR,
- "example_vcf40.bcf")) as inf:
+ with pysam.VariantFile(os.path.join(
+ DATADIR,
+ "example_vcf40.bcf")) as inf:
self.assertEqual(inf.category, 'VARIANTS')
self.assertEqual(inf.format, 'BCF')
self.assertEqual(inf.compression, 'BGZF')
@@ -333,7 +335,7 @@ class TestConstructionVCFWithContigs(unittest.TestCase):
check_lines_equal(
self, fn_in, fn_out, sort=True,
- filter_f=lambda x: not x.startswith("##contig"))
+ filter_f=lambda x: x.startswith("##contig"))
os.unlink(fn_out)
def testConstructionWithRecords(self):
@@ -413,6 +415,43 @@ class TestConstructionVCFGZWithoutContigs(TestConstructionVCFWithContigs):
filename = "example_vcf42.vcf.gz"
+class TestSettingRecordValues(unittest.TestCase):
+
+ filename = "example_vcf40.vcf"
+
+ def testSetQual(self):
+ with pysam.VariantFile(os.path.join(DATADIR, self.filename)) as inf:
+ record = next(inf)
+ self.assertEqual(record.qual, 47)
+ record.qual = record.qual
+ self.assertEqual(record.qual, 47)
+ record.qual = 10
+ self.assertEqual(record.qual, 10)
+ self.assertEqual(str(record).split("\t")[5], "10")
+
+ def testGenotype(self):
+ with pysam.VariantFile(os.path.join(DATADIR, self.filename)) as inf:
+ record = next(inf)
+ sample = record.samples["NA00001"]
+ print (sample["GT"])
+ self.assertEqual(sample["GT"], (0, 0))
+# Fails with TypeError
+# sample["GT"] = sample["GT"]
+
+class TestSubsetting(unittest.TestCase):
+
+ filename = "example_vcf42.vcf.gz"
+
+ def testSubsetting(self):
+ with pysam.VariantFile(os.path.join(DATADIR,
+ self.filename)) as inf:
+ inf.subset_samples(["NA00001"])
+
if __name__ == "__main__":
+ # build data files
+ print ("building data files")
+ subprocess.call("make -C %s" % DATADIR, shell=True)
+ print ("starting tests")
unittest.main()
+ print ("completed tests")
diff --git a/tests/cbcf_data/example_vcf42.vcf b/tests/cbcf_data/example_vcf42.vcf
index c6c7030..f103e1f 100644
--- a/tests/cbcf_data/example_vcf42.vcf
+++ b/tests/cbcf_data/example_vcf42.vcf
@@ -17,8 +17,8 @@
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
-M 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
+M 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2:.
17 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
-20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3
-20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4
+20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.
+20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.
20 1234567 microsat1 GTCT G,GTACT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3
diff --git a/tests/faidx_test.py b/tests/faidx_test.py
index f3e6cc4..a123550 100644
--- a/tests/faidx_test.py
+++ b/tests/faidx_test.py
@@ -2,6 +2,7 @@ import pysam
import unittest
import os
import gzip
+import shutil
from TestUtils import checkURL
@@ -56,6 +57,53 @@ class TestFastaFile(unittest.TestCase):
self.file.close()
+class TestFastaFilePathIndex(unittest.TestCase):
+
+ filename = os.path.join(DATADIR, "ex1.fa")
+
+ def testGarbageIndex(self):
+ self.assertRaises(NotImplementedError,
+ pysam.FastaFile,
+ self.filename,
+ filepath_index="garbage.fa.fai")
+ return
+
+ self.assertRaises(ValueError,
+ pysam.FastaFile,
+ self.filename,
+ filepath_index="garbage.fa.fai")
+
+ def testOpenWithoutIndex(self):
+ faidx = pysam.FastaFile(self.filename)
+ faidx.close()
+
+ def testOpenWithStandardIndex(self):
+ self.assertRaises(NotImplementedError,
+ pysam.FastaFile,
+ self.filename,
+ filepath_index=self.filename + ".fai")
+ return
+
+ faidx = pysam.FastaFile(self.filename,
+ filepath_index=self.filename + ".fai")
+ faidx.close()
+
+ def testOpenWithOtherIndex(self):
+ return
+ tmpfilename = "tmp_" + os.path.basename(self.filename)
+ shutil.copyfile(self.filename, tmpfilename)
+ faidx = pysam.FastaFile(tmpfilename,
+ filepath_index=self.filename + ".fai")
+ faidx.close()
+ # index should not be auto-generated
+ self.assertFalse(os.path.exists(tmpfilename + ".fai"))
+ os.unlink(tmpfilename)
+
+class TestFastaFilePathIndexCompressed(TestFastaFilePathIndex):
+
+ filename = os.path.join(DATADIR, "ex1.fa.gz")
+
+
class TestFastxFileFastq(unittest.TestCase):
filetype = pysam.FastxFile
@@ -67,6 +115,9 @@ class TestFastxFileFastq(unittest.TestCase):
persist=self.persist)
self.has_quality = self.filename.endswith('.fq')
+ def tearDown(self):
+ self.file.close()
+
def checkFirst(self, s):
# test first entry
self.assertEqual(s.sequence, "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC")
@@ -160,8 +211,8 @@ class TestFastxFileWithEmptySequence(unittest.TestCase):
with gzip.open(fn) as inf:
ref_num = len(list(inf)) / 4
- f = self.filetype(fn)
- l = len(list(f))
+ with self.filetype(fn) as f:
+ l = len(list(f))
self.assertEqual(ref_num, l)
@@ -175,10 +226,10 @@ class TestRemoteFileFTP(unittest.TestCase):
def testFTPView(self):
if not checkURL(self.url):
return
- f = pysam.Fastafile(self.url)
- self.assertEqual(
- len(f.fetch("chr1", 0, 1000)),
- 1000)
+ with pysam.Fastafile(self.url) as f:
+ self.assertEqual(
+ len(f.fetch("chr1", 0, 1000)),
+ 1000)
if __name__ == "__main__":
diff --git a/tests/pysam_data/Makefile b/tests/pysam_data/Makefile
index aed77b5..89a4a0c 100644
--- a/tests/pysam_data/Makefile
+++ b/tests/pysam_data/Makefile
@@ -17,7 +17,8 @@ all: ex1.pileup.gz \
ex2_truncated.bam \
empty.bam empty.bam.bai \
explicit_index.bam explicit_index.cram \
- faidx_empty_seq.fq.gz
+ faidx_empty_seq.fq.gz \
+ ex1.fa.gz ex1.fa.gz.fai
# ex2.sam - as ex1.sam, but with header
ex2.sam.gz: ex1.bam ex1.bam.bai
@@ -82,3 +83,9 @@ clean:
%.fq.gz: %.fq
gzip < $< > $@
+
+%.fa.gz: %.fa
+ bgzip < $< > $@
+
+%.fa.gz.fai: %.fa.gz
+ samtools faidx $<
diff --git a/tests/samtools_test.py b/tests/samtools_test.py
index e5fd8b9..d5b2791 100644
--- a/tests/samtools_test.py
+++ b/tests/samtools_test.py
@@ -15,7 +15,8 @@ import glob
import sys
import subprocess
import shutil
-from TestUtils import checkBinaryEqual
+from TestUtils import checkBinaryEqual, check_lines_equal, \
+ check_samtools_view_equal, get_temp_filename, force_bytes
IS_PYTHON3 = sys.version_info[0] >= 3
@@ -80,6 +81,8 @@ class SamtoolsTest(unittest.TestCase):
"idxstats ex1.bam > %(out)s_ex1.idxstats",
"fixmate ex1.bam %(out)s_ex1.fixmate.bam",
"flagstat ex1.bam > %(out)s_ex1.flagstat",
+ # Fails python 3.3 on linux, passes on OsX and when
+ # run locally
"calmd ex1.bam ex1.fa > %(out)s_ex1.calmd.bam",
# use -s option, otherwise the following error in samtools 1.2:
# Samtools-htslib-API: bam_get_library() not yet implemented
@@ -132,7 +135,7 @@ class SamtoolsTest(unittest.TestCase):
samtools_version))
def setUp(self):
- '''setup tests.
+ '''setup tests.
For setup, all commands will be run before the first test is
executed. Individual tests will then just compare the output
@@ -146,7 +149,7 @@ class SamtoolsTest(unittest.TestCase):
os.makedirs(WORKDIR)
for f in self.requisites:
- shutil.copy(os.path.join(DATADIR, f),
+ shutil.copy(os.path.join(DATADIR, f),
os.path.join(WORKDIR, f))
self.savedir = os.getcwd()
@@ -184,13 +187,11 @@ class SamtoolsTest(unittest.TestCase):
output = pysam_method(*pysam_parts,
raw=True,
catch_stdout=True)
-
# sys.stdout.write(" pysam ok\n")
-
if ">" in statement:
with open(pysam_targets[-1], "wb") as outfile:
if output is not None:
- outfile = outfile.write(output)
+ outfile.write(force_bytes(output))
for samtools_target, pysam_target in zip(samtools_targets,
pysam_targets):
@@ -204,17 +205,32 @@ class SamtoolsTest(unittest.TestCase):
else:
samtools_files = [samtools_target]
pysam_files = [pysam_target]
-
+
for s, p in zip(samtools_files, pysam_files):
- self.assertTrue(
- checkBinaryEqual(s, p),
- "%s failed: files %s and %s are not the same" %
- (command, s, p))
+ binary_equal = checkBinaryEqual(s, p)
+ error_msg = "%s failed: files %s and %s are not the same" % (command, s, p)
+ if binary_equal:
+ continue
+ if s.endswith(".bam"):
+ self.assertTrue(
+ check_samtools_view_equal(
+ s, p, without_header=True),
+ error_msg)
+ check_lines_equal(
+ self, s, p,
+ filter_f=lambda x: x.startswith("#"),
+ msg=error_msg)
def testStatements(self):
for statement in self.statements:
+ if (statement.startswith("calmd") and
+ list(sys.version_info[:2]) == [3, 3]):
+ # skip calmd test, fails only on python 3.3.5
+ # in linux (empty output). Works in OsX and passes
+ # for 3.4 and 3.5, see issue #293
+ continue
self.check_statement(statement)
-
+
def tearDown(self):
if os.path.exists(WORKDIR):
shutil.rmtree(WORKDIR)
@@ -227,6 +243,28 @@ class EmptyIndexTest(unittest.TestCase):
self.assertRaises(IOError, pysam.samtools.index,
"exdoesntexist.bam")
+class TestReturnType(unittest.TestCase):
+
+ def testReturnValueString(self):
+ retval = pysam.idxstats(os.path.join(DATADIR, "ex1.bam"))
+ if IS_PYTHON3:
+ self.assertFalse(isinstance(retval, bytes))
+ self.assertTrue(isinstance(retval, str))
+ else:
+ self.assertTrue(isinstance(retval, bytes))
+ self.assertTrue(isinstance(retval, basestring))
+
+ def testReturnValueData(self):
+ args = "-O BAM {}".format(os.path.join(DATADIR, "ex1.bam")).split(" ")
+ retval = pysam.view(*args)
+
+ if IS_PYTHON3:
+ self.assertTrue(isinstance(retval, bytes))
+ self.assertFalse(isinstance(retval, str))
+ else:
+ self.assertTrue(isinstance(retval, bytes))
+ self.assertTrue(isinstance(retval, basestring))
+
class StdoutTest(unittest.TestCase):
'''test if stdout can be redirected.'''
@@ -242,11 +280,29 @@ class StdoutTest(unittest.TestCase):
catch_stdout=False)
self.assertEqual(r, None)
+ def testDoubleCalling(self):
+ # The following would fail if there is an
+ # issue with stdout being improperly caught.
+ retvals = pysam.idxstats(
+ os.path.join(DATADIR, "ex1.bam"))
+ retvals = pysam.idxstats(
+ os.path.join(DATADIR, "ex1.bam"))
+
+ def testSaveStdout(self):
+ outfile = get_temp_filename(suffix=".tsv")
+ r = pysam.samtools.flagstat(
+ os.path.join(DATADIR, "ex1.bam"),
+ save_stdout=outfile)
+ self.assertEqual(r, None)
+ with open(outfile) as inf:
+ r = inf.read()
+ self.assertTrue(len(r) > 0)
+
class PysamTest(SamtoolsTest):
"""check access to samtools command in the pysam
main package.
-
+
This is for backwards capability.
"""
diff --git a/tests/tabix_test.py b/tests/tabix_test.py
index f09ba8c..ec1e37e 100644
--- a/tests/tabix_test.py
+++ b/tests/tabix_test.py
@@ -270,6 +270,9 @@ class TestIterationWithoutComments(IterationTest):
IterationTest.setUp(self)
self.tabix = pysam.TabixFile(self.filename)
+ def tearDown(self):
+ self.tabix.close()
+
def testRegionStrings(self):
"""test if access with various region strings
works"""
@@ -351,7 +354,7 @@ class TestIterationWithoutComments(IterationTest):
self.tabix.fetch("chr1", 100, 100)
def testGetContigs(self):
- self.assertEqual(sorted(self.tabix.contigs), [b"chr1", b"chr2"])
+ self.assertEqual(sorted(self.tabix.contigs), ["chr1", "chr2"])
# check that contigs is read-only
self.assertRaises(
AttributeError, setattr, self.tabix, "contigs", ["chr1", "chr2"])
@@ -374,13 +377,10 @@ class TestIterationWithoutComments(IterationTest):
# opens any tabix file
with pysam.TabixFile(self.filename) as inf:
pass
-
+
for i in range(1000):
func1()
- def tearDown(self):
- self.tabix.close()
-
class TestIterationWithComments(TestIterationWithoutComments):
@@ -405,6 +405,9 @@ class TestParser(unittest.TestCase):
self.tabix = pysam.TabixFile(self.filename)
self.compare = loadAndConvert(self.filename)
+ def tearDown(self):
+ self.tabix.close()
+
def testRead(self):
for x, r in enumerate(self.tabix.fetch(parser=pysam.asTuple())):
@@ -505,6 +508,36 @@ class TestParser(unittest.TestCase):
self.assertEqual(a, b)
+class TestGTF(TestParser):
+
+ def testRead(self):
+
+ for x, r in enumerate(self.tabix.fetch(parser=pysam.asGTF())):
+ c = self.compare[x]
+ self.assertEqual(len(c), len(r))
+ self.assertEqual(list(c), list(r))
+ self.assertEqual(c, str(r).split("\t"))
+ self.assertTrue(r.gene_id.startswith("ENSG"))
+ if r.feature != 'gene':
+ self.assertTrue(r.transcript_id.startswith("ENST"))
+ self.assertEqual(c[0], r.contig)
+ self.assertEqual("\t".join(map(str, c)),
+ str(r))
+
+ def testSetting(self):
+
+ for r in self.tabix.fetch(parser=pysam.asGTF()):
+ r.contig = r.contig + "_test"
+ r.source = r.source + "_test"
+ r.feature = r.feature + "_test"
+ r.start += 10
+ r.end += 10
+ r.score = 20
+ r.strand = "+"
+ r.frame = 0
+ r.attributes = 'gene_id "0001";'
+
+
class TestIterators(unittest.TestCase):
filename = os.path.join(DATADIR, "example.gtf.gz")
@@ -522,6 +555,10 @@ class TestIterators(unittest.TestCase):
open(self.tmpfilename_uncompressed, "wb") as outfile:
outfile.write(infile.read())
+ def tearDown(self):
+ self.tabix.close()
+ os.unlink(self.tmpfilename_uncompressed)
+
def open(self):
if self.is_compressed:
@@ -566,9 +603,6 @@ class TestIterators(unittest.TestCase):
# Not implemented
# self.assertRaises(ValueError, i.next)
- def tearUp(self):
- os.unlink(self.tmpfilename_uncompressed)
-
class TestIteratorsGenericCompressed(TestIterators):
is_compressed = True
@@ -584,23 +618,6 @@ class TestIteratorsFileUncompressed(TestIterators):
is_compressed = False
-class TestGTF(TestParser):
-
- def testRead(self):
-
- for x, r in enumerate(self.tabix.fetch(parser=pysam.asGTF())):
- c = self.compare[x]
- self.assertEqual(len(c), len(r))
- self.assertEqual(list(c), list(r))
- self.assertEqual(c, str(r).split("\t"))
- self.assertTrue(r.gene_id.startswith("ENSG"))
- if r.feature != 'gene':
- self.assertTrue(r.transcript_id.startswith("ENST"))
- self.assertEqual(c[0], r.contig)
- self.assertEqual("\t".join(map(str, c)),
- str(r))
-
-
class TestIterationMalformattedGTFFiles(unittest.TestCase):
'''test reading from malformatted gtf files.'''
@@ -638,6 +655,9 @@ class TestBed(unittest.TestCase):
self.tabix = pysam.TabixFile(self.filename)
self.compare = loadAndConvert(self.filename)
+ def tearDown(self):
+ self.tabix.close()
+
def testRead(self):
for x, r in enumerate(self.tabix.fetch(parser=pysam.asBed())):
@@ -670,9 +690,6 @@ class TestBed(unittest.TestCase):
self.assertEqual(int(c[2]) + 1, r.end)
self.assertEqual(str(int(c[2]) + 1), r[2])
- def tearDown(self):
- self.tabix.close()
-
class TestVCF(unittest.TestCase):
@@ -736,6 +753,9 @@ class TestVCFFromTabix(TestVCF):
self.tabix = pysam.TabixFile(self.tmpfilename + ".gz")
self.compare = loadAndConvert(self.filename)
+ def tearDown(self):
+ self.tabix.close()
+
def testRead(self):
ncolumns = len(self.columns)
@@ -804,9 +824,6 @@ class TestVCFFromTabix(TestVCF):
c[ncolumns + y] = "test_%i" % y
r[y] = "test_%i" % y
self.assertEqual(c[ncolumns + y], r[y])
-
- def tearDown(self):
- self.tabix.close()
class TestVCFFromVCF(TestVCF):
@@ -843,6 +860,9 @@ class TestVCFFromVCF(TestVCF):
self.vcf = pysam.VCF()
self.compare = loadAndConvert(self.filename, encode=False)
+ def tearDown(self):
+ self.vcf.close()
+
def testConnecting(self):
fn = os.path.basename(self.filename)
@@ -856,15 +876,25 @@ class TestVCFFromVCF(TestVCF):
def get_iterator(self):
- f = open(self.filename)
- fn = os.path.basename(self.filename)
+ with open(self.filename) as f:
+ fn = os.path.basename(self.filename)
- for x, msg in self.fail_on_opening:
- if "%i.vcf" % x == fn:
- self.assertRaises(ValueError, self.vcf.parse, f)
- return
+ for x, msg in self.fail_on_opening:
+ if "%i.vcf" % x == fn:
+ self.assertRaises(ValueError, self.vcf.parse, f)
+ return
- return self.vcf.parse(f)
+ for vcf_code, msg in self.fail_on_parsing:
+ if "%i.vcf" % vcf_code == fn:
+ self.assertRaises((ValueError,
+ AssertionError),
+ list, self.vcf.parse(f))
+ return
+ # python 2.7
+ # self.assertRaisesRegexp(
+ # ValueError, re.compile(msg), self.vcf.parse, f)
+
+ return list(self.vcf.parse(f))
def get_field_value(self, record, field):
return record[field]
@@ -1063,6 +1093,8 @@ class TestVCFFromVariantFile(TestVCFFromVCF):
missing_value = None
missing_quality = None
+ vcf = None
+
def filter2value(self, r, v):
if r == "PASS":
return ["PASS"], list(v)
@@ -1104,9 +1136,14 @@ class TestVCFFromVariantFile(TestVCFFromVCF):
TestVCF.setUp(self)
self.compare = loadAndConvert(self.filename, encode=False)
+ def tearDown(self):
+ if self.vcf:
+ self.vcf.close()
+ self.vcf = None
+
def get_iterator(self):
- vcf = pysam.VariantFile(self.filename)
- return vcf.fetch()
+ self.vcf = pysam.VariantFile(self.filename)
+ return self.vcf.fetch()
def get_field_value(self, record, field):
return getattr(record, field)
@@ -1124,11 +1161,22 @@ class TestRemoteFileHTTP(unittest.TestCase):
local = os.path.join(DATADIR, "example.gtf.gz")
def setUp(self):
+ if not checkURL(self.url):
+ self.remote_file = None
+ return
+
self.remote_file = pysam.TabixFile(self.url, "r")
self.local_file = pysam.TabixFile(self.local, "r")
+ def tearDown(self):
+ if self.remote_file is None:
+ return
+
+ self.remote_file.close()
+ self.local_file.close()
+
def testFetchAll(self):
- if not checkURL(self.url):
+ if self.remote_file is None:
return
remote_result = list(self.remote_file.fetch())
@@ -1139,16 +1187,15 @@ class TestRemoteFileHTTP(unittest.TestCase):
self.assertEqual(x, y)
def testHeader(self):
+ if self.remote_file is None:
+ return
+
self.assertEqual(list(self.local_file.header), [])
self.assertRaises(AttributeError,
getattr,
self.remote_file,
"header")
- def tearDown(self):
- self.remote_file.close()
- self.local_file.close()
-
class TestIndexArgument(unittest.TestCase):
@@ -1163,13 +1210,11 @@ class TestIndexArgument(unittest.TestCase):
shutil.copyfile(self.index_src, self.index_dst)
with pysam.TabixFile(
- self.filename_src, "r", index=self.index_src) as \
- same_basename_file:
+ self.filename_src, "r", index=self.index_src) as same_basename_file:
same_basename_results = list(same_basename_file.fetch())
with pysam.TabixFile(
- self.filename_dst, "r", index=self.index_dst) as \
- diff_index_file:
+ self.filename_dst, "r", index=self.index_dst) as diff_index_file:
diff_index_result = list(diff_index_file.fetch())
self.assertEqual(len(same_basename_results), len(diff_index_result))
@@ -1263,7 +1308,7 @@ class TestMultipleIterators(unittest.TestCase):
def testDoubleFetch(self):
- with pysam.TabixFile(self.filename) as f:
+ with pysam.TabixFile(self.filename) as f:
for a, b in zip(f.fetch(multiple_iterators=True),
f.fetch(multiple_iterators=True)):
diff --git a/tests/test_samtools_python.py b/tests/test_samtools_python.py
new file mode 100644
index 0000000..1b915fd
--- /dev/null
+++ b/tests/test_samtools_python.py
@@ -0,0 +1,35 @@
+import pysam
+
+def test_idxstats_parse_split_lines():
+ bam_filename = "./pysam_data/ex2.bam"
+ lines = pysam.idxstats(bam_filename, split_lines=True) # Test pysam 0.8.X style output, which returns a list of lines
+ for line in lines:
+ _seqname, _seqlen, nmapped, _nunmapped = line.split()
+
+
+def test_bedcov_split_lines():
+ bam_filename = "./pysam_data/ex1.bam"
+ bed_filename = "./pysam_data/ex1.bed"
+ lines = pysam.bedcov(bed_filename, bam_filename, split_lines=True) # Test pysam 0.8.X style output, which returns a list of lines
+ for line in lines:
+ fields = line.split('\t')
+ assert len(fields) in [4, 5], "bedcov should give tab delimited output with 4 or 5 fields. Split line (%s) gives %d fields." % (fields, len(fields))
+
+
+def test_idxstats_parse():
+ bam_filename = "./pysam_data/ex2.bam"
+ idxstats_string = pysam.idxstats(bam_filename, split_lines=False) # Test pysam 0.9.X style output, which returns a string that needs to be split by \n
+ lines = idxstats_string.splitlines()
+ for line in lines:
+ splt = line.split("\t")
+ _seqname, _seqlen, nmapped, _nunmapped = splt
+
+
+def test_bedcov():
+ bam_filename = "./pysam_data/ex1.bam"
+ bed_filename = "./pysam_data/ex1.bed"
+ bedcov_string = pysam.bedcov(bed_filename, bam_filename, split_lines=False) # Test pysam 0.9.X style output, which returns a string that needs to be split by \n
+ lines = bedcov_string.splitlines()
+ for line in lines:
+ fields = line.split('\t')
+ assert len(fields) in [4, 5], "bedcov should give tab delimited output with 4 or 5 fields. Split line (%s) gives %d fields." % (fields, len(fields))
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pysam.git
More information about the debian-med-commit mailing list