[med-svn] [sra-sdk] 01/03: Imported Upstream version 2.6.2
Andreas Tille
tille at debian.org
Sun May 15 18:34:53 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository sra-sdk.
commit 6881a0000f6d1ee80a666ecfea464857802a5fe4
Author: Andreas Tille <tille at debian.org>
Date: Sun May 15 20:18:06 2016 +0200
Imported Upstream version 2.6.2
---
CHANGES | 58 +-
CHANGES.md | 54 +
Makefile | 36 +-
README | 2 +
README.md | 5 +-
build/.gitignore | 3 +-
.../{prefetch.vcxproj => dbgap-mount-tool.vcxproj} | 20 +-
build/MSVC/2010/prefetch.vcxproj | 1 +
build/MSVC/2010/test-align-info.vcxproj | 5 +-
build/MSVC/2010/test-fastq-loader.vcxproj | 3 +-
build/MSVC/2010/vdb-common.props | 10 +-
build/MSVC/2010/vdb-dump.vcxproj | 2 +
build/Makefile.deb | 109 +
build/Makefile.gcc | 10 +-
build/Makefile.install | 50 +-
build/Makefile.linux | 13 +
build/Makefile.rpm | 165 +
build/Makefile.shell | 14 +-
build/Makefile.targets | 13 +
build/Makefile.tools | 40 +
build/ld.linux.exe.sh | 9 +-
build/ld.linux.gcc.sh | 5 +-
build/ld.mac.dlib.sh | 2 +-
build/ld.mac.exe.sh | 2 +-
build/ld.sh | 9 +-
{test/ngs-pileup => build}/valgrind.suppress | 0
setup/konfigure.perl | 44 +-
setup/package.prl | 2 +-
test/Makefile | 5 +
test/align-cache/CSRA_file | Bin 0 -> 154937 bytes
test/{kget => align-cache}/Makefile | 36 +-
.../valgrind.suppress | 0
test/{vcf-loader => copycat}/Makefile | 47 +-
test/copycat/input/1.xml | 5 +
test/fastq-loader/Makefile | 22 +-
test/fastq-loader/expected/1.1.stdout | 3 +-
test/fastq-loader/expected/1.2.stdout | 3 +-
test/fastq-loader/expected/1.4.stdout | 6 +
test/fastq-loader/expected/10.0.stdout | 3 +-
test/fastq-loader/expected/11.0.stdout | 5 +-
test/fastq-loader/expected/11.1.stdout | 3 +-
test/fastq-loader/expected/12.0.stdout | 4 +
test/fastq-loader/expected/12.1.stdout | 4 +
test/fastq-loader/expected/12.2.stdout | 5 +-
test/fastq-loader/expected/13.0.stdout | 96 +
test/fastq-loader/expected/13.1.stdout | 96 +
test/fastq-loader/expected/14.0.stdout | 32 +
test/fastq-loader/expected/2.1.1.stdout | 4 +
test/fastq-loader/expected/2.1.stdout | 8 +-
test/fastq-loader/expected/2.5.stdout | 4 +
test/fastq-loader/expected/2.6.stdout | 3 +-
test/fastq-loader/expected/2.7.stdout | 3 +-
test/fastq-loader/expected/2.8.1.stdout | 8 +-
test/fastq-loader/expected/2.8.stdout | 8 +-
test/fastq-loader/expected/3.1.stdout | 8 +
test/fastq-loader/expected/4.2.stdout | 3 +-
test/fastq-loader/expected/4.4.stdout | 6 +
test/fastq-loader/expected/4.6.stdout | 3 +-
test/fastq-loader/expected/5.0.stdout | 3 +-
test/fastq-loader/expected/6.0.stdout | 26 +
test/fastq-loader/expected/7.1.stdout | 4 +
test/fastq-loader/expected/7.2.stdout | 4 +
test/fastq-loader/expected/7.3.stdout | 4 +
test/fastq-loader/expected/8.0.stdout | 3 +-
test/fastq-loader/expected/9.0.stdout | 4 +
test/fastq-loader/input/13.0.fasta | 2 +
test/fastq-loader/input/13.0.fastq | 12 +
test/fastq-loader/input/13.1.fastq | 12 +
test/fastq-loader/input/14.0.fasta | 2 +
test/fastq-loader/wb-test-fastq.cpp | 20 +
test/general-loader/.gitignore | 1 -
test/general-loader/Makefile | 13 +-
test/general-loader/expected/2.stderr | 4 +-
test/general-loader/expected/2packed.stderr | 4 +-
test/general-loader/{ => input}/column01 | 5 +-
test/general-loader/{ => input}/column02 | 5 +-
test/general-loader/test-general-loader.cpp | 304 +-
test/general-loader/test-general-writer.cpp | 4 +-
test/{kget => kar}/Makefile | 35 +-
test/kar/expected/1.0.stdout | 35 +
test/kar/input/1.0/1.md5 | 0
test/kar/input/1.0/col/sub1/idx | 0
test/kar/input/1.0/col/sub1/idx0 | 0
test/kar/input/1.0/col/sub1/idx1 | 0
test/kar/input/1.0/col/sub1/idx2 | 0
test/kar/input/1.0/col/sub1/idx3 | 0
test/kar/input/1.0/idx/4a | 0
test/kar/input/1.0/idx/4b | 0
test/kar/input/1.0/idx/sub/4c | 0
test/kar/input/1.0/idx/sub/4d | 0
test/kar/input/1.0/md/cur | 0
test/kar/input/1.0/md5 | 0
test/kar/input/1.0/sub1/col/sub1/idx | 0
test/kar/input/1.0/sub1/col/sub1/idx0 | 0
test/kar/input/1.0/sub1/col/sub1/idx1 | 0
test/kar/input/1.0/sub1/col/sub1/idx2 | 0
test/kar/input/1.0/sub1/col/sub1/idx3 | 0
test/kar/input/1.0/sub1/md/cur | 0
test/kar/input/1.0/sub1/sub2/col/sub1/idx | 0
test/kar/input/1.0/sub1/sub2/col/sub1/idx0 | 0
test/kar/input/1.0/sub1/sub2/col/sub1/idx1 | 0
test/kar/input/1.0/sub1/sub2/col/sub1/idx2 | 0
test/kar/input/1.0/sub1/sub2/col/sub1/idx3 | 0
test/kar/test-kar.sh | 97 +
test/kget/Makefile | 10 +
test/kget/test_kget.py | 175 +
test/ngs-pileup/Makefile | 3 -
test/{vcf-loader => pileup-stats}/Makefile | 44 +-
.../{ngs-pileup => pileup-stats}/valgrind.suppress | 0
test/ref-variation/Makefile | 19 +-
test/ref-variation/expected/ref-variation.out | 192 +
test/ref-variation/expected/var-expand.out | 1 +
test/ref-variation/have-access.sh | 25 +
test/ref-variation/ref-variation.in | 4 +
test/ref-variation/ref-variation.sh | 29 +
test/samline/Makefile | 39 +-
test/samline/alig-gen.c | 854 +-
test/samline/bx_tag_test.py | 27 +
test/samline/ca_test.py | 80 +
test/samline/cigar.c | 628 +-
test/samline/cigar.h | 4 +-
test/samline/example.sh | 35 +-
test/samline/refbases.c | 209 +-
test/samline/refbases.h | 4 +-
test/samline/reject_multi_refnames.sh | 62 +
test/samline/sam.py | 339 +
test/samline/sampart.c | 304 +
tools/kget/kget.vers => test/samline/sampart.vers | 0
test/samline/sec_align_problem.sh | 76 +
test/vcf-loader/Makefile | 3 -
test/{vcf-loader => vdb-validate}/Makefile | 53 +-
test/vdb-validate/db/blob-row-gap.kar | Bin 0 -> 3362642 bytes
test/vdb-validate/db/sdc_len_mismatch.csra | Bin 0 -> 181017 bytes
test/vdb-validate/db/sdc_pa_longer.csra | Bin 0 -> 183853 bytes
test/vdb-validate/db/sdc_tmp_mismatch.csra | Bin 0 -> 170925 bytes
test/vdb-validate/expected/ROW_GAP | 42 +
test/vdb-validate/expected/no_sdc_checks | 23 +
test/vdb-validate/expected/sdc_len_mismatch_1 | 24 +
test/vdb-validate/expected/sdc_len_mismatch_2 | 24 +
test/vdb-validate/expected/sdc_pa_longer_1 | 26 +
test/vdb-validate/expected/sdc_pa_longer_2 | 26 +
test/vdb-validate/expected/sdc_pa_longer_3 | 26 +
test/vdb-validate/expected/sdc_tmp_mismatch | 16 +
.../vdb-validate/runtestcase.sh | 59 +-
test/vschema/Makefile | 2 +
test/vschema/idx-text/checklookup.cpp | 18 +-
test/vschema/idx-text/expected/ci-1.stdout | 6 +
test/vschema/idx-text/expected/ci-5.stdout | 15 +
test/vschema/idx-text/expected/cs-3.stdout | 18 +
test/vschema/idx-text/makeinputs.cpp | 40 +-
tools/Makefile | 12 +-
tools/align-cache/Makefile | 1 -
tools/align-info/align-info.vers | 2 +-
tools/align-info/align-info.vers.h | 2 +-
tools/bam-loader/Globals.h | 11 +-
tools/bam-loader/Makefile | 3 +-
tools/bam-loader/alignment-writer.h | 2 +
tools/bam-loader/bam-alignment.h | 74 +
tools/bam-loader/bam-load.vers | 2 +-
tools/bam-loader/bam-loader.c | 395 +-
tools/bam-loader/bam-priv.h | 196 +
tools/bam-loader/bam.c | 531 +-
tools/bam-loader/bam.h | 70 +-
tools/bam-loader/loader-imp.c | 1573 +-
tools/bam-loader/loader-imp.h | 3 +-
.../{loader-imp.h => low-match-count.cpp} | 49 +-
.../bam-loader/{loader-imp.h => low-match-count.h} | 10 +-
tools/bam-loader/reference-writer.c | 57 +-
tools/bam-loader/reference-writer.h | 2 +-
tools/bam-loader/sequence-writer.c | 478 +-
tools/bam-loader/sequence-writer.h | 23 +-
tools/cache-mgr/cache-mgr.vers | 2 +-
tools/cache-mgr/cache-mgr.vers.h | 2 +-
tools/ccextract/ccextract.vers | 2 +-
tools/cg-load/cg-load.vers | 2 +-
tools/copycat/Makefile | 1 +
tools/copycat/ccfileformat.c | 42 +-
tools/copycat/copycat.vers | 2 +-
tools/copycat/magic | 18398 +++++++++++++++++++
tools/dbgap-mount/Makefile | 37 +-
tools/dbgap-mount/dbgap-mount-tool.c | 565 +
.../dbgap-mount-tool.h} | 49 +-
.../dbgap-mount-tool.vers} | 0
tools/dbgap-mount/dbgap-mount-tool.vers.h | 1 +
tools/dbgap-mount/demo.c | 14 +-
.../refbases.h => tools/dbgap-mount/unix/wrap.c | 31 +-
tools/dbgap-mount/win/wrap.c | 344 +
tools/fastdump/.gitignore | 3 +
{test/samline => tools/fastdump}/Makefile | 25 +-
tools/fastdump/cmn_iter.c | 287 +
test/samline/cigar.h => tools/fastdump/cmn_iter.h | 51 +-
tools/fastdump/fastdump.c | 432 +
tools/{kget/kget.vers => fastdump/fastdump.vers} | 0
tools/fastdump/fastq_iter.c | 97 +
.../refbases.h => tools/fastdump/fastq_iter.h | 35 +-
tools/fastdump/file_printer.c | 130 +
.../refbases.h => tools/fastdump/file_printer.h | 23 +-
tools/fastdump/helper.c | 685 +
tools/fastdump/helper.h | 141 +
tools/fastdump/index.c | 335 +
test/samline/cigar.h => tools/fastdump/index.h | 38 +-
tools/fastdump/join.c | 543 +
test/samline/refbases.h => tools/fastdump/join.h | 43 +-
tools/fastdump/line_iter.c | 199 +
.../refbases.h => tools/fastdump/line_iter.h | 27 +-
tools/fastdump/lookup_reader.c | 331 +
.../cigar.h => tools/fastdump/lookup_reader.h | 46 +-
tools/fastdump/lookup_writer.c | 138 +
.../cigar.h => tools/fastdump/lookup_writer.h | 38 +-
tools/fastdump/merge_sorter.c | 173 +
.../refbases.h => tools/fastdump/merge_sorter.h | 107 +-
tools/fastdump/raw_read_iter.c | 100 +
.../refbases.h => tools/fastdump/raw_read_iter.h | 34 +-
tools/fastdump/readme.txt | 92 +
tools/fastdump/sorter.c | 538 +
test/samline/cigar.h => tools/fastdump/sorter.h | 46 +-
tools/fastdump/special_iter.c | 98 +
.../refbases.h => tools/fastdump/special_iter.h | 35 +-
tools/fastdump/todo.txt | 3 +
tools/fastdump/verify.sh | 60 +
tools/fastq-dump/fastq-dump.vers | 2 +-
tools/fastq-loader/Makefile | 2 +-
tools/fastq-loader/fastq-grammar.c | 480 +-
tools/fastq-loader/fastq-grammar.y | 37 +-
tools/fastq-loader/fastq-lex.c | 453 +-
tools/fastq-loader/fastq-lex.l | 1 +
tools/fastq-loader/fastq-loader.c | 7 +-
tools/fastq-loader/fastq-tokens.h | 21 +-
tools/fastq-loader/latf-load.vers | 2 +-
tools/fastq-loader/latf-load.vers.h | 2 +-
tools/fuse/remote-cache.c | 631 +-
tools/fuse/remote-cache.h | 4 +-
tools/fuse/remote-file.c | 24 +-
tools/fuse/remote-fuser.vers | 2 +-
tools/fuse/sra-fuser-test.vers | 2 +-
tools/fuse/sra-fuser-test.vers.h | 2 +-
tools/fuse/sra-fuser.vers | 2 +-
tools/fuse/sra-makeidx.vers | 2 +-
tools/general-loader/database-loader.cpp | 3 +-
tools/general-loader/general-loader.vers | 2 +-
tools/general-loader/general-loader.vers.h | 2 +-
tools/general-loader/libgeneral-writer.vers | 2 +-
tools/kar/kar.c | 70 +-
tools/kar/kar.vers | 2 +-
tools/kar/kar.vers.h | 2 +-
tools/kar/sra-kar.vers | 2 +-
tools/kar/sra-kar.vers.h | 2 +-
tools/keyring-srv/keyring-srv.vers | 2 +-
tools/kget/examples.sh | 62 +
tools/kget/kget.c | 739 +-
tools/kget/kget.vers | 2 +-
tools/kqsh/kqsh.vers | 2 +-
tools/nenctool/nenctest.vers | 2 +-
tools/nenctool/nenctool.vers | 2 +-
tools/nencvalid/nencvalid.vers | 2 +-
tools/pacbio-load/pacbio-load.c | 81 +-
tools/pacbio-load/pacbio-load.vers | 2 +-
tools/pileup-stats/pileup-stats.cpp | 187 +-
tools/pileup-stats/pileup-stats.vers | 2 +-
tools/prefetch/prefetch.c | 186 +-
tools/prefetch/prefetch.vers | 2 +-
tools/prefetch/prefetch.vers.h | 2 +-
tools/qual-recalib-stat/qual-recalib-stat.vers | 2 +-
tools/qual-recalib-stat/qual-recalib-stat.vers.h | 2 +-
tools/rd-filter-redact/rd-filter-redact.vers | 2 +-
tools/rd-filter-redact/read-filter-redact.vers | 2 +-
tools/rd-filter-redact/read-filter-redact.vers.h | 2 +-
tools/rd-filter-redact/test-read-write-cursor.vers | 2 +-
tools/ref-variation/common.cpp | 5 +-
tools/ref-variation/common.h | 3 +-
tools/ref-variation/helper.cpp | 176 +-
tools/ref-variation/helper.h | 34 +-
tools/ref-variation/ref-variation.cpp | 79 +-
tools/ref-variation/ref-variation.vers | 2 +-
tools/ref-variation/ref-variation.vers.h | 2 +-
tools/ref-variation/var-expand.cpp | 171 +-
tools/ref-variation/var-expand.vers | 2 +-
tools/ref-variation/var-expand.vers.h | 2 +-
tools/refseq-load/refseq-load.vers | 2 +-
tools/sra-dump/abi-dump.vers | 2 +-
tools/sra-dump/abi-dump.vers.h | 2 +-
tools/sra-dump/core.c | 4 +-
tools/sra-dump/fastq-dump.vers | 2 +-
tools/sra-dump/fastq-dump.vers.h | 2 +-
tools/sra-dump/illumina-dump.vers | 2 +-
tools/sra-dump/illumina-dump.vers.h | 2 +-
tools/sra-dump/sff-dump.vers | 2 +-
tools/sra-dump/sff-dump.vers.h | 2 +-
tools/sra-dump/sra-dump.vers | 2 +-
tools/sra-load/abi-load.vers | 2 +-
tools/sra-load/fastq-fmt.c | 2 +-
tools/sra-load/fastq-load.vers | 2 +-
tools/sra-load/helicos-load.vers | 2 +-
tools/sra-load/illumina-load.vers | 2 +-
tools/sra-load/pacbio-loadxml.vers | 2 +-
tools/sra-load/sff-load.vers | 2 +-
tools/sra-load/srf-load.vers | 2 +-
tools/sra-pileup/read_fkt.c | 882 +-
tools/sra-pileup/sam-aligned.c | 231 +-
tools/sra-pileup/sam-dump.vers | 2 +-
tools/sra-pileup/sam-dump.vers.h | 2 +-
tools/sra-pileup/sam-dump2.vers | 2 +-
tools/sra-pileup/sam-hdr1.c | 1055 +-
tools/sra-pileup/sam-unaligned.c | 68 +-
tools/sra-pileup/sra-pileup.vers | 2 +-
tools/sra-pileup/sra-pileup.vers.h | 2 +-
tools/sra-sort/buff-writer.c | 16 +-
tools/sra-sort/col-pair.c | 4 +-
tools/sra-sort/csra-pair.c | 36 +-
tools/sra-sort/csra-pair.h | 3 +
tools/sra-sort/csra-tbl.c | 31 +-
tools/sra-sort/sra-sort.vers | 2 +-
tools/sra-sort/xcheck-ref-align.c | 9 +-
tools/sra-stat/sra-stat.c | 107 +-
tools/sra-stat/sra-stat.vers | 2 +-
tools/sra-stat/sra-stat.vers.h | 2 +-
tools/sra/table-vers.vers | 2 +-
tools/srapath/srapath.vers | 2 +-
tools/srapath/srapath.vers.h | 2 +-
tools/update-schema/vdb-update-schema.vers | 2 +-
tools/util/Makefile | 17 +-
tools/util/kdb-index.vers | 2 +-
tools/util/kdbmeta.vers | 2 +-
tools/util/kdbmeta.vers.h | 2 +-
tools/util/md5cp.vers | 2 +-
tools/util/md5cp.vers.h | 2 +-
tools/util/pacbio-correct.vers | 2 +-
tools/util/pseudo-aligner.cpp | 515 +
tools/util/rcexplain.vers | 2 +-
tools/util/rcexplain.vers.h | 2 +-
tools/util/rowwritetest.vers | 2 +-
tools/util/schema-replace.vers | 2 +-
tools/util/schema-replace.vers.h | 2 +-
tools/util/test-sra.vers | 2 +-
tools/util/test-sra.vers.h | 2 +-
tools/util/testld.vers | 2 +-
tools/util/txt2kdb.vers | 2 +-
tools/util/vdb-lock.vers | 2 +-
tools/util/vdb-lock.vers.h | 2 +-
tools/util/vdb-passwd.vers | 2 +-
tools/util/vdb-passwd.vers.h | 2 +-
tools/util/vdb-unlock.vers | 2 +-
tools/util/vdb-unlock.vers.h | 2 +-
tools/vcf-loader/vcf-load.vers | 2 +-
tools/vdb-config/vdb-config.c | 51 +-
tools/vdb-config/vdb-config.vers | 2 +-
tools/vdb-config/vdb-config.vers.h | 2 +-
tools/vdb-copy/vdb-copy.vers | 2 +-
tools/vdb-copy/vdb-copy.vers.h | 2 +-
tools/vdb-decrypt/vdb-decrypt.vers | 2 +-
tools/vdb-decrypt/vdb-decrypt.vers.h | 2 +-
tools/vdb-decrypt/vdb-encrypt.vers | 2 +-
tools/vdb-decrypt/vdb-encrypt.vers.h | 2 +-
tools/vdb-dump/Makefile | 3 +
tools/vdb-dump/vdb-boot.vers | 2 +-
tools/vdb-dump/vdb-dump-context.c | 109 +-
tools/vdb-dump/vdb-dump-context.h | 30 +-
tools/vdb-dump/vdb-dump-fastq.c | 1379 +-
tools/vdb-dump/vdb-dump-formats.c | 12 +-
tools/vdb-dump/vdb-dump-helper.c | 512 +-
tools/vdb-dump/vdb-dump-helper.h | 22 +-
tools/vdb-dump/vdb-dump-interact.c | 265 +
.../vdb-dump/vdb-dump-interact.h | 17 +-
tools/vdb-dump/vdb-dump-print.c | 895 +-
tools/vdb-dump/vdb-dump-print.h | 15 +-
tools/vdb-dump/vdb-dump-repo.c | 281 +
.../refbases.h => tools/vdb-dump/vdb-dump-repo.h | 87 +-
tools/vdb-dump/vdb-dump-str.c | 96 +-
tools/vdb-dump/vdb-dump.c | 1150 +-
tools/vdb-dump/vdb-dump.vers | 2 +-
tools/vdb-dump/vdb-dump.vers.h | 2 +-
tools/vdb-dump/vdb_info.c | 547 +-
tools/vdb-validate/Makefile | 21 +-
tools/vdb-validate/check-corrupt.cpp | 644 +
tools/vdb-validate/check-corrupt.vers | 1 +
tools/vdb-validate/check-corrupt.vers.h | 1 +
tools/vdb-validate/vdb-validate.c | 677 +-
tools/vdb-validate/vdb-validate.vers | 2 +-
tools/vdb-validate/vdb-validate.vers.h | 2 +-
379 files changed, 39662 insertions(+), 7014 deletions(-)
diff --git a/CHANGES b/CHANGES
index 63d53dc..f406ba5 100644
--- a/CHANGES
+++ b/CHANGES
@@ -26,5 +26,59 @@
The NCBI SRA Software Development Kit
Contact: sra-tools at ncbi.nlm.nih.gov
-Release documentation is now being delivered at
-http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software
+NCBI External Developer Release: SRA Toolkit 2.6.2
+April 20, 2016
+
+ align-cache: a tool producing vdbcache that reduces effects of random access and improves speed.
+ bam-load: Fixed bug in validation code for unsorted bam files
+ bam-load: If two (or more) local reference names refer to the same global reference,
+ bam-load will record the first one used and report the change.
+ bam-load: Secondary alignment will be used for spot assembly if the loader is explicitly told to use them
+ bam-load: The code that had invalid item in index problem was removed in the process of addressing a performance issue.
+ bam-load: change reporting of fatal warnings into fatal errors
+ bam-load: changed from an error to a warning if using secondary alignments to create spots
+ bam-load: low-match secondary alignments are now discarded; low-match primary alignments are logged,
+ and if too many, it will abort the load.
+ bam-load: records the contents of the BX tag
+ bam-load: rules for spot assembly were reverted to assembling spots only on primary alignments
+ blast, build: Improved blast tools: all required libraries are linked statically.
+ build: Allow to build sra-tools on systems without static c++ library
+ check-corrupt, vdb-validate: Added a new set of checks that can be triggered by using one of two "--sdc:" cmd options
+ copycat: now runs on Centos 7; no longer uses system-installed magic file
+ dbgap-mount: Added support for standard options "-L" and "-o", which allow users
+ to determine the logging level and log output file
+ dbgap-mount: New optional parameter was introduced '-u' which allows user
+ to unmount FUSE/DOKAN volume. Implemented on linux and windows
+ fastq-load: The (old) fastq-loader will properly report multiple reads on the 454 platform (and still fail).
+ kar: added '--md5' option to create md5sum compatible auxiliary file
+ kdb: Fixed VTableDropColumn, so that it can drop static columns
+ kfs, kns, ngs, sra-tools: Fixed thread safety issues for both cache and http files
+ kget: kget has a new option --full to match wget in speed. added examples.sh
+ kproc: Fixed KQueue to wake waiters when sealed, fixed KSemaphore to wake waiters when canceled
+ latf-load: now allows underscores inside spot group names
+ latf-load: now loads data produced by fastq-dump
+ latf-load: updated to support Illumina tag line format with identifier at the front
+ pileup-stats: added -V (--version) option: prints out the software
+ pileup-stats: pileup-stats: added version support (options -V or --version)
+ prefetch: Added --eliminate-quals option which speeds up HTTP download by ignoring QUALITY column's data
+ prefetch: Fixed failure when running prefetch.exe "-a<bin|key>" when there is a space after "-a"
+ prefetch: messages about maximum size of download are made more user-friendly.
+ prefetch: now will download even when caching is disabled
+ ref-variation: --input-file option allows to specify input accessions and paths in the file
+ ref-variation: added "count-strand" option: it controls relative orientation of 3' and 5' fragments.
+ ref-variation: added -c option to flush output immediately; reporting zero matches
+ ref-variation: added a way to specify a number of repeats of the query
+ ref-variation: improved threading management
+ ref-variation: removed irrelevant warnings reported in some cases in debug version only
+ sam-dump: Segfault no longer occurs when confronted with large amounts of header lines
+ sam-dump: added option to produce MD tags
+ sam-dump: filters out duplicates in the rows that it generates
+ sam-dump: produces BX-tags if preserved in SRA file by bam-load
+ sra-sort: correctly generates spot-id column even in the absence of primary alignments
+ sra-stat: no longer fails when CS_NATIVE column is not present.
+ sra-tools, vdb-config: Removed dependency of mac binaries on unnecessary libraries, e.g. libxml2.
+ sra-tools: https://github.com/ncbi/sra-tools/issues/27 : contains short and long examples of how to configure sra-tools build
+ var-expand: a tool for batch variation expansion
+ vdb-config: now handles standard options such as --option-file
+ vdb-validate: Added code to continue with the next row when column has discontiguous blobs
+
diff --git a/CHANGES.md b/CHANGES.md
new file mode 100644
index 0000000..d0f49b0
--- /dev/null
+++ b/CHANGES.md
@@ -0,0 +1,54 @@
+# NCBI External Developer Release:
+## SRA Toolkit 2.6.2
+**April 20, 2016**
+
+ **align-cache**: a tool producing vdbcache that reduces effects of random access and improves speed.
+ **bam-load**: Fixed bug in validation code for unsorted bam files
+ **bam-load**: If two (or more) local reference names refer to the same global reference, bam-load will record the first one used and report the change.
+ **bam-load**: Secondary alignment will be used for spot assembly if the loader is explicitly told to use them
+ **bam-load**: The code that had invalid item in index problem was removed in the process of addressing a performance issue.
+ **bam-load**: change reporting of fatal warnings into fatal errors
+ **bam-load**: changed from an error to a warning if using secondary alignments to create spots
+ **bam-load**: low-match secondary alignments are now discarded; low-match primary alignments are logged, and if too many, it will abort the load.
+ **bam-load**: records the contents of the BX tag
+ **bam-load**: rules for spot assembly were reverted to assembling spots only on primary alignments
+ **blast, build**: Improved blast tools: all required libraries are linked statically.
+ **build**: Allow to build sra-tools on systems without static c++ library
+ **check-corrupt, vdb-validate**: Added a new set of checks that can be triggered by using one of two "--sdc:" cmd options
+ **copycat**: now runs on Centos 7; no longer uses system-installed magic file
+ **dbgap-mount**: Added support for standard options "-L" and "-o", which allow users to determine the logging level and log output file
+ **dbgap-mount**: New optional parameter was introduced '-u' which allows user to unmount FUSE/DOKAN volume. Implemented on linux and windows
+ **fastq-load**: The (old) fastq-loader will properly report multiple reads on the 454 platform (and still fail).
+ **kar**: added '--md5' option to create md5sum compatible auxiliary file
+ **kdb**: Fixed VTableDropColumn, so that it can drop static columns
+ **kfs, kns, ngs, sra-tools**: Fixed thread safety issues for both cache and http files
+ **kget**: has a new option --full to match wget in speed. added examples.sh
+ **kproc**: Fixed KQueue to wake waiters when sealed, fixed KSemaphore to wake waiters when canceled
+ **latf-load**: now allows underscores inside spot group names
+ **latf-load**: now loads data produced by fastq-dump
+ **latf-load**: updated to support Illumina tag line format with identifier at the front
+ **pileup-stats**: added -V (--version) option: prints out the software
+ **pileup-stats**: pileup-stats: added version support (options -V or --version)
+ **prefetch**: Added --eliminate-quals option which speeds up HTTP download by ignoring QUALITY column's data
+ **prefetch**: Fixed failure when running prefetch.exe "-a<bin|key>" when there is a space after "-a"
+ **prefetch**: messages about maximum size of download are made more user-friendly.
+ **prefetch**: now will download even when caching is disabled
+ **ref-variation**: --input-file option allows to specify input accessions and paths in the file
+ **ref-variation**: added "count-strand" option: it controls relative orientation of 3' and 5' fragments.
+ **ref-variation**: added -c option to flush output immediately; reporting zero matches
+ **ref-variation**: added a way to specify a number of repeats of the query
+ **ref-variation**: improved threading management
+ **ref-variation**: removed irrelevant warnings reported in some cases in debug version only
+ **sam-dump**: Segfault no longer occurs when confronted with large amounts of header lines
+ **sam-dump**: added option to produce MD tags
+ **sam-dump**: filters out duplicates in the rows that it generates
+ **sam-dump**: produces BX-tags if preserved in SRA file by bam-load
+ **sra-sort**: correctly generates spot-id column even in the absence of primary alignments
+ **sra-stat**: no longer fails when CS_NATIVE column is not present.
+ **sra-tools, vdb-config**: Removed dependency of mac binaries on unnecessary libraries, e.g. libxml2.
+ **sra-tools**: [https://github.com/ncbi/sra-tools/issues/27](https://github.com/ncbi/sra-tools/issues/27) : contains short and long examples of how to configure sra-tools build
+ **var-expand**: a tool for batch variation expansion
+ **vdb-config**: now handles standard options such as --option-file
+ **vdb-validate**: Added code to continue with the next row when column has discontiguous blobs
+
+
diff --git a/Makefile b/Makefile
index 3410087..70766da 100644
--- a/Makefile
+++ b/Makefile
@@ -74,7 +74,7 @@ uninstall:
#-------------------------------------------------------------------------------
# clean
#
-clean: clean_test
+clean: clean_deb clean_rpm clean_test
clean_test:
@ $(MAKE) -s -C test clean
@@ -98,6 +98,40 @@ slowtests_test:
@ $(MAKE) -s -C test slowtests
#-------------------------------------------------------------------------------
+# valgrind
+#
+valgrind: valgrind_test
+
+valgrind_test:
+ @ $(MAKE) -s -C test valgrind
+
+#-------------------------------------------------------------------------------
+# RPM
+#
+ifeq (mac,$(OS))
+rpm:
+ @ echo "Not making rpm on Mac"
+clean_rpm:
+else
+rpm: std
+ @ $(MAKE) rpm -s TOP=$(CURDIR) -f build/Makefile.rpm
+clean_rpm:
+ @ $(MAKE) clean_rpm -s TOP=$(CURDIR) -f build/Makefile.rpm
+endif
+
+#-------------------------------------------------------------------------------
+# DEB
+#
+ifeq (mac,$(OS))
+deb: std
+else
+deb:
+endif
+ @ $(MAKE) deb -s TOP=$(CURDIR) -f build/Makefile.deb
+clean_deb:
+ @ $(MAKE) clean_deb -s TOP=$(CURDIR) -f build/Makefile.deb
+
+#-------------------------------------------------------------------------------
# pass-through targets
#
COMPILERS = GCC ICC VC++ CLANG
diff --git a/README b/README
index ac593f7..4fb449e 100644
--- a/README
+++ b/README
@@ -30,6 +30,8 @@ Contact: sra-tools at ncbi.nlm.nih.gov
SRA Tools web site: http://www.ncbi.nlm.nih.gov/Traces/sra/?view=toolkit_doc
Download page: https://github.com/ncbi/sra-tools/wiki/Downloads
+Please check the CHANGES file for change history.
+
The SRA Toolkit and SDK from NCBI is a collection of tools and libraries for
using data in the INSDC Sequence Read Archives.
diff --git a/README.md b/README.md
index fbb026a..19c2097 100644
--- a/README.md
+++ b/README.md
@@ -2,11 +2,14 @@
### Contact:
email: sra-tools at ncbi.nlm.nih.gov
-or visit the [SRA Tools web site](http://ncbi.github.io/sra-tools) - UPDATED
+or visit the [SRA Tools web site](http://ncbi.github.io/sra-tools)
### Download
Visit our [download page](https://github.com/ncbi/sra-tools/wiki/Downloads) for pre-built binaries.
+### Change Log
+Please check the CHANGES.md file for change history.
+
## The SRA Toolkit
The SRA Toolkit and SDK from NCBI is a collection of tools and libraries for
using data in the INSDC Sequence Read Archives.
diff --git a/build/.gitignore b/build/.gitignore
index 797da11..2a57072 100644
--- a/build/.gitignore
+++ b/build/.gitignore
@@ -1,5 +1,6 @@
.cvsignore
RHOST* ARCH* BUILD* COMP* OUTDIR*
STATIC
+ld.linux.exe_cmd.sh
LNKG.*
-Makefile.config*
\ No newline at end of file
+Makefile.config*
diff --git a/build/MSVC/2010/prefetch.vcxproj b/build/MSVC/2010/dbgap-mount-tool.vcxproj
similarity index 55%
copy from build/MSVC/2010/prefetch.vcxproj
copy to build/MSVC/2010/dbgap-mount-tool.vcxproj
index b491b77..549c17d 100644
--- a/build/MSVC/2010/prefetch.vcxproj
+++ b/build/MSVC/2010/dbgap-mount-tool.vcxproj
@@ -24,16 +24,22 @@
<ItemDefinitionGroup>
<Link>
<AdditionalDependencies>$(VDB_TARGET)lib\ncbi-vdb.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>$(VDB_TARGET)lib\zlib.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>$(VDB_TARGET)lib\kapp.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>$(VDB_TARGET)lib\libxfs.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>%ProgramFiles(x86)%\Dokan\DokanLibrary\dokan.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>Kernel32.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>Shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
- </ItemDefinitionGroup>
+ </ItemDefinitionGroup>
- <ItemGroup>
- <ClCompile Include="..\..\..\tools\prefetch\prefetch.c" />
- <ClCompile Include="..\..\..\tools\prefetch\kfile-no-q.c" />
+ <ItemGroup>
+ <ClCompile Include="..\..\..\tools\dbgap-mount\dbgap-mount-tool.c" />
+ <ClCompile Include="..\..\..\tools\dbgap-mount\win\wrap.c" />
</ItemGroup>
-
+
<PropertyGroup Label="Globals">
- <ProjectGuid>{2CF4BF04-4C4D-442E-A7AD-0AE5EF87C733}</ProjectGuid>
+ <ProjectGuid>{8DA91267-49B1-4696-AF9D-2A98B697167E}</ProjectGuid>
</PropertyGroup>
-
+
</Project>
diff --git a/build/MSVC/2010/prefetch.vcxproj b/build/MSVC/2010/prefetch.vcxproj
index b491b77..01e1ca5 100644
--- a/build/MSVC/2010/prefetch.vcxproj
+++ b/build/MSVC/2010/prefetch.vcxproj
@@ -24,6 +24,7 @@
<ItemDefinitionGroup>
<Link>
<AdditionalDependencies>$(VDB_TARGET)lib\ncbi-vdb.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <EntryPointSymbol>wmainCRTStartupNoPathConversion</EntryPointSymbol>
</Link>
</ItemDefinitionGroup>
diff --git a/build/MSVC/2010/test-align-info.vcxproj b/build/MSVC/2010/test-align-info.vcxproj
index aab94c9..c881d3c 100644
--- a/build/MSVC/2010/test-align-info.vcxproj
+++ b/build/MSVC/2010/test-align-info.vcxproj
@@ -22,7 +22,8 @@
<Import Project=".\test-project.props" />
<Target Name="AfterBuild" Condition="'$(Autorun)'=='true'">
- <Exec Command="$(VDB_TARGET)\bin\align-info -h" WorkingDirectory="$(ProjectDir)"/>
+ <Exec Command="dir . $(VDB_TARGET)\bin\align-info.exe"/>
+ <Exec Command="$(VDB_TARGET)\bin\align-info.exe -h" WorkingDirectory="$(ProjectDir)"/>
</Target>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/build/MSVC/2010/test-fastq-loader.vcxproj b/build/MSVC/2010/test-fastq-loader.vcxproj
index a2d63c2..1ff1a75 100644
--- a/build/MSVC/2010/test-fastq-loader.vcxproj
+++ b/build/MSVC/2010/test-fastq-loader.vcxproj
@@ -37,7 +37,8 @@
</ItemGroup>
<Target Name="AfterBuild" Condition="'$(Autorun)'=='true'">
+ <Exec Command="$(OutDir)vdb-config$(TargetExt) --root -s vdb/schema/paths=&quot;../../../../ncbi-vdb/interfaces&quot;"/>
<Exec Command="$(OutDir)$(TargetName)$(TargetExt)" WorkingDirectory="$(ProjectDir)"/>
</Target>
-</Project>
\ No newline at end of file
+</Project>
diff --git a/build/MSVC/2010/vdb-common.props b/build/MSVC/2010/vdb-common.props
index b38fd75..f4c22a4 100644
--- a/build/MSVC/2010/vdb-common.props
+++ b/build/MSVC/2010/vdb-common.props
@@ -1,18 +1,22 @@
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
-
+
+ <PropertyGroup Label="Configuration">
+ <PlatformToolset>v100</PlatformToolset>
+ </PropertyGroup>
+
<PropertyGroup Label="Globals">
<Platform Condition="'$(Platform)' == ''">x64</Platform>
<Configuration Condition="'$(Configuration)' == ''">Release</Configuration>
<NGS_ROOT Condition="'$(NGS_ROOT)' == ''">$(ProjectDir)..\..\..\..\ngs\ngs-sdk\</NGS_ROOT>
<NGS_OUTDIR Condition="'$(NGS_OUTDIR)' == ''">$(NGS_ROOT)..\..\OUTDIR\</NGS_OUTDIR>
- <NGS_TARGET Condition="'$(NGS_TARGET)' == ''">$(NGS_OUTDIR)win\cl\$(Platform)\$(Configuration)\</NGS_TARGET>
+ <NGS_TARGET Condition="'$(NGS_TARGET)' == ''">$(NGS_OUTDIR)win\$(PlatformToolset)\$(Platform)\$(Configuration)\</NGS_TARGET>
<VDB_OUTDIR Condition="'$(VDB_OUTDIR)' == ''">$(NGS_OUTDIR)</VDB_OUTDIR>
<VDB_ROOT Condition="'$(VDB_ROOT)' == ''">$(ProjectDir)..\..\..\..\ncbi-vdb\</VDB_ROOT>
- <VDB_TARGET Condition="'$(VDB_TARGET)' == ''">$(VDB_OUTDIR)win\cl\$(Platform)\$(Configuration)\</VDB_TARGET>
+ <VDB_TARGET Condition="'$(VDB_TARGET)' == ''">$(VDB_OUTDIR)win\$(PlatformToolset)\$(Platform)\$(Configuration)\</VDB_TARGET>
</PropertyGroup>
diff --git a/build/MSVC/2010/vdb-dump.vcxproj b/build/MSVC/2010/vdb-dump.vcxproj
index c736ada..21096c5 100644
--- a/build/MSVC/2010/vdb-dump.vcxproj
+++ b/build/MSVC/2010/vdb-dump.vcxproj
@@ -39,6 +39,8 @@
<ClCompile Include="..\..\..\tools\vdb-dump\vdb-dump-redir.c" />
<ClCompile Include="..\..\..\tools\vdb-dump\vdb-dump-str.c" />
<ClCompile Include="..\..\..\tools\vdb-dump\vdb-dump-tools.c" />
+ <ClCompile Include="..\..\..\tools\vdb-dump\vdb-dump-interact.c" />
+ <ClCompile Include="..\..\..\tools\vdb-dump\vdb-dump-repo.c" />
<ClCompile Include="..\..\..\tools\vdb-dump\vdb-dump.c" />
<ClCompile Include="..\..\..\tools\vdb-dump\vdb_info.c" />
</ItemGroup>
diff --git a/build/Makefile.deb b/build/Makefile.deb
new file mode 100644
index 0000000..0425991
--- /dev/null
+++ b/build/Makefile.deb
@@ -0,0 +1,109 @@
+# ===========================================================================
+#
+# PUBLIC DOMAIN NOTICE
+# National Center for Biotechnology Information
+#
+# This software/database is a "United States Government Work" under the
+# terms of the United States Copyright Act. It was written as part of
+# the author's official duties as a United States Government employee and
+# thus cannot be copyrighted. This software/database is freely available
+# to the public for use. The National Library of Medicine and the U.S.
+# Government have not placed any restriction on its use or reproduction.
+#
+# Although all reasonable efforts have been taken to ensure the accuracy
+# and reliability of the software and data, the NLM and the U.S.
+# Government do not and cannot warrant the performance or results that
+# may be obtained by using this software or data. The NLM and the U.S.
+# Government disclaim all warranties, express or implied, including
+# warranties of performance, merchantability or fitness for any particular
+# purpose.
+#
+# Please cite the author in any work or product based on this material.
+#
+# ===========================================================================
+
+#VERBOSE=-v
+
+# determine shell environment ($CONFIG_FILE)
+include $(TOP)/build/Makefile.shell
+
+# load build configuration ($TARGDIR) ($VERSION)
+include $(CONFIG_FILE)
+
+# $TOOLS_TO_INSTALL
+include $(TOP)/build/Makefile.tools
+ifeq (1,$(HAVE_HDF5))
+ TOOLS_TO_INSTALL += pacbio-load
+endif
+TOOLS_TO_INSTALL += remote-fuser
+
+PKGDIR=$(TARGDIR)/debbuild
+DST=$(PKGDIR)/sra-toolkit-$(VERSION)
+DEB=$(DST).deb
+
+TOOL=$(shell which dpkg-deb 2> /dev/null)
+
+################################################################################
+
+clean_deb:
+ rm -rf $(DEB) $(DST)
+
+ifeq ("","$(TOOL)")
+deb:
+ >&2 echo dpkg-deb: command not found
+else
+deb: $(DEB)
+endif
+
+$(DEB): $(DST)/etc/ncbi/default.kfg $(DST)/etc/ncbi/schema.kfg \
+ $(DST)/etc/ncbi/vdb-copy.kfg $(DST)/usr/bin $(DST)/DEBIAN/control
+ifeq (mac,$(OS))
+ dpkg-deb --build $(DST)
+else
+ fakeroot dpkg-deb --build $(DST)
+endif
+
+$(PKGDIR):
+ mkdir $(VERBOSE) -p $@
+
+$(DST): $(PKGDIR)
+ mkdir $(VERBOSE) -p $@/DEBIAN
+ mkdir $(VERBOSE) -p $@/etc/ncbi
+
+$(DST)/etc/ncbi:
+ mkdir -p $@
+
+$(DST)/etc/ncbi/default.kfg: $(DST)/etc/ncbi
+ rsync $(VERBOSE) -l --chmod=g-w $(VDB_LIBDIR)/ncbi/default.kfg $@
+
+$(DST)/etc/ncbi/schema.kfg: $(DST)/etc/ncbi
+ echo '/vdb/schema/paths = "/usr/share/ncbi/schema"' > $@
+
+$(DST)/etc/ncbi/vdb-copy.kfg: $(DST)/etc/ncbi
+ rsync $(VERBOSE) -l --chmod=g-w $(BINDIR)/ncbi/vdb-copy.kfg $@
+
+$(DST)/usr/bin: $(DST)
+ mkdir $(VERBOSE) -p $@
+ $(MAKE) -s --no-print-directory -f $(TOP)/build/Makefile.deb \
+ $(TOOLS_TO_INSTALL)
+
+$(DST)/usr/share/ncbi/schema: $(DST)
+ mkdir $(VERBOSE) -p $@
+ rsync $(VERBOSE) -rmloptD --include '*.vschema' --include='*/' \
+ --exclude '*' $(VDB_INCDIR)/* $@
+
+$(DST)/DEBIAN/control:
+ mkdir $(VERBOSE) -p $(DST)/DEBIAN
+ printf \
+"Package: sra-toolkit\n"\
+"Version: $(VERSION)\n"\
+"Architecture: all\n"\
+"Maintainer: SRA Toolkit Development Team <sra-tools@ncbi.nlm.nih.gov>\n"\
+"Description: SRA Toolkit package\n"\
+" The SRA Toolkit and SDK from NCBI is a collection of tools and libraries\n"\
+" for using data in the INSDC Sequence Read Archives.\n" > $@
+
+$(TOOLS_TO_INSTALL):
+ - ( test -f $(BINDIR)/$@ || ( echo "$@ not found" && false ) ) && \
+ rsync $(VERBOSE) -l $(BINDIR)/$@ $(BINDIR)/$@$(VERSION_EXEX) \
+ $(BINDIR)/$@$(MAJVERS_EXEX) $(DST)/usr/bin
diff --git a/build/Makefile.gcc b/build/Makefile.gcc
index e90f4f0..dc8ed4d 100644
--- a/build/Makefile.gcc
+++ b/build/Makefile.gcc
@@ -79,11 +79,11 @@ ifeq (dbg, $(BUILD))
PED = -std=gnu99 -pedantic # -fdiagnostics-show-option
else
-OPT = -O3 -Wno-variadic-macros -fno-strict-aliasing -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1
-ifeq (x86_64, $(ARCH))
- OPT += -Wall
-endif
-
+OPT = -O3 -Wno-variadic-macros -fno-strict-aliasing -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=1
+ifeq (x86_64, $(ARCH))
+ OPT += -Wall
+endif
+
endif
ifeq (mac,$(OS))
diff --git a/build/Makefile.install b/build/Makefile.install
index 9d506f2..75930e8 100644
--- a/build/Makefile.install
+++ b/build/Makefile.install
@@ -33,46 +33,10 @@ include $(TOP)/build/Makefile.$(OS)
#-------------------------------------------------------------------------------
# install
-#
-TOOLS_TO_INSTALL = \
- abi-dump \
- abi-load \
- align-info \
- bam-load \
- cache-mgr \
- cg-load \
- fastq-dump \
- fastq-load \
- helicos-load \
- illumina-dump \
- illumina-load \
- kar \
- kdbmeta \
- latf-load \
- prefetch \
- rcexplain \
- sam-dump \
- sff-dump \
- sff-load \
- sra-pileup \
- sra-sort \
- sra-stat \
- srapath \
- srf-load \
- test-sra \
- vdb-config \
- vdb-copy \
- vdb-decrypt \
- vdb-dump \
- vdb-encrypt \
- vdb-lock \
- vdb-passwd \
- vdb-unlock \
- vdb-validate \
+#
-# remote-fuser \
-# blastn_vdb \
-# tblastn_vdb \
+# $TOOLS_TO_INSTALL
+include $(TOP)/build/Makefile.tools
#fake root for debugging
#uncomment this line and change the test for root ( see under install: ) to succeed:
@@ -85,6 +49,8 @@ installexes:
@ echo "Installing executables to $(INST_BINDIR)/..."
@ mkdir -p $(INST_BINDIR)
@ $(MAKE) -s --no-print-directory -f $(TOP)/build/Makefile.install $(TOOLS_TO_INSTALL)
+ @ printf "Installing magic file..."
+ @ $(COPY) $(TOP)/tools/copycat/magic $(INST_BINDIR) && echo "done"
$(TOOLS_TO_INSTALL):
@ printf "Installing $@..."
@@ -102,9 +68,9 @@ endif
install: installexes
ifeq (true, $(LINUX_ROOT))
@ # install configuration file(s)
- @ echo "Installing configuration files to $(ROOT)/etc/ncbi/"
+ @ echo "Installing configuration files to $(ROOT)/etc/ncbi/..."
@ mkdir -p $(ROOT)/etc/ncbi/
- @ cp $(TOP)/tools/vdb-copy/vdb-copy.kfg $(ROOT)/etc/ncbi/
+ @ cp $(TOP)/tools/vdb-copy/vdb-copy.kfg $(ROOT)/etc/ncbi/ && echo "done"
@ #
@ echo "Updating $(PROFILE_FILE).[c]sh"
@ printf \
@@ -130,7 +96,7 @@ endif
# uninstall
#
-TO_UNINSTALL = $(addsuffix *,$(addprefix $(INST_BINDIR)/,$(TOOLS_TO_INSTALL)))
+TO_UNINSTALL = $(addsuffix *,$(addprefix $(INST_BINDIR)/,$(TOOLS_TO_INSTALL) magic))
TO_UNINSTALL_AS_ROOT = $(ROOT)/etc/ncbi/vdb-copy.kfg $(PROFILE_FILE).sh $(PROFILE_FILE).csh
uninstall:
diff --git a/build/Makefile.linux b/build/Makefile.linux
index 9383a51..b14b33f 100644
--- a/build/Makefile.linux
+++ b/build/Makefile.linux
@@ -72,3 +72,16 @@ COMPILERS = GCC # ICC
# copy without following symlinks
COPY = cp -P
+
+# valgrind
+VALGRIND = export NCBI_VALGRIND=ncbi; valgrind --error-exitcode=1 --suppressions=$(TOP)/build/valgrind.suppress --show-reachable=no
+
+VALGRIND_TARGETS = $(addprefix vg-,$(TEST_TOOLS))
+
+valgrind: $(VALGRIND_TARGETS)
+
+vg-test-%: test-%
+ $(VALGRIND) $(TEST_BINDIR)/$^
+
+vg-wb-test-%: wb-test-%
+ $(VALGRIND) $(TEST_BINDIR)/$^
diff --git a/build/Makefile.rpm b/build/Makefile.rpm
new file mode 100644
index 0000000..34074b7
--- /dev/null
+++ b/build/Makefile.rpm
@@ -0,0 +1,165 @@
+# ===========================================================================
+#
+# PUBLIC DOMAIN NOTICE
+# National Center for Biotechnology Information
+#
+# This software/database is a "United States Government Work" under the
+# terms of the United States Copyright Act. It was written as part of
+# the author's official duties as a United States Government employee and
+# thus cannot be copyrighted. This software/database is freely available
+# to the public for use. The National Library of Medicine and the U.S.
+# Government have not placed any restriction on its use or reproduction.
+#
+# Although all reasonable efforts have been taken to ensure the accuracy
+# and reliability of the software and data, the NLM and the U.S.
+# Government do not and cannot warrant the performance or results that
+# may be obtained by using this software or data. The NLM and the U.S.
+# Government disclaim all warranties, express or implied, including
+# warranties of performance, merchantability or fitness for any particular
+# purpose.
+#
+# Please cite the author in any work or product based on this material.
+#
+# ===========================================================================
+
+#VERBOSE=-v
+
+# determine shell environment ($CONFIG_FILE)
+include $(TOP)/build/Makefile.shell
+
+# load build configuration ($TARGDIR) ($VERSION)
+include $(CONFIG_FILE)
+
+# $TOOLS_TO_INSTALL
+include $(TOP)/build/Makefile.tools
+ifeq (1,$(HAVE_HDF5))
+ TOOLS_TO_INSTALL += pacbio-load
+endif
+TOOLS_TO_INSTALL += remote-fuser
+
+REVISION=0
+PKGDIR=$(TARGDIR)/rpmbuild
+RPM =$(PKGDIR)/RPMS/$(ARCH)/sra-toolkit-$(VERSION)-$(REVISION).$(ARCH).rpm
+SRC =$(PKGDIR)/SOURCES/sra-toolkit-$(VERSION).tar.gz
+SPEC=$(PKGDIR)/SPECS/sra-toolkit.spec
+DST =$(PKGDIR)/sra-toolkit-$(VERSION)
+TGZ=$(DST).tar.gz
+
+clean_rpm:
+ echo 'DEBUGGING RPM: clean_rpm'
+ rm -rf $(DST) $(RPM) $(SPEC) $(SRC) $(TGZ)
+
+rpm: $(RPM)
+# echo DEBUGGING RPM: rpm $(TOP)/$(MODULE)
+
+$(RPM): $(SPEC) $(SRC)
+# echo 'DEBUGGING RPM: (RPM)'
+ rpmbuild --define "_topdir $(PKGDIR)" --define "_tmppath %{_topdir}/tmp" \
+ -bb $(SPEC)
+
+$(SRC): $(DST)/etc/ncbi/default.kfg $(DST)/etc/ncbi/schema.kfg \
+ $(DST)/etc/ncbi/vdb-copy.kfg $(DST)/usr/share/ncbi/schema
+ echo 'DEBUGGING RPM: (SRC)'
+ chmod -R g-w $(DST)
+ cd $(PKGDIR) ; tar $(VERBOSE) -zcf $(TGZ) --owner 0 --group 0 \
+ sra-toolkit-$(VERSION)
+ mkdir $(VERBOSE) -p $(PKGDIR)/SOURCES
+ cp $(VERBOSE) -p $(TGZ) $@
+
+$(DST)/etc/ncbi/default.kfg: $(DST)
+ echo 'DEBUGGING RPM: (/etc/ncbi/default.kfg)'
+ rsync $(VERBOSE) -l --chmod=g-w $(VDB_LIBDIR)/ncbi/default.kfg $@
+
+$(DST)/etc/ncbi/schema.kfg: $(DST)
+ echo 'DEBUGGING RPM: (/etc/ncbi/schema.kfg)'
+ echo '/vdb/schema/paths = "/usr/share/ncbi/schema"' > $@
+
+$(DST)/etc/ncbi/vdb-copy.kfg: $(DST)
+ echo 'DEBUGGING RPM: (/etc/ncbi/vdb-copy.kfg)'
+ rsync $(VERBOSE) -l --chmod=g-w $(BINDIR)/ncbi/vdb-copy.kfg $@
+
+$(DST): $(PKGDIR)
+# echo 'DEBUGGING RPM: (DST)'
+ mkdir $(VERBOSE) -p $(DST)/etc/ncbi
+ mkdir $(VERBOSE) -p $(DST)/usr/bin
+ $(MAKE) -s --no-print-directory -f $(TOP)/build/Makefile.rpm \
+ $(TOOLS_TO_INSTALL)
+
+$(DST)/usr/share/ncbi/schema: $(DST)
+ echo 'DEBUGGING RPM: (/usr/share/ncbi/schema)'
+ mkdir $(VERBOSE) -p $@
+ rsync $(VERBOSE) -rmloptD --include '*.vschema' --include='*/' \
+ --exclude '*' $(VDB_INCDIR)/* $@
+
+$(PKGDIR):
+ echo 'DEBUGGING RPM: (PKGDIR)'
+ mkdir $(VERBOSE) -p $@/{RPMS,SRPMS,BUILD,SOURCES,SPECS,tmp}
+
+$(TOOLS_TO_INSTALL):
+# echo 'DEBUGGING RPM: (TOOLS_TO_INSTALL)'
+ - ( test -f $(BINDIR)/$@ || ( echo "$@ not found" && false ) ) && \
+ rsync $(VERBOSE) -l $(BINDIR)/$@ $(BINDIR)/$@$(VERSION_EXEX) \
+ $(BINDIR)/$@$(MAJVERS_EXEX) $(DST)/usr/bin
+
+$(SPEC): $(PKGDIR) $(TOP)/build/Makefile.rpm
+# echo 'DEBUGGING RPM: (SPEC)'
+ mkdir -p $(PKGDIR)/SPECS
+ printf \
+"%%define __spec_install_post %%{nil}\n"\
+"%%define debug_package %%{nil}\n"\
+"%%define __os_install_post %%{_dbpath}/brp-compress\n"\
+"\n"\
+"Summary: SRA Toolkit package\n"\
+"Name: sra-toolkit\n"\
+"Version: $(VERSION)\n"\
+"Release: $(REVISION)\n"\
+"License: Public Domain\n"\
+"Group: NCBI/VDB\n"\
+"SOURCE0 : %%{name}-%%{version}.tar.gz\n"\
+"URL: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software\n"\
+"\n"\
+"BuildRoot: %%{_tmppath}/%%{name}-%%{version}-%%{release}-root\n"\
+"\n"\
+"%%description\n"\
+"%%{summary}\n"\
+"\n"\
+"%%prep\n"\
+"%%setup -q\n"\
+"\n"\
+"%%build\n"\
+"# Empty section.\n"\
+"\n"\
+"%%install\n"\
+"rm -rf %%{buildroot}\n"\
+"mkdir -p %%{buildroot}\n"\
+"\n"\
+"# in builddir\n"\
+"cp -a * %%{buildroot}\n"\
+"\n"\
+"%%clean\n"\
+"rm -rf %%{buildroot}\n"\
+"\n"\
+"%%files\n"\
+"%%defattr(-,root,root,-)\n"\
+"%%{_bindir}/*\n"\
+"/etc/ncbi/*\n"\
+"/usr/share/ncbi/schema/*/*\n"\
+"\n"\
+"%%post\n"\
+"if [ -e /etc/profile.d/sra-tools.csh ] || [ -e /etc/profile.d/sra-tools.sh ]"\
+"\n"\
+"then\n"\
+" FOUND=1\n"\
+"fi\n"\
+'if [ "$$FOUND" != "" ] ; then'\
+"\n"\
+" echo User installation of sra-tools is found.\n"\
+" if [ -e /etc/profile.d/sra-tools.csh ] ; then\n"\
+" mv -v /etc/profile.d/sra-tools.csh /etc/profile.d/sra-tools.csh.bak\n"\
+" fi\n"\
+" if [ -e /etc/profile.d/sra-tools.sh ] ; then\n"\
+" mv -v /etc/profile.d/sra-tools.sh /etc/profile.d/sra-tools.sh.bak\n"\
+" fi\n"\
+" echo /etc/profile.d/sra-tools.[c]sh was updated.\n"\
+" echo You might need to relogin to have your PATH variable updated.\n"\
+"fi\n" > $@
diff --git a/build/Makefile.shell b/build/Makefile.shell
index d90b53e..7b78684 100644
--- a/build/Makefile.shell
+++ b/build/Makefile.shell
@@ -24,7 +24,7 @@
CONFIG_FILE = $(TOP)/build/Makefile.config
ifeq (no, $(shell test -f $(CONFIG_FILE) && echo yes || echo no))
- $(error "*** File '$(CONFIG_FILE)' is missing. Please run $(TOP)/build/configure")
+ $(error "*** File '$(CONFIG_FILE)' is missing. Please run $(TOP)/configure")
endif
# determine OS
@@ -35,6 +35,11 @@ ifeq (Darwin, $(UNAME))
endif
ifeq (Linux, $(UNAME))
HOST_OS = linux
+ OS_DISTRIBUTOR = $(shell lsb_release -si)
+ EXECMDF = $(TOP)/build/ld.linux.exe_cmd.sh
+ ifeq (no, $(shell test -f $(EXECMDF) && echo yes || echo no))
+ $(error "*** File '$(EXECMDF)' is missing. Please run $(TOP)/configure")
+ endif
endif
ifeq (SunOS, $(UNAME))
HOST_OS = sun
@@ -55,6 +60,13 @@ BUILD_OS = $(OS)
ARCHITECTURES = default
ifeq (mac,$(HOST_OS))
HOST_ARCH = $(shell $(TOP)/build/mac.arch.sh)
+ CONFIG_FILE_OA = $(TOP)/build/Makefile.config.$(OS).$(HOST_ARCH)
+ ifeq (no, $(shell test -f $(CONFIG_FILE_OA) && echo yes || echo no))
+ $(error "*** File '$(CONFIG_FILE_OA)' is missing. Please run $(TOP)/configure")
+ endif
+ ifeq (no, $(shell grep CONFIGURE_FOUND_XML2 $(CONFIG_FILE_OA) && echo yes || echo no))
+ $(error "*** 'CONFIGURE_FOUND_XML2' is missing in $(CONFIG_FILE_OA). Please run $(TOP)/configure")
+ endif
else
MARCH = $(shell uname -m)
diff --git a/build/Makefile.targets b/build/Makefile.targets
index cd36e1a..0690964 100644
--- a/build/Makefile.targets
+++ b/build/Makefile.targets
@@ -29,6 +29,7 @@
# clean
# runtests
# slowtests
+# valgrind
#
# requires $(SUBDIRS) to be defined
#
@@ -102,3 +103,15 @@ $(SUBDIRS_SLOWTESTS):
.PHONY: slowtests $(SUBDIRS_SLOWTESTS)
+#-------------------------------------------------------------------------------
+# valgrind
+#
+SUBDIRS_VALGRIND ?= $(addsuffix _valgrind, $(SUBDIRS))
+
+valgrind: $(SUBDIRS_VALGRIND)
+
+$(SUBDIRS_VALGRIND):
+ @ $(MAKE) -C $(subst _valgrind,,$@) valgrind
+
+.PHONY: valgrind $(SUBDIRS_VALGRIND)
+
diff --git a/build/Makefile.tools b/build/Makefile.tools
new file mode 100644
index 0000000..ab4465e
--- /dev/null
+++ b/build/Makefile.tools
@@ -0,0 +1,40 @@
+TOOLS_TO_INSTALL = \
+ abi-dump \
+ abi-load \
+ align-info \
+ bam-load \
+ cache-mgr \
+ cg-load \
+ fastq-dump \
+ fastq-load \
+ helicos-load \
+ illumina-dump \
+ illumina-load \
+ kar \
+ kdbmeta \
+ latf-load \
+ prefetch \
+ rcexplain \
+ sam-dump \
+ sff-dump \
+ sff-load \
+ sra-pileup \
+ sra-sort \
+ sra-stat \
+ srapath \
+ srf-load \
+ test-sra \
+ vdb-config \
+ vdb-copy \
+ vdb-decrypt \
+ vdb-dump \
+ vdb-encrypt \
+ vdb-lock \
+ vdb-passwd \
+ vdb-unlock \
+ vdb-validate \
+ copycat \
+
+# remote-fuser \
+# blastn_vdb \
+# tblastn_vdb \
diff --git a/build/ld.linux.exe.sh b/build/ld.linux.exe.sh
index 81dc5f0..e8b7580 100755
--- a/build/ld.linux.exe.sh
+++ b/build/ld.linux.exe.sh
@@ -298,7 +298,14 @@ if grep -q HAVE_XML2 ${BUILD_DIR}/Makefile.config.linux.$ARCH > /dev/null
then
if grep -q XML2_LIBDIR ${BUILD_DIR}/Makefile.config.linux.$ARCH > /dev/null
then
- XMLLIBDIR="-Wl,-Bstatic -L$(grep XML2_LIBDIR ${BUILD_DIR}/Makefile.config.linux.$ARCH | perl -e'while(<>){if(/XML2_LIBDIR = (.+)/){print $1}}')"
+ if grep -q 'OS_DISTRIBUTOR = Ubuntu' ${BUILD_DIR}/Makefile.config.linux.$ARCH > /dev/null
+ then
+ ##### Do not link libxml2 statically on Ubuntu: it does not compile
+ XMLLIBDIR="-L$(grep XML2_LIBDIR ${BUILD_DIR}/Makefile.config.linux.$ARCH | perl -e'while(<>){if(/XML2_LIBDIR = (.+)/){print $1}}')"
+ else
+ ##### Link libxml2 statically on all linux-es except Ubuntu
+ XMLLIBDIR="-Wl,-Bstatic -L$(grep XML2_LIBDIR ${BUILD_DIR}/Makefile.config.linux.$ARCH | perl -e'while(<>){if(/XML2_LIBDIR = (.+)/){print $1}}')"
+ fi
fi
if [ $HAVE_XML -ne 0 ]
then
diff --git a/build/ld.linux.gcc.sh b/build/ld.linux.gcc.sh
index cfc8ad0..fd2e81f 100755
--- a/build/ld.linux.gcc.sh
+++ b/build/ld.linux.gcc.sh
@@ -33,7 +33,10 @@ LD_REF_SYMBOLS="-Wl,-no-whole-archive"
# build command
DLIB_CMD="$LD -shared"
-EXE_CMD="$LD -static-libstdc++ -static-libgcc"
+
+#EXE_CMD="$LD -static-libstdc++ -static-libgcc"
+source "${0%exe.sh}exe_cmd.sh" # EXE_CMD is defined in build/ld.linux.exe_cmd.sh
+
EXE_STATIC_CMD="$EXE_CMD"
#EXE_CMD="$LD"
#EXE_STATIC_CMD="$LD -static"
diff --git a/build/ld.mac.dlib.sh b/build/ld.mac.dlib.sh
index a990020..83b00ed 100755
--- a/build/ld.mac.dlib.sh
+++ b/build/ld.mac.dlib.sh
@@ -338,7 +338,7 @@ fi
# add in xml
if [ $HAVE_XML -ne 0 ]
then
- CMD="$CMD -lxml2"
+ CMD="$CMD -lxml2 -liconv"
fi
# add in math library
diff --git a/build/ld.mac.exe.sh b/build/ld.mac.exe.sh
index a1d182a..6c6369f 100755
--- a/build/ld.mac.exe.sh
+++ b/build/ld.mac.exe.sh
@@ -319,7 +319,7 @@ fi
# add in xml
if [ $HAVE_XML -ne 0 ]
then
- CMD="$CMD -lxml2"
+ CMD="$CMD -lxml2 -liconv"
fi
# add in math library
diff --git a/build/ld.sh b/build/ld.sh
index 1293a0e..734c6ab 100755
--- a/build/ld.sh
+++ b/build/ld.sh
@@ -301,7 +301,10 @@ do
HAVE_BZIP=1
HAVE_KFC=1
NEED_M=1
- NEED_XML=1
+
+ NEED_XML=$CONFIGURE_FOUND_XML2
+ # CONFIGURE_FOUND_XML2 comes from Makefile.config.$OS.$ARCH
+
LIBS="$LIBS $1"
;;
-[lds]ncbi-ngs-c++)
@@ -311,7 +314,7 @@ do
HAVE_BZIP=1
HAVE_KFC=1
NEED_M=1
- NEED_XML=1
+ NEED_XML=$CONFIGURE_FOUND_XML2
LIBS="$LIBS $1"
;;
-[lds]ncbi-wvdb)
@@ -321,7 +324,7 @@ do
HAVE_BZIP=1
HAVE_KFC=1
NEED_M=1
- NEED_XML=1
+ NEED_XML=$CONFIGURE_FOUND_XML2
LIBS="$LIBS $1"
;;
diff --git a/test/ngs-pileup/valgrind.suppress b/build/valgrind.suppress
similarity index 100%
copy from test/ngs-pileup/valgrind.suppress
copy to build/valgrind.suppress
diff --git a/setup/konfigure.perl b/setup/konfigure.perl
index fe8d17c..3a25b88 100644
--- a/setup/konfigure.perl
+++ b/setup/konfigure.perl
@@ -203,6 +203,14 @@ unless ($OSTYPE =~ /linux/i || $OSTYPE =~ /darwin/i || $OSTYPE eq 'win') {
exit 1;
}
+my $OS_DISTRIBUTOR = '';
+if ($OS eq 'linux') {
+ print "checking OS distributor... " unless ($AUTORUN);
+ $OS_DISTRIBUTOR = `lsb_release -si 2> /dev/null`;
+ chomp $OS_DISTRIBUTOR;
+ println $OS_DISTRIBUTOR unless ($AUTORUN);
+}
+
print "checking machine architecture... " unless ($AUTORUN);
println $MARCH unless ($AUTORUN);
unless ($MARCH =~ /x86_64/i || $MARCH =~ /i?86/i) {
@@ -424,6 +432,11 @@ if ($TOOLS =~ /gcc$/ && check_no_array_bounds()) {
$NO_ARRAY_BOUNDS_WARNING = '-Wno-array-bounds';
}
+my $STATIC_LIBSTDCPP = '';
+if ($TOOLS =~ /gcc$/) {
+ $STATIC_LIBSTDCPP = check_static_libstdcpp();
+}
+
my @dependencies;
my %DEPEND_OPTIONS;
@@ -714,6 +727,14 @@ if ($OS ne 'win' && ! $OPT{'status'}) {
close F;
}
+ if ($TOOLS =~ /gcc$/) {
+ my $EXECMDF = File::Spec->catdir(CONFIG_OUT(), 'ld.linux.exe_cmd.sh');
+ println "configure: creating '$EXECMDF'" unless ($AUTORUN);
+ open F, ">$EXECMDF" or die "cannot open $EXECMDF to write";
+ print F "EXE_CMD=\"\$LD $STATIC_LIBSTDCPP -static-libgcc\"\n";
+ close F;
+ }
+
# create Makefile.config
println "configure: creating '$OUT_MAKEFILE'" unless ($AUTORUN);
open my $F, ">$OUT_MAKEFILE" or die "cannot open $OUT_MAKEFILE to write";
@@ -773,6 +794,7 @@ BUILD = $BUILD
# target OS
OS = $OS
OSINC = $OSINC
+OS_DISTRIBUTOR = $OS_DISTRIBUTOR
# prefix string for system libraries
LPFX = $LPFX
@@ -974,6 +996,14 @@ EndText
L($F, $_) foreach (@dependencies);
L($F);
+ # pass HAVE_XML2 to build scripts
+ L($F, 'ifeq (,$(HAVE_XML2))');
+ L($F, ' HAVE_XML2=0');
+ L($F, 'endif');
+ L($F, 'CONFIGURE_FOUND_XML2=$(HAVE_XML2)');
+ L($F, 'export CONFIGURE_FOUND_XML2');
+ L($F);
+
if ($OS eq 'linux' || $OS eq 'mac') {
L($F, '# installation rules');
L($F,
@@ -1410,6 +1440,15 @@ sub check_no_array_bounds {
check_compiler('O', '-Wno-array-bounds');
}
+sub check_static_libstdcpp {
+ my $option = '-static-libstdc++';
+ my $save = $TOOLS;
+ $TOOLS = $CPP;
+ $_ = check_compiler('O', $option);
+ $TOOLS = $save;
+ $_ ? $option : ''
+}
+
sub find_lib {
check_compiler('L', @_);
}
@@ -1421,7 +1460,7 @@ sub check_compiler {
if ($t eq 'L') {
print "checking for $n library... ";
} elsif ($t eq 'O') {
- if ($tool && $tool =~ /gcc$/) {
+ if ($tool && ($tool =~ /gcc$/ || $tool =~ /g\+\+$/)) {
print "checking whether $tool accepts $n... ";
} else {
return;
@@ -1452,7 +1491,8 @@ sub check_compiler {
$library = '-lmagic';
$log = '#include <magic.h> \n int main() { magic_open (0); }\n'
} elsif ($n eq 'xml2') {
- $library = '-lxml2';
+ $library = '-lxml2';
+ $library .= ' -liconv' if ($OS eq 'mac');
$log = '#include <libxml/xmlreader.h>\n' .
'int main() { xmlInitParser ( ); }\n'
} else {
diff --git a/setup/package.prl b/setup/package.prl
index 3324750..2f6b782 100644
--- a/setup/package.prl
+++ b/setup/package.prl
@@ -1,6 +1,6 @@
################################################################################
sub PACKAGE { 'sra-tools' }
-sub VERSION { '2.5.7' }
+sub VERSION { '2.6.2' }
sub PACKAGE_TYPE { 'B' }
sub PACKAGE_NAME { 'SRA-TOOLS' }
sub PACKAGE_NAMW { 'SRATOOLS' }
diff --git a/test/Makefile b/test/Makefile
index 93d1a59..3879ac9 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -41,7 +41,12 @@ SUBDIRS = \
general-loader \
vschema \
align-info \
+ align-cache \
vdb-dump \
+ ref-variation \
+ vdb-validate \
+ kar \
+ copycat \
# under construction
# ngs-pileup \
diff --git a/test/align-cache/CSRA_file b/test/align-cache/CSRA_file
new file mode 100644
index 0000000..18e06d2
Binary files /dev/null and b/test/align-cache/CSRA_file differ
diff --git a/test/kget/Makefile b/test/align-cache/Makefile
similarity index 71%
copy from test/kget/Makefile
copy to test/align-cache/Makefile
index e2cd576..9a8ded8 100644
--- a/test/kget/Makefile
+++ b/test/align-cache/Makefile
@@ -26,31 +26,33 @@ default: runtests
TOP ?= $(abspath ../..)
-MODULE = test/kget
+MODULE = test/align-cache
TEST_TOOLS = \
+ALL_TOOLS = \
+ $(TEST_TOOLS) \
+
include $(TOP)/build/Makefile.env
+$(ALL_TOOLS): makedirs
+ @ $(MAKE_CMD) $(TEST_BINDIR)/$@
+
.PHONY: $(TEST_TOOLS)
+clean: stdclean
+
#-------------------------------------------------------------------------------
-# slowtests: match output vs wget
+# align-cache tool tests
#
+runtests: align-cache
-slowtests: diff-vs-wget
-
-ACCESSION = SRR072810
-URL = http://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR072/SRR072810/SRR072810.sra
-
-clean:
- rm -f $(ACCESSION)*
-
-diff-vs-wget: clean
- $(BINDIR)/kget --reliable -c ./$(ACCESSION).cachetee $(URL) $(ACCESSION).dat --progress
- wget $(URL)
- diff $(ACCESSION).sra ./$(ACCESSION).dat
- #diff $(ACCESSION).sra ./$(ACCESSION).cachetee
- rm -f $(ACCESSION)*
-.PHONY:
+align-cache: $(BINDIR)/align-cache
+ @ rm -rf CSRA_file.cache
+ @ $(BINDIR)/align-cache -t 10 --min-cache-count 1 CSRA_file CSRA_file.cache
+ @ $(BINDIR)/vdb-validate CSRA_file.cache/ 2>&1 | grep --quiet "is consistent"
+ @ rm -rf CSRA_file.cache
+
+vg: $(BINDIR)/align-cache
+ valgrind --ncbi --suppressions=$(SRCDIR)/valgrind.suppress $(BINDIR)/align-cache -t 10 --min-cache-count 1 CSRA_file CSRA_file.cache
diff --git a/test/ref-variation/valgrind.suppress b/test/align-cache/valgrind.suppress
similarity index 100%
rename from test/ref-variation/valgrind.suppress
rename to test/align-cache/valgrind.suppress
diff --git a/test/vcf-loader/Makefile b/test/copycat/Makefile
similarity index 70%
copy from test/vcf-loader/Makefile
copy to test/copycat/Makefile
index 32aab45..3f4ed99 100644
--- a/test/vcf-loader/Makefile
+++ b/test/copycat/Makefile
@@ -26,22 +26,12 @@ default: runtests
TOP ?= $(abspath ../..)
-MODULE = test/vcf-loader
+MODULE = test/copycat
TEST_TOOLS = \
- test-vcf-loader
include $(TOP)/build/Makefile.env
-
-# make sure runs are not cached in the user repository when running tests
-ifeq ($(wildcard ../../../asm-trace),)
- ifeq (,$(VDB_CONFIG))
- VDB_CONFIG = $(shell pwd)/../only-remote-repository.kfg
- endif
-endif
-
-
$(TEST_TOOLS): makedirs
@ $(MAKE_CMD) $(TEST_BINDIR)/$@
@@ -50,24 +40,21 @@ $(TEST_TOOLS): makedirs
clean: stdclean
#-------------------------------------------------------------------------------
-# white-box test
+# scripted tests
#
-INCDIRS += -I$(TOP)/tools/vcf-loader
-
-VCF_TEST_SRC = \
- test-vcf-loader
-
-VCF_TEST_OBJ = \
- $(addsuffix .$(OBJX),$(VCF_TEST_SRC))
-
-VCF_TEST_LIB = \
- -skapp \
- -sktst \
- -sncbi-wvdb \
- -svcfloader
-
-$(TEST_BINDIR)/test-vcf-loader: $(VCF_TEST_OBJ)
- $(LP) --exe -o $@ $^ $(VCF_TEST_LIB)
+ifeq (1,$(HAVE_MAGIC))
+runtests: copy
+else
+runtests:
+ @ echo "NOTE - copycat tests are skipped:" \
+ "copycat was not built"
+ @ echo "because it requires our internal library 'libkff'" \
+ "which requires 'libmagic' and its development headers."
+endif
-valgrind: test-vcf-loader
- valgrind --ncbi $(TEST_BINDIR)/test-vcf-loader
+copy:
+ @ echo "Starting copycat tests..."
+ @ rm -rf actual
+ @ $(BINDIR)/copycat -h >/dev/null
+ @ export PATH=$(BINDIR):$$PATH; vdb-config | grep bin; copycat ./input/1.xml actual/ >/dev/null && diff ./input/1.xml actual/1.xml
+ @ rm -rf actual
diff --git a/test/copycat/input/1.xml b/test/copycat/input/1.xml
new file mode 100644
index 0000000..cc70b4f
--- /dev/null
+++ b/test/copycat/input/1.xml
@@ -0,0 +1,5 @@
+<ROOT version="2.6.2">
+ <archive id="1" path="1.tar" name="1.tar" size="10240" mtime="2016-04-07T21:06:20Z" filetype="Archive/TapeArchive" md5="41fbc9b272d12c815c8a4c301a354b95" crc32="d7b8fc09">
+ <file id="2" path="1.tar/1" name="1" size="25" lines="4" mtime="2016-04-07T19:47:13Z" filetype="Unknown/Unknown" md5="293363f5942f2ca7e4d0a1aa9ffda34f" offset="512"/>
+ </archive>
+</ROOT>
diff --git a/test/fastq-loader/Makefile b/test/fastq-loader/Makefile
index 31020d1..36c1d13 100644
--- a/test/fastq-loader/Makefile
+++ b/test/fastq-loader/Makefile
@@ -66,9 +66,6 @@ $(TEST_BINDIR)/wb-test-fastq: $(FASTQ_TEST_OBJ)
wb: wb-test-fastq
$(TEST_BINDIR)/wb-test-fastq 2>&1
-valgrind_wb: wb-test-fastq
- valgrind --ncbi $(TEST_BINDIR)/wb-test-fastq
-
#-------------------------------------------------------------------------------
# test-fastqtest-loader
#
@@ -85,10 +82,6 @@ tfl:
$(MAKE) -C $(OBJDIR) -f $(SRCDIR)/Makefile test-fastq-loader
$(TEST_BINDIR)/test-fastq-loader
-valgrind_tfl:
- $(MAKE) -C $(OBJDIR) -f $(SRCDIR)/Makefile test-fastq-loader
- valgrind --ncbi $(TEST_BINDIR)/test-fastq-loader
-
.PHONY: test-fastq-loader
#-------------------------------------------------------------------------------
@@ -97,6 +90,11 @@ valgrind_tfl:
runtests: set_schema smalltests
set_schema: $(BINDIR)/vdb-config
+ $(BINDIR)/vdb-config -on
+ find $(HOME)/ncbi -size +9999c -exec ls -l {} \; | sort -nk5 | tail
+ echo HOST=`hostname`
+ echo OS=$(OS)
+ echo USER=$(USER)
$(BINDIR)/vdb-config -s vdb/schema/paths="$(VDB_INCDIR)"
SMALLRUN = @ $(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)
@@ -198,10 +196,16 @@ smalltests: $(TEST_TOOLS)
# Illumina, use barcodes as spotgroups
$(SMALLRUN) 12.1 0 $(SRCDIR)/input/12.1.fastq --quality PHRED_33
# Illumina, use barcodes as spotgroups, alternative format
- $(SMALLRUN) 12.2 0 $(SRCDIR)/input/12.2.fastq --quality PHRED_33
+ $(SMALLRUN) 12.2 0 $(SRCDIR)/input/12.2.fastq --quality PHRED_33
+ # Consume our own dog food (13.0.fastq created by fastq-dump )
+ $(SMALLRUN) 13.0 0 $(SRCDIR)/input/13.0.fastq --quality PHRED_33
+ # Consume our own dog food (13.1.fastq created by fastq-dump -F )
+ $(SMALLRUN) 13.1 0 $(SRCDIR)/input/13.1.fastq --quality PHRED_33
+ # SRA-2932: 30 is the preferred value for qualities when reading FASTA
+ $(SMALLRUN) 14.0 0 $(SRCDIR)/input/14.0.fasta $(SRCDIR)/input/14.0.fasta --quality PHRED_33
rm -rf $(SRCDIR)/actual
onetest:
rm -rf $(SRCDIR)/actual
- $(SMALLRUN) 1.4 0 $(SRCDIR)/input/1.4.fastq --quality LOGODDS
+ $(SMALLRUN) 13.1 0 $(SRCDIR)/input/13.1.fastq --quality PHRED_33
diff --git a/test/fastq-loader/expected/1.1.stdout b/test/fastq-loader/expected/1.1.stdout
index 1e9793c..377f056 100644
--- a/test/fastq-loader/expected/1.1.stdout
+++ b/test/fastq-loader/expected/1.1.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 36
BIO_BASE_COUNT: 36
CMP_BASE_COUNT: 36
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 112332022230202..1120022110..03..0..
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 36
- TMP_KEY_ID: 0
TRIM_LEN: 36
TRIM_START: 0
diff --git a/test/fastq-loader/expected/1.2.stdout b/test/fastq-loader/expected/1.2.stdout
index 831a7c4..f9dd417 100644
--- a/test/fastq-loader/expected/1.2.stdout
+++ b/test/fastq-loader/expected/1.2.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 36
BIO_BASE_COUNT: 36
CMP_BASE_COUNT: 36
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 112332022230202..1120022110..03..0..
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 36
- TMP_KEY_ID: 0
TRIM_LEN: 36
TRIM_START: 0
diff --git a/test/fastq-loader/expected/1.4.stdout b/test/fastq-loader/expected/1.4.stdout
index 572f76e..eb437e2 100755
--- a/test/fastq-loader/expected/1.4.stdout
+++ b/test/fastq-loader/expected/1.4.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 216
BIO_BASE_COUNT: 216
CMP_BASE_COUNT: 216
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 131033233331013023032230132132312301331032212003012030012223002013323212
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 72
+ LINKAGE_GROUP:
MAX_SPOT_ID: 3
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BASE_COUNT: 216
BIO_BASE_COUNT: 216
CMP_BASE_COUNT: 216
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 110003323031230312213200233330230230030231233302133312123321320321233203
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 72
+ LINKAGE_GROUP:
MAX_SPOT_ID: 3
MIN_SPOT_ID: 1
NAME: 2
@@ -62,11 +66,13 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BASE_COUNT: 216
BIO_BASE_COUNT: 216
CMP_BASE_COUNT: 216
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 122312131322312131021100002130102223101233212132311133020101010332200312
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 72
+ LINKAGE_GROUP:
MAX_SPOT_ID: 3
MIN_SPOT_ID: 1
NAME: 3
diff --git a/test/fastq-loader/expected/10.0.stdout b/test/fastq-loader/expected/10.0.stdout
index b0da961..737f572 100644
--- a/test/fastq-loader/expected/10.0.stdout
+++ b/test/fastq-loader/expected/10.0.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 21
BIO_BASE_COUNT: 21
CMP_BASE_COUNT: 21
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 202311120113300010103
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 21
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 21
- TMP_KEY_ID: 0
TRIM_LEN: 21
TRIM_START: 0
diff --git a/test/fastq-loader/expected/11.0.stdout b/test/fastq-loader/expected/11.0.stdout
index 346615e..7a25759 100644
--- a/test/fastq-loader/expected/11.0.stdout
+++ b/test/fastq-loader/expected/11.0.stdout
@@ -2,17 +2,19 @@
BASE_COUNT: 152
BIO_BASE_COUNT: 152
CMP_BASE_COUNT: 152
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 12231033031122330010100000000000000000000000000203301033111000111011021301111301123113320231133000013123101102202202000000000000001300011000033112330033
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 152
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0, 0
- QUALITY: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ QUALITY: 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30 [...]
RD_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
READ: GAGCAATAATGTCTATTTGGTTTTTTTTTTTTTTTTTTTTTTTTTTTCCGCCAATACACCCCACAACAAGTAACACGCCACTACATAGGATGTATTTTTGCAGCAACAAGAAGAAGGGGGGGGGGGGGGGTAAAACAAAAATACAGCGGGCG
READ_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 152
- TMP_KEY_ID: 0
TRIM_LEN: 152
TRIM_START: 0
diff --git a/test/fastq-loader/expected/11.1.stdout b/test/fastq-loader/expected/11.1.stdout
index 01676c5..7b029dc 100644
--- a/test/fastq-loader/expected/11.1.stdout
+++ b/test/fastq-loader/expected/11.1.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 202
BIO_BASE_COUNT: 202
CMP_BASE_COUNT: 202
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 0300232002130023102002201303012112133111010333330332133032020333303000312301230003311300303300333003003002320021300231020022013030121121331110103333303321330320203333030003123012300033113003033003330030
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 202
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 202
- TMP_KEY_ID: 0
TRIM_LEN: 202
TRIM_START: 0
diff --git a/test/fastq-loader/expected/12.0.stdout b/test/fastq-loader/expected/12.0.stdout
index 641ab0b..c9fa758 100644
--- a/test/fastq-loader/expected/12.0.stdout
+++ b/test/fastq-loader/expected/12.0.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 42
BIO_BASE_COUNT: 42
CMP_BASE_COUNT: 42
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 202311120113300010103
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 21
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BASE_COUNT: 42
BIO_BASE_COUNT: 42
CMP_BASE_COUNT: 42
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 202311120113300010103
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 21
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/expected/12.1.stdout b/test/fastq-loader/expected/12.1.stdout
index d6fafcd..553cc50 100644
--- a/test/fastq-loader/expected/12.1.stdout
+++ b/test/fastq-loader/expected/12.1.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 42
BIO_BASE_COUNT: 42
CMP_BASE_COUNT: 42
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 202311120113300010103
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 21
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BASE_COUNT: 42
BIO_BASE_COUNT: 42
CMP_BASE_COUNT: 42
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 202311120113300010103
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 21
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/expected/12.2.stdout b/test/fastq-loader/expected/12.2.stdout
index 4b332aa..fba4c6d 100644
--- a/test/fastq-loader/expected/12.2.stdout
+++ b/test/fastq-loader/expected/12.2.stdout
@@ -2,17 +2,19 @@
BASE_COUNT: 42
BIO_BASE_COUNT: 42
CMP_BASE_COUNT: 42
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 202311120113300010103202311120113300010103
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 42
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0, 0
- QUALITY: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ QUALITY: 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
RD_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
READ: CCTACACTTGTATTTTGGTTACCTACACTTGTATTTTGGTTA
READ_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP: ATCACG
SPOT_ID: 1
SPOT_LEN: 42
- TMP_KEY_ID: 0
TRIM_LEN: 42
TRIM_START: 0
diff --git a/test/fastq-loader/expected/13.0.stdout b/test/fastq-loader/expected/13.0.stdout
new file mode 100644
index 0000000..a2ff6c3
--- /dev/null
+++ b/test/fastq-loader/expected/13.0.stdout
@@ -0,0 +1,96 @@
+ ALIGNMENT_COUNT: 0
+ BASE_COUNT: 798
+ BIO_BASE_COUNT: 798
+ CMP_BASE_COUNT: 798
+ CMP_LINKAGE_GROUP:
+ COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
+ CSREAD: 02123011032113003322110220022200103231333121221313323333221121030210103010021013001013213022003000313012103130101022120231301120133013301021030022010303311212103020322101303323213222201312030133023222030132201031220213011122033133303021323002311003110321203331102202000203
+ CS_KEY: T
+ CS_NATIVE: false
+ FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
+ MAX_SPOT_ID: 3
+ MIN_SPOT_ID: 1
+ NAME: 1
+ PLATFORM: SRA_PLATFORM_UNDEFINED
+PRIMARY_ALIGNMENT_ID: 0
+ QUALITY: 28, 28, 28, 28, 36, 28, 28, 37, 29, 28, 28, 28, 27, 36, 30, 8, 27, 28, 26, 28, 26, 37, 28, 27, 39, 33, 13, 28, 24, 39, 33, 13, 33, 24, 22, 28, 27, 28, 28, 28, 26, 28, 28, 27, 26, 28, 28, 28, 28, 28, 28, 27, 27, 28, 28, 28, 28, 28, 27, 28, 27, 35, 26, 33, 26, 27, 31, 24, 35, 28, 33, 26, 34, 29, 9, 27, 31, 22, 27, 34, 30, 9, 33, 25, 25, 26, 25, 25, 34, 27, 26, 37, 33, 16, 38, 34, 22, 8, 24, 26, 33, 25, 27, 26, 35, 28, 26, 27, 34, 28, 34, 28, 33, 26, 27, 27, 27, 30, 21 [...]
+ RD_FILTER: SRA_READ_FILTER_PASS
+ READ: TCAGCCACCGACATTTATCTGTTCTTTCTCCCAATCGTATACTGAGTACGCTATATCTGTCAATTCAACCGGTTTCAACGGGTTGCTGCCTCCCGGGGCATTGACCGTAACCAAGACTTCGTAACAGGTATTGCGGTTCAATTTCTTGGCCGCACTGACCGGAATCTGGTAATAGCTGCTCTCCATGAATTGCGGATCTCCGGTAGAACCGTCTTCATTGTGAGGCGTATAATTCATCGGGATGTTTACAATCAGGCGCACCTCCTTTTCCG
+ READ_FILTER: SRA_READ_FILTER_PASS
+ READ_LEN: 272
+ READ_SEG: [0, 272]
+ READ_START: 0
+ READ_TYPE: SRA_READ_TYPE_BIOLOGICAL
+ SIGNAL_LEN: 0
+ SPOT_COUNT: 3
+ SPOT_GROUP:
+ SPOT_ID: 1
+ SPOT_LEN: 272
+ TRIM_LEN: 272
+ TRIM_START: 0
+
+ ALIGNMENT_COUNT: 0
+ BASE_COUNT: 798
+ BIO_BASE_COUNT: 798
+ CMP_BASE_COUNT: 798
+ CMP_LINKAGE_GROUP:
+ COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
+ CSREAD: 0212133120302031103223023130302333331100230101211112020102102023030100111020031210320003031033121131131213113211032000132301322100113002211313331220312000132031302330330022331330202022022002211001010221301221321323012311302113311332101033223110202203011220212312
+ CS_KEY: T
+ CS_NATIVE: false
+ FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
+ MAX_SPOT_ID: 3
+ MIN_SPOT_ID: 1
+ NAME: 2
+ PLATFORM: SRA_PLATFORM_UNDEFINED
+PRIMARY_ALIGNMENT_ID: 0
+ QUALITY: 28, 28, 26, 26, 28, 28, 28, 26, 35, 26, 34, 25, 30, 20, 28, 25, 37, 29, 27, 28, 28, 37, 29, 27, 28, 28, 36, 27, 37, 29, 27, 27, 28, 28, 28, 28, 28, 41, 35, 16, 26, 37, 28, 36, 28, 26, 25, 27, 28, 28, 28, 36, 28, 36, 27, 36, 27, 27, 34, 24, 33, 23, 28, 31, 21, 37, 28, 40, 33, 14, 28, 25, 34, 27, 36, 31, 12, 25, 25, 25, 34, 27, 26, 37, 33, 20, 5, 33, 25, 21, 29, 21, 27, 27, 22, 25, 27, 28, 27, 28, 28, 28, 27, 27, 26, 26, 28, 28, 27, 27, 26, 34, 28, 28, 37, 34, 20, 6, [...]
+ RD_FILTER: SRA_READ_FILTER_PASS
+ READ: TCAGTATGAATTCCGTGGCTCGGATGCCGGATATATGTTTCGGTTGACACAGGAACCTGGAAGCCGGTTTGTGGAAATGACCGAAAATTACCGCAGTGCACGTCATGTAGTGGCTTTTGCTAACGAGTTTGTAAAGACATGCGCAGAATGAAAACGAATGCCTATTATTTCTATGCGGAAGGAGGAGGGAGTGGGTTGGAGTAACTCATCATCGGTCGTGCCTGTATGTATCAACCGCTCGTGGAAGAATTGTCTTCAGCAG
+ READ_FILTER: SRA_READ_FILTER_PASS
+ READ_LEN: 262
+ READ_SEG: [0, 262]
+ READ_START: 0
+ READ_TYPE: SRA_READ_TYPE_BIOLOGICAL
+ SIGNAL_LEN: 0
+ SPOT_COUNT: 3
+ SPOT_GROUP:
+ SPOT_ID: 2
+ SPOT_LEN: 262
+ TRIM_LEN: 262
+ TRIM_START: 0
+
+ ALIGNMENT_COUNT: 0
+ BASE_COUNT: 798
+ BIO_BASE_COUNT: 798
+ CMP_BASE_COUNT: 798
+ CMP_LINKAGE_GROUP:
+ COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
+ CSREAD: 021223230310323132313220212100222021223323122220130103132133332013121323122301300232301313201211100211212211223032322123003120303302023102030103111320011332210331020023130202021223113011030132013220123232330313100113321033223312011032123131023010313100003120312200
+ CS_KEY: T
+ CS_NATIVE: false
+ FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
+ MAX_SPOT_ID: 3
+ MIN_SPOT_ID: 1
+ NAME: 3
+ PLATFORM: SRA_PLATFORM_UNDEFINED
+PRIMARY_ALIGNMENT_ID: 0
+ QUALITY: 28, 28, 27, 28, 28, 26, 27, 37, 29, 28, 35, 26, 28, 26, 28, 20, 28, 28, 24, 27, 27, 27, 37, 29, 27, 28, 28, 41, 35, 16, 28, 28, 37, 28, 28, 28, 28, 28, 26, 27, 27, 27, 28, 28, 28, 28, 37, 29, 27, 37, 28, 37, 28, 27, 28, 28, 28, 28, 28, 27, 28, 28, 34, 27, 27, 27, 27, 27, 27, 26, 26, 27, 27, 26, 27, 34, 27, 26, 37, 33, 17, 27, 25, 27, 34, 27, 23, 27, 27, 27, 34, 27, 27, 26, 25, 28, 37, 33, 16, 27, 27, 27, 25, 27, 27, 27, 28, 26, 28, 27, 35, 28, 27, 28, 27, 27, 26, 28 [...]
+ RD_FILTER: SRA_READ_FILTER_PASS
+ READ: TCAGATCGGCAATCGTAGCATCTTCAGTTTCTCCTGAGCGATGAGAGGTAACCGTAGTATATCCATGACGATGAGCCATTTCGATTGCATCCAGTGTTTCACTGAGTGAGCCGATCTGATTTACTTAATAAGGATGGAATTGGCACATCCCACGCTCAATACCTTTCGTAAGGAAGTCTACATTGTTAACGAACGAGGTCGATCGCCGTACCCACGCTGGCGAGCGTCCACCGACTACGTTCGGTTACGTTTTTACTTACTCCC
+ READ_FILTER: SRA_READ_FILTER_PASS
+ READ_LEN: 264
+ READ_SEG: [0, 264]
+ READ_START: 0
+ READ_TYPE: SRA_READ_TYPE_BIOLOGICAL
+ SIGNAL_LEN: 0
+ SPOT_COUNT: 3
+ SPOT_GROUP:
+ SPOT_ID: 3
+ SPOT_LEN: 264
+ TRIM_LEN: 264
+ TRIM_START: 0
+
diff --git a/test/fastq-loader/expected/13.1.stdout b/test/fastq-loader/expected/13.1.stdout
new file mode 100644
index 0000000..d8b7b9b
--- /dev/null
+++ b/test/fastq-loader/expected/13.1.stdout
@@ -0,0 +1,96 @@
+ ALIGNMENT_COUNT: 0
+ BASE_COUNT: 605
+ BIO_BASE_COUNT: 605
+ CMP_BASE_COUNT: 605
+ CMP_LINKAGE_GROUP:
+ COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
+ CSREAD: 021210023002012331130011033133230233111022331010003110311021202310200113011320222330110103222021332332130211230321222111331011200023320310203112311200023320.
+ CS_KEY: T
+ CS_NATIVE: false
+ FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
+ MAX_SPOT_ID: 3
+ MIN_SPOT_ID: 1
+ NAME: 1
+ PLATFORM: SRA_PLATFORM_UNDEFINED
+PRIMARY_ALIGNMENT_ID: 0
+ QUALITY: 28, 27, 25, 22, 37, 30, 8, 24, 37, 30, 8, 29, 18, 27, 28, 27, 28, 28, 28, 37, 30, 9, 27, 35, 25, 27, 27, 27, 28, 27, 25, 37, 29, 28, 27, 28, 28, 25, 35, 26, 28, 28, 26, 28, 35, 26, 43, 36, 23, 11, 19, 24, 36, 27, 28, 28, 36, 28, 28, 26, 36, 28, 28, 28, 36, 27, 37, 31, 10, 28, 28, 36, 27, 28, 28, 27, 35, 28, 27, 23, 27, 28, 34, 26, 26, 32, 24, 31, 22, 26, 27, 24, 29, 21, 27, 27, 28, 28, 28, 27, 28, 27, 27, 32, 25, 23, 27, 25, 26, 34, 27, 27, 28, 27, 26, 27, 27, 27, 2 [...]
+ RD_FILTER: SRA_READ_FILTER_PASS
+ READ: TCAGTTTCGGGAACTATGTAAACAATACGCTAAGCGTGTTCTATGGTTTTACAATGTTCAGGATGGAAACATTGTAGGAGATAACAACCGAGAAGTATCGCTGCCTGTCGGCTGAGACACGCAACAGGGGATAGGCAAGGCACTACAGGGGATAGGN
+ READ_FILTER: SRA_READ_FILTER_PASS
+ READ_LEN: 157
+ READ_SEG: [0, 157]
+ READ_START: 0
+ READ_TYPE: SRA_READ_TYPE_BIOLOGICAL
+ SIGNAL_LEN: 0
+ SPOT_COUNT: 3
+ SPOT_GROUP:
+ SPOT_ID: 1
+ SPOT_LEN: 157
+ TRIM_LEN: 157
+ TRIM_START: 0
+
+ ALIGNMENT_COUNT: 0
+ BASE_COUNT: 605
+ BIO_BASE_COUNT: 605
+ CMP_BASE_COUNT: 605
+ CMP_LINKAGE_GROUP:
+ COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
+ CSREAD: 0212323113012102000312333011322331202002303323200230312320022301013000022123103113311011103312010311102202032031113210221021002200300012000033000001020130200232210123131000123113210202302102001012123321013310300302131332122130023313123321320021112313200221112022201330102030312122
+ CS_KEY: T
+ CS_NATIVE: false
+ FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
+ MAX_SPOT_ID: 3
+ MIN_SPOT_ID: 1
+ NAME: 2
+ PLATFORM: SRA_PLATFORM_UNDEFINED
+PRIMARY_ALIGNMENT_ID: 0
+ QUALITY: 27, 28, 19, 24, 28, 25, 28, 27, 28, 35, 26, 28, 28, 37, 28, 41, 34, 19, 4, 24, 28, 28, 28, 28, 36, 27, 28, 28, 27, 24, 28, 28, 28, 26, 36, 28, 39, 33, 13, 27, 35, 26, 28, 28, 28, 28, 36, 29, 7, 27, 35, 25, 27, 28, 27, 28, 37, 30, 9, 28, 27, 35, 26, 36, 27, 28, 44, 36, 24, 13, 3, 28, 27, 27, 26, 24, 33, 26, 25, 27, 27, 26, 26, 27, 31, 24, 26, 25, 31, 24, 28, 23, 28, 29, 20, 34, 28, 27, 25, 23, 34, 27, 28, 29, 21, 31, 23, 28, 28, 20, 27, 27, 27, 27, 28, 27, 35, 28, 27 [...]
+ RD_FILTER: SRA_READ_FILTER_PASS
+ READ: TCAGCTACATTGACCTTTTACTATAACATCTATGAAGGGATTATCGAAAGCCGTCGAAAGATTGGTAAAAAGACTACCGTGCGTGGTGTTATGAACCGTGTTCTTCCGAATGTGCTGGAGTTCAAAGAAATTTTGAAAAATAAAAAACCTTGCCTTTCGAGTTGATGCAAAACTACATCAAGGATTCAAGGGTTGACTATCAACGCAATTTAAGTACGCTGAGTAAAGCGTACTATCATCCCTGTGATGCTTTCTGTGAAGAGGTATTGGAATTACTGAG
+ READ_FILTER: SRA_READ_FILTER_PASS
+ READ_LEN: 280
+ READ_SEG: [0, 280]
+ READ_START: 0
+ READ_TYPE: SRA_READ_TYPE_BIOLOGICAL
+ SIGNAL_LEN: 0
+ SPOT_COUNT: 3
+ SPOT_GROUP:
+ SPOT_ID: 2
+ SPOT_LEN: 280
+ TRIM_LEN: 280
+ TRIM_START: 0
+
+ ALIGNMENT_COUNT: 0
+ BASE_COUNT: 605
+ BIO_BASE_COUNT: 605
+ CMP_BASE_COUNT: 605
+ CMP_LINKAGE_GROUP:
+ COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
+ CSREAD: 02120000213300010201113302312032021013211123101211131013113313011230110302210130301112310031321011330113021333300113020302202122211133101120002332031020311111200023320.
+ CS_KEY: T
+ CS_NATIVE: false
+ FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
+ MAX_SPOT_ID: 3
+ MIN_SPOT_ID: 1
+ NAME: 3
+ PLATFORM: SRA_PLATFORM_UNDEFINED
+PRIMARY_ALIGNMENT_ID: 0
+ QUALITY: 28, 28, 27, 43, 36, 23, 12, 1, 28, 27, 24, 40, 34, 18, 1, 33, 23, 32, 22, 26, 28, 28, 28, 36, 27, 28, 28, 28, 36, 27, 27, 27, 16, 26, 29, 18, 26, 27, 27, 28, 28, 28, 28, 28, 36, 28, 28, 28, 28, 28, 28, 28, 36, 28, 28, 28, 28, 28, 28, 28, 28, 36, 27, 28, 28, 28, 35, 25, 28, 33, 26, 34, 27, 28, 28, 34, 27, 28, 34, 27, 34, 27, 27, 28, 28, 28, 27, 35, 30, 10, 27, 27, 28, 27, 33, 25, 27, 28, 27, 33, 25, 27, 28, 33, 26, 28, 27, 28, 27, 28, 35, 30, 11, 28, 27, 34, 26, 32, [...]
+ RD_FILTER: SRA_READ_FILTER_PASS
+ READ: TCAGGGGGACGCCCCAAGGTGTATTCGTCCGAAGTTGCTGTGATGGTCACATGGTACATACGGTGATTGTTAAGACCATTAACACTACCCGTAGTTGTATTGTAAGTATATTTGTAAGGCCTCCTGAGACACGCAACAGGGGATAGGCAAGGCACACAGGGGATAGGN
+ READ_FILTER: SRA_READ_FILTER_PASS
+ READ_LEN: 168
+ READ_SEG: [0, 168]
+ READ_START: 0
+ READ_TYPE: SRA_READ_TYPE_BIOLOGICAL
+ SIGNAL_LEN: 0
+ SPOT_COUNT: 3
+ SPOT_GROUP:
+ SPOT_ID: 3
+ SPOT_LEN: 168
+ TRIM_LEN: 168
+ TRIM_START: 0
+
diff --git a/test/fastq-loader/expected/14.0.stdout b/test/fastq-loader/expected/14.0.stdout
new file mode 100755
index 0000000..821c6a6
--- /dev/null
+++ b/test/fastq-loader/expected/14.0.stdout
@@ -0,0 +1,32 @@
+ ALIGNMENT_COUNT: 0, 0
+ BASE_COUNT: 76
+ BIO_BASE_COUNT: 76
+ CMP_BASE_COUNT: 76
+ CMP_LINKAGE_GROUP:
+ COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
+ CSREAD: 1223103303112233001010000000000000000000000000020330103311100011101102130111
+ CS_KEY: TT
+ CS_NATIVE: false
+ FIXED_SPOT_LEN: 76
+ LINKAGE_GROUP:
+ MAX_SPOT_ID: 1
+ MIN_SPOT_ID: 1
+ NAME: 1
+ PLATFORM: SRA_PLATFORM_UNDEFINED
+PRIMARY_ALIGNMENT_ID: 0, 0
+ QUALITY: 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
+ RD_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
+ READ: GAGCAATAATGTCTATTTGGTTTTTTTTTTTTTTTTTTTTTTTTTTTCCGCCAATACACCCCACAACAAGTAACAC
+ READ_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
+ READ_LEN: 76, 0
+ READ_SEG: [0, 76], [76, 0]
+ READ_START: 0, 76
+ READ_TYPE: SRA_READ_TYPE_BIOLOGICAL, SRA_READ_TYPE_TECHNICAL
+ SIGNAL_LEN: 0
+ SPOT_COUNT: 1
+ SPOT_GROUP:
+ SPOT_ID: 1
+ SPOT_LEN: 76
+ TRIM_LEN: 76
+ TRIM_START: 0
+
diff --git a/test/fastq-loader/expected/2.1.1.stdout b/test/fastq-loader/expected/2.1.1.stdout
index 39f13f2..8381971 100644
--- a/test/fastq-loader/expected/2.1.1.stdout
+++ b/test/fastq-loader/expected/2.1.1.stdout
@@ -3,8 +3,10 @@
BIO_BASE_COUNT: 12
CMP_BASE_COUNT: 12
CMP_CSREAD: 123123
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CS_KEY: TT
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -25,8 +27,10 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BIO_BASE_COUNT: 12
CMP_BASE_COUNT: 12
CMP_CSREAD: 123123
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CS_KEY: GT
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/expected/2.1.stdout b/test/fastq-loader/expected/2.1.stdout
index fdfe9dd..a4e43ec 100644
--- a/test/fastq-loader/expected/2.1.stdout
+++ b/test/fastq-loader/expected/2.1.stdout
@@ -2,17 +2,19 @@
BASE_COUNT: 148
BIO_BASE_COUNT: 148
CMP_BASE_COUNT: 148
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 01020330101302022131302322022022032222311023120230220132002100031300203330
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 74
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0, 0
- QUALITY: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ QUALITY: 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
RD_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
READ: TGGAATAACCATTCCTCATGCCTAGAAGAAGAATCTCTACAAGCAGGATTCTTGCTTTCAAAATGCCCTTATAA
READ_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
@@ -32,17 +34,19 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BASE_COUNT: 148
BIO_BASE_COUNT: 148
CMP_BASE_COUNT: 148
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 00131012333231011230202133203233112022003112203010033321302212200030111213
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 74
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0, 0
- QUALITY: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ QUALITY: 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
RD_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
READ: TTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTA
READ_FILTER: SRA_READ_FILTER_PASS, SRA_READ_FILTER_PASS
diff --git a/test/fastq-loader/expected/2.5.stdout b/test/fastq-loader/expected/2.5.stdout
index 78d3c21..4b49549 100644
--- a/test/fastq-loader/expected/2.5.stdout
+++ b/test/fastq-loader/expected/2.5.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 66
BIO_BASE_COUNT: 66
CMP_BASE_COUNT: 66
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 301212321010213302323201331023331
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 33
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 66
BIO_BASE_COUNT: 66
CMP_BASE_COUNT: 66
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 311203221313213231002230003322030
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 33
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/expected/2.6.stdout b/test/fastq-loader/expected/2.6.stdout
index c7db9a3..5ab3dcb 100644
--- a/test/fastq-loader/expected/2.6.stdout
+++ b/test/fastq-loader/expected/2.6.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 66
BIO_BASE_COUNT: 66
CMP_BASE_COUNT: 66
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 301212321010213302323201331023331311203221313213231002230003322030
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 66
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 66
- TMP_KEY_ID: 0
TRIM_LEN: 66
TRIM_START: 0
diff --git a/test/fastq-loader/expected/2.7.stdout b/test/fastq-loader/expected/2.7.stdout
index 32a917b..ee4e42e 100644
--- a/test/fastq-loader/expected/2.7.stdout
+++ b/test/fastq-loader/expected/2.7.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 33
BIO_BASE_COUNT: 33
CMP_BASE_COUNT: 33
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 301212321010213302323201331023331
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 33
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 33
- TMP_KEY_ID: 0
TRIM_LEN: 33
TRIM_START: 0
diff --git a/test/fastq-loader/expected/2.8.1.stdout b/test/fastq-loader/expected/2.8.1.stdout
index 3706e68..5389458 100644
--- a/test/fastq-loader/expected/2.8.1.stdout
+++ b/test/fastq-loader/expected/2.8.1.stdout
@@ -2,17 +2,19 @@
BASE_COUNT: 6
BIO_BASE_COUNT: 6
CMP_BASE_COUNT: 6
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: ..3
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 3
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0
- QUALITY: 0, 0, 0
+ QUALITY: 30, 30, 30
RD_FILTER: SRA_READ_FILTER_PASS
READ: NTA
READ_FILTER: SRA_READ_FILTER_PASS
@@ -32,17 +34,19 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 6
BIO_BASE_COUNT: 6
CMP_BASE_COUNT: 6
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: ...
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 3
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0
- QUALITY: 0, 0, 0
+ QUALITY: 30, 30, 30
RD_FILTER: SRA_READ_FILTER_PASS
READ: NNT
READ_FILTER: SRA_READ_FILTER_PASS
diff --git a/test/fastq-loader/expected/2.8.stdout b/test/fastq-loader/expected/2.8.stdout
index 17db95c..b6ed399 100644
--- a/test/fastq-loader/expected/2.8.stdout
+++ b/test/fastq-loader/expected/2.8.stdout
@@ -2,17 +2,19 @@
BASE_COUNT: 6
BIO_BASE_COUNT: 6
CMP_BASE_COUNT: 6
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: ..3
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 3
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0
- QUALITY: 0, 0, 0
+ QUALITY: 30, 30, 30
RD_FILTER: SRA_READ_FILTER_PASS
READ: NTA
READ_FILTER: SRA_READ_FILTER_PASS
@@ -32,17 +34,19 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 6
BIO_BASE_COUNT: 6
CMP_BASE_COUNT: 6
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: ..3
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 3
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
PLATFORM: SRA_PLATFORM_UNDEFINED
PRIMARY_ALIGNMENT_ID: 0
- QUALITY: 0, 0, 0
+ QUALITY: 30, 30, 30
RD_FILTER: SRA_READ_FILTER_PASS
READ: NTA
READ_FILTER: SRA_READ_FILTER_PASS
diff --git a/test/fastq-loader/expected/3.1.stdout b/test/fastq-loader/expected/3.1.stdout
index 701afb2..22ea79c 100644
--- a/test/fastq-loader/expected/3.1.stdout
+++ b/test/fastq-loader/expected/3.1.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 144
BIO_BASE_COUNT: 144
CMP_BASE_COUNT: 144
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 112332022230202..1120022110..03..0..
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 4
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 144
BIO_BASE_COUNT: 144
CMP_BASE_COUNT: 144
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 122311322021001....32101321......2..
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 4
MIN_SPOT_ID: 1
NAME: 2
@@ -62,11 +66,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 144
BIO_BASE_COUNT: 144
CMP_BASE_COUNT: 144
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 312102231132..2..00....12...........
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 4
MIN_SPOT_ID: 1
NAME: 3
@@ -92,11 +98,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 144
BIO_BASE_COUNT: 144
CMP_BASE_COUNT: 144
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 221022311322201....30030211......3..
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 4
MIN_SPOT_ID: 1
NAME: 4
diff --git a/test/fastq-loader/expected/4.2.stdout b/test/fastq-loader/expected/4.2.stdout
index 4aa7d25..8aa9ae8 100644
--- a/test/fastq-loader/expected/4.2.stdout
+++ b/test/fastq-loader/expected/4.2.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 36
BIO_BASE_COUNT: 36
CMP_BASE_COUNT: 36
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 112332022230202..1120022110..03..0..
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 36
- TMP_KEY_ID: 0
TRIM_LEN: 36
TRIM_START: 0
diff --git a/test/fastq-loader/expected/4.4.stdout b/test/fastq-loader/expected/4.4.stdout
index 7d001d0..29bb4c8 100644
--- a/test/fastq-loader/expected/4.4.stdout
+++ b/test/fastq-loader/expected/4.4.stdout
@@ -3,8 +3,10 @@
BIO_BASE_COUNT: 105
CMP_BASE_COUNT: 105
CMP_CSREAD: 12312312312312312312312312312312312
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CS_KEY: GT
+ LINKAGE_GROUP:
MAX_SPOT_ID: 3
MIN_SPOT_ID: 1
NAME: 1
@@ -25,8 +27,10 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BIO_BASE_COUNT: 105
CMP_BASE_COUNT: 105
CMP_CSREAD: 12312312312312312312312312312312312
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CS_KEY: GT
+ LINKAGE_GROUP:
MAX_SPOT_ID: 3
MIN_SPOT_ID: 1
NAME: 2
@@ -47,8 +51,10 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BIO_BASE_COUNT: 105
CMP_BASE_COUNT: 105
CMP_CSREAD: 12312312312312312312312312312312312
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CS_KEY: GT
+ LINKAGE_GROUP:
MAX_SPOT_ID: 3
MIN_SPOT_ID: 1
NAME: 3
diff --git a/test/fastq-loader/expected/4.6.stdout b/test/fastq-loader/expected/4.6.stdout
index 5b19de9..2d798b5 100644
--- a/test/fastq-loader/expected/4.6.stdout
+++ b/test/fastq-loader/expected/4.6.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 36
BIO_BASE_COUNT: 36
CMP_BASE_COUNT: 36
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 112332022230202..1120022110..03..0..
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 36
- TMP_KEY_ID: 0
TRIM_LEN: 36
TRIM_START: 0
diff --git a/test/fastq-loader/expected/5.0.stdout b/test/fastq-loader/expected/5.0.stdout
index 1e9793c..377f056 100644
--- a/test/fastq-loader/expected/5.0.stdout
+++ b/test/fastq-loader/expected/5.0.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 36
BIO_BASE_COUNT: 36
CMP_BASE_COUNT: 36
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 112332022230202..1120022110..03..0..
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 36
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 36
- TMP_KEY_ID: 0
TRIM_LEN: 36
TRIM_START: 0
diff --git a/test/fastq-loader/expected/6.0.stdout b/test/fastq-loader/expected/6.0.stdout
index df6b548..f162102 100644
--- a/test/fastq-loader/expected/6.0.stdout
+++ b/test/fastq-loader/expected/6.0.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 33333113001122113112313102030222133122230321033330003303123012303302102200033003011212211003120000330322030230111230023000331031322331013122211000001112132020113221212022310122001303300031130003321103303120130022220212122000003122200012300330033000311110001103033003330301230033021210033003
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 0120230102202301112310113213110012202220220222322023022130220320202112303211020022210222022022021121310130201332031103210011302222010211021210231301020303202212303210211330322111311122200220131133120121202301320220120112212332211222121001
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 2
@@ -62,11 +66,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: .
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 3
@@ -92,11 +98,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 3233002222000311131011112200120012023120111002313221303322200232101021320113203131113033111022010111033201333100123131330333103123103222323120100123231
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 4
@@ -122,11 +130,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 20223123320211200021310032023022113211020213231200100101020333313100120232001001121212120333033312111130201212320110020211321111111011101012102302211312210301021213112121103330001222301133102212210100011002111030120223123001130013200000..0000213
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 5
@@ -152,11 +162,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: .
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 6
@@ -182,11 +194,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 223200321003103331210230003113330102230133303001022311100130130130131100321130133000012310133313120110010203102331310332111310031021031232200102023210011110102223120033232133321130013221231231102322000101220220002200102131111102333001221113300123201102323233323230223032203330223213323101
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 7
@@ -212,11 +226,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 312332130330013213131311103130100220330003132112211210312031220331223032033112121022101200020112122023001312220111111111111111111111111111232
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 8
@@ -242,11 +258,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 3
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 9
@@ -272,11 +290,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 1113102220321220323011002113132101023311102023132320203003331103221213312231022232120110210122020113123200020011332122312101301202210221013202122020003213210102101021102012100330320023102103000110102300112013212333113010003103101221222211022111033003
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 10
@@ -302,11 +322,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 33311333221321211222302103222110103211322212002132122301203222112123122333113
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 11
@@ -332,11 +354,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 303
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 12
@@ -362,11 +386,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1992
BIO_BASE_COUNT: 1992
CMP_BASE_COUNT: 1992
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 020211001202210203122201033110123121112132313130203112333120312022111011200121221103112021111111201133110022130210220230010312320130210113221001212321320221111102202100010312200222312330200030233002201312320012211123133221111221031312122131320310320002113131112033112211322122113000202200102031111120000233
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 13
MIN_SPOT_ID: 1
NAME: 13
diff --git a/test/fastq-loader/expected/7.1.stdout b/test/fastq-loader/expected/7.1.stdout
index 7a31be5..2647a33 100644
--- a/test/fastq-loader/expected/7.1.stdout
+++ b/test/fastq-loader/expected/7.1.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 1025
BIO_BASE_COUNT: 1025
CMP_BASE_COUNT: 1025
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 32221001232021032212221120133210330331132030111131310212320130231200330232013203300231032110333113001122130311333232301021300200321100002110112231020013122132303310001331220213313321330223331101002002230033013230020231000320033101332123232101011000130130302310102031031213302310210102212222023123212031111210200121222111301020122122023130022031231211000203330131110310000200100212313123101330333110220231213313002301111130012202000333020020223303121301310203312033023110 [...]
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1025
BIO_BASE_COUNT: 1025
CMP_BASE_COUNT: 1025
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 32221001232021032212021320133210330133132030111131310212320132221322231020312100202300020312003220322002110332013001122130133311001301021302331020020203323022300200121002213031302033303311211202123313103200132110300133220301130133202310210033312001303232321010112223301123001101020332132310013010301221222200332013012111121020121222111301201221220231302203123121100020333013331031003101002121331231033103331122112022201000230232313002322211033010002022000002213013100102 [...]
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/expected/7.2.stdout b/test/fastq-loader/expected/7.2.stdout
index 2fbe683..e5ede24 100644
--- a/test/fastq-loader/expected/7.2.stdout
+++ b/test/fastq-loader/expected/7.2.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 1033
BIO_BASE_COUNT: 1033
CMP_BASE_COUNT: 1033
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 3320110002200110011000000000002310000002200001111110022000011110002112000000110330330022003213300111110000001001320011111100000110000013211101113301100110000012311110110110001110010123000000110000001100011000001110111
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1033
BIO_BASE_COUNT: 1033
CMP_BASE_COUNT: 1033
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 20111100001100101000113030000101011011011010001110003333000110000111100011113301100330110033011101330003213332001001100033330011010100110110003211100110000033110110000000333311033001100011100132133010122010100000000033011001230330000000110123000001100033003223003213030113333003311000123033000012300000000000001111100001010100100011000011001010001101100133011010000110000111010000000111010101000110011011010112201100003300001010003210000101100013311110111000133011000321 [...]
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/expected/7.3.stdout b/test/fastq-loader/expected/7.3.stdout
index 0674152..761bfe7 100644
--- a/test/fastq-loader/expected/7.3.stdout
+++ b/test/fastq-loader/expected/7.3.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 1076
BIO_BASE_COUNT: 1076
CMP_BASE_COUNT: 1076
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 1013222012202312332112220332021212330330022330222013211133333202123102303221202133123222223222022210102222002332020301202110230033301121323222230022311123012302231012212200232222323231012221211111011221332310
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0
BASE_COUNT: 1076
BIO_BASE_COUNT: 1076
CMP_BASE_COUNT: 1076
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 10021022333303332130220223031222201202222233333322222203300211203322000303033331121333021322123333320232202222131131321312123123331222322023211331233111222311133230222233230101320032111111110130220223302220223212202211222203102222000213332203303332121120201210321221233033211132123133333333303312110333333333332110233311312220332203333303003003331231323222103032103322203231220233221230331212222320013222013022231203300101312232120101101330101303120220231200112201331312 [...]
CS_KEY: T
CS_NATIVE: false
FIXED_SPOT_LEN: 0
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/expected/8.0.stdout b/test/fastq-loader/expected/8.0.stdout
index c7db9a3..5ab3dcb 100644
--- a/test/fastq-loader/expected/8.0.stdout
+++ b/test/fastq-loader/expected/8.0.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 66
BIO_BASE_COUNT: 66
CMP_BASE_COUNT: 66
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 301212321010213302323201331023331311203221313213231002230003322030
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 66
+ LINKAGE_GROUP:
MAX_SPOT_ID: 1
MIN_SPOT_ID: 1
NAME: 1
@@ -25,7 +27,6 @@ PRIMARY_ALIGNMENT_ID: 0, 0
SPOT_GROUP:
SPOT_ID: 1
SPOT_LEN: 66
- TMP_KEY_ID: 0
TRIM_LEN: 66
TRIM_START: 0
diff --git a/test/fastq-loader/expected/9.0.stdout b/test/fastq-loader/expected/9.0.stdout
index 4ca7504..eac2d02 100644
--- a/test/fastq-loader/expected/9.0.stdout
+++ b/test/fastq-loader/expected/9.0.stdout
@@ -2,11 +2,13 @@
BASE_COUNT: 404
BIO_BASE_COUNT: 404
CMP_BASE_COUNT: 404
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 2230120003312333000101101303001013310103223001100120023010030322210223120322311200013213102011200203322132132213111122002212110220112212323122012213221002021302221333210033032200312330210133320301210321
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 202
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 1
@@ -32,11 +34,13 @@ PRIMARY_ALIGNMENT_ID: 0, 0
BASE_COUNT: 404
BIO_BASE_COUNT: 404
CMP_BASE_COUNT: 404
+ CMP_LINKAGE_GROUP:
COLOR_MATRIX: 0, 1, 2, 3, 4, 1, 0, 3, 2, 4, 2, 3, 0, 1, 4, 3, 2, 1, 0, 4, 4, 4, 4, 4, 4
CSREAD: 1131131133323131133030331132231121330123331222003300110002133123111202120023213022003330321020211202111300303030011213303300332202121220032020312021032011103132222220220023012002233111002332013120311030
CS_KEY: TT
CS_NATIVE: false
FIXED_SPOT_LEN: 202
+ LINKAGE_GROUP:
MAX_SPOT_ID: 2
MIN_SPOT_ID: 1
NAME: 2
diff --git a/test/fastq-loader/input/13.0.fasta b/test/fastq-loader/input/13.0.fasta
new file mode 100644
index 0000000..67e5df2
--- /dev/null
+++ b/test/fastq-loader/input/13.0.fasta
@@ -0,0 +1,2 @@
+@SRR390728.1.1
+GAGCAATAATGTCTATTTGGTTTTTTTTTTTTTTTTTTTTTTTTTTTCCGCCAATACACCCCACAACAAGTAACAC
diff --git a/test/fastq-loader/input/13.0.fastq b/test/fastq-loader/input/13.0.fastq
new file mode 100644
index 0000000..e6b5a3c
--- /dev/null
+++ b/test/fastq-loader/input/13.0.fastq
@@ -0,0 +1,12 @@
+@SRR000123.10 EXRHO8E16JXOK2 length=272
+TCAGCCACCGACATTTATCTGTTCTTTCTCCCAATCGTATACTGAGTACGCTATATCTGTCAATTCAACCGGTTTCAACGGGTTGCTGCCTCCCGGGGCATTGACCGTAACCAAGACTTCGTAACAGGTATTGCGGTTCAATTTCTTGGCCGCACTGACCGGAATCTGGTAATAGCTGCTCTCCATGAATTGCGGATCTCCGGTAGAACCGTCTTCATTGTGAGGCGTATAATTCATCGGGATGTTTACAATCAGGCGCACCTCCTTTTCCG
++SRR000123.10 EXRHO8E16JXOK2 length=272
+====E==F>===<E?)<=;=;F=<HB.=9HB.B97=<===;==<;======<<=====<=<D;B;<@9D=B;C>*<@7<C?*B::;::C<;FB1GC7)9;B:<;D=;<C=C=B;<<<?6<=<A9;=@7=<A99=D=C<<B;@;$;C=@8@88=<;;<:C=B=C==<<C=;C==<<=<<<;==D====C<C=;<C==<;;D=D=9==C<C=;=;B;<;@8=;<<B;==<<<C<C<<<:=FB2<;<FB198B;9<;B;<;==C=<C<GC6(B;1
+@SRR000123.11 EXRHO8E16JR2D4 length=262
+TCAGTATGAATTCCGTGGCTCGGATGCCGGATATATGTTTCGGTTGACACAGGAACCTGGAAGCCGGTTTGTGGAAATGACCGAAAATTACCGCAGTGCACGTCATGTAGTGGCTTTTGCTAACGAGTTTGTAAAGACATGCGCAGAATGAAAACGAATGCCTATTATTTCTATGCGGAAGGAGGAGGGAGTGGGTTGGAGTAACTCATCATCGGTCGTGCCTGTATGTATCAACCGCTCGTGGAAGAATTGTCTTCAGCAG
++SRR000123.11 EXRHO8E16JR2D4 length=262
+==;;===;D;C:?5=:F><==F><==E<F><<=====JD1;F=E=;:<===E=E<E<<C9B8=@6F=IB/=:C<E@-:::C<;FB5&B:6>6<<7:<=<===<<;;==<<;C==FC5'820>59969C>*=2?;#;54222254<:C;=7EA2"95<3<8B;<;C=4FB2<4<<8;B<=5@;2@;<FB2;9<@<&C=@;<78C=<874;18<;C=27(4,9/:<5<99<3==:0C=<852<:B:>67C<:0:(=?6:7<442
+@SRR000123.12 EXRHO8E16JYG2J length=264
+TCAGATCGGCAATCGTAGCATCTTCAGTTTCTCCTGAGCGATGAGAGGTAACCGTAGTATATCCATGACGATGAGCCATTTCGATTGCATCCAGTGTTTCACTGAGTGAGCCGATCTGATTTACTTAATAAGGATGGAATTGGCACATCCCACGCTCAATACCTTTCGTAAGGAAGTCTACATTGTTAACGAACGAGGTCGATCGCCGTACCCACGCTGGCGAGCGTCCACCGACTACGTTCGGTTACGTTTTTACTTACTCCC
++SRR000123.12 EXRHO8E16JYG2J length=264
+==<==;<F>=D;=;=5==9<<<F><==JD1==F=====;<<<====F><F=F=<=====<==C<<<<<<;;<<;<C<;FB2<:<C<8<<<C<<;:=FB1<<<:<<<=;=<D=<=<<;=<FB1;=@;;2=B:9/=6;2;2C<?6:999<A=(:)<5<:@8;2B;@;%9=.@;A9A<(5:2<98B=07-8.<:C=7&2B:;<8<<;:B<(<=@;$9-<=<@8<5:<73<C=8C=<<*=<<9A<:C=D=3(2GC8-"94C<569EA.
diff --git a/test/fastq-loader/input/13.1.fastq b/test/fastq-loader/input/13.1.fastq
new file mode 100644
index 0000000..213002e
--- /dev/null
+++ b/test/fastq-loader/input/13.1.fastq
@@ -0,0 +1,12 @@
+@EXRHO8E16JTGUV
+TCAGTTTCGGGAACTATGTAAACAATACGCTAAGCGTGTTCTATGGTTTTACAATGTTCAGGATGGAAACATTGTAGGAGATAACAACCGAGAAGTATCGCTGCCTGTCGGCTGAGACACGCAACAGGGGATAGGCAAGGCACTACAGGGGATAGGN
++EXRHO8E16JTGUV
+=<:7F?)9F?)>3<=<===F?*<D:<<<=<:F>=<==:D;==;=D;LE8,49E<==E==;E===E<F@+==E<==<D=<8<=C;;A9@7;<9>6<<===<=<<A:8<:;C<<=<;<<<9<:=B;88GC6(6<<C==C=C=<=<&<9=GC7):<<B:!
+@EXRHO8E16JU4WU
+TCAGCTACATTGACCTTTTACTATAACATCTATGAAGGGATTATCGAAAGCCGTCGAAAGATTGGTAAAAAGACTACCGTGCGTGGTGTTATGAACCGTGTTCTTCCGAATGTGCTGGAGTTCAAAGAAATTTTGAAAAATAAAAAACCTTGCCTTTCGAGTTGATGCAAAACTACATCAAGGATTCAAGGGTTGACTATCAACGCAATTTAAGTACGCTGAGTAAAGCGTACTATCATCCCTGTGATGCTTTCTGTGAAGAGGTATTGGAATTACTGAG
++EXRHO8E16JU4WU
+<=49=:=<=D;==F=JC4%9====E<==<9===;E=HB.<D;====E>(<D:<=<=F?*=<D;E<=ME9.$=<<;9B;:<<;;<@9;:@9=8=>5C=<:8C<=>6@8==5<<<<=<D=<9D=;FB1;B>)GC7)<HD9/%<HC92+$@:C==C=FB18<=;D==<:<=HD8,3<=<<<<C<D=<C=;C=FB1C=;:<;;<<D=;<:@8FA/C=<<<<=:<:=<9FB2<<<;;;;=<<<<FB0<<<<<==;FB19:<<=C=9:C<29C=C<C<C<<<8<4<
+@EXRHO8E16JVI1Y
+TCAGGGGGACGCCCCAAGGTGTATTCGTCCGAAGTTGCTGTGATGGTCACATGGTACATACGGTGATTGTTAAGACCATTAACACTACCCGTAGTTGTATTGTAAGTATATTTGTAAGGCCTCCTGAGACACGCAACAGGGGATAGGCAAGGCACACAGGGGATAGGN
++EXRHO8E16JVI1Y
+==<LE8-"=<9IC3"B8A7;===E<===E<<<1;>3;<<=====E=======E========E<===D:=B;C<==C<=C<C<<===<D?+<<=<B:<=<B:<=B;=<=<=D?,=<C;A:B:<?7==<=<<<==<D=<=GC6(:<=C=<C=D===<===HD8+<;=C=!
diff --git a/test/fastq-loader/input/14.0.fasta b/test/fastq-loader/input/14.0.fasta
new file mode 100644
index 0000000..67e5df2
--- /dev/null
+++ b/test/fastq-loader/input/14.0.fasta
@@ -0,0 +1,2 @@
+@SRR390728.1.1
+GAGCAATAATGTCTATTTGGTTTTTTTTTTTTTTTTTTTTTTTTTTTCCGCCAATACACCCCACAACAAGTAACAC
diff --git a/test/fastq-loader/wb-test-fastq.cpp b/test/fastq-loader/wb-test-fastq.cpp
index 5172bbe..08d7eff 100644
--- a/test/fastq-loader/wb-test-fastq.cpp
+++ b/test/fastq-loader/wb-test-fastq.cpp
@@ -908,6 +908,15 @@ FIXTURE_TEST_CASE(SequenceGetSpotGroupBarcode, LoaderFixture)
REQUIRE(SequenceIsFirst(seq));
}
+FIXTURE_TEST_CASE(SequenceGetSpotGroupUnderscore, LoaderFixture)
+{
+ REQUIRE(CreateFileGetSequence(GetName(), "@FCA5PJ4:1:1101:14707:1407#GTAGTCGC_AGCTCGGT/1\nATCG\n"));
+ REQUIRE_RC(SequenceGetSpotGroup(seq, &name, &length));
+ REQUIRE_EQ(string("GTAGTCGC_AGCTCGGT"), string(name, length));
+ REQUIRE(!SequenceIsSecond(seq));
+ REQUIRE(SequenceIsFirst(seq));
+}
+
#define TEST_PAIRED(line, paired)\
REQUIRE(CreateFileGetSequence(GetName(), line "\n" "GATT\n" "+\n" "!''*\n"));\
@@ -1308,6 +1317,17 @@ FIXTURE_TEST_CASE ( AnotherUnexpectedEOLreported, LoaderFixture )
REQUIRE(SequenceIsSecond(seq));
}
+FIXTURE_TEST_CASE ( FastqDumpOutput, LoaderFixture )
+{ // VDB-2835 source: fastq-dump SRR000123
+ REQUIRE(CreateFileGetSequence(GetName(),
+ "@SRR000123.1 EXRHO8E16JTGUV length=157\n"
+ "TCAGTTTCGGGAACTATGTAAACAATACGCTAAGCGTGTTCTATGGTTTTACAATGTTCAGGATGGAAACATTGTAGGAGATAACAACCGAGAAGTATCGCTGCCTGTCGGCTGAGACACGCAACAGGGGATAGGCAAGGCACTACAGGGGATAGGN\n"
+ "+SRR000123.1 EXRHO8E16JTGUV length=157\n"
+ "=<:7F?)9F?)>3<=<===F?*<D:<<<=<:F>=<==:D;==;=D;LE8,49E<==E==;E===E<F@+==E<==<D=<8<=C;;A9@7;<9>6<<===<=<<A:8<:;C<<=<;<<<9<:=B;88GC6(6<<C==C=C=<=<&<9=GC7):<<B:!\n"
+ ));
+ REQUIRE_RC(SequenceGetSpotName(seq, &name, &length));
+ REQUIRE_EQ(string("SRR000123.1"), string(name, length));
+}
// FIXTURE_TEST_CASE(Pacbio, LoaderFixture)
diff --git a/test/general-loader/.gitignore b/test/general-loader/.gitignore
index 6fdb9d7..08ce4e2 100644
--- a/test/general-loader/.gitignore
+++ b/test/general-loader/.gitignore
@@ -1,3 +1,2 @@
db
-schema
*.gw
diff --git a/test/general-loader/Makefile b/test/general-loader/Makefile
index 6fe2389..ca7f02d 100644
--- a/test/general-loader/Makefile
+++ b/test/general-loader/Makefile
@@ -38,7 +38,7 @@ ALL_TOOLS = \
include $(TOP)/build/Makefile.env
-INCDIRS += -I$(TOP)/tools/general-loader -DLOCAL_SCHEMA=$(VDB_INCDIR)
+INCDIRS += -I$(TOP)/tools/general-loader
$(ALL_TOOLS): makedirs
@ $(MAKE_CMD) $(TEST_BINDIR)/$@
@@ -69,9 +69,6 @@ $(TEST_BINDIR)/test-general-loader: $(TEST_GEN_LOAD_OBJ)
gen_load: test-general-loader
$(TEST_BINDIR)/test-general-loader
-vg_gen_load: test-general-loader
- valgrind --ncbi $(TEST_BINDIR)/test-general-loader
-
#-------------------------------------------------------------------------------
# test-general-writer
#
@@ -89,11 +86,6 @@ TEST_GEN_WRITE_LIB = \
$(TEST_BINDIR)/test-general-writer: $(TEST_GEN_WRITE_OBJ)
$(LP) --exe -o $@ $^ $(TEST_GEN_WRITE_LIB)
-vg_gen_write: test-general-writer
- valgrind --ncbi $(TEST_BINDIR)/test-general-writer -o gen-write-out
- rm -f gen-write-out
-
-
runtests:$(TEST_TOOLS) #piped-load-tests
piped-load-tests: $(ALL_TOOLS)
@@ -119,9 +111,6 @@ $(TEST_BINDIR)/test-gw-dumper: $(TEST_GW_DUMP_OBJ)
gw_dump: test-gw-dumper
$(TEST_BINDIR)/$^
-vg_gw_dump: test-gw-dumper
- valgrind --ncbi $(TEST_BINDIR)/test-gw-dumper
-
#-------------------------------------------------------------------------------
# general-loader tool tests
#
diff --git a/test/general-loader/expected/2.stderr b/test/general-loader/expected/2.stderr
index bac063e..9851334 100644
--- a/test/general-loader/expected/2.stderr
+++ b/test/general-loader/expected/2.stderr
@@ -1,2 +1,2 @@
-general-loader.2.5.7 err: general-loader: error "something is wrong"
-general-loader.2.5.7 err: error exists while reading file - load failed
+general-loader.2.6.2 err: general-loader: error "something is wrong"
+general-loader.2.6.2 err: error exists while reading file - load failed
diff --git a/test/general-loader/expected/2packed.stderr b/test/general-loader/expected/2packed.stderr
index bac063e..9851334 100644
--- a/test/general-loader/expected/2packed.stderr
+++ b/test/general-loader/expected/2packed.stderr
@@ -1,2 +1,2 @@
-general-loader.2.5.7 err: general-loader: error "something is wrong"
-general-loader.2.5.7 err: error exists while reading file - load failed
+general-loader.2.6.2 err: general-loader: error "something is wrong"
+general-loader.2.6.2 err: error exists while reading file - load failed
diff --git a/test/general-loader/column01 b/test/general-loader/input/column01
similarity index 91%
rename from test/general-loader/column01
rename to test/general-loader/input/column01
index 41713c2..c5ed0f9 100644
--- a/test/general-loader/column01
+++ b/test/general-loader/input/column01
@@ -1,3 +1,2 @@
-First test sentence
-Second sentence to test the stream
-Third
\ No newline at end of file
+First test sentence
+Second sentence to test the stream
diff --git a/test/general-loader/column02 b/test/general-loader/input/column02
similarity index 91%
rename from test/general-loader/column02
rename to test/general-loader/input/column02
index 41713c2..c5ed0f9 100644
--- a/test/general-loader/column02
+++ b/test/general-loader/input/column02
@@ -1,3 +1,2 @@
-First test sentence
-Second sentence to test the stream
-Third
\ No newline at end of file
+First test sentence
+Second sentence to test the stream
diff --git a/test/general-loader/test-general-loader.cpp b/test/general-loader/test-general-loader.cpp
index f425eda..eebe65c 100644
--- a/test/general-loader/test-general-loader.cpp
+++ b/test/general-loader/test-general-loader.cpp
@@ -81,7 +81,24 @@ static rc_t argsHandler(int argc, char* argv[]) {
}
TEST_SUITE_WITH_ARGS_HANDLER(GeneralLoaderTestSuite, argsHandler);
-const string ScratchDir = "./db/";
+const string ScratchDir = "./db/";
+const string DefaultSchema = ScratchDir + "default.vschema";
+
+const string DefaultSchemaText =
+ "table table1 #1.0.0 { column ascii columnAscii; column U32 columnU32; column bool columnBool; };\n"
+ "table table2 #1.0.0 { column I64 columnI64; column U8 columnU8; };\n"
+ "database root_database #1 { table table1 #1 TABLE1; table table2 #1 TABLE2; };\n";
+
+const string DefaultDatabase = "root_database";
+
+const string DefaultTable = "TABLE1";
+const string Table2 = "TABLE2";
+
+const string DefaultColumn = "columnAscii";
+const string U32Column = "columnU32";
+const string BoolColumn = "columnBool";
+const string I64Column = "columnI64";
+const string U8Column = "columnU8";
static
void
@@ -114,20 +131,7 @@ public:
{
THROW_ON_RC ( KDirectoryNativeDir ( & m_wd ) );
-#ifdef LOCAL_SCHEMA
- {
- VFSManager * vfs;
- VFSManagerMake ( & vfs );
- VPath* path;
- VFSManagerMakeSysPath ( vfs, &path, stringize ( LOCAL_SCHEMA ) );
- const String *uri = NULL;
- VPathMakeString ( path, &uri );
- m_schemaDir = string( uri->addr, uri->size );
- VPathRelease ( path );
- VFSManagerRelease ( vfs );
- }
-#endif
-
+ CreateFile ( DefaultSchema, DefaultSchemaText );
}
~GeneralLoaderFixture()
{
@@ -148,10 +152,7 @@ public:
THROW_ON_RC ( KStreamFromKFilePair ( & inStream, p_input, 0 ) );
GeneralLoader* ret = new GeneralLoader ( argv0, * inStream );
- if ( ! m_schemaDir.empty() )
- {
- ret -> AddSchemaIncludePath ( m_schemaDir );
- }
+ ret -> AddSchemaIncludePath ( ScratchDir );
THROW_ON_RC ( KStreamRelease ( inStream ) );
THROW_ON_RC ( KFileRelease ( p_input ) );
@@ -178,10 +179,7 @@ public:
THROW_ON_RC ( KStreamFromKFilePair ( & inStream, p_input, 0 ) );
GeneralLoader gl ( argv0, *inStream );
- if ( ! m_schemaDir.empty() )
- {
- gl . AddSchemaIncludePath ( m_schemaDir );
- }
+ gl . AddSchemaIncludePath ( ScratchDir );
rc_t rc = gl.Run();
bool ret;
@@ -246,7 +244,7 @@ public:
}
}
- void SetUpStream( const char* p_dbName, const string& p_schema = "align/align.vschema", const string& p_schemaName = "NCBI:align:db:alignment_sorted" )
+ void SetUpStream( const string& p_dbName, const string& p_schema = DefaultSchema, const string& p_schemaName = DefaultDatabase )
{
m_source . SchemaEvent ( p_schema, p_schemaName );
string dbName = ScratchDir + p_dbName;
@@ -256,24 +254,24 @@ public:
}
m_source . DatabaseEvent ( dbName );
}
- void SetUpStream_OneTable( const char* p_dbName, const char* p_tableName )
+ void SetUpStream_OneTable( const string& p_dbName )
{
SetUpStream( p_dbName );
- m_source . NewTableEvent ( DefaultTableId, p_tableName );
+ m_source . NewTableEvent ( DefaultTableId, DefaultTable );
}
- void OpenStream_OneTableOneColumn ( const char* p_dbName, const char* p_tableName, const char* p_columnName, size_t p_elemBits )
+ void OpenStream_OneTableOneColumn ( const string& p_dbName )
{
- SetUpStream_OneTable( p_dbName, p_tableName );
- m_source . NewColumnEvent ( DefaultColumnId, DefaultTableId, p_columnName, ( uint32_t ) p_elemBits );
+ SetUpStream_OneTable( p_dbName );
+ m_source . NewColumnEvent ( DefaultColumnId, DefaultTableId, DefaultColumn, 8 );
m_source . OpenStreamEvent();
}
- bool SetUpForIntegerCompression( const char* p_dbName )
+ bool SetUpForIntegerCompression( const string& p_dbName )
{
if ( ! TestSource::packed )
return false; // integer compaction is used in packed mode only
- m_tempSchemaFile = string ( p_dbName ) + ".vschema";
+ m_tempSchemaFile = ScratchDir + string ( p_dbName ) + ".vschema";
string schemaText =
"table table1 #1.0.0\n"
"{\n"
@@ -298,20 +296,28 @@ public:
return true;
}
- void OpenCursor( const char* p_table, const char* p_column )
+ void OpenCursor( const string& p_table, const string& p_column )
{
OpenDatabase();
const VTable * tbl;
- THROW_ON_RC ( VDatabaseOpenTableRead ( m_db, &tbl, p_table ) );
- THROW_ON_RC ( VTableCreateCursorRead ( tbl, & m_cursor ) );
-
- uint32_t idx;
- THROW_ON_RC ( VCursorAddColumn ( m_cursor, &idx, p_column ) );
- THROW_ON_RC ( VCursorOpen ( m_cursor ) );
- THROW_ON_RC ( VTableRelease ( tbl ) );
+ THROW_ON_RC ( VDatabaseOpenTableRead ( m_db, &tbl, p_table.c_str() ) );
+ try
+ {
+ THROW_ON_RC ( VTableCreateCursorRead ( tbl, & m_cursor ) );
+
+ uint32_t idx;
+ THROW_ON_RC ( VCursorAddColumn ( m_cursor, &idx, p_column.c_str() ) );
+ THROW_ON_RC ( VCursorOpen ( m_cursor ) );
+ THROW_ON_RC ( VTableRelease ( tbl ) );
+ }
+ catch (...)
+ {
+ VTableRelease ( tbl );
+ throw;
+ }
}
- template < typename T > T GetValue ( const char* p_table, const char* p_column, uint64_t p_row )
+ template < typename T > T GetValue ( const string& p_table, const string& p_column, uint64_t p_row )
{
OpenCursor( p_table, p_column );
THROW_ON_RC ( VCursorSetRowId ( m_cursor, p_row ) );
@@ -324,7 +330,7 @@ public:
return ret;
}
- template < typename T > bool IsNullValue ( const char* p_table, const char* p_column, uint64_t p_row )
+ template < typename T > bool IsNullValue ( const string& p_table, const string& p_column, uint64_t p_row )
{
OpenCursor( p_table, p_column );
THROW_ON_RC ( VCursorSetRowId ( m_cursor, p_row ) );
@@ -337,7 +343,7 @@ public:
return num_read == 0;
}
- template < typename T > T GetValueWithIndex ( const char* p_table, const char* p_column, uint64_t p_row, uint32_t p_count, size_t p_index )
+ template < typename T > T GetValueWithIndex ( const string& p_table, const string& p_column, uint64_t p_row, uint32_t p_count, size_t p_index )
{
OpenCursor( p_table, p_column );
THROW_ON_RC ( VCursorSetRowId ( m_cursor, p_row ) );
@@ -407,10 +413,10 @@ public:
const VCursor * m_cursor;
KDirectory* m_wd;
string m_tempSchemaFile;
- string m_schemaDir;
+ string m_ScratchDir;
};
-template<> std::string GeneralLoaderFixture::GetValue ( const char* p_table, const char* p_column, uint64_t p_row )
+template<> std::string GeneralLoaderFixture::GetValue ( const string& p_table, const string& p_column, uint64_t p_row )
{
OpenCursor( p_table, p_column );
THROW_ON_RC ( VCursorSetRowId ( m_cursor, p_row ) );
@@ -425,9 +431,6 @@ template<> std::string GeneralLoaderFixture::GetValue ( const char* p_table, con
std::string GeneralLoaderFixture :: argv0;
-const char* tableName = "REFERENCE";
-const char* columnName = "SPOT_GROUP";
-
FIXTURE_TEST_CASE ( EmptyInput, GeneralLoaderFixture )
{
const struct KFile * input;
@@ -521,15 +524,15 @@ FIXTURE_TEST_CASE ( BadTableName_Long, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( DuplicateTableId, GeneralLoaderFixture )
{
SetUpStream ( GetName() );
- m_source . NewTableEvent ( 1, "REFERENCE" );
- m_source . NewTableEvent ( 1, "SEQUENCE" ); // same Id
+ m_source . NewTableEvent ( 1, DefaultTable );
+ m_source . NewTableEvent ( 1, "differentTable" ); // same Id
REQUIRE ( Run ( m_source . MakeSource (), SILENT_RC ( rcExe, rcFile, rcReading, rcTable, rcExists ) ) );
}
FIXTURE_TEST_CASE ( BadColumnName, GeneralLoaderFixture )
{
SetUpStream ( GetName() );
- m_source . NewTableEvent ( DefaultTableId, "REFERENCE" );
+ m_source . NewTableEvent ( DefaultTableId, DefaultTable );
m_source . NewColumnEvent ( 1, DefaultTableId, "nosuchcolumn", 8 );
REQUIRE ( Run ( m_source . MakeSource (), SILENT_RC ( rcVDB, rcCursor, rcUpdating, rcColumn, rcNotFound ) ) );
}
@@ -537,23 +540,23 @@ FIXTURE_TEST_CASE ( BadColumnName, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( BadTableId, GeneralLoaderFixture )
{
SetUpStream ( GetName() );
- m_source . NewTableEvent ( 1, "REFERENCE" );
- m_source . NewColumnEvent ( 1, 2, "SPOT_GROUP", 8 );
+ m_source . NewTableEvent ( 1, DefaultTable );
+ m_source . NewColumnEvent ( 1, 2, DefaultColumn, 8 );
REQUIRE ( Run ( m_source . MakeSource (), SILENT_RC ( rcExe, rcFile, rcReading, rcTable, rcInvalid ) ) );
}
FIXTURE_TEST_CASE ( DuplicateColumnName, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), "REFERENCE" );
- m_source . NewColumnEvent ( 1, DefaultTableId, "SPOT_GROUP", 8 );
- m_source . NewColumnEvent ( 2, DefaultTableId, "SPOT_GROUP", 8 );
+ SetUpStream_OneTable ( GetName() );
+ m_source . NewColumnEvent ( 1, DefaultTableId, DefaultColumn, 8 );
+ m_source . NewColumnEvent ( 2, DefaultTableId, DefaultColumn, 8 );
REQUIRE ( Run ( m_source . MakeSource (), SILENT_RC ( rcVDB, rcCursor, rcUpdating, rcColumn, rcExists ) ) );
}
FIXTURE_TEST_CASE ( DuplicateColumnId, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), "REFERENCE" );
- m_source . NewColumnEvent ( 1, DefaultTableId, "SPOT_GROUP", 8 );
+ SetUpStream_OneTable ( GetName() );
+ m_source . NewColumnEvent ( 1, DefaultTableId, DefaultColumn, 8 );
m_source . NewColumnEvent ( 1, DefaultTableId, "NAME", 8 );
REQUIRE ( Run ( m_source . MakeSource (), SILENT_RC ( rcExe, rcFile, rcReading, rcColumn, rcExists ) ) );
}
@@ -891,22 +894,19 @@ FIXTURE_TEST_CASE ( ColMetadataNode, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( NoData, GeneralLoaderFixture )
{
SetUpStream ( GetName() );
- m_source . NewTableEvent ( 2, tableName ); // ids do not have to be consecutive
- m_source . NewColumnEvent ( 222, 2, columnName, 8 );
+ m_source . NewTableEvent ( 2, DefaultTable ); // ids do not have to be consecutive
+ m_source . NewColumnEvent ( 222, 2, DefaultColumn, 8 );
m_source . OpenStreamEvent();
m_source . CloseStreamEvent();
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- OpenCursor( tableName, columnName );
- uint64_t count;
- REQUIRE_RC ( VCursorIdRange ( m_cursor, 1, NULL, &count ) );
- REQUIRE_EQ ( (uint64_t)0, count );
+ REQUIRE_THROW ( OpenCursor( DefaultTable, DefaultColumn ) );
}
FIXTURE_TEST_CASE ( Chunk_BadColumnId, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
m_source . CellDataEvent( /*bad*/2, string("blah") );
m_source . CloseStreamEvent();
@@ -916,7 +916,7 @@ FIXTURE_TEST_CASE ( Chunk_BadColumnId, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( WriteNoCommit, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value = "a single character string cell";
m_source . CellDataEvent( DefaultColumnId, value );
@@ -924,15 +924,12 @@ FIXTURE_TEST_CASE ( WriteNoCommit, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- OpenCursor( tableName, columnName );
- uint64_t count;
- REQUIRE_RC ( VCursorIdRange ( m_cursor, 1, NULL, &count ) );
- REQUIRE_EQ ( (uint64_t)0, count );
+ REQUIRE_THROW ( OpenCursor( DefaultTable, DefaultColumn ) );
}
FIXTURE_TEST_CASE ( CommitBadTableId, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
m_source . NextRowEvent ( /*bad*/2 );
m_source . CloseStreamEvent();
@@ -942,7 +939,7 @@ FIXTURE_TEST_CASE ( CommitBadTableId, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( OneColumnOneCellOneChunk, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value = "a single character string cell";
m_source . CellDataEvent( DefaultColumnId, value );
@@ -951,12 +948,12 @@ FIXTURE_TEST_CASE ( OneColumnOneCellOneChunk, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 1 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
}
FIXTURE_TEST_CASE ( OneColumnOneCellOneChunk_Long, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value ( GeneralLoader :: MaxPackedString + 1, 'x' );
m_source . CellDataEvent( DefaultColumnId, value );
@@ -965,12 +962,12 @@ FIXTURE_TEST_CASE ( OneColumnOneCellOneChunk_Long, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 1 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
}
FIXTURE_TEST_CASE ( OneColumnOneCellManyChunks, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value1 = "first";
m_source . CellDataEvent( DefaultColumnId, value1 );
@@ -983,7 +980,7 @@ FIXTURE_TEST_CASE ( OneColumnOneCellManyChunks, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value1 + value2 + value3, GetValue<string> ( tableName, columnName, 1 ) );
+ REQUIRE_EQ ( value1 + value2 + value3, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
}
FIXTURE_TEST_CASE ( IntegerCompression_MinimumCompression, GeneralLoaderFixture )
@@ -1131,7 +1128,7 @@ FIXTURE_TEST_CASE ( IntegerCompression_MultipleValues, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( OneColumnDefaultNoWrite, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value = "this be my default";
m_source . CellDefaultEvent( DefaultColumnId, value );
@@ -1141,12 +1138,12 @@ FIXTURE_TEST_CASE ( OneColumnDefaultNoWrite, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 1 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
}
FIXTURE_TEST_CASE ( OneColumnDefaultNoWrite_Long, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value ( GeneralLoader :: MaxPackedString + 1, 'x' );
m_source . CellDefaultEvent( DefaultColumnId, value );
@@ -1156,12 +1153,12 @@ FIXTURE_TEST_CASE ( OneColumnDefaultNoWrite_Long, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 1 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
}
FIXTURE_TEST_CASE ( MoveAhead, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value = "this be my default";
m_source . CellDefaultEvent( DefaultColumnId, value );
@@ -1170,15 +1167,15 @@ FIXTURE_TEST_CASE ( MoveAhead, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 1 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 2 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 3 ) );
- REQUIRE_THROW ( GetValue<string> ( tableName, columnName, 4 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 2 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 3 ) );
+ REQUIRE_THROW ( GetValue<string> ( DefaultTable, DefaultColumn, 4 ) );
}
FIXTURE_TEST_CASE ( OneColumnDefaultOverwite, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string valueDflt = "this be my default";
m_source . CellDefaultEvent( DefaultColumnId, valueDflt );
@@ -1189,12 +1186,12 @@ FIXTURE_TEST_CASE ( OneColumnDefaultOverwite, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value, GetValue<string> ( tableName, columnName, 1 ) );
+ REQUIRE_EQ ( value, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
}
FIXTURE_TEST_CASE ( OneColumnChangeDefault, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value1 = "this be my first default";
m_source . CellDefaultEvent( DefaultColumnId, value1 );
@@ -1208,13 +1205,13 @@ FIXTURE_TEST_CASE ( OneColumnChangeDefault, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value1, GetValue<string> ( tableName, columnName, 1 ) );
- REQUIRE_EQ ( value2, GetValue<string> ( tableName, columnName, 2 ) );
+ REQUIRE_EQ ( value1, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
+ REQUIRE_EQ ( value2, GetValue<string> ( DefaultTable, DefaultColumn, 2 ) );
}
FIXTURE_TEST_CASE ( OneColumnDataAndDefaultsMixed, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
string value1 = "first value";
m_source . CellDataEvent( DefaultColumnId, value1 );
@@ -1237,21 +1234,19 @@ FIXTURE_TEST_CASE ( OneColumnDataAndDefaultsMixed, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value1, GetValue<string> ( tableName, columnName, 1 ) );
- REQUIRE_EQ ( default1, GetValue<string> ( tableName, columnName, 2 ) );
- REQUIRE_EQ ( default1, GetValue<string> ( tableName, columnName, 3 ) );
- REQUIRE_EQ ( value2, GetValue<string> ( tableName, columnName, 4 ) );
- REQUIRE_EQ ( default2, GetValue<string> ( tableName, columnName, 5 ) );
+ REQUIRE_EQ ( value1, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
+ REQUIRE_EQ ( default1, GetValue<string> ( DefaultTable, DefaultColumn, 2 ) );
+ REQUIRE_EQ ( default1, GetValue<string> ( DefaultTable, DefaultColumn, 3 ) );
+ REQUIRE_EQ ( value2, GetValue<string> ( DefaultTable, DefaultColumn, 4 ) );
+ REQUIRE_EQ ( default2, GetValue<string> ( DefaultTable, DefaultColumn, 5 ) );
}
FIXTURE_TEST_CASE ( TwoColumnsFullRow, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), tableName );
+ SetUpStream_OneTable ( GetName() );
- const char* columnName1 = "SPOT_GROUP";
- const char* columnName2 = "MAX_SEQ_LEN";
- m_source . NewColumnEvent ( 1, DefaultTableId, columnName1, 8 );
- m_source . NewColumnEvent ( 2, DefaultTableId, columnName2, 32 );
+ m_source . NewColumnEvent ( 1, DefaultTableId, DefaultColumn, 8 );
+ m_source . NewColumnEvent ( 2, DefaultTableId, U32Column, 32 );
m_source . OpenStreamEvent();
string value1 = "value1";
@@ -1263,18 +1258,16 @@ FIXTURE_TEST_CASE ( TwoColumnsFullRow, GeneralLoaderFixture )
m_source . CloseStreamEvent();
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value1, GetValue<string> ( tableName, columnName1, 1 ) );
- REQUIRE_EQ ( value2, GetValue<uint32_t> ( tableName, columnName2, 1 ) );
+ REQUIRE_EQ ( value1, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
+ REQUIRE_EQ ( value2, GetValue<uint32_t> ( DefaultTable, U32Column, 1 ) );
}
FIXTURE_TEST_CASE ( TwoColumnsIncompleteRow, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), tableName );
+ SetUpStream_OneTable ( GetName() );
- const char* columnName1 = "SPOT_GROUP";
- const char* columnName2 = "MAX_SEQ_LEN";
- m_source . NewColumnEvent ( 1, DefaultTableId, columnName1, 8 );
- m_source . NewColumnEvent ( 2, DefaultTableId, columnName2, 32 );
+ m_source . NewColumnEvent ( 1, DefaultTableId, DefaultColumn, 8 );
+ m_source . NewColumnEvent ( 2, DefaultTableId, U32Column, 32 );
m_source . OpenStreamEvent();
string value1 = "value1";
@@ -1289,12 +1282,10 @@ FIXTURE_TEST_CASE ( TwoColumnsIncompleteRow, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( TwoColumnsPartialRowWithDefaults, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), tableName );
+ SetUpStream_OneTable ( GetName() );
- const char* columnName1 = "SPOT_GROUP";
- const char* columnName2 = "MAX_SEQ_LEN";
- m_source . NewColumnEvent ( 1, DefaultTableId, columnName1, 8 );
- m_source . NewColumnEvent ( 2, DefaultTableId, columnName2, 32 );
+ m_source . NewColumnEvent ( 1, DefaultTableId, DefaultColumn, 8 );
+ m_source . NewColumnEvent ( 2, DefaultTableId, U32Column, 32 );
m_source . OpenStreamEvent();
string value1 = "value1";
@@ -1308,20 +1299,17 @@ FIXTURE_TEST_CASE ( TwoColumnsPartialRowWithDefaults, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value1, GetValue<string> ( tableName, columnName1, 1 ) );
- REQUIRE_EQ ( value2, GetValue<uint32_t> ( tableName, columnName2, 1 ) );
+ REQUIRE_EQ ( value1, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
+ REQUIRE_EQ ( value2, GetValue<uint32_t> ( DefaultTable, U32Column, 1 ) );
}
FIXTURE_TEST_CASE ( TwoColumnsPartialRowWithDefaultsAndOverride, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), tableName );
-
- const char* columnName1 = "SPOT_GROUP";
- const char* columnName2 = "MAX_SEQ_LEN";
- const char* columnName3 = "CIRCULAR";
- m_source . NewColumnEvent ( 1, DefaultTableId, columnName1, 8 );
- m_source . NewColumnEvent ( 2, DefaultTableId, columnName2, 32 );
- m_source . NewColumnEvent ( 3, DefaultTableId, columnName3, 8 );
+ SetUpStream_OneTable ( GetName() );
+
+ m_source . NewColumnEvent ( 1, DefaultTableId, DefaultColumn, 8 );
+ m_source . NewColumnEvent ( 2, DefaultTableId, U32Column, 32 );
+ m_source . NewColumnEvent ( 3, DefaultTableId, BoolColumn, 8 );
m_source . OpenStreamEvent();
string value1 = "value1";
@@ -1337,17 +1325,15 @@ FIXTURE_TEST_CASE ( TwoColumnsPartialRowWithDefaultsAndOverride, GeneralLoaderFi
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( value1, GetValue<string> ( tableName, columnName1, 1 ) ); // explicit
- REQUIRE_EQ ( value2, GetValue<uint32_t> ( tableName, columnName2, 1 ) ); // default
- REQUIRE_EQ ( value3, GetValue<bool> ( tableName, columnName3, 1 ) ); // not the default
+ REQUIRE_EQ ( value1, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) ); // explicit
+ REQUIRE_EQ ( value2, GetValue<uint32_t> ( DefaultTable, U32Column, 1 ) ); // default
+ REQUIRE_EQ ( value3, GetValue<bool> ( DefaultTable, BoolColumn, 1 ) ); // not the default
}
FIXTURE_TEST_CASE ( EmptyDefault_String, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), tableName );
+ OpenStream_OneTableOneColumn ( GetName() );
- const char* columnName1 = "SPOT_GROUP";
- m_source . NewColumnEvent ( 1, DefaultTableId, columnName1, 8 );
m_source . OpenStreamEvent();
m_source . CellEmptyDefaultEvent( 1 );
@@ -1357,15 +1343,14 @@ FIXTURE_TEST_CASE ( EmptyDefault_String, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( string(), GetValue<string> ( tableName, columnName1, 1 ) );
+ REQUIRE_EQ ( string(), GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
}
FIXTURE_TEST_CASE ( EmptyDefault_Int, GeneralLoaderFixture )
{
- SetUpStream_OneTable ( GetName(), tableName );
+ SetUpStream_OneTable ( GetName() );
- const char* columnName1 = "MAX_SEQ_LEN";
- m_source . NewColumnEvent ( 1, DefaultTableId, columnName1, 32 );
+ m_source . NewColumnEvent ( 1, DefaultTableId, U32Column, 32 );
m_source . OpenStreamEvent();
m_source . CellEmptyDefaultEvent( 1 );
@@ -1375,26 +1360,20 @@ FIXTURE_TEST_CASE ( EmptyDefault_Int, GeneralLoaderFixture )
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE ( IsNullValue<uint32_t> ( tableName, columnName1, 1 ) );
+ REQUIRE ( IsNullValue<uint32_t> ( DefaultTable, U32Column, 1 ) );
}
FIXTURE_TEST_CASE ( MultipleTables_Multiple_Columns_MultipleRows, GeneralLoaderFixture )
{
SetUpStream ( GetName() );
- const char* table1 = "REFERENCE";
- const char* table1column1 = "SPOT_GROUP"; // ascii
- const char* table1column2 = "MAX_SEQ_LEN"; // u32
- m_source . NewTableEvent ( 100, table1 );
- m_source . NewColumnEvent ( 1, 100, table1column1, 8 );
- m_source . NewColumnEvent ( 2, 100, table1column2, 32 );
+ m_source . NewTableEvent ( 100, DefaultTable );
+ m_source . NewColumnEvent ( 1, 100, DefaultColumn, 8 );
+ m_source . NewColumnEvent ( 2, 100, U32Column, 32 );
- const char* table2 = "SEQUENCE";
- const char* table2column1 = "PRIMARY_ALIGNMENT_ID"; // i64
- const char* table2column2 = "ALIGNMENT_COUNT"; // u8
- m_source . NewTableEvent ( 200, table2 );
- m_source . NewColumnEvent ( 3, 200, table2column1, 64 );
- m_source . NewColumnEvent ( 4, 200, table2column2, 8 );
+ m_source . NewTableEvent ( 200, Table2 );
+ m_source . NewColumnEvent ( 3, 200, I64Column, 64 );
+ m_source . NewColumnEvent ( 4, 200, U8Column, 8 );
m_source . OpenStreamEvent();
@@ -1426,15 +1405,15 @@ FIXTURE_TEST_CASE ( MultipleTables_Multiple_Columns_MultipleRows, GeneralLoaderF
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
- REQUIRE_EQ ( t1c1v1, GetValue<string> ( table1, table1column1, 1 ) );
- REQUIRE_EQ ( t1c2v1, GetValue<uint32_t> ( table1, table1column2, 1 ) );
- REQUIRE_EQ ( t2c1v1, GetValue<int64_t> ( table2, table2column1, 1 ) );
- REQUIRE_EQ ( t2c2v1, GetValue<uint8_t> ( table2, table2column2, 1 ) );
+ REQUIRE_EQ ( t1c1v1, GetValue<string> ( DefaultTable, DefaultColumn, 1 ) );
+ REQUIRE_EQ ( t1c2v1, GetValue<uint32_t> ( DefaultTable, U32Column, 1 ) );
+ REQUIRE_EQ ( t2c1v1, GetValue<int64_t> ( Table2, I64Column, 1 ) );
+ REQUIRE_EQ ( t2c2v1, GetValue<uint8_t> ( Table2, U8Column, 1 ) );
- REQUIRE_EQ ( t1c1v2, GetValue<string> ( table1, table1column1, 2 ) );
- REQUIRE_EQ ( t1c2v2, GetValue<uint32_t> ( table1, table1column2, 2 ) );
- REQUIRE_EQ ( t2c1v2, GetValue<int64_t> ( table2, table2column1, 2 ) );
- REQUIRE_EQ ( t2c2v2, GetValue<uint8_t> ( table2, table2column2, 2 ) );
+ REQUIRE_EQ ( t1c1v2, GetValue<string> ( DefaultTable, DefaultColumn, 2 ) );
+ REQUIRE_EQ ( t1c2v2, GetValue<uint32_t> ( DefaultTable, U32Column, 2 ) );
+ REQUIRE_EQ ( t2c1v2, GetValue<int64_t> ( Table2, I64Column, 2 ) );
+ REQUIRE_EQ ( t2c2v2, GetValue<uint8_t> ( Table2, U8Column, 2 ) );
}
FIXTURE_TEST_CASE ( AdditionalSchemaIncludePaths_Single, GeneralLoaderFixture )
@@ -1580,7 +1559,7 @@ FIXTURE_TEST_CASE ( AdditionalSchemaFiles_Multiple, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( ErrorMessage, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
m_source . OpenStreamEvent();
m_source . ErrorMessageEvent ( "error message" );
m_source . CloseStreamEvent();
@@ -1590,7 +1569,7 @@ FIXTURE_TEST_CASE ( ErrorMessage, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( ErrorMessage_Long, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
m_source . OpenStreamEvent();
m_source . ErrorMessageEvent ( string ( 257, 'x' ) );
m_source . CloseStreamEvent();
@@ -1600,9 +1579,16 @@ FIXTURE_TEST_CASE ( ErrorMessage_Long, GeneralLoaderFixture )
FIXTURE_TEST_CASE ( LogMessage, GeneralLoaderFixture )
{
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+//FullLog();
+// uncomment the line above to see the message on stdout, eg:
+// 2016-01-05T18:45:01 test-general-loader.1 info: general-loader: log from front-end-app: "some log message"
+
+ OpenStream_OneTableOneColumn ( GetName() );
+ const string SoftwareName = "front-end-app";
+ const string Version = "2.1.1";
+ m_source . SoftwareNameEvent ( SoftwareName, Version );
m_source . OpenStreamEvent();
- m_source . LogMessageEvent ( "log message" );
+ m_source . LogMessageEvent ( "some log message" );
m_source . CloseStreamEvent();
REQUIRE ( Run ( m_source . MakeSource (), 0 ) );
@@ -1614,7 +1600,7 @@ FIXTURE_TEST_CASE ( ProgressMessage, GeneralLoaderFixture )
// timestamp
time_t timestamp = time ( NULL );
- OpenStream_OneTableOneColumn ( GetName(), tableName, columnName, 8 );
+ OpenStream_OneTableOneColumn ( GetName() );
m_source . OpenStreamEvent ();
m_source . ProgMessageEvent ( 123, "progress message", timestamp, 2, 45 );
m_source . CloseStreamEvent ();
diff --git a/test/general-loader/test-general-writer.cpp b/test/general-loader/test-general-writer.cpp
index ad7268b..902b2eb 100644
--- a/test/general-loader/test-general-writer.cpp
+++ b/test/general-loader/test-general-writer.cpp
@@ -295,7 +295,7 @@ int main ( int argc, char * argv [] )
try
{
- const char *outfile = 0;
+ const char *outfile = "./db/";
const char *schema_path = "./test-general-writer.vschema";
int num_columns = 0;
@@ -322,7 +322,7 @@ int main ( int argc, char * argv [] )
if ( num_columns == 0 )
{
- const char * columns [ 2 ] = { "column01", "column02" };
+ const char * columns [ 2 ] = { "input/column01", "input/column02" };
ncbi :: runTest ( 2, columns, outfile, schema_path );
}
else
diff --git a/test/kget/Makefile b/test/kar/Makefile
similarity index 74%
copy from test/kget/Makefile
copy to test/kar/Makefile
index e2cd576..b3fb7cb 100644
--- a/test/kget/Makefile
+++ b/test/kar/Makefile
@@ -26,31 +26,30 @@ default: runtests
TOP ?= $(abspath ../..)
-MODULE = test/kget
+MODULE = test/kar
-TEST_TOOLS = \
+TEST_TOOLS =
include $(TOP)/build/Makefile.env
-.PHONY: $(TEST_TOOLS)
+$(TEST_TOOLS): makedirs
+ @ $(MAKE_CMD) $(TEST_BINDIR)/$@
+
+runtests: kar_md5 kar
#-------------------------------------------------------------------------------
-# slowtests: match output vs wget
+# scripted tests
#
+kar_md5: test-kar.sh
+ @ echo "Starting kar_md5 tests..."
+ @ bash test-kar.sh $(BINDIR)/kar
-slowtests: diff-vs-wget
-
-ACCESSION = SRR072810
-URL = http://ftp-trace.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/SRR072/SRR072810/SRR072810.sra
+kar:
+ rm -rf actual
+ mkdir -p actual
+ $(BINDIR)/kar -c actual/1.0.kar -d input/1.0; $(BINDIR)/kar -t actual/1.0.kar >actual/1.0.stdout; diff expected/1.0.stdout actual/1.0.stdout
+ rm -rf actual
-clean:
- rm -f $(ACCESSION)*
-
-diff-vs-wget: clean
- $(BINDIR)/kget --reliable -c ./$(ACCESSION).cachetee $(URL) $(ACCESSION).dat --progress
- wget $(URL)
- diff $(ACCESSION).sra ./$(ACCESSION).dat
- #diff $(ACCESSION).sra ./$(ACCESSION).cachetee
- rm -f $(ACCESSION)*
+.PHONY: $(TEST_TOOLS)
-.PHONY:
+clean: stdclean
diff --git a/test/kar/expected/1.0.stdout b/test/kar/expected/1.0.stdout
new file mode 100644
index 0000000..a06c00e
--- /dev/null
+++ b/test/kar/expected/1.0.stdout
@@ -0,0 +1,35 @@
+1.md5
+col
+col/sub1
+col/sub1/idx
+col/sub1/idx0
+col/sub1/idx1
+col/sub1/idx2
+col/sub1/idx3
+idx
+idx/4a
+idx/4b
+idx/sub
+idx/sub/4c
+idx/sub/4d
+md
+md/cur
+md5
+sub1
+sub1/col
+sub1/col/sub1
+sub1/col/sub1/idx
+sub1/col/sub1/idx0
+sub1/col/sub1/idx1
+sub1/col/sub1/idx2
+sub1/col/sub1/idx3
+sub1/md
+sub1/md/cur
+sub1/sub2
+sub1/sub2/col
+sub1/sub2/col/sub1
+sub1/sub2/col/sub1/idx
+sub1/sub2/col/sub1/idx0
+sub1/sub2/col/sub1/idx1
+sub1/sub2/col/sub1/idx2
+sub1/sub2/col/sub1/idx3
diff --git a/test/kar/input/1.0/1.md5 b/test/kar/input/1.0/1.md5
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/col/sub1/idx b/test/kar/input/1.0/col/sub1/idx
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/col/sub1/idx0 b/test/kar/input/1.0/col/sub1/idx0
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/col/sub1/idx1 b/test/kar/input/1.0/col/sub1/idx1
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/col/sub1/idx2 b/test/kar/input/1.0/col/sub1/idx2
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/col/sub1/idx3 b/test/kar/input/1.0/col/sub1/idx3
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/idx/4a b/test/kar/input/1.0/idx/4a
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/idx/4b b/test/kar/input/1.0/idx/4b
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/idx/sub/4c b/test/kar/input/1.0/idx/sub/4c
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/idx/sub/4d b/test/kar/input/1.0/idx/sub/4d
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/md/cur b/test/kar/input/1.0/md/cur
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/md5 b/test/kar/input/1.0/md5
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/col/sub1/idx b/test/kar/input/1.0/sub1/col/sub1/idx
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/col/sub1/idx0 b/test/kar/input/1.0/sub1/col/sub1/idx0
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/col/sub1/idx1 b/test/kar/input/1.0/sub1/col/sub1/idx1
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/col/sub1/idx2 b/test/kar/input/1.0/sub1/col/sub1/idx2
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/col/sub1/idx3 b/test/kar/input/1.0/sub1/col/sub1/idx3
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/md/cur b/test/kar/input/1.0/sub1/md/cur
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/sub2/col/sub1/idx b/test/kar/input/1.0/sub1/sub2/col/sub1/idx
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/sub2/col/sub1/idx0 b/test/kar/input/1.0/sub1/sub2/col/sub1/idx0
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/sub2/col/sub1/idx1 b/test/kar/input/1.0/sub1/sub2/col/sub1/idx1
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/sub2/col/sub1/idx2 b/test/kar/input/1.0/sub1/sub2/col/sub1/idx2
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/input/1.0/sub1/sub2/col/sub1/idx3 b/test/kar/input/1.0/sub1/sub2/col/sub1/idx3
new file mode 100644
index 0000000..e69de29
diff --git a/test/kar/test-kar.sh b/test/kar/test-kar.sh
new file mode 100755
index 0000000..e7688ab
--- /dev/null
+++ b/test/kar/test-kar.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# ===========================================================================
+#
+# PUBLIC DOMAIN NOTICE
+# National Center for Biotechnology Information
+#
+# This software/database is a "United States Government Work" under the
+# terms of the United States Copyright Act. It was written as part of
+# the author's official duties as a United States Government employee and
+# thus cannot be copyrighted. This software/database is freely available
+# to the public for use. The National Library of Medicine and the U.S.
+# Government have not placed any restriction on its use or reproduction.
+#
+# Although all reasonable efforts have been taken to ensure the accuracy
+# and reliability of the software and data, the NLM and the U.S.
+# Government do not and cannot warrant the performance or results that
+# may be obtained by using this software or data. The NLM and the U.S.
+# Government disclaim all warranties, express or implied, including
+# warranties of performance, merchantability or fitness for any particular
+# purpose.
+#
+# Please cite the author in any work or product based on this material.
+#
+# ===========================================================================
+
+
+TESTDIR="./kar_testsource"
+ARCHIVE="kar.kar"
+
+KAR=kar
+[ $# -ge 1 ] && KAR="$1"
+
+testdir_setup ()
+{
+ if ! mkdir $TESTDIR
+ then
+ echo "Failed to create testsource"
+ exit 1
+ fi
+
+ echo "random text for test purposes" > $TESTDIR/test_file1
+ echo "random text for test purposes and more" > $TESTDIR/test_file2
+}
+
+cleanup ()
+{
+ rm -rf $TESTDIR $ARCHIVE $ARCHIVE.md5
+}
+
+
+# if test source directory doesn't exist, make it and populate it
+if [ ! -d $TESTDIR ]
+then
+ testdir_setup
+fi
+
+# run the script
+if ! $KAR --md5 -f -d $TESTDIR -c $ARCHIVE
+then
+ STATUS=$?
+ echo "KAR md5 operation failed"
+else
+ case $(uname) in
+ (Linux)
+ if ! md5sum -c $ARCHIVE.md5
+ then
+ STATUS=$?
+ echo "md5sum check failed with status $STATUS"
+ fi
+ ;;
+ (Darwin)
+ if ! MD5=$(md5 $ARCHIVE)
+ then
+ STATUS=$?
+ echo "md5 failed with status $STATUS"
+ else
+ MY_MD5=$(cut -f1 -d' ' $ARCHIVE.md5)
+ if [ "$MD5" == "$MY_MD5" ]
+ then
+ echo "$ARCHIVE: OK"
+ else
+ echo "$ARCHIVE: FAILED"
+ STATUS=1
+ fi
+ fi
+ ;;
+ (*)
+ STATUS=1
+ echo unknown platform
+ ;;
+ esac
+fi
+
+cleanup
+
+
+exit $STATUS
diff --git a/test/kget/Makefile b/test/kget/Makefile
index e2cd576..dc7427e 100644
--- a/test/kget/Makefile
+++ b/test/kget/Makefile
@@ -35,6 +35,15 @@ include $(TOP)/build/Makefile.env
.PHONY: $(TEST_TOOLS)
#-------------------------------------------------------------------------------
+# runtests:
+#
+runtests: download
+
+download:
+ $(BINDIR)/vdb-config -s /repository/remote/main/CGI/resolver-cgi=http://www.ncbi.nlm.nih.gov/Traces/names/names.cgi
+ export PATH=$(BINDIR):$$PATH; python test_kget.py # expect rc = 0
+
+#-------------------------------------------------------------------------------
# slowtests: match output vs wget
#
@@ -53,4 +62,5 @@ diff-vs-wget: clean
#diff $(ACCESSION).sra ./$(ACCESSION).cachetee
rm -f $(ACCESSION)*
+
.PHONY:
diff --git a/test/kget/test_kget.py b/test/kget/test_kget.py
new file mode 100644
index 0000000..63d232a
--- /dev/null
+++ b/test/kget/test_kget.py
@@ -0,0 +1,175 @@
+import os
+import sys
+import subprocess
+import hashlib
+import datetime
+
+'''---------------------------------------------------------------------
+ calls "vdb-dump ACCESSION --info"
+ extracts from the output the line that starts with "remote : ....", falling back to a "path : ...." line
+ returns the first such url that starts with "http:", or None
+---------------------------------------------------------------------'''
+def get_remote_url( acc ):
+ cmd = "vdb-dump %s --info"%( acc )
+ try:
+ lines = subprocess.check_output( cmd, shell = True ).split( "\n" )
+ for line in lines:
+ try:
+ colon = line.index( ":" )
+ if line[ :colon ].strip() == "remote" :
+ url = line[ colon+1: ].strip()
+ if url.startswith( "http:" ) :
+ return url
+ except:
+ pass
+ for line in lines:
+ try:
+ colon = line.index( ":" )
+ if line[ :colon ].strip() == "path" :
+ url = line[ colon+1: ].strip()
+ if url.startswith( "http:" ) :
+ return url
+ except:
+ pass
+ except:
+ pass
+ return None
+
+'''---------------------------------------------------------------------
+ calls "kget URL --show-size"
+ extracts from the output the line that starts with "file-size = ...."
+ returns the value as int or 0
+---------------------------------------------------------------------'''
+def kget_remote_size( url ):
+ cmd = "kget %s --show-size"%( url )
+ try:
+ lines = subprocess.check_output( cmd, shell = True ).split( "\n" )
+ for line in lines:
+ try:
+ eq = line.index( "=" )
+ if line[ :eq ].strip() == "file-size" :
+ return int( line[ eq+1: ].strip() )
+ except:
+ pass
+ except:
+ pass
+ return 0
+
+'''---------------------------------------------------------------------
+ helper functions to create a md5 or sha256 hash from a file
+ ( this way we do not depend on the existence of a md5sum-binary )
+---------------------------------------------------------------------'''
+def hashfile( afile, hasher, blocksize=65536 ) :
+ buf = afile.read( blocksize )
+ while len( buf ) > 0 :
+ hasher.update( buf )
+ buf = afile.read( blocksize )
+ return hasher.hexdigest()
+
+def md5( fname ) :
+ return hashfile( open( fname, 'rb' ), hashlib.md5() )
+
+def sha256( fname ) :
+ return hashfile( open( fname, 'rb' ), hashlib.sha256() )
+
+
+'''---------------------------------------------------------------------
+ calls "kget URL"
+---------------------------------------------------------------------'''
+def kget_download_partial( url, acc ):
+ try:
+ os.remove( acc )
+ except:
+ pass
+
+ cmd = "kget %s"%( url )
+ try:
+ subprocess.check_output( cmd, shell = True )
+ return md5( acc )
+ except:
+ return None
+
+'''---------------------------------------------------------------------
+ calls "kget URL --full"
+---------------------------------------------------------------------'''
+def kget_download_full( url, acc ):
+ try:
+ os.remove( acc )
+ except:
+ pass
+ cmd = "kget %s --full"%( url )
+ try:
+ subprocess.check_output( cmd, shell = True )
+ return md5( acc )
+ except:
+ return None
+
+
+'''---------------------------------------------------------------------
+ the expected values
+---------------------------------------------------------------------'''
+ACC = "NC_011748.1"
+EXP_SIZE = 1313197
+EXP_MD5 = "2656bc178743c5e32bf0594246300dd6"
+
+
+'''---------------------------------------------------------------------
+ main...
+---------------------------------------------------------------------'''
+print "-" * 80
+print "we test download of accession '%s'"%( ACC )
+
+URL = get_remote_url( ACC )
+if URL == None :
+ print "cannot resolve accession '%s'"%( ACC )
+ sys.exit( -1 )
+
+print "'%s' is resolved into '%s'"%( ACC, URL )
+
+remote_size = kget_remote_size( URL )
+if remote_size != EXP_SIZE :
+ print "size (%d) differs from expected size(%d)"%( remote_size, EXP_SIZE )
+ sys.exit( -1 )
+else :
+ print "size as expected = %d"%( remote_size )
+
+t_start = datetime.datetime.now()
+remote_md5 = kget_download_partial( URL, ACC )
+t_partial = datetime.datetime.now() - t_start;
+if remote_md5 == None :
+ print "error downloading '%s'"%( URL )
+ sys.exit( -1 )
+
+if remote_md5 != EXP_MD5 :
+ print "md5 diff: expected (%s) vs remote (%s)"%( EXP_MD5, remote_md5 )
+ sys.exit( -1 )
+else :
+ print "partial donwload ok in %d ms"%( t_partial.microseconds)
+
+t_start = datetime.datetime.now()
+remote_md5 = kget_download_full( URL, ACC )
+t_full = datetime.datetime.now() - t_start;
+if remote_md5 == None :
+ print "error downloading '%s'"%( URL )
+ sys.exit( -1 )
+
+if remote_md5 != EXP_MD5 :
+ print "md5 diff: expected (%s) vs remote (%s)"%( EXP_MD5, remote_md5 )
+ sys.exit( -1 )
+else :
+ print "full donwload ok in %d ms"%( t_full.microseconds )
+
+'''---------------------------------------------------------------------
+if t_full >= t_partial :
+ print "timing problem: full download should be faster than partial download"
+ sys.exit( -1 )
+else :
+ print "timing ok: full download is faster than partial download"
+---------------------------------------------------------------------'''
+
+try:
+ os.remove( ACC )
+except:
+ pass
+
+print "-" * 80
diff --git a/test/ngs-pileup/Makefile b/test/ngs-pileup/Makefile
index 3611f34..03ab186 100644
--- a/test/ngs-pileup/Makefile
+++ b/test/ngs-pileup/Makefile
@@ -59,9 +59,6 @@ TEST_NGS_PILEUP_LIB = \
$(TEST_BINDIR)/test-ngs-pileup: $(TEST_NGS_PILEUP_OBJ)
$(LP) --exe -o $@ $^ $(TEST_NGS_PILEUP_LIB)
-valgrind_ngs_pileup: test-ngs-pileup
- valgrind --ncbi --suppressions=$(SRCDIR)/valgrind.suppress $(TEST_BINDIR)/test-ngs-pileup
-
#-------------------------------------------------------------------------------
# slowtests: match output vs sra-pileup
#
diff --git a/test/vcf-loader/Makefile b/test/pileup-stats/Makefile
similarity index 60%
copy from test/vcf-loader/Makefile
copy to test/pileup-stats/Makefile
index 32aab45..0e7e41b 100644
--- a/test/vcf-loader/Makefile
+++ b/test/pileup-stats/Makefile
@@ -26,23 +26,16 @@ default: runtests
TOP ?= $(abspath ../..)
-MODULE = test/vcf-loader
+MODULE = test/pileup-stats
TEST_TOOLS = \
- test-vcf-loader
-include $(TOP)/build/Makefile.env
-
-
-# make sure runs are not cached in the user repository when running tests
-ifeq ($(wildcard ../../../asm-trace),)
- ifeq (,$(VDB_CONFIG))
- VDB_CONFIG = $(shell pwd)/../only-remote-repository.kfg
- endif
-endif
+ALL_TOOLS = \
+ $(TEST_TOOLS) \
+include $(TOP)/build/Makefile.env
-$(TEST_TOOLS): makedirs
+$(ALL_TOOLS): makedirs
@ $(MAKE_CMD) $(TEST_BINDIR)/$@
.PHONY: $(TEST_TOOLS)
@@ -50,24 +43,19 @@ $(TEST_TOOLS): makedirs
clean: stdclean
#-------------------------------------------------------------------------------
-# white-box test
+# pileup-stats tool tests
#
-INCDIRS += -I$(TOP)/tools/vcf-loader
-
-VCF_TEST_SRC = \
- test-vcf-loader
+runtests: pileup-stats
-VCF_TEST_OBJ = \
- $(addsuffix .$(OBJX),$(VCF_TEST_SRC))
+pileup-stats: $(BINDIR)/pileup-stats
+ @ # Version check requested in VDB-2837: the -V output, after sed drops ".0" version components, must contain the first line of tools/pileup-stats/pileup-stats.vers followed by a character that is neither a digit nor a dot. Caveat: this makes the test depend on the location of the sources of the binary being tested.
+ @ $(BINDIR)/pileup-stats -V | sed -e 's/\([1-9][0-9]*\)\.0\.0/\1/' -e 's/\([1-9][0-9]*\.[1-9][0-9]*\)\.0/\1/' -e 's/\([1-9][0-9]*\.[0-9][0-9]*\.[1-9][0-9]*\)/\1/' | grep --quiet "$(shell head -n 1 $(TOP)/tools/pileup-stats/pileup-stats.vers)[^0-9\.]"
-VCF_TEST_LIB = \
- -skapp \
- -sktst \
- -sncbi-wvdb \
- -svcfloader
+slowtests: optional-column-ref-offset-type
-$(TEST_BINDIR)/test-vcf-loader: $(VCF_TEST_OBJ)
- $(LP) --exe -o $@ $^ $(VCF_TEST_LIB)
+optional-column-ref-offset-type:
+ @ $(BINDIR)/pileup-stats --align-category primary ERR085308 >/dev/null
-valgrind: test-vcf-loader
- valgrind --ncbi $(TEST_BINDIR)/test-vcf-loader
+vg: $(BINDIR)/pileup-stats
+ @ # TODO: a better command line options should be chosen to test for memory leaks
+ valgrind --ncbi --suppressions=$(SRCDIR)/valgrind.suppress $(BINDIR)/pileup-stats -V
diff --git a/test/ngs-pileup/valgrind.suppress b/test/pileup-stats/valgrind.suppress
similarity index 100%
rename from test/ngs-pileup/valgrind.suppress
rename to test/pileup-stats/valgrind.suppress
diff --git a/test/ref-variation/Makefile b/test/ref-variation/Makefile
index 2877a3f..34cdafc 100644
--- a/test/ref-variation/Makefile
+++ b/test/ref-variation/Makefile
@@ -45,13 +45,18 @@ clean: stdclean
#-------------------------------------------------------------------------------
# ref-variation tool tests
#
-runtests:
+runtests: ref-variation var-expand
-one-tool: $(BINDIR)/ref-variation
- #$(BINDIR)/ref-variation -t 16 -r NC_000002.11 -p 73613067 --query '-' -l 3 SRR1597772 SRR1597776 SRR1597780 SRR1597784 SRR1597788 SRR1597792 SRR1597796 SRR1597800 SRR1597915 SRR1597919 SRR1597924 SRR1597928 SRR1597932 SRR1597936 SRR1597940 SRR1597944 SRR1597950 SRR1597955 SRR1598087 SRR1598091 SRR1598097 SRR1598101 SRR1598105 SRR1598111 SRR1598118 SRR945586 SRR867664 SRR867668 SRR867672 SRR867676 SRR867680 SRR867684 SRR867688 SRR867692 SRR851163 SRR792574 SRR792583 SRR792595 SRR792600 [...]
- $(BINDIR)/ref-variation -t 16 -r NC_000002.11 -p 73613067 --query '-' -l 3 SRR1597772 SRR1597776 SRR1597780 SRR1597784 SRR1597788 SRR1597792 SRR1597796 SRR1597800 SRR1597915 SRR1597919 SRR1597924 SRR1597928 SRR1597932 SRR1597936 SRR1597940 SRR1597944 SRR1597950 SRR1597955 SRR1598087 SRR1598091 SRR1598097 SRR1598101 SRR1598105 SRR1598111 SRR1598118 SRR945586 SRR867664 SRR867668 SRR867672 SRR867676 SRR867680 SRR867684 SRR867688 SRR867692 SRR851163 SRR792574 SRR792583 SRR792595 SRR792600 S [...]
- #$(BINDIR)/ref-variation -t 2 -r NC_000002.11 -p 73613067 --query '-' -l 3 SRR1597772 SRR1597776
-
+ref-variation: $(BINDIR)/ref-variation
+
+ @ #./have-access.sh $(BINDIR) || (echo "no access to repository, skipping ref-variation test..."; exit 0)
+ @ ./have-access.sh $(BINDIR) $@
+
vg: $(BINDIR)/ref-variation
valgrind --ncbi --suppressions=$(SRCDIR)/valgrind.suppress $(BINDIR)/ref-variation -r NC_000002.11 -p 73613067 --query '-' -l 3 \
- SRR1597772 SRR1597776 SRR1597780 SRR1597784 SRR1597788 SRR1597792 SRR1597796 SRR1597800 SRR1597915 SRR1597919 SRR1597924 SRR1597928 SRR1597932 SRR1597936 SRR1597940 SRR1597944 SRR1597950 SRR1597955 SRR1598087 SRR1598091 SRR1598097 SRR1598101 SRR1598105 SRR1598111 SRR1598118 SRR945586 SRR867664 SRR867668 SRR867672 SRR867676 SRR867680 SRR867684 SRR867688 SRR867692 SRR851163 SRR792574 SRR792583 SRR792595 SRR792600 SRR792609 SRR792617 SRR792634 SRR792699 SRR792723 SRR792865 SRR792873 SRR [...]
+ SRR1597772 SRR1597776 SRR1597780 SRR1597784 SRR1597788 SRR1597792 SRR1597796 SRR1597800 SRR1597915 SRR1597919 SRR1597924 SRR1597928 SRR1597932 SRR1597936 SRR1597940 SRR1597944 SRR1597950 SRR1597955 SRR1598087 SRR1598091 SRR1598097 SRR1598101 SRR1598105 SRR1598111 SRR1598118 SRR945586 SRR867664 SRR867668 SRR867672 SRR867676 SRR867680 SRR867684 SRR867688 SRR867692 SRR851163 SRR792574 SRR792583 SRR792595 SRR792600 SRR792609 SRR792617 SRR792634 SRR792699 SRR792723 SRR792865 SRR792873 SRR792 [...]
+# SRR793139 SRR793150 SRR793291 SRR793304 SRR793325 SRR793352 SRR793517 SRR793545 SRR793566 SRR793584 SRR793709 SRR793718 SRR793732 SRR793739 SRR793760 SRR793788 SRR793800 SRR793927 SRR793949 SRR793961 SRR793990 SRR794015 SRR794150 SRR794208 SRR794255 SRR794383 SRR794420 SRR794439 SRR794474 SRR794592 SRR794607 SRR794624 SRR795034 SRR795057 SRR795077 SRR795091 SRR795107 SRR795249 SRR795262 SRR795289 SRR795302 SRR795308 SRR795321 SRR795326 SRR795335 SRR795476 SRR795493 SRR795504 SRR795525 [...]
+
+var-expand: $(BINDIR)/var-expand
+ @# ./have-access.sh $(BINDIR) || (echo "no access to repository, skipping var-expand test..."; exit 0)
+ @ ./have-access.sh $(BINDIR) $@
diff --git a/test/ref-variation/expected/ref-variation.out b/test/ref-variation/expected/ref-variation.out
new file mode 100644
index 0000000..9394831
--- /dev/null
+++ b/test/ref-variation/expected/ref-variation.out
@@ -0,0 +1,192 @@
+info: percent="0"
+info: processed 0%
+warn: Input variation spec : NC_000013.10:100635036:0:ACC
+warn: Adjusted variation spec: NC_000013.10:100635007:29:CCACCACCACCACCACCACCACCACCACCACC
+SRR793062
+info: percent="33"
+info: processed 33%
+SRR795251
+info: percent="66"
+info: processed 66%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=73613071, length=1
+info: reference: ...AGGAA[G]AGGAG...
+info: var_query=C
+warn: Input variation spec : NC_000002.11:73613071:1:C
+warn: Adjusted variation spec: NC_000002.11:73613071:1:C
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=117292899, length=9
+info: reference: ...GAAAG[TATTTATTT]TTTCT...
+info: var_query=GTATTTT
+warn: Input variation spec : NC_000007.13:117292900:4:
+warn: Adjusted variation spec: NC_000007.13:117292899:9:TATTT
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+warn: Input variation spec : NC_000002.11:73613067:3:
+warn: Adjusted variation spec: NC_000002.11:73613031:39:GGAGGAGGAGGAGGAGGAGGAGGAGGAGGAGGAGGA
+SRR867061 0 45
+info: percent="33"
+info: processed 33%
+SRR867131 22 50
+info: percent="66"
+info: processed 66%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+warn: Input variation spec : NC_000002.11:73613067:3:
+warn: Adjusted variation spec: NC_000002.11:73613031:39:GGAGGAGGAGGAGGAGGAGGAGGAGGAGGAGGAGGA
+SRR867061 0,0 45,27
+info: percent="33"
+info: processed 33%
+SRR867131 22,12 50,27
+info: percent="66"
+info: processed 66%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+warn: Input variation spec : CM000671.1:136131022:1:T
+warn: Adjusted variation spec: CM000671.1:136131022:2:TG
+SRR1601768 0 5
+info: percent="50"
+info: processed 50%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: percent="100"
+info: processed 100%
+err: ERROR: The selected reference region [N] does not contain valid bases, exiting...
+info: percent="0"
+info: processed 0%
+warn: Input variation spec : CM000671.1:136131021:1:T
+warn: Adjusted variation spec: CM000671.1:136131020:2:CT
+SRR1596639 0 8
+info: percent="50"
+info: processed 50%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=73613030, length=4
+info: reference: ...CGAGC[TGGA]GGAGG...
+info: var_query=CATAG
+warn: Input variation spec : NC_000002.11:73613030:3:AT
+warn: Adjusted variation spec: NC_000002.11:73613030:4:ATA
+info: Found indel box at pos=73613030, length=4
+info: reference: ...CGAGC[TGGA]GGAGG...
+info: var_query=ATATA
+warn: Input variation spec : NC_000002.11:73613030:3:ATAT
+warn: Adjusted variation spec: NC_000002.11:73613030:4:ATATA
+info: Found indel box at pos=73613030, length=4
+info: reference: ...CGAGC[TGGA]GGAGG...
+info: var_query=ATATATA
+warn: Input variation spec : NC_000002.11:73613030:3:ATATAT
+warn: Adjusted variation spec: NC_000002.11:73613030:4:ATATATA
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+warn: Input variation spec : CM000664.1:234668879:14:ATATATATATATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATATATATATATA
+SRR1597895 29 31
+info: percent="50"
+info: processed 50%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+warn: Input variation spec : CM000664.1:234668879:14:AT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATA
+warn: Input variation spec : CM000664.1:234668879:14:ATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATA
+warn: Input variation spec : CM000664.1:234668879:14:ATATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATATA
+warn: Input variation spec : CM000664.1:234668879:14:ATATATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATATATA
+warn: Input variation spec : CM000664.1:234668879:14:ATATATATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATATATATA
+warn: Input variation spec : CM000664.1:234668879:14:ATATATATATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATATATATATA
+warn: Input variation spec : CM000664.1:234668879:14:ATATATATATATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATATATATATATA
+warn: Input variation spec : CM000664.1:234668879:14:ATATATATATATATAT
+warn: Adjusted variation spec: CM000664.1:234668879:15:ATATATATATATATATA
+warn: Total counts don't match for SRR1597895 for query # 6 and 7: total: 31 vs 29, total positive: 31 vs 29
+SRR1597895 0 0 0 0 0 0 28 8 31
+info: percent="50"
+info: processed 50%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=1064998, length=2
+info: reference: ...GAATG[CC]GTGGC...
+info: var_query=CA
+warn: Input variation spec : NC_000001.10:1064999:1:A
+warn: Adjusted variation spec: NC_000001.10:1064998:2:CA
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=137518, length=19
+info: reference: ...GTGTG[TATATATATATATATATAT]TCCAT...
+info: var_query=GTATATATATATATATATT
+warn: Input variation spec : NC_000020.10:137534:2:
+warn: Adjusted variation spec: NC_000020.10:137518:19:TATATATATATATATAT
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=123452, length=8
+info: reference: ...GATTA[GGGAAATG]TCTCA...
+info: var_query=GGGAGAAATG
+warn: Input variation spec : CM000663.1:123452:7:GGGAGAAAT
+warn: Adjusted variation spec: CM000663.1:123452:8:GGGAGAAATG
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=36662041, length=10
+info: reference: ...CAACA[ATAATTATAA]GATTC...
+info: var_query=AATAAG
+warn: Input variation spec : CM000684.1:36662045:6:
+warn: Adjusted variation spec: CM000684.1:36662041:10:ATAA
+info: [0] Processing parameter # 0: SRR1597729, path=[], pileup path=[]
+info: [0] Processing SRR1597729.pileup...
+info: [0] found CM000684.1 row_id=2829728721, id_count=51304566
+info: [0] SRR1597729 is suspicious
+info: [0] Processing SRR1597729
+info: [0] id=SRR1597729.PA.257963012: AATAAT
+info: [0] id=SRR1597729.PA.257963013: AATAAG MATCH!
+info: [0] id=SRR1597729.PA.257963014: AATAAT
+info: [0] id=SRR1597729.PA.257963016: AATAAG MATCH!
+info: [0] id=SRR1597729.PA.257963017: AATAAT
+info: [0] id=SRR1597729.PA.257963018: AATAAG MATCH!
+SRR1597729 3 6
+info: percent="50"
+info: processed 50%
+info: percent="100"
+info: processed 100%
+info: percent="0"
+info: processed 0%
+info: Found indel box at pos=569998, length=4
+info: reference: ...GATAC[GGGA]TAATC...
+info: var_query=GGAA
+warn: Input variation spec : NC_000001.10:570000:1:A
+warn: Adjusted variation spec: NC_000001.10:569998:4:GGAA
+info: percent="100"
+info: processed 100%
+SRR867061 0 45
+SRR867061 0 45
+SRR867131 22 50
+SRR867131 22 50
diff --git a/test/ref-variation/expected/var-expand.out b/test/ref-variation/expected/var-expand.out
new file mode 100644
index 0000000..602b6dc
--- /dev/null
+++ b/test/ref-variation/expected/var-expand.out
@@ -0,0 +1 @@
+a CM000664.1:234668879:14:AT CM000664.1:234668879:15:ATA CM000664.1:234668879:15:ATATATATATATATA
diff --git a/test/ref-variation/have-access.sh b/test/ref-variation/have-access.sh
new file mode 100755
index 0000000..ef9813e
--- /dev/null
+++ b/test/ref-variation/have-access.sh
@@ -0,0 +1,25 @@
+BINDIR=$1
+test_name=$2
+exit_code=0
+have_access()
+{
+ $BINDIR/vdb-config -on /repository/site > /dev/null
+}
+
+if have_access; then
+ echo "Starting $test_name test...";
+ if [ "$test_name" = "ref-variation" ]; then
+ ./ref-variation.sh $BINDIR;
+ exit_code=$?
+ elif [ "$test_name" = "var-expand" ]; then
+ rm -f var-expand.out
+ echo "a CM000664.1:234668879:14:AT" | $BINDIR/var-expand --no-user-settings --algorithm=ra >> var-expand.out
+ diff expected/var-expand.out var-expand.out
+ exit_code=$?
+ rm var-expand.out
+ fi
+ echo "$test_name test is done";
+else
+ echo "no access to repository, skipping $test_name test...";
+fi
+exit $exit_code
\ No newline at end of file
diff --git a/test/ref-variation/ref-variation.in b/test/ref-variation/ref-variation.in
new file mode 100644
index 0000000..5f29904
--- /dev/null
+++ b/test/ref-variation/ref-variation.in
@@ -0,0 +1,4 @@
+SRR867061 SRR867061 SRR867061.pileup
+SRR867061 SRR867061
+SRR867131
+SRR867131 SRR867131.pileup
diff --git a/test/ref-variation/ref-variation.sh b/test/ref-variation/ref-variation.sh
new file mode 100755
index 0000000..f69fd53
--- /dev/null
+++ b/test/ref-variation/ref-variation.sh
@@ -0,0 +1,29 @@
+BINDIR=$1
+
+if [ "$(uname)" = "Darwin" ]
+then
+ echo "ref-variation test is disabled for Mac"
+else
+ rm -f ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -r NC_000013.10 -p 100635036 --query ACC -l 0 SRR793062 SRR795251 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vv -r NC_000002.11 -p 73613071 --query "C" -l 1 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vv -t 16 -r NC_000007.13 -p 117292900 --query "-" -l 4 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -c -t 1 -r NC_000002.11 -p 73613067 --query "-" -l 3 SRR867061 SRR867131 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -c --count-strand counteraligned -t 1 -r NC_000002.11 -p 73613067 --query "-" -l 3 SRR867061 SRR867131 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -c -r CM000671.1 -p 136131022 --query "T" -l 1 SRR1601768 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -c -r NC_000001.11 -p 136131022 --query "T" -l 1 SRR1601768 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -t 16 -c -r CM000671.1 -p 136131021 --query "T" -l 1 SRR1596639 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vv -r NC_000002.11 -p 73613030 --query "AT[1-3]" -l 3 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -c -r CM000664.1 -p 234668879 -l 14 --query "ATATATATATATAT" SRR1597895 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -v -c -r CM000664.1 -p 234668879 -l 14 --query "AT[1-8]" SRR1597895 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vv -r NC_000001.10 -p 1064999 -l 1 --query A 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vv -r NC_000020.10 -p 137534 -l 2 --query - 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vv -c -r CM000663.1 -p 123452 -l 7 --query "GGGAGAAAT" 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vvv -r CM000684.1 -p 36662045 --query - -l 6 -vvv -c SRR1597729 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L info -vv -r NC_000001.10 -p 570000 -l 1 --query A 2>&1 | sed "s/[0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}T[0-9]\{2\}\:[0-9]\{2\}\:[0-9]\{2\}[ \t]*ref-variation\.[0-9]\+\.[0-9]\+\(\.[0-9]\+\)\?[ \t]*//" >> ref-variation.out
+ $BINDIR/ref-variation --no-user-settings --algorithm=ra -L err -v -c -t 1 -r NC_000002.11 -p 73613067 --query "-" -l 3 -i ref-variation.in >> ref-variation.out 2>&1
+ diff expected/ref-variation.out ref-variation.out
+ EXIT_CODE=$?
+ rm ref-variation.out
+ exit $EXIT_CODE
+fi
diff --git a/test/samline/Makefile b/test/samline/Makefile
index e3a76ef..414b84d 100644
--- a/test/samline/Makefile
+++ b/test/samline/Makefile
@@ -33,8 +33,9 @@ include $(TOP)/build/Makefile.env
INT_TOOLS = \
EXT_TOOLS = \
- samline
-
+ samline \
+ sampart
+
ALL_TOOLS = \
$(INT_TOOLS) \
$(EXT_TOOLS)
@@ -80,21 +81,35 @@ clean: stdclean
.PHONY: clean
+CMN_LIBS = \
+ -skapp \
+ -sncbi-vdb \
+ -lm
+
#-------------------------------------------------------------------------------
-# TOOL_SRC
+# SAMLINE
#
-TOOL_SRC = \
+SAMLINE_SRC = \
refbases \
cigar \
alig-gen
-TOOL_OBJ = \
- $(addsuffix .$(OBJX),$(TOOL_SRC))
+SAMLINE_OBJ = \
+ $(addsuffix .$(OBJX),$(SAMLINE_SRC))
-TOOL_LIB = \
- -skapp \
- -sncbi-vdb \
- -lm
+$(BINDIR)/samline: $(SAMLINE_OBJ)
+ $(LD) --exe --vers $(SRCDIR) -o $@ $^ $(CMN_LIBS)
+
+#-------------------------------------------------------------------------------
+# SAMPART
+#
+SAMPART_SRC = \
+ refbases \
+ cigar \
+ sampart
+
+SAMPART_OBJ = \
+ $(addsuffix .$(OBJX),$(SAMPART_SRC))
-$(BINDIR)/samline: $(TOOL_OBJ)
- $(LD) --exe --vers $(SRCDIR) -o $@ $^ $(TOOL_LIB)
+$(BINDIR)/sampart: $(SAMPART_OBJ)
+ $(LD) --exe --vers $(SRCDIR) -o $@ $^ $(CMN_LIBS)
diff --git a/test/samline/alig-gen.c b/test/samline/alig-gen.c
index 07f1f9b..66845c5 100644
--- a/test/samline/alig-gen.c
+++ b/test/samline/alig-gen.c
@@ -46,87 +46,96 @@
#include "refbases.h"
#include "cigar.h"
-#define DFLT_REFNAME "NC_011752.1"
-#define DFLT_REFPOS 10000
-#define DFLT_CIGAR "50M"
-#define DFLT_INSBASES "ACGTACGTACGT"
-#define DFLT_MAPQ 20
-#define DFLT_QNAME "1"
-#define DFLT_TLEN 0
-
-static const char * refname_usage[] = { "the ref-seq-id to use 'NC_011752.1'", NULL };
-static const char * refpos_usage[] = { "the position on the reference 0-based", NULL };
-static const char * cigar_usage[] = { "the cigar-string to use", NULL };
-static const char * insbases_usage[] = { "what bases to insert ( if needed )", NULL };
-static const char * mapq_usage[] = { "what mapq to use", NULL };
-static const char * reverse_usage[] = { "alignment is reverse", NULL };
-static const char * qname_usage[] = { "query template name", NULL };
-static const char * sec_usage[] = { "secondary alignment", NULL };
-static const char * bad_usage[] = { "did not pass quality control", NULL };
-static const char * dup_usage[] = { "is PCR or optical duplicate", NULL };
-static const char * prop_usage[] = { "each fragment is properly aligned", NULL };
-static const char * show_usage[] = { "show details of calculations", NULL };
-static const char * ref_usage[] = { "return only refbases (set cigar to 100M for len=100)", NULL };
-static const char * flags_usage[] = { "decode decimal flags-value", NULL };
-static const char * header_usage[] = { "produce header", NULL };
-static const char * config_usage[] = { "procuce config-file", NULL };
-static const char * mdtag_usage[] = { "procuce md-tag", NULL };
-
-#define OPTION_REFNAME "refname"
-#define OPTION_REFPOS "refpos"
-#define OPTION_CIGAR "cigar"
-#define OPTION_INSBASES "insbases"
-#define OPTION_MAPQ "mapq"
-#define OPTION_REVERSE "reverse"
-#define OPTION_QNAME "qname"
-#define OPTION_SEC "secondary"
-#define OPTION_BAD "bad"
-#define OPTION_DUP "duplicate"
-#define OPTION_PROP "proper"
-#define OPTION_SHOW "show"
-#define OPTION_REF "ref"
-#define OPTION_FLAGS "flags"
-#define OPTION_HEADER "header"
-#define OPTION_CONFIG "config"
-#define OPTION_MDTAG "mdtag"
-
-#define ALIAS_REFNAME "r"
-#define ALIAS_REFPOS "p"
-#define ALIAS_CIGAR "c"
-#define ALIAS_INSBASES "i"
-#define ALIAS_MAPQ "m"
-#define ALIAS_REVERSE "e"
-#define ALIAS_QNAME "q"
-#define ALIAS_SEC "2"
-#define ALIAS_BAD "a"
-#define ALIAS_DUP "u"
-#define ALIAS_PROP "o"
-#define ALIAS_SHOW "s"
-#define ALIAS_REF "f"
-#define ALIAS_FLAGS "l"
-#define ALIAS_HEADER "d"
-#define ALIAS_CONFIG "n"
-#define ALIAS_MDTAG "t"
+#define DFLT_REFNAME "NC_011752.1"
+#define DFLT_REFPOS 10000
+#define DFLT_CIGAR "50M"
+#define DFLT_INSBASES "ACGTACGTACGT"
+#define DFLT_MAPQ 20
+#define DFLT_QNAME "1"
+#define DFLT_TLEN 0
+
+static const char * refname_usage[] = { "the ref-seq-id to use 'NC_011752.1'", NULL };
+static const char * refalias_usage[] = { "the alias to be written out like 'c1'", NULL };
+static const char * refpos_usage[] = { "the position on the reference 0-based", NULL };
+static const char * cigar_usage[] = { "the cigar-string to use", NULL };
+static const char * insbases_usage[] = { "what bases to insert ( if needed )", NULL };
+static const char * mapq_usage[] = { "what mapq to use", NULL };
+static const char * reverse_usage[] = { "alignment is reverse", NULL };
+static const char * qname_usage[] = { "query template name", NULL };
+static const char * sec_usage[] = { "secondary alignment", NULL };
+static const char * bad_usage[] = { "did not pass quality control", NULL };
+static const char * dup_usage[] = { "is PCR or optical duplicate", NULL };
+static const char * prop_usage[] = { "each fragment is properly aligned", NULL };
+static const char * first_usage[] = { "fragment is first", NULL };
+static const char * last_usage[] = { "fragment is last", NULL };
+static const char * show_usage[] = { "show details of calculations", NULL };
+static const char * ref_usage[] = { "return only refbases (set cigar to 100M for len=100)", NULL };
+static const char * flags_usage[] = { "decode decimal flags-value", NULL };
+static const char * header_usage[] = { "produce header", NULL };
+static const char * config_usage[] = { "produce config-file", NULL }; /* help text shown for the "config" option */
+static const char * mdtag_usage[] = { "produce md-tag", NULL }; /* help text shown for the "mdtag" option */
+
+#define OPTION_REFNAME "refname"
+#define OPTION_REFALIAS "refalias"
+#define OPTION_REFPOS "refpos"
+#define OPTION_CIGAR "cigar"
+#define OPTION_INSBASES "insbases"
+#define OPTION_MAPQ "mapq"
+#define OPTION_REVERSE "reverse"
+#define OPTION_QNAME "qname"
+#define OPTION_SEC "secondary"
+#define OPTION_BAD "bad"
+#define OPTION_DUP "duplicate"
+#define OPTION_PROP "proper"
+#define OPTION_FIRST "first"
+#define OPTION_LAST "last"
+#define OPTION_SHOW "show"
+#define OPTION_REF "ref"
+#define OPTION_FLAGS "flags"
+#define OPTION_HEADER "header"
+#define OPTION_CONFIG "config"
+#define OPTION_MDTAG "mdtag"
+
+#define ALIAS_REFNAME "r"
+#define ALIAS_REFALIAS "b"
+#define ALIAS_REFPOS "p"
+#define ALIAS_CIGAR "c"
+#define ALIAS_INSBASES "i"
+#define ALIAS_MAPQ "m"
+#define ALIAS_REVERSE "e"
+#define ALIAS_SEC "2"
+#define ALIAS_BAD "a"
+#define ALIAS_DUP "u"
+#define ALIAS_PROP "o"
+#define ALIAS_SHOW "s"
+#define ALIAS_REF "f"
+#define ALIAS_FLAGS "l"
+#define ALIAS_HEADER "d"
+#define ALIAS_CONFIG "n"
+#define ALIAS_MDTAG "t"
OptDef Options[] =
{
- { OPTION_REFNAME, ALIAS_REFNAME, NULL, refname_usage, 2, true, false },
- { OPTION_REFPOS, ALIAS_REFPOS, NULL, refpos_usage, 2, true, false },
- { OPTION_CIGAR, ALIAS_CIGAR, NULL, cigar_usage, 2, true, false },
- { OPTION_INSBASES, ALIAS_INSBASES, NULL, insbases_usage, 1, true, false },
- { OPTION_MAPQ, ALIAS_MAPQ, NULL, mapq_usage, 2, true, false },
- { OPTION_REVERSE, ALIAS_REVERSE, NULL, reverse_usage, 1, false, false },
- { OPTION_QNAME, ALIAS_QNAME, NULL, qname_usage, 2, true, false },
- { OPTION_SEC, ALIAS_SEC, NULL, sec_usage, 2, false, false },
- { OPTION_BAD, ALIAS_BAD, NULL, bad_usage, 2, false, false },
- { OPTION_DUP, ALIAS_DUP, NULL, dup_usage, 2, false, false },
- { OPTION_PROP, ALIAS_PROP, NULL, prop_usage, 2, false, false },
- { OPTION_SHOW, ALIAS_SHOW, NULL, show_usage, 1, false, false },
- { OPTION_REF, ALIAS_REF, NULL, ref_usage, 1, false, false },
- { OPTION_FLAGS, ALIAS_FLAGS, NULL, flags_usage, 1, true, false },
- { OPTION_HEADER, ALIAS_HEADER, NULL, header_usage, 1, false, false },
- { OPTION_CONFIG, ALIAS_CONFIG, NULL, config_usage, 1, true, false },
- { OPTION_MDTAG, ALIAS_MDTAG, NULL, mdtag_usage, 1, false, false }
+ { OPTION_REFNAME, ALIAS_REFNAME, NULL, refname_usage, 2, true, false },
+ { OPTION_REFALIAS, ALIAS_REFALIAS, NULL, refalias_usage, 2, true, false },
+ { OPTION_REFPOS, ALIAS_REFPOS, NULL, refpos_usage, 2, true, false },
+ { OPTION_CIGAR, ALIAS_CIGAR, NULL, cigar_usage, 2, true, false },
+ { OPTION_INSBASES, ALIAS_INSBASES, NULL, insbases_usage, 1, true, false },
+ { OPTION_MAPQ, ALIAS_MAPQ, NULL, mapq_usage, 2, true, false },
+ { OPTION_REVERSE, ALIAS_REVERSE, NULL, reverse_usage, 1, false, false },
+ { OPTION_QNAME, NULL, NULL, qname_usage, 2, true, false },
+ { OPTION_SEC, ALIAS_SEC, NULL, sec_usage, 2, true, false },
+ { OPTION_BAD, ALIAS_BAD, NULL, bad_usage, 2, true, false },
+ { OPTION_DUP, ALIAS_DUP, NULL, dup_usage, 2, true, false },
+ { OPTION_PROP, ALIAS_PROP, NULL, prop_usage, 2, true, false },
+ { OPTION_FIRST, NULL, NULL, first_usage, 2, true, false },
+ { OPTION_LAST, NULL, NULL, last_usage, 2, true, false },
+ { OPTION_SHOW, ALIAS_SHOW, NULL, show_usage, 1, false, false },
+ { OPTION_REF, ALIAS_REF, NULL, ref_usage, 1, false, false },
+ { OPTION_FLAGS, ALIAS_FLAGS, NULL, flags_usage, 1, true, false },
+ { OPTION_HEADER, ALIAS_HEADER, NULL, header_usage, 1, false, false },
+ { OPTION_CONFIG, ALIAS_CONFIG, NULL, config_usage, 1, true, false },
+ { OPTION_MDTAG, ALIAS_MDTAG, NULL, mdtag_usage, 1, false, false }
};
const char UsageDefaultName[] = "samline";
@@ -140,7 +149,7 @@ rc_t CC Usage ( const Args * args )
{
const char * progname = UsageDefaultName;
const char * fullpath = UsageDefaultName;
- int i, n_options;
+ int i, n_options;
rc_t rc;
if ( args == NULL )
@@ -154,14 +163,14 @@ rc_t CC Usage ( const Args * args )
UsageSummary ( progname );
KOutMsg ( "Options:\n" );
- n_options = sizeof Options / sizeof Options[ 0 ];
- for ( i = 0; i < n_options; ++i )
- {
- OptDef * o = &Options[ i ];
- HelpOptionLine( o->aliases, o->name, NULL, o->help );
- }
+ n_options = sizeof Options / sizeof Options[ 0 ];
+ for ( i = 0; i < n_options; ++i )
+ {
+ OptDef * o = &Options[ i ];
+ HelpOptionLine( o->aliases, o->name, NULL, o->help );
+ }
- KOutMsg ( "\n" );
+ KOutMsg ( "\n" );
HelpOptionsStandard ();
HelpVersion ( fullpath, KAppVersion() );
@@ -186,55 +195,56 @@ static const char * get_str_option( const Args * args, const char * name, uint32
uint32_t count;
rc_t rc = ArgsOptionCount( args, name, &count );
if ( ( rc == 0 )&&( count > idx ) )
- {
- const char * res = NULL;
+ {
+ const char * res = NULL;
ArgsOptionValue( args, name, idx, (const void **)&res );
- return res;
- }
- else
- return dflt;
+ return res;
+ }
+ else
+ return dflt;
}
static uint32_t get_uint32_option( const Args * args, const char * name, uint32_t idx, const uint32_t dflt )
{
- const char * s = get_str_option( args, name, idx, NULL );
- if ( s == NULL )
- return dflt;
- return atoi( s );
+ const char * s = get_str_option( args, name, idx, NULL ); /* NULL when the option has no idx-th value or the count lookup failed */
+ if ( s == NULL )
+ return dflt; /* option value not available: fall back to the caller's default */
+ return atoi( s ); /* atoi gives no error indication, so malformed text is not rejected here */
}
static uint32_t get_bool_option( const Args * args, const char * name )
{
uint32_t count;
rc_t rc = ArgsOptionCount( args, name, &count );
- return ( rc == 0 && count > 0 );
+ return ( rc == 0 && count > 0 ); /* 1 when the count lookup succeeded and the option occurs at least once, else 0 */
}
typedef struct alignment
{
- const char * refname;
- const char * cigar_str;
- const char * refbases;
- char read[ 4096 ];
- char sam[ 4096 ];
-
- int reverse, secondary, bad, dup, prop;
-
- uint32_t refpos, mapq, bases_in_ref, reflen;
-
- struct cigar_t * cigar;
+ const char * qname;
+ const char * refname;
+ const char * refalias;
+ const char * cigar_str;
+ const char * refbases;
+ char read[ 4096 ];
+ char sam[ 4096 ];
+
+ int reverse, secondary, bad, dup, prop, first, last;
+
+ uint32_t refpos, mapq, bases_in_ref, reflen;
+
+ struct cigar_t * cigar;
} alignment;
typedef struct gen_context
{
- const char * qname;
- const char * insbases;
- const char * config;
- uint32_t flags, header;
- int32_t tlen;
-
- alignment alig[ 2 ];
+ const char * insbases;
+ const char * config;
+ uint32_t flags, header;
+ int32_t tlen;
+
+ alignment alig[ 2 ];
} gen_context;
@@ -249,356 +259,398 @@ static rc_t CC write_to_FILE ( void *f, const char *buffer, size_t bytes, size_t
static size_t random_string( char * buffer, size_t buflen, const char * char_set, size_t length )
{
- size_t res = 0;
- if ( buffer != NULL && buflen > 0 )
- {
- const char dflt_charset[] = "0123456789"
- "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
- const char * cs = ( char_set == NULL ) ? dflt_charset : char_set;
- size_t charset_len = strlen( cs ) - 1;
- while ( res < length && res < ( buflen - 1 ) )
- {
- size_t rand_idx = ( double ) rand() / RAND_MAX * charset_len;
- buffer[ res++ ] = cs[ rand_idx ];
- }
- buffer[ res ] = 0;
- }
- return res;
+ size_t res = 0; /* characters written so far; returned to the caller (0 if buffer is NULL or buflen is 0) */
+ if ( buffer != NULL && buflen > 0 )
+ {
+ const char dflt_charset[] = "0123456789"
+ "abcdefghijklmnopqrstuvwxyz"
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ const char * cs = ( char_set == NULL ) ? dflt_charset : char_set; /* NULL selects the alphanumeric default */
+ size_t charset_len = strlen( cs ) - 1; /* highest valid index into cs; only used when cs is non-empty */
+ while ( cs[ 0 ] != 0 && res < length && res < ( buflen - 1 ) ) /* empty char_set: write nothing rather than index past the string */
+ {
+ size_t rand_idx = ( double ) rand() / RAND_MAX * charset_len; /* scales rand() into 0..charset_len */
+ buffer[ res++ ] = cs[ rand_idx ];
+ }
+ buffer[ res ] = 0; /* result is always NUL-terminated; at most buflen - 1 characters precede it */
+ }
+ return res;
}
static size_t random_quality( char * buffer, size_t buflen, size_t length )
{
- const char qualities[] = "!\"#$%&'()*+,-./0123456789:;<=>?"
- "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
- "`abcdefghijklmnopqrstuvwxyz{|}~";
- return random_string( buffer, buflen, qualities, length );
+ const char qualities[] = "!\"#$%&'()*+,-./0123456789:;<=>?"
+ "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
+ "`abcdefghijklmnopqrstuvwxyz{|}~";
+ return random_string( buffer, buflen, qualities, length );
}
static uint32_t sam_flags( const alignment * alig, const alignment * other, int first, int last )
{
- uint32_t res = 0;
- if ( first || last ) res |= 0x01; /* multiple fragments */
- if ( alig->prop != 0 ) res |= 0x02; /* each fragment properly aligned */
- if ( alig->refpos == 0 ) res |= 0x04; /* this fragment is unmapped */
- if ( other != NULL && other->refpos == 0 ) res |= 0x08; /* next fragment is unmapped */
- if ( alig->reverse ) res |= 0x10; /* this fragment is reversed */
- if ( other != NULL && other->reverse ) res |= 0x20; /* next fragment is reversed */
- if ( first ) res |= 0x40; /* this is the first fragment */
- if ( last ) res |= 0x80; /* this is the last fragment */
- if ( alig->secondary != 0 ) res |= 0x100; /* this is a secondary alignment */
- if ( alig->bad != 0 ) res |= 0x200; /* this is did not pass quality controls */
- if ( alig->dup != 0 ) res |= 0x400; /* this is PCR or optical duplicate */
- return res;
+ uint32_t res = 0;
+ if ( first || last ) res |= 0x01; /* multiple fragments */
+ if ( alig->prop != 0 ) res |= 0x02; /* each fragment properly aligned */
+ if ( alig->refpos == 0 ) res |= 0x04; /* this fragment is unmapped */
+ if ( other != NULL && other->refpos == 0 ) res |= 0x08; /* next fragment is unmapped */
+ if ( alig->reverse ) res |= 0x10; /* this fragment is reversed */
+ if ( other != NULL && other->reverse ) res |= 0x20; /* next fragment is reversed */
+ if ( first ) res |= 0x40; /* this is the first fragment */
+ if ( last ) res |= 0x80; /* this is the last fragment */
+ if ( alig->secondary != 0 ) res |= 0x100; /* this is a secondary alignment */
+ if ( alig->bad != 0 ) res |= 0x200; /* this is did not pass quality controls */
+ if ( alig->dup != 0 ) res |= 0x400; /* this is PCR or optical duplicate */
+ return res;
}
static size_t produce_sam( char * buffer, size_t buflen,
- const gen_context * gctx, const alignment * alig, const alignment * other )
+ const gen_context * gctx, const alignment * alig, const alignment * other )
{
- size_t res = 0;
- if ( buffer != NULL ) buffer[ 0 ] = 0;
-
- if ( buffer != NULL && gctx != NULL && alig != NULL )
- {
- char merged_cigar_str[ 4096 ];
- char quality[ 4096 ];
- int first = 0;
- int last = 0;
- const char * r_next = "*";
- uint32_t r_pos = 0;
- struct cigar_t * merged_cigar = merge_cigar_t( alig->cigar );
-
- random_quality( quality, sizeof quality, cigar_t_readlen( merged_cigar ) ) ;
- cigar_t_string( merged_cigar_str, sizeof merged_cigar_str, merged_cigar );
-
- if ( other != NULL && other->refname != NULL && other->refpos != 0 )
- {
- r_next = other->refname;
- first = ( alig->refpos < other->refpos );
- last = !first;
- r_pos = other->refpos;
- }
-
- string_printf ( buffer, buflen, &res,
- "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s",
- gctx->qname,
- sam_flags( alig, other, first, last ),
- alig->refname,
- alig->refpos,
- alig->mapq,
- merged_cigar_str,
- r_next,
- r_pos,
- gctx->tlen,
- alig->read,
- quality );
-
- free_cigar_t( merged_cigar );
- }
- return res;
+ size_t res = 0;
+ if ( buffer != NULL ) buffer[ 0 ] = 0;
+
+ if ( buffer != NULL && gctx != NULL && alig != NULL )
+ {
+ char merged_cigar_str[ 4096 ];
+ char quality[ 4096 ];
+ int first = 0;
+ int last = 0;
+ const char * r_next = "*";
+ uint32_t r_pos = 0;
+ struct cigar_t * merged_cigar = merge_cigar_t( alig->cigar );
+
+ random_quality( quality, sizeof quality, cigar_t_readlen( merged_cigar ) ) ;
+ cigar_t_string( merged_cigar_str, sizeof merged_cigar_str, merged_cigar );
+
+ if ( other != NULL && other->refname != NULL && other->refpos != 0 )
+ {
+ r_next = other->refalias == NULL ? other->refname : other->refalias;
+ first = ( alig->refpos < other->refpos );
+ last = !first;
+ r_pos = other->refpos;
+ }
+ else
+ {
+ first = alig->first;
+ last = alig->last;
+ }
+
+ string_printf ( buffer, buflen, &res,
+ "%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s",
+ alig->qname,
+ sam_flags( alig, other, first, last ),
+ alig->refalias == NULL ? alig->refname : alig->refalias,
+ alig->refpos,
+ alig->mapq,
+ merged_cigar_str,
+ r_next,
+ r_pos,
+ gctx->tlen,
+ alig->read,
+ quality );
+
+ free_cigar_t( merged_cigar );
+ }
+ return res;
}
static void show_alig_details( const alignment * alig )
{
- KOutMsg ( "REFNAME : %s\n", alig->refname );
- KOutMsg ( "REFPOS : %d\n", alig->refpos );
- KOutMsg ( "CIGAR : %s\n", alig->cigar_str );
- KOutMsg ( "MAPQ : %d\n", alig->mapq );
- KOutMsg ( "REVERSE : %s\n", alig->reverse ? "YES" : "NO" );
- KOutMsg ( "SECONDARY: %s\n", alig->secondary ? "YES" : "NO" );
- KOutMsg ( "BAD : %s\n", alig->bad ? "YES" : "NO" );
- KOutMsg ( "DUPLICATE: %s\n", alig->dup ? "YES" : "NO" );
- KOutMsg ( "PROPERLY : %s\n", alig->prop ? "YES" : "NO" );
- KOutMsg ( "REFLEN : %d\n", alig->reflen );
- KOutMsg ( "READLEN : %d\n", cigar_t_readlen( alig->cigar ) );
- KOutMsg ( "INSLEN : %d\n", cigar_t_inslen( alig->cigar ) );
- KOutMsg ( "REFBASES : %s\n", alig->refbases );
- KOutMsg ( "READ : %s\n", alig->read );
- KOutMsg ( "SAM : %s\n", alig->sam );
-
+ KOutMsg ( "QNAME : %s\n", alig->qname );
+ KOutMsg ( "REFNAME : %s\n", alig->refname );
+ KOutMsg ( "REFALIAS : %s\n", alig->refalias == NULL ? "n/a" : alig->refalias );
+ KOutMsg ( "REFPOS : %d\n", alig->refpos );
+ KOutMsg ( "CIGAR : %s\n", alig->cigar_str );
+ KOutMsg ( "MAPQ : %d\n", alig->mapq );
+ KOutMsg ( "REVERSE : %s\n", alig->reverse ? "YES" : "NO" );
+ KOutMsg ( "SECONDARY: %s\n", alig->secondary ? "YES" : "NO" );
+ KOutMsg ( "BAD : %s\n", alig->bad ? "YES" : "NO" );
+ KOutMsg ( "DUPLICATE: %s\n", alig->dup ? "YES" : "NO" );
+ KOutMsg ( "PROPERLY : %s\n", alig->prop ? "YES" : "NO" );
+ KOutMsg ( "FIRST : %s\n", alig->first ? "YES" : "NO" );
+ KOutMsg ( "LAST : %s\n", alig->last ? "YES" : "NO" );
+ KOutMsg ( "REFLEN : %d\n", alig->reflen );
+ KOutMsg ( "READLEN : %d\n", cigar_t_readlen( alig->cigar ) );
+ KOutMsg ( "INSLEN : %d\n", cigar_t_inslen( alig->cigar ) );
+ KOutMsg ( "REFBASES : %s\n", alig->refbases );
+ KOutMsg ( "READ : %s\n", alig->read );
+ KOutMsg ( "SAM : %s\n", alig->sam );
+
}
static void show_details( const gen_context * gctx )
{
- KOutMsg ( "QNAME : %s\n", gctx->qname );
- KOutMsg ( "INSBASES : %s\n", gctx->insbases );
- KOutMsg ( "TLEN : %d\n", gctx->tlen );
- KOutMsg ( "CONFIG : %s\n", gctx->config );
- if ( gctx->tlen != 0 )
- {
- KOutMsg ( "----- ALIGNMENT #1 -----\n" );
- show_alig_details( &gctx->alig[ 0 ] );
- KOutMsg ( "----- ALIGNMENT #2 -----\n" );
- show_alig_details( &gctx->alig[ 1 ] );
- }
- else
- show_alig_details( &gctx->alig[ 0 ] );
+ KOutMsg ( "INSBASES : %s\n", gctx->insbases );
+ KOutMsg ( "TLEN : %d\n", gctx->tlen );
+ KOutMsg ( "CONFIG : %s\n", gctx->config );
+ if ( gctx->tlen != 0 )
+ {
+ KOutMsg ( "----- ALIGNMENT #1 -----\n" );
+ show_alig_details( &gctx->alig[ 0 ] );
+ KOutMsg ( "----- ALIGNMENT #2 -----\n" );
+ show_alig_details( &gctx->alig[ 1 ] );
+ }
+ else
+ show_alig_details( &gctx->alig[ 0 ] );
}
static void show_mdtag( const gen_context * gctx )
{
- struct cigar_t * cigar;
-
- KOutMsg ( "calculating MD-TAG:\n" );
- KOutMsg ( "READ : %s\n", gctx->alig[0].read );
- KOutMsg ( "REFBASES : %s\n", gctx->alig[0].refbases );
-
- cigar = make_cigar_t( gctx->alig[0].cigar_str );
- if ( cigar != NULL )
- {
- struct cigar_t *merged_cigar = merge_cigar_t( cigar );
- if ( merged_cigar != NULL )
- {
- char merged_cigar_str[ 4096 ];
- char the_tag[ 4096 ];
-
- cigar_t_string( merged_cigar_str, sizeof merged_cigar_str, merged_cigar );
- md_tag( the_tag, sizeof the_tag, merged_cigar, gctx->alig[0].read, gctx->alig[0].refbases );
-
- KOutMsg ( "CIGAR : %s\n", merged_cigar_str );
- KOutMsg ( "MD-TAG : %s\n", the_tag );
-
- free_cigar_t( merged_cigar );
- }
- free_cigar_t( cigar );
- }
+ struct cigar_t * cigar;
+
+ KOutMsg ( "calculating MD-TAG:\n" );
+ KOutMsg ( "READ : %s\n", gctx->alig[0].read );
+ KOutMsg ( "REFBASES : %s\n", gctx->alig[0].refbases );
+
+ cigar = make_cigar_t( gctx->alig[0].cigar_str );
+ if ( cigar != NULL )
+ {
+ struct cigar_t *merged_cigar = merge_cigar_t( cigar );
+ if ( merged_cigar != NULL )
+ {
+ char merged_cigar_str[ 4096 ];
+ char the_tag[ 4096 ];
+
+ cigar_t_string( merged_cigar_str, sizeof merged_cigar_str, merged_cigar );
+ md_tag( the_tag, sizeof the_tag, merged_cigar, gctx->alig[0].read, gctx->alig[0].refbases );
+
+ KOutMsg ( "CIGAR : %s\n", merged_cigar_str );
+ KOutMsg ( "MD-TAG : %s\n", the_tag );
+
+ free_cigar_t( merged_cigar );
+ }
+ free_cigar_t( cigar );
+ }
}
static void explain_flags( const uint32_t flags )
{
- if ( ( flags & 0x01 ) == 0x01 )
- KOutMsg ( "0x001 ... multiple fragments\n" );
- if ( ( flags & 0x02 ) == 0x02 )
- KOutMsg ( "0x002 ... each fragment properly aligned\n" );
- if ( ( flags & 0x04 ) == 0x04 )
- KOutMsg ( "0x004 ... this fragment is unmapped\n" );
- if ( ( flags & 0x08 ) == 0x08 )
- KOutMsg ( "0x008 ... next fragment is unmapped\n" );
- if ( ( flags & 0x10 ) == 0x10 )
- KOutMsg ( "0x010 ... this fragment is reversed\n" );
- if ( ( flags & 0x20 ) == 0x20 )
- KOutMsg ( "0x020 ... next fragment is reversed\n" );
- if ( ( flags & 0x40 ) == 0x40 )
- KOutMsg ( "0x040 ... this is the first fragment\n" );
- if ( ( flags & 0x80 ) == 0x80 )
- KOutMsg ( "0x080 ... this is the last fragment\n" );
- if ( ( flags & 0x100 ) == 0x100 )
- KOutMsg ( "0x100 ... this is a secondary alignment\n" );
- if ( ( flags & 0x200 ) == 0x200 )
- KOutMsg ( "0x200 ... this is did not pass quality controls\n" );
- if ( ( flags & 0x400 ) == 0x400 )
- KOutMsg ( "0x400 ... this is PCR or optical duplicate\n" );
+ if ( ( flags & 0x01 ) == 0x01 )
+ KOutMsg ( "0x001 ... multiple fragments\n" );
+ if ( ( flags & 0x02 ) == 0x02 )
+ KOutMsg ( "0x002 ... each fragment properly aligned\n" );
+ if ( ( flags & 0x04 ) == 0x04 )
+ KOutMsg ( "0x004 ... this fragment is unmapped\n" );
+ if ( ( flags & 0x08 ) == 0x08 )
+ KOutMsg ( "0x008 ... next fragment is unmapped\n" );
+ if ( ( flags & 0x10 ) == 0x10 )
+ KOutMsg ( "0x010 ... this fragment is reversed\n" );
+ if ( ( flags & 0x20 ) == 0x20 )
+ KOutMsg ( "0x020 ... next fragment is reversed\n" );
+ if ( ( flags & 0x40 ) == 0x40 )
+ KOutMsg ( "0x040 ... this is the first fragment\n" );
+ if ( ( flags & 0x80 ) == 0x80 )
+ KOutMsg ( "0x080 ... this is the last fragment\n" );
+ if ( ( flags & 0x100 ) == 0x100 )
+ KOutMsg ( "0x100 ... this is a secondary alignment\n" );
+ if ( ( flags & 0x200 ) == 0x200 )
+ KOutMsg ( "0x200 ... this is did not pass quality controls\n" );
+ if ( ( flags & 0x400 ) == 0x400 )
+ KOutMsg ( "0x400 ... this is PCR or optical duplicate\n" );
}
-static size_t write_config_line( KFile * dst, size_t at, const char * refname0, const char * refname1 )
+static size_t write_config_line( KFile * dst, size_t at, const char * alias, const char * canonical )
{
- size_t num_in_buffer, res = 0;
- char buffer[ 4096 ];
- rc_t rc = string_printf ( buffer, sizeof buffer, &num_in_buffer, "%s\t%s\n", refname0, refname1 );
- if ( rc == 0 )
- {
- size_t written_to_file;
- rc = KFileWriteAll ( dst, at, buffer, num_in_buffer, &written_to_file );
- if ( rc == 0 )
- res = at + written_to_file;
- }
- return res;
+ size_t num_in_buffer, res = 0;
+ char buffer[ 4096 ];
+ rc_t rc = string_printf ( buffer, sizeof buffer, &num_in_buffer, "%s\t%s\n", alias, canonical );
+ if ( rc == 0 )
+ {
+ size_t written_to_file;
+ rc = KFileWriteAll ( dst, at, buffer, num_in_buffer, &written_to_file );
+ if ( rc == 0 )
+ res = at + written_to_file;
+ }
+ return res;
}
-static void write_config_file( const char * filename, const char * refname0, const char * refname1 )
+static void write_config_file( const char * filename, const alignment * alig0, const alignment * alig1 )
{
- KDirectory *dir;
- rc_t rc = KDirectoryNativeDir( &dir );
- if ( rc == 0 )
- {
- KFile * dst;
- rc = KDirectoryCreateFile ( dir, &dst, false, 0664, kcmInit, filename );
- if ( rc == 0 )
- {
- size_t pos = write_config_line( dst, 0, refname0, refname0 );
- if ( pos > 0 && ( strcmp( refname0, refname1 ) != 0 ) )
- write_config_line( dst, pos, refname1, refname1 );
- KFileRelease( dst );
- }
- KDirectoryRelease( dir );
- }
+ KDirectory *dir;
+ rc_t rc = KDirectoryNativeDir( &dir );
+ if ( rc == 0 )
+ {
+ KFile * dst;
+ rc = KDirectoryCreateFile ( dir, &dst, false, 0664, kcmInit, filename );
+ if ( rc == 0 )
+ {
+ size_t pos = 0;
+ const char * alias0 = NULL;
+ const char * alias1 = NULL;
+ const char * canonical0 = NULL;
+ const char * canonical1 = NULL;
+
+ if ( alig0 != 0 )
+ {
+ canonical0 = alig0->refname == NULL ? DFLT_REFNAME : alig0->refname;
+ alias0 = alig0->refalias == NULL ? canonical0 : alig0->refalias;
+ }
+
+ if ( alig1 != 0 )
+ {
+ canonical1 = alig1->refname == NULL ? DFLT_REFNAME : alig1->refname;
+ alias1 = alig1->refalias == NULL ? canonical1 : alig1->refalias;
+ }
+
+ pos = write_config_line( dst, pos, alias0, canonical0 );
+ if ( ( strcmp( alias0, alias1 ) != 0 ) )
+ write_config_line( dst, pos, alias1, canonical1 );
+
+ KFileRelease( dst );
+ }
+ KDirectoryRelease( dir );
+ }
}
static void generate_alignment( const gen_context * gctx )
{
- /* write reference names into config-file for bam-load */
- if ( gctx->config != NULL )
- write_config_file( gctx->config, gctx->alig[ 0 ].refname, gctx->alig[ 1 ].refname );
-
- /* procude SAM-header on stdout */
- if ( gctx->header )
- {
- const char * refname = gctx->alig[ 0 ].refname;
- int bases_in_ref = gctx->alig[ 0 ].bases_in_ref;
- KOutMsg( "@HD\tVN:1.3\n" );
- KOutMsg( "@SQ\tSN:%s\tAS:%s\tLN:%d\n", refname, refname, bases_in_ref );
- }
-
- /* produces SAM-line for 1st alignment */
- KOutMsg( "%s\n", gctx->alig[ 0 ].sam );
-
- /* produces SAM-line for 2nd alignment ( mate ) */
- if ( gctx->tlen != 0 )
- KOutMsg( "%s\n", gctx->alig[ 1 ].sam );
+ /* write reference names into config-file for bam-load */
+ if ( gctx->config != NULL )
+ write_config_file( gctx->config, &gctx->alig[ 0 ], &gctx->alig[ 1 ] );
+
+ /* procude SAM-header on stdout */
+ if ( gctx->header )
+ {
+ const char * refname0 = gctx->alig[ 0 ].refalias;
+ const char * refname1 = gctx->alig[ 1 ].refalias;
+ int bases_in_ref0 = gctx->alig[ 0 ].bases_in_ref;
+
+ if ( refname0 == NULL ) refname0 = gctx->alig[ 0 ].refname;
+ if ( refname1 == NULL ) refname1 = gctx->alig[ 1 ].refname;
+
+ KOutMsg( "@HD\tVN:1.3\n" );
+ KOutMsg( "@SQ\tSN:%s\tAS:%s\tLN:%d\n", refname0, refname0, bases_in_ref0 );
+ if ( refname1 != NULL && ( strcmp( refname0, refname1 ) != 0 ) )
+ {
+ int bases_in_ref1 = gctx->alig[ 1 ].bases_in_ref;
+ KOutMsg( "@SQ\tSN:%s\tAS:%s\tLN:%d\n", refname1, refname1, bases_in_ref1 );
+ }
+ }
+
+ /* produces SAM-line for 1st alignment */
+ KOutMsg( "%s\n", gctx->alig[ 0 ].sam );
+
+ /* produces SAM-line for 2nd alignment ( mate ) */
+ if ( gctx->tlen != 0 )
+ KOutMsg( "%s\n", gctx->alig[ 1 ].sam );
}
-static void read_alig_context( Args * args, gen_context * gctx, alignment * alig, uint32_t idx )
+static void read_alig_context( Args * args, alignment * alig, uint32_t idx )
{
- alig->refname = get_str_option( args, OPTION_REFNAME, idx, idx == 0 ? DFLT_REFNAME : NULL );
- alig->refpos = get_uint32_option( args, OPTION_REFPOS, idx, idx == 0 ? DFLT_REFPOS : 0 );
- alig->cigar_str = get_str_option( args, OPTION_CIGAR, idx, DFLT_CIGAR );
- alig->mapq = get_uint32_option( args, OPTION_MAPQ, idx, DFLT_MAPQ );
-
- alig->reverse = get_uint32_option( args, OPTION_REVERSE, idx, 0 );
- alig->secondary = get_uint32_option( args, OPTION_SEC, idx, 0 );
- alig->bad = get_uint32_option( args, OPTION_BAD, idx, 0 );
- alig->dup = get_uint32_option( args, OPTION_DUP, idx, 0 );
- alig->prop = get_uint32_option( args, OPTION_PROP, idx, 0 );
-
- /* precalculate values need in all functions */
- alig->cigar = make_cigar_t( alig->cigar_str );
- alig->reflen = cigar_t_reflen( alig->cigar );
+ alig->qname = get_str_option( args, OPTION_QNAME, idx, idx == 0 ? "1" : "2" );
+ alig->refname = get_str_option( args, OPTION_REFNAME, idx, idx == 0 ? DFLT_REFNAME : NULL );
+ alig->refalias = get_str_option( args, OPTION_REFALIAS, idx, NULL );
+ alig->refpos = get_uint32_option( args, OPTION_REFPOS, idx, idx == 0 ? DFLT_REFPOS : 0 );
+ alig->cigar_str = get_str_option( args, OPTION_CIGAR, idx, DFLT_CIGAR );
+ alig->mapq = get_uint32_option( args, OPTION_MAPQ, idx, DFLT_MAPQ );
+
+ alig->reverse = get_uint32_option( args, OPTION_REVERSE, idx, 0 );
+ alig->secondary = get_uint32_option( args, OPTION_SEC, idx, 0 );
+ alig->bad = get_uint32_option( args, OPTION_BAD, idx, 0 );
+ alig->dup = get_uint32_option( args, OPTION_DUP, idx, 0 );
+ alig->prop = get_uint32_option( args, OPTION_PROP, idx, 0 );
+ alig->first = get_uint32_option( args, OPTION_FIRST, idx, 0 );
+ alig->last = get_uint32_option( args, OPTION_LAST, idx, 0 );
+
+ /* precalculate values need in all functions */
+ alig->cigar = make_cigar_t( alig->cigar_str );
+ alig->reflen = cigar_t_reflen( alig->cigar );
}
static void release_alig( alignment * alig )
{
- if ( alig->refbases != NULL ) free( ( void* ) alig->refbases );
- free_cigar_t( alig->cigar );
+ if ( alig->refbases != NULL ) free( ( void* ) alig->refbases );
+ free_cigar_t( alig->cigar );
}
static void read_context( Args * args, gen_context * gctx )
{
- alignment *alig0, *alig1;
-
- gctx->qname = get_str_option( args, OPTION_QNAME, 0, DFLT_QNAME );
- gctx->insbases = get_str_option( args, OPTION_INSBASES, 0, DFLT_INSBASES );
- gctx->flags = get_uint32_option( args, OPTION_FLAGS, 0, 0 );
- gctx->header = get_bool_option( args, OPTION_HEADER );
- gctx->config = get_str_option( args, OPTION_CONFIG, 0, NULL );
- gctx->tlen = 0;
-
- read_alig_context( args, gctx, &gctx->alig[ 0 ], 0 );
- read_alig_context( args, gctx, &gctx->alig[ 1 ], 1 );
-
- alig0 = &gctx->alig[ 0 ];
- alig1 = &gctx->alig[ 1 ];
-
- if ( gctx->alig[ 1 ].refpos > 0 )
- {
- if ( gctx->alig[ 1 ].refname == NULL )
- gctx->alig[ 1 ].refname = gctx->alig[ 0 ].refname;
-
- uint32_t end = alig1->refpos + alig1->reflen;
- gctx->tlen = ( end - alig0->refpos );
-
- alig0->refbases = read_refbases( alig0->refname, alig0->refpos, alig0->reflen, &alig0->bases_in_ref );
- alig1->refbases = read_refbases( alig1->refname, alig1->refpos, alig1->reflen, &alig1->bases_in_ref );
-
- cigar_t_2_read( alig0->read, sizeof alig0->read, alig0->cigar, alig0->refbases, gctx->insbases );
- cigar_t_2_read( alig1->read, sizeof alig1->read, alig1->cigar, alig1->refbases, gctx->insbases );
-
- produce_sam( alig0->sam, sizeof alig0->sam, gctx, alig0, alig1 );
- produce_sam( alig1->sam, sizeof alig1->sam, gctx, alig1, alig0 ) ;
- }
- else
- {
- alig0->refbases = read_refbases( alig0->refname, alig0->refpos, alig0->reflen, &alig0->bases_in_ref );
- alig1->refbases = NULL;
- alig1->bases_in_ref = 0;
-
- cigar_t_2_read( alig0->read, sizeof alig0->read, alig0->cigar, alig0->refbases, gctx->insbases );
- alig1->read[ 0 ] = 0;
-
- produce_sam( alig0->sam, sizeof alig0->sam, gctx, alig0, alig1 );
- alig1->sam[ 0 ] = 0;
- }
+ alignment *alig0, *alig1;
+
+ gctx->insbases = get_str_option( args, OPTION_INSBASES, 0, DFLT_INSBASES );
+ gctx->flags = get_uint32_option( args, OPTION_FLAGS, 0, 0 );
+ gctx->header = get_bool_option( args, OPTION_HEADER );
+ gctx->config = get_str_option( args, OPTION_CONFIG, 0, NULL );
+ gctx->tlen = 0;
+
+ read_alig_context( args, &gctx->alig[ 0 ], 0 );
+ read_alig_context( args, &gctx->alig[ 1 ], 1 );
+
+ alig0 = &gctx->alig[ 0 ];
+ alig1 = &gctx->alig[ 1 ];
+
+ if ( gctx->alig[ 1 ].refpos > 0 )
+ {
+ if ( gctx->alig[ 1 ].refname == NULL )
+ gctx->alig[ 1 ].refname = gctx->alig[ 0 ].refname;
+ if ( gctx->alig[ 1 ].refalias == NULL )
+ gctx->alig[ 1 ].refalias = gctx->alig[ 0 ].refalias;
+
+ uint32_t end = alig1->refpos + alig1->reflen;
+ gctx->tlen = ( end - alig0->refpos );
+
+ alig0->refbases = read_refbases( alig0->refname, alig0->refpos, alig0->reflen, &alig0->bases_in_ref );
+ alig1->refbases = read_refbases( alig1->refname, alig1->refpos, alig1->reflen, &alig1->bases_in_ref );
+
+ cigar_t_2_read( alig0->read, sizeof alig0->read, alig0->cigar, alig0->refbases, gctx->insbases );
+ cigar_t_2_read( alig1->read, sizeof alig1->read, alig1->cigar, alig1->refbases, gctx->insbases );
+
+ produce_sam( alig0->sam, sizeof alig0->sam, gctx, alig0, alig1 );
+ produce_sam( alig1->sam, sizeof alig1->sam, gctx, alig1, alig0 );
+ }
+ else
+ {
+ alig0->refbases = read_refbases( alig0->refname, alig0->refpos, alig0->reflen, &alig0->bases_in_ref );
+ alig1->refbases = NULL;
+ alig1->bases_in_ref = 0;
+
+ cigar_t_2_read( alig0->read, sizeof alig0->read, alig0->cigar, alig0->refbases, gctx->insbases );
+ alig1->read[ 0 ] = 0;
+
+ produce_sam( alig0->sam, sizeof alig0->sam, gctx, alig0, alig1 );
+ alig1->sam[ 0 ] = 0;
+ }
}
rc_t CC KMain( int argc, char *argv [] )
{
rc_t rc = KOutHandlerSet ( write_to_FILE, stdout );
if ( rc == 0 )
- {
- Args * args;
+ {
+ Args * args;
- int n_options = sizeof Options / sizeof Options[ 0 ];
+ int n_options = sizeof Options / sizeof Options[ 0 ];
rc = ArgsMakeAndHandle( &args, argc, argv, 1, Options, n_options );
- if ( rc == 0 )
- {
- gen_context gctx;
-
- read_context( args, &gctx );
-
- if ( get_bool_option( args, OPTION_SHOW ) )
- show_details( &gctx );
- else if ( get_bool_option( args, OPTION_REF ) )
- KOutMsg ( "%s\n", gctx.alig[ 0 ].refbases );
- else if ( get_bool_option( args, OPTION_MDTAG ) )
- show_mdtag( &gctx );
- else if ( gctx.flags > 0 )
- explain_flags( gctx.flags );
- else
- generate_alignment( &gctx );
-
- release_alig( &gctx.alig[ 0 ] );
- if ( gctx.tlen != 0 )
- release_alig( &gctx.alig[ 1 ] );
-
- ArgsWhack( args );
- }
- }
+ if ( rc == 0 )
+ {
+ gen_context gctx;
+
+ read_context( args, &gctx );
+
+ if ( get_bool_option( args, OPTION_SHOW ) )
+ show_details( &gctx );
+ else if ( get_bool_option( args, OPTION_REF ) )
+ KOutMsg ( "%s\n", gctx.alig[ 0 ].refbases );
+ else if ( get_bool_option( args, OPTION_MDTAG ) )
+ show_mdtag( &gctx );
+ else if ( gctx.flags > 0 )
+ explain_flags( gctx.flags );
+ else
+ generate_alignment( &gctx );
+
+ release_alig( &gctx.alig[ 0 ] );
+ if ( gctx.tlen != 0 )
+ release_alig( &gctx.alig[ 1 ] );
+
+ ArgsWhack( args );
+ }
+ }
return rc;
}
diff --git a/test/samline/bx_tag_test.py b/test/samline/bx_tag_test.py
new file mode 100755
index 0000000..3448d52
--- /dev/null
+++ b/test/samline/bx_tag_test.py
@@ -0,0 +1,27 @@
+#!/opt/python-all/bin/python
+from sam import *
+
+REF = "NC_011752.1"
+ALIAS = "c1"
+CSRA1 = "X.CSRA"
+
+def load( L ) :
+ R1 = bam_load( L, CSRA1, "--make-spots-with-secondary -E0 -Q0" )
+ print "bam-load = %d"%( R1 )
+
+def load_and_print( L ) :
+ load( L )
+ sam_dump( CSRA1 )
+
+
+def test1() :
+ A1 = make_prim( "A1", 0, REF, ALIAS, 17000, 20, "60M" )
+ A2 = make_prim( "A2", 0, REF, ALIAS, 12500, 20, "50M" )
+ A1.pair_with( A2 )
+
+ A1.set_tags( "BX:Z:i_am_a_BX_tag" )
+
+ load_and_print( [ A1, A2 ] )
+
+
+test1()
diff --git a/test/samline/ca_test.py b/test/samline/ca_test.py
new file mode 100755
index 0000000..c8e30e3
--- /dev/null
+++ b/test/samline/ca_test.py
@@ -0,0 +1,80 @@
+#!/opt/python-all/bin/python
+from sam import *
+
+def dump( acc ) :
+ print "%s.SEQ"%(acc)
+ vdb_dump( acc, "-C SPOT_ID,READ -l0 -f tab" )
+ print "%s.PRIM"%( acc )
+ vdb_dump( acc, "-T PRIMARY_ALIGNMENT -C ALIGN_ID,READ -l0, -f tab" )
+ print "%s.SEC"%( acc )
+ vdb_dump( acc, "-T SECONDARY_ALIGNMENT -C ALIGN_ID,READ -l0 -f tab" )
+
+REF = "NC_011752.1"
+ALIAS = "c1"
+CSRA1 = "AFTER_BAM_LOAD.CSRA"
+CSRA2 = "AFTER_SRA_SORT.CSRA"
+
+def load_sort( L ) :
+ R1 = bam_load( L, CSRA1, "--make-spots-with-secondary -L 3 -E0 -Q0" )
+ if R1 == 1 :
+ print "bam-load = OK"
+ R2 = sra_sort( CSRA1, CSRA2 )
+ if R2 == 1 :
+ print "sra-sort = OK"
+ else :
+ print "sra-sort = FAILED"
+ else :
+ print "bam-load = FAILED"
+
+def load_sort_print( L ) :
+ load_sort( L )
+ dump( CSRA1 )
+ dump( CSRA2 )
+
+
+def test1() :
+ print "test #1 --------------------------------------------------"
+ print "...having a single recondary alignmnet without a primary it belongs to"
+ A1 = make_prim( "A1", 0, REF, ALIAS, 17000, 20, "60M" )
+ A2 = make_prim( "A2", 0, REF, ALIAS, 12500, 20, "50M" )
+ A1.pair_with( A2 )
+
+ A3 = make_prim( "A3", 0, REF, ALIAS, 33000, 20, "60M" )
+ U1 = make_unaligned( "U1", 0, "ACTTTAGTAAGGGGTTNN" )
+
+ A4 = make_sec( "A4", 0, REF, ALIAS, 19000, 20, "60M" )
+ A4.link_to( A1 )
+
+ A5 = make_sec( "A5", 0, REF, ALIAS, 22000, 20, "30M" )
+
+ load_sort_print( [ A1, A2, A3, A4, U1, A5 ] )
+
+# the resulting X.CSRA and S.CSRA produce errors in seq_restore_read_impl2
+def test2() :
+ print "test #2 --------------------------------------------------"
+ print "...having a pair of a prim. and a sec. alignment"
+ print "= SEQUENCE-table cannot reconstruct READ"
+ A1 = make_prim( "A1", 0, REF, ALIAS, 17000, 20, "60M" )
+ A2 = make_sec( "A2", 0, REF, ALIAS, 12500, 20, "50M" )
+ A1.pair_with( A2 )
+
+ load_sort_print( [ A1, A2 ] )
+
+# the resulting X.CSRA produces errors in seq_restore_read_impl2
+# but S.CSRA sefaults in vdb-dump!
+def test3() :
+ print "test #3 --------------------------------------------------"
+ A1 = make_prim( "A1", 0, REF, ALIAS, 1000, 20, "53M" )
+ A2 = make_sec( "A2", 0, REF, ALIAS, 3500, 20, "50M" )
+ A1.pair_with( A2 )
+
+ A3 = make_sec( "A3", 0, REF, ALIAS, 6800, 20, "55M" )
+ A3.flags |= FLAG_NEXT_UNMAPPED
+
+ load_sort_print( [ A3, A1, A2 ] )
+
+
+test1()
+test2()
+test3()
+
diff --git a/test/samline/cigar.c b/test/samline/cigar.c
index 2e6c179..6193699 100644
--- a/test/samline/cigar.c
+++ b/test/samline/cigar.c
@@ -36,411 +36,411 @@
struct cigar_t
{
- char * op;
- int * count;
- size_t size, length;
+ char * op;
+ int * count;
+ size_t size, length;
};
static void init_cigar_t( struct cigar_t * c, size_t size )
{
- if ( c != NULL )
- {
- c->size = 0;
- c->length = 0;
- c->op = malloc( sizeof( c->op[ 0 ] ) * size );
- if ( c->op != NULL )
- {
- c->count = malloc( sizeof( c->count[ 0 ] ) * size );
- if ( c->count != NULL )
- c->size = size;
- else
- free( ( void * ) c->op );
- }
- }
+ if ( c != NULL )
+ {
+ c->size = 0;
+ c->length = 0;
+ c->op = malloc( sizeof( c->op[ 0 ] ) * size );
+ if ( c->op != NULL )
+ {
+ c->count = malloc( sizeof( c->count[ 0 ] ) * size );
+ if ( c->count != NULL )
+ c->size = size;
+ else
+ free( ( void * ) c->op );
+ }
+ }
}
static void resize_cigar_t( struct cigar_t * c, size_t new_size )
{
- if ( c != NULL )
- {
- if ( c->size == 0 )
- init_cigar_t( c, new_size );
- else if ( c->size < new_size )
- {
- char * temp_op = c->op;
- c->op = realloc( c->op, sizeof( c->op[ 0 ] ) * new_size );
- if ( c->op != NULL )
- {
- int * temp_count = c->count;
- c->count = realloc( c->count, sizeof( c->count[ 0 ] ) * new_size );
- if ( c->count != NULL )
- c->size = new_size;
- else
- c->count = temp_count;
- }
- else
- c->op = temp_op;
- }
- }
+ if ( c != NULL )
+ {
+ if ( c->size == 0 )
+ init_cigar_t( c, new_size );
+ else if ( c->size < new_size )
+ {
+ char * temp_op = c->op;
+ c->op = realloc( c->op, sizeof( c->op[ 0 ] ) * new_size );
+ if ( c->op != NULL )
+ {
+ int * temp_count = c->count;
+ c->count = realloc( c->count, sizeof( c->count[ 0 ] ) * new_size );
+ if ( c->count != NULL )
+ c->size = new_size;
+ else
+ c->count = temp_count;
+ }
+ else
+ c->op = temp_op;
+ }
+ }
}
static void append_to_cigar_t( struct cigar_t * c, char op, int count )
{
- if ( c->length < c->size )
- {
- c->op[ c->length ] = op;
- c->count[ c->length ++ ] = count;
- }
+ if ( c->length < c->size )
+ {
+ c->op[ c->length ] = op;
+ c->count[ c->length ++ ] = count;
+ }
}
void parse_cigar_t( struct cigar_t * c, const char * cigar_str )
{
- if ( c != NULL && cigar_str != NULL && cigar_str[ 0 ] != 0 )
- {
- resize_cigar_t( c, strlen( cigar_str ) );
- if ( c->size > 0 )
- {
- int count = 0;
- while ( *cigar_str != 0 && c->length < c->size )
- {
- if ( isdigit( *cigar_str ) )
- {
- count = ( count * 10 ) + ( *cigar_str - '0' );
- }
- else
- {
- if ( count == 0 ) count = 1;
- append_to_cigar_t( c, *cigar_str, count );
- count = 0;
- }
- cigar_str++;
- }
- }
- }
+ if ( c != NULL && cigar_str != NULL && cigar_str[ 0 ] != 0 )
+ {
+ resize_cigar_t( c, strlen( cigar_str ) );
+ if ( c->size > 0 )
+ {
+ int count = 0;
+ while ( *cigar_str != 0 && c->length < c->size )
+ {
+ if ( isdigit( *cigar_str ) )
+ {
+ count = ( count * 10 ) + ( *cigar_str - '0' );
+ }
+ else
+ {
+ if ( count == 0 ) count = 1;
+ append_to_cigar_t( c, *cigar_str, count );
+ count = 0;
+ }
+ cigar_str++;
+ }
+ }
+ }
}
struct cigar_t * make_cigar_t( const char * cigar_str )
{
- struct cigar_t * res = malloc( sizeof * res );
- if ( res != NULL )
- {
- size_t size;
- if ( cigar_str != NULL && cigar_str[ 0 ] != 0 )
- size = strlen( cigar_str );
- else
- size = 1024;
- init_cigar_t( res, size );
- if ( res->size == size )
- parse_cigar_t( res, cigar_str );
- }
- return res;
+ struct cigar_t * res = malloc( sizeof * res );
+ if ( res != NULL )
+ {
+ size_t size;
+ if ( cigar_str != NULL && cigar_str[ 0 ] != 0 )
+ size = strlen( cigar_str );
+ else
+ size = 1024;
+ init_cigar_t( res, size );
+ if ( res->size == size )
+ parse_cigar_t( res, cigar_str );
+ }
+ return res;
}
void free_cigar_t( struct cigar_t * c )
{
- if ( c != NULL )
- {
- if ( c->op != NULL )
- {
- free( ( void * ) c->op );
- c->op = NULL;
- }
- if ( c->count != NULL )
- {
- free( ( void * ) c->count );
- c->count = NULL;
- }
- free( ( void * ) c );
- }
+ if ( c != NULL )
+ {
+ if ( c->op != NULL )
+ {
+ free( ( void * ) c->op );
+ c->op = NULL;
+ }
+ if ( c->count != NULL )
+ {
+ free( ( void * ) c->count );
+ c->count = NULL;
+ }
+ free( ( void * ) c );
+ }
}
int cigar_t_reflen( const struct cigar_t * c )
{
- int res = 0;
- if ( c != NULL )
- {
- int i;
- for ( i = 0; i < c->length; ++i )
- {
- switch( c->op[ i ] )
- {
- case 'A' : res += c->count[ i ]; break;
- case 'C' : res += c->count[ i ]; break;
- case 'G' : res += c->count[ i ]; break;
- case 'T' : res += c->count[ i ]; break;
-
- case 'D' : res += c->count[ i ]; break;
- case 'M' : res += c->count[ i ]; break;
- }
- }
- }
- return res;
+ int res = 0;
+ if ( c != NULL )
+ {
+ int i;
+ for ( i = 0; i < c->length; ++i )
+ {
+ switch( c->op[ i ] )
+ {
+ case 'A' : res += c->count[ i ]; break;
+ case 'C' : res += c->count[ i ]; break;
+ case 'G' : res += c->count[ i ]; break;
+ case 'T' : res += c->count[ i ]; break;
+
+ case 'D' : res += c->count[ i ]; break;
+ case 'M' : res += c->count[ i ]; break;
+ }
+ }
+ }
+ return res;
}
int cigar_t_readlen( const struct cigar_t * c )
{
- int res = 0;
- if ( c != NULL )
- {
- int i;
- for ( i = 0; i < c->length; ++i )
- {
- if ( c->op[ i ] != 'D' )
- res += c->count[ i ];
- }
- }
- return res;
+ int res = 0;
+ if ( c != NULL )
+ {
+ int i;
+ for ( i = 0; i < c->length; ++i )
+ {
+ if ( c->op[ i ] != 'D' )
+ res += c->count[ i ];
+ }
+ }
+ return res;
}
int cigar_t_inslen( const struct cigar_t * c )
{
- int res = 0;
- if ( c != NULL )
- {
- int i;
- for ( i = 0; i < c->length; ++i )
- {
- if ( c->op[ i ] == 'I' )
- res += c->count[ i ];
- }
- }
- return res;
+ int res = 0;
+ if ( c != NULL )
+ {
+ int i;
+ for ( i = 0; i < c->length; ++i )
+ {
+ if ( c->op[ i ] == 'I' )
+ res += c->count[ i ];
+ }
+ }
+ return res;
}
size_t cigar_t_string( char * buffer, size_t buf_len, const struct cigar_t * c )
{
- size_t res = 0;
- if ( buffer != NULL && buf_len > 0 && c != NULL && c->length > 0 )
- {
- int i;
- for ( i = 0; i < c->length && res < buf_len; ++i )
- {
- size_t num_writ;
- string_printf( &buffer[ res ], buf_len - res, &num_writ,
- "%d%c", c->count[ i ], c->op[ i ] );
- res += num_writ;
- }
- if ( res < buf_len )
- buffer[ res ] = 0;
- }
- return res;
+ size_t res = 0;
+ if ( buffer != NULL && buf_len > 0 && c != NULL && c->length > 0 )
+ {
+ int i;
+ for ( i = 0; i < c->length && res < buf_len; ++i )
+ {
+ size_t num_writ;
+ string_printf( &buffer[ res ], buf_len - res, &num_writ,
+ "%d%c", c->count[ i ], c->op[ i ] );
+ res += num_writ;
+ }
+ if ( res < buf_len )
+ buffer[ res ] = 0;
+ }
+ return res;
}
void debug_cigar_t( const struct cigar_t * c )
{
- if ( c != NULL )
- {
- int i;
- for ( i = 0; i < c->length; ++i )
- KOutMsg( "c[%d]: %d x %c\n", i, c->count[ i ], c->op[ i ] );
- }
+ if ( c != NULL )
+ {
+ int i;
+ for ( i = 0; i < c->length; ++i )
+ KOutMsg( "c[%d]: %d x %c\n", i, c->count[ i ], c->op[ i ] );
+ }
}
static int can_merge( char op1, char op2 )
{
- char mop1 = op1;
- char mop2 = op2;
- if ( mop1 == 'A' || mop1 == 'C' || mop1 == 'G' || mop1 == 'T' )
- mop1 = 'M';
- if ( mop2 == 'A' || mop2 == 'C' || mop2 == 'G' || mop2 == 'T' )
- mop2 = 'M';
- return ( mop1 == mop2 );
+ char mop1 = op1;
+ char mop2 = op2;
+ if ( mop1 == 'A' || mop1 == 'C' || mop1 == 'G' || mop1 == 'T' )
+ mop1 = 'M';
+ if ( mop2 == 'A' || mop2 == 'C' || mop2 == 'G' || mop2 == 'T' )
+ mop2 = 'M';
+ return ( mop1 == mop2 );
}
struct cigar_t * merge_cigar_t( const struct cigar_t * c )
{
- struct cigar_t * res = NULL;
- if ( c != NULL && c -> length > 0 )
- {
- res = malloc( sizeof * res );
- if ( res != NULL )
- {
- init_cigar_t( res, c->size );
- if ( res->size == c->size )
- {
- int i, last;
- append_to_cigar_t( res, c->op[ 0 ], c->count[ 0 ] );
- for ( i = 1; i < c->length; ++i )
- {
- last = res->length - 1;
- if ( can_merge( c->op[ i ], res->op[ last ] ) )
- {
- res->count[ last ] += c->count[ i ];
- res->op[ last ] = 'M';
- }
- else
- append_to_cigar_t( res, c->op[ i ], c->count[ i ] );
- }
- }
- }
- }
- return res;
+ struct cigar_t * res = NULL;
+ if ( c != NULL && c -> length > 0 )
+ {
+ res = malloc( sizeof * res );
+ if ( res != NULL )
+ {
+ init_cigar_t( res, c->size );
+ if ( res->size == c->size )
+ {
+ int i, last;
+ append_to_cigar_t( res, c->op[ 0 ], c->count[ 0 ] );
+ for ( i = 1; i < c->length; ++i )
+ {
+ last = res->length - 1;
+ if ( can_merge( c->op[ i ], res->op[ last ] ) )
+ {
+ res->count[ last ] += c->count[ i ];
+ res->op[ last ] = 'M';
+ }
+ else
+ append_to_cigar_t( res, c->op[ i ], c->count[ i ] );
+ }
+ }
+ }
+ }
+ return res;
}
static void append_base( char * buffer, size_t buf_len, size_t * buf_idx, int count, char c )
{
- int i;
- for ( i = 0; i < count && *buf_idx < buf_len; ++i )
- buffer[ (*buf_idx)++ ] = c;
+ int i;
+ for ( i = 0; i < count && *buf_idx < buf_len; ++i )
+ buffer[ (*buf_idx)++ ] = c;
}
static void append_bases( char * buffer, size_t buf_len, size_t * buf_idx, int count,
- const char * src, int src_len, int *src_idx )
+ const char * src, int src_len, int *src_idx )
{
- int i;
- for ( i = 0; i < count && *buf_idx < buf_len && *src_idx < src_len; ++i )
- buffer[ (*buf_idx)++ ] = src[ (*src_idx)++ ];
+ int i;
+ for ( i = 0; i < count && *buf_idx < buf_len && *src_idx < src_len; ++i )
+ buffer[ (*buf_idx)++ ] = src[ (*src_idx)++ ];
}
size_t cigar_t_2_read( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * ref_bases, const char * ins_bases )
+ const struct cigar_t * c, const char * ref_bases, const char * ins_bases )
{
- size_t res = 0;
- if ( buffer != NULL && buf_len > 0 && c != NULL )
- {
- int readlen = cigar_t_readlen( c );
- if ( readlen > 0 )
- {
- int needed_ref_bases = cigar_t_reflen( c );
- int available_ref_bases = ref_bases != NULL ? strlen( ref_bases ) : 0;
- if ( available_ref_bases >= needed_ref_bases )
- {
- int needed_ins_bases = cigar_t_inslen( c );
- int available_ins_bases = ins_bases != NULL ? strlen( ins_bases ) : 0;
- if ( available_ins_bases >= needed_ins_bases )
- {
- int ref_idx = 0;
- int ins_idx = 0;
- int cigar_idx;
- for ( cigar_idx = 0; cigar_idx < c->length; ++cigar_idx )
- {
- int count = c->count[ cigar_idx ];
- switch ( c->op[ cigar_idx ] )
- {
- case 'A' : append_base( buffer, buf_len, &res, count, 'A' );
- ref_idx += count;
- break;
-
- case 'C' : append_base( buffer, buf_len, &res, count, 'C' );
- ref_idx += count;
- break;
-
- case 'G' : append_base( buffer, buf_len, &res, count, 'G' );
- ref_idx += count;
- break;
-
- case 'T' : append_base( buffer, buf_len, &res, count, 'T' );
- ref_idx += count;
- break;
-
- case 'D' : ref_idx += count;
- break;
-
- case 'I' : append_bases( buffer, buf_len, &res, count,
- ins_bases, available_ins_bases, &ins_idx );
- break;
-
- case 'M' : append_bases( buffer, buf_len, &res, count,
- ref_bases, available_ref_bases, &ref_idx );
- break;
- }
- }
- if ( res < buf_len )
- buffer[ res ] = 0;
- }
- }
- }
- }
- return res;
+ size_t res = 0;
+ if ( buffer != NULL && buf_len > 0 && c != NULL )
+ {
+ int readlen = cigar_t_readlen( c );
+ if ( readlen > 0 )
+ {
+ int needed_ref_bases = cigar_t_reflen( c );
+ int available_ref_bases = ref_bases != NULL ? strlen( ref_bases ) : 0;
+ if ( available_ref_bases >= needed_ref_bases )
+ {
+ int needed_ins_bases = cigar_t_inslen( c );
+ int available_ins_bases = ins_bases != NULL ? strlen( ins_bases ) : 0;
+ if ( available_ins_bases >= needed_ins_bases )
+ {
+ int ref_idx = 0;
+ int ins_idx = 0;
+ int cigar_idx;
+ for ( cigar_idx = 0; cigar_idx < c->length; ++cigar_idx )
+ {
+ int count = c->count[ cigar_idx ];
+ switch ( c->op[ cigar_idx ] )
+ {
+ case 'A' : append_base( buffer, buf_len, &res, count, 'A' );
+ ref_idx += count;
+ break;
+
+ case 'C' : append_base( buffer, buf_len, &res, count, 'C' );
+ ref_idx += count;
+ break;
+
+ case 'G' : append_base( buffer, buf_len, &res, count, 'G' );
+ ref_idx += count;
+ break;
+
+ case 'T' : append_base( buffer, buf_len, &res, count, 'T' );
+ ref_idx += count;
+ break;
+
+ case 'D' : ref_idx += count;
+ break;
+
+ case 'I' : append_bases( buffer, buf_len, &res, count,
+ ins_bases, available_ins_bases, &ins_idx );
+ break;
+
+ case 'M' : append_bases( buffer, buf_len, &res, count,
+ ref_bases, available_ref_bases, &ref_idx );
+ break;
+ }
+ }
+ if ( res < buf_len )
+ buffer[ res ] = 0;
+ }
+ }
+ }
+ }
+ return res;
}
static void print_matchcount( char * buffer, size_t buf_len, size_t *buf_idx, int *match_count )
{
- size_t num_writ;
- string_printf( &buffer[ *buf_idx ], buf_len - *buf_idx, &num_writ, "%d", *match_count );
- *match_count = 0;
- *buf_idx += num_writ;
+ size_t num_writ;
+ string_printf( &buffer[ *buf_idx ], buf_len - *buf_idx, &num_writ, "%d", *match_count );
+ *match_count = 0;
+ *buf_idx += num_writ;
}
static void md_delete( char * buffer, size_t buf_len, size_t *buf_idx, int count, int *match_count,
- const char * reference, int *ref_idx )
+ const char * reference, int *ref_idx )
{
- if ( *match_count > 0 )
- print_matchcount( buffer, buf_len, buf_idx, match_count );
-
- if ( *buf_idx + count + 1 < buf_len )
- {
- int i;
- buffer[ (*buf_idx)++ ] = '^';
- for ( i = 0; i < count; ++i )
- buffer[ (*buf_idx)++ ] = reference[ (*ref_idx)++ ];
- }
+ if ( *match_count > 0 )
+ print_matchcount( buffer, buf_len, buf_idx, match_count );
+
+ if ( *buf_idx + count + 1 < buf_len )
+ {
+ int i;
+ buffer[ (*buf_idx)++ ] = '^';
+ for ( i = 0; i < count; ++i )
+ buffer[ (*buf_idx)++ ] = reference[ (*ref_idx)++ ];
+ }
}
static void md_match( char * buffer, size_t buf_len, size_t *buf_idx, int count, int *match_count,
- const char * read, int *read_idx, const char *reference, int *ref_idx )
+ const char * read, int *read_idx, const char *reference, int *ref_idx )
{
- int i;
- for ( i = 0; i < count; ++i )
- {
- if ( read[ (*read_idx)++ ] == reference[ *ref_idx ] )
- {
- (*match_count)++;
- }
- else
- {
- print_matchcount( buffer, buf_len, buf_idx, match_count );
- if ( *buf_idx < buf_len )
- buffer[ (*buf_idx)++ ] = reference[ *ref_idx ];
- }
- (*ref_idx)++;
- }
+ int i;
+ for ( i = 0; i < count; ++i )
+ {
+ if ( read[ (*read_idx)++ ] == reference[ *ref_idx ] )
+ {
+ (*match_count)++;
+ }
+ else
+ {
+ print_matchcount( buffer, buf_len, buf_idx, match_count );
+ if ( *buf_idx < buf_len )
+ buffer[ (*buf_idx)++ ] = reference[ *ref_idx ];
+ }
+ (*ref_idx)++;
+ }
}
size_t md_tag( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * read, const char * reference )
+ const struct cigar_t * c, const char * read, const char * reference )
{
- size_t res = 0;
- if ( buffer != NULL && buf_len > 0 && c != NULL )
- {
- int read_idx = 0;
- int ref_idx = 0;
- int match_count = 0;
- int cigar_idx;
- for ( cigar_idx = 0; cigar_idx < c->length; ++cigar_idx )
- {
- int count = c->count[ cigar_idx ];
- switch ( c->op[ cigar_idx ] )
- {
- case 'D' : md_delete( buffer, buf_len, &res, count, &match_count,
- reference, &ref_idx );
- break;
-
- case 'I' : read_idx += count; break;
-
- case 'M' : md_match( buffer, buf_len, &res, count, &match_count,
- read, &read_idx, reference, &ref_idx );
- break;
- }
- }
- if ( match_count > 0 )
- print_matchcount( buffer, buf_len, &res, &match_count );
-
- if ( res < buf_len )
- buffer[ res ] = 0;
- }
- return res;
+ size_t res = 0;
+ if ( buffer != NULL && buf_len > 0 && c != NULL )
+ {
+ int read_idx = 0;
+ int ref_idx = 0;
+ int match_count = 0;
+ int cigar_idx;
+ for ( cigar_idx = 0; cigar_idx < c->length; ++cigar_idx )
+ {
+ int count = c->count[ cigar_idx ];
+ switch ( c->op[ cigar_idx ] )
+ {
+ case 'D' : md_delete( buffer, buf_len, &res, count, &match_count,
+ reference, &ref_idx );
+ break;
+
+ case 'I' : read_idx += count; break;
+
+ case 'M' : md_match( buffer, buf_len, &res, count, &match_count,
+ read, &read_idx, reference, &ref_idx );
+ break;
+ }
+ }
+ if ( match_count > 0 )
+ print_matchcount( buffer, buf_len, &res, &match_count );
+
+ if ( res < buf_len )
+ buffer[ res ] = 0;
+ }
+ return res;
}
diff --git a/test/samline/cigar.h b/test/samline/cigar.h
index d0ec04a..75bb6a8 100644
--- a/test/samline/cigar.h
+++ b/test/samline/cigar.h
@@ -45,12 +45,12 @@ size_t cigar_t_string( char * buffer, size_t buf_len, const struct cigar_t * c )
struct cigar_t * merge_cigar_t( const struct cigar_t * c );
size_t md_tag( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * read, const char * reference );
+ const struct cigar_t * c, const char * read, const char * reference );
void debug_cigar_t( const struct cigar_t * c );
size_t cigar_t_2_read( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * ref_bases, const char * ins_bases );
+ const struct cigar_t * c, const char * ref_bases, const char * ins_bases );
#ifdef __cplusplus
}
diff --git a/test/samline/example.sh b/test/samline/example.sh
index 9e05454..4c385f3 100755
--- a/test/samline/example.sh
+++ b/test/samline/example.sh
@@ -11,54 +11,51 @@ execute()
# call: produce_SAM "$CONFIG" "$SAMFILE"
produce_SAM()
{
- SAMLINE_BINARY="samline"
- REFNAME="NC_011752.1"
- REFPOS1=1000
- REFPOS2=3500
- CIGAR1="30MAAA20M"
- CIGAR2="50M2D10M"
- execute "$SAMLINE_BINARY -r $REFNAME -p $REFPOS1 -p $REFPOS2 -c $CIGAR1 -c $CIGAR2 -n $1 -d > $2"
+ SAMLINE_BINARY="samline"
+ REFNAME="NC_011752.1"
+ REFPOS1=1000
+ REFPOS2=3500
+ CIGAR1="30MAAA20M"
+ CIGAR2="50M2D10M"
+ execute "$SAMLINE_BINARY -r $REFNAME -p $REFPOS1 -p $REFPOS2 -c $CIGAR1 -c $CIGAR2 -n $1 -d > $2"
}
# call: convert_SAM_to_BAM "$SAMFILE" "$BAMFILE"
convert_SAM_to_BAM()
{
- SAMTOOLS_BINARY="/netopt/ncbi_tools64/samtools/bin/samtools"
- execute "$SAMTOOLS_BINARY view -bS $1 > $2"
+ SAMTOOLS_BINARY="/netopt/ncbi_tools64/samtools/bin/samtools"
+ execute "$SAMTOOLS_BINARY view -bS $1 > $2"
}
# call: load_BAM_to_CSRA "$CONFIG" "$BAMFILE" "$TEMP_DIR"
load_BAM_to_CSRA()
{
- BAMLOAD_BINARY="bam-load"
- execute "$BAMLOAD_BINARY -L 3 -o $3 -k $1 -E0 -Q0 $2"
+ BAMLOAD_BINARY="bam-load"
+ execute "$BAMLOAD_BINARY -L 3 -o $3 -k $1 -E0 -Q0 $2"
}
# call: load_SAM_to_CSRA "$CONFIG" "$SAMFILE" "$TEMP_DIR"
load_SAM_to_CSRA()
{
- BAMLOAD_BINARY="bam-load"
- execute "cat $2 | $BAMLOAD_BINARY -L 3 -o $3 -k $1 -E0 -Q0 /dev/stdin"
+ BAMLOAD_BINARY="bam-load"
+ execute "cat $2 | $BAMLOAD_BINARY -L 3 -o $3 -k $1 -E0 -Q0 /dev/stdin"
}
# call: kar_CSRA "$FINAL_CSRA" "$TEMP_DIR"
kar_CSRA()
{
- KAR_BINARY="kar"
- execute "$KAR_BINARY --create $1 -d $2 -f"
+ KAR_BINARY="kar"
+ execute "$KAR_BINARY --create $1 -d $2 -f"
}
SAMFILE="temp.SAM"
-BAMFILE="temp.BAM"
CONFIG="temp.kfg"
TEMP_DIR="temp_csra"
FINAL_CSRA="test.csra"
produce_SAM "$CONFIG" "$SAMFILE"
-#convert_SAM_to_BAM "$SAMFILE" "$BAMFILE"
-#load_BAM_to_CSRA "$CONFIG" "$BAMFILE" "$TEMP_DIR"
load_SAM_to_CSRA "$CONFIG" "$SAMFILE" "$TEMP_DIR"
kar_CSRA "$FINAL_CSRA" "$TEMP_DIR"
-execute "rm -rf $TEMP_DIR $SAMFILE $BAMFILE $CONFIG"
+execute "rm -rf $TEMP_DIR $SAMFILE $CONFIG"
execute "vdb-dump $FINAL_CSRA --info"
diff --git a/test/samline/refbases.c b/test/samline/refbases.c
index 76476d8..45d375d 100644
--- a/test/samline/refbases.c
+++ b/test/samline/refbases.c
@@ -41,105 +41,144 @@
static uint32_t read_uint32( const VCursor * cur, uint32_t col_idx )
{
- uint32_t elem_bits, boff, row_len;
- const uint32_t * value;
- rc_t rc = VCursorCellDataDirect ( cur, 1, col_idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc == 0 )
- return *value;
- return 0;
+ uint32_t elem_bits, boff, row_len;
+ const uint32_t * value;
+ rc_t rc = VCursorCellDataDirect ( cur, 1, col_idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc == 0 )
+ return *value;
+ return 0;
}
static uint32_t read_buffer( const VCursor * cur, char * buffer, int64_t row_id,
- uint32_t offset, size_t buflen, uint32_t col_idx )
+ uint32_t offset, size_t buflen, uint32_t col_idx )
{
- uint32_t elem_bits, boff, row_len, res = 0;
- const char * value;
- rc_t rc = VCursorCellDataDirect ( cur, row_id, col_idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc == 0 && row_len > offset )
- {
- res = ( row_len - offset );
- if ( res > buflen ) res = buflen;
- memcpy ( buffer, &value[ offset ], res );
- }
- return res;
+ uint32_t elem_bits, boff, row_len, res = 0;
+ const char * value;
+ rc_t rc = VCursorCellDataDirect ( cur, row_id, col_idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc == 0 && row_len > offset )
+ {
+ res = ( row_len - offset );
+ if ( res > buflen ) res = buflen;
+ memcpy ( buffer, &value[ offset ], res );
+ }
+ return res;
}
static uint32_t read_bases( const VCursor * cur, char * buffer, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t col_idx, uint32_t max_seq_len )
+ uint32_t ref_len, uint32_t col_idx, uint32_t max_seq_len )
{
- uint32_t res = 0, n_read = 1;
- uint32_t row_id = ( ( ref_pos_1_based - 1 ) / max_seq_len ) + 1;
- uint32_t offset = ( ref_pos_1_based - 1 ) - ( ( row_id - 1 ) * max_seq_len );
- size_t buflen = ref_len;
- while ( res < ref_len && n_read > 0 )
- {
- n_read = read_buffer( cur, &buffer[ res ], row_id++, offset, buflen, col_idx );
- res += n_read;
- buflen -= n_read;
- offset = 0;
- }
- return res;
+ uint32_t res = 0, n_read = 1;
+ uint32_t row_id = ( ( ref_pos_1_based - 1 ) / max_seq_len ) + 1;
+ uint32_t offset = ( ref_pos_1_based - 1 ) - ( ( row_id - 1 ) * max_seq_len );
+ size_t buflen = ref_len;
+ while ( res < ref_len && n_read > 0 )
+ {
+ n_read = read_buffer( cur, &buffer[ res ], row_id++, offset, buflen, col_idx );
+ res += n_read;
+ buflen -= n_read;
+ offset = 0;
+ }
+ return res;
}
char * read_refbases( const char * refname, uint32_t ref_pos_1_based, uint32_t ref_len, uint32_t * bases_in_ref )
{
- char * res = NULL;
+ char * res = NULL;
KDirectory * dir;
rc_t rc = KDirectoryNativeDir( &dir );
- if ( rc == 0 )
- {
- const VDBManager * mgr;
- rc = VDBManagerMakeRead ( &mgr, dir );
- if ( rc == 0 )
- {
- const VTable * tab;
- rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", refname );
- if ( rc == 0 )
- {
- const VCursor * cur;
- rc = VTableCreateCursorRead( tab, &cur );
- if ( rc == 0 )
- {
- uint32_t base_count_idx, read_idx, max_seq_len_idx;
- rc = VCursorAddColumn( cur, &base_count_idx, "BASE_COUNT" );
- if ( rc == 0 )
- {
- rc = VCursorAddColumn( cur, &read_idx, "READ" );
- if ( rc == 0 )
- {
- rc = VCursorAddColumn( cur, &max_seq_len_idx, "MAX_SEQ_LEN" );
- if ( rc == 0 )
- {
- rc = VCursorOpen ( cur );
- if ( rc == 0 )
- {
- uint32_t base_count = read_uint32( cur, base_count_idx );
- if ( bases_in_ref != NULL )
- *bases_in_ref = base_count;
- uint32_t max_seq_len = read_uint32( cur, max_seq_len_idx );
- if ( base_count > ( ref_pos_1_based + ref_len ) && max_seq_len > 0 )
- {
- res = malloc( ref_len + 1 );
- if ( res != NULL )
- {
- uint32_t n_read = read_bases( cur, res, ref_pos_1_based,
- ref_len, read_idx, max_seq_len );
- res[ n_read ] = 0;
- }
- }
- }
- }
- }
- }
- VCursorRelease( cur );
- }
- VTableRelease( tab );
- }
- VDBManagerRelease( mgr );
- }
- KDirectoryRelease( dir );
- }
- return res;
+ if ( rc == 0 )
+ {
+ const VDBManager * mgr;
+ rc = VDBManagerMakeRead ( &mgr, dir );
+ if ( rc == 0 )
+ {
+ const VTable * tab;
+ rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", refname );
+ if ( rc == 0 )
+ {
+ const VCursor * cur;
+ rc = VTableCreateCursorRead( tab, &cur );
+ if ( rc == 0 )
+ {
+ uint32_t base_count_idx, read_idx, max_seq_len_idx;
+ rc = VCursorAddColumn( cur, &base_count_idx, "BASE_COUNT" );
+ if ( rc == 0 )
+ {
+ rc = VCursorAddColumn( cur, &read_idx, "READ" );
+ if ( rc == 0 )
+ {
+ rc = VCursorAddColumn( cur, &max_seq_len_idx, "MAX_SEQ_LEN" );
+ if ( rc == 0 )
+ {
+ rc = VCursorOpen ( cur );
+ if ( rc == 0 )
+ {
+ uint32_t base_count = read_uint32( cur, base_count_idx );
+ if ( bases_in_ref != NULL )
+ *bases_in_ref = base_count;
+ uint32_t max_seq_len = read_uint32( cur, max_seq_len_idx );
+ if ( base_count > ( ref_pos_1_based + ref_len ) && max_seq_len > 0 )
+ {
+ res = malloc( ref_len + 1 );
+ if ( res != NULL )
+ {
+ uint32_t n_read = read_bases( cur, res, ref_pos_1_based,
+ ref_len, read_idx, max_seq_len );
+ res[ n_read ] = 0;
+ }
+ }
+ }
+ }
+ }
+ }
+ VCursorRelease( cur );
+ }
+ VTableRelease( tab );
+ }
+ VDBManagerRelease( mgr );
+ }
+ KDirectoryRelease( dir );
+ }
+ return res;
+}
+
+
+uint32_t ref_len( const char * refname )
+{
+ uint32_t res = 0;
+ KDirectory * dir;
+ rc_t rc = KDirectoryNativeDir( &dir );
+ if ( rc == 0 )
+ {
+ const VDBManager * mgr;
+ rc = VDBManagerMakeRead ( &mgr, dir );
+ if ( rc == 0 )
+ {
+ const VTable * tab;
+ rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", refname );
+ if ( rc == 0 )
+ {
+ const VCursor * cur;
+ rc = VTableCreateCursorRead( tab, &cur );
+ if ( rc == 0 )
+ {
+ uint32_t base_count_idx;
+ rc = VCursorAddColumn( cur, &base_count_idx, "BASE_COUNT" );
+ if ( rc == 0 )
+ {
+ rc = VCursorOpen ( cur );
+ if ( rc == 0 )
+ res = read_uint32( cur, base_count_idx );
+ }
+ VCursorRelease( cur );
+ }
+ VTableRelease( tab );
+ }
+ VDBManagerRelease( mgr );
+ }
+ KDirectoryRelease( dir );
+ }
+ return res;
}
diff --git a/test/samline/refbases.h b/test/samline/refbases.h
index 94136fb..e229559 100644
--- a/test/samline/refbases.h
+++ b/test/samline/refbases.h
@@ -32,7 +32,9 @@ extern "C" {
#endif
char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+ uint32_t ref_len, uint32_t * bases_in_ref );
+
+uint32_t ref_len( const char * refname );
#ifdef __cplusplus
}
diff --git a/test/samline/reject_multi_refnames.sh b/test/samline/reject_multi_refnames.sh
new file mode 100755
index 0000000..bd554e2
--- /dev/null
+++ b/test/samline/reject_multi_refnames.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+execute()
+{
+ echo "------------------------------------------------------"
+ echo $1
+ eval $1
+ echo "."
+}
+
+# call: produce_SAM "$SAMFILE" "$CONFIG"
+produce_SAM()
+{
+ SAMLINE_BINARY="samline"
+
+ OUTFILE=$1
+ CONFIG=$2
+
+ REFNAME="NC_011752.1"
+ REFALIAS0="c1"
+ REFALIAS1="c2"
+ REFPOS0=1000
+ REFPOS1=3500
+ CIGAR0="50M"
+ CIGAR1="50M"
+
+ ALIG0="-r $REFNAME -b $REFALIAS0 -p $REFPOS0 -c $CIGAR0"
+ ALIG1="-r $REFNAME -b $REFALIAS1 -p $REFPOS1 -c $CIGAR1"
+ #ALIG0="-r $REFNAME -p $REFPOS0 -c $CIGAR0"
+ #ALIG1="-r $REFNAME -p $REFPOS1 -c $CIGAR1"
+ WRITE_CONFIG="-n $CONFIG"
+ WRITE_HDR="-d"
+
+ execute "$SAMLINE_BINARY $ALIG0 $ALIG1 $WRITE_HDR $WRITE_CONFIG > $OUTFILE"
+}
+
+# call: load_SAM_to_CSRA "$CONFIG" "$SAMFILE" "$TEMP_DIR"
+load_SAM_to_CSRA()
+{
+ BAMLOAD_BINARY="bam-load"
+ execute "cat $2 | $BAMLOAD_BINARY -L 5 -o $3 -k $1 -E0 -Q0 /dev/stdin" # --allow-multi-map"
+}
+
+# call: kar_CSRA "$FINAL_CSRA" "$TEMP_DIR"
+kar_CSRA()
+{
+ KAR_BINARY="kar"
+ execute "$KAR_BINARY --create $1 -d $2 -f"
+}
+
+PREFIX="RMF"
+SAMFILE="${PREFIX}.SAM"
+CONFIG="${PREFIX}.kfg"
+TEMP_DIR="${PREFIX}_csra"
+FINAL_CSRA="${PREFIX}.csra"
+
+#produce_SAM "$SAMFILE" "$CONFIG"
+#execute "rm -rf $TEMP_DIR"
+load_SAM_to_CSRA "$CONFIG" "$SAMFILE" "$TEMP_DIR"
+#kar_CSRA "$FINAL_CSRA" "$TEMP_DIR"
+#execute "rm -rf $TEMP_DIR"
+#execute "vdb-dump $FINAL_CSRA --info"
\ No newline at end of file
diff --git a/test/samline/sam.py b/test/samline/sam.py
new file mode 100644
index 0000000..67d7284
--- /dev/null
+++ b/test/samline/sam.py
@@ -0,0 +1,339 @@
+import subprocess
+import os
+import shutil
+
+'''---------------------------------------------------------------
+helper function: generate READ from cigar,refname,refpos
+---------------------------------------------------------------'''
+def cigar2read( cigar, pos, ref ):
+ cmd = "sampart -f read -c %s -p %d -r %s"%( cigar, pos, ref )
+ return subprocess.check_output( cmd, shell = True )
+
+'''---------------------------------------------------------------
+helper function: generate random QUALITY of given length
+---------------------------------------------------------------'''
+def rnd_qual( l ):
+ cmd = "sampart -f qual -l %d -s 7"%( l )
+ return subprocess.check_output( cmd, shell = True )
+
+'''---------------------------------------------------------------
+helper function: transform cigar with given inserts into
+a 'clean' cigar that bam-load accepts
+---------------------------------------------------------------'''
+def merge_cigar( cigar ):
+ cmd = "sampart -f cigar -c %s"%( cigar )
+ return subprocess.check_output( cmd, shell = True )
+
+'''---------------------------------------------------------------
+helper function: to get length of a reference ( from the RefSeq-Acc )
+---------------------------------------------------------------'''
+def ref_len( ref ):
+ cmd = "sampart -f rlen -r %s"%( ref )
+ return int( subprocess.check_output( cmd, shell = True ) )
+
+'''---------------------------------------------------------------
+helper function: remove a file, without error if it does not exist
+---------------------------------------------------------------'''
+def rm_file( filename ) :
+ try:
+ os.remove( filename )
+ except:
+ pass
+
+'''---------------------------------------------------------------
+helper function: remove a directory, without error if it does not exist
+---------------------------------------------------------------'''
+def rm_dir( dirname ) :
+ try:
+ shutil.rmtree( dirname, ignore_errors=True )
+ except:
+ pass
+
+def load_file( filename ) :
+ if os.path.isfile( filename ) :
+ with open( filename, "r" ) as the_file:
+ return the_file.read()
+ return ""
+
+def print_file( filename ) :
+ s = load_file( filename )
+ if len( s ) > 0 :
+ print s
+
+def print_txt( txt ) :
+ if len( txt ) > 0 :
+ print txt
+
+def print_txt_list( list ) :
+ for a in list :
+ print_txt( a )
+
+'''===============================================================
+perform a bam-load on a python-list of SAM-objects
+ will create temporary files and directory ( x.sam, x.kfg, x_csra )
+ writes the content of the python-list into x.sam
+ writes a config file into x.kfg
+ performs bam-load, and prints its output
+ kar's the created directory into the given output-file
+ can be asked to keep the temporary files
+ list ........ list of SAM-objects
+ output....... name of cSRA-file to be created
+ params....... parameters passed into bam-load
+ keep_files... False/True for debugging temp. files
+==============================================================='''
+def bam_load( list, output, params, keep_files = False ) :
+ res = 0
+ txt1=""
+ txt2=""
+ try :
+ rm_dir( "x_csra" )
+ rm_file( output )
+ rm_file( "err.txt" )
+ save_sam( list, "x.sam" )
+ save_config( list, "x.kfg" )
+ cmd = "bam-load %s -o x_csra -k x.kfg x.sam 2>err.txt"%( params )
+ txt1 = subprocess.check_output( cmd, shell=True )
+ cmd = "kar --create %s -d x_csra -f 2>err.txt"%( output )
+ txt2 = subprocess.check_output( cmd, shell=True )
+ if not keep_files :
+ rm_dir( "x_csra" )
+ rm_file( "x.sam" )
+ rm_file( "x.kfg" )
+ res = 1
+ except :
+ pass
+ print_txt_list( [ load_file( "err.txt" ), txt1, txt2 ] )
+ rm_file( "err.txt" )
+ return res
+
+
+'''===============================================================
+perform a sra-sort on a given cSRA-file
+ will create a temporary directory ( s_csra )
+ performs sra-sort, and prints its output
+ kar's the created directory into the given output-file
+ can be asked to keep the temporary files
+ input ....... name of cSRA-file to be sorted
+ output....... name of cSRA-file to be created
+ params....... parameters passed into sra-sort
+ keep_files... False/True for debugging temp. files
+==============================================================='''
+def sra_sort( input, output, params = "", keep_files = False ) :
+ res = 0
+ txt1 = ""
+ txt2 = ""
+ try :
+ rm_dir( "s_csra" )
+ rm_file( output )
+ rm_file( "err.txt" )
+ cmd = "sra-sort %s s_csra -f %s 2>err.txt"%( input, params )
+ txt1 = subprocess.check_output( cmd, shell=True )
+ cmd = "kar --create %s -d s_csra -f 2>err.txt"%( output )
+ txt2 = subprocess.check_output( cmd, shell=True )
+ if not keep_files :
+ rm_dir( "s_csra" )
+ res = 1
+ except :
+ pass
+ print_txt_list( [ load_file( "err.txt" ), txt1, txt2 ] )
+ rm_file( "err.txt" )
+ return res
+
+
+def vdb_dump( accession, params = "" ) :
+ try :
+ cmd = "vdb-dump %s %s"%( accession, params )
+ txt = subprocess.check_output( cmd, stderr=subprocess.STDOUT, shell=True )
+ print txt
+ return 1
+ except :
+ pass
+ return 0
+
+def sam_dump( accession, params = "" ) :
+ try :
+ cmd = "sam-dump %s %s"%( accession, params )
+ txt = subprocess.check_output( cmd, stderr=subprocess.STDOUT, shell=True )
+ print txt
+ return 1
+ except :
+ pass
+ return 0
+
+'''===============================================================
+all 11 different SAM-Flags
+==============================================================='''
+FLAG_MULTI = 0x01
+FLAG_PROPPER = 0x02
+FLAG_UNMAPPED = 0x04
+FLAG_NEXT_UNMAPPED = 0x08
+FLAG_REVERSED = 0x010
+FLAG_NEXT_REVERSED = 0x020
+FLAG_FIRST = 0x040
+FLAG_LAST = 0x080
+FLAG_SECONDARY = 0x0100
+FLAG_BAD = 0x0200
+FLAG_PCR = 0x0400
+
+
+'''===============================================================
+ make a primary SAM-alignment
+==============================================================='''
+def make_prim( qname, flags, refname, refalias, pos, mapq, cigar, rnxt = "*", pnxt = "0" ) :
+ return SAM( qname, flags | FLAG_PROPPER, refname, refalias, pos, mapq, merge_cigar( cigar ),
+ cigar2read( cigar, pos, refname ), rnxt, pnxt )
+
+
+'''===============================================================
+ make a secondary SAM-alignment
+==============================================================='''
+def make_sec( qname, flags, refname, refalias, pos, mapq, cigar, rnxt = "*", pnxt = "0" ) :
+ return SAM( qname, flags | FLAG_SECONDARY, refname, refalias, pos, mapq, merge_cigar( cigar ),
+ cigar2read( cigar, pos, refname ), rnxt, pnxt )
+
+
+'''===============================================================
+ make an unaligned SAM
+==============================================================='''
+def make_unaligned( qname, flags, seq ) :
+ return SAM( qname, flags | FLAG_UNMAPPED, "-", "-", 0, 255, "*", seq, "-", 0 )
+
+
+'''---------------------------------------------------------------
+helper function: walk the list of SAM-objects, create a dictionary
+ key: refalias, value: refname
+ ( used in extract_headers and produce_config )
+---------------------------------------------------------------'''
+def make_refdict( list ) :
+ res = {}
+ for a in list :
+ res[ a.refalias ] = a.refname
+ return res
+
+'''---------------------------------------------------------------
+helper function: create SAM-headers from a list of SAM-objects
+ as a list of strings
+ used in print_sam and save_sam
+---------------------------------------------------------------'''
+def extract_headers( list ) :
+ reflist = make_refdict( list )
+ res = [ "@HD\tVN:1.3" ]
+ for k, v in reflist.items():
+ l = ref_len( v )
+ res.append( "@SQ\tSN:%s\tAS:%s\tLN:%d"%( k, k, l ) )
+ return res
+
+'''---------------------------------------------------------------
+helper function: create a config-file for bam-load out of
+ a list of SAM-objects
+ used in save_config
+---------------------------------------------------------------'''
+def produce_config( list ) :
+ reflist = make_refdict( list )
+ res = []
+ for k, v in reflist.items():
+ if k != "*" and k != "-" :
+ res.append( "%s\t%s"%( k, v ) )
+ return res
+
+'''---------------------------------------------------------------
+helper function: save config file created from list of SAM-objects
+ used in bam_load
+---------------------------------------------------------------'''
+def save_config( list, filename ) :
+ with open( filename, "w" ) as f:
+ for s in produce_config( list ) :
+ f.write( "%s\n"%( s ) )
+
+'''---------------------------------------------------------------
+helper function: prints a list of SAM-objects
+---------------------------------------------------------------'''
+def print_sam( list ):
+ for s in extract_headers( list ) :
+ print s
+ for s in list :
+ print s
+
+'''---------------------------------------------------------------
+helper function: save a list of SAM-objects as file
+ used in bam_load
+---------------------------------------------------------------'''
+def save_sam( list, filename ) :
+ with open( filename, "w" ) as f:
+ for s in extract_headers( list ) :
+ f.write( "%s\n"%( s ) )
+ for s in list :
+ f.write( "%s\n"%( s ) )
+
+'''===============================================================
+ SAM-object
+==============================================================='''
+class SAM:
+
+ def __init__( self, qname, flags, refname, refalias, pos, mapq, cigar, seq, rnxt, pnxt, tags="" ) :
+ self.qname = qname
+ self.flags = flags
+ self.refname = refname
+ self.refalias = refalias
+ self.pos = pos
+ self.mapq = mapq
+ self.cigar = cigar
+ self.seq = seq
+ self.qual = rnd_qual( len( self.seq ) )
+ self.nxt_ref = rnxt
+ self.nxt_pos = pnxt
+ self.tlen = 0
+ self.tags = tags
+
+ def __str__( self ):
+ if len( self.tags ) > 0 :
+ return "%s\t%d\t%s\t%s\t%d\t%s\t%s\t%s\t%d\t%s\t%s\t%s"%( self.qname,
+ self.flags, self.refalias, self.pos, self.mapq, self.cigar, self.nxt_ref,
+ self.nxt_pos, self.tlen, self.seq, self.qual, self.tags )
+ else :
+ return "%s\t%d\t%s\t%s\t%d\t%s\t%s\t%s\t%d\t%s\t%s"%( self.qname,
+ self.flags, self.refalias, self.pos, self.mapq, self.cigar, self.nxt_ref,
+ self.nxt_pos, self.tlen, self.seq, self.qual )
+
+ def set_flag( self, flagbit, state ) :
+ if state :
+ self.flags |= flagbit
+ else :
+ self.flags &= ~flagbit
+
+ def set_tags( self, tag ) :
+ self.tags = tag
+
+ def add_tag( self, tag ) :
+ if len( self.tags ) > 0 :
+ self.tags += ";"
+ self.tags += tag
+ else :
+ self.tags = tag
+
+ def pair_with( self, other ) :
+ self.nxt_ref = other.refalias
+ self.nxt_pos = other.pos
+ other.nxt_ref = self.refalias
+ other.nxt_pos = self.pos
+ self.flags |= FLAG_MULTI
+ other.flags |= FLAG_MULTI
+ self.set_flag( FLAG_FIRST, True )
+ other.set_flag( FLAG_FIRST, False )
+ self.set_flag( FLAG_LAST, False )
+ other.set_flag( FLAG_LAST, True )
+ self.set_flag( FLAG_NEXT_UNMAPPED, other.flags & FLAG_UNMAPPED )
+ self.set_flag( FLAG_NEXT_REVERSED, other.flags & FLAG_REVERSED )
+ other.set_flag( FLAG_NEXT_UNMAPPED, self.flags & FLAG_UNMAPPED )
+ other.set_flag( FLAG_NEXT_REVERSED, self.flags & FLAG_REVERSED )
+ other.qname = self.qname
+
+ def link_to( self, other ) :
+ self.flags |= FLAG_MULTI
+ self.set_flag( FLAG_FIRST, other.flags & FLAG_FIRST )
+ self.set_flag( FLAG_LAST, other.flags & FLAG_LAST )
+ self.set_flag( FLAG_NEXT_UNMAPPED, other.flags & FLAG_UNMAPPED )
+ self.set_flag( FLAG_NEXT_REVERSED, other.flags & FLAG_REVERSED )
+ self.nxt_ref = other.refalias
+ self.nxt_pos = other.pos
+ self.qname = other.qname
diff --git a/test/samline/sampart.c b/test/samline/sampart.c
new file mode 100644
index 0000000..e07c3f5
--- /dev/null
+++ b/test/samline/sampart.c
@@ -0,0 +1,304 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+#include "sampart.vers.h"
+
+#include <kapp/main.h>
+#include <kapp/args.h>
+
+#include <klib/out.h>
+#include <klib/rc.h>
+#include <klib/printf.h>
+#include <klib/text.h>
+
+#include <os-native.h>
+#include <sysalloc.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "refbases.h"
+#include "cigar.h"
+
+/* fall-back values used when the matching command line option is absent */
+#define DFLT_FUNCTION "read"
+#define DFLT_REFNAME "NC_011752.1"
+#define DFLT_REFPOS 10000
+#define DFLT_CIGAR "50M"
+#define DFLT_INSBASES "ACGTACGTACGT"
+#define DFLT_LEN 50
+#define DFLT_SEED 7
+
+/* NULL-terminated help texts, one table per option ( printed by Usage() ) */
+static const char * function_usage[] = { "which function to execute ( read, qual, cigar, rlen )", NULL };
+static const char * refname_usage[] = { "the ref-seq-id to use 'NC_011752.1'", NULL };
+static const char * refpos_usage[] = { "the position on the reference 0-based", NULL };
+static const char * cigar_usage[] = { "the cigar-string to use", NULL };
+static const char * insbases_usage[] = { "what bases to insert ( if needed )", NULL };
+static const char * len_usage[] = { "length of random quality", NULL };
+static const char * seed_usage[] = { "seed for random", NULL };
+
+/* long option names */
+#define OPTION_FUNCTION "function"
+#define OPTION_REFNAME "refname"
+#define OPTION_REFPOS "refpos"
+#define OPTION_CIGAR "cigar"
+#define OPTION_INSBASES "insbases"
+#define OPTION_LEN "len"
+#define OPTION_SEED "seed"
+
+/* single-letter aliases of the options above */
+#define ALIAS_FUNCTION "f"
+#define ALIAS_REFNAME "r"
+#define ALIAS_REFPOS "p"
+#define ALIAS_CIGAR "c"
+#define ALIAS_INSBASES "i"
+#define ALIAS_LEN "l"
+#define ALIAS_SEED "s"
+
+/* option table handed to ArgsMakeAndHandle() in KMain() and walked by Usage();
+ * NOTE(review): the trailing "1, true, false" presumably mean max-count,
+ * needs-value and required - confirm against the OptDef declaration in kapp/args.h */
+OptDef Options[] =
+{
+ { OPTION_FUNCTION, ALIAS_FUNCTION, NULL, function_usage, 1, true, false },
+ { OPTION_REFNAME, ALIAS_REFNAME, NULL, refname_usage, 1, true, false },
+ { OPTION_REFPOS, ALIAS_REFPOS, NULL, refpos_usage, 1, true, false },
+ { OPTION_CIGAR, ALIAS_CIGAR, NULL, cigar_usage, 1, true, false },
+ { OPTION_INSBASES, ALIAS_INSBASES, NULL, insbases_usage, 1, true, false },
+ { OPTION_LEN, ALIAS_LEN, NULL, len_usage, 1, true, false },
+ { OPTION_SEED, ALIAS_SEED, NULL, seed_usage, 1, true, false }
+};
+
+const char UsageDefaultName[] = "sampart";
+
+/* UsageSummary
+ *  prints the short "Usage: <progname> [options]" banner
+ *  returns whatever KOutMsg reports
+ */
+rc_t CC UsageSummary ( const char * progname )
+{
+    const rc_t rc = KOutMsg( "\nUsage:\n %s [options]\n\n", progname );
+    return rc;
+}
+
+/* Usage
+ *  prints the full help text: summary, one line per entry of Options,
+ *  the standard options and the version
+ *
+ *  returns the rc of resolving the program name ( non-zero when args is
+ *  NULL or ArgsProgram fails ); the help text is printed in either case,
+ *  using UsageDefaultName when the name could not be resolved
+ */
+rc_t CC Usage ( const Args * args )
+{
+    const char * progname = UsageDefaultName;
+    const char * fullpath = UsageDefaultName;
+    const OptDef * opt;
+    const OptDef * const opt_end = Options + ( sizeof Options / sizeof Options[ 0 ] );
+    rc_t rc;
+
+    if ( args != NULL )
+        rc = ArgsProgram ( args, &fullpath, &progname );
+    else
+        rc = RC ( rcApp, rcArgv, rcAccessing, rcSelf, rcNull );
+
+    /* ArgsProgram may have touched the out-params before failing */
+    if ( rc != 0 )
+    {
+        progname = UsageDefaultName;
+        fullpath = UsageDefaultName;
+    }
+
+    UsageSummary ( progname );
+    KOutMsg ( "Options:\n" );
+
+    for ( opt = Options; opt != opt_end; ++opt )
+        HelpOptionLine( opt->aliases, opt->name, NULL, opt->help );
+
+    KOutMsg ( "\n" );
+    HelpOptionsStandard ();
+    HelpVersion ( fullpath, KAppVersion() );
+
+    return rc;
+}
+
+
+/* KAppVersion
+ *  reports the tool version as defined by SAMPART_VERS ( sampart.vers.h )
+ *
+ *  the code is laid out as 0xMMmmrrrr:
+ *    MM   = major release
+ *    mm   = minor release
+ *    rrrr = bug-fix release
+ */
+ver_t CC KAppVersion ( void )
+{
+    const ver_t version = SAMPART_VERS;
+    return version;
+}
+
+/* get_str_option
+ *  returns the idx-th value given for option "name", or "dflt" when the
+ *  option was given fewer than idx + 1 times, when the lookup fails, or
+ *  when the lookup yields no value
+ *
+ *  the returned pointer is not a copy and must not be freed by the caller
+ */
+static const char * get_str_option( const Args * args, const char * name, uint32_t idx, const char * dflt )
+{
+    uint32_t count = 0;
+    const char * res = NULL;
+
+    if ( ArgsOptionCount( args, name, &count ) != 0 || count <= idx )
+        return dflt;
+
+    /* a failed lookup must fall back to the default instead of handing
+       the caller an unexpected NULL */
+    if ( ArgsOptionValue( args, name, idx, (const void **)&res ) != 0 || res == NULL )
+        return dflt;
+
+    return res;
+}
+
+/* get_uint32_option
+ *  returns the idx-th value of option "name" parsed as an unsigned decimal
+ *  number; trailing non-digit characters are ignored
+ *
+ *  "dflt" is returned when the option is absent, when the text does not
+ *  start with a number, when it is negative, or when it does not fit
+ *  into 32 bits ( atoi, used before, is undefined for out-of-range input
+ *  and cannot report any of these cases )
+ */
+static uint32_t get_uint32_option( const Args * args, const char * name, uint32_t idx, const uint32_t dflt )
+{
+    const char * s = get_str_option( args, name, idx, NULL );
+    char * end = NULL;
+    unsigned long value;
+
+    if ( s == NULL )
+        return dflt;
+
+    value = strtoul( s, &end, 10 );
+    if ( end == s )
+        return dflt;    /* no digits at all */
+
+    /* strtoul accepts a leading '-' and silently negates the result */
+    if ( memchr( s, '-', ( size_t )( end - s ) ) != NULL )
+        return dflt;
+
+    /* also catches ULONG_MAX returned on overflow where long is 64 bit */
+    if ( value > 0xFFFFFFFFUL )
+        return dflt;
+
+    return ( uint32_t )value;
+}
+
+/* write_to_FILE
+ *  output callback installed via KOutHandlerSet() in KMain()
+ *
+ *  "f" is used as the FILE* to write to; the number of bytes actually
+ *  written is stored in "*num_writ"; a short write is reported as an
+ *  rcIncomplete transfer error
+ */
+static rc_t CC write_to_FILE ( void *f, const char *buffer, size_t bytes, size_t *num_writ )
+{
+    const size_t written = fwrite ( buffer, 1, bytes, f );
+
+    * num_writ = written;
+    if ( written == bytes )
+        return 0;
+    return RC ( rcExe, rcFile, rcWriting, rcTransfer, rcIncomplete );
+}
+
+/* make_read
+ *  function "read": parses the --cigar option, fetches the reference bases
+ *  covered by that cigar at --refname / --refpos, builds the read from
+ *  cigar, reference bases and --insbases and prints it
+ *
+ *  returns the rc of KOutMsg; stays 0 ( and prints nothing ) when the
+ *  cigar cannot be made, the reference bases cannot be read or the read
+ *  buffer cannot be allocated
+ */
+static rc_t make_read( const Args * args )
+{
+    rc_t rc = 0;
+    uint32_t reflen, refpos;
+    const char * refname;
+    const char * refbases;
+    const char * cigar_str = get_str_option( args, OPTION_CIGAR, 0, DFLT_CIGAR );
+    struct cigar_t * cigar = make_cigar_t( cigar_str );
+
+    if ( cigar == NULL )
+        return rc;
+
+    reflen = cigar_t_reflen( cigar );
+    refpos = get_uint32_option( args, OPTION_REFPOS, 0, DFLT_REFPOS );
+    refname = get_str_option( args, OPTION_REFNAME, 0, DFLT_REFNAME );
+    refbases = read_refbases( refname, refpos, reflen, NULL );
+    if ( refbases != NULL )
+    {
+        int read_len = cigar_t_readlen( cigar );
+        char * read_buf = malloc( read_len + 1 );   /* + 1 : room for the terminator */
+        if ( read_buf != NULL )
+        {
+            const char * ins_bases = get_str_option( args, OPTION_INSBASES, 0, DFLT_INSBASES );
+            cigar_t_2_read( read_buf, read_len + 1, cigar, refbases, ins_bases );
+            rc = KOutMsg( "%s", read_buf );
+            free( read_buf );
+        }
+        /* this function releases the bases handed out by read_refbases */
+        free( ( void * )refbases );
+    }
+    free_cigar_t( cigar );
+    return rc;
+}
+
+
+/* random_string
+ *  fills "buffer" with up to "length" characters picked via rand() from
+ *  "char_set" ( digits and ASCII letters when char_set is NULL ) and
+ *  terminates it with 0
+ *
+ *  at most buflen - 1 characters are written
+ *  returns the number of characters written, not counting the terminator;
+ *  0 when buffer is NULL or buflen is 0 ( nothing is written then )
+ *
+ *  an empty char_set yields an empty string; without this check
+ *  strlen( cs ) - 1 wraps around and the index runs far out of bounds
+ */
+static size_t random_string( char * buffer, size_t buflen, const char * char_set, size_t length )
+{
+    size_t res = 0;
+    if ( buffer != NULL && buflen > 0 )
+    {
+        const char dflt_charset[] = "0123456789"
+                                    "abcdefghijklmnopqrstuvwxyz"
+                                    "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        const char * cs = ( char_set == NULL ) ? dflt_charset : char_set;
+        size_t cs_size = strlen( cs );
+        if ( cs_size > 0 )
+        {
+            /* highest valid index; it is only reached when rand() returns
+               RAND_MAX - the scaling is kept unchanged so that a given
+               seed still produces the same string */
+            size_t charset_len = cs_size - 1;
+            while ( res < length && res < ( buflen - 1 ) )
+            {
+                size_t rand_idx = ( double ) rand() / RAND_MAX * charset_len;
+                buffer[ res++ ] = cs[ rand_idx ];
+            }
+        }
+        buffer[ res ] = 0;
+    }
+    return res;
+}
+
+
+/* random_quality
+ *  fills "buffer" with up to "length" random characters taken from the
+ *  printable ASCII range '!' ... '~' by delegating to random_string
+ *
+ *  returns what random_string returns
+ */
+static size_t random_quality( char * buffer, size_t buflen, size_t length )
+{
+    const char printable[] = "!\"#$%&'()*+,-./0123456789:;<=>?"
+                             "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
+                             "`abcdefghijklmnopqrstuvwxyz{|}~";
+    const size_t produced = random_string( buffer, buflen, printable, length );
+    return produced;
+}
+
+
+/* make_quality
+ *  function "qual": prints --len random quality characters
+ *
+ *  the generator is seeded with --seed only when the seed is not 0
+ *  returns the rc of KOutMsg; stays 0 ( and prints nothing ) when the
+ *  buffer cannot be allocated
+ */
+static rc_t make_quality( const Args * args )
+{
+    rc_t rc = 0;
+    const uint32_t len = get_uint32_option( args, OPTION_LEN, 0, DFLT_LEN );
+    const uint32_t seed = get_uint32_option( args, OPTION_SEED, 0, DFLT_SEED );
+    char * qual = malloc( len + 1 );
+
+    if ( qual == NULL )
+        return rc;
+
+    if ( seed > 0 )
+        srand( seed );
+    random_quality( qual, len + 1, len );
+    rc = KOutMsg( "%s", qual );
+    free( qual );
+    return rc;
+}
+
+/* make_cigar
+ *  function "cigar": parses the --cigar option, merges it via
+ *  merge_cigar_t and prints the merged cigar as text
+ *
+ *  returns the rc of KOutMsg; stays 0 ( and prints nothing ) when the
+ *  cigar cannot be parsed or merged
+ */
+static rc_t make_cigar( const Args * args )
+{
+    rc_t rc = 0;
+    const char * cigar_str = get_str_option( args, OPTION_CIGAR, 0, DFLT_CIGAR );
+    struct cigar_t * cigar = make_cigar_t( cigar_str );
+    if ( cigar != NULL )
+    {
+        struct cigar_t * merged_cigar = merge_cigar_t( cigar );
+        /* do not hand a failed merge to cigar_t_string / free_cigar_t */
+        if ( merged_cigar != NULL )
+        {
+            char buffer[ 4096 ];
+            /* make sure a terminated string gets printed even if
+               cigar_t_string writes nothing */
+            buffer[ 0 ] = 0;
+            cigar_t_string( buffer, sizeof buffer, merged_cigar );
+            rc = KOutMsg( "%s", buffer );
+            free_cigar_t( merged_cigar );
+        }
+        free_cigar_t( cigar );
+    }
+    return rc;
+}
+
+/* get_ref_len
+ *  function "rlen": prints what ref_len() reports for the reference
+ *  named by --refname
+ *
+ *  returns the rc of KOutMsg, 0 when there is no reference name
+ */
+static rc_t get_ref_len( const Args * args )
+{
+    const char * refname = get_str_option( args, OPTION_REFNAME, 0, DFLT_REFNAME );
+
+    if ( refname == NULL )
+        return 0;
+    return KOutMsg( "%d", ref_len( refname ) );
+}
+
+/* the values of --function that KMain() dispatches on ( compared there
+ * with strcase_cmp ) */
+static const char function_read[] = "read";
+static const char function_qual[] = "qual";
+static const char function_cigar[] = "cigar";
+static const char function_rlen[] = "rlen";
+
+/* KMain
+ *  entry point: routes KOutMsg output to stdout, parses the command line
+ *  and runs the function selected by --function ( default: DFLT_FUNCTION )
+ *
+ *  an unknown function name is only reported via KOutMsg; the rc returned
+ *  is then the rc of that message
+ */
+rc_t CC KMain( int argc, char *argv [] )
+{
+    Args * args;
+    const char * function;
+    size_t function_size;
+    int n_options = sizeof Options / sizeof Options[ 0 ];
+    rc_t rc = KOutHandlerSet ( write_to_FILE, stdout );
+
+    if ( rc != 0 )
+        return rc;
+
+    rc = ArgsMakeAndHandle( &args, argc, argv, 1, Options, n_options );
+    if ( rc != 0 )
+        return rc;
+
+    function = get_str_option( args, OPTION_FUNCTION, 0, DFLT_FUNCTION );
+    function_size = string_size( function );
+
+    if ( 0 == strcase_cmp ( function, function_size, function_read, 4, 4 ) )
+        rc = make_read( args );
+    else if ( 0 == strcase_cmp ( function, function_size, function_qual, 4, 4 ) )
+        rc = make_quality( args );
+    else if ( 0 == strcase_cmp ( function, function_size, function_cigar, 5, 5 ) )
+        rc = make_cigar( args );
+    else if ( 0 == strcase_cmp ( function, function_size, function_rlen, 4, 4 ) )
+        rc = get_ref_len( args );
+    else
+        rc = KOutMsg( "unknown function '%s'\n", function );
+
+    ArgsWhack( args );
+    return rc;
+}
diff --git a/tools/kget/kget.vers b/test/samline/sampart.vers
similarity index 100%
copy from tools/kget/kget.vers
copy to test/samline/sampart.vers
diff --git a/test/samline/sec_align_problem.sh b/test/samline/sec_align_problem.sh
new file mode 100755
index 0000000..59f6be3
--- /dev/null
+++ b/test/samline/sec_align_problem.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+# print a separator line and the command line given as $1, then run it
+# via eval and print a closing "."
+# $1 is quoted so that whitespace and glob characters inside the command
+# reach eval untouched instead of being split / expanded beforehand
+execute()
+{
+    echo "------------------------------------------------------"
+    echo "$1"
+    eval "$1"
+    echo "."
+}
+
+# this will produce 2 files:
+# a) a config-file for bam-load with the used reference ( written to $1 )
+# b) a SAM-file with header-line containing the lonely sec. alignment
+#    ( $2 is created / overwritten )
+# call: produce_SAM1 "$CONFIG" "$SAMFILE"
+# note: the variables set here are not local, they stay set after the call
+produce_SAM1()
+{
+ SAMLINE_BINARY="samline"
+ QNAME="--qname 1"
+ REFNAME="-r NC_011752.1"
+ REFPOS="-p 6800"
+ CIGAR="-c 55M"
+ SECONDARY="-2 1"
+ # with this assignment disabled, $FIRST in the command below expands to
+ # nothing ( unless FIRST is set in the environment )
+ #FIRST="--first 1"
+ WITH_HEADERS="-d"
+ CREATE_CONFIG="-n $1"
+ execute "$SAMLINE_BINARY $QNAME $REFNAME $REFPOS $CIGAR $SECONDARY $FIRST $WITH_HEADERS $CREATE_CONFIG > $2"
+}
+
+# this will append 2 mates ( the first with -2 0, the second with -2 1 )
+# to the SAM-file $1; the file is not truncated, so it has to be called
+# after produce_SAM1
+# call: produce_SAM2 "$SAMFILE"
+# note: the variables set here are not local, they stay set after the call
+produce_SAM2()
+{
+ SAMLINE_BINARY="samline"
+ QNAME="--qname 2"
+ REFNAME="-r NC_011752.1"
+ REFPOS1="-p 1000"
+ REFPOS2="-p 3500"
+ CIGAR1="-c 30MAAA20M"
+ CIGAR2="-c 50M2D10M"
+ SEC1="-2 0"
+ SEC2="-2 1"
+ execute "$SAMLINE_BINARY $QNAME $REFNAME $REFPOS1 $REFPOS2 $CIGAR1 $CIGAR2 $SEC1 $SEC2 >> $1"
+}
+
+# pipes the SAM-file $2 into bam-load ( reading /dev/stdin ), using the
+# config-file $1 ( -k ) and the output directory $3 ( -o )
+# call: load_SAM_to_CSRA "$CONFIG" "$SAMFILE" "$TEMP_DIR"
+load_SAM_to_CSRA()
+{
+ BAMLOAD_BINARY="bam-load"
+ execute "cat $2 | $BAMLOAD_BINARY --make-spots-with-secondary -L 3 -o $3 -k $1 -E0 -Q0 /dev/stdin"
+}
+
+# runs kar --create to make the archive $1 from the directory $2 ( -d ),
+# passing -f
+# call: kar_CSRA "$FINAL_CSRA" "$TEMP_DIR"
+kar_CSRA()
+{
+ KAR_BINARY="kar"
+ execute "$KAR_BINARY --create $1 -d $2 -f"
+}
+
+SAMFILE="p1.SAM"
+CONFIG="p1.cfg"
+TEMP_DIR="p1_csra"
+FINAL_CSRA="p1.csra"
+TEMP_SORTED_CSRA="p2_csra"
+SORTED_CSRA="p2.csra"
+
+produce_SAM1 "$CONFIG" "$SAMFILE"
+produce_SAM2 "$SAMFILE"
+load_SAM_to_CSRA "$CONFIG" "$SAMFILE" "$TEMP_DIR"
+kar_CSRA "$FINAL_CSRA" "$TEMP_DIR"
+#execute "vdb-dump $FINAL_CSRA --info"
+#execute "sra-stat $FINAL_CSRA"
+execute "sra-sort $FINAL_CSRA $TEMP_SORTED_CSRA -f"
+kar_CSRA "$SORTED_CSRA" "$TEMP_SORTED_CSRA"
+#execute "vdb-dump $SORTED_CSRA --info"
+#execute "sra-stat $SORTED_CSRA"
+#execute "rm -rf $TEMP_DIR $TEMP_SORTED_CSRA $SAMFILE $CONFIG"
diff --git a/test/vcf-loader/Makefile b/test/vcf-loader/Makefile
index 32aab45..d5de406 100644
--- a/test/vcf-loader/Makefile
+++ b/test/vcf-loader/Makefile
@@ -68,6 +68,3 @@ VCF_TEST_LIB = \
$(TEST_BINDIR)/test-vcf-loader: $(VCF_TEST_OBJ)
$(LP) --exe -o $@ $^ $(VCF_TEST_LIB)
-
-valgrind: test-vcf-loader
- valgrind --ncbi $(TEST_BINDIR)/test-vcf-loader
diff --git a/test/vcf-loader/Makefile b/test/vdb-validate/Makefile
similarity index 56%
copy from test/vcf-loader/Makefile
copy to test/vdb-validate/Makefile
index 32aab45..9a0978b 100644
--- a/test/vcf-loader/Makefile
+++ b/test/vdb-validate/Makefile
@@ -26,23 +26,16 @@ default: runtests
TOP ?= $(abspath ../..)
-MODULE = test/vcf-loader
+MODULE = test/vdb-validate
TEST_TOOLS = \
- test-vcf-loader
-
-include $(TOP)/build/Makefile.env
+ALL_TOOLS = \
+ $(TEST_TOOLS) \
-# make sure runs are not cached in the user repository when running tests
-ifeq ($(wildcard ../../../asm-trace),)
- ifeq (,$(VDB_CONFIG))
- VDB_CONFIG = $(shell pwd)/../only-remote-repository.kfg
- endif
-endif
-
+include $(TOP)/build/Makefile.env
-$(TEST_TOOLS): makedirs
+$(ALL_TOOLS): makedirs
@ $(MAKE_CMD) $(TEST_BINDIR)/$@
.PHONY: $(TEST_TOOLS)
@@ -50,24 +43,22 @@ $(TEST_TOOLS): makedirs
clean: stdclean
#-------------------------------------------------------------------------------
-# white-box test
+# vdb-validate tool tests
#
-INCDIRS += -I$(TOP)/tools/vcf-loader
-
-VCF_TEST_SRC = \
- test-vcf-loader
-
-VCF_TEST_OBJ = \
- $(addsuffix .$(OBJX),$(VCF_TEST_SRC))
-
-VCF_TEST_LIB = \
- -skapp \
- -sktst \
- -sncbi-wvdb \
- -svcfloader
-
-$(TEST_BINDIR)/test-vcf-loader: $(VCF_TEST_OBJ)
- $(LP) --exe -o $@ $^ $(VCF_TEST_LIB)
+runtests: vdb-validate
+
+vdb-validate: $(BINDIR)/vdb-validate
+ @ echo "Starting vdb-validate tests..."
+ @ rm -rf actual/
+ @ mkdir actual/
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/sdc_tmp_mismatch.csra --sdc:rows 100%" sdc_tmp_mismatch 3
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/sdc_pa_longer.csra --sdc:rows 100%" sdc_pa_longer_1 3
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/sdc_pa_longer.csra --sdc:rows 100% --sdc:plen_thold 50%" sdc_pa_longer_2 3
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/sdc_pa_longer.csra --sdc:rows 100% --sdc:plen_thold 51%" sdc_pa_longer_3 0
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/sdc_len_mismatch.csra --sdc:rows 100% --sdc:plen_thold 1%" sdc_len_mismatch_1 3
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/sdc_len_mismatch.csra --sdc:rows 100% --sdc:plen_thold 100%" sdc_len_mismatch_2 3
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/sdc_len_mismatch.csra" no_sdc_checks 0
+ @ ./runtestcase.sh "$(BINDIR)/vdb-validate db/blob-row-gap.kar" ROW_GAP 0
+ @ echo "All vdb-validate tests succeed"
+ @ rm -rf actual/
-valgrind: test-vcf-loader
- valgrind --ncbi $(TEST_BINDIR)/test-vcf-loader
diff --git a/test/vdb-validate/db/blob-row-gap.kar b/test/vdb-validate/db/blob-row-gap.kar
new file mode 100644
index 0000000..9b6bdd8
Binary files /dev/null and b/test/vdb-validate/db/blob-row-gap.kar differ
diff --git a/test/vdb-validate/db/sdc_len_mismatch.csra b/test/vdb-validate/db/sdc_len_mismatch.csra
new file mode 100644
index 0000000..52afee0
Binary files /dev/null and b/test/vdb-validate/db/sdc_len_mismatch.csra differ
diff --git a/test/vdb-validate/db/sdc_pa_longer.csra b/test/vdb-validate/db/sdc_pa_longer.csra
new file mode 100644
index 0000000..3e767a5
Binary files /dev/null and b/test/vdb-validate/db/sdc_pa_longer.csra differ
diff --git a/test/vdb-validate/db/sdc_tmp_mismatch.csra b/test/vdb-validate/db/sdc_tmp_mismatch.csra
new file mode 100644
index 0000000..a34c387
Binary files /dev/null and b/test/vdb-validate/db/sdc_tmp_mismatch.csra differ
diff --git a/test/vdb-validate/expected/ROW_GAP b/test/vdb-validate/expected/ROW_GAP
new file mode 100644
index 0000000..17532f7
--- /dev/null
+++ b/test/vdb-validate/expected/ROW_GAP
@@ -0,0 +1,42 @@
+info: Database 'blob-row-gap.kar' metadata: md5 ok
+info: Table 'FEATURE' metadata: md5 ok
+info: Column 'FEAT_TYPE': checksums ok
+info: Column 'LOC_LEN': checksums ok
+info: Column 'LOC_ROW_ID': checksums ok
+info: Column 'LOC_SEQ_TYPE': checksums ok
+info: Column 'LOC_START': checksums ok
+info: Column 'LOC_STRAND': checksums ok
+info: Column 'PRODUCT_LEN': checksums ok
+info: Column 'PRODUCT_ROW_ID': checksums ok
+info: Column 'PRODUCT_SEQ_TYPE': checksums ok
+info: Column 'PRODUCT_START': checksums ok
+info: Column 'SEQ_FEAT': checksums ok
+info: Table 'GI_IDX' metadata: md5 ok
+info: Column 'NUC_ROW_ID': checksums ok
+info: Column 'PROT_ROW_ID': checksums ok
+info: Table 'PROTEIN' metadata: md5 ok
+info: Column 'DESCR': checksums ok
+info: Column 'FEAT_PRODUCT_ROW_ID': checksums ok
+info: Column 'FEAT_ROW_END': checksums ok
+info: Column 'FEAT_ROW_START': checksums ok
+info: Column 'GI': checksums ok
+info: Column 'HASH': checksums ok
+info: Column 'PROTEIN': checksums ok
+info: Column 'PROTEIN_LEN': checksums ok
+info: Column 'PROTEIN_NAME': checksums ok
+info: Column 'TITLE': checksums ok
+info: Table 'PROT_ACC_IDX' metadata: md5 ok
+info: Column 'PROTEIN_ROW_ID': checksums ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Column 'ALTREAD': checksums ok
+info: Column 'CLIP_LEN': checksums ok
+info: Column 'CONTIG_NAME': checksums ok
+info: Column 'DESCR': checksums ok
+info: Column 'FEAT_ROW_END': checksums ok
+info: Column 'FEAT_ROW_START': checksums ok
+info: Column 'GI': checksums ok
+info: Column 'HASH': checksums ok
+info: Column 'PROT_COUNT': checksums ok
+info: Column 'READ': checksums ok
+info: Column 'TITLE': checksums ok
+info: Database 'blob-row-gap.kar' is consistent
diff --git a/test/vdb-validate/expected/no_sdc_checks b/test/vdb-validate/expected/no_sdc_checks
new file mode 100644
index 0000000..53e5c2c
--- /dev/null
+++ b/test/vdb-validate/expected/no_sdc_checks
@@ -0,0 +1,23 @@
+info: Database 'sdc_len_mismatch.csra' metadata: md5 ok
+info: Table 'PRIMARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_MISMATCH': checksums ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'REFERENCE' metadata: md5 ok
+info: Column 'CGRAPH_HIGH': checksums ok
+info: Column 'CGRAPH_MISMATCHES': checksums ok
+info: Column 'CS_KEY': checksums ok
+info: Column 'PRIMARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SECONDARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SEQ_LEN': checksums ok
+info: Column 'SEQ_START': checksums ok
+info: Table 'SECONDARY_ALIGNMENT' metadata: md5 ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Column 'PRIMARY_ALIGNMENT_ID': checksums ok
+info: Column 'QUALITY': checksums ok
+info: Column 'READ_LEN': checksums ok
+info: Database 'db/sdc_len_mismatch.csra': SEQUENCE.PRIMARY_ALIGNMENT_ID <-> PRIMARY_ALIGNMENT.SEQ_SPOT_ID referential integrity ok
+info: Database 'db/sdc_len_mismatch.csra': REFERENCE.PRIMARY_ALIGNMENT_IDS <-> PRIMARY_ALIGNMENT.REF_ID referential integrity ok
+info: Database 'sdc_len_mismatch.csra' is consistent
diff --git a/test/vdb-validate/expected/sdc_len_mismatch_1 b/test/vdb-validate/expected/sdc_len_mismatch_1
new file mode 100644
index 0000000..7ae23b4
--- /dev/null
+++ b/test/vdb-validate/expected/sdc_len_mismatch_1
@@ -0,0 +1,24 @@
+info: Database 'sdc_len_mismatch.csra' metadata: md5 ok
+info: Table 'PRIMARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_MISMATCH': checksums ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'REFERENCE' metadata: md5 ok
+info: Column 'CGRAPH_HIGH': checksums ok
+info: Column 'CGRAPH_MISMATCHES': checksums ok
+info: Column 'CS_KEY': checksums ok
+info: Column 'PRIMARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SECONDARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SEQ_LEN': checksums ok
+info: Column 'SEQ_START': checksums ok
+info: Table 'SECONDARY_ALIGNMENT' metadata: md5 ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Column 'PRIMARY_ALIGNMENT_ID': checksums ok
+info: Column 'QUALITY': checksums ok
+info: Column 'READ_LEN': checksums ok
+info: Database 'db/sdc_len_mismatch.csra': SEQUENCE.PRIMARY_ALIGNMENT_ID <-> PRIMARY_ALIGNMENT.SEQ_SPOT_ID referential integrity ok
+info: Database 'db/sdc_len_mismatch.csra': REFERENCE.PRIMARY_ALIGNMENT_IDS <-> PRIMARY_ALIGNMENT.REF_ID referential integrity ok
+err: data inconsistent while validating database - Database 'db/sdc_len_mismatch.csra': Limit violation (pa_longer_sa): there are at least 1 alignments where HAS_REF_OFFSET column is longer in PRIMARY_ALIGNMENT than in SECONDARY_ALIGNMENT
+err: data inconsistent while validating database - Database 'sdc_len_mismatch.csra' check failed
diff --git a/test/vdb-validate/expected/sdc_len_mismatch_2 b/test/vdb-validate/expected/sdc_len_mismatch_2
new file mode 100644
index 0000000..313eb27
--- /dev/null
+++ b/test/vdb-validate/expected/sdc_len_mismatch_2
@@ -0,0 +1,24 @@
+info: Database 'sdc_len_mismatch.csra' metadata: md5 ok
+info: Table 'PRIMARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_MISMATCH': checksums ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'REFERENCE' metadata: md5 ok
+info: Column 'CGRAPH_HIGH': checksums ok
+info: Column 'CGRAPH_MISMATCHES': checksums ok
+info: Column 'CS_KEY': checksums ok
+info: Column 'PRIMARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SECONDARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SEQ_LEN': checksums ok
+info: Column 'SEQ_START': checksums ok
+info: Table 'SECONDARY_ALIGNMENT' metadata: md5 ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Column 'PRIMARY_ALIGNMENT_ID': checksums ok
+info: Column 'QUALITY': checksums ok
+info: Column 'READ_LEN': checksums ok
+info: Database 'db/sdc_len_mismatch.csra': SEQUENCE.PRIMARY_ALIGNMENT_ID <-> PRIMARY_ALIGNMENT.SEQ_SPOT_ID referential integrity ok
+info: Database 'db/sdc_len_mismatch.csra': REFERENCE.PRIMARY_ALIGNMENT_IDS <-> PRIMARY_ALIGNMENT.REF_ID referential integrity ok
+err: data inconsistent while validating database - Database 'db/sdc_len_mismatch.csra': PRIMARY_ALIGNMENT:2 HAS_REF_OFFSET length (23) less than SECONDARY_ALIGNMENT:2 HAS_REF_OFFSET length (52)
+err: data inconsistent while validating database - Database 'sdc_len_mismatch.csra' check failed
diff --git a/test/vdb-validate/expected/sdc_pa_longer_1 b/test/vdb-validate/expected/sdc_pa_longer_1
new file mode 100644
index 0000000..ecd7ac8
--- /dev/null
+++ b/test/vdb-validate/expected/sdc_pa_longer_1
@@ -0,0 +1,26 @@
+info: Database 'sdc_pa_longer.csra' metadata: md5 ok
+info: Table 'PRIMARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_MISMATCH': checksums ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'REFERENCE' metadata: md5 ok
+info: Column 'CGRAPH_HIGH': checksums ok
+info: Column 'CS_KEY': checksums ok
+info: Column 'PRIMARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SECONDARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SEQ_LEN': checksums ok
+info: Column 'SEQ_START': checksums ok
+info: Table 'SECONDARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Column 'TMP_HAS_MISMATCH': checksums ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Column 'PRIMARY_ALIGNMENT_ID': checksums ok
+info: Column 'QUALITY': checksums ok
+info: Column 'READ_LEN': checksums ok
+info: Database 'db/sdc_pa_longer.csra': SEQUENCE.PRIMARY_ALIGNMENT_ID <-> PRIMARY_ALIGNMENT.SEQ_SPOT_ID referential integrity ok
+info: Database 'db/sdc_pa_longer.csra': REFERENCE.PRIMARY_ALIGNMENT_IDS <-> PRIMARY_ALIGNMENT.REF_ID referential integrity ok
+err: data inconsistent while validating database - Database 'db/sdc_pa_longer.csra': Limit violation (pa_longer_sa): there are at least 1 alignments where HAS_REF_OFFSET column is longer in PRIMARY_ALIGNMENT than in SECONDARY_ALIGNMENT
+err: data inconsistent while validating database - Database 'sdc_pa_longer.csra' check failed
diff --git a/test/vdb-validate/expected/sdc_pa_longer_2 b/test/vdb-validate/expected/sdc_pa_longer_2
new file mode 100644
index 0000000..ecd7ac8
--- /dev/null
+++ b/test/vdb-validate/expected/sdc_pa_longer_2
@@ -0,0 +1,26 @@
+info: Database 'sdc_pa_longer.csra' metadata: md5 ok
+info: Table 'PRIMARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_MISMATCH': checksums ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'REFERENCE' metadata: md5 ok
+info: Column 'CGRAPH_HIGH': checksums ok
+info: Column 'CS_KEY': checksums ok
+info: Column 'PRIMARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SECONDARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SEQ_LEN': checksums ok
+info: Column 'SEQ_START': checksums ok
+info: Table 'SECONDARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Column 'TMP_HAS_MISMATCH': checksums ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Column 'PRIMARY_ALIGNMENT_ID': checksums ok
+info: Column 'QUALITY': checksums ok
+info: Column 'READ_LEN': checksums ok
+info: Database 'db/sdc_pa_longer.csra': SEQUENCE.PRIMARY_ALIGNMENT_ID <-> PRIMARY_ALIGNMENT.SEQ_SPOT_ID referential integrity ok
+info: Database 'db/sdc_pa_longer.csra': REFERENCE.PRIMARY_ALIGNMENT_IDS <-> PRIMARY_ALIGNMENT.REF_ID referential integrity ok
+err: data inconsistent while validating database - Database 'db/sdc_pa_longer.csra': Limit violation (pa_longer_sa): there are at least 1 alignments where HAS_REF_OFFSET column is longer in PRIMARY_ALIGNMENT than in SECONDARY_ALIGNMENT
+err: data inconsistent while validating database - Database 'sdc_pa_longer.csra' check failed
diff --git a/test/vdb-validate/expected/sdc_pa_longer_3 b/test/vdb-validate/expected/sdc_pa_longer_3
new file mode 100644
index 0000000..edadc1a
--- /dev/null
+++ b/test/vdb-validate/expected/sdc_pa_longer_3
@@ -0,0 +1,26 @@
+info: Database 'sdc_pa_longer.csra' metadata: md5 ok
+info: Table 'PRIMARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_MISMATCH': checksums ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Table 'REFERENCE' metadata: md5 ok
+info: Column 'CGRAPH_HIGH': checksums ok
+info: Column 'CS_KEY': checksums ok
+info: Column 'PRIMARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SECONDARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SEQ_LEN': checksums ok
+info: Column 'SEQ_START': checksums ok
+info: Table 'SECONDARY_ALIGNMENT' metadata: md5 ok
+info: Column 'HAS_REF_OFFSET': checksums ok
+info: Column 'REF_LEN': checksums ok
+info: Column 'SEQ_SPOT_ID': checksums ok
+info: Column 'TMP_HAS_MISMATCH': checksums ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Column 'PRIMARY_ALIGNMENT_ID': checksums ok
+info: Column 'QUALITY': checksums ok
+info: Column 'READ_LEN': checksums ok
+info: Database 'db/sdc_pa_longer.csra': SEQUENCE.PRIMARY_ALIGNMENT_ID <-> PRIMARY_ALIGNMENT.SEQ_SPOT_ID referential integrity ok
+info: Database 'db/sdc_pa_longer.csra': REFERENCE.PRIMARY_ALIGNMENT_IDS <-> PRIMARY_ALIGNMENT.REF_ID referential integrity ok
+info: Database 'db/sdc_pa_longer.csra': SECONDARY_ALIGNMENT table checks ok
+info: Database 'sdc_pa_longer.csra' is consistent
diff --git a/test/vdb-validate/expected/sdc_tmp_mismatch b/test/vdb-validate/expected/sdc_tmp_mismatch
new file mode 100644
index 0000000..5893d4e
--- /dev/null
+++ b/test/vdb-validate/expected/sdc_tmp_mismatch
@@ -0,0 +1,16 @@
+info: Database 'sdc_tmp_mismatch.csra' metadata: md5 ok
+info: Table 'PRIMARY_ALIGNMENT' metadata: md5 ok
+info: Table 'REFERENCE' metadata: md5 ok
+info: Column 'CGRAPH_HIGH': checksums ok
+info: Column 'CS_KEY': checksums ok
+info: Column 'OVERLAP_REF_POS': checksums ok
+info: Column 'PRIMARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SECONDARY_ALIGNMENT_IDS': checksums ok
+info: Column 'SEQ_LEN': checksums ok
+info: Column 'SEQ_START': checksums ok
+info: Table 'SECONDARY_ALIGNMENT' metadata: md5 ok
+info: Table 'SEQUENCE' metadata: md5 ok
+info: Database 'db/sdc_tmp_mismatch.csra': SEQUENCE.PRIMARY_ALIGNMENT_ID <-> PRIMARY_ALIGNMENT.SEQ_SPOT_ID referential integrity ok
+info: Database 'db/sdc_tmp_mismatch.csra': REFERENCE.PRIMARY_ALIGNMENT_IDS <-> PRIMARY_ALIGNMENT.REF_ID referential integrity ok
+err: data inconsistent while validating database - Database 'db/sdc_tmp_mismatch.csra': SECONDARY_ALIGNMENT:1 TMP_MISMATCH column contains '='
+err: data inconsistent while validating database - Database 'sdc_tmp_mismatch.csra' check failed
diff --git a/build/ld.linux.gcc.sh b/test/vdb-validate/runtestcase.sh
similarity index 62%
copy from build/ld.linux.gcc.sh
copy to test/vdb-validate/runtestcase.sh
index cfc8ad0..84ba505 100755
--- a/build/ld.linux.gcc.sh
+++ b/test/vdb-validate/runtestcase.sh
@@ -22,29 +22,40 @@
# Please cite the author in any work or product based on this material.
#
# ===========================================================================
+#echo "$0 $*"
-# define linker params
-LD_EXPORT_GLOBAL="-Wl,--export-dynamic"
-LD_MULTIPLE_DEFS="-Wl,-zmuldefs"
-LD_STATIC="-Wl,-Bstatic"
-LD_DYNAMIC="-Wl,-Bdynamic"
-LD_ALL_SYMBOLS="-Wl,-whole-archive"
-LD_REF_SYMBOLS="-Wl,-no-whole-archive"
-
-# build command
-DLIB_CMD="$LD -shared"
-EXE_CMD="$LD -static-libstdc++ -static-libgcc"
-EXE_STATIC_CMD="$EXE_CMD"
-#EXE_CMD="$LD"
-#EXE_STATIC_CMD="$LD -static"
-
-# versioned output
-if [ "$VERS" = "" ]
-then
- DLIB_CMD="$DLIB_CMD -o $TARG"
- EXE_CMD="$EXE_CMD -o $TARG"
-else
- set-vers $(echo $VERS | tr '.' ' ')
- DLIB_CMD="$DLIB_CMD -o $OUTDIR/$NAME$DBGAP.so.$VERS -Wl,-soname,$NAME.so.$MAJ"
- EXE_CMD="$EXE_CMD -o $OUTDIR/$NAME$DBGAP.$VERS"
+TEST_CMD=$1
+CASEID=$2
+RC=$3
+
+CMD="$TEST_CMD > \"actual/$CASEID.tmp\" 2>&1"
+#echo $CMD
+eval $CMD
+rc="$?"
+
+if [ "$rc" != "$RC" ] ; then
+ echo "command \"$TEST_CMD\" returned $rc, expected $RC"
+ echo "command executed:"
+ echo $CMD
+
+ echo "command output:"
+ cat actual/$CASEID.tmp
+ exit 2
fi
+
+# remove first two columns from output: datetime and progname
+cat "actual/$CASEID.tmp" | awk '{if(substr($2,1,12) == "vdb-validate"){$2=$1="";} print $0}' > "actual/$CASEID"
+rm "actual/$CASEID.tmp"
+
+# remove leading white spaces
+sed -i -e 's/^[ \t]*//g' "actual/$CASEID"
+# remove file names and line numbers
+sed -i -e 's/: .*:[0-9]*:[^ ]*:/:/g' "actual/$CASEID"
+
+diff expected/$CASEID actual/$CASEID
+rc="$?"
+
+if [ "$rc" != "0" ] ; then
+ exit 3
+fi
+
diff --git a/test/vschema/Makefile b/test/vschema/Makefile
index 97fb28b..0c9ba93 100644
--- a/test/vschema/Makefile
+++ b/test/vschema/Makefile
@@ -108,8 +108,10 @@ idx-text-tests: idx-text-makeinputs idx-text-checklookup
@$(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)/idx-text idx-text ci-2 0 y ./idx-text-checklookup
@$(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)/idx-text idx-text ci-3 0 y ./idx-text-checklookup
@$(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)/idx-text idx-text ci-4 0 y ./idx-text-checklookup
+ @$(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)/idx-text idx-text ci-5 0 n
@$(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)/idx-text idx-text cs-1 0 y ./idx-text-checklookup
@$(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)/idx-text idx-text cs-2 0 y ./idx-text-checklookup
+ @$(SRCDIR)/runtestcase.sh $(BINDIR) $(SRCDIR)/idx-text idx-text cs-3 0 n
@rm -f ./idx-text-checklookup
@rm -f $(SRCDIR)/idx-text/input/*.gl
@echo "idx-text tests executed successfuly"
diff --git a/test/vschema/idx-text/checklookup.cpp b/test/vschema/idx-text/checklookup.cpp
index b2977f3..7d6a877 100644
--- a/test/vschema/idx-text/checklookup.cpp
+++ b/test/vschema/idx-text/checklookup.cpp
@@ -78,15 +78,27 @@ rc_t runChecks(const TestCase& test_case, const VCursor * cursor, uint32_t name_
rc = VCursorCellDataDirect( cursor, row_id, name_idx, NULL, (void const **)&name, NULL, &name_len );
if ( rc != 0 )
+ {
+ LOGERR( klogInt, rc, "VCursorCellDataDirect() failed" );
return rc;
+ }
+ if ( name_len == 0 )
+ continue;
+
rc = VCursorParamsSet( ( struct VCursorParams const * )cursor, "QUERY_NAME", "%.*s", name_len, name );
if ( rc != 0 )
+ {
+ LOGERR( klogInt, rc, "VCursorParamsSet() failed" );
return rc;
+ }
rc = VCursorCellDataDirect( cursor, row_id, name_range_idx, NULL, (void const **)&row_range, NULL, NULL );
if ( rc != 0 )
+ {
+ LOGERR( klogInt, rc, "VCursorCellDataDirect() failed" );
return rc;
+ }
std::string name_str(name, name_len);
@@ -183,12 +195,12 @@ void initTestCases()
{
std::map<std::string, RowRange> key_ranges;
{
- RowRange range = {1, 4};
+ RowRange range = {2, 5};
key_ranges["a"] = range;
key_ranges["A"] = range;
}
{
- RowRange range = {5, 5};
+ RowRange range = {7, 7};
key_ranges["b"] = range;
}
@@ -320,4 +332,4 @@ int main(int argc, const char* argv[])
std::cout << "Success" << std::endl;
return 0;
-}
\ No newline at end of file
+}
diff --git a/test/vschema/idx-text/expected/ci-1.stdout b/test/vschema/idx-text/expected/ci-1.stdout
index 85403c2..0f8a271 100644
--- a/test/vschema/idx-text/expected/ci-1.stdout
+++ b/test/vschema/idx-text/expected/ci-1.stdout
@@ -1,3 +1,6 @@
+ NAME:
+NAME_RANGE:
+
NAME: a
NAME_RANGE:
@@ -10,6 +13,9 @@ NAME_RANGE:
NAME: A
NAME_RANGE:
+ NAME:
+NAME_RANGE:
+
NAME: b
NAME_RANGE:
diff --git a/test/vschema/idx-text/expected/ci-5.stdout b/test/vschema/idx-text/expected/ci-5.stdout
new file mode 100644
index 0000000..dfb77ec
--- /dev/null
+++ b/test/vschema/idx-text/expected/ci-5.stdout
@@ -0,0 +1,15 @@
+ NAME: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+NAME_RANGE:
+
+ NAME: Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+NAME_RANGE:
+
+ NAME:
+NAME_RANGE:
+
+ NAME: aaa
+NAME_RANGE:
+
+ NAME:
+NAME_RANGE:
+
diff --git a/test/vschema/idx-text/expected/cs-3.stdout b/test/vschema/idx-text/expected/cs-3.stdout
new file mode 100644
index 0000000..a0dea33
--- /dev/null
+++ b/test/vschema/idx-text/expected/cs-3.stdout
@@ -0,0 +1,18 @@
+ NAME: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+NAME_RANGE:
+
+ NAME: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+NAME_RANGE:
+
+ NAME: Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+NAME_RANGE:
+
+ NAME:
+NAME_RANGE:
+
+ NAME: aaa
+NAME_RANGE:
+
+ NAME:
+NAME_RANGE:
+
diff --git a/test/vschema/idx-text/makeinputs.cpp b/test/vschema/idx-text/makeinputs.cpp
index e8ecdf9..50ebe97 100644
--- a/test/vschema/idx-text/makeinputs.cpp
+++ b/test/vschema/idx-text/makeinputs.cpp
@@ -45,7 +45,7 @@ enum TestCaseType
defaultCaseSensitive
};
-void run( const char * p_caseId, TestCaseType test_case_type, const std::string names[], size_t names_len )
+void run( const char * p_caseId, TestCaseType test_case_type, const std::string names[], int names_len )
{
std::string output_path = std::string ( TEST_SUITE "/input/" ) + p_caseId + ".gl";
std::string db_path = std::string ( TEST_SUITE "/actual/" ) + p_caseId + "/db";
@@ -83,7 +83,10 @@ void run( const char * p_caseId, TestCaseType test_case_type, const std::string
for (int i = 0; i < names_len; ++i)
{
- gw -> write ( column_name_id, 8, names[i].c_str(), names[i].size() );
+ if ( names[i].size() > 0 )
+ gw -> write ( column_name_id, 8, names[i].c_str(), names[i].size() );
+ else
+ gw -> columnDefault ( column_name_id, 8, names[i].c_str(), names[i].size() );
gw -> nextRow(table_id);
}
@@ -103,7 +106,7 @@ void run( const char * p_caseId, TestCaseType test_case_type, const std::string
int main()
{
{
- std::string names[] = {"a", "a", "a", "A", "b"};
+ std::string names[] = {"", "a", "a", "a", "A", "", "b"};
const char * test_case_id = "ci-1";
run( test_case_id, caseInsensitiveLower, names, sizeof names / sizeof names[0] );
@@ -131,6 +134,20 @@ int main()
}
{
+ std::string names[] = {
+ // very long name - 1025 characters
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+ "Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+ "",
+ "aaa",
+ ""
+ };
+ const char * test_case_id = "ci-5";
+
+ run( test_case_id, caseInsensitiveLower, names, sizeof names / sizeof names[0] );
+ }
+
+ {
std::string names[] = {"abcdefghi", "abcdefghi", "abcdefghi", "abcdefGHI", "ABcdefghi", "ABcdefghi1234567890"};
const char * test_case_id = "cs-1";
@@ -144,5 +161,22 @@ int main()
run( test_case_id, defaultCaseSensitive, names, sizeof names / sizeof names[0] );
}
+ {
+ std::string names[] = {
+ // very long name - 1024 characters
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+ // very long name - 1023 characters
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+ // very long name - 1025 characters
+ "Aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa [...]
+ "",
+ "aaa",
+ ""
+ };
+ const char * test_case_id = "cs-3";
+
+ run( test_case_id, caseSensitive, names, sizeof names / sizeof names[0] );
+ }
+
return 0;
}
diff --git a/tools/Makefile b/tools/Makefile
index f88c281..64e774d 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -62,12 +62,12 @@ SUBDIRS = \
ccextract \
pacbio-load \
fuse \
- vdb-diff \
- kget \
- ngs-pileup \
- general-loader \
- ref-variation \
-
+ vdb-diff \
+ kget \
+ ngs-pileup \
+ general-loader \
+ ref-variation \
+ fastdump \
ifneq (win,$(BUILD_OS))
ifneq (rwin,$(BUILD_OS))
diff --git a/tools/align-cache/Makefile b/tools/align-cache/Makefile
index bcf306f..6b65a58 100644
--- a/tools/align-cache/Makefile
+++ b/tools/align-cache/Makefile
@@ -85,7 +85,6 @@ ALIGN_CACHE_OBJ = \
ALIGN_CACHE_LIB = \
-skapp \
-sncbi-wvdb \
- -sxml2 \
-sm \
-sload
diff --git a/tools/align-info/align-info.vers b/tools/align-info/align-info.vers
index 35d16fb..097a15a 100644
--- a/tools/align-info/align-info.vers
+++ b/tools/align-info/align-info.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/align-info/align-info.vers.h b/tools/align-info/align-info.vers.h
index 088696c..15c01a7 100644
--- a/tools/align-info/align-info.vers.h
+++ b/tools/align-info/align-info.vers.h
@@ -1 +1 @@
-#define ALIGN_INFO_VERS 0x02050007
+#define ALIGN_INFO_VERS 0x02060002
diff --git a/tools/bam-loader/Globals.h b/tools/bam-loader/Globals.h
index 8a4e718..6031eaf 100644
--- a/tools/bam-loader/Globals.h
+++ b/tools/bam-loader/Globals.h
@@ -24,18 +24,17 @@
*
*/
-#ifndef BAM_LOAD_GLOBALS_H_
-#define BAM_LOAD_GLOBALS_H_ 1
-
enum LoaderModes {
mode_Archive,
- mode_Analysis
+ mode_Remap
};
typedef struct globals
{
char const *inpath;
char const *outpath;
+ char const *outname;
+ char const *firstOut;
char const *tmpfs;
struct KFile *noMatchLog;
@@ -64,6 +63,7 @@ typedef struct globals
unsigned minMatchCount; /* minimum number of matches to count as an alignment */
int minMapQual;
enum LoaderModes mode;
+ enum LoaderModes globalMode;
uint32_t maxSeqLen;
bool omit_aligned_reads;
bool omit_reference_reads;
@@ -86,7 +86,8 @@ typedef struct globals
bool hasTI;
bool acceptHardClip;
bool allowMultiMapping; /* allow multiple reference names to map to the same real reference */
+ bool assembleWithSecondary;
+ bool deferSecondary;
} Globals;
extern Globals G;
-#endif
diff --git a/tools/bam-loader/Makefile b/tools/bam-loader/Makefile
index ab5202a..5a27e06 100644
--- a/tools/bam-loader/Makefile
+++ b/tools/bam-loader/Makefile
@@ -87,7 +87,8 @@ BAMLOAD_SRC = \
reference-writer \
sequence-writer \
loader-imp \
- mem-bank
+ mem-bank \
+ low-match-count
BAMLOAD_OBJ = \
$(addsuffix .$(OBJX),$(BAMLOAD_SRC))
diff --git a/tools/bam-loader/alignment-writer.h b/tools/bam-loader/alignment-writer.h
index cba425b..bdbfcfa 100644
--- a/tools/bam-loader/alignment-writer.h
+++ b/tools/bam-loader/alignment-writer.h
@@ -82,6 +82,8 @@ struct AlignmentRecord {
#define AR_OFFSET(X) ((INSDC_coord_zero *)((X).data.ref_offset.buffer))
#define AR_OFFSET_TYPE(X) ((uint8_t *)((X).data.ref_offset_type.buffer))
+#define AR_LINKAGE_GROUP(X) ((X).data.linkageGroup)
+
Alignment *AlignmentMake(VDatabase *db);
rc_t AlignmentWriteRecord(Alignment *self, AlignmentRecord *data);
diff --git a/tools/bam-loader/bam-alignment.h b/tools/bam-loader/bam-alignment.h
new file mode 100644
index 0000000..14aae6d
--- /dev/null
+++ b/tools/bam-loader/bam-alignment.h
@@ -0,0 +1,74 @@
+/* ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ */
+
+/* Fixed-size head of one alignment record; variable-length data follows it
+ * (see the commented-out members at the end).
+ *
+ * Every member is a byte or an array of bytes, so the struct has no alignment
+ * requirement and no internal padding.
+ * NOTE(review): the fields presumably mirror the on-disk BAM alignment record
+ * with little-endian integers - confirm against the SAM/BAM specification.
+ */
+struct bam_alignment_s {
+ uint8_t rID[4];
+ uint8_t pos[4];
+ uint8_t read_name_len;
+ uint8_t mapQual;
+ uint8_t bin[2];
+ uint8_t n_cigars[2];
+ uint8_t flags[2];
+ uint8_t read_len[4];
+ uint8_t mate_rID[4];
+ uint8_t mate_pos[4];
+ uint8_t ins_size[4];
+ char read_name[1 /* read_name_len */];
+/* if you change length of read_name,
+ * adjust calculation of offsets in BAM_AlignmentSetOffsets */
+/* uint32_t cigar[n_cigars];
+ * uint8_t seq[(read_len + 1) / 2];
+ * uint8_t qual[read_len];
+ * uint8_t extra[...];
+ */
+};
+
+/* One record head seen two ways: member by member (cooked) or as the plain
+ * bytes that make it up (raw). Both views have the same size. */
+typedef union bam_alignment_u {
+ struct bam_alignment_s cooked;
+ uint8_t raw[sizeof(struct bam_alignment_s)];
+} bam_alignment;
+
+/* Position and extent of one tag.
+ * NOTE(review): offset is presumably a byte offset into the record data -
+ * confirm where these entries are filled in. */
+struct offset_size_s {
+ unsigned offset;
+ unsigned size; /* this is the total length of the tag; length of data is size - 3 */
+};
+
+/* Bookkeeping for one alignment record: the file it came from, a read-only
+ * view of the record bytes, and a set of unsigned sizes/offsets.
+ * NOTE(review): cigar, seq and qual are presumably byte offsets of those
+ * sections inside the record, and storage presumably owns the bytes when
+ * data is not borrowed - confirm in the code that fills this struct. */
+struct BAM_Alignment {
+ struct BAM_File *parent;
+ bam_alignment const *data;
+ uint8_t *storage;
+
+ uint64_t keyId;
+ bool wasInserted;
+
+ unsigned datasize;
+ unsigned cigar;
+ unsigned seq;
+ unsigned qual;
+ unsigned numExtra;
+ unsigned hasColor;
+ /* declared with a single element; presumably over-allocated so that
+  * numExtra entries are usable - confirm at the allocation site */
+ struct offset_size_s extra[1];
+};
diff --git a/tools/bam-loader/bam-load.vers b/tools/bam-loader/bam-load.vers
index 35d16fb..097a15a 100644
--- a/tools/bam-loader/bam-load.vers
+++ b/tools/bam-loader/bam-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/bam-loader/bam-loader.c b/tools/bam-loader/bam-loader.c
index d5cb688..6f54efe 100644
--- a/tools/bam-loader/bam-loader.c
+++ b/tools/bam-loader/bam-loader.c
@@ -49,6 +49,74 @@
#include "Globals.h"
#include "loader-imp.h"
+/*: ARGS
+Summary:
+ Load a BAM formatted data file
+
+Usage:
+ --help display this text and quit
+ --version display the version string and quit
+ [global-options] [options] <file...> [ --remap [options] <file>... ]...
+
+//:global-options
+Global Options:
+
+* options affecting logging
+ log-level <level> logging level values: [fatal|sys|int|err|warn|info|0-5] default: info
+ xml-log <filename> produce an XML-formatted log file
+
+* options affecting performance optimisation
+ tmpfs <directory> where to store temporary files, default: '/tmp'
+ cache-size <mbytes> the limit in MB for temporary files
+
+* options affecting error limits
+ max-err-count <number> the maximum number of errors to ignore
+ max-warning-dup-flag <count> the limit for number of duplicate flag mismatch warnings
+
+//:options
+Options:
+ output <name> name of the output, required
+ config <file> reference configuration file (See Configuration)
+ header <file> file containing the SAM header
+ remap special option to enable processing sets of remapped files. remap MUST be given between each set, all regular options can be respecified, in fact the output must be unique for each set. This is for when a set of reads are aligned multiple times, for example against different reference builds or with different aligners. This mode ensures that spot ids are the same across the several outputs.
+
+Debugging Options:
+ only-verify exit after verifying existence of references
+ max-rec-count <number> exit after processing this many records (per file)
+ nomatch-log <path> log alignments with no matching bases
+
+Filtering Options:
+ minimum-match <number> minimum number of matches for an alignment
+ no-secondary ignore alignments marked as secondary
+ accept-dups accept spots with inconsistent PCR duplicate flags
+ accept-nomatch accept alignments with no matching bases
+ ref-config limit processing to references in the config file, ignoring all others
+ ref-filter <name> limit processing to the given reference, ignoring all others
+ min-mapq <number> filter secondary alignments by minimum mapping quality
+
+Rare or Esoteric Options:
+ input <directory> where to find fasta files
+ ref-file <file> fasta file with references
+ unsorted expect unsorted input (requires more memory)
+ sorted require sorted input
+ TI look for trace id optional tag
+ unaligned <file> file without aligned reads
+
+Deprecated Options:
+ use-OQ use OQ option column for quality values instead of QUAL
+ no-verify skip verify existence of references from the BAM file
+ accept-hard-clip allow hard clipping in CIGAR
+ allow-multi-map allow the same reference to be mapped to multiple names in the input files
+ edit-aligned-qual <number> convert quality at aligned positions to this value
+ cs turn on awareness of colorspace
+ qual-quant quality scores quantization level
+ keep-mismatch-qual don't quantize quality at mismatched positions
+
+
+Example:
+ bam-load -o /tmp/SRZ123456 -k analysis.bam.cfg 123456.bam
+*/
+
/* MARK: Arguments and Usage */
static char const option_input[] = "input";
static char const option_output[] = "output";
@@ -81,6 +149,8 @@ static char const option_TI[] = "TI";
static char const option_max_warn_dup_flag[] = "max-warning-dup-flag";
static char const option_accept_hard_clip[] = "accept-hard-clip";
static char const option_allow_multi_map[] = "allow-multi-map";
+static char const option_allow_secondary[] = "make-spots-with-secondary";
+static char const option_defer_secondary[] = "defer-secondary";
#define OPTION_INPUT option_input
#define OPTION_OUTPUT option_output
@@ -104,6 +174,8 @@ static char const option_allow_multi_map[] = "allow-multi-map";
#define OPTION_MAX_WARN_DUP_FLAG option_max_warn_dup_flag
#define OPTION_ACCEPT_HARD_CLIP option_accept_hard_clip
#define OPTION_ALLOW_MULTI_MAP option_allow_multi_map
+#define OPTION_ALLOW_SECONDARY option_allow_secondary
+#define OPTION_DEFER_SECONDARY option_defer_secondary
#define ALIAS_INPUT "i"
#define ALIAS_OUTPUT "o"
@@ -339,6 +411,20 @@ char const * use_allow_multi_map[] =
NULL
};
+static
+char const * use_allow_secondary[] =
+{
+ "use secondary alignments for constructing spots",
+ NULL
+};
+
+static
+char const * use_defer_secondary[] =
+{
+ "defer processing of secondary alignments until the end of the file",
+ NULL
+};
+
OptDef Options[] =
{
/* order here is same as in param array below!!! */
@@ -372,7 +458,9 @@ OptDef Options[] =
{ OPTION_TI, NULL, NULL, use_TI, 1, false, false },
{ OPTION_MAX_WARN_DUP_FLAG, NULL, NULL, use_max_dup_warnings, 1, true, false },
{ OPTION_ACCEPT_HARD_CLIP, NULL, NULL, use_accept_hard_clip, 1, false, false },
- { OPTION_ALLOW_MULTI_MAP, NULL, NULL, use_allow_multi_map, 1, false, false }
+ { OPTION_ALLOW_MULTI_MAP, NULL, NULL, use_allow_multi_map, 1, false, false },
+ { OPTION_ALLOW_SECONDARY, NULL, NULL, use_allow_secondary, 1, false, false },
+ { OPTION_DEFER_SECONDARY, NULL, NULL, use_defer_secondary, 1, false, false }
};
const char* OptHelpParam[] =
@@ -408,7 +496,9 @@ const char* OptHelpParam[] =
NULL, /* use XT->TI */
"count", /* max. duplicate warning count */
NULL, /* allow hard clipping */
- NULL /* allow multimapping */
+ NULL, /* allow multimapping */
+ NULL, /* allow secondary */
+ NULL /* defer secondary */
};
rc_t UsageSummary (char const * progname)
@@ -495,7 +585,7 @@ static rc_t PathWithBasePath(char rslt[], size_t sz, char const path[], char con
return 0;
}
else if (plen < sz) {
- strcpy(rslt, path);
+ strncpy(rslt, path, sz);
return 0;
}
{
@@ -565,9 +655,29 @@ static rc_t LoadHeader(char const **rslt, char const path[], char const base[])
return rc;
}
-rc_t CC KMain (int argc, char * argv[])
+/* Run the argument parser over the given command line with both option
+ * tables (the loader's Options and the XML logger's), release whatever the
+ * parser produced, and hand back the parser's result code.
+ * KMain uses this for its --help and --version paths.
+ */
+static rc_t main_help_vers(int argc, char * argv[])
+{
+    Args *parsed = NULL;
+    rc_t rc;
+
+    rc = ArgsMakeAndHandle(&parsed, argc, argv, 2,
+                           Options, sizeof Options / sizeof (OptDef),
+                           XMLLogger_Args, XMLLogger_ArgsQty);
+    ArgsWhack(parsed);
+    return rc;
+}
+
+/* Fetch value number `index` of option `name` and keep a private copy of it.
+ *
+ * On success the string previously held in *result is freed and replaced by
+ * a heap-allocated duplicate, so *result must be NULL or malloc-owned on
+ * entry. On any failure *result is left untouched.
+ *
+ * Returns 0 on success, the error from ArgsOptionValue, or a
+ * memory-exhausted rc when the duplicate cannot be allocated.
+ */
+static rc_t getArgValue(Args *const args, char const *name, int index, char const **result)
+{
+    void const *value = NULL;
+    char *copy;
+    rc_t const rc = ArgsOptionValue(args, name, index, &value);
+
+    if (rc) return rc;
+
+    /* duplicate before freeing: a failed allocation must neither lose the
+     * old value nor be reported as success (an assert alone disappears
+     * when NDEBUG is defined) */
+    copy = strdup(value);
+    if (copy == NULL)
+        return RC(rcApp, rcArgv, rcAccessing, rcMemory, rcExhausted);
+
+    free((void *)*result);
+    *result = copy;
+    return 0;
+}
+
+static rc_t main_1(int argc, char *argv[], bool const continuing, unsigned const load)
{
- Args * args;
+ Args *args;
rc_t rc;
unsigned n_aligned = 0;
unsigned n_unalgnd = 0;
@@ -578,67 +688,48 @@ rc_t CC KMain (int argc, char * argv[])
unsigned nbsz = 0;
char const *value;
char *dummy;
- const XMLLogger* xml_logger = NULL;
-
- memset(&G, 0, sizeof(G));
-
- G.mode = mode_Archive;
- G.maxSeqLen = TableWriterRefSeq_MAX_SEQ_LEN;
- G.schemaPath = SCHEMAFILE;
- G.omit_aligned_reads = true;
- G.omit_reference_reads = true;
- G.minMapQual = 0; /* accept all */
- G.tmpfs = "/tmp";
-#if _ARCH_BITS == 32
-#warning 32-bit build is not tested. BEWARE!!!
- G.cache_size = ((size_t) 1) << 30;
-#else
- G.cache_size = ((size_t)16) << 30;
-#endif
- G.maxErrCount = 1000;
- G.minMatchCount = 10;
-
- set_pid();
-
- rc = ArgsMakeAndHandle (&args, argc, argv, 2, Options,
- sizeof Options / sizeof (OptDef), XMLLogger_Args, XMLLogger_ArgsQty);
+ rc = ArgsMakeAndHandle (&args, argc, argv, 1, Options, sizeof(Options)/sizeof(Options[0]));
while (rc == 0) {
uint32_t pcount;
- if( (rc = XMLLogger_Make(&xml_logger, NULL, args)) != 0 ) {
- break;
- }
rc = ArgsOptionCount(args, option_only_verify, &pcount);
if (rc)
break;
- G.onlyVerifyReferences = (pcount > 0);
+ G.onlyVerifyReferences |= (pcount > 0);
rc = ArgsOptionCount(args, option_no_verify, &pcount);
if (rc)
break;
- G.noVerifyReferences = (pcount > 0);
+ G.noVerifyReferences |= (pcount > 0);
rc = ArgsOptionCount(args, option_use_qual, &pcount);
if (rc)
break;
- G.useQUAL = (pcount > 0);
+ G.useQUAL |= (pcount > 0);
rc = ArgsOptionCount(args, option_ref_config, &pcount);
if (rc)
break;
- G.limit2config = (pcount > 0);
+ G.limit2config |= (pcount > 0);
rc = ArgsOptionCount(args, OPTION_REF_FILE, &pcount);
if (rc)
break;
+ if (pcount && G.refFiles) {
+ int i;
+
+ for (i = 0; G.refFiles[i]; ++i)
+ free((void *)G.refFiles[i]);
+ free((void *)G.refFiles);
+ }
G.refFiles = calloc(pcount + 1, sizeof(*(G.refFiles)));
- if( !G.refFiles ) {
+ if (!G.refFiles) {
rc = RC(rcApp, rcArgv, rcAccessing, rcMemory, rcExhausted);
break;
}
while(pcount-- > 0) {
- rc = ArgsOptionValue(args, OPTION_REF_FILE, pcount, (const void **)&G.refFiles[pcount]);
+ rc = getArgValue(args, OPTION_REF_FILE, pcount, &G.refFiles[pcount]);
if (rc)
break;
}
@@ -648,7 +739,7 @@ rc_t CC KMain (int argc, char * argv[])
break;
if (pcount == 1)
{
- rc = ArgsOptionValue (args, OPTION_TMPFS, 0, (const void **)&G.tmpfs);
+ rc = getArgValue(args, OPTION_TMPFS, 0, &G.tmpfs);
if (rc)
break;
}
@@ -665,7 +756,7 @@ rc_t CC KMain (int argc, char * argv[])
break;
if (pcount == 1)
{
- rc = ArgsOptionValue (args, OPTION_INPUT, 0, (const void **)&G.inpath);
+ rc = getArgValue(args, OPTION_INPUT, 0, &G.inpath);
if (rc)
break;
}
@@ -682,7 +773,7 @@ rc_t CC KMain (int argc, char * argv[])
break;
if (pcount == 1)
{
- rc = ArgsOptionValue (args, option_ref_filter, 0, (const void **)&G.refFilter);
+ rc = getArgValue(args, option_ref_filter, 0, &G.refFilter);
if (rc)
break;
}
@@ -699,7 +790,7 @@ rc_t CC KMain (int argc, char * argv[])
break;
if (pcount == 1)
{
- rc = ArgsOptionValue (args, OPTION_CONFIG, 0, (const void **)&G.refXRefPath);
+ rc = getArgValue(args, OPTION_CONFIG, 0, &G.refXRefPath);
if (rc)
break;
}
@@ -716,9 +807,14 @@ rc_t CC KMain (int argc, char * argv[])
break;
if (pcount == 1)
{
- rc = ArgsOptionValue (args, OPTION_OUTPUT, 0, (const void **)&G.outpath);
+ rc = getArgValue(args, OPTION_OUTPUT, 0, &G.outpath);
if (rc)
break;
+ if (load == 0) {
+ G.firstOut = strdup(G.outpath);
+ }
+ value = strrchr(G.outpath, '/');
+ G.outname = value ? (value + 1) : G.outpath;
}
else if (pcount > 1)
{
@@ -739,7 +835,7 @@ rc_t CC KMain (int argc, char * argv[])
break;
if (pcount == 1)
{
- rc = ArgsOptionValue (args, OPTION_MINMAPQ, 0, (const void **)&value);
+ rc = ArgsOptionValue(args, OPTION_MINMAPQ, 0, (const void **)&value);
if (rc)
break;
G.minMapQual = strtoul(value, &dummy, 0);
@@ -750,11 +846,11 @@ rc_t CC KMain (int argc, char * argv[])
break;
if (pcount == 1)
{
- rc = ArgsOptionValue (args, OPTION_QCOMP, 0, (const void **)&G.QualQuantizer);
+ rc = getArgValue(args, OPTION_QCOMP, 0, &G.QualQuantizer);
if (rc)
break;
}
-
+
rc = ArgsOptionCount (args, option_edit_aligned_qual, &pcount);
if (rc)
break;
@@ -804,12 +900,12 @@ rc_t CC KMain (int argc, char * argv[])
rc = ArgsOptionCount (args, option_unsorted, &pcount);
if (rc)
break;
- G.expectUnsorted = pcount > 0;
+ G.expectUnsorted |= (pcount > 0);
rc = ArgsOptionCount (args, option_sorted, &pcount);
if (rc)
break;
- G.requireSorted = pcount > 0;
+ G.requireSorted |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_MAX_REC_COUNT, &pcount);
if (rc)
@@ -847,42 +943,52 @@ rc_t CC KMain (int argc, char * argv[])
rc = ArgsOptionCount (args, OPTION_ACCEPT_DUP, &pcount);
if (rc)
break;
- G.acceptBadDups = pcount > 0;
+ G.acceptBadDups |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_ACCEPT_NOMATCH, &pcount);
if (rc)
break;
- G.acceptNoMatch = pcount > 0;
+ G.acceptNoMatch |= (pcount > 0);
rc = ArgsOptionCount (args, option_keep_mismatch_qual, &pcount);
if (rc)
break;
- G.keepMismatchQual = pcount > 0;
+ G.keepMismatchQual |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_NO_CS, &pcount);
if (rc)
break;
- G.noColorSpace = pcount > 0;
+ G.noColorSpace |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_NO_SECONDARY, &pcount);
if (rc)
break;
- G.noSecondary = pcount > 0;
+ G.noSecondary |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_TI, &pcount);
if (rc)
break;
- G.hasTI = pcount > 0;
+ G.hasTI |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_ACCEPT_HARD_CLIP, &pcount);
if (rc)
break;
- G.acceptHardClip = pcount > 0;
+ G.acceptHardClip |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_ALLOW_MULTI_MAP, &pcount);
if (rc)
break;
- G.allowMultiMapping = pcount > 0;
+ G.allowMultiMapping |= (pcount > 0);
+
+ rc = ArgsOptionCount (args, OPTION_ALLOW_SECONDARY, &pcount);
+ if (rc)
+ break;
+ G.assembleWithSecondary |= (pcount > 0);
+
+ rc = ArgsOptionCount (args, OPTION_DEFER_SECONDARY, &pcount);
+ if (rc)
+ break;
+ G.deferSecondary |= (pcount > 0);
rc = ArgsOptionCount (args, OPTION_NOMATCH_LOG, &pcount);
if (rc)
@@ -906,6 +1012,7 @@ rc_t CC KMain (int argc, char * argv[])
if (pcount == 1) {
rc = ArgsOptionValue (args, OPTION_HEADER, 0, (const void **)&value);
if (rc) break;
+ free((void *)G.headerText);
rc = LoadHeader(&G.headerText, value, G.inpath);
if (rc) break;
}
@@ -993,28 +1100,178 @@ rc_t CC KMain (int argc, char * argv[])
}
else
break;
-
- rc = run(argv[0], n_aligned, (char const **)aligned, n_unalgnd, (char const **)unalgnd);
+
+ rc = run(argv[0], n_aligned, (char const **)aligned, n_unalgnd, (char const **)unalgnd, continuing);
break;
}
free(name_buffer);
- free((void *)G.headerText);
- free(G.refFiles);
- value = G.outpath ? strrchr(G.outpath, '/') : "/???";
- if( value == NULL ) {
- value = G.outpath;
- } else {
- value++;
- }
if (rc) {
(void)PLOGERR(klogErr, (klogErr, rc, "load failed",
- "severity=total,status=failure,accession=%s,errors=%u", value, G.errCount));
+ "severity=total,status=failure,accession=%s,errors=%u", G.outname, G.errCount));
} else {
(void)PLOGMSG(klogInfo, (klogInfo, "loaded",
- "severity=total,status=success,accession=%s,errors=%u", value, G.errCount));
+ "severity=total,status=success,accession=%s,errors=%u", G.outname, G.errCount));
}
ArgsWhack(args);
- XMLLogger_Release(xml_logger);
+ return rc;
+}
+
+/* Free the heap-owned strings referenced from the global settings G: the
+ * NULL-terminated refFiles list together with its entries, then each of the
+ * individual path/text fields. The pointers themselves are not reset.
+ */
+static void cleanupGlobal(void)
+{
+    if (G.refFiles != NULL) {
+        unsigned n = 0;
+
+        /* the list ends at its first NULL entry */
+        while (G.refFiles[n] != NULL) {
+            free((void *)G.refFiles[n]);
+            ++n;
+        }
+        free((void *)G.refFiles);
+    }
+
+    /* input side */
+    free((void *)G.tmpfs);
+    free((void *)G.inpath);
+    free((void *)G.refFilter);
+    free((void *)G.refXRefPath);
+
+    /* output side */
+    free((void *)G.outpath);
+    free((void *)G.firstOut);
+
+    /* remaining text settings */
+    free((void *)G.headerText);
+    free((void *)G.QualQuantizer);
+    free((void *)G.schemaPath);
+}
+
+/* Look through argv[first] .. argv[argc - 1] for the first argument that is
+ * equal to any string in the NULL-terminated list `query`.
+ *
+ * Returns the index of that argument, or 0 when nothing matches.
+ */
+static int find_arg(char const *const *const query, int const first, int const argc, char **const argv)
+{
+    int pos = first;
+
+    while (pos < argc) {
+        char const *const *candidate = query;
+
+        while (*candidate != NULL) {
+            if (strcmp(argv[pos], *candidate) == 0)
+                return pos;
+            ++candidate;
+        }
+        ++pos;
+    }
+    return 0;
+}
+
+/* True when some argument after argv[0] equals one of the strings in the
+ * NULL-terminated list `query`. */
+static bool has_arg(char const *const *const query, int const argc, char **const argv)
+{
+    int const where = find_arg(query, 1, argc, argv);
+
+    return where > 0;
+}
+
+/* Spellings that select the XML log. The two long forms are tested by prefix
+ * in this order, so "--xml-log-fd" has to come before "--xml-log". */
+static const char *logger_options[] = { "--xml-log-fd", "--xml-log", "-z" };
+
+/* Take the first XML-logger option out of the command line and build the
+ * logger from it.
+ *
+ * Recognised forms are "-z <value>", "--xml-log[-fd] <value>" and
+ * "--xml-log[-fd]=<value>". The option (and its separate value, if any) is
+ * removed from argv and *argc is reduced accordingly; only the first
+ * occurrence is handled.
+ *
+ * Returns the logger, or NULL when no logger option is present or the logger
+ * could not be created. When the option is the last argument and its value
+ * is missing, the command line is left untouched so that the regular option
+ * parsing can deal with it.
+ */
+static XMLLogger const *make_logger(int *argc, char *argv[])
+{
+    XMLLogger const *rslt = NULL;
+    char *argf[4];
+    int i;
+
+    /* miniature command line: program name, option, value, terminator */
+    argf[0] = argv[0];
+    argf[1] = NULL;
+    argf[2] = NULL;
+    argf[3] = NULL;
+
+    for (i = 1; i < *argc; ++i) {
+        char *const opt = argv[i];
+        char *value = NULL;
+        int remove = 0;     /* number of argv slots the option occupies */
+
+        if (strcmp(opt, logger_options[2]) == 0) {
+            remove = 2;
+        }
+        else {
+            int j;
+
+            for (j = 0; j < 2; ++j) {
+                size_t const n = strlen(logger_options[j]);
+
+                if (strncmp(opt, logger_options[j], n) != 0)
+                    continue;
+                if (opt[n] == '\0') {
+                    remove = 2;
+                }
+                else if (opt[n] == '=') {
+                    /* split "name=value" in place */
+                    opt[n] = '\0';
+                    value = opt + n + 1;
+                    remove = 1;
+                }
+                break;
+            }
+        }
+        if (remove == 0)
+            continue;
+
+        if (remove == 2) {
+            /* the value is the next argument; without one there is nothing
+             * to build a logger from and nothing safe to remove */
+            if (i + 1 >= *argc)
+                break;
+            value = argv[i + 1];
+        }
+        argf[1] = opt;
+        argf[2] = value;
+        {
+            Args *args = NULL;
+
+            ArgsMakeAndHandle(&args, 3, argf, 1, XMLLogger_Args, XMLLogger_ArgsQty);
+            if (args) {
+                XMLLogger_Make(&rslt, NULL, args);
+                ArgsWhack(args);
+            }
+        }
+
+        /* close the gap: shift what follows the removed slots, including the
+         * entry at argv[old argc] (assumed to be the NULL terminator, as for
+         * main()). Only *argc - i + 1 pointers remain from position i on;
+         * copying more would run past the end of argv. */
+        *argc -= remove;
+        memmove(argv + i, argv + i + remove, (size_t)(*argc - i + 1) * sizeof(argv[0]));
+        break;
+    }
+    return rslt;
+}
+
+/* Program entry point.
+ *
+ * --help / --version anywhere on the command line short-circuit everything
+ * else. Otherwise the XML-logger option is extracted, the global settings G
+ * are reset to their defaults, and the command line is cut into slices at
+ * each "--remap" marker; main_1 runs once per slice.
+ *
+ * Returns 0 on success or the rc of the first load that failed; loading
+ * stops at the first failure.
+ */
+rc_t CC KMain(int argc, char *argv[])
+{
+    static const char *help[] = { "--help", "-h", "-?", NULL };
+    static const char *vers[] = { "--version", "-V", NULL };
+
+    bool const has_help = has_arg(help, argc, argv);
+    bool const has_vers = has_arg(vers, argc, argv);
+    XMLLogger const *logger = NULL;
+    int argfirst = 0;
+    int arglast = 0;
+    rc_t rc = 0;
+    unsigned load = 0;
+
+    if (has_help) {
+        argc = 2;
+        argv[1] = "--help";
+        return main_help_vers(argc, argv);
+    }
+    if (has_vers) {
+        argc = 2;
+        argv[1] = "--vers";
+        return main_help_vers(argc, argv);
+    }
+
+    logger = make_logger(&argc, argv);
+
+    memset(&G, 0, sizeof(G));
+    G.mode = mode_Archive;
+    G.globalMode = mode_Archive;
+    G.maxSeqLen = TableWriterRefSeq_MAX_SEQ_LEN;
+    /* string defaults are heap copies because cleanupGlobal frees them */
+    G.schemaPath = strdup(SCHEMAFILE);
+    G.omit_aligned_reads = true;
+    G.omit_reference_reads = true;
+    G.minMapQual = 0; /* accept all */
+    G.tmpfs = strdup("/tmp");
+    G.cache_size = ((size_t)16) << 30;
+    G.maxErrCount = 1000;
+    G.minMatchCount = 10;
+
+    set_pid();
+
+    for (arglast = 1; arglast < argc; ++arglast) {
+        if (strcmp(argv[arglast], "--remap") == 0) {
+            /* the marker slot becomes argv[0] of the next slice */
+            argv[arglast] = argv[0];
+            G.globalMode = mode_Remap;
+            rc = main_1(arglast - argfirst, argv + argfirst, true, load);
+            if (rc)
+                break;
+            G.mode = mode_Remap;
+            argfirst = arglast;
+            ++load;
+        }
+    }
+    /* final (or only) slice; after a failed load, running it would repeat
+     * the slice that just failed and overwrite the error code */
+    if (rc == 0)
+        rc = main_1(arglast - argfirst, argv + argfirst, false, load);
+    XMLLogger_Release(logger);
+    cleanupGlobal();
     return rc;
 }
diff --git a/tools/bam-loader/bam-priv.h b/tools/bam-loader/bam-priv.h
new file mode 100644
index 0000000..c10eda3
--- /dev/null
+++ b/tools/bam-loader/bam-priv.h
@@ -0,0 +1,196 @@
+/* ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ */
+
+#include "bam.h"
+#include "bam-alignment.h"
+
+typedef struct BAMIndex BAMIndex;
+typedef struct BufferedFile BufferedFile;
+typedef struct SAMFile SAMFile;
+typedef struct BGZFile BGZFile;
+
+#define ZLIB_BLOCK_SIZE (64u * 1024u)
+#define RGLR_BUFFER_SIZE (16u * ZLIB_BLOCK_SIZE)
+#define PIPE_BUFFER_SIZE (4096u)
+
+typedef uint8_t zlib_block_t[ZLIB_BLOCK_SIZE];
+
+typedef struct RawFile_vt_s { /* function table stored in BAM_File.vt for operating on the underlying file object, which is passed as the void * argument */
+ rc_t (*FileRead)(void *, zlib_block_t, unsigned *); /* the table built in SAMFileInit stores NULL here */
+ uint64_t (*FileGetPos)(void const *);
+ float (*FileProPos)(void const *); /* presumably the proportional position within the file - confirm */
+ uint64_t (*FileGetSize)(void const *);
+ rc_t (*FileSetPos)(void *, uint64_t);
+ void (*FileWhack)(void *); /* optional: BAM_FileWhack calls it only when non-NULL */
+} RawFile_vt;
+
+/* MARK: SAMFile */
+
+struct BufferedFile { /* a KFile together with a read buffer and the bookkeeping for it */
+ KFile const *kf; /* file the buffer is filled from */
+ void *buf; /* the buffer itself; bpos, bmax and size below describe it */
+ uint64_t fmax; /* file size if known or 0 */
+ uint64_t fpos; /* position in file of first byte in buffer */
+ size_t bpos; /* position in buffer of read head */
+ size_t bmax; /* number of valid bytes in buffer */
+ size_t size; /* maximum number of bytes that can be read into buffer */
+};
+
+struct SAMFile { /* reader state for SAM input */
+ BufferedFile file; /* must stay first: BAM_FileWhack whacks the union through file.bam.file */
+ int putback; /* character stored by SAMFilePutBack; SAMFileRead1 tests for a negative value first (presumably meaning nothing is pending) */
+ rc_t last; /* NOTE(review): presumably the most recent read status - confirm against SAMFileRead1 */
+};
+
+struct BGZFile { /* reader state for BGZF (zlib-block) input */
+ BufferedFile file; /* compressed bytes are pulled from here (BGZFileGetMoreBytes calls BufferedFileRead on it) */
+ z_stream zs; /* zlib stream state */
+};
+
+struct BAM_File { /* an open BAM or SAM input together with its parsed header and read buffers */
+ union {
+ BGZFile bam;
+ SAMFile sam;
+ } file; /* both members begin with a BufferedFile; BAM_FileWhack relies on that when it whacks file.bam.file */
+ RawFile_vt vt; /* operations on `file`; filled in by SAMFileInit or BGZFileInit */
+
+ KFile *defer; /* optional file holding deferred records (written by writeDefer, replayed by readDefer); NULL when not in use */
+
+ BAMRefSeq *refSeq; /* pointers into headerData1 except name points into headerData2 */
+ BAMReadGroup *readGroup; /* pointers into headerData1 */
+ char const *version;
+ char const *header;
+ void *headerData1; /* gets used for refSeq and readGroup */
+ void *headerData2; /* gets used for refSeq */
+ BAM_Alignment *nocopy; /* used to hold current record for BAM_FileRead2 */
+
+ uint64_t fpos_cur; /* NOTE(review): presumably the file position of the current block - confirm */
+ uint64_t deferPos; /* byte offset in `defer` of the next record to write or read */
+
+ unsigned refSeqs; /* presumably the number of entries in refSeq - confirm */
+ unsigned readGroups; /* presumably the number of entries in readGroup - confirm */
+ unsigned bufSize; /* current size of uncompressed buffer */
+ unsigned bufCurrent; /* location in uncompressed buffer of read head */
+ bool eof; /* set once reading has reached the end of the input */
+ bool isSAM; /* input is SAM text; read2 then reads via BAM_FileReadSAM */
+ zlib_block_t buffer; /* uncompressed buffer */
+};
+
+#define CG_NUM_SEGS 4
+
+#ifdef __GNUC__ /* prototypes carrying the always_inline attribute for the helpers defined below; only emitted for GCC-compatible compilers */
+static inline int getRefSeqId(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getPosition(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getReadNameLength(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getBin(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getMapQual(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getCigarCount(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getFlags(BAM_Alignment const *) __attribute__((always_inline));
+static inline unsigned getReadLen(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getMateRefSeqId(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getMatePos(BAM_Alignment const *) __attribute__((always_inline));
+static inline int getInsertSize(BAM_Alignment const *) __attribute__((always_inline));
+static inline char const *getReadName(BAM_Alignment const *) __attribute__((always_inline));
+static inline void const *getCigarBase(BAM_Alignment const *) __attribute__((always_inline));
+static inline uint16_t LE2HUI16(void const *) __attribute__((always_inline));
+static inline uint32_t LE2HUI32(void const *) __attribute__((always_inline));
+static inline uint64_t LE2HUI64(void const *) __attribute__((always_inline));
+static inline int16_t LE2HI16(void const *) __attribute__((always_inline));
+static inline int32_t LE2HI32(void const *) __attribute__((always_inline));
+static inline int64_t LE2HI64(void const *) __attribute__((always_inline));
+#endif /* __GNUC__ */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN /* little-endian host: LE2H* copy the bytes at X into a value unchanged; memcpy is used, so X need not be aligned */
+static inline uint16_t LE2HUI16(void const *X) { uint16_t y; memcpy(&y, X, sizeof(y)); return y; }
+static inline uint32_t LE2HUI32(void const *X) { uint32_t y; memcpy(&y, X, sizeof(y)); return y; }
+static inline uint64_t LE2HUI64(void const *X) { uint64_t y; memcpy(&y, X, sizeof(y)); return y; }
+static inline int16_t LE2HI16(void const *X) { int16_t y; memcpy(&y, X, sizeof(y)); return y; }
+static inline int32_t LE2HI32(void const *X) { int32_t y; memcpy(&y, X, sizeof(y)); return y; }
+static inline int64_t LE2HI64(void const *X) { int64_t y; memcpy(&y, X, sizeof(y)); return y; }
+#endif /* NOTE(review): if none of the __BYTE_ORDER macros are defined, both #if tests compare 0 == 0 and both sets get defined - confirm an endian header is always included first */
+#if __BYTE_ORDER == __BIG_ENDIAN /* big-endian host: same copy, then byte-swap with bswap_16/32/64 */
+static inline uint16_t LE2HUI16(void const *X) { uint16_t y; memcpy(&y, X, sizeof(y)); return (uint16_t)bswap_16(y); }
+static inline uint32_t LE2HUI32(void const *X) { uint32_t y; memcpy(&y, X, sizeof(y)); return (uint32_t)bswap_32(y); }
+static inline uint64_t LE2HUI64(void const *X) { uint64_t y; memcpy(&y, X, sizeof(y)); return (uint64_t)bswap_64(y); }
+static inline int16_t LE2HI16(void const *X) { int16_t y; memcpy(&y, X, sizeof(y)); return ( int16_t)bswap_16(y); }
+static inline int32_t LE2HI32(void const *X) { int32_t y; memcpy(&y, X, sizeof(y)); return ( int32_t)bswap_32(y); }
+static inline int64_t LE2HI64(void const *X) { int64_t y; memcpy(&y, X, sizeof(y)); return ( int64_t)bswap_64(y); }
+#endif
+
+static inline int getRefSeqId(BAM_Alignment const *const self) { /* returns the record's rID field decoded as a little-endian signed 32-bit value */
+ return LE2HI32(self->data->cooked.rID);
+}
+
+static inline int getPosition(BAM_Alignment const *const self) { /* returns the record's pos field decoded as a little-endian signed 32-bit value */
+ return LE2HI32(self->data->cooked.pos);
+}
+
+static inline int getReadNameLength(BAM_Alignment const *const self) { /* returns the record's read_name_len field as stored */
+ return self->data->cooked.read_name_len;
+}
+
+static inline int getBin(BAM_Alignment const *const self) { /* returns the record's bin field decoded as a little-endian unsigned 16-bit value */
+ return LE2HUI16(self->data->cooked.bin);
+}
+
+static inline int getMapQual(BAM_Alignment const *const self) { /* returns the record's mapQual field as stored */
+ return self->data->cooked.mapQual;
+}
+
+static inline int getCigarCount(BAM_Alignment const *const self) { /* returns the record's n_cigars field decoded as a little-endian unsigned 16-bit value */
+ return LE2HUI16(self->data->cooked.n_cigars);
+}
+
+static inline int getFlags(BAM_Alignment const *const self) { /* returns the record's flags field decoded as a little-endian unsigned 16-bit value; test it against the BAMFlags_* masks */
+ return LE2HUI16(self->data->cooked.flags);
+}
+
+static inline unsigned getReadLen(BAM_Alignment const *const self) { /* returns the record's read_len field decoded as a little-endian unsigned 32-bit value */
+ return LE2HUI32(self->data->cooked.read_len);
+}
+
+static inline int getMateRefSeqId(BAM_Alignment const *const self) { /* returns the record's mate_rID field decoded as a little-endian signed 32-bit value */
+ return LE2HI32(self->data->cooked.mate_rID);
+}
+
+static inline int getMatePos(BAM_Alignment const *const self) { /* returns the record's mate_pos field decoded as a little-endian signed 32-bit value */
+ return LE2HI32(self->data->cooked.mate_pos);
+}
+
+static inline int getInsertSize(BAM_Alignment const *const self) { /* returns the record's ins_size field decoded as a little-endian signed 32-bit value */
+ return LE2HI32(self->data->cooked.ins_size);
+}
+
+static inline char const *getReadName(BAM_Alignment const *const self) { /* returns a pointer to the first byte of the record's read_name field; the pointer refers into self->data */
+ return &self->data->cooked.read_name[0];
+}
+
+static inline void const *getCigarBase(BAM_Alignment const *const self)
+{ /* returns the address of raw byte self->cigar within the record (presumably the start of the CIGAR data - confirm where self->cigar is set) */
+ return &self->data->raw[self->cigar];
+}
+
+static bool BAM_AlignmentShouldDefer(BAM_Alignment const *);
diff --git a/tools/bam-loader/bam.c b/tools/bam-loader/bam.c
index c6db6ee..8d544f7 100644
--- a/tools/bam-loader/bam.c
+++ b/tools/bam-loader/bam.c
@@ -40,8 +40,6 @@
#include <atomic32.h>
#include <strtol.h>
-#include "bam.h"
-
#include <vfs/path.h>
#include <vfs/path-priv.h>
#include <kfs/kfs-priv.h>
@@ -62,54 +60,7 @@
#include <zlib.h>
-#if __BYTE_ORDER == __LITTLE_ENDIAN
-static uint16_t LE2HUI16(void const *X) { uint16_t y; memcpy(&y, X, sizeof(y)); return y; }
-static uint32_t LE2HUI32(void const *X) { uint32_t y; memcpy(&y, X, sizeof(y)); return y; }
-static uint64_t LE2HUI64(void const *X) { uint64_t y; memcpy(&y, X, sizeof(y)); return y; }
-static int16_t LE2HI16(void const *X) { int16_t y; memcpy(&y, X, sizeof(y)); return y; }
-static int32_t LE2HI32(void const *X) { int32_t y; memcpy(&y, X, sizeof(y)); return y; }
-/* static int64_t LE2HI64(void const *X) { int64_t y; memcpy(&y, X, sizeof(y)); return y; } */
-#endif
-#if __BYTE_ORDER == __BIG_ENDIAN
-static uint16_t LE2HUI16(void const *X) { uint16_t y; memcpy(&y, X, sizeof(y)); return (uint16_t)bswap_16(y); }
-static uint32_t LE2HUI32(void const *X) { uint32_t y; memcpy(&y, X, sizeof(y)); return (uint32_t)bswap_32(y); }
-static uint64_t LE2HUI64(void const *X) { uint64_t y; memcpy(&y, X, sizeof(y)); return (uint64_t)bswap_64(y); }
-static int16_t LE2HI16(void const *X) { int16_t y; memcpy(&y, X, sizeof(y)); return ( int16_t)bswap_16(y); }
-static int32_t LE2HI32(void const *X) { int32_t y; memcpy(&y, X, sizeof(y)); return ( int32_t)bswap_32(y); }
-static int64_t LE2HI64(void const *X) { int64_t y; memcpy(&y, X, sizeof(y)); return ( int64_t)bswap_64(y); }
-#endif
-
-typedef struct BAMIndex BAMIndex;
-typedef struct BufferedFile BufferedFile;
-typedef struct SAMFile SAMFile;
-typedef struct BGZFile BGZFile;
-
-#define ZLIB_BLOCK_SIZE (64u * 1024u)
-#define RGLR_BUFFER_SIZE (256u * ZLIB_BLOCK_SIZE)
-#define PIPE_BUFFER_SIZE (4096u)
-
-typedef uint8_t zlib_block_t[ZLIB_BLOCK_SIZE];
-
-typedef struct RawFile_vt_s {
- rc_t (*FileRead)(void *, zlib_block_t, unsigned *);
- uint64_t (*FileGetPos)(void const *);
- float (*FileProPos)(void const *);
- uint64_t (*FileGetSize)(void const *);
- rc_t (*FileSetPos)(void *, uint64_t);
- void (*FileWhack)(void *);
-} RawFile_vt;
-
-/* MARK: SAMFile */
-
-struct BufferedFile {
- KFile const *kf;
- void *buf;
- uint64_t fmax; /* file size if known or 0 */
- uint64_t fpos; /* position in file of first byte in buffer */
- size_t bpos; /* position in buffer of read head */
- size_t bmax; /* number of valid bytes in buffer */
- size_t size; /* maximum number of that can be read into buffer */
-};
+#include "bam-priv.h"
static rc_t BufferedFileRead(BufferedFile *const self)
{
@@ -182,12 +133,6 @@ static uint64_t BufferedFileGetSize(BufferedFile const *const self)
return self->fmax;
}
-struct SAMFile {
- BufferedFile file;
- int putback;
- rc_t last;
-};
-
static int SAMFileRead1(SAMFile *const self)
{
if (self->putback < 0) {
@@ -219,8 +164,7 @@ static void SAMFilePutBack(SAMFile *const self, int ch)
self->putback = ch;
}
-static
-rc_t SAMFileInit(SAMFile *self, RawFile_vt *vt)
+static rc_t SAMFileInit(SAMFile *self, RawFile_vt *vt)
{
static RawFile_vt const my_vt = {
(rc_t (*)(void *, zlib_block_t, unsigned *))NULL,
@@ -247,15 +191,7 @@ rc_t SAMFileInit(SAMFile *self, RawFile_vt *vt)
#else
#endif
-#define CG_NUM_SEGS 4
-
-struct BGZFile {
- BufferedFile file;
- z_stream zs;
-};
-
-static
-rc_t BGZFileGetMoreBytes(BGZFile *self)
+static rc_t BGZFileGetMoreBytes(BGZFile *self)
{
rc_t const rc = BufferedFileRead(&self->file);
if (rc)
@@ -270,8 +206,7 @@ rc_t BGZFileGetMoreBytes(BGZFile *self)
return 0;
}
-static
-rc_t BGZFileRead(BGZFile *self, zlib_block_t dst, unsigned *pNumRead)
+static rc_t BGZFileRead(BGZFile *self, zlib_block_t dst, unsigned *pNumRead)
{
#if VALIDATE_BGZF_HEADER
uint8_t extra[256];
@@ -320,10 +255,10 @@ rc_t BGZFileRead(BGZFile *self, zlib_block_t dst, unsigned *pNumRead)
rc = BGZFileGetMoreBytes(self);
if ( rc != 0 )
{
- if ( GetRCObject( rc ) == (enum RCObject)rcData && GetRCState( rc ) == rcInsufficient )
+ if ((int)GetRCObject(rc) == rcData && GetRCState(rc) == rcInsufficient)
{
DBGMSG(DBG_ALIGN, DBG_FLAG(DBG_ALIGN_BGZF), ("EOF in Zlib block after %lu bytes\n", self->file.fpos + self->file.bpos));
- rc = RC( rcAlign, rcFile, rcReading, rcFile, rcTooShort );
+ rc = RC(rcAlign, rcFile, rcReading, rcFile, rcTooShort);
}
return rc;
}
@@ -405,87 +340,6 @@ static rc_t BGZFileInit(BGZFile *const self, RawFile_vt *const vt)
return 0;
}
-/* MARK: BAM_File structures */
-
-struct BAM_File {
- union {
- BGZFile bam;
- SAMFile sam;
- } file;
- RawFile_vt vt;
-
- BAMRefSeq *refSeq; /* pointers into headerData1 except name points into headerData2 */
- BAMReadGroup *readGroup; /* pointers into headerData1 */
- char const *version;
- char const *header;
- void *headerData1; /* gets used for refSeq and readGroup */
- void *headerData2; /* gets used for refSeq */
- BAM_Alignment *bufLocker;
- BAM_Alignment *nocopy; /* used to hold current record for BAM_FileRead2 */
-
- uint64_t fpos_first;
- uint64_t fpos_cur;
-
- size_t nocopy_size;
-
- unsigned refSeqs;
- unsigned readGroups;
- unsigned ucfirst; /* offset of first record in uncompressed buffer */
- unsigned bufSize; /* current size of uncompressed buffer */
- unsigned bufCurrent; /* location in uncompressed buffer of read head */
- bool eof;
- bool isSAM;
- zlib_block_t buffer; /* uncompressed buffer */
-};
-
-/* MARK: Alignment structures */
-
-struct bam_alignment_s {
- uint8_t rID[4];
- uint8_t pos[4];
- uint8_t read_name_len;
- uint8_t mapQual;
- uint8_t bin[2];
- uint8_t n_cigars[2];
- uint8_t flags[2];
- uint8_t read_len[4];
- uint8_t mate_rID[4];
- uint8_t mate_pos[4];
- uint8_t ins_size[4];
- char read_name[1 /* read_name_len */];
-/* if you change length of read_name,
- * adjust calculation of offsets in BAM_AlignmentSetOffsets */
-/* uint32_t cigar[n_cigars];
- * uint8_t seq[(read_len + 1) / 2];
- * uint8_t qual[read_len];
- * uint8_t extra[...];
- */
-};
-
-typedef union bam_alignment_u {
- struct bam_alignment_s cooked;
- uint8_t raw[sizeof(struct bam_alignment_s)];
-} bam_alignment;
-
-struct offset_size_s {
- unsigned offset;
- unsigned size; /* this is the total length of the tag; length of data is size - 3 */
-};
-
-struct BAM_Alignment {
- BAM_File *parent;
- bam_alignment const *data;
- uint8_t *storage;
- unsigned datasize;
-
- unsigned cigar;
- unsigned seq;
- unsigned qual;
- unsigned numExtra;
- unsigned hasColor;
- struct offset_size_s extra[1];
-};
-
static const char cigarChars[] = {
ct_Match,
ct_Insert,
@@ -499,62 +353,7 @@ static const char cigarChars[] = {
/* ct_Overlap must not appear in actual BAM file */
};
-/* MARK: Alignment accessors */
-
-static int32_t getRefSeqId(const BAM_Alignment *cself) {
- return LE2HI32(cself->data->cooked.rID);
-}
-
-static int32_t getPosition(const BAM_Alignment *cself) {
- return LE2HI32(cself->data->cooked.pos);
-}
-
-static uint8_t getReadNameLength(const BAM_Alignment *cself) {
- return cself->data->cooked.read_name_len;
-}
-
-static uint16_t getBin(const BAM_Alignment *cself) {
- return LE2HUI16(cself->data->cooked.bin);
-}
-
-static uint8_t getMapQual(const BAM_Alignment *cself) {
- return cself->data->cooked.mapQual;
-}
-
-static uint16_t getCigarCount(const BAM_Alignment *cself) {
- return LE2HUI16(cself->data->cooked.n_cigars);
-}
-
-static uint16_t getFlags(const BAM_Alignment *cself) {
- return LE2HUI16(cself->data->cooked.flags);
-}
-
-static uint32_t getReadLen(const BAM_Alignment *cself) {
- return LE2HUI32(cself->data->cooked.read_len);
-}
-
-static int32_t getMateRefSeqId(const BAM_Alignment *cself) {
- return LE2HI32(cself->data->cooked.mate_rID);
-}
-
-static int32_t getMatePos(const BAM_Alignment *cself) {
- return LE2HI32(cself->data->cooked.mate_pos);
-}
-
-static int32_t getInsertSize(const BAM_Alignment *cself) {
- return LE2HI32(cself->data->cooked.ins_size);
-}
-
-static char const *getReadName(const BAM_Alignment *cself) {
- return &cself->data->cooked.read_name[0];
-}
-
-static void const *getCigarBase(BAM_Alignment const *cself)
-{
- return &cself->data->raw[cself->cigar];
-}
-
-static int opt_tag_cmp(char const a[2], char const b[2])
+static inline int opt_tag_cmp(char const a[2], char const b[2])
{
int const d0 = (int)a[0] - (int)b[0];
return d0 ? d0 : ((int)a[1] - (int)b[1]);
@@ -700,6 +499,12 @@ static struct offset_size_s const *get_CG_GQ_info(BAM_Alignment const *cself)
return x;
}
+static char const *get_BX(BAM_Alignment const *cself)
+{ /* returns a pointer to the value bytes of the record's "BX" tag when the tag is found and its type byte is 'Z'; otherwise NULL. The pointer refers into cself->data. */
+ struct offset_size_s const *const x = tag_search(cself, "BX", 0);
+ return (char const *)(x && cself->data->raw[x->offset + 2] == 'Z' ? &cself->data->raw[x->offset + 3] : NULL); /* offset+2 is the type byte, offset+3 the first value byte */
+}
+
/* MARK: BAM_File Reading functions */
/* returns (rcData, rcInsufficient) if eof */
@@ -807,7 +612,7 @@ static unsigned ParseHD(char const **rslt, unsigned const hlen, char hdata[])
break;
case 2:
if (cc != ':')
- return 0;
+ continue;
hdata[i] = '\0';
value = i + 1;
++st;
@@ -870,7 +675,7 @@ static unsigned ParseSQ(BAMRefSeq *rs, unsigned const hlen, char hdata[])
}
#else
if (cc != ':')
- return 0;
+ continue;
#endif
hdata[i] = '\0';
value = i + 1;
@@ -971,7 +776,7 @@ static unsigned ParseRG(BAMReadGroup *dst, unsigned const hlen, char hdata[])
break;
case 2:
if (cc != ':')
- return 0;
+ continue;
hdata[i] = '\0';
value = i + 1;
++st;
@@ -1438,9 +1243,7 @@ static rc_t ProcessBAMHeader(BAM_File *self, char const headerText[])
rc = ReadHeaders(self, &htxt, &hlen, &rdat, &nrefs);
if (rc) return rc;
- self->fpos_first = self->fpos_cur;
- self->ucfirst = self->bufCurrent;
- DBGMSG(DBG_ALIGN, DBG_FLAG(DBG_ALIGN_BAM), ("BAM Data records start at: %lu+%u\n", self->ucfirst, self->fpos_first));
+ DBGMSG(DBG_ALIGN, DBG_FLAG(DBG_ALIGN_BAM), ("BAM Data records start at: %lu+%u\n", self->fpos_cur, self->bufCurrent));
if (nrefs) {
refSeq = calloc(nrefs, sizeof(self->refSeq[0]));
@@ -1531,7 +1334,7 @@ static rc_t ProcessSAMHeader(BAM_File *self, char const substitute[])
/* MARK: BAM File destructor */
static void BAM_FileWhack(BAM_File *self) {
- if (self->refSeq)
+ if (self->refSeqs > 0 && self->refSeq)
free(self->refSeq);
if (self->readGroup)
free(self->readGroup);
@@ -1545,15 +1348,16 @@ static void BAM_FileWhack(BAM_File *self) {
free(self->nocopy);
if (self->vt.FileWhack)
self->vt.FileWhack(&self->file);
+ KFileRelease(self->defer);
BufferedFileWhack(&self->file.bam.file);
}
/* MARK: BAM File constructors */
/* file is retained */
-static rc_t BAM_FileMakeWithKFileAndHeader(BAM_File const **cself,
- KFile const *file,
- char const *headerText)
+static rc_t BAM_FileMakeWithKFileAndHeader(BAM_File **cself,
+ KFile const *file,
+ char const *headerText)
{
BAM_File *self = calloc(1, sizeof(*self));
rc_t rc;
@@ -1593,9 +1397,10 @@ static rc_t BAM_FileMakeWithKFileAndHeader(BAM_File const **cself,
return rc;
}
-rc_t BAM_FileMakeWithHeader(const BAM_File **cself,
- char const headerText[],
- char const path[], ... )
+rc_t BAM_FileMake(const BAM_File **cself,
+ KFile *defer,
+ char const headerText[],
+ char const path[], ... )
{
KDirectory *dir;
va_list args;
@@ -1605,17 +1410,29 @@ rc_t BAM_FileMakeWithHeader(const BAM_File **cself,
if (cself == NULL)
return RC(rcAlign, rcFile, rcOpening, rcParam, rcNull);
*cself = NULL;
-
- rc = KDirectoryNativeDir(&dir);
- if (rc) return rc;
- va_start(args, path);
- rc = KDirectoryVOpenFileRead(dir, &kf, path, args);
+
+ if (strcmp(path, "/dev/stdin") == 0) {
+ rc = KFileMakeStdIn(&kf);
+ }
+ else {
+ rc = KDirectoryNativeDir(&dir);
+ if (rc) return rc;
+ va_start(args, path);
+ rc = KDirectoryVOpenFileRead(dir, &kf, path, args);
+ va_end(args);
+ KDirectoryRelease(dir);
+ }
if (rc == 0) {
- rc = BAM_FileMakeWithKFileAndHeader(cself, kf, headerText);
+ BAM_File *self = NULL;
+ rc = BAM_FileMakeWithKFileAndHeader(&self, kf, headerText);
+ if (rc == 0) {
+ assert(self != NULL);
+ KFileAddRef(defer);
+ self->defer = defer;
+ }
+ *cself = self;
KFileRelease(kf);
}
- va_end(args);
- KDirectoryRelease(dir);
return rc;
}
@@ -1899,6 +1716,25 @@ static bool BAM_AlignmentInit(BAM_Alignment *const self, unsigned const maxsize,
}
}
+static void BAM_AlignmentDebugPrint(BAM_Alignment const *const self)
+{ /* emits one DBG_ALIGN_BAM debug message, formatted as a JSON-like object, with the record's data size, read-name length, CIGAR count, read length and extra-field count */
+ DBGMSG(DBG_ALIGN, DBG_FLAG(DBG_ALIGN_BAM), ("{"
+ "\"BAM record\": "
+ "{ "
+ "\"size\": %u, "
+ "\"name length\": %u, "
+ "\"cigar count\": %u, "
+ "\"read length\": %u, "
+ "\"extra count\": %u "
+ "}"
+ "}\n",
+ (unsigned)self->datasize,
+ (unsigned)getReadNameLength(self),
+ (unsigned)getCigarCount(self),
+ (unsigned)getReadLen(self),
+ (unsigned)self->numExtra));
+}
+
static bool BAM_AlignmentInitLog(BAM_Alignment *const self, unsigned const maxsize,
unsigned const datasize, void const *const data)
{
@@ -1916,37 +1752,10 @@ static bool BAM_AlignmentInitLog(BAM_Alignment *const self, unsigned const maxsi
rc_t const rc = ParseOptDataLog(self, maxsize, xtra, datasize);
if (rc == 0) {
- DBGMSG(DBG_ALIGN, DBG_FLAG(DBG_ALIGN_BAM), ("{"
- "\"BAM record\": "
- "{ "
- "\"size\": %u, "
- "\"name length\": %u, "
- "\"cigar count\": %u, "
- "\"read length\": %u, "
- "\"extra count\": %u "
- "}"
- "}\n",
- (unsigned)datasize,
- (unsigned)getReadNameLength(self),
- (unsigned)getCigarCount(self),
- (unsigned)getReadLen(self),
- (unsigned)self->numExtra));
+ BAM_AlignmentDebugPrint(self);
return true;
}
}
- DBGMSG(DBG_ALIGN, DBG_FLAG(DBG_ALIGN_BAM), ("{"
- "\"BAM record\": "
- "{ "
- "\"size\": %u, "
- "\"name length\": %u, "
- "\"cigar count\": %u, "
- "\"read length\": %u "
- "}"
- "}\n",
- (unsigned)datasize,
- (unsigned)getReadNameLength(self),
- (unsigned)getCigarCount(self),
- (unsigned)getReadLen(self)));
return false;
}
}
@@ -1985,7 +1794,7 @@ rc_t BAM_FileReadNoCopy(BAM_File *const self, unsigned actsize[], BAM_Alignment
if (rc == 0)
return BAM_FileReadNoCopy(self, actsize, rhs, maxsize);
- if ( GetRCObject( rc ) == (enum RCObject)rcData && GetRCState( rc ) == rcInsufficient )
+ if ((int)GetRCObject(rc) == rcData && GetRCState(rc) == rcInsufficient)
{
self->eof = true;
return RC(rcAlign, rcFile, rcReading, rcRow, rcNotFound);
@@ -2052,7 +1861,7 @@ rc_t BAM_FileReadCopy(BAM_File *const self, BAM_Alignment const *rslt[], bool co
rc = BAM_FileReadI32(self, &i32);
if ( rc != 0 )
{
- if ( GetRCObject( rc ) == (enum RCObject)rcData && GetRCState( rc ) == rcInsufficient )
+ if ((int)GetRCObject(rc) == rcData && GetRCState(rc) == rcInsufficient)
{
self->eof = true;
rc = RC( rcAlign, rcFile, rcReading, rcRow, rcNotFound );
@@ -2083,9 +1892,7 @@ rc_t BAM_FileReadCopy(BAM_File *const self, BAM_Alignment const *rslt[], bool co
if (y) {
if ((log ? BAM_AlignmentInitLog : BAM_AlignmentInit)(y, rsltsize, datasize, data)) {
- if (storage == NULL)
- self->bufLocker = y;
- else
+ if (storage != NULL)
y->storage = storage;
y->parent = self;
@@ -2106,22 +1913,6 @@ rc_t BAM_FileReadCopy(BAM_File *const self, BAM_Alignment const *rslt[], bool co
return rc;
}
-static
-rc_t BAM_FileBreakLock(BAM_File *const self)
-{
- if (self->bufLocker != NULL) {
- if (self->bufLocker->storage == NULL)
- self->bufLocker->storage = malloc(self->bufLocker->datasize);
- if (self->bufLocker->storage == NULL)
- return RC(rcAlign, rcFile, rcReading, rcMemory, rcExhausted);
-
- memcpy(self->bufLocker->storage, self->bufLocker->data, self->bufLocker->datasize);
- self->bufLocker->data = (bam_alignment *)&self->bufLocker->storage[0];
- self->bufLocker = NULL;
- }
- return 0;
-}
-
/* MARK: SAM code */
static void SAM2BAM_ConvertShort(void *const Dst, int value)
@@ -2653,39 +2444,31 @@ static rc_t BAM_FileReadSAM(BAM_File *const self, BAM_Alignment const **const rs
return RC(rcAlign, rcFile, rcReading, rcRow, rcInvalid);
}
-rc_t BAM_FileRead2(const BAM_File *cself, const BAM_Alignment **rhs)
+static rc_t read2(BAM_File *const self, BAM_Alignment const **const rhs)
{
- BAM_File *const self = (BAM_File *)cself;
unsigned actsize = 0;
rc_t rc;
- if (self == NULL || rhs == NULL)
- return RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
-
- *rhs = NULL;
-
if (self->bufCurrent >= self->bufSize && self->eof)
return RC(rcAlign, rcFile, rcReading, rcRow, rcNotFound);
- if (self->isSAM) return BAM_FileReadSAM(self, rhs);
-
- rc = BAM_FileBreakLock(self);
- if (rc)
+ if (self->isSAM) {
+ rc = BAM_FileReadSAM(self, rhs);
+ if (rc != 0 && GetRCObject(rc) == rcRow && GetRCState(rc) == rcNotFound)
+ self->eof = true;
return rc;
-
- if (self->nocopy_size == 0) {
- size_t const size = 4096u;
+ }
+ if (self->nocopy == NULL) {
+ size_t const size = 64u * 1024u;
void *const temp = malloc(size);
if (temp == NULL)
return RC(rcAlign, rcFile, rcReading, rcMemory, rcExhausted);
self->nocopy = temp;
- self->nocopy_size = size;
}
-AGAIN:
- rc = BAM_FileReadNoCopy(self, &actsize, self->nocopy, (unsigned)self->nocopy_size);
+ rc = BAM_FileReadNoCopy(self, &actsize, self->nocopy, 64u * 1024u);
if (rc == 0) {
*rhs = self->nocopy;
if (BAM_AlignmentIsEmpty(self->nocopy)) {
@@ -2693,30 +2476,86 @@ AGAIN:
LOGERR(klogWarn, rc, "BAM Record contains no alignment or sequence data");
}
}
- else if ( GetRCObject( rc ) == (enum RCObject)rcBuffer && GetRCState( rc ) == rcInsufficient )
+ else if ((int)GetRCObject(rc) == rcBuffer && GetRCState(rc) == rcInsufficient)
{
- unsigned const size = (actsize + 4095u) & ~4095u;
- void *const temp = realloc(self->nocopy, size);
+ return RC(rcAlign, rcFile, rcReading, rcData, rcInvalid);
+ }
+ else if ((int)GetRCObject(rc) == rcBuffer && GetRCState(rc) == rcNotAvailable)
+ {
+ rc = BAM_FileReadCopy(self, rhs, true);
+ }
+ else if ((int)GetRCObject(rc) == rcRow && GetRCState(rc) == rcInvalid) {
+ BAM_AlignmentLogParseError(self->nocopy);
+ }
+ return rc;
+}
+
+static rc_t readDefer(BAM_File *const self, BAM_Alignment const **const rslt)
+{ /* Reads the next deferred record from self->defer (at self->deferPos) into self->buffer and exposes it through self->nocopy / *rslt. When the defer file is exhausted it is released and (rcRow, rcNotFound) is returned. Truncated or oversized records yield an error rc. */
+ uint32_t datasize = 0; /* 4-byte length prefix as stored by writeDefer */
+ size_t nread = 0;
+ rc_t rc = 0;
+
+ rc = KFileReadAll(self->defer, self->deferPos, &datasize, 4, &nread);
+ if (rc) return rc;
+ if (nread == 0) { /* nothing left to replay */
+ KFileRelease(self->defer);
+ self->defer = NULL;
+ return RC(rcAlign, rcFile, rcReading, rcRow, rcNotFound);
+ }
+ if (nread != 4) return RC(rcAlign, rcFile, rcReading, rcFile, rcTooShort); /* truncated length prefix; checked at run time, not only by assert */
+ if (datasize >= 64u * 1024u) return RC(rcAlign, rcFile, rcReading, rcData, rcInvalid); /* record would overrun self->buffer */
+ if (self->nocopy == NULL) {
+ size_t const size = 64u * 1024u;
+ void *const temp = malloc(size);
if (temp == NULL)
return RC(rcAlign, rcFile, rcReading, rcMemory, rcExhausted);
-
- self->nocopy = temp;
- self->nocopy_size = size;
- goto AGAIN;
- }
- else if ( GetRCObject( rc ) == (enum RCObject)rcBuffer && GetRCState( rc ) == rcInsufficient )
- {
- rc = BAM_FileReadCopy( self, rhs, true );
+ self->nocopy = temp;
}
- else if (GetRCObject(rc) == rcRow && GetRCState(rc) == rcInvalid) {
- BAM_AlignmentLogParseError(self->nocopy);
+
+ rc = KFileReadAll(self->defer, self->deferPos + 4, self->buffer, datasize, &nread);
+ if (rc) return rc;
+ if (nread != datasize) return RC(rcAlign, rcFile, rcReading, rcFile, rcTooShort); /* truncated record body */
+ self->deferPos += 4 + datasize;
+
+ if (!BAM_AlignmentInitLog(self->nocopy, 64u * 1024u, datasize, self->buffer)) return RC(rcAlign, rcFile, rcReading, rcRow, rcInvalid); /* do not hand out a record that failed to parse */
+ self->nocopy->parent = self;
+ *rslt = self->nocopy;
+ if (BAM_AlignmentIsEmpty(self->nocopy)) {
+ rc = RC(rcAlign, rcFile, rcReading, rcRow, rcEmpty);
+ LOGERR(klogWarn, rc, "BAM Record contains no alignment or sequence data");
+ }
return rc;
}
-rc_t BAM_FileRead(const BAM_File *cself, const BAM_Alignment **rhs)
+static rc_t writeExactly(KFile *const f, uint64_t const pos, void const *const data, size_t const size) {
+ char const *const p = (char const *)data;
+ size_t written = 0;
+
+ while (written < size) {
+ size_t num_writ = 0;
+ rc_t const rc = KFileWrite(f, pos + written, p + written, size - written, &num_writ);
+ if (rc) return rc;
+ written += num_writ;
+ }
+ return 0;
+}
+
+static rc_t writeDefer(BAM_File *const self, BAM_Alignment const *const algn)
+{ /* appends one record to self->defer at self->deferPos and advances deferPos past it; returns 0 or the first write error */
+ rc_t rc = 0;
+
+ rc = writeExactly(self->defer, self->deferPos, &algn->datasize, 4); /* length prefix: the first 4 bytes of algn->datasize as they sit in memory */
+ if (rc) return rc;
+ rc = writeExactly(self->defer, self->deferPos + 4, algn->data, algn->datasize); /* followed by the raw record bytes */
+ if (rc) return rc;
+ self->deferPos += 4 + algn->datasize;
+ return 0;
+}
+
+rc_t BAM_FileRead2(const BAM_File *cself, const BAM_Alignment **rhs)
{
BAM_File *const self = (BAM_File *)cself;
@@ -2724,15 +2563,33 @@ rc_t BAM_FileRead(const BAM_File *cself, const BAM_Alignment **rhs)
return RC(rcAlign, rcFile, rcReading, rcParam, rcNull);
*rhs = NULL;
-
- if (self->bufCurrent >= self->bufSize && self->eof)
- return RC(rcAlign, rcFile, rcReading, rcRow, rcNotFound);
- else {
- rc_t const rc = BAM_FileBreakLock(self);
- if (rc)
+
+ if (self->eof && self->defer != NULL) {
+ return readDefer(self, rhs);
+ }
+ for ( ; ; ) {
+ rc_t const rc = read2(self, rhs);
+ if (rc != 0) {
+ if (self->eof && self->defer != NULL) {
+ self->deferPos = 0;
+ return readDefer(self, rhs);
+ }
return rc;
+ }
+ if (self->defer && BAM_AlignmentShouldDefer(*rhs)) {
+ rc_t const rc = writeDefer(self, *rhs);
+ if (rc) return rc;
+ }
+ else
+ break;
}
- return BAM_FileReadCopy(self, rhs, false);
+ return 0;
+}
+
+rc_t BAM_FileRead(const BAM_File *cself, const BAM_Alignment **rhs)
+{ /* deprecated entry point: never returns a value; it fails the assertion and, if assertions are compiled out, aborts */
+ assert(!"deprecated");
+ abort();
}
/* MARK: BAM File header info accessor */
@@ -2797,8 +2654,6 @@ rc_t BAM_FileGetHeaderText(BAM_File const *cself, char const **header, size_t *h
static rc_t BAM_AlignmentWhack(BAM_Alignment *self)
{
- if (self->parent->bufLocker == self)
- self->parent->bufLocker = NULL;
if (self != self->parent->nocopy) {
free(self->storage);
free(self);
@@ -2820,6 +2675,39 @@ rc_t BAM_AlignmentRelease(const BAM_Alignment *cself)
return 0;
}
+rc_t BAM_AlignmentCopy(const BAM_Alignment *self, BAM_Alignment **rslt)
+{ /* Makes a copy of `self` in a single allocation: the BAM_Alignment header first, then (at a 16-byte-rounded offset) a private copy of the record bytes. Stores the copy in *rslt and returns 0; logs a fatal message and aborts when memory cannot be allocated. */
+ unsigned const rsltsize = BAM_AlignmentSize(self->numExtra);
+ unsigned const padded = (rsltsize + 15UL) & ~15UL; /* header size rounded up to a multiple of 16 */
+ char *const tmp = malloc(padded + self->datasize);
+ BAM_Alignment *copy = NULL;
+
+ if (tmp == NULL) { /* checked before any pointer arithmetic on tmp */
+ LOGMSG(klogFatal, "OUT OF MEMORY");
+ abort();
+ }
+ memcpy(tmp, self, rsltsize);
+ memcpy(tmp + padded, self->data, self->datasize);
+ copy = (BAM_Alignment *)tmp;
+ copy->data = (void const *)(tmp + padded); /* point at the private copy of the record bytes */
+ copy->storage = NULL; /* the bytes live inside this allocation, so there is no separate storage */
+ *rslt = copy;
+
+ return 0;
+}
+
+static bool BAM_AlignmentShouldDefer(BAM_Alignment const *const self)
+{ /* true when the record is flagged not-primary or supplemental, unless it is also flagged as unmapped */
+ int const deferrable = BAMFlags_IsNotPrimary | BAMFlags_IsSupplemental;
+ int const bits = getFlags(self);
+
+ if ((bits & BAMFlags_SelfIsUnmapped) != 0) {
+ return false; /* unmapped records are never deferred */
+ }
+
+ return (bits & deferrable) != 0;
+}
+
#if 0
uint16_t BAM_AlignmentIffyFields(const BAM_Alignment *self)
{
@@ -4083,3 +3971,10 @@ rc_t BAM_AlignmentGetRNAStrand(BAM_Alignment const *const self, uint8_t *const r
}
return 0;
}
+
+rc_t BAM_AlignmentGetLinkageGroup(BAM_Alignment const *self,
+ char const **name)
+{ /* stores in *name the value of the record's "BX" tag as found by get_BX, or NULL when get_BX finds none; always returns 0 */
+ *name = get_BX(self);
+ return 0;
+}
diff --git a/tools/bam-loader/bam.h b/tools/bam-loader/bam.h
index ae81c60..274744e 100644
--- a/tools/bam-loader/bam.h
+++ b/tools/bam-loader/bam.h
@@ -29,13 +29,13 @@
*/
typedef struct BAM_Alignment BAM_Alignment;
-
/* AddRef
* Release
*/
rc_t BAM_AlignmentAddRef ( const BAM_Alignment *self );
rc_t BAM_AlignmentRelease ( const BAM_Alignment *self );
+rc_t BAM_AlignmentCopy(const BAM_Alignment *self, BAM_Alignment **rslt);
/* GetReadLength
* get the sequence length
@@ -221,18 +221,20 @@ enum BAMFlags
BAMFlags_bit_IsNotPrimary, /* a read having split hits may have multiple primary alignments */
BAMFlags_bit_IsLowQuality, /* fails platform/vendor quality checks */
BAMFlags_bit_IsDuplicate, /* PCR or optical dup */
+ BAMFlags_bit_IsSupplemental,
BAMFlags_WasPaired = (1 << BAMFlags_bit_WasPaired),
BAMFlags_IsMappedAsPair = (1 << BAMFlags_bit_IsMappedAsPair),
BAMFlags_SelfIsUnmapped = (1 << BAMFlags_bit_SelfIsUnmapped),
BAMFlags_MateIsUnmapped = (1 << BAMFlags_bit_MateIsUnmapped),
- BAMFlags_SelfIsReverse = (1 << BAMFlags_bit_SelfIsReverse),
- BAMFlags_MateIsReverse = (1 << BAMFlags_bit_MateIsReverse),
+ BAMFlags_SelfIsReverse = (1 << BAMFlags_bit_SelfIsReverse),
+ BAMFlags_MateIsReverse = (1 << BAMFlags_bit_MateIsReverse),
BAMFlags_IsFirst = (1 << BAMFlags_bit_IsFirst),
BAMFlags_IsSecond = (1 << BAMFlags_bit_IsSecond),
- BAMFlags_IsNotPrimary = (1 << BAMFlags_bit_IsNotPrimary),
- BAMFlags_IsLowQuality = (1 << BAMFlags_bit_IsLowQuality),
- BAMFlags_IsDuplicate = (1 << BAMFlags_bit_IsDuplicate)
+ BAMFlags_IsNotPrimary = (1 << BAMFlags_bit_IsNotPrimary),
+ BAMFlags_IsLowQuality = (1 << BAMFlags_bit_IsLowQuality),
+ BAMFlags_IsDuplicate = (1 << BAMFlags_bit_IsDuplicate),
+ BAMFlags_IsSupplemental = (1 << BAMFlags_bit_IsSupplemental)
};
rc_t BAM_AlignmentGetFlags ( const BAM_Alignment *self, uint16_t *flags );
@@ -433,6 +435,9 @@ rc_t BAM_AlignmentGetCGAlignGroup(BAM_Alignment const *self,
char buffer[],
size_t max_size,
size_t *act_size);
+
+rc_t BAM_AlignmentGetLinkageGroup(BAM_Alignment const *self,
+ char const **name);
/*--------------------------------------------------------------------------
@@ -483,31 +488,10 @@ typedef uint64_t BAM_FilePosition;
*
* "path" [ IN ] - NUL terminated string or format
*/
-rc_t BAM_FileMake ( const BAM_File **result, const char *path, ... );
-
-rc_t BAM_FileMakeWithHeader ( const BAM_File **result,
- char const headerText[],
- char const path[], ... );
-
-/* MakeWithDir
- * open the BAM file specified by path and supplied directory
- *
- * "dir" [ IN ] - directory object used to open file
- *
- * "path" [ IN ] - NUL terminated string or format
- */
-rc_t BAM_FileMakeWithDir ( const BAM_File **result,
- struct KDirectory const *dir, const char *path, ... );
-rc_t BAM_FileVMakeWithDir ( const BAM_File **result,
- struct KDirectory const *dir, const char *path, va_list args );
-
-/* Make
- * open the BAM file specified by file
- *
- * "file" [ IN ] - an open KFile
- */
-rc_t BAM_FileMakeWithKFile(const BAM_File **result,
- struct KFile const *file);
+rc_t BAM_FileMake(const BAM_File **result,
+ KFile *defer,
+ char const headerText[],
+ char const path[], ... );
/* AddRef
* Release
@@ -534,17 +518,6 @@ rc_t BAM_FileGetPosition ( const BAM_File *self, BAM_FilePosition *pos );
*/
float BAM_FileGetProportionalPosition ( const BAM_File *self );
-
-/* Read
- * read an aligment
- *
- * "result" [ OUT ] - return param for BAM_Alignment object
- * must be released with BAM_AlignmentRelease
- *
- * returns RC(..., ..., ..., rcRow, rcNotFound) at end
- */
-rc_t BAM_FileRead ( const BAM_File *self, const BAM_Alignment **result );
-
/* Read
* read an aligment
@@ -564,19 +537,6 @@ rc_t BAM_FileRead ( const BAM_File *self, const BAM_Alignment **result );
rc_t BAM_FileRead2 ( const BAM_File *self, const BAM_Alignment **result );
-/* Rewind
- * reset the position back to the first aligment in the file
- */
-rc_t BAM_FileRewind ( const BAM_File *self );
-
-
-/* SetPosition
- * set the position to a particular alignment
- * pass in the values from GetPosition
- */
-rc_t BAM_FileSetPosition ( const BAM_File *self, const BAM_FilePosition *pos );
-
-
/* GetRefSeqCount
* get the number of Reference Sequences refered to in the header
* this is not necessarily the number of Reference Sequences referenced
diff --git a/tools/bam-loader/loader-imp.c b/tools/bam-loader/loader-imp.c
index 3dbbd5f..d2cfef6 100644
--- a/tools/bam-loader/loader-imp.c
+++ b/tools/bam-loader/loader-imp.c
@@ -63,6 +63,11 @@
#include <kapp/log-xml.h>
#include <kapp/progressbar.h>
+#include <kproc/queue.h>
+#include <kproc/thread.h>
+#include <kproc/timeout.h>
+#include <os-native.h>
+
#include <sysalloc.h>
#include <atomic32.h>
@@ -76,13 +81,20 @@
#include <assert.h>
#include <limits.h>
#include <time.h>
-
+#include <zlib.h>
#include "bam.h"
+#include "bam-alignment.h"
#include "Globals.h"
#include "sequence-writer.h"
#include "reference-writer.h"
#include "alignment-writer.h"
#include "mem-bank.h"
+#include "low-match-count.h"
+
+#define THREADING_BAMREAD 1 /*** Reading BAM and SAM are moved to a separate thread ***/
+#if THREADING_BAMREAD
+#define THREADING_BAMREAD_PRIME_NAME2KEY 1 /*** Only valid when THREADING_BAMREAD==1. Will prime Name2Key on BAM/SAM thread ***/
+#endif
#define NUM_ID_SPACES (256u)
@@ -113,9 +125,10 @@ typedef struct {
uint8_t alignmentCount[2]; /* 0..254; 254: saturated max; 255: special meaning "too many" */
uint8_t unmated: 1,
pcr_dup: 1,
- has_a_read: 1,
unaligned_1: 1,
- unaligned_2: 1;
+ unaligned_2: 1,
+ hardclipped: 1,
+ primary_is_set: 1;
} ctx_value_t;
#define CTX_VALUE_SET_P_ID(O,N,V) do { int64_t tv = (V); (O).primaryId[N] = (uint32_t)tv; (O).pId_ext[N] = tv >> 32; } while(0);
@@ -127,6 +140,7 @@ typedef struct {
typedef struct FragmentInfo {
uint64_t ti;
uint32_t readlen;
+ uint8_t lglen;
uint8_t aligned;
uint8_t is_bad;
uint8_t orientation;
@@ -135,16 +149,9 @@ typedef struct FragmentInfo {
uint8_t cskey;
} FragmentInfo;
-typedef struct context_t {
- const KLoadProgressbar *progress[4];
+typedef struct KeyToID {
KBTree *key2id[NUM_ID_SPACES];
char *key2id_names;
- MMArray *id2value;
- MemBank *frags;
- int64_t spotId;
- int64_t primaryId;
- int64_t secondId;
- uint64_t alignCount;
uint32_t idCount[NUM_ID_SPACES];
uint32_t key2id_hash[NUM_ID_SPACES];
@@ -158,6 +165,17 @@ typedef struct context_t {
/* this array is kept in name order */
/* this maps the names to key2id and idCount */
unsigned key2id_oid[NUM_ID_SPACES];
+} KeyToID;
+
+typedef struct context_t {
+ KeyToID keyToID;
+ const KLoadProgressbar *progress[4];
+ MMArray *id2value;
+ MemBank *frags;
+ int64_t spotId;
+ int64_t primaryId;
+ int64_t secondId;
+ uint64_t alignCount;
unsigned pass;
bool isColorSpace;
@@ -165,7 +183,7 @@ typedef struct context_t {
static char const *Print_ctx_value_t(ctx_value_t const *const self)
{
- static char buffer[4096];
+ static char buffer[16384];
rc_t rc = string_printf(buffer, sizeof(buffer), NULL, "pid: { %lu, %lu }, sid: %lu, fid: %u, alc: { %u, %u }, flg: %x", CTX_VALUE_GET_P_ID(*self, 0), CTX_VALUE_GET_P_ID(*self, 1), CTX_VALUE_GET_S_ID(*self), self->fragmentId, self->alignmentCount[0], self->alignmentCount[1], *(self->alignmentCount + sizeof(self->alignmentCount)/sizeof(self->alignmentCount[0])));
if (rc)
@@ -278,6 +296,31 @@ static void MMArrayLock(MMArray *const self)
#endif
}
+/* MMArrayClear
+ *  zero-fills every sub-chunk that has a base address, leaving the
+ *  sub-chunks themselves in place (compare MMArrayWhack)
+ *
+ *  when PROT is enabled each sub-chunk is made read/write for the fill and
+ *  set to PROT_NONE afterwards, and self->current is reset to NULL
+ */
+static void MMArrayClear(MMArray *self)
+{
+    size_t const bytes = MMA_SUBCHUNK_SIZE * self->elemSize;
+    unsigned const mapCount = sizeof(self->map) / sizeof(self->map[0]);
+    unsigned const subCount = sizeof(self->map[0].submap) / sizeof(self->map[0].submap[0]);
+    unsigned m;
+
+    for (m = 0; m < mapCount; ++m) {
+        unsigned s;
+
+        for (s = 0; s < subCount; ++s) {
+            void *const base = self->map[m].submap[s].base;
+
+            /* nothing to clear for a sub-chunk without a base address */
+            if (base == NULL)
+                continue;
+#if PROT
+            mprotect(base, bytes, PROT_READ|PROT_WRITE);
+#endif
+            memset(base, 0, bytes);
+#if PROT
+            mprotect(base, bytes, PROT_NONE);
+#endif
+        }
+    }
+#if PROT
+    self->current = NULL;
+#endif
+}
+
static void MMArrayWhack(MMArray *self)
{
size_t const chunk = MMA_SUBCHUNK_SIZE * self->elemSize;
@@ -330,7 +373,7 @@ static rc_t OpenKBTree(KBTree **const rslt, unsigned n, unsigned max)
return rc;
}
-static rc_t GetKeyIDOld(context_t *const ctx, uint64_t *const rslt, bool *const wasInserted, char const key[], char const name[], unsigned const namelen)
+static rc_t GetKeyIDOld(KeyToID *const ctx, uint64_t *const rslt, bool *const wasInserted, char const key[], char const name[], unsigned const namelen)
{
unsigned const keylen = strlen(key);
rc_t rc;
@@ -467,7 +510,7 @@ static size_t GetFixedNameLength(char const name[], size_t const namelen)
}
static
-rc_t GetKeyID(context_t *const ctx,
+rc_t GetKeyID(KeyToID *const ctx,
uint64_t *const rslt,
bool *const wasInserted,
char const key[],
@@ -606,7 +649,7 @@ static rc_t TmpfsDirectory(KDirectory **const rslt)
KDirectory *dir;
rc_t rc = KDirectoryNativeDir(&dir);
if (rc == 0) {
- rc = KDirectoryOpenDirUpdate(dir, rslt, false, "%s", G.tmpfs);
+ rc = KDirectoryOpenDirUpdate(dir, rslt, false, "%s", G.tmpfs);
KDirectoryRelease(dir);
}
return rc;
@@ -616,7 +659,7 @@ static rc_t SetupContext(context_t *ctx, unsigned numfiles)
{
rc_t rc = 0;
- memset(ctx, 0, sizeof(*ctx));
+ // memset(ctx, 0, sizeof(*ctx));
if (G.mode == mode_Archive) {
KDirectory *dir;
@@ -625,13 +668,6 @@ static rc_t SetupContext(context_t *ctx, unsigned numfiles)
fragSize[1] = (G.cache_size / 8);
fragSize[0] = fragSize[1] * 4;
- rc = KLoadProgressbar_Make(&ctx->progress[0], 0); if (rc) return rc;
- rc = KLoadProgressbar_Make(&ctx->progress[1], 0); if (rc) return rc;
- rc = KLoadProgressbar_Make(&ctx->progress[2], 0); if (rc) return rc;
- rc = KLoadProgressbar_Make(&ctx->progress[3], 0); if (rc) return rc;
-
- KLoadProgressbar_Append(ctx->progress[0], 100 * numfiles);
-
rc = TmpfsDirectory(&dir);
if (rc == 0)
rc = OpenMMapFile(ctx, dir);
@@ -639,6 +675,24 @@ static rc_t SetupContext(context_t *ctx, unsigned numfiles)
rc = MemBankMake(&ctx->frags, dir, G.pid, fragSize);
KDirectoryRelease(dir);
}
+ else if (G.mode == mode_Remap) {
+ KeyToID const save1 = ctx->keyToID;
+ MMArray *const save2 = ctx->id2value;
+ int64_t const save3 = ctx->spotId;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->keyToID = save1;
+ ctx->id2value = save2;
+ ctx->spotId = save3;
+ }
+
+ rc = KLoadProgressbar_Make(&ctx->progress[0], 0); if (rc) return rc;
+ rc = KLoadProgressbar_Make(&ctx->progress[1], 0); if (rc) return rc;
+ rc = KLoadProgressbar_Make(&ctx->progress[2], 0); if (rc) return rc;
+ rc = KLoadProgressbar_Make(&ctx->progress[3], 0); if (rc) return rc;
+
+ KLoadProgressbar_Append(ctx->progress[0], 100 * numfiles);
+
return rc;
}
@@ -648,13 +702,16 @@ static void ContextReleaseMemBank(context_t *ctx)
ctx->frags = NULL;
}
-static void ContextRelease(context_t *ctx)
+static void ContextRelease(context_t *ctx, bool continuing)
{
KLoadProgressbar_Release(ctx->progress[0], true);
KLoadProgressbar_Release(ctx->progress[1], true);
KLoadProgressbar_Release(ctx->progress[2], true);
KLoadProgressbar_Release(ctx->progress[3], true);
- MMArrayWhack(ctx->id2value);
+ if (!continuing)
+ MMArrayWhack(ctx->id2value);
+ else
+ MMArrayClear(ctx->id2value);
}
static
@@ -719,9 +776,35 @@ void COPY_READ(INSDC_dna_text D[], INSDC_dna_text const S[], unsigned const L, b
memcpy(D, S, L);
}
+/* MakeDeferralFile
+ *  creates the scratch file handed to BAM_FileMake as its "defer" argument
+ *
+ *  returns NULL when G.deferSecondary is not set, and also when the file
+ *  cannot be created (a warning is logged in that case). Otherwise returns
+ *  a KFile opened for update on a new "defer.XXXXXX" file under G.tmpfs.
+ *  The name is unlinked as soon as the KFile is open, so the file has no
+ *  directory entry once this function returns.
+ *
+ *  NOTE(review): returning NULL on failure means the load continues without
+ *  a deferral file - confirm that BAM_FileMake treats that the same as
+ *  deferral being switched off.
+ */
+static KFile *MakeDeferralFile(void)
+{
+    char template[4096];
+    KDirectory *d = NULL;
+    KFile *f = NULL;
+    size_t nwrit = 0;
+    int fd;
+    rc_t rc;
+
+    if (!G.deferSecondary)
+        return NULL;
+
+    rc = string_printf(template, sizeof(template), &nwrit, "%s/defer.XXXXXX", G.tmpfs);
+    if (rc == 0)
+        rc = KDirectoryNativeDir(&d);
+    if (rc) {
+        (void)LOGERR(klogWarn, rc, "Failed to set up deferral file");
+        return NULL;
+    }
+
+    fd = mkstemp(template);
+    if (fd < 0) {
+        (void)PLOGMSG(klogWarn, (klogWarn, "Failed to create deferral file '$(file)'", "file=%s", template));
+        KDirectoryRelease(d);
+        return NULL;
+    }
+
+    /* the path is passed as an argument, not as the format, so that a '%'
+     * in G.tmpfs cannot be interpreted as a conversion */
+    rc = KDirectoryOpenFileWrite(d, &f, true, "%s", template);
+
+    /* the mkstemp descriptor only served to create the file; drop it and the
+     * name whether or not the KFile could be opened */
+    close(fd);
+    unlink(template);
+    KDirectoryRelease(d);
+
+    if (rc) {
+        (void)LOGERR(klogWarn, rc, "Failed to open deferral file");
+        return NULL;
+    }
+    return f;
+}
+
static rc_t OpenBAM(const BAM_File **bam, VDatabase *db, const char bamFile[])
{
- rc_t rc = BAM_FileMakeWithHeader(bam, G.headerText, "%s", bamFile);
+ rc_t rc = 0;
+
+ if (strcmp(bamFile, "/dev/stdin") == 0) {
+ rc = BAM_FileMake(bam, MakeDeferralFile(), G.headerText, "/dev/stdin");
+ }
+ else {
+ rc = BAM_FileMake(bam, MakeDeferralFile(), G.headerText, "%s", bamFile);
+ }
if (rc) {
(void)PLOGERR(klogErr, (klogErr, rc, "Failed to open '$(file)'", "file=%s", bamFile));
}
@@ -771,8 +854,7 @@ static rc_t VerifyReferences(BAM_File const *bam, Reference const *ref)
(void)PLOGMSG(klogWarn, (klogWarn, "Reference: '$(name)', Length: $(len); checksums do not match", "name=%s,len=%u", refSeq->name, (unsigned)refSeq->length));
#endif
}
- else
- if (GetRCObject(rc) == rcSize && GetRCState(rc) == rcUnequal) {
+ else if (GetRCObject(rc) == rcSize && GetRCState(rc) == rcUnequal) {
(void)PLOGMSG(klogWarn, (klogWarn, "Reference: '$(name)', Length: $(len); lengths do not match", "name=%s,len=%u", refSeq->name, (unsigned)refSeq->length));
}
else if (GetRCObject(rc) == rcSize && GetRCState(rc) == rcEmpty) {
@@ -946,23 +1028,71 @@ void RecordNoMatch(char const readName[], char const refName[], uint32_t const r
}
}
+static LowMatchCounter *lmc = NULL;
+
static
rc_t LogNoMatch(char const readName[], char const refName[], unsigned rpos, unsigned matches)
{
rc_t const rc = CheckLimitAndLogError();
static unsigned count = 0;
+ if (lmc == NULL)
+ lmc = LowMatchCounterMake();
+ assert(lmc != NULL);
+ LowMatchCounterAdd(lmc, refName);
+
++count;
if (rc) {
(void)PLOGMSG(klogInfo, (klogInfo, "This is the last warning; this class of warning occurred $(occurred) times",
"occurred=%u", count));
- (void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(name)' contains too few ($(count)) matching bases to reference '$(ref)' at $(pos)",
+ (void)PLOGMSG(klogErr, (klogErr, "Spot '$(name)' contains too few ($(count)) matching bases to reference '$(ref)' at $(pos)",
"name=%s,ref=%s,pos=%u,count=%u", readName, refName, rpos, matches));
+ return rc;
}
- else if (G.maxWarnCount_NoMatch == 0 || count < G.maxWarnCount_NoMatch)
+ if (G.maxWarnCount_NoMatch == 0 || count < G.maxWarnCount_NoMatch)
(void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(name)' contains too few ($(count)) matching bases to reference '$(ref)' at $(pos)",
"name=%s,ref=%s,pos=%u,count=%u", readName, refName, rpos, matches));
- return rc;
+ return 0;
+}
+
+/* state handed through LowMatchCounterEach to RecordLowMatchCount */
+struct rlmc_context {
+ KMDataNode *node; /* parent under which the LOW_MATCH_COUNT_<n> nodes are opened */
+ unsigned node_number; /* incremented before each child is opened; supplies <n> */
+ rc_t rc; /* result of the last metadata call; once non-zero, later entries are skipped */
+};
+
+/* RecordLowMatchCount
+ *  callback for LowMatchCounterEach: stores one reference's count in a
+ *  child node named LOW_MATCH_COUNT_<n>, with the reference name in the
+ *  REFNAME attribute and the count as a 32-bit value
+ *
+ *  "Ctx" [ IN/OUT ] - a struct rlmc_context; does nothing if its rc is
+ *  already non-zero, otherwise leaves the result of the writes in rc
+ */
+static void RecordLowMatchCount(void *Ctx, char const name[], unsigned const count)
+{
+    struct rlmc_context *const state = Ctx;
+    uint32_t const count32 = count;
+    KMDataNode *child = NULL;
+
+    if (state->rc != 0)
+        return;
+
+    state->node_number += 1;
+    state->rc = KMDataNodeOpenNodeUpdate(state->node, &child, "LOW_MATCH_COUNT_%u", state->node_number);
+    if (state->rc != 0)
+        return;
+
+    state->rc = KMDataNodeWriteAttr(child, "REFNAME", name);
+    if (state->rc == 0)
+        state->rc = KMDataNodeWriteB32(child, &count32);
+
+    KMDataNodeRelease(child);
+}
+
+/* RecordLowMatchCounts
+ *  writes one LOW_MATCH_COUNT_<n> child of "node" for every entry that
+ *  LowMatchCounterEach reports from the file-level counter "lmc"
+ *
+ *  returns 0 when "node" is NULL (nothing is written), otherwise the rc
+ *  left behind by RecordLowMatchCount
+ */
+static rc_t RecordLowMatchCounts(KMDataNode *const node)
+{
+    struct rlmc_context ctx;
+
+    /* filled in before the NULL test so that the value returned below is
+     * never indeterminate */
+    ctx.node = node;
+    ctx.node_number = 0;
+    ctx.rc = 0;
+
+    assert(lmc != NULL);
+    if (node != NULL)
+        LowMatchCounterEach(lmc, &ctx, RecordLowMatchCount);
+
+    return ctx.rc;
}
static
@@ -994,9 +1124,20 @@ static char const *const CHANGED[] = {
"record made unfragmented",
"mate alignment lost",
"record discarded",
- "reference name changed"
+ "reference name changed",
+ "CIGAR changed"
};
+#define FLAG_CHANGED (0)
+#define QUAL_CHANGED (1)
+#define SEQ_CHANGED (2)
+#define MAKE_UNALIGNED (3)
+#define MAKE_UNFRAGMENTED (4)
+#define MATE_LOST (5)
+#define DISCARDED (6)
+#define REF_NAME_CHANGED (7)
+#define CIGAR_CHANGED (8)
+
static char const *const REASONS[] = {
/* FLAG changed */
"0x400 and 0x200 both set", /* 0 */
@@ -1030,44 +1171,52 @@ static char const *const REASONS[] = {
/* discarded */
"conflicting PCR duplicate", /* 24 */
"conflicting fragment info", /* 25 */
- "reference is skipped" /* 26 */
+ "reference is skipped", /* 26 */
/* reference name changed */
- "reference was named more than once" /* 27 */
+ "reference was named more than once", /* 27 */
+/* CIGAR changed */
+ "alignment overhanging end of reference", /* 28 */
+/* discarded */
+ "hard-clipped secondary alignment", /* 29 */
+ "low-matching secondary alignment", /* 30 */
};
static struct {
unsigned what, why;
} const CHANGES[] = {
- {0, 0},
- {0, 1},
- {0, 2},
- {0, 3},
- {1, 4},
- {1, 5},
- {1, 6},
- {1, 7},
- {1, 8},
- {2, 8},
- {3, 9},
- {3, 10},
- {3, 11},
- {3, 12},
- {3, 13},
- {3, 14},
- {3, 15},
- {3, 16},
- {3, 17},
- {3, 18},
- {4, 19},
- {4, 20},
- {5, 21},
- {5, 22},
- {5, 23},
- {6, 24},
- {6, 25},
- {6, 26},
- {6, 17},
- {7, 27},
+ {FLAG_CHANGED, 0},
+ {FLAG_CHANGED, 1},
+ {FLAG_CHANGED, 2},
+ {FLAG_CHANGED, 3},
+ {QUAL_CHANGED, 4},
+ {QUAL_CHANGED, 5},
+ {QUAL_CHANGED, 6},
+ {QUAL_CHANGED, 7},
+ {QUAL_CHANGED, 8},
+ {SEQ_CHANGED, 8},
+ {MAKE_UNALIGNED, 9},
+ {MAKE_UNALIGNED, 10},
+ {MAKE_UNALIGNED, 11},
+ {MAKE_UNALIGNED, 12},
+ {MAKE_UNALIGNED, 13},
+ {MAKE_UNALIGNED, 14},
+ {MAKE_UNALIGNED, 15},
+ {MAKE_UNALIGNED, 16},
+ {MAKE_UNALIGNED, 17},
+ {MAKE_UNALIGNED, 18},
+ {MAKE_UNFRAGMENTED, 19},
+ {MAKE_UNFRAGMENTED, 20},
+ {MATE_LOST, 21},
+ {MATE_LOST, 22},
+ {MATE_LOST, 23},
+ {DISCARDED, 24},
+ {DISCARDED, 25},
+ {DISCARDED, 26},
+ {DISCARDED, 17},
+ {REF_NAME_CHANGED, 27},
+ {CIGAR_CHANGED, 28},
+ {DISCARDED, 29},
+ {DISCARDED, 30},
};
#define NUMBER_OF_CHANGES ((unsigned)(sizeof(CHANGES)/sizeof(CHANGES[0])))
@@ -1167,6 +1316,121 @@ static rc_t RecordChanges(KMDataNode *const node, char const name[])
#define DISCARD_SKIP_REFERENCE do { LOG_CHANGE(27); } while(0)
#define DISCARD_UNKNOWN_REFERENCE do { LOG_CHANGE(28); } while(0)
#define RENAMED_REFERENCE do { LOG_CHANGE(29); } while(0)
+#define OVERHANGING_ALIGNMENT do { LOG_CHANGE(30); } while(0)
+#define DISCARD_HARDCLIP_SECONDARY do { LOG_CHANGE(31); } while(0)
+#define DISCARD_BAD_SECONDARY do { LOG_CHANGE(32); } while(0)
+
+/* isHardClipped
+ *  true if any of the first "ops" CIGAR elements has op code 5 in its low
+ *  four bits (the code this file converts to soft clip, code 4, when hard
+ *  clips are not accepted)
+ */
+static bool isHardClipped(unsigned const ops, uint32_t const cigar[/* ops */])
+{
+    enum { OP_CODE_MASK = 0x0F, OP_HARD_CLIP = 5 };
+    uint32_t const *cur = cigar;
+    unsigned remaining = ops;
+
+    while (remaining-- > 0) {
+        if ((*cur++ & OP_CODE_MASK) == OP_HARD_CLIP)
+            return true;
+    }
+    return false;
+}
+
+/* FixOverhangingAlignment
+ *  if the alignment runs past the end of the reference, shorten the CIGAR
+ *  op that crosses the end and soft-clip the remainder of the read
+ *
+ *  the CIGAR is rewritten only when the crossing op advances the reference
+ *  position and more than half of the read lies before the reference end;
+ *  in every other case it is left untouched
+ *
+ *  "cigBuf" [ IN/OUT ] - CIGAR elements encoded as (length << 4) | code;
+ *  resized to hold the rewritten CIGAR
+ *
+ *  "opCount" [ IN/OUT ] - number of elements; updated only after a
+ *  successful rewrite
+ *
+ *  "refPos" [ IN ] - reference position at which the first op starts
+ *
+ *  "refLen" [ IN ] - length of the reference
+ *
+ *  "readlen" [ IN ] - length of the read
+ *
+ *  returns 0, or the rc from KDataBufferResize
+ */
+static rc_t FixOverhangingAlignment(KDataBuffer *cigBuf, uint32_t *opCount, uint32_t refPos, uint32_t refLen, uint32_t readlen)
+{
+    uint32_t const *const cigar = cigBuf->base;
+    uint32_t const count = *opCount;
+    /* 64-bit signed arithmetic: no wrap-around for large positions and no
+     * mixed signed/unsigned comparisons */
+    int64_t refend = refPos;
+    int64_t seqpos = 0;
+    uint32_t i;
+
+    for (i = 0; i < count; ++i) {
+        uint32_t const op = cigar[i];
+        int64_t const len = op >> 4;
+        uint32_t const code = op & 0x0F;
+        bool onRead = false;    /* op advances the read position */
+        bool onRef = false;     /* op advances the reference position */
+
+        switch (code) {
+        case 0: /* M */
+        case 7: /* = */
+        case 8: /* X */
+            onRead = true;
+            onRef = true;
+            break;
+        case 2: /* D */
+        case 3: /* N */
+            onRef = true;
+            break;
+        case 1: /* I */
+        case 4: /* S */
+        case 9: /* B */
+            onRead = true;
+            break;
+        default:
+            break;
+        }
+        if (onRead)
+            seqpos += len;
+        if (onRef)
+            refend += len;
+
+        if (refend > refLen) {
+            int64_t const chop = refend - refLen;   /* reference bases beyond the end */
+            int64_t const newlen = len - chop;      /* what remains of this op */
+            /* read bases placed before the reference end; an op that does
+             * not advance the read (D, N) has contributed none to seqpos */
+            int64_t const left = onRead ? seqpos - chop : seqpos;
+
+            /* the range checks reject starts beyond the reference end and
+             * CIGARs longer than the read; both would otherwise yield
+             * negative lengths */
+            if (onRef && newlen >= 0 && left >= 0 && left <= readlen && left * 2 > readlen) {
+                uint32_t *rewritten;
+                rc_t const rc = KDataBufferResize(cigBuf, i + 2);
+
+                if (rc) return rc;
+                rewritten = cigBuf->base;   /* re-read: resize may move the data */
+                /* NOTE(review): newlen is 0 when the op starts exactly at
+                 * the reference end, leaving a zero-length op in place */
+                rewritten[i    ] = ((uint32_t)newlen << 4) | code;
+                rewritten[i + 1] = ((uint32_t)(readlen - left) << 4) | 4;
+                *opCount = i + 2;
+                OVERHANGING_ALIGNMENT;
+            }
+            /* the first op past the end settles the outcome; carrying on
+             * would only recompute against ops that lie wholly off the
+             * reference */
+            break;
+        }
+    }
+    return 0;
+}
+
+static context_t GlobalContext;
+
+#if THREADING_BAMREAD
+
+timeout_t bamq_tm;
+KQueue *bamq;
+/* run_bamread_thread
+ *  thread body started from ProcessBAM: reads alignments from "file" with
+ *  BAM_FileRead2, copies each one, and pushes the copy onto the global
+ *  queue "bamq" until a read, copy, key lookup or push fails
+ *
+ *  "file" [ IN ] - passed straight to BAM_FileRead2; ProcessBAM supplies
+ *  its BAM_File here
+ *
+ *  seals "bamq" before returning; returns the rc that ended the loop
+ */
+static rc_t run_bamread_thread(const KThread *self, void *const file)
+{
+ rc_t rc = 0;
+
+ while (rc == 0) {
+ BAM_Alignment const *crec = NULL;
+ BAM_Alignment *rec = NULL;
+
+ rc = BAM_FileRead2(file, &crec);
+ if (rc) break;
+ /* the queue receives a copy; the record from the reader is released at once */
+ rc = BAM_AlignmentCopy(crec, &rec);
+ BAM_AlignmentRelease(crec);
+ if (rc) break;
+
+#if THREADING_BAMREAD_PRIME_NAME2KEY
+ {
+ static char const dummy[] = "";
+ char const *spotGroup;
+ char const *name;
+ size_t namelen;
+
+ /* look the key up here and store the result in the record, so that
+ * ProcessBAM can read rec->keyId / rec->wasInserted instead of calling
+ * GetKeyID itself */
+ BAM_AlignmentGetReadName2(rec, &name, &namelen);
+ BAM_AlignmentGetReadGroupName(rec, &spotGroup);
+ rc = GetKeyID(&GlobalContext.keyToID, &rec->keyId, &rec->wasInserted, spotGroup ? spotGroup : dummy, name, namelen);
+ /* NOTE(review): rec is not released on this exit, nor when the push
+ * below fails - looks like a leak of the copy; confirm how copies made
+ * by BAM_AlignmentCopy are meant to be freed */
+ if (rc) break;
+ }
+#endif
+ /* retry the push for as long as it fails with a timeout */
+ /* NOTE(review): bamq_tm is also passed to KQueuePop by ProcessBAM on the
+ * consumer side - confirm a timeout_t may be shared between threads */
+ for ( ; ; ) {
+ rc = KQueuePush(bamq, rec, &bamq_tm);
+ if (rc == 0 || (int)GetRCObject(rc) != rcTimeout)
+ break;
+ }
+ }
+ (void)PLOGERR(klogInfo, (klogInfo, rc, "bamread_thread done", NULL));
+ /* sealing is what lets the consumer's KQueuePop loop end */
+ KQueueSeal(bamq);
+ return rc;
+}
+#endif
+
+
static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
Reference *ref, Sequence *seq, Alignment *align,
@@ -1179,10 +1443,10 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
KDataBuffer cigBuf;
rc_t rc;
int32_t lastRefSeqId = -1;
+ bool wasRenamed = false;
size_t rsize;
uint64_t keyId = 0;
uint64_t reccount = 0;
- SequenceRecord srec;
char spotGroup[512];
size_t namelen;
float progress = 0.0;
@@ -1201,8 +1465,25 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
char alignGroup[32];
size_t alignGroupLen;
AlignmentRecord data;
+#if THREADING_BAMREAD
+ KThread *bamread_thread=NULL;
+#endif
+ KDataBuffer seqBuffer;
+ KDataBuffer qualBuffer;
+ SequenceRecord srec;
+ SequenceRecordStorage srecStorage;
memset(&data, 0, sizeof(data));
+ memset(&srec, 0, sizeof(srec));
+
+ srec.ti = srecStorage.ti;
+ srec.readStart = srecStorage.readStart;
+ srec.readLen = srecStorage.readLen;
+ srec.orientation = srecStorage.orientation;
+ srec.is_bad = srecStorage.is_bad;
+ srec.alignmentCount = srecStorage.alignmentCount;
+ srec.aligned = srecStorage.aligned;
+ srec.cskey = srecStorage. cskey;
rc = OpenBAM(&bam, db, bamFile);
if (rc) return rc;
@@ -1213,15 +1494,15 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
return rc;
}
}
- if (ctx->key2id_max == 0) {
+ if (ctx->keyToID.key2id_max == 0) {
uint32_t rgcount;
unsigned rgi;
BAM_FileGetReadGroupCount(bam, &rgcount);
- if (rgcount > (sizeof(ctx->key2id)/sizeof(ctx->key2id[0]) - 1))
- ctx->key2id_max = 1;
+ if (rgcount > (sizeof(ctx->keyToID.key2id)/sizeof(ctx->keyToID.key2id[0]) - 1))
+ ctx->keyToID.key2id_max = 1;
else
- ctx->key2id_max = sizeof(ctx->key2id)/sizeof(ctx->key2id[0]);
+ ctx->keyToID.key2id_max = sizeof(ctx->keyToID.key2id)/sizeof(ctx->keyToID.key2id[0]);
for (rgi = 0; rgi != rgcount; ++rgi) {
BAMReadGroup const *rg;
@@ -1233,7 +1514,6 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
}
}
}
- memset(&srec, 0, sizeof(srec));
rc = KDataBufferMake(&cigBuf, 32, 0);
if (rc)
@@ -1247,9 +1527,26 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
if (rc)
return rc;
+ rc = KDataBufferMake(&seqBuffer, 8, 4096);
+ if (rc)
+ return rc;
+
+ rc = KDataBufferMake(&qualBuffer, 8, 4096);
+ if (rc)
+ return rc;
+
if (rc == 0) {
(void)PLOGMSG(klogInfo, (klogInfo, "Loading '$(file)'", "file=%s", bamFile));
}
+
+#if THREADING_BAMREAD
+ TimeoutInit(&bamq_tm,10000);
+ rc = KQueueMake (&bamq,4096);
+ if(rc) return rc;
+ rc = KThreadMake(&bamread_thread, run_bamread_thread, (void*)bam);
+ if(rc) return rc;
+#endif
+
while (rc == 0 && (rc = Quitting()) == 0) {
bool aligned;
uint32_t readlen;
@@ -1270,8 +1567,33 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
bool hasCG = false;
uint64_t ti = 0;
uint32_t csSeqLen = 0;
-
+ int lpad = 0;
+ int rpad = 0;
+ bool hardclipped = false;
+ bool revcmp = false;
+ char const *BX = NULL;
+
+#if THREADING_BAMREAD
+ for ( ; ; ) {
+ rc = KQueuePop(bamq, (void **)&rec, &bamq_tm);
+ if (rc == 0) break;
+ if ((int)GetRCObject(rc) == rcTimeout)
+ rc = 0;
+ else {
+ if ((int)GetRCObject(rc)==rcData && (int)GetRCState(rc)==rcDone)
+ (void)PLOGMSG(klogInfo, (klogInfo, "KQueuePop Done", NULL));
+ else
+ (void)PLOGERR(klogWarn, (klogWarn, rc, "KQueuePop Error", NULL));
+ KThreadWait(bamread_thread, &rc);
+ KThreadRelease(bamread_thread);
+ bamread_thread = NULL;
+ break;
+ }
+ }
+#else
rc = BAM_FileRead2(bam, &rec);
+#endif
+
if (rc) {
if (GetRCModule(rc) == rcAlign && GetRCObject(rc) == rcRow && GetRCState(rc) == rcNotFound) {
(void)PLOGMSG(klogInfo, (klogInfo, "EOF '$(file)'; read $(read); processed $(proc)", "file=%s,read=%lu,proc=%lu", bamFile, (unsigned long)recordsRead, (unsigned long)recordsProcessed));
@@ -1299,168 +1621,207 @@ static rc_t ProcessBAM(char const bamFile[], context_t *ctx, VDatabase *db,
}
}
+ BAM_AlignmentGetLinkageGroup(rec, &BX);
- /**************************************************************/
if (!G.noColorSpace) {
- if (BAM_AlignmentHasColorSpace(rec)) {/*BAM*/
+ if (BAM_AlignmentHasColorSpace(rec)) {
if (isNotColorSpace) {
MIXED_BASE_AND_COLOR:
rc = RC(rcApp, rcFile, rcReading, rcData, rcInconsistent);
(void)PLOGERR(klogErr, (klogErr, rc, "File '$(file)' contains base space and color space", "file=%s", bamFile));
goto LOOP_END;
}
- ctx->isColorSpace = isColorSpace = true;
+ /* COLORSPACE is disabled!
+ * ctx->isColorSpace = isColorSpace = true; */
}
else if (isColorSpace)
goto MIXED_BASE_AND_COLOR;
else
isNotColorSpace = true;
}
- rc = BAM_AlignmentCGReadLength(rec, &readlen);/*BAM*/
+ BAM_AlignmentGetFlags(rec, &flags);
+ BAM_AlignmentGetReadName2(rec, &name, &namelen);
+ isPrimary = (flags & (BAMFlags_IsNotPrimary|BAMFlags_IsSupplemental)) == 0 ? true : false;
+ if (!isPrimary && G.noSecondary)
+ goto LOOP_END;
+
+ {
+ char const *rgname;
+
+ BAM_AlignmentGetReadGroupName(rec, &rgname);
+ if (rgname)
+ strcpy(spotGroup, rgname);
+ else
+ spotGroup[0] = '\0';
+ }
+
+ rc = BAM_AlignmentCGReadLength(rec, &readlen);
if (rc != 0 && GetRCState(rc) != rcNotFound) {
(void)LOGERR(klogErr, rc, "Invalid CG data");
goto LOOP_END;
}
if (rc == 0) {
hasCG = true;
- BAM_AlignmentGetCigarCount(rec, &opCount);/*BAM*/
+ BAM_AlignmentGetCigarCount(rec, &opCount);
rc = KDataBufferResize(&cigBuf, opCount * 2 + 5);
if (rc) {
(void)LOGERR(klogErr, rc, "Failed to resize CIGAR buffer");
goto LOOP_END;
}
-
- rc = AlignmentRecordInit(&data, readlen);
- if (rc == 0)
- rc = KDataBufferResize(&buf, readlen);
- if (rc) {
- (void)LOGERR(klogErr, rc, "Failed to resize record buffer");
- goto LOOP_END;
- }
-
- seqDNA = buf.base;
- qual = (uint8_t *)&seqDNA[readlen];
}
else {
uint32_t const *tmp;
- BAM_AlignmentGetRawCigar(rec, &tmp, &opCount);/*BAM*/
+ BAM_AlignmentGetRawCigar(rec, &tmp, &opCount);
rc = KDataBufferResize(&cigBuf, opCount);
if (rc) {
(void)LOGERR(klogErr, rc, "Failed to resize CIGAR buffer");
goto LOOP_END;
}
memcpy(cigBuf.base, tmp, opCount * sizeof(uint32_t));
-
- BAM_AlignmentGetReadLength(rec, &readlen);/*BAM*/
- if (isColorSpace) {
- BAM_AlignmentGetCSSeqLen(rec, &csSeqLen);
- if (readlen > csSeqLen) {
- rc = RC(rcAlign, rcRow, rcReading, rcData, rcInconsistent);
- (void)LOGERR(klogErr, rc, "Sequence length and CS Sequence length are inconsistent");
- goto LOOP_END;
+ {
+ hardclipped = isHardClipped(opCount, cigBuf.base);
+ if (hardclipped) {
+ if (isPrimary) {
+ if (!G.acceptHardClip) {
+ rc = RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated);
+ (void)PLOGERR(klogErr, (klogErr, rc, "File '$(file)' contains hard clipped primary alignments", "file=%s", bamFile));
+ goto LOOP_END;
+ }
+ }
+ else if (!G.acceptHardClip) { /* convert to soft clip */
+ uint32_t *const cigar = cigBuf.base;
+ uint32_t const lOp = cigar[0];
+ uint32_t const rOp = cigar[opCount - 1];
+
+ lpad = (lOp & 0xF) == 5 ? (lOp >> 4) : 0;
+ rpad = (rOp & 0xF) == 5 ? (rOp >> 4) : 0;
+
+ if (lpad + rpad == 0) {
+ rc = RC(rcApp, rcFile, rcReading, rcData, rcInvalid);
+ (void)PLOGERR(klogErr, (klogErr, rc, "File '$(file)' contains invalid CIGAR", "file=%s", bamFile));
+ goto LOOP_END;
+ }
+ if (lpad != 0) {
+ uint32_t const new_lOp = (((uint32_t)lpad) << 4) | 4;
+ cigar[0] = new_lOp;
+ }
+ if (rpad != 0) {
+ uint32_t const new_rOp = (((uint32_t)rpad) << 4) | 4;
+ cigar[opCount - 1] = new_rOp;
+ }
+ }
}
- else if (readlen < csSeqLen)
- readlen = 0;
}
- else if (readlen == 0) {
- }
-
- rc = AlignmentRecordInit(&data, readlen | csSeqLen);
+ }
+ if (hasCG) {
+ rc = AlignmentRecordInit(&data, readlen);
if (rc == 0)
- rc = KDataBufferResize(&buf, readlen | csSeqLen);
+ rc = KDataBufferResize(&buf, readlen);
if (rc) {
(void)LOGERR(klogErr, rc, "Failed to resize record buffer");
goto LOOP_END;
}
seqDNA = buf.base;
- qual = (uint8_t *)&seqDNA[readlen | csSeqLen];
- }
- BAM_AlignmentGetReadName2(rec, &name, &namelen);/*BAM*/
- BAM_AlignmentGetSequence(rec, seqDNA);/*BAM*/
- if (G.useQUAL) {
- uint8_t const *squal;
-
- BAM_AlignmentGetQuality(rec, &squal);/*BAM*/
- memcpy(qual, squal, readlen);
- }
- else {
- uint8_t const *squal;
- uint8_t qoffset = 0;
- unsigned i;
+ qual = (uint8_t *)&seqDNA[readlen];
- rc = BAM_AlignmentGetQuality2(rec, &squal, &qoffset);/*BAM*/
- if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "Spot '$(name)': length of original quality does not match sequence", "name=%s", name));
- goto LOOP_END;
- }
- if (qoffset) {
- for (i = 0; i != readlen; ++i)
- qual[i] = squal[i] - qoffset;
- QUAL_CHANGED_OQ;
- }
- else
- memcpy(qual, squal, readlen);
- }
- if (hasCG) {
rc = BAM_AlignmentGetCGSeqQual(rec, seqDNA, qual);
if (rc == 0) {
- rc = BAM_AlignmentGetCGCigar(rec, cigBuf.base, cigBuf.elem_count, &opCount);/*BAM*/
+ rc = BAM_AlignmentGetCGCigar(rec, cigBuf.base, cigBuf.elem_count, &opCount);
}
if (rc) {
(void)LOGERR(klogErr, rc, "Failed to read CG data");
goto LOOP_END;
}
+ data.data.align_group.elements = 0;
+ data.data.align_group.buffer = alignGroup;
+ if (BAM_AlignmentGetCGAlignGroup(rec, alignGroup, sizeof(alignGroup), &alignGroupLen) == 0)
+ data.data.align_group.elements = alignGroupLen;
+ }
+ else {
+ BAM_AlignmentGetReadLength(rec, &readlen);
+ rc = AlignmentRecordInit(&data, readlen + lpad + rpad);
+ if (rc == 0)
+ rc = KDataBufferResize(&buf, readlen + lpad + rpad);
+ if (rc) {
+ (void)LOGERR(klogErr, rc, "Failed to resize record buffer");
+ goto LOOP_END;
+ }
+
+ seqDNA = buf.base;
+ qual = (uint8_t *)&seqDNA[(readlen | csSeqLen) + lpad + rpad];
+ memset(seqDNA, 'N', (readlen | csSeqLen) + lpad + rpad);
+ memset(qual, 0, (readlen | csSeqLen) + lpad + rpad);
+
+ BAM_AlignmentGetSequence(rec, seqDNA + lpad);
+ if (G.useQUAL) {
+ uint8_t const *squal;
+
+ BAM_AlignmentGetQuality(rec, &squal);
+ memcpy(qual + lpad, squal, readlen);
+ }
+ else {
+ uint8_t const *squal;
+ uint8_t qoffset = 0;
+ unsigned i;
+
+ rc = BAM_AlignmentGetQuality2(rec, &squal, &qoffset);
+ if (rc) {
+ (void)PLOGERR(klogErr, (klogErr, rc, "Spot '$(name)': length of original quality does not match sequence", "name=%s", name));
+ goto LOOP_END;
+ }
+ if (qoffset) {
+ for (i = 0; i != readlen; ++i)
+ qual[i + lpad] = squal[i] - qoffset;
+ QUAL_CHANGED_OQ;
+ }
+ else
+ memcpy(qual + lpad, squal, readlen);
+ }
+ readlen = readlen + lpad + rpad;
+ data.data.align_group.elements = 0;
+ data.data.align_group.buffer = alignGroup;
}
if (G.hasTI) {
- rc = BAM_AlignmentGetTI(rec, &ti);/*BAM*/
+ rc = BAM_AlignmentGetTI(rec, &ti);
if (rc)
ti = 0;
rc = 0;
}
- data.data.align_group.buffer = alignGroup;
- if (BAM_AlignmentGetCGAlignGroup(rec, alignGroup, sizeof(alignGroup), &alignGroupLen) == 0)/*BAM*/
- data.data.align_group.elements = alignGroupLen;
- else
- data.data.align_group.elements = 0;
- AR_MAPQ(data) = GetMapQ(rec);
- BAM_AlignmentGetFlags(rec, &flags);/*BAM*/
- BAM_AlignmentGetReadName2(rec, &name, &namelen);/*BAM*/
- {{
- char const *rgname;
-
- BAM_AlignmentGetReadGroupName(rec, &rgname);/*BAM*/
- if (rgname)
- strcpy(spotGroup, rgname);
- else
- spotGroup[0] = '\0';
- }}
- AR_REF_ORIENT(data) = (flags & BAMFlags_SelfIsReverse) == 0 ? false : true;/*BAM*/
- isPrimary = (flags & BAMFlags_IsNotPrimary) == 0 ? true : false;/*BAM*/
- if (G.noSecondary && !isPrimary)
+ rc = KDataBufferResize(&seqBuffer, readlen);
+ if (rc) {
+ (void)LOGERR(klogErr, rc, "Failed to resize record buffer");
+ goto LOOP_END;
+ }
+ rc = KDataBufferResize(&qualBuffer, readlen);
+ if (rc) {
+ (void)LOGERR(klogErr, rc, "Failed to resize record buffer");
goto LOOP_END;
- originally_aligned = (flags & BAMFlags_SelfIsUnmapped) == 0;/*BAM*/
+ }
+ AR_REF_ORIENT(data) = (flags & BAMFlags_SelfIsReverse) == 0 ? false : true;
+ originally_aligned = (flags & BAMFlags_SelfIsUnmapped) == 0;
aligned = originally_aligned;
-#if 0
- if (originally_aligned && AR_MAPQ(data) < G.minMapQual) {
- aligned = false;
- UNALIGNED_LOW_MAPQ;
+
+ rpos = -1;
+ if (aligned) {
+ BAM_AlignmentGetPosition(rec, &rpos);
+ BAM_AlignmentGetRefSeqId(rec, &refSeqId);
+ if (refSeqId != lastRefSeqId) {
+ refSeq = NULL;
+ BAM_FileGetRefSeqById(bam, refSeqId, &refSeq);
+ }
}
-#else
- /* min-mapq now only applies to secondary alignment to match cg-load
- * see [SRA-2778] in JIRA
- */
+
+ revcmp = (isColorSpace && !aligned) ? false : AR_REF_ORIENT(data);
+ (void)PLOGMSG(klogDebug, (klogDebug, "Read '$(name)' is $(or) at $(ref):$(pos)", "name=%s,or=%s,ref=%s,pos=%i", name, revcmp ? "reverse" : "forward", refSeq ? refSeq->name : "(none)", rpos));
+ COPY_READ(seqBuffer.base, seqDNA, readlen, revcmp);
+ COPY_QUAL(qualBuffer.base, qual, readlen, revcmp);
+
+ AR_MAPQ(data) = GetMapQ(rec);
if (!isPrimary && AR_MAPQ(data) < G.minMapQual)
goto LOOP_END;
-#endif
- if (aligned && isColorSpace && readlen == 0) {
- /* detect hard clipped colorspace */
- /* reads and make unaligned */
- aligned = false;
- UNALIGNED_HARD_CLIPPED_CS;
- }
if (aligned && align == NULL) {
rc = RC(rcApp, rcFile, rcReading, rcData, rcInconsistent);
@@ -1468,8 +1829,6 @@ MIXED_BASE_AND_COLOR:
goto LOOP_END;
}
while (aligned) {
- BAM_AlignmentGetPosition(rec, &rpos);/*BAM*/
- BAM_AlignmentGetRefSeqId(rec, &refSeqId);/*BAM*/
if (rpos >= 0 && refSeqId >= 0) {
if (refSeqId == skipRefSeqID) {
DISCARD_SKIP_REFERENCE;
@@ -1481,10 +1840,6 @@ MIXED_BASE_AND_COLOR:
break;
}
unmapRefSeqId = -1;
- if (refSeqId == lastRefSeqId)
- break;
- refSeq = NULL;
- BAM_FileGetRefSeqById(bam, refSeqId, &refSeq);/*BAM*/
if (refSeq == NULL) {
rc = SILENT_RC(rcApp, rcFile, rcReading, rcData, rcInconsistent);
(void)PLOGERR(klogWarn, (klogWarn, rc, "File '$(file)': Spot '$(name)' refers to an unknown Reference number $(refSeqId)", "file=%s,refSeqId=%i,name=%s", bamFile, (int)refSeqId, name));
@@ -1494,7 +1849,6 @@ MIXED_BASE_AND_COLOR:
}
else {
bool shouldUnmap = false;
- bool wasRenamed = false;
if (G.refFilter && strcmp(G.refFilter, refSeq->name) != 0) {
(void)PLOGMSG(klogInfo, (klogInfo, "Skipping Reference '$(name)'", "name=%s", refSeq->name));
@@ -1511,9 +1865,6 @@ MIXED_BASE_AND_COLOR:
unmapRefSeqId = refSeqId;
UNALIGNED_UNALIGNED_REF;
}
- if (wasRenamed) {
- RENAMED_REFERENCE;
- }
break;
}
if (GetRCObject(rc) == rcConstraint && GetRCState(rc) == rcViolated) {
@@ -1544,10 +1895,15 @@ MIXED_BASE_AND_COLOR:
aligned = false;
}
if (!aligned && (G.refFilter != NULL || G.limit2config)) {
- assert("this shouldn't happen");
+ assert(!"this shouldn't happen");
goto LOOP_END;
}
- rc = GetKeyID(ctx, &keyId, &wasInserted, spotGroup, name, namelen);
+#if THREADING_BAMREAD_PRIME_NAME2KEY
+ keyId = rec->keyId;
+ wasInserted = rec->wasInserted;
+#else
+ rc = GetKeyID(&ctx->keyToID, &keyId, &wasInserted, spotGroup, name, namelen);
+#endif
if (rc) {
(void)PLOGERR(klogErr, (klogErr, rc, "KBTreeEntry: failed on key '$(key)'", "key=%.*s", namelen, name));
goto LOOP_END;
@@ -1561,10 +1917,10 @@ MIXED_BASE_AND_COLOR:
AR_KEY(data) = keyId;
mated = false;
- if (flags & BAMFlags_WasPaired) {/*BAM*/
- if ((flags & BAMFlags_IsFirst) != 0)/*BAM*/
+ if (flags & BAMFlags_WasPaired) {
+ if ((flags & BAMFlags_IsFirst) != 0)
AR_READNO(data) |= 1;
- if ((flags & BAMFlags_IsSecond) != 0)/*BAM*/
+ if ((flags & BAMFlags_IsSecond) != 0)
AR_READNO(data) |= 2;
switch (AR_READNO(data)) {
case 1:
@@ -1591,20 +1947,29 @@ MIXED_BASE_AND_COLOR:
AR_READNO(data) = 1;
if (wasInserted) {
+ if (G.mode == mode_Remap) {
+ (void)PLOGERR(klogErr, (klogErr, rc = RC(rcApp, rcFile, rcReading, rcData, rcInconsistent),
+ "Spot '$(name)' is a new spot, not a remapping",
+ "name=%s", name));
+ goto LOOP_END;
+ }
memset(value, 0, sizeof(*value));
value->unmated = !mated;
- value->pcr_dup = (flags & BAMFlags_IsDuplicate) == 0 ? 0 : 1;/*BAM*/
- value->platform = GetINSDCPlatform(bam, spotGroup);
+ if (isPrimary || G.assembleWithSecondary) {
+ value->pcr_dup = (flags & BAMFlags_IsDuplicate) == 0 ? 0 : 1;
+ value->platform = GetINSDCPlatform(bam, spotGroup);
+ value->primary_is_set = 1;
+ }
}
- else {
- int const o_pcr_dup = value->pcr_dup;
+ else if (isPrimary || G.assembleWithSecondary) {
+ int o_pcr_dup = value->pcr_dup;
int const n_pcr_dup = (flags & BAMFlags_IsDuplicate) == 0 ? 0 : 1;
-
- if (!G.acceptBadDups && o_pcr_dup != n_pcr_dup) {
- rc = LogDupConflict(name);
- DISCARD_PCR_DUP;
- goto LOOP_END;
+
+ if (!value->primary_is_set) {
+ o_pcr_dup = n_pcr_dup;
+ value->primary_is_set = 1;
}
+
value->pcr_dup = o_pcr_dup & n_pcr_dup;
if (o_pcr_dup != (o_pcr_dup & n_pcr_dup)) {
FLAG_CHANGED_PCR_DUP;
@@ -1626,9 +1991,6 @@ MIXED_BASE_AND_COLOR:
goto LOOP_END;
}
}
-
- ++recordsProcessed;
-
if (isPrimary) {
switch (AR_READNO(data)) {
case 1:
@@ -1657,40 +2019,56 @@ MIXED_BASE_AND_COLOR:
break;
}
}
+ if (hardclipped) {
+ value->hardclipped = 1;
+ }
+#if 0 /** EY TO REVIEW **/
+ if (!isPrimary && value->hardclipped) {
+ DISCARD_HARDCLIP_SECONDARY;
+ goto LOOP_END;
+ }
+#endif
+
+ ++recordsProcessed;
+
data.isPrimary = isPrimary;
if (aligned) {
uint32_t matches = 0;
+ uint32_t misses = 0;
uint8_t rna_orient = ' ';
+ FixOverhangingAlignment(&cigBuf, &opCount, rpos, refSeq->length, readlen);
BAM_AlignmentGetRNAStrand(rec, &rna_orient);
- rc = ReferenceRead(ref, &data, rpos, cigBuf.base, opCount, seqDNA, readlen,
- rna_orient == '+' ? NCBI_align_ro_intron_plus :
- rna_orient == '-' ? NCBI_align_ro_intron_minus :
- hasCG ? NCBI_align_ro_complete_genomics :
- NCBI_align_ro_intron_unknown, &matches);
+ {
+ int const intronType = rna_orient == '+' ? NCBI_align_ro_intron_plus :
+ rna_orient == '-' ? NCBI_align_ro_intron_minus :
+ hasCG ? NCBI_align_ro_complete_genomics :
+ NCBI_align_ro_intron_unknown;
+ rc = ReferenceRead(ref, &data, rpos, cigBuf.base, opCount, seqDNA, readlen, intronType, &matches, &misses);
+ }
+ if (rc == 0 && (matches < G.minMatchCount || (matches == 0 && !G.acceptNoMatch))) {
+ if (isPrimary) {
+ if (misses > matches) {
+ RecordNoMatch(name, refSeq->name, rpos);
+ rc = LogNoMatch(name, refSeq->name, (unsigned)rpos, (unsigned)matches);
+ if (rc)
+ goto LOOP_END;
+ }
+ }
+ else {
+ (void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(name)' contains too few ($(count)) matching bases to reference '$(ref)' at $(pos); discarding secondary alignment",
+ "name=%s,ref=%s,pos=%u,count=%u", name, refSeq->name, (unsigned)rpos, (unsigned)matches));
+ DISCARD_BAD_SECONDARY;
+ rc = 0;
+ goto LOOP_END;
+ }
+ }
if (rc) {
aligned = false;
- if ( (GetRCState(rc) == rcViolated && GetRCObject(rc) == rcConstraint)
- || (GetRCState(rc) == rcExcessive && GetRCObject(rc) == rcRange))
- {
- RecordNoMatch(name, refSeq->name, rpos);
- }
- if (GetRCState(rc) == rcViolated && GetRCObject(rc) == rcConstraint) {
- rc = LogNoMatch(name, refSeq->name, (unsigned)rpos, (unsigned)matches);
- UNALIGNED_LOW_MATCH_COUNT;
- }
-#define DATA_INVALID_ERRORS_ARE_DEADLY 0
-#if DATA_INVALID_ERRORS_ARE_DEADLY
- else if (((int)GetRCObject(rc)) == ((int)rcData) && GetRCState(rc) == rcInvalid) {
- UNALIGNED_INVALID_INFO;
- (void)PLOGERR(klogWarn, (klogWarn, rc, "Spot '$(name)': bad alignment to reference '$(ref)' at $(pos)", "name=%s,ref=%s,pos=%u", name, refSeq->name, rpos));
- CheckLimitAndLogError();
- }
-#endif
- else if (((int)GetRCObject(rc)) == ((int)rcData) && GetRCState(rc) == rcNotAvailable) {
- (void)PLOGERR(klogWarn, (klogWarn, rc, "Spot '$(name)': sequence was hard clipped", "name=%s", name));
- CheckLimitAndLogError();
+ if (((int)GetRCObject(rc)) == ((int)rcData) && GetRCState(rc) == rcNotAvailable) {
+ /* because of code above converting hard clips to soft clips, this should be unreachable */
+ abort();
}
else if (((int)GetRCObject(rc)) == ((int)rcData)) {
UNALIGNED_INVALID_INFO;
@@ -1706,41 +2084,8 @@ MIXED_BASE_AND_COLOR:
if (rc) goto LOOP_END;
}
}
- if (isColorSpace) {
- /* must be after ReferenceRead */
- BAM_AlignmentGetCSKey(rec, &cskey);/*BAM*/
- BAM_AlignmentGetCSSequence(rec, seqDNA, csSeqLen);/*BAM*/
- if (!aligned && !G.useQUAL) {
- uint8_t const *squal;
- uint8_t qoffset = 0;
-
- rc = BAM_AlignmentGetCSQuality(rec, &squal, &qoffset);/*BAM*/
- if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "Spot '$(name)': length of colorspace quality does not match sequence", "name=%s", name));
- goto LOOP_END;
- }
- if (qoffset) {
- unsigned i;
-
- QUAL_CHANGED_UNALIGNED_CS;
- for (i = 0; i < csSeqLen; ++i)
- qual[i] = squal[i] - qoffset;
- }
- else
- memcpy(qual, squal, csSeqLen);
- readlen = csSeqLen;
- }
- }
- if (aligned) {
- if (G.editAlignedQual && EditAlignedQualities (qual, AR_HAS_MISMATCH(data), readlen)) {
- QUAL_CHANGED_ALIGNED_EDIT;
- }
- if (G.keepMismatchQual && EditUnalignedQualities(qual, AR_HAS_MISMATCH(data), readlen)) {
- QUAL_CHANGED_UNALIGN_EDIT;
- }
- }
- else if (isPrimary) {
+ if (!aligned && isPrimary) {
switch (AR_READNO(data)) {
case 1:
value->unaligned_1 = 1;
@@ -1770,204 +2115,316 @@ MIXED_BASE_AND_COLOR:
break;
}
}
- if (mated) {
- int64_t const spotId = CTX_VALUE_GET_S_ID(*value);
- uint32_t const fragmentId = value->fragmentId;
- bool const spotHasBeenWritten = (spotId != 0);
- bool const spotHasFragmentInfo = (fragmentId != 0);
- bool const spotIsFirstSeen = (spotHasBeenWritten || spotHasFragmentInfo) ? false : true;
-
- if (spotHasBeenWritten) {
- /* do nothing */
- }
- else if (spotIsFirstSeen) {
- /* start spot assembly */
- unsigned sz;
- FragmentInfo fi;
- int32_t mate_refSeqId = -1;
- int64_t pnext = 0;
+ if (G.mode == mode_Archive)
+ goto WRITE_SEQUENCE;
+ else
+ goto WRITE_ALIGNMENT;
+ if (0) {
+WRITE_SEQUENCE:
+ if (mated) {
+ int64_t const spotId = CTX_VALUE_GET_S_ID(*value);
+ uint32_t const fragmentId = value->fragmentId;
+ bool const spotHasBeenWritten = (spotId != 0);
+ bool const spotHasFragmentInfo = (fragmentId != 0);
+ bool const spotIsFirstSeen = (spotHasBeenWritten || spotHasFragmentInfo) ? false : true;
- memset(&fi, 0, sizeof(fi));
- fi.aligned = aligned;
- fi.ti = ti;
- fi.orientation = AR_REF_ORIENT(data);
- fi.otherReadNo = AR_READNO(data);
- fi.sglen = strlen(spotGroup);
- fi.readlen = readlen;
- fi.cskey = cskey;
- fi.is_bad = (flags & BAMFlags_IsLowQuality) != 0;/*BAM*/
- sz = sizeof(fi) + 2*fi.readlen + fi.sglen;
- if (align) {
- BAM_AlignmentGetMateRefSeqId(rec, &mate_refSeqId);/*BAM*/
- BAM_AlignmentGetMatePosition(rec, &pnext);/*BAM*/
- }
- if(align && mate_refSeqId == refSeqId && pnext > 0 && pnext!=rpos /*** weird case in some bams**/){
- rc = MemBankAlloc(ctx->frags, &value->fragmentId, sz, 0, false);
- fcountBoth++;
- } else {
- rc = MemBankAlloc(ctx->frags, &value->fragmentId, sz, 0, true);
- fcountOne++;
- }
- if (rc) {
- (void)LOGERR(klogErr, rc, "KMemBankAlloc failed");
- goto LOOP_END;
+ if (spotHasBeenWritten) {
+ /* do nothing */
}
- /*printf("IN:%10d\tcnt2=%ld\tcnt1=%ld\n",value->fragmentId,fcountBoth,fcountOne);*/
-
- rc = KDataBufferResize(&fragBuf, sz);
- if (rc) {
- (void)LOGERR(klogErr, rc, "Failed to resize fragment buffer");
- goto LOOP_END;
- }
- {{
- int const revcmp = (isColorSpace && !aligned) ? 0 : fi.orientation;
- uint8_t *dst = (uint8_t*) fragBuf.base;
-
- if (revcmp) {
- QUAL_CHANGED_REVERSED;
- SEQ__CHANGED_REV_COMP;
+ else if (spotIsFirstSeen) {
+ /* start spot assembly */
+ unsigned sz;
+ FragmentInfo fi;
+ int32_t mate_refSeqId = -1;
+ int64_t pnext = 0;
+
+ if (!isPrimary) {
+ if (!G.assembleWithSecondary || hardclipped) {
+ goto WRITE_ALIGNMENT;
+ }
+ (void)PLOGMSG(klogDebug, (klogDebug, "Spot '$(name)' (id $(id)) is being constructed from secondary alignment information", "id=%lx,name=%s", keyId, name));
+ }
+ memset(&fi, 0, sizeof(fi));
+
+ fi.aligned = isPrimary ? aligned:0;
+ fi.ti = ti;
+ fi.orientation = AR_REF_ORIENT(data);
+ fi.otherReadNo = AR_READNO(data);
+ fi.sglen = strlen(spotGroup);
+ fi.lglen = BX ? strlen(BX) : 0;
+ fi.readlen = readlen;
+ fi.cskey = cskey;
+ fi.is_bad = (flags & BAMFlags_IsLowQuality) != 0;
+ sz = sizeof(fi) + 2*fi.readlen + fi.sglen + fi.lglen;
+ if (align) {
+ BAM_AlignmentGetMateRefSeqId(rec, &mate_refSeqId);
+ BAM_AlignmentGetMatePosition(rec, &pnext);
+ }
+ if(align && mate_refSeqId == refSeqId && pnext > 0 && pnext!=rpos /*** weird case in some bams**/){
+ rc = MemBankAlloc(ctx->frags, &value->fragmentId, sz, 0, false);
+ fcountBoth++;
+ } else {
+ rc = MemBankAlloc(ctx->frags, &value->fragmentId, sz, 0, true);
+ fcountOne++;
}
- memcpy(dst,&fi,sizeof(fi));
- dst += sizeof(fi);
- COPY_READ((char *)dst, seqDNA, fi.readlen, revcmp);
- dst += fi.readlen;
- COPY_QUAL(dst, qual, fi.readlen, revcmp);
- dst += fi.readlen;
- memcpy(dst,spotGroup,fi.sglen);
- }}
- rc = MemBankWrite(ctx->frags, value->fragmentId, 0, fragBuf.base, sz, &rsize);
- if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankWrite failed writing fragment $(id)", "id=%u", value->fragmentId));
- goto LOOP_END;
- }
- value->has_a_read = 1;
- }
- else if (spotHasFragmentInfo) {
- /* continue spot assembly */
- FragmentInfo *fip;
- {
- size_t size1;
- size_t size2;
-
- rc = MemBankSize(ctx->frags, fragmentId, &size1);
if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankSize failed on fragment $(id)", "id=%u", fragmentId));
+ (void)LOGERR(klogErr, rc, "KMemBankAlloc failed");
goto LOOP_END;
}
+ /*printf("IN:%10d\tcnt2=%ld\tcnt1=%ld\n",value->fragmentId,fcountBoth,fcountOne);*/
- rc = KDataBufferResize(&fragBuf, size1);
- fip = (FragmentInfo *)fragBuf.base;
+ rc = KDataBufferResize(&fragBuf, sz);
if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "Failed to resize fragment buffer", ""));
+ (void)LOGERR(klogErr, rc, "Failed to resize fragment buffer");
goto LOOP_END;
}
-
- rc = MemBankRead(ctx->frags, fragmentId, 0, fragBuf.base, size1, &size2);
+ {{ /* serialize: FragmentInfo header, bases, qualities, spot group, linkage group */
+ uint8_t *dst = (uint8_t*) fragBuf.base;
+
+ memcpy(dst,&fi,sizeof(fi));
+ dst += sizeof(fi);
+ memcpy(dst, seqBuffer.base, readlen);
+ dst += readlen;
+ memcpy(dst, qualBuffer.base, readlen);
+ dst += fi.readlen;
+ memcpy(dst,spotGroup,fi.sglen);
+ dst += fi.sglen;
+ if (fi.lglen != 0) memcpy(dst, BX, fi.lglen); /* BX may be NULL (lglen is 0 then); memcpy must not be handed a null source */
+ }}
+ rc = MemBankWrite(ctx->frags, value->fragmentId, 0, fragBuf.base, sz, &rsize);
if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankRead failed on fragment $(id)", "id=%u", fragmentId));
+ (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankWrite failed writing fragment $(id)", "id=%u", value->fragmentId));
goto LOOP_END;
}
- assert(size1 == size2);
- }
- if (AR_READNO(data) == fip->otherReadNo) {
- /* is a repeat of the same read; do nothing */
+ if (revcmp) {
+ QUAL_CHANGED_REVERSED;
+ SEQ__CHANGED_REV_COMP;
+ }
}
- else {
- /* mate found; finish spot assembly */
- unsigned readLen[2];
- unsigned read1 = 0;
- unsigned read2 = 1;
- uint8_t *src = (uint8_t*) fip + sizeof(*fip);
-
- if (AR_READNO(data) < fip->otherReadNo) {
- read1 = 1;
- read2 = 0;
+ else if (spotHasFragmentInfo) {
+ /* continue spot assembly */
+ FragmentInfo *fip;
+ {
+ size_t size1;
+ size_t size2;
+
+ rc = MemBankSize(ctx->frags, fragmentId, &size1);
+ if (rc) {
+ (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankSize failed on fragment $(id)", "id=%u", fragmentId));
+ goto LOOP_END;
+ }
+
+ rc = KDataBufferResize(&fragBuf, size1);
+ fip = (FragmentInfo *)fragBuf.base;
+ if (rc) {
+ (void)PLOGERR(klogErr, (klogErr, rc, "Failed to resize fragment buffer", ""));
+ goto LOOP_END;
+ }
+
+ rc = MemBankRead(ctx->frags, fragmentId, 0, fragBuf.base, size1, &size2);
+ if (rc) {
+ (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankRead failed on fragment $(id)", "id=%u", fragmentId));
+ goto LOOP_END;
+ }
+ assert(size1 == size2);
}
- readLen[read1] = fip->readlen;
- readLen[read2] = readlen;
- rc = SequenceRecordInit(&srec, 2, readLen);
- if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "Failed resizing sequence record buffer", ""));
- goto LOOP_END;
+ if (AR_READNO(data) == fip->otherReadNo) {
+ /* is a repeat of the same read; do nothing */
}
- srec.ti[read1] = fip->ti;
- srec.aligned[read1] = fip->aligned;
- srec.is_bad[read1] = fip->is_bad;
- srec.orientation[read1] = fip->orientation;
- srec.cskey[read1] = fip->cskey;
- memcpy(srec.seq + srec.readStart[read1], src, fip->readlen);
- src += fip->readlen;
- memcpy(srec.qual + srec.readStart[read1], src, fip->readlen);
- src += fip->readlen;
-
- srec.orientation[read2] = AR_REF_ORIENT(data);
- {
- int const revcmp = (isColorSpace && !aligned) ? 0 : srec.orientation[read2];
+ else {
+ /* mate found; finish spot assembly */
+ unsigned read1 = 0;
+ unsigned read2 = 1;
+ uint8_t *src = (uint8_t*) fip + sizeof(*fip);
+ if (!isPrimary) {
+ if (!G.assembleWithSecondary || hardclipped ) {
+ goto WRITE_ALIGNMENT;
+ }
+ (void)PLOGMSG(klogDebug, (klogDebug, "Spot '$(name)' (id $(id)) is being constructed from secondary alignment information", "id=%lx,name=%s", keyId, name));
+ }
+ rc = KDataBufferResize(&seqBuffer, readlen + fip->readlen);
+ if (rc) {
+ (void)LOGERR(klogErr, rc, "Failed to resize record buffer");
+ goto LOOP_END;
+ }
+ rc = KDataBufferResize(&qualBuffer, readlen + fip->readlen);
+ if (rc) {
+ (void)LOGERR(klogErr, rc, "Failed to resize record buffer");
+ goto LOOP_END;
+ }
+ if (AR_READNO(data) < fip->otherReadNo) {
+ read1 = 1;
+ read2 = 0;
+ }
+
+ memset(&srecStorage, 0, sizeof(srecStorage));
+ srec.numreads = 2;
+ srec.readLen[read1] = fip->readlen;
+ srec.readLen[read2] = readlen;
+ srec.readStart[1] = srec.readLen[0];
+ {
+ char const *const s1 = (void *)src;
+ char const *const s2 = seqBuffer.base;
+ char *const d = seqBuffer.base;
+ char *const d1 = d + srec.readStart[read1];
+ char *const d2 = d + srec.readStart[read2];
+
+ srec.seq = seqBuffer.base;
+ if (d2 != s2) {
+ memcpy(d2, s2, readlen);
+ }
+ memcpy(d1, s1, fip->readlen);
+ src += fip->readlen;
+ }
+ {
+ char const *const s1 = (void *)src;
+ char const *const s2 = qualBuffer.base;
+ char *const d = qualBuffer.base;
+ char *const d1 = d + srec.readStart[read1];
+ char *const d2 = d + srec.readStart[read2];
+
+ srec.qual = qualBuffer.base;
+ if (d2 != s2) {
+ memcpy(d2, s2, readlen);
+ }
+ memcpy(d1, s1, fip->readlen);
+ src += fip->readlen;
+ }
+
+ srec.ti[read1] = fip->ti;
+ srec.ti[read2] = ti;
+
+ srec.aligned[read1] = fip->aligned;
+ srec.aligned[read2] = aligned;
+
+ srec.is_bad[read1] = fip->is_bad;
+ srec.is_bad[read2] = (flags & BAMFlags_IsLowQuality) != 0;
+
+ srec.orientation[read1] = fip->orientation;
+ srec.orientation[read2] = AR_REF_ORIENT(data);
+
+ srec.cskey[read1] = fip->cskey;
+ srec.cskey[read2] = cskey;
+
+ srec.keyId = keyId;
+
+ srec.spotGroup = spotGroup;
+ srec.spotGroupLen = strlen(spotGroup);
+
+ srec.linkageGroup = BX;
+ srec.linkageGroupLen = BX ? strlen(BX) : 0;
+
+ srec.seq = seqBuffer.base;
+ srec.qual = qualBuffer.base;
+
+ rc = SequenceWriteRecord(seq, &srec, isColorSpace, value->pcr_dup, value->platform);
+ if (rc) {
+ (void)LOGERR(klogErr, rc, "SequenceWriteRecord failed");
+ goto LOOP_END;
+ }
+ CTX_VALUE_SET_S_ID(*value, ++ctx->spotId);
+ if(fragmentId & 1){
+ fcountOne--;
+ } else {
+ fcountBoth--;
+ }
+ /* printf("OUT:%9d\tcnt2=%ld\tcnt1=%ld\n",fragmentId,fcountBoth,fcountOne);*/
+ rc = MemBankFree(ctx->frags, fragmentId);
+ if (rc) {
+ (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankFree failed on fragment $(id)", "id=%u", fragmentId));
+ goto LOOP_END;
+ }
+ value->fragmentId = 0;
if (revcmp) {
QUAL_CHANGED_REVERSED;
SEQ__CHANGED_REV_COMP;
}
- COPY_READ(srec.seq + srec.readStart[read2], seqDNA, srec.readLen[read2], revcmp);
- COPY_QUAL(srec.qual + srec.readStart[read2], qual, srec.readLen[read2], revcmp);
- }
- srec.keyId = keyId;
- srec.is_bad[read2] = (flags & BAMFlags_IsLowQuality) != 0;
- srec.aligned[read2] = aligned;
- srec.cskey[read2] = cskey;
- srec.ti[read2] = ti;
-
- srec.spotGroup = spotGroup;
- srec.spotGroupLen = strlen(spotGroup);
- if (value->pcr_dup && (srec.is_bad[0] || srec.is_bad[1])) {
- FLAG_CHANGED_400_AND_200;
- filterFlagConflictRecords++;
- if (filterFlagConflictRecords < MAX_WARNINGS_FLAG_CONFLICT) {
- (void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
- }
- else if (filterFlagConflictRecords == MAX_WARNINGS_FLAG_CONFLICT) {
- (void)PLOGMSG(klogWarn, (klogWarn, "Last reported warning: Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
+ if (value->pcr_dup && (srec.is_bad[0] || srec.is_bad[1])) {
+ FLAG_CHANGED_400_AND_200;
+ filterFlagConflictRecords++;
+ if (filterFlagConflictRecords < MAX_WARNINGS_FLAG_CONFLICT) {
+ (void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
+ }
+ else if (filterFlagConflictRecords == MAX_WARNINGS_FLAG_CONFLICT) {
+ (void)PLOGMSG(klogWarn, (klogWarn, "Last reported warning: Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
+ }
}
}
- rc = SequenceWriteRecord(seq, &srec, isColorSpace, value->pcr_dup, value->platform);
- if (rc) {
- (void)LOGERR(klogErr, rc, "SequenceWriteRecord failed");
- goto LOOP_END;
+ }
+ else {
+ (void)PLOGMSG(klogErr, (klogErr, "Spot '$(name)' has caused the loader to enter an illogical state", "name=%s", name));
+ assert(!"this should never happen"); /* negated: a bare string literal is always true and the assert would never fire */
+ }
+ }
+ else if (CTX_VALUE_GET_S_ID(*value) == 0) {
+ /* new unmated fragment - no spot assembly */
+ if (!isPrimary) {
+ if (!G.assembleWithSecondary || hardclipped ) {
+ goto WRITE_ALIGNMENT;
}
- CTX_VALUE_SET_S_ID(*value, ++ctx->spotId);
- if(fragmentId & 1){
- fcountOne--;
- } else {
- fcountBoth--;
+ (void)PLOGMSG(klogDebug, (klogDebug, "Spot '$(name)' (id $(id)) is being constructed from secondary alignment information", "id=%lx,name=%s", keyId, name));
+ }
+ memset(&srecStorage, 0, sizeof(srecStorage));
+ srec.numreads = 1;
+
+ srec.readLen[0] = readlen;
+ srec.ti[0] = ti;
+ srec.aligned[0] = isPrimary?aligned:0;
+ srec.is_bad[0] = (flags & BAMFlags_IsLowQuality) != 0;
+ srec.orientation[0] = AR_REF_ORIENT(data);
+ srec.cskey[0] = cskey;
+
+ srec.keyId = keyId;
+
+ srec.spotGroup = spotGroup;
+ srec.spotGroupLen = strlen(spotGroup);
+
+ srec.linkageGroup = BX;
+ srec.linkageGroupLen = BX ? strlen(BX) : 0;
+
+ srec.seq = seqBuffer.base;
+ srec.qual = qualBuffer.base;
+
+ rc = SequenceWriteRecord(seq, &srec, isColorSpace, value->pcr_dup, value->platform);
+ if (rc) {
+ (void)PLOGERR(klogErr, (klogErr, rc, "SequenceWriteRecord failed", ""));
+ goto LOOP_END;
+ }
+ CTX_VALUE_SET_S_ID(*value, ++ctx->spotId);
+ value->fragmentId = 0;
+ if (value->pcr_dup && srec.is_bad[0]) {
+ FLAG_CHANGED_400_AND_200;
+ filterFlagConflictRecords++;
+ if (filterFlagConflictRecords < MAX_WARNINGS_FLAG_CONFLICT) {
+ (void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
}
- /* printf("OUT:%9d\tcnt2=%ld\tcnt1=%ld\n",fragmentId,fcountBoth,fcountOne);*/
- rc = MemBankFree(ctx->frags, fragmentId);
- if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "KMemBankFree failed on fragment $(id)", "id=%u", fragmentId));
- goto LOOP_END;
+ else if (filterFlagConflictRecords == MAX_WARNINGS_FLAG_CONFLICT) {
+ (void)PLOGMSG(klogWarn, (klogWarn, "Last reported warning: Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
}
- value->fragmentId = 0;
+ }
+ if (revcmp) {
+ QUAL_CHANGED_REVERSED;
+ SEQ__CHANGED_REV_COMP;
}
}
- else {
- (void)PLOGMSG(klogErr, (klogErr, "Spot '$(name)' has caused the loader to enter an illogical state", "name=%s", name));
- assert("this should never happen");
- }
-
- if (!isPrimary && aligned) {
+ }
+WRITE_ALIGNMENT:
+ if (aligned) {
+ if (mated && !isPrimary) {
int32_t bam_mrid;
int64_t mpos;
int64_t mrid = 0;
int64_t tlen;
- BAM_AlignmentGetMatePosition(rec, &mpos);/*BAM*/
- BAM_AlignmentGetMateRefSeqId(rec, &bam_mrid);/*BAM*/
- BAM_AlignmentGetInsertSize(rec, &tlen);/*BAM*/
+ BAM_AlignmentGetMatePosition(rec, &mpos);
+ BAM_AlignmentGetMateRefSeqId(rec, &bam_mrid);
+ BAM_AlignmentGetInsertSize(rec, &tlen);
if (mpos >= 0 && bam_mrid >= 0 && tlen != 0) {
- BAMRefSeq const *mref;/*BAM*/
+ BAMRefSeq const *mref;
- BAM_FileGetRefSeq(bam, bam_mrid, &mref);/*BAM*/
+ BAM_FileGetRefSeq(bam, bam_mrid, &mref);
if (mref) {
rc_t rc_temp = ReferenceGet1stRow(ref, &mrid, mref->name);
if (rc_temp == 0) {
@@ -1989,63 +2446,21 @@ MIXED_BASE_AND_COLOR:
MATE_INFO_LOST_MISSING;
}
}
- }
- else if (CTX_VALUE_GET_S_ID(*value) == 0 && (isPrimary || !originally_aligned)) {
- /* new unmated fragment - no spot assembly */
- unsigned readLen[1];
-
- readLen[0] = readlen;
- rc = SequenceRecordInit(&srec, 1, readLen);
- if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "Failed resizing sequence record buffer", ""));
- goto LOOP_END;
- }
- srec.ti[0] = ti;
- srec.aligned[0] = aligned;
- srec.is_bad[0] = (flags & BAMFlags_IsLowQuality) != 0;
- srec.orientation[0] = AR_REF_ORIENT(data);
- srec.cskey[0] = cskey;
- {
- int const revcmp = (isColorSpace && !aligned) ? 0 : srec.orientation[0];
-
- if (revcmp) {
- QUAL_CHANGED_REVERSED;
- SEQ__CHANGED_REV_COMP;
- }
- COPY_READ(srec.seq + srec.readStart[0], seqDNA, readlen, revcmp);
- COPY_QUAL(srec.qual + srec.readStart[0], qual, readlen, revcmp);
- }
- srec.keyId = keyId;
-
- srec.spotGroup = spotGroup;
- srec.spotGroupLen = strlen(spotGroup);
- if (value->pcr_dup && srec.is_bad[0]) {
- FLAG_CHANGED_400_AND_200;
- filterFlagConflictRecords++;
- if (filterFlagConflictRecords < MAX_WARNINGS_FLAG_CONFLICT) {
- (void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
- }
- else if (filterFlagConflictRecords == MAX_WARNINGS_FLAG_CONFLICT) {
- (void)PLOGMSG(klogWarn, (klogWarn, "Last reported warning: Spot '$(name)': both 0x400 and 0x200 flag bits set, only 0x400 will be saved", "name=%s", name));
- }
- }
- rc = SequenceWriteRecord(seq, &srec, isColorSpace, value->pcr_dup, value->platform);
- if (rc) {
- (void)PLOGERR(klogErr, (klogErr, rc, "SequenceWriteRecord failed", ""));
- goto LOOP_END;
+ if (wasRenamed) {
+ RENAMED_REFERENCE;
}
- CTX_VALUE_SET_S_ID(*value, ++ctx->spotId);
- value->fragmentId = 0;
- }
-
- if (aligned) {
if (value->alignmentCount[AR_READNO(data) - 1] < 254)
++value->alignmentCount[AR_READNO(data) - 1];
++ctx->alignCount;
- assert(keyId >> 32 < ctx->key2id_count);
- assert((uint32_t)keyId < ctx->idCount[keyId >> 32]);
+ assert(keyId >> 32 < ctx->keyToID.key2id_count);
+ assert((uint32_t)keyId < ctx->keyToID.idCount[keyId >> 32]);
+
+ if (BX) {
+ AR_LINKAGE_GROUP(data).elements = strlen(BX);
+ AR_LINKAGE_GROUP(data).buffer = BX;
+ }
rc = AlignmentWriteRecord(align, &data);
if (rc == 0) {
@@ -2083,11 +2498,22 @@ MIXED_BASE_AND_COLOR:
"The file contained no records that were processed.");
rc = RC(rcAlign, rcFile, rcReading, rcData, rcEmpty);
}
+#if THREADING_BAMREAD
+ KQueueSeal(bamq);
+ KQueueRelease(bamq); bamq=NULL;
+ if(bamread_thread) {
+ rc_t rc1;
+ KThreadWait(bamread_thread,&rc1);
+ if(rc == 0){
+ rc=rc1;
+ }
+ KThreadRelease(bamread_thread);
+ }
+#endif
BAM_FileRelease(bam);
MMArrayLock(ctx->id2value);
KDataBufferWhack(&buf);
KDataBufferWhack(&fragBuf);
- KDataBufferWhack(&srec.storage);
KDataBufferWhack(&cigBuf);
KDataBufferWhack(&data.buffer);
return rc;
@@ -2100,31 +2526,39 @@ static rc_t WriteSoloFragments(context_t *ctx, Sequence *seq)
uint64_t idCount = 0;
rc_t rc;
KDataBuffer fragBuf;
+ SequenceRecordStorage srecStorage;
SequenceRecord srec;
++ctx->pass;
memset(&srec, 0, sizeof(srec));
+ srec.ti = srecStorage.ti;
+ srec.readStart = srecStorage.readStart;
+ srec.readLen = srecStorage.readLen;
+ srec.orientation = srecStorage.orientation;
+ srec.is_bad = srecStorage.is_bad;
+ srec.alignmentCount = srecStorage.alignmentCount;
+ srec.aligned = srecStorage.aligned;
+ srec.cskey = srecStorage. cskey;
+
rc = KDataBufferMake(&fragBuf, 8, 0);
if (rc) {
(void)LOGERR(klogErr, rc, "KDataBufferMake failed");
return rc;
}
- for (idCount = 0, j = 0; j < ctx->key2id_count; ++j) {
- idCount += ctx->idCount[j];
+ for (idCount = 0, j = 0; j < ctx->keyToID.key2id_count; ++j) {
+ idCount += ctx->keyToID.idCount[j];
}
KLoadProgressbar_Append(ctx->progress[ctx->pass - 1], idCount);
- for (idCount = 0, j = 0; j < ctx->key2id_count; ++j) {
- for (i = 0; i != ctx->idCount[j]; ++i, ++idCount) {
+ for (idCount = 0, j = 0; j < ctx->keyToID.key2id_count; ++j) {
+ for (i = 0; i != ctx->keyToID.idCount[j]; ++i, ++idCount) {
uint64_t const keyId = ((uint64_t)j << 32) | i;
ctx_value_t *value;
size_t rsize;
size_t sz;
- unsigned readLen[2];
- unsigned read = 0;
+ char const *src;
FragmentInfo const *fip;
- uint8_t const *src;
rc = MMArrayGet(ctx->id2value, (void **)&value, keyId);
if (rc)
@@ -2149,36 +2583,40 @@ static rc_t WriteSoloFragments(context_t *ctx, Sequence *seq)
break;
}
assert( rsize == sz );
- fip = (FragmentInfo const *)fragBuf.base;
- src = (uint8_t const *)&fip[1];
- readLen[0] = readLen[1] = 0;
- if (!value->unmated && ( (fip->aligned && CTX_VALUE_GET_P_ID(*value, 0) == 0)
- || (value->unaligned_2)))
- {
- read = 1;
- }
+ fip = fragBuf.base;
+ src = (char const *)&fip[1];
- readLen[read] = fip->readlen;
- rc = SequenceRecordInit(&srec, value->unmated ? 1 : 2, readLen);
- if (rc) {
- (void)LOGERR(klogErr, rc, "SequenceRecordInit failed");
- break;
+ memset(&srecStorage, 0, sizeof(srecStorage));
+ if (value->unmated) {
+ srec.numreads = 1;
+ srec.readLen[0] = fip->readlen;
+ srec.ti[0] = fip->ti;
+ srec.aligned[0] = fip->aligned;
+ srec.is_bad[0] = fip->is_bad;
+ srec.orientation[0] = fip->orientation;
+ srec.cskey[0] = fip->cskey;
}
-
- srec.ti[read] = fip->ti;
- srec.aligned[read] = fip->aligned;
- srec.is_bad[read] = fip->is_bad;
- srec.orientation[read] = fip->orientation;
- srec.cskey[read] = fip->cskey;
- memcpy(srec.seq + srec.readStart[read], src, srec.readLen[read]);
- src += fip->readlen;
- memcpy(srec.qual + srec.readStart[read], src, srec.readLen[read]);
- src += fip->readlen;
- srec.spotGroup = (char *)src;
+ else {
+ unsigned const read = ((fip->aligned && CTX_VALUE_GET_P_ID(*value, 0) == 0) || value->unaligned_2) ? 1 : 0;
+
+ srec.numreads = 2;
+ srec.readLen[read] = fip->readlen;
+ srec.readStart[1] = srec.readLen[0];
+ srec.ti[read] = fip->ti;
+ srec.aligned[read] = fip->aligned;
+ srec.is_bad[read] = fip->is_bad;
+ srec.orientation[read] = fip->orientation;
+ srec.cskey[0] = srec.cskey[1] = 'N';
+ srec.cskey[read] = fip->cskey;
+ }
+ srec.seq = (char *)src;
+ srec.qual = (uint8_t *)(src + fip->readlen);
+ srec.spotGroup = (char *)(src + 2 * fip->readlen);
srec.spotGroupLen = fip->sglen;
+ srec.linkageGroup = (char *)(src + 2 * fip->readlen + fip->sglen); /* linkage group is stored right after the spot group */
+ srec.linkageGroupLen = fip->lglen;
srec.keyId = keyId;
-
rc = SequenceWriteRecord(seq, &srec, ctx->isColorSpace, value->pcr_dup, value->platform);
if (rc) {
(void)LOGERR(klogErr, rc, "SequenceWriteRecord failed");
@@ -2190,7 +2628,6 @@ static rc_t WriteSoloFragments(context_t *ctx, Sequence *seq)
}
MMArrayLock(ctx->id2value);
KDataBufferWhack(&fragBuf);
- KDataBufferWhack(&srec.storage);
return rc;
}
@@ -2198,23 +2635,27 @@ static rc_t SequenceUpdateAlignInfo(context_t *ctx, Sequence *seq)
{
rc_t rc = 0;
uint64_t row;
- ctx_value_t const *value;
uint64_t keyId;
++ctx->pass;
KLoadProgressbar_Append(ctx->progress[ctx->pass - 1], ctx->spotId + 1);
for (row = 1; row <= ctx->spotId; ++row) {
+ ctx_value_t *value;
+
rc = SequenceReadKey(seq, row, &keyId);
if (rc) {
(void)PLOGERR(klogErr, (klogErr, rc, "Failed to get key for row $(row)", "row=%u", (unsigned)row));
break;
}
- rc = MMArrayGetRead(ctx->id2value, (void const **)&value, keyId);
+ rc = MMArrayGet(ctx->id2value, (void **)&value, keyId);
if (rc) {
(void)PLOGERR(klogErr, (klogErr, rc, "Failed to read info for row $(row), index $(idx)", "row=%u,idx=%u", (unsigned)row, (unsigned)keyId));
break;
}
+ if (G.mode == mode_Remap) {
+ CTX_VALUE_SET_S_ID(*value, row);
+ }
if (row != CTX_VALUE_GET_S_ID(*value)) {
rc = RC(rcApp, rcTable, rcWriting, rcData, rcUnexpected);
(void)PLOGMSG(klogErr, (klogErr, "Unexpected spot id $(spotId) for row $(row), index $(idx)", "spotId=%u,row=%u,idx=%u", (unsigned)CTX_VALUE_GET_S_ID(*value), (unsigned)row, (unsigned)keyId));
@@ -2222,10 +2663,22 @@ static rc_t SequenceUpdateAlignInfo(context_t *ctx, Sequence *seq)
}
{{
int64_t primaryId[2];
+ int const logLevel = klogWarn; /*G.assembleWithSecondary ? klogWarn : klogErr;*/
primaryId[0] = CTX_VALUE_GET_P_ID(*value, 0);
primaryId[1] = CTX_VALUE_GET_P_ID(*value, 1);
+ if (primaryId[0] == 0 && value->alignmentCount[0] != 0) {
+ rc = RC(rcApp, rcTable, rcWriting, rcConstraint, rcViolated);
+ (void)PLOGERR(logLevel, (logLevel, rc, "Spot id $(id) read 1 never had a primary alignment", "id=%lx", keyId));
+ }
+ if (!value->unmated && primaryId[1] == 0 && value->alignmentCount[1] != 0) {
+ rc = RC(rcApp, rcTable, rcWriting, rcConstraint, rcViolated);
+ (void)PLOGERR(logLevel, (logLevel, rc, "Spot id $(id) read 2 never had a primary alignment", "id=%lx", keyId));
+ }
+ if (rc != 0 && logLevel == klogErr)
+ break;
+
rc = SequenceUpdateAlignData(seq, row, value->unmated ? 1 : 2,
primaryId,
value->alignmentCount);
@@ -2251,7 +2704,7 @@ static rc_t AlignmentUpdateSpotInfo(context_t *ctx, Alignment *align)
rc = AlignmentStartUpdatingSpotIds(align);
while (rc == 0 && (rc = Quitting()) == 0) {
- ctx_value_t const *value;
+ ctx_value_t *value;
rc = AlignmentGetSpotKey(align, &keyId);
if (rc) {
@@ -2259,15 +2712,16 @@ static rc_t AlignmentUpdateSpotInfo(context_t *ctx, Alignment *align)
rc = 0;
break;
}
- assert(keyId >> 32 < ctx->key2id_count);
- assert((uint32_t)keyId < ctx->idCount[keyId >> 32]);
- rc = MMArrayGetRead(ctx->id2value, (void const **)&value, keyId);
+ assert(keyId >> 32 < ctx->keyToID.key2id_count);
+ assert((uint32_t)keyId < ctx->keyToID.idCount[keyId >> 32]);
+ rc = MMArrayGet(ctx->id2value, (void **)&value, keyId);
if (rc == 0) {
int64_t const spotId = CTX_VALUE_GET_S_ID(*value);
if (spotId == 0) {
- (void)PLOGMSG(klogWarn, (klogWarn, "Spot '$(id)' was never assigned a spot id, probably has no primary alignments", "id=%lx", keyId));
- /* (void)PLOGMSG(klogWarn, (klogWarn, "Spot #$(i): { $(s) }", "i=%lu,s=%s", keyId, Print_ctx_value_t(value))); */
+ rc = RC(rcApp, rcTable, rcWriting, rcConstraint, rcViolated);
+ (void)PLOGERR(klogErr, (klogErr, rc, "Spot '$(id)' was never assigned a spot id, probably has no primary alignments", "id=%lx", keyId));
+ break;
}
rc = AlignmentWriteSpotId(align, spotId);
}
@@ -2277,17 +2731,19 @@ static rc_t AlignmentUpdateSpotInfo(context_t *ctx, Alignment *align)
return rc;
}
+
static rc_t ArchiveBAM(VDBManager *mgr, VDatabase *db,
unsigned bamFiles, char const *bamFile[],
unsigned seqFiles, char const *seqFile[],
- bool *has_alignments)
+ bool *has_alignments,
+ bool continuing)
{
rc_t rc = 0;
rc_t rc2;
Reference ref;
Sequence seq;
Alignment *align;
- context_t ctx;
+ static context_t *ctx = &GlobalContext;
bool has_sequences = false;
unsigned i;
@@ -2306,16 +2762,16 @@ static rc_t ArchiveBAM(VDBManager *mgr, VDatabase *db,
SequenceInit(&seq, db);
align = AlignmentMake(db);
- rc = SetupContext(&ctx, bamFiles + seqFiles);
+ rc = SetupContext(ctx, bamFiles + seqFiles);
if (rc)
return rc;
- ++ctx.pass;
+ ctx->pass = 1;
for (i = 0; i < bamFiles && rc == 0; ++i) {
bool this_has_alignments = false;
bool this_has_sequences = false;
- rc = ProcessBAM(bamFile[i], &ctx, db, &ref, &seq, align, &this_has_alignments, &this_has_sequences);
+ rc = ProcessBAM(bamFile[i], ctx, db, &ref, &seq, align, &this_has_alignments, &this_has_sequences);
*has_alignments |= this_has_alignments;
has_sequences |= this_has_sequences;
}
@@ -2323,30 +2779,34 @@ static rc_t ArchiveBAM(VDBManager *mgr, VDatabase *db,
bool this_has_alignments = false;
bool this_has_sequences = false;
- rc = ProcessBAM(seqFile[i], &ctx, db, &ref, &seq, align, &this_has_alignments, &this_has_sequences);
+ rc = ProcessBAM(seqFile[i], ctx, db, &ref, &seq, align, &this_has_alignments, &this_has_sequences);
*has_alignments |= this_has_alignments;
has_sequences |= this_has_sequences;
}
+ if (!continuing) {
/*** No longer need memory for key2id ***/
- for (i = 0; i != ctx.key2id_count; ++i) {
- KBTreeDropBacking(ctx.key2id[i]);
- KBTreeRelease(ctx.key2id[i]);
- ctx.key2id[i] = NULL;
- }
- free(ctx.key2id_names);
- ctx.key2id_names = NULL;
+ for (i = 0; i != ctx->keyToID.key2id_count; ++i) {
+ KBTreeDropBacking(ctx->keyToID.key2id[i]);
+ KBTreeRelease(ctx->keyToID.key2id[i]);
+ ctx->keyToID.key2id[i] = NULL;
+ }
+ free(ctx->keyToID.key2id_names);
+ ctx->keyToID.key2id_names = NULL;
/*******************/
+ }
if (has_sequences) {
if (rc == 0 && (rc = Quitting()) == 0) {
- (void)LOGMSG(klogInfo, "Writing unpaired sequences");
- rc = WriteSoloFragments(&ctx, &seq);
- ContextReleaseMemBank(&ctx);
+ if (G.mode == mode_Archive) {
+ (void)LOGMSG(klogInfo, "Writing unpaired sequences");
+ rc = WriteSoloFragments(ctx, &seq);
+ ContextReleaseMemBank(ctx);
+ }
if (rc == 0) {
rc = SequenceDoneWriting(&seq);
if (rc == 0) {
(void)LOGMSG(klogInfo, "Updating sequence alignment info");
- rc = SequenceUpdateAlignInfo(&ctx, &seq);
+ rc = SequenceUpdateAlignInfo(ctx, &seq);
}
}
}
@@ -2354,7 +2814,7 @@ static rc_t ArchiveBAM(VDBManager *mgr, VDatabase *db,
if (*has_alignments && rc == 0 && (rc = Quitting()) == 0) {
(void)LOGMSG(klogInfo, "Writing alignment spot ids");
- rc = AlignmentUpdateSpotInfo(&ctx, align);
+ rc = AlignmentUpdateSpotInfo(ctx, align);
}
rc2 = AlignmentWhack(align, *has_alignments && rc == 0 && (rc = Quitting()) == 0);
if (rc == 0)
@@ -2366,7 +2826,7 @@ static rc_t ArchiveBAM(VDBManager *mgr, VDatabase *db,
SequenceWhack(&seq, rc == 0);
- ContextRelease(&ctx);
+ ContextRelease(ctx, continuing);
if (rc == 0) {
(void)LOGMSG(klogInfo, "Successfully loaded all files");
@@ -2416,9 +2876,11 @@ rc_t ConvertDatabaseToUnmapped(VDatabase *db)
}
return rc;
}
+
rc_t run(char const progName[],
unsigned bamFiles, char const *bamFile[],
- unsigned seqFiles, char const *seqFile[])
+ unsigned seqFiles, char const *seqFile[],
+ bool continuing)
{
VDBManager *mgr;
rc_t rc;
@@ -2432,12 +2894,12 @@ rc_t run(char const progName[],
else {
bool has_alignments = false;
+ /* VDBManagerDisableFlushThread(mgr); */
rc = VDBManagerDisablePagemapThread(mgr);
if (rc == 0)
{
-
if (G.onlyVerifyReferences) {
- rc = ArchiveBAM(mgr, NULL, bamFiles, bamFile, 0, NULL, &has_alignments);
+ rc = ArchiveBAM(mgr, NULL, bamFiles, bamFile, 0, NULL, &has_alignments, continuing);
}
else {
VSchema *schema;
@@ -2457,24 +2919,49 @@ rc_t run(char const progName[],
rc = VDBManagerCreateDB(mgr, &db, schema, db_type,
kcmInit + kcmMD5, "%s", G.outpath);
- rc2 = VSchemaRelease(schema);
- if (rc2)
- (void)LOGERR(klogWarn, rc2, "Failed to release schema");
- if (rc == 0)
- rc = rc2;
+ VSchemaRelease(schema);
if (rc == 0) {
- rc = ArchiveBAM(mgr, db, bamFiles, bamFile, seqFiles, seqFile, &has_alignments);
+ rc = ArchiveBAM(mgr, db, bamFiles, bamFile, seqFiles, seqFile, &has_alignments, continuing);
if (rc == 0)
PrintChangeReport();
if (rc == 0 && !has_alignments) {
rc = ConvertDatabaseToUnmapped(db);
}
+ else if (rc == 0 && lmc != NULL) {
+ VTable *tbl = NULL;
+ KTable *ktbl = NULL;
+ KMetadata *meta = NULL;
+ KMDataNode *node = NULL;
- rc2 = VDatabaseRelease(db);
- if (rc2)
- (void)LOGERR(klogWarn, rc2, "Failed to close database");
- if (rc == 0)
- rc = rc2;
+ VDatabaseOpenTableUpdate(db, &tbl, "REFERENCE");
+ VTableOpenKTableUpdate(tbl, &ktbl);
+ VTableRelease(tbl);
+
+ KTableOpenMetadataUpdate(ktbl, &meta);
+ KTableRelease(ktbl);
+
+ KMetadataOpenNodeUpdate(meta, &node, "LOW_MATCH_COUNT");
+ KMetadataRelease(meta);
+
+ RecordLowMatchCounts(node);
+
+ KMDataNodeRelease(node);
+
+ LowMatchCounterFree(lmc);
+ lmc = NULL;
+ }
+ VDatabaseRelease(db);
+
+ if (rc == 0 && G.globalMode == mode_Remap && !continuing) {
+ VTable *tbl = NULL;
+
+ VDBManagerOpenDBUpdate(mgr, &db, NULL, G.firstOut);
+ VDatabaseOpenTableUpdate(db, &tbl, "SEQUENCE");
+ VDatabaseRelease(db);
+ VTableDropColumn(tbl, "TMP_KEY_ID");
+ VTableDropColumn(tbl, "READ");
+ VTableRelease(tbl);
+ }
if (rc == 0) {
KMetadata *meta = NULL;
diff --git a/tools/bam-loader/loader-imp.h b/tools/bam-loader/loader-imp.h
index 6f524a7..d90625f 100644
--- a/tools/bam-loader/loader-imp.h
+++ b/tools/bam-loader/loader-imp.h
@@ -26,4 +26,5 @@
rc_t run(char const argv0[],
unsigned countAligned, char const *bamFile[],
- unsigned countUnaligned, char const *unaligned[]);
+ unsigned countUnaligned, char const *unaligned[],
+ bool continuing);
diff --git a/tools/bam-loader/loader-imp.h b/tools/bam-loader/low-match-count.cpp
similarity index 56%
copy from tools/bam-loader/loader-imp.h
copy to tools/bam-loader/low-match-count.cpp
index 6f524a7..48f9f44 100644
--- a/tools/bam-loader/loader-imp.h
+++ b/tools/bam-loader/low-match-count.cpp
@@ -24,6 +24,49 @@
*
*/
-rc_t run(char const argv0[],
- unsigned countAligned, char const *bamFile[],
- unsigned countUnaligned, char const *unaligned[]);
+extern "C" {
+#include "low-match-count.h"
+}
+
+#include <map>
+#include <string>
+
+struct LowMatchCounter {
+ typedef unsigned counter_t;
+ typedef std::map<std::string, counter_t> map_t;
+
+ map_t counter;
+
+ void add(std::string const &name) {
+ ++counter[name];
+ }
+
+ void each(void *ctx, callback_f callback) const {
+ map_t::const_iterator i;
+
+ for (i = counter.begin(); i != counter.end(); ++i) {
+ char const *const name = i->first.c_str();
+ callback(ctx, name, i->second);
+ }
+ }
+};
+
+extern "C" {
+
+LowMatchCounter *LowMatchCounterMake() {
+ return new LowMatchCounter;
+}
+
+void LowMatchCounterAdd(LowMatchCounter *const self, char const *const name) {
+ self->add(name);
+}
+
+void LowMatchCounterEach(LowMatchCounter const *const self, void *callback_ctx, callback_f callback) {
+ self->each(callback_ctx, callback);
+}
+
+void LowMatchCounterFree(LowMatchCounter *const self) {
+ delete self;
+}
+
+}
diff --git a/tools/bam-loader/loader-imp.h b/tools/bam-loader/low-match-count.h
similarity index 78%
copy from tools/bam-loader/loader-imp.h
copy to tools/bam-loader/low-match-count.h
index 6f524a7..76b9b85 100644
--- a/tools/bam-loader/loader-imp.h
+++ b/tools/bam-loader/low-match-count.h
@@ -24,6 +24,10 @@
*
*/
-rc_t run(char const argv0[],
- unsigned countAligned, char const *bamFile[],
- unsigned countUnaligned, char const *unaligned[]);
+typedef void (*callback_f)(void *ctx, char const *name, unsigned count);
+typedef struct LowMatchCounter LowMatchCounter;
+
+LowMatchCounter *LowMatchCounterMake();
+void LowMatchCounterAdd(LowMatchCounter *, char const *);
+void LowMatchCounterEach(LowMatchCounter const *, void *, callback_f);
+void LowMatchCounterFree(LowMatchCounter *);
diff --git a/tools/bam-loader/reference-writer.c b/tools/bam-loader/reference-writer.c
index fa6c7de..755f391 100644
--- a/tools/bam-loader/reference-writer.c
+++ b/tools/bam-loader/reference-writer.c
@@ -324,11 +324,7 @@ rc_t ReferenceSetFile(Reference *const self, char const id[],
ReferenceSeq const *rseq;
int found = 0;
unsigned at = 0;
- char const *actid = id;
- if (!G.allowMultiMapping) {
- assert(actid != NULL);
- }
if (self->last_id < self->ref_info.elem_count) {
struct s_reference_info const *const refInfoBase = self->ref_info.base;
struct s_reference_info const refInfo = refInfoBase[self->last_id];
@@ -343,21 +339,15 @@ rc_t ReferenceSetFile(Reference *const self, char const id[],
BAIL_ON_FAIL(FlushBuffers(self, self->length, true, true));
BAIL_ON_FAIL(ReferenceMgr_GetSeq(self->mgr, &rseq, id, shouldUnmap, G.allowMultiMapping, wasRenamed));
- if (self->rseq)
- ReferenceSeq_Release(self->rseq);
self->rseq = rseq;
- if (*wasRenamed)
- ReferenceSeq_GetID(rseq, &actid);
-
- at = bsearch_name(actid, self->ref_names.base, self->ref_info.elem_count, self->ref_info.base, &found);
+ at = bsearch_name(id, self->ref_names.base, self->ref_info.elem_count, self->ref_info.base, &found);
if (!found) {
- unsigned const len1 = str__len(actid);
- unsigned const len2 = str__len(id);
+ unsigned const len = str__len(id);
unsigned const name_at = self->ref_names.elem_count;
- unsigned const id_at = *wasRenamed ? (name_at + len1 + 1) : name_at;
+ unsigned const id_at = name_at;
struct s_reference_info const new_elem = s_reference_info_make(name_at, id_at);
- rc_t const rc = KDataBufferResize(&self->ref_names, name_at + len1 + 1 + (*wasRenamed ? (len2 + 1) : 0));
+ rc_t const rc = KDataBufferResize(&self->ref_names, name_at + len + 1);
if (rc)
return rc;
@@ -369,16 +359,16 @@ rc_t ReferenceSetFile(Reference *const self, char const id[],
if (rc)
return rc;
- memmove(((char *)self->ref_names.base) + name_at, actid, len1 + 1);
- if (*wasRenamed)
- memmove(((char *)self->ref_names.base) + id_at, id, len2 + 1);
-
+ memmove(((char *)self->ref_names.base) + name_at, id, len + 1);
memmove(refInfoBase + at + 1, refInfoBase + at, (count - at) * sizeof(*refInfoBase));
refInfoBase[at] = new_elem;
}
(void)PLOGMSG(klogInfo, (klogInfo, "Processing Reference '$(id)'", "id=%s", id));
- if (*wasRenamed)
- (void)PLOGMSG(klogInfo, (klogInfo, "Reference '$(id)' was renamed to '$(actid)'", "id=%s,actid=%s", id, *actid));
+ if (*wasRenamed) {
+ char const *actid = NULL;
+ ReferenceSeq_GetID(rseq, &actid);
+ (void)PLOGMSG(klogInfo, (klogInfo, "Reference '$(id)' was renamed to '$(actid)'", "id=%s,actid=%s", id, actid));
+ }
}
else if (!self->out_of_order)
Unsorted(self);
@@ -425,7 +415,10 @@ rc_t ReferenceAddCoverage(Reference *const self,
)
{
unsigned const refEnd = refStart + refLength;
-
+
+ if (refLength == 0) /* this happens for insert-only alignments */
+ return 0;
+
if (refEnd > self->endPos) {
unsigned const t1 = refEnd + (G.maxSeqLen - 1);
unsigned const adjust = t1 % G.maxSeqLen;
@@ -529,8 +522,12 @@ static void GetCounts(AlignmentRecord const *data, unsigned const seqLen,
rc_t ReferenceRead(Reference *self, AlignmentRecord *data, uint64_t const pos,
uint32_t const rawCigar[], uint32_t const cigCount,
char const seqDNA[], uint32_t const seqLen,
- uint8_t rna_orient, uint32_t *matches)
+ uint8_t rna_orient, uint32_t *matches, uint32_t *misses)
{
+ unsigned nmis = 0;
+ unsigned nmatch = 0;
+ unsigned indels = 0;
+
*matches = 0;
BAIL_ON_FAIL(ReferenceSeq_Compress(self->rseq,
(G.acceptHardClip ? ewrefmgr_co_AcceptHardClip : 0) + ewrefmgr_cmp_Binary,
@@ -541,27 +538,25 @@ rc_t ReferenceRead(Reference *self, AlignmentRecord *data, uint64_t const pos,
rna_orient,
&data->data));
+ GetCounts(data, seqLen, &nmatch, &nmis, &indels);
+ *matches = nmatch;
+ *misses = nmis;
+/* removed before more complete implementation - EY
if (!G.acceptNoMatch && data->data.ref_len == 0)
return RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated);
+***********************/
if (!self->out_of_order && pos < GetLastOffset(self)) {
return Unsorted(self);
}
if (!self->out_of_order) {
- unsigned nmis;
- unsigned nmatch;
- unsigned indels;
-
SetLastOffset(self, data->data.effective_offset);
- GetCounts(data, seqLen, &nmatch, &nmis, &indels);
- *matches = nmatch;
- if (G.acceptNoMatch || nmatch >= G.minMatchCount)
+ /* if (G.acceptNoMatch || nmatch >= G.minMatchCount) --- removed before more complete implementation - EY ***/
return ReferenceAddCoverage(self, data->data.effective_offset,
data->data.ref_len, nmis, indels,
data->isPrimary);
- else
- return RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated);
+ /* else return RC(rcApp, rcFile, rcReading, rcConstraint, rcViolated); --- removed before more complete implementation - EY ***/
}
return 0;
}
diff --git a/tools/bam-loader/reference-writer.h b/tools/bam-loader/reference-writer.h
index efb61f7..2946c82 100644
--- a/tools/bam-loader/reference-writer.h
+++ b/tools/bam-loader/reference-writer.h
@@ -70,7 +70,7 @@ rc_t ReferenceAddAlignId(Reference *self,
rc_t ReferenceRead(Reference *self, AlignmentRecord *data, uint64_t pos,
uint32_t const rawCigar[], uint32_t cigCount,
char const seqDNA[], uint32_t seqLen,
- uint8_t rna_orient, uint32_t *matches);
+ uint8_t rna_orient, uint32_t *matches, uint32_t *misses);
rc_t ReferenceWhack(Reference *self, bool commit);
#endif
diff --git a/tools/bam-loader/sequence-writer.c b/tools/bam-loader/sequence-writer.c
index 5002a5b..7c45292 100644
--- a/tools/bam-loader/sequence-writer.c
+++ b/tools/bam-loader/sequence-writer.c
@@ -28,6 +28,7 @@
#include <klib/log.h>
#include <vdb/database.h>
+#include <vdb/vdb-priv.h>
#include <kdb/manager.h>
@@ -53,7 +54,22 @@ Sequence *SequenceInit(Sequence *self, VDatabase *db) {
return self;
}
-rc_t SequenceWriteRecord(Sequence *self,
+static rc_t getTable(Sequence *self, bool color)
+{
+ int const options = (color ? ewseq_co_ColorSpace : 0)
+ | (G.hasTI ? ewseq_co_TI : 0)
+ | (G.globalMode == mode_Remap ? (ewseq_co_SaveRead | ewseq_co_KeepKey) : 0)
+ | ewseq_co_NoLabelData
+ | ewseq_co_SpotGroup;
+
+ if (self->tbl) return 0;
+
+ return TableWriterSeq_Make(&self->tbl, self->db,
+ options,
+ G.QualQuantizer);
+}
+
+static rc_t writeRecordX(Sequence *self,
SequenceRecord const *rec,
bool color,
bool isDup,
@@ -65,10 +81,10 @@ rc_t SequenceWriteRecord(Sequence *self,
unsigned i;
unsigned seqLen;
int64_t dummyRowId;
-
+
uint8_t readInfo[4096];
void *h_readInfo = NULL;
-
+
INSDC_coord_zero *readStart = (void *)readInfo;
INSDC_coord_len *readLen;
uint8_t *alcnt;
@@ -78,13 +94,15 @@ rc_t SequenceWriteRecord(Sequence *self,
size_t const elemSize = sizeof(alcnt[0]) + sizeof(readType[0])
+ sizeof(readStart[0]) + sizeof(readLen[0])
+ sizeof(readFilter[0]);
-
+
TableWriterSeqData data;
+ assert(G.mode == mode_Archive);
+
for (i = seqLen = 0; i != nreads; ++i) {
seqLen += rec->readLen[i];
}
-
+
if (nreads * elemSize + G.keepMismatchQual * seqLen * sizeof(mask[0]) > sizeof(readInfo))
{
h_readInfo = malloc(nreads * elemSize + G.keepMismatchQual * seqLen * sizeof(mask[0]));
@@ -96,15 +114,15 @@ rc_t SequenceWriteRecord(Sequence *self,
alcnt = (uint8_t *)&readLen[nreads];
readType = (INSDC_SRA_xread_type *)&alcnt[nreads];
readFilter = (INSDC_SRA_read_filter *)&readType[nreads];
-
+
if (G.keepMismatchQual) {
mask = (bool *)&readFilter[nreads];
-
+
for (i = 0; i != seqLen; ++i) {
mask[i] = (rec->qual[i] & 0x80) != 0;
}
}
-
+
for (i = 0; i != nreads; ++i) {
int const count = rec->aligned[i] ? 1 : 0;
int const len = rec->readLen[i];
@@ -121,71 +139,321 @@ rc_t SequenceWriteRecord(Sequence *self,
}
memset(&data, 0, sizeof(data));
-
+
data.sequence.buffer = rec->seq;
data.sequence.elements = seqLen;
-
+
data.quality.buffer = rec->qual;
data.quality.elements = seqLen;
-
+
if (G.keepMismatchQual) {
data.no_quantize_mask.buffer = mask;
data.no_quantize_mask.elements = seqLen;
}
-
+
data.alignment_count.buffer = alcnt;
data.alignment_count.elements = nreads;
-
+
data.nreads = nreads;
-
+
data.read_type.buffer = readType;
data.read_type.elements = nreads;
-
+
data.read_start.buffer = readStart;
data.read_start.elements = nreads;
-
+
data.read_len.buffer = readLen;
data.read_len.elements = nreads;
-
+
data.tmp_key_id = rec->keyId;
-
+
data.spot_group.buffer = rec->spotGroup;
data.spot_group.elements = rec->spotGroupLen;
-
+
data.cskey.buffer = rec->cskey;
data.cskey.elements = nreads;
-
+
data.read_filter.buffer = readFilter;
data.read_filter.elements = nreads;
-
+
data.platform.buffer = &platform;
data.platform.elements = 1;
-
+
data.ti.buffer = rec->ti;
data.ti.elements = nreads;
-
- if (!G.no_real_output) {
- if (self->tbl == NULL) {
- int csoption = (color ? ewseq_co_ColorSpace : 0);
- if(G.hasTI) csoption |= ewseq_co_TI;
-
- rc = TableWriterSeq_Make(&self->tbl, self->db,
- csoption | ewseq_co_NoLabelData | ewseq_co_SpotGroup, G.QualQuantizer);
- }
+ if (!G.no_real_output) {
+ rc = getTable(self, color);
if (rc == 0) {
rc = TableWriterSeq_Write(self->tbl, &data, &dummyRowId);
}
}
-
+
if (h_readInfo)
free(h_readInfo);
-
+
+ return rc;
+}
+
+static unsigned totalSequenceLength(SequenceRecord const *const rec)
+{
+ unsigned const nreads = rec->numreads;
+ unsigned rslt = 0;
+ unsigned i;
+
+ for (i = 0; i < nreads; ++i)
+ rslt += rec->readLen[i];
+
+ return rslt;
+}
+
+static rc_t writeRecord2(Sequence *self,
+ SequenceRecord const *rec,
+ bool color,
+ bool isDup,
+ INSDC_SRA_platform_id platform
+ )
+{
+ INSDC_SRA_xread_type readType[2];
+ INSDC_SRA_read_filter readFilter[2];
+ uint8_t alcnt[2];
+
+ rc_t rc = 0;
+ unsigned const nreads = rec->numreads;
+ unsigned const seqLen = totalSequenceLength(rec);
+ unsigned i;
+ bool fullyUnaligned = true;
+
+ TableWriterSeqData data;
+
+ assert(G.mode == mode_Archive);
+
+ for (i = 0; i != nreads; ++i) {
+ int const count = rec->aligned[i] ? 1 : 0;
+ int const len = rec->readLen[i];
+ int const type = len == 0 ? SRA_READ_TYPE_TECHNICAL : SRA_READ_TYPE_BIOLOGICAL;
+ int const dir = len == 0 ? 0 : rec->orientation[i] ? SRA_READ_TYPE_REVERSE : SRA_READ_TYPE_FORWARD;
+ int const filter = isDup ? SRA_READ_FILTER_CRITERIA : rec->is_bad[i] ? SRA_READ_FILTER_REJECT : SRA_READ_FILTER_PASS;
+
+ if (rec->aligned[i])
+ fullyUnaligned = false;
+ alcnt[i] = count;
+ readType[i] = type | dir;
+ readFilter[i] = filter;
+ }
+
+ memset(&data, 0, sizeof(data));
+
+ data.sequence.buffer = rec->seq;
+ data.sequence.elements = seqLen;
+
+ data.quality.buffer = rec->qual;
+ data.quality.elements = seqLen;
+
+ data.alignment_count.buffer = alcnt;
+ data.alignment_count.elements = nreads;
+
+ data.nreads = nreads;
+
+ data.read_type.buffer = readType;
+ data.read_type.elements = nreads;
+
+ data.read_start.buffer = rec->readStart;
+ data.read_start.elements = nreads;
+
+ data.read_len.buffer = rec->readLen;
+ data.read_len.elements = nreads;
+
+ data.tmp_key_id = rec->keyId;
+
+ data.spot_group.buffer = rec->spotGroup;
+ data.spot_group.elements = rec->spotGroupLen;
+
+ data.cskey.buffer = rec->cskey;
+ data.cskey.elements = nreads;
+
+ data.read_filter.buffer = readFilter;
+ data.read_filter.elements = nreads;
+
+ data.platform.buffer = &platform;
+ data.platform.elements = 1;
+
+ data.ti.buffer = rec->ti;
+ data.ti.elements = nreads;
+
+ if (fullyUnaligned && rec->linkageGroup && rec->linkageGroupLen > 0) {
+ data.linkageGroup.buffer = rec->linkageGroup;
+ data.linkageGroup.elements = rec->linkageGroupLen;
+ }
+ if (!G.no_real_output) {
+ rc = getTable(self, color);
+ if (rc == 0) {
+ int64_t dummyRowId;
+ rc = TableWriterSeq_Write(self->tbl, &data, &dummyRowId);
+ }
+ }
+
return rc;
}
+rc_t SequenceWriteRecord(Sequence *self,
+ SequenceRecord const *rec,
+ bool color,
+ bool isDup,
+ INSDC_SRA_platform_id platform
+ )
+{
+ if (rec->numreads <= 2 && !G.keepMismatchQual) {
+ return writeRecord2(self, rec, color, isDup, platform);
+ }
+ else {
+ return writeRecordX(self, rec, color, isDup, platform);
+ }
+}
+
+static rc_t ReadSequenceData(TableWriterSeqData *const data, VCursor const *const curs, int64_t const row, uint32_t const colId[])
+{
+ int i;
+
+ memset(data, 0, sizeof(*data));
+
+ for (i = 0; i <= 8; ++i) {
+ uint32_t elem_bits = 0;
+ uint32_t row_len = 0;
+ uint32_t boff = 0;
+ void const *base = NULL;
+ rc_t const rc = VCursorCellDataDirect(curs, row, colId[i], &elem_bits, &base, &boff, &row_len);
+ if (rc == 0) {
+ TableWriterData *tdata = NULL;
+
+ switch (i) {
+ case 0:
+ assert(elem_bits == sizeof(data->tmp_key_id) * 8);
+ assert(row_len == 1);
+ memcpy(&data->tmp_key_id, base, sizeof(data->tmp_key_id));
+ break;
+ case 1:
+ tdata = &data->sequence;
+ break;
+ case 2:
+ tdata = &data->quality;
+ break;
+ case 3:
+ tdata = &data->read_type;
+ break;
+ case 4:
+ tdata = &data->read_start;
+ break;
+ case 5:
+ tdata = &data->read_len;
+ break;
+ case 6:
+ tdata = &data->spot_group;
+ break;
+ case 7:
+ tdata = &data->read_filter;
+ break;
+ case 8:
+ tdata = &data->platform;
+ break;
+ default:
+ assert(!"reachable");
+ break;
+ }
+ if (tdata) {
+ tdata->buffer = base;
+ tdata->elements = row_len;
+ }
+ }
+ else
+ return rc;
+ }
+ return 0;
+}
+
rc_t SequenceDoneWriting(Sequence *self)
{
+ if (G.mode == mode_Remap) {
+ /* copy the SEQUENCE table from the first output */
+ VDBManager *mgr = NULL;
+ rc_t rc;
+
+ getTable(self, false);
+
+ rc = VDatabaseOpenManagerUpdate(self->db, &mgr);
+ assert(rc == 0);
+
+ if (rc == 0) {
+ VDatabase const *db = NULL;
+
+ rc = VDBManagerOpenDBRead(mgr, &db, NULL, G.firstOut);
+ assert(rc == 0);
+
+ VDBManagerRelease(mgr);
+ if (rc == 0) {
+ VTable const *tbl = NULL;
+
+ rc = VDatabaseOpenTableRead(db, &tbl, "SEQUENCE");
+ assert(rc == 0);
+ VDatabaseRelease(db);
+ if (rc == 0) {
+ VCursor const *curs = NULL;
+ rc = VTableCreateCursorRead(tbl, &curs);
+ assert(rc == 0);
+ VTableRelease(tbl);
+ if (rc == 0) {
+ uint32_t colId[9];
+
+ rc = VCursorAddColumn(curs, &colId[0], "TMP_KEY_ID");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[1], "(INSDC:dna:text)READ");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[2], "QUALITY");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[3], "READ_TYPE");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[4], "READ_START");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[5], "READ_LEN");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[6], "SPOT_GROUP");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[7], "READ_FILTER");
+ assert(rc == 0);
+ rc = VCursorAddColumn(curs, &colId[8], "PLATFORM");
+ assert(rc == 0);
+ if (rc == 0) {
+ rc = VCursorOpen(curs);
+ assert(rc == 0);
+ if (rc == 0) {
+ int64_t first;
+ uint64_t count;
+ uint64_t row;
+ TableWriterSeqData data;
+
+ rc = VCursorIdRange(curs, colId[0], &first, &count);
+ assert(rc == 0);
+ for (row = 0; row < count; ++row) {
+ int64_t dummyRowId = 0;
+
+ rc = ReadSequenceData(&data, curs, row+first, colId);
+ assert(rc == 0);
+ if (rc) return rc;
+
+ data.nreads = data.read_start.elements;
+
+ rc = TableWriterSeq_Write(self->tbl, &data, &dummyRowId);
+ assert(rc == 0);
+ if (rc) return rc;
+ }
+ }
+ }
+ VCursorRelease(curs);
+ }
+ }
+ }
+ }
+ }
return TableWriterSeq_TmpKeyStart(self->tbl);
}
@@ -202,146 +470,28 @@ rc_t SequenceUpdateAlignData(Sequence *self, int64_t rowId, unsigned nreads,
data[0].buffer = primeId; data[0].elements = nreads;
data[1].buffer = algnCnt; data[1].elements = nreads;
-
+
return TableWriterSeq_WriteAlignmentData(self->tbl, rowId, &data[0], &data[1]);
}
void SequenceWhack(Sequence *self, bool commit) {
uint64_t dummyRows;
- VDatabaseRelease(self->db);
-
if (self->tbl == NULL)
return;
(void)TableWriterSeq_Whack(self->tbl, commit, &dummyRows);
-}
-
-/* MARK: SequenceRecord Object */
-static
-rc_t SequenceRecordResize(SequenceRecord *self,
- KDataBuffer *storage,
- unsigned numreads,
- unsigned seqLen)
-{
- size_t sz;
- rc_t rc;
-
- sz = seqLen * (sizeof(self->seq[0]) + sizeof(self->qual[0])) +
- numreads * (sizeof(self->ti) +
- sizeof(self->readStart[0]) +
- sizeof(self->readLen[0]) +
- sizeof(self->aligned[0]) +
- sizeof(self->orientation[0]) +
- sizeof(self->alignmentCount[0]) +
- sizeof(self->cskey[0])
- );
- storage->elem_bits = 8;
- rc = KDataBufferResize(storage, sz);
- if (rc)
- return rc;
- self->numreads = numreads;
-
- self->ti = (uint64_t *)storage->base;
- self->readStart = (uint32_t *)&self->ti[numreads];
- self->readLen = (uint32_t *)&self->readStart[numreads];
- self->aligned = (bool *)&self->readLen[numreads];
- self->orientation = (uint8_t *)&self->aligned[numreads];
- self->is_bad = (uint8_t *)&self->orientation[numreads];
- self->alignmentCount = (uint8_t *)&self->is_bad[numreads];
- self->cskey = (char *)&self->alignmentCount[numreads];
- self->seq = (char *)&self->cskey[numreads];
- self->qual = (uint8_t *)&self->seq[seqLen];
-
- self->spotGroup = NULL;
- self->spotGroupLen = 0;
-
- return 0;
-}
-
-rc_t SequenceRecordInit(SequenceRecord *self, unsigned numreads, unsigned readLen[])
-{
- unsigned i;
- unsigned seqlen = 0;
- rc_t rc;
-
- for (i = 0; i != numreads; ++i) {
- seqlen += readLen[i];
- }
- rc = SequenceRecordResize(self, &self->storage, numreads, seqlen);
- if (rc)
- return rc;
- memset(self->storage.base, 0, KDataBufferBytes(&self->storage));
-
- for (seqlen = 0, i = 0; i != numreads; ++i) {
- self->readLen[i] = readLen[i];
- self->readStart[i] = seqlen;
- seqlen += readLen[i];
+ if (G.mode == mode_Remap) {
+ /* This only happens for the second and subsequent loads.
+ * Cleaning up the first load is handled by the bam-load itself
+ * when everything is done.
+ */
+ VTable *tbl = NULL;
+ rc_t rc = VDatabaseOpenTableUpdate(self->db, &tbl, "SEQUENCE");
+ assert(rc == 0);
+ VTableDropColumn(tbl, "TMP_KEY_ID");
+ VTableDropColumn(tbl, "READ");
+ VTableRelease(tbl);
}
- self->numreads = numreads;
- memset(self->cskey, 'T', numreads);
- return 0;
-}
-
-rc_t SequenceRecordAppend(SequenceRecord *self,
- const SequenceRecord *other
- )
-{
- /* save the locations of the original data */
- unsigned const seq = (uint8_t const *)self->seq - (uint8_t const *)self->storage.base;
- unsigned const qual = (uint8_t const *)self->qual - (uint8_t const *)self->storage.base;
- unsigned const cskey = (uint8_t const *)self->cskey - (uint8_t const *)self->storage.base;
- unsigned const alignmentCount = (uint8_t const *)self->alignmentCount - (uint8_t const *)self->storage.base;
- unsigned const is_bad = (uint8_t const *)self->is_bad - (uint8_t const *)self->storage.base;
- unsigned const orientation = (uint8_t const *)self->orientation - (uint8_t const *)self->storage.base;
- unsigned const aligned = (uint8_t const *)self->aligned - (uint8_t const *)self->storage.base;
- unsigned const ti = (uint8_t const *)self->ti - (uint8_t const *)self->storage.base;
- unsigned const readLen = (uint8_t const *)self->readLen - (uint8_t const *)self->storage.base;
- unsigned const readStart = (uint8_t const *)self->readStart - (uint8_t const *)self->storage.base;
-
- rc_t rc;
- unsigned seqlen;
- unsigned otherSeqlen;
- unsigned i;
- unsigned numreads = self->numreads;
-
- for (seqlen = 0, i = 0; i != numreads; ++i) {
- seqlen += self->readLen[i];
- }
- for (otherSeqlen = 0, i = 0; i != other->numreads; ++i) {
- otherSeqlen += other->readLen[i];
- }
-
- rc = SequenceRecordResize(self, &self->storage, self->numreads + other->numreads, seqlen + otherSeqlen);
- if (rc)
- return rc;
- /* this needs to be reverse order from assignment in Resize function
- * these regions can overlap
- */
- memmove(self->qual, &((uint8_t const *)self->storage.base)[qual], seqlen);
- memmove(self->seq, &((uint8_t const *)self->storage.base)[seq], seqlen);
- memmove(self->cskey, &((uint8_t const *)self->storage.base)[cskey], numreads * sizeof(self->cskey[0]));
- memmove(self->alignmentCount, &((uint8_t const *)self->storage.base)[alignmentCount], numreads * sizeof(self->alignmentCount[0]));
- memmove(self->is_bad, &((uint8_t const *)self->storage.base)[is_bad], numreads * sizeof(self->is_bad[0]));
- memmove(self->orientation, &((uint8_t const *)self->storage.base)[orientation], numreads * sizeof(self->orientation[0]));
- memmove(self->aligned, &((uint8_t const *)self->storage.base)[aligned], numreads * sizeof(self->aligned[0]));
- memmove(self->readLen, &((uint8_t const *)self->storage.base)[readLen], numreads * sizeof(self->readLen[0]));
- memmove(self->ti, &((uint8_t const *)self->storage.base)[ti], numreads * sizeof(self->ti[0]));
-
- memcpy(&self->ti[numreads], other->ti, other->numreads * sizeof(self->ti[0]));
- memcpy(&self->readLen[numreads], other->readLen, other->numreads * sizeof(self->readLen[0]));
- memcpy(&self->aligned[numreads], other->aligned, other->numreads * sizeof(self->aligned[0]));
- memcpy(&self->orientation[numreads], other->orientation, other->numreads * sizeof(self->orientation[0]));
- memcpy(&self->is_bad[numreads], other->is_bad, other->numreads * sizeof(self->is_bad[0]));
- memcpy(&self->alignmentCount[numreads], other->alignmentCount, other->numreads * sizeof(self->alignmentCount[0]));
- memcpy(&self->cskey[numreads], other->cskey, other->numreads * sizeof(self->cskey[0]));
- memcpy(&self->seq[seqlen], other->seq, otherSeqlen);
- memcpy(&self->qual[seqlen], other->qual, otherSeqlen);
-
- for (i = 0, seqlen = 0; i != self->numreads; ++i) {
- self->readStart[i] = seqlen;
- seqlen += self->readLen[i];
- }
-
- return 0;
+ VDatabaseRelease(self->db);
}
diff --git a/tools/bam-loader/sequence-writer.h b/tools/bam-loader/sequence-writer.h
index ccd4071..a847db8 100644
--- a/tools/bam-loader/sequence-writer.h
+++ b/tools/bam-loader/sequence-writer.h
@@ -45,22 +45,27 @@ typedef struct s_sequence_record {
uint8_t *orientation;
uint8_t *is_bad;
uint8_t *alignmentCount;
- char *spotGroup;
+ char const *spotGroup;
+ char const *linkageGroup;
bool *aligned;
char *cskey;
uint64_t *ti;
uint64_t keyId;
unsigned spotGroupLen;
- KDataBuffer storage;
- uint8_t numreads;
+ unsigned linkageGroupLen;
+ unsigned numreads;
} SequenceRecord;
-
-rc_t SequenceRecordInit(SequenceRecord *self,
- unsigned numreads, unsigned readLen[]);
-
-rc_t SequenceRecordAppend(SequenceRecord *self,
- const SequenceRecord *other);
+typedef struct s_sequence_record_storage {
+ uint64_t ti[2];
+ uint32_t readStart[2];
+ uint32_t readLen[2];
+ uint8_t orientation[2];
+ uint8_t is_bad[2];
+ uint8_t alignmentCount[2];
+ bool aligned[2];
+ char cskey[2];
+} SequenceRecordStorage;
typedef struct s_sequence {
VDatabase *db;
diff --git a/tools/cache-mgr/cache-mgr.vers b/tools/cache-mgr/cache-mgr.vers
index 35d16fb..097a15a 100644
--- a/tools/cache-mgr/cache-mgr.vers
+++ b/tools/cache-mgr/cache-mgr.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/cache-mgr/cache-mgr.vers.h b/tools/cache-mgr/cache-mgr.vers.h
index 7b7372c..554e80d 100644
--- a/tools/cache-mgr/cache-mgr.vers.h
+++ b/tools/cache-mgr/cache-mgr.vers.h
@@ -1 +1 @@
-#define CACHE_MGR_VERS 0x02050007
+#define CACHE_MGR_VERS 0x02060002
diff --git a/tools/ccextract/ccextract.vers b/tools/ccextract/ccextract.vers
index 35d16fb..097a15a 100644
--- a/tools/ccextract/ccextract.vers
+++ b/tools/ccextract/ccextract.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/cg-load/cg-load.vers b/tools/cg-load/cg-load.vers
index 35d16fb..097a15a 100644
--- a/tools/cg-load/cg-load.vers
+++ b/tools/cg-load/cg-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/copycat/Makefile b/tools/copycat/Makefile
index 14c2367..822f028 100644
--- a/tools/copycat/Makefile
+++ b/tools/copycat/Makefile
@@ -111,3 +111,4 @@ COPYCAT_LIB = \
$(BINDIR)/copycat: $(COPYCAT_OBJ)
$(LD) --exe --vers $(SRCDIR) -o $@ $^ $(COPYCAT_LIB)
+ @ cp $(SRCDIR)/magic $(BINDIR)
diff --git a/tools/copycat/ccfileformat.c b/tools/copycat/ccfileformat.c
index 15eee8c..58ca4db 100644
--- a/tools/copycat/ccfileformat.c
+++ b/tools/copycat/ccfileformat.c
@@ -31,11 +31,14 @@
#include <klib/rc.h>
#include <klib/debug.h>
#include <klib/log.h>
+#include <klib/text.h>
+#include <klib/printf.h>
#include <kfs/file.h>
#include <kfs/fileformat.h>
#include <kfs/ffext.h>
#include <kfs/ffmagic.h>
#include <krypto/wgaencrypt.h>
+#include <kfg/config.h>
#include <atomic32.h>
#include <stddef.h>
#include "copycat-priv.h"
@@ -171,18 +174,37 @@ rc_t CCFileFormatMake (CCFileFormat ** p)
}
else
{
- rc = KExtFileFormatMake (&self->ext, exttable, sizeof (exttable) - 1,
- formattable, sizeof (formattable) - 1);
- if (rc == 0)
+ /* magic file has to be located next to the executable */
+ KConfig* kfg;
+ rc = KConfigMake ( &kfg, NULL );
+ if ( rc == 0 )
{
- rc = KMagicFileFormatMake (&self->magic, magicpath, magictable,
- sizeof (magictable) - 1,
- formattable, sizeof (formattable) - 1);
- if (rc == 0)
+ String* bindir;
+ rc = KConfigReadString ( kfg, "vdb/lib/paths/kfg", &bindir );
+ KConfigRelease ( kfg );
+ if ( rc == 0 )
{
- atomic32_set (&self->refcount , 1);
- *p = self;
- return 0;
+ char magicpath[1024];
+ size_t num_writ;
+ rc = string_printf ( magicpath, sizeof ( magicpath ), &num_writ, "%S/magic", bindir );
+ StringWhack ( bindir );
+ if ( rc == 0 )
+ {
+ rc = KExtFileFormatMake (&self->ext, exttable, sizeof (exttable) - 1,
+ formattable, sizeof (formattable) - 1);
+ if (rc == 0)
+ {
+ rc = KMagicFileFormatMake (&self->magic, magicpath, magictable,
+ sizeof (magictable) - 1,
+ formattable, sizeof (formattable) - 1);
+ if (rc == 0)
+ {
+ atomic32_set (&self->refcount , 1);
+ *p = self;
+ return 0;
+ }
+ }
+ }
}
}
free (self);
diff --git a/tools/copycat/copycat.vers b/tools/copycat/copycat.vers
index 35d16fb..097a15a 100644
--- a/tools/copycat/copycat.vers
+++ b/tools/copycat/copycat.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/copycat/magic b/tools/copycat/magic
new file mode 100644
index 0000000..1f6abea
--- /dev/null
+++ b/tools/copycat/magic
@@ -0,0 +1,18398 @@
+
+#------------------------------------------------------------------------------
+# $File: acorn,v 1.5 2009/09/19 16:28:07 christos Exp $
+# acorn: file(1) magic for files found on Acorn systems
+#
+
+# RISC OS Chunk File Format
+# From RISC OS Programmer's Reference Manual, Appendix D
+# We guess the file type from the type of the first chunk.
+0 lelong 0xc3cbc6c5 RISC OS Chunk data
+>12 string OBJ_ \b, AOF object
+>12 string LIB_ \b, ALF library
+
+# RISC OS AIF, contains "SWI OS_Exit" at offset 16.
+16 lelong 0xef000011 RISC OS AIF executable
+
+# RISC OS Draw files
+# From RISC OS Programmer's Reference Manual, Appendix E
+0 string Draw RISC OS Draw file data
+
+# RISC OS new format font files
+# From RISC OS Programmer's Reference Manual, Appendix E
+0 string FONT\0 RISC OS outline font data,
+>5 byte x version %d
+0 string FONT\1 RISC OS 1bpp font data,
+>5 byte x version %d
+0 string FONT\4 RISC OS 4bpp font data
+>5 byte x version %d
+
+# RISC OS Music files
+# From RISC OS Programmer's Reference Manual, Appendix E
+0 string Maestro\r RISC OS music file
+>8 byte x version %d
+
+>8 byte x type %d
+
+# Digital Symphony data files
+# From: Bernard Jungen (bern8817 at euphonynet.be)
+0 string \x02\x01\x13\x13\x13\x01\x0d\x10 Digital Symphony sound sample (RISC OS),
+>8 byte x version %d,
+>9 pstring x named "%s",
+>(9.b+19) byte =0 8-bit logarithmic
+>(9.b+19) byte =1 LZW-compressed linear
+>(9.b+19) byte =2 8-bit linear signed
+>(9.b+19) byte =3 16-bit linear signed
+>(9.b+19) byte =4 SigmaDelta-compressed linear
+>(9.b+19) byte =5 SigmaDelta-compressed logarithmic
+>(9.b+19) byte >5 unknown format
+
+0 string \x02\x01\x13\x13\x14\x12\x01\x0b Digital Symphony song (RISC OS),
+>8 byte x version %d,
+>9 byte =1 1 voice,
+>9 byte !1 %d voices,
+>10 leshort =1 1 track,
+>10 leshort !1 %d tracks,
+>12 leshort =1 1 pattern
+>12 leshort !1 %d patterns
+
+0 string \x02\x01\x13\x13\x10\x14\x12\x0e
+>9 byte =0 Digital Symphony sequence (RISC OS),
+>>8 byte x version %d,
+>>10 byte =1 1 line,
+>>10 byte !1 %d lines,
+>>11 leshort =1 1 position
+>>11 leshort !1 %d positions
+>9 byte =1 Digital Symphony pattern data (RISC OS),
+>>8 byte x version %d,
+>>10 leshort =1 1 pattern
+>>10 leshort !1 %d patterns
+
+#------------------------------------------------------------------------------
+# $File: adi,v 1.4 2009/09/19 16:28:07 christos Exp $
+# adi: file(1) magic for ADi's objects
+# From Gregory McGarry <g.mcgarry at ieee.org>
+#
+0 leshort 0x521c COFF DSP21k
+>18 lelong &02 executable,
+>18 lelong ^02
+>>18 lelong &01 static object,
+>>18 lelong ^01 relocatable object,
+>18 lelong &010 stripped
+>18 lelong ^010 not stripped
+
+#------------------------------------------------------------------------------
+# $File: adventure,v 1.10 2009/09/19 16:28:07 christos Exp $
+# adventure: file(1) magic for Adventure game files
+#
+# from Allen Garvin <earendil at faeryland.tamu-commerce.edu>
+# Edited by Dave Chapeskie <dchapes at ddm.on.ca> Jun 28, 1998
+# Edited by Chris Chittleborough <cchittleborough at yahoo.com.au>, March 2002
+#
+# ALAN
+# I assume there are other, lower versions, but these are the only ones I
+# saw in the archive.
+0 beshort 0x0206 ALAN game data
+>2 byte <10 version 2.6%d
+
+
+# Infocom (see z-machine)
+#------------------------------------------------------------------------------
+# Z-machine: file(1) magic for Z-machine binaries.
+#
+# This will match ${TEX_BASE}/texmf/omega/ocp/char2uni/inbig5.ocp which
+# appears to be a version-0 Z-machine binary.
+#
+# The (false match) message is to correct that behavior. Perhaps it is
+# not needed.
+#
+16 belong&0xfe00f0f0 0x3030 Infocom game data
+>0 ubyte 0 (false match)
+>0 ubyte >0 (Z-machine %d,
+>>2 ubeshort x Release %d /
+>>18 string >\0 Serial %.6s)
+
+#------------------------------------------------------------------------------
+# Glulx: file(1) magic for Glulx binaries.
+#
+# I haven't checked for false matches yet.
+#
+0 string Glul Glulx game data
+>4 beshort x (Version %d
+>>6 byte x \b.%d
+>>8 byte x \b.%d)
+>36 string Info Compiled by Inform
+
+
+
+# For Quetzal and blorb magic see iff
+
+
+# TADS (Text Adventure Development System)
+# All files are machine-independent (games compile to byte-code) and are tagged
+# with a version string of the form "V2.<digit>.<digit>\0" (but TADS 3 is
+# on the way).
+# Game files start with "TADS2 bin\n\r\032\0" then the compiler version.
+0 string TADS2\ bin TADS
+>9 belong !0x0A0D1A00 game data, CORRUPTED
+>9 belong 0x0A0D1A00
+>>13 string >\0 %s game data
+# Resource files start with "TADS2 rsc\n\r\032\0" then the compiler version.
+0 string TADS2\ rsc TADS
+>9 belong !0x0A0D1A00 resource data, CORRUPTED
+>9 belong 0x0A0D1A00
+>>13 string >\0 %s resource data
+# Some saved game files start with "TADS2 save/g\n\r\032\0", a little-endian
+# 2-byte length N, the N-char name of the game file *without* a NUL (darn!),
+# "TADS2 save\n\r\032\0" and the interpreter version.
+0 string TADS2\ save/g TADS
+>12 belong !0x0A0D1A00 saved game data, CORRUPTED
+>12 belong 0x0A0D1A00
+>>(16.s+32) string >\0 %s saved game data
+# Other saved game files start with "TADS2 save\n\r\032\0" and the interpreter
+# version.
+0 string TADS2\ save TADS
+>10 belong !0x0A0D1A00 saved game data, CORRUPTED
+>10 belong 0x0A0D1A00
+>>14 string >\0 %s saved game data
+
+# Danny Milosavljevic <danny.milo at gmx.net>
+# these are adrift (adventure game standard) game files, extension .taf
+# depending on version magic continues with 0x93453E6139FA (V 4.0)
+# 0x9445376139FA (V 3.90)
+# 0x9445366139FA (V 3.80)
+# this is from source (http://www.adrift.org.uk/) and I have some taf
+# files, and checked them.
+#0 belong 0x3C423FC9
+#>4 belong 0x6A87C2CF Adrift game file
+#!:mime application/x-adrift
+
+#------------------------------------------------------------------------------
+# $File: allegro,v 1.4 2009/09/19 16:28:07 christos Exp $
+# allegro: file(1) magic for Allegro datafiles
+# Toby Deshane <hac at shoelace.digivill.net>
+#
+0 belong 0x736C6821 Allegro datafile (packed)
+0 belong 0x736C682E Allegro datafile (not packed/autodetect)
+0 belong 0x736C682B Allegro datafile (appended exe data)
+
+#------------------------------------------------------------------------------
+# $File: alliant,v 1.7 2009/09/19 16:28:07 christos Exp $
+# alliant: file(1) magic for Alliant FX series a.out files
+#
+# If the FX series is the one that had a processor with a 68K-derived
+# instruction set, the "short" should probably become "beshort" and the
+# "long" should probably become "belong".
+# If it's the i860-based one, they should probably become either the
+# big-endian or little-endian versions, depending on the mode they ran
+# the 860 in....
+#
+0 short 0420 0420 Alliant virtual executable
+>2 short &0x0020 common library
+>16 long >0 not stripped
+0 short 0421 0421 Alliant compact executable
+>2 short &0x0020 common library
+>16 long >0 not stripped
+
+#------------------------------------------------------------------------------
+# $File: alpha,v 1.7 2009/09/19 16:28:07 christos Exp $
+# alpha architecture description
+#
+
+0 leshort 0603 COFF format alpha
+>22 leshort&030000 !020000 executable
+>24 leshort 0410 pure
+>24 leshort 0413 paged
+>22 leshort&020000 !0 dynamically linked
+>16 lelong !0 not stripped
+>16 lelong 0 stripped
+>22 leshort&030000 020000 shared library
+>24 leshort 0407 object
+>27 byte x - version %d
+>26 byte x .%d
+>28 byte x -%d
+
+# Basic recognition of Digital UNIX core dumps - Mike Bremford <mike at opac.bl.uk>
+#
+# The actual magic number is just "Core", followed by a 2-byte version
+# number; however, treating any file that begins with "Core" as a Digital
+# UNIX core dump file may produce too many false hits, so we include one
+# byte of the version number as well; DU 5.0 appears only to be up to
+# version 2.
+#
+0 string Core\001 Alpha COFF format core dump (Digital UNIX)
+>24 string >\0 \b, from '%s'
+0 string Core\002 Alpha COFF format core dump (Digital UNIX)
+>24 string >\0 \b, from '%s'
+
+
+#------------------------------------------------------------------------------
+# $File: amanda,v 1.5 2009/09/19 16:28:07 christos Exp $
+# amanda: file(1) magic for amanda file format
+#
+0 string AMANDA:\ AMANDA
+>8 string TAPESTART\ DATE tape header file,
+>>23 string X
+>>>25 string >\ Unused %s
+>>23 string >\ DATE %s
+>8 string FILE\ dump file,
+>>13 string >\ DATE %s
+
+#------------------------------------------------------------------------------
+# $File: amigaos,v 1.14 2009/09/19 16:28:07 christos Exp $
+# amigaos: file(1) magic for AmigaOS binary formats:
+
+#
+# From ignatios at cs.uni-bonn.de (Ignatios Souvatzis)
+#
+0 belong 0x000003fa AmigaOS shared library
+0 belong 0x000003f3 AmigaOS loadseg()ble executable/binary
+0 belong 0x000003e7 AmigaOS object/library data
+#
+0 beshort 0xe310 Amiga Workbench
+>2 beshort 1
+>>48 byte 1 disk icon
+>>48 byte 2 drawer icon
+>>48 byte 3 tool icon
+>>48 byte 4 project icon
+>>48 byte 5 garbage icon
+>>48 byte 6 device icon
+>>48 byte 7 kickstart icon
+>>48 byte 8 workbench application icon
+>2 beshort >1 icon, vers. %d
+#
+# various sound formats from the Amiga
+# Götz Waschk <waschk at informatik.uni-rostock.de>
+#
+0 string FC14 Future Composer 1.4 Module sound file
+0 string SMOD Future Composer 1.3 Module sound file
+0 string AON4artofnoise Art Of Noise Module sound file
+1 string MUGICIAN/SOFTEYES Mugician Module sound file
+58 string SIDMON\ II\ -\ THE Sidmon 2.0 Module sound file
+0 string Synth4.0 Synthesis Module sound file
+0 string ARP. The Holy Noise Module sound file
+0 string BeEp\0 JamCracker Module sound file
+0 string COSO\0 Hippel-COSO Module sound file
+# Too simple (short, pure ASCII, deep), MPi
+#26 string V.3 Brian Postma's Soundmon Module sound file v3
+#26 string BPSM Brian Postma's Soundmon Module sound file v3
+#26 string V.2 Brian Postma's Soundmon Module sound file v2
+
+# The following are from: "Stefan A. Haubenthal" <polluks at web.de>
+0 beshort 0x0f00 AmigaOS bitmap font
+0 beshort 0x0f03 AmigaOS outline font
+0 belong 0x80001001 AmigaOS outline tag
+0 string ##\ version catalog translation
+0 string EMOD\0 Amiga E module
+8 string ECXM\0 ECX module
+0 string/c @database AmigaGuide file
+
+# Amiga disk types
+#
+0 string RDSK Rigid Disk Block
+>160 string x on %.24s
+0 string DOS\0 Amiga DOS disk
+0 string DOS\1 Amiga FFS disk
+0 string DOS\2 Amiga Inter DOS disk
+0 string DOS\3 Amiga Inter FFS disk
+0 string DOS\4 Amiga Fastdir DOS disk
+0 string DOS\5 Amiga Fastdir FFS disk
+0 string KICK Kickstart disk
+
+# From: Alex Beregszaszi <alex at fsn.hu>
+0 string LZX LZX compressed archive (Amiga)
+
+
+#------------------------------------------------------------------------------
+# $File: animation,v 1.39 2009/09/27 19:02:12 christos Exp $
+# animation: file(1) magic for animation/movie formats
+#
+# animation formats
+# MPEG, FLI, DL originally from vax at ccwf.cc.utexas.edu (VaX#n8)
+# FLC, SGI, Apple originally from Daniel Quinlan (quinlan at yggdrasil.com)
+
+# SGI and Apple formats
+0 string MOVI Silicon Graphics movie file
+!:mime video/x-sgi-movie
+4 string moov Apple QuickTime
+!:mime video/quicktime
+>12 string mvhd \b movie (fast start)
+>12 string mdra \b URL
+>12 string cmov \b movie (fast start, compressed header)
+>12 string rmra \b multiple URLs
+4 string mdat Apple QuickTime movie (unoptimized)
+!:mime video/quicktime
+#4 string wide Apple QuickTime movie (unoptimized)
+#!:mime video/quicktime
+#4 string skip Apple QuickTime movie (modified)
+#!:mime video/quicktime
+#4 string free Apple QuickTime movie (modified)
+#!:mime video/quicktime
+4 string idsc Apple QuickTime image (fast start)
+!:mime image/x-quicktime
+#4 string idat Apple QuickTime image (unoptimized)
+#!:mime image/x-quicktime
+4 string pckg Apple QuickTime compressed archive
+!:mime application/x-quicktime-player
+4 string/W jP JPEG 2000 image
+!:mime image/jp2
+4 string ftyp ISO Media
+>8 string isom \b, MPEG v4 system, version 1
+!:mime video/mp4
+>8 string iso2 \b, MPEG v4 system, part 12 revision
+>8 string mp41 \b, MPEG v4 system, version 1
+!:mime video/mp4
+>8 string mp42 \b, MPEG v4 system, version 2
+!:mime video/mp4
+>8 string mp7t \b, MPEG v4 system, MPEG v7 XML
+>8 string mp7b \b, MPEG v4 system, MPEG v7 binary XML
+>8 string/W jp2 \b, JPEG 2000
+!:mime image/jp2
+>8 string 3gp \b, MPEG v4 system, 3GPP
+!:mime video/3gpp
+>>11 byte 4 \b v4 (H.263/AMR GSM 6.10)
+>>11 byte 5 \b v5 (H.263/AMR GSM 6.10)
+>>11 byte 6 \b v6 (ITU H.264/AMR GSM 6.10)
+>8 string mmp4 \b, MPEG v4 system, 3GPP Mobile
+!:mime video/mp4
+>8 string avc1 \b, MPEG v4 system, 3GPP JVT AVC
+!:mime video/3gpp
+>8 string/W M4A \b, MPEG v4 system, iTunes AAC-LC
+!:mime audio/mp4
+>8 string/W M4V \b, MPEG v4 system, iTunes AVC-LC
+!:mime video/mp4
+>8 string/W M4P \b, MPEG v4 system, iTunes AES encrypted
+>8 string/W M4B \b, MPEG v4 system, iTunes bookmarked
+>8 string/W qt \b, Apple QuickTime movie
+!:mime video/quicktime
+
+# MPEG sequences
+# Scans for all common MPEG header start codes
+0 belong 0x00000001
+>4 byte&0x1F 0x07 JVT NAL sequence, H.264 video
+>>5 byte 66 \b, baseline
+>>5 byte 77 \b, main
+>>5 byte 88 \b, extended
+>>7 byte x \b @ L %u
+0 belong&0xFFFFFF00 0x00000100
+>3 byte 0xBA MPEG sequence
+!:mime video/mpeg
+>>4 byte &0x40 \b, v2, program multiplex
+>>4 byte ^0x40 \b, v1, system multiplex
+>3 byte 0xBB MPEG sequence, v1/2, multiplex (missing pack header)
+>3 byte&0x1F 0x07 MPEG sequence, H.264 video
+>>4 byte 66 \b, baseline
+>>4 byte 77 \b, main
+>>4 byte 88 \b, extended
+>>6 byte x \b @ L %u
+>3 byte 0xB0 MPEG sequence, v4
+!:mime video/mpeg4-generic
+>>5 belong 0x000001B5
+>>>9 byte &0x80
+>>>>10 byte&0xF0 16 \b, video
+>>>>10 byte&0xF0 32 \b, still texture
+>>>>10 byte&0xF0 48 \b, mesh
+>>>>10 byte&0xF0 64 \b, face
+>>>9 byte&0xF8 8 \b, video
+>>>9 byte&0xF8 16 \b, still texture
+>>>9 byte&0xF8 24 \b, mesh
+>>>9 byte&0xF8 32 \b, face
+>>4 byte 1 \b, simple @ L1
+>>4 byte 2 \b, simple @ L2
+>>4 byte 3 \b, simple @ L3
+>>4 byte 4 \b, simple @ L0
+>>4 byte 17 \b, simple scalable @ L1
+>>4 byte 18 \b, simple scalable @ L2
+>>4 byte 33 \b, core @ L1
+>>4 byte 34 \b, core @ L2
+>>4 byte 50 \b, main @ L2
+>>4 byte 51 \b, main @ L3
+>>4 byte 53 \b, main @ L4
+>>4 byte 66 \b, n-bit @ L2
+>>4 byte 81 \b, scalable texture @ L1
+>>4 byte 97 \b, simple face animation @ L1
+>>4 byte 98 \b, simple face animation @ L2
+>>4 byte 99 \b, simple face basic animation @ L1
+>>4 byte 100 \b, simple face basic animation @ L2
+>>4 byte 113 \b, basic animation text @ L1
+>>4 byte 114 \b, basic animation text @ L2
+>>4 byte 129 \b, hybrid @ L1
+>>4 byte 130 \b, hybrid @ L2
+>>4 byte 145 \b, advanced RT simple @ L!
+>>4 byte 146 \b, advanced RT simple @ L2
+>>4 byte 147 \b, advanced RT simple @ L3
+>>4 byte 148 \b, advanced RT simple @ L4
+>>4 byte 161 \b, core scalable @ L1
+>>4 byte 162 \b, core scalable @ L2
+>>4 byte 163 \b, core scalable @ L3
+>>4 byte 177 \b, advanced coding efficiency @ L1
+>>4 byte 178 \b, advanced coding efficiency @ L2
+>>4 byte 179 \b, advanced coding efficiency @ L3
+>>4 byte 180 \b, advanced coding efficiency @ L4
+>>4 byte 193 \b, advanced core @ L1
+>>4 byte 194 \b, advanced core @ L2
+>>4 byte 209 \b, advanced scalable texture @ L1
+>>4 byte 210 \b, advanced scalable texture @ L2
+>>4 byte 211 \b, advanced scalable texture @ L3
+>>4 byte 225 \b, simple studio @ L1
+>>4 byte 226 \b, simple studio @ L2
+>>4 byte 227 \b, simple studio @ L3
+>>4 byte 228 \b, simple studio @ L4
+>>4 byte 229 \b, core studio @ L1
+>>4 byte 230 \b, core studio @ L2
+>>4 byte 231 \b, core studio @ L3
+>>4 byte 232 \b, core studio @ L4
+>>4 byte 240 \b, advanced simple @ L0
+>>4 byte 241 \b, advanced simple @ L1
+>>4 byte 242 \b, advanced simple @ L2
+>>4 byte 243 \b, advanced simple @ L3
+>>4 byte 244 \b, advanced simple @ L4
+>>4 byte 245 \b, advanced simple @ L5
+>>4 byte 247 \b, advanced simple @ L3b
+>>4 byte 248 \b, FGS @ L0
+>>4 byte 249 \b, FGS @ L1
+>>4 byte 250 \b, FGS @ L2
+>>4 byte 251 \b, FGS @ L3
+>>4 byte 252 \b, FGS @ L4
+>>4 byte 253 \b, FGS @ L5
+>3 byte 0xB5 MPEG sequence, v4
+>>4 byte &0x80
+>>>5 byte&0xF0 16 \b, video (missing profile header)
+>>>5 byte&0xF0 32 \b, still texture (missing profile header)
+>>>5 byte&0xF0 48 \b, mesh (missing profile header)
+>>>5 byte&0xF0 64 \b, face (missing profile header)
+>>4 byte&0xF8 8 \b, video (missing profile header)
+>>4 byte&0xF8 16 \b, still texture (missing profile header)
+>>4 byte&0xF8 24 \b, mesh (missing profile header)
+>>4 byte&0xF8 32 \b, face (missing profile header)
+>3 byte 0xB3 MPEG sequence
+>>12 belong 0x000001B8 \b, v1, progressive Y'CbCr 4:2:0 video
+>>12 belong 0x000001B2 \b, v1, progressive Y'CbCr 4:2:0 video
+>>12 belong 0x000001B5 \b, v2,
+>>>16 byte&0x0F 1 \b HP
+>>>16 byte&0x0F 2 \b Spt
+>>>16 byte&0x0F 3 \b SNR
+>>>16 byte&0x0F 4 \b MP
+>>>16 byte&0x0F 5 \b SP
+>>>17 byte&0xF0 64 \b at HL
+>>>17 byte&0xF0 96 \b at H-14
+>>>17 byte&0xF0 128 \b at ML
+>>>17 byte&0xF0 160 \b at LL
+>>>17 byte &0x08 \b progressive
+>>>17 byte ^0x08 \b interlaced
+>>>17 byte&0x06 2 \b Y'CbCr 4:2:0 video
+>>>17 byte&0x06 4 \b Y'CbCr 4:2:2 video
+>>>17 byte&0x06 6 \b Y'CbCr 4:4:4 video
+>>11 byte &0x02
+>>>75 byte &0x01
+>>>>140 belong 0x000001B8 \b, v1, progressive Y'CbCr 4:2:0 video
+>>>>140 belong 0x000001B2 \b, v1, progressive Y'CbCr 4:2:0 video
+>>>>140 belong 0x000001B5 \b, v2,
+>>>>>144 byte&0x0F 1 \b HP
+>>>>>144 byte&0x0F 2 \b Spt
+>>>>>144 byte&0x0F 3 \b SNR
+>>>>>144 byte&0x0F 4 \b MP
+>>>>>144 byte&0x0F 5 \b SP
+>>>>>145 byte&0xF0 64 \b at HL
+>>>>>145 byte&0xF0 96 \b at H-14
+>>>>>145 byte&0xF0 128 \b at ML
+>>>>>145 byte&0xF0 160 \b at LL
+>>>>>145 byte &0x08 \b progressive
+>>>>>145 byte ^0x08 \b interlaced
+>>>>>145 byte&0x06 2 \b Y'CbCr 4:2:0 video
+>>>>>145 byte&0x06 4 \b Y'CbCr 4:2:2 video
+>>>>>145 byte&0x06 6 \b Y'CbCr 4:4:4 video
+>>76 belong 0x000001B8 \b, v1, progressive Y'CbCr 4:2:0 video
+>>76 belong 0x000001B2 \b, v1, progressive Y'CbCr 4:2:0 video
+>>76 belong 0x000001B5 \b, v2,
+>>>80 byte&0x0F 1 \b HP
+>>>80 byte&0x0F 2 \b Spt
+>>>80 byte&0x0F 3 \b SNR
+>>>80 byte&0x0F 4 \b MP
+>>>80 byte&0x0F 5 \b SP
+>>>81 byte&0xF0 64 \b at HL
+>>>81 byte&0xF0 96 \b at H-14
+>>>81 byte&0xF0 128 \b at ML
+>>>81 byte&0xF0 160 \b at LL
+>>>81 byte &0x08 \b progressive
+>>>81 byte ^0x08 \b interlaced
+>>>81 byte&0x06 2 \b Y'CbCr 4:2:0 video
+>>>81 byte&0x06 4 \b Y'CbCr 4:2:2 video
+>>>81 byte&0x06 6 \b Y'CbCr 4:4:4 video
+>>4 belong&0xFFFFFF00 0x78043800 \b, HD-TV 1920P
+>>>7 byte&0xF0 0x10 \b, 16:9
+>>4 belong&0xFFFFFF00 0x50002D00 \b, SD-TV 1280I
+>>>7 byte&0xF0 0x10 \b, 16:9
+>>4 belong&0xFFFFFF00 0x30024000 \b, PAL Capture
+>>>7 byte&0xF0 0x10 \b, 4:3
+>>4 beshort&0xFFF0 0x2C00 \b, 4CIF
+>>>5 beshort&0x0FFF 0x01E0 \b NTSC
+>>>5 beshort&0x0FFF 0x0240 \b PAL
+>>>7 byte&0xF0 0x20 \b, 4:3
+>>>7 byte&0xF0 0x30 \b, 16:9
+>>>7 byte&0xF0 0x40 \b, 11:5
+>>>7 byte&0xF0 0x80 \b, PAL 4:3
+>>>7 byte&0xF0 0xC0 \b, NTSC 4:3
+>>4 belong&0xFFFFFF00 0x2801E000 \b, LD-TV 640P
+>>>7 byte&0xF0 0x10 \b, 4:3
+>>4 belong&0xFFFFFF00 0x1400F000 \b, 320x240
+>>>7 byte&0xF0 0x10 \b, 4:3
+>>4 belong&0xFFFFFF00 0x0F00A000 \b, 240x160
+>>>7 byte&0xF0 0x10 \b, 4:3
+>>4 belong&0xFFFFFF00 0x0A007800 \b, 160x120
+>>>7 byte&0xF0 0x10 \b, 4:3
+>>4 beshort&0xFFF0 0x1600 \b, CIF
+>>>5 beshort&0x0FFF 0x00F0 \b NTSC
+>>>5 beshort&0x0FFF 0x0120 \b PAL
+>>>7 byte&0xF0 0x20 \b, 4:3
+>>>7 byte&0xF0 0x30 \b, 16:9
+>>>7 byte&0xF0 0x40 \b, 11:5
+>>>7 byte&0xF0 0x80 \b, PAL 4:3
+>>>7 byte&0xF0 0xC0 \b, NTSC 4:3
+>>>5 beshort&0x0FFF 0x0240 \b PAL 625
+>>>>7 byte&0xF0 0x20 \b, 4:3
+>>>>7 byte&0xF0 0x30 \b, 16:9
+>>>>7 byte&0xF0 0x40 \b, 11:5
+>>4 beshort&0xFFF0 0x2D00 \b, CCIR/ITU
+>>>5 beshort&0x0FFF 0x01E0 \b NTSC 525
+>>>5 beshort&0x0FFF 0x0240 \b PAL 625
+>>>7 byte&0xF0 0x20 \b, 4:3
+>>>7 byte&0xF0 0x30 \b, 16:9
+>>>7 byte&0xF0 0x40 \b, 11:5
+>>4 beshort&0xFFF0 0x1E00 \b, SVCD
+>>>5 beshort&0x0FFF 0x01E0 \b NTSC 525
+>>>5 beshort&0x0FFF 0x0240 \b PAL 625
+>>>7 byte&0xF0 0x20 \b, 4:3
+>>>7 byte&0xF0 0x30 \b, 16:9
+>>>7 byte&0xF0 0x40 \b, 11:5
+>>7 byte&0x0F 1 \b, 23.976 fps
+>>7 byte&0x0F 2 \b, 24 fps
+>>7 byte&0x0F 3 \b, 25 fps
+>>7 byte&0x0F 4 \b, 29.97 fps
+>>7 byte&0x0F 5 \b, 30 fps
+>>7 byte&0x0F 6 \b, 50 fps
+>>7 byte&0x0F 7 \b, 59.94 fps
+>>7 byte&0x0F 8 \b, 60 fps
+>>11 byte &0x04 \b, Constrained
+
+# MPEG ADTS Audio (*.mpx/mxa/aac)
+# from dreesen at math.fu-berlin.de
+# modified to fully support MPEG ADTS
+
+# MP3, M1A
+# modified by Joerg Jenderek
+# GRR the original tests are too common for many DOS files
+# so don't accept as MP3 until we've tested the rate
+0 beshort&0xFFFE 0xFFFA
+# rates
+>2 byte&0xF0 0x10 MPEG ADTS, layer III, v1, 32 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x20 MPEG ADTS, layer III, v1, 40 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x30 MPEG ADTS, layer III, v1, 48 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x40 MPEG ADTS, layer III, v1, 56 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x50 MPEG ADTS, layer III, v1, 64 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x60 MPEG ADTS, layer III, v1, 80 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x70 MPEG ADTS, layer III, v1, 96 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x80 MPEG ADTS, layer III, v1, 112 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0x90 MPEG ADTS, layer III, v1, 128 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0xA0 MPEG ADTS, layer III, v1, 160 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0xB0 MPEG ADTS, layer III, v1, 192 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0xC0 MPEG ADTS, layer III, v1, 224 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0xD0 MPEG ADTS, layer III, v1, 256 kbps
+!:mime audio/mpeg
+>2 byte&0xF0 0xE0 MPEG ADTS, layer III, v1, 320 kbps
+!:mime audio/mpeg
+# timing
+>2 byte&0x0C 0x00 \b, 44.1 kHz
+>2 byte&0x0C 0x04 \b, 48 kHz
+>2 byte&0x0C 0x08 \b, 32 kHz
+# channels/options
+>3 byte&0xC0 0x00 \b, Stereo
+>3 byte&0xC0 0x40 \b, JntStereo
+>3 byte&0xC0 0x80 \b, 2x Monaural
+>3 byte&0xC0 0xC0 \b, Monaural
+#>1 byte ^0x01 \b, Data Verify
+#>2 byte &0x02 \b, Packet Pad
+#>2 byte &0x01 \b, Custom Flag
+#>3 byte &0x08 \b, Copyrighted
+#>3 byte &0x04 \b, Original Source
+#>3 byte&0x03 1 \b, NR: 50/15 ms
+#>3 byte&0x03 3 \b, NR: CCIT J.17
+
+# MP2, M1A
+0 beshort&0xFFFE 0xFFFC MPEG ADTS, layer II, v1
+!:mime audio/mpeg
+# rates
+>2 byte&0xF0 0x10 \b, 32 kbps
+>2 byte&0xF0 0x20 \b, 48 kbps
+>2 byte&0xF0 0x30 \b, 56 kbps
+>2 byte&0xF0 0x40 \b, 64 kbps
+>2 byte&0xF0 0x50 \b, 80 kbps
+>2 byte&0xF0 0x60 \b, 96 kbps
+>2 byte&0xF0 0x70 \b, 112 kbps
+>2 byte&0xF0 0x80 \b, 128 kbps
+>2 byte&0xF0 0x90 \b, 160 kbps
+>2 byte&0xF0 0xA0 \b, 192 kbps
+>2 byte&0xF0 0xB0 \b, 224 kbps
+>2 byte&0xF0 0xC0 \b, 256 kbps
+>2 byte&0xF0 0xD0 \b, 320 kbps
+>2 byte&0xF0 0xE0 \b, 384 kbps
+# timing
+>2 byte&0x0C 0x00 \b, 44.1 kHz
+>2 byte&0x0C 0x04 \b, 48 kHz
+>2 byte&0x0C 0x08 \b, 32 kHz
+# channels/options
+>3 byte&0xC0 0x00 \b, Stereo
+>3 byte&0xC0 0x40 \b, JntStereo
+>3 byte&0xC0 0x80 \b, 2x Monaural
+>3 byte&0xC0 0xC0 \b, Monaural
+#>1 byte ^0x01 \b, Data Verify
+#>2 byte &0x02 \b, Packet Pad
+#>2 byte &0x01 \b, Custom Flag
+#>3 byte &0x08 \b, Copyrighted
+#>3 byte &0x04 \b, Original Source
+#>3 byte&0x03 1 \b, NR: 50/15 ms
+#>3 byte&0x03 3 \b, NR: CCIT J.17
+
+# MPA, M1A
+# updated by Joerg Jenderek
+# GRR the original tests are too common for many DOS files, so test 32 <= kbits <= 448
+# GRR this test is still too general as it catches a BOM of UTF-16 files (0xFFFE)
+# FIXME: Almost all little endian UTF-16 text with BOM are clobbered by these entries
+#0 beshort&0xFFFE 0xFFFE
+#>2 ubyte&0xF0 >0x0F
+#>>2 ubyte&0xF0 <0xE1 MPEG ADTS, layer I, v1
+## rate
+#>>>2 byte&0xF0 0x10 \b, 32 kbps
+#>>>2 byte&0xF0 0x20 \b, 64 kbps
+#>>>2 byte&0xF0 0x30 \b, 96 kbps
+#>>>2 byte&0xF0 0x40 \b, 128 kbps
+#>>>2 byte&0xF0 0x50 \b, 160 kbps
+#>>>2 byte&0xF0 0x60 \b, 192 kbps
+#>>>2 byte&0xF0 0x70 \b, 224 kbps
+#>>>2 byte&0xF0 0x80 \b, 256 kbps
+#>>>2 byte&0xF0 0x90 \b, 288 kbps
+#>>>2 byte&0xF0 0xA0 \b, 320 kbps
+#>>>2 byte&0xF0 0xB0 \b, 352 kbps
+#>>>2 byte&0xF0 0xC0 \b, 384 kbps
+#>>>2 byte&0xF0 0xD0 \b, 416 kbps
+#>>>2 byte&0xF0 0xE0 \b, 448 kbps
+## timing
+#>>>2 byte&0x0C 0x00 \b, 44.1 kHz
+#>>>2 byte&0x0C 0x04 \b, 48 kHz
+#>>>2 byte&0x0C 0x08 \b, 32 kHz
+## channels/options
+#>>>3 byte&0xC0 0x00 \b, Stereo
+#>>>3 byte&0xC0 0x40 \b, JntStereo
+#>>>3 byte&0xC0 0x80 \b, 2x Monaural
+#>>>3 byte&0xC0 0xC0 \b, Monaural
+##>1 byte ^0x01 \b, Data Verify
+##>2 byte &0x02 \b, Packet Pad
+##>2 byte &0x01 \b, Custom Flag
+##>3 byte &0x08 \b, Copyrighted
+##>3 byte &0x04 \b, Original Source
+##>3 byte&0x03 1 \b, NR: 50/15 ms
+##>3 byte&0x03 3 \b, NR: CCIT J.17
+
+# MP3, M2A
+0 beshort&0xFFFE 0xFFF2 MPEG ADTS, layer III, v2
+!:mime audio/mpeg
+# rate
+>2 byte&0xF0 0x10 \b, 8 kbps
+>2 byte&0xF0 0x20 \b, 16 kbps
+>2 byte&0xF0 0x30 \b, 24 kbps
+>2 byte&0xF0 0x40 \b, 32 kbps
+>2 byte&0xF0 0x50 \b, 40 kbps
+>2 byte&0xF0 0x60 \b, 48 kbps
+>2 byte&0xF0 0x70 \b, 56 kbps
+>2 byte&0xF0 0x80 \b, 64 kbps
+>2 byte&0xF0 0x90 \b, 80 kbps
+>2 byte&0xF0 0xA0 \b, 96 kbps
+>2 byte&0xF0 0xB0 \b, 112 kbps
+>2 byte&0xF0 0xC0 \b, 128 kbps
+>2 byte&0xF0 0xD0 \b, 144 kbps
+>2 byte&0xF0 0xE0 \b, 160 kbps
+# timing
+>2 byte&0x0C 0x00 \b, 22.05 kHz
+>2 byte&0x0C 0x04 \b, 24 kHz
+>2 byte&0x0C 0x08 \b, 16 kHz
+# channels/options
+>3 byte&0xC0 0x00 \b, Stereo
+>3 byte&0xC0 0x40 \b, JntStereo
+>3 byte&0xC0 0x80 \b, 2x Monaural
+>3 byte&0xC0 0xC0 \b, Monaural
+#>1 byte ^0x01 \b, Data Verify
+#>2 byte &0x02 \b, Packet Pad
+#>2 byte &0x01 \b, Custom Flag
+#>3 byte &0x08 \b, Copyrighted
+#>3 byte &0x04 \b, Original Source
+#>3 byte&0x03 1 \b, NR: 50/15 ms
+#>3 byte&0x03 3 \b, NR: CCIT J.17
+
+# MP2, M2A
+0 beshort&0xFFFE 0xFFF4 MPEG ADTS, layer II, v2
+# rate
+>2 byte&0xF0 0x10 \b, 8 kbps
+>2 byte&0xF0 0x20 \b, 16 kbps
+>2 byte&0xF0 0x30 \b, 24 kbps
+>2 byte&0xF0 0x40 \b, 32 kbps
+>2 byte&0xF0 0x50 \b, 40 kbps
+>2 byte&0xF0 0x60 \b, 48 kbps
+>2 byte&0xF0 0x70 \b, 56 kbps
+>2 byte&0xF0 0x80 \b, 64 kbps
+>2 byte&0xF0 0x90 \b, 80 kbps
+>2 byte&0xF0 0xA0 \b, 96 kbps
+>2 byte&0xF0 0xB0 \b, 112 kbps
+>2 byte&0xF0 0xC0 \b, 128 kbps
+>2 byte&0xF0 0xD0 \b, 144 kbps
+>2 byte&0xF0 0xE0 \b, 160 kbps
+# timing
+>2 byte&0x0C 0x00 \b, 22.05 kHz
+>2 byte&0x0C 0x04 \b, 24 kHz
+>2 byte&0x0C 0x08 \b, 16 kHz
+# channels/options
+>3 byte&0xC0 0x00 \b, Stereo
+>3 byte&0xC0 0x40 \b, JntStereo
+>3 byte&0xC0 0x80 \b, 2x Monaural
+>3 byte&0xC0 0xC0 \b, Monaural
+#>1 byte ^0x01 \b, Data Verify
+#>2 byte &0x02 \b, Packet Pad
+#>2 byte &0x01 \b, Custom Flag
+#>3 byte &0x08 \b, Copyrighted
+#>3 byte &0x04 \b, Original Source
+#>3 byte&0x03 1 \b, NR: 50/15 ms
+#>3 byte&0x03 3 \b, NR: CCIT J.17
+
+# MPA, M2A
+0 beshort&0xFFFE 0xFFF6 MPEG ADTS, layer I, v2
+!:mime audio/mpeg
+# rate
+>2 byte&0xF0 0x10 \b, 32 kbps
+>2 byte&0xF0 0x20 \b, 48 kbps
+>2 byte&0xF0 0x30 \b, 56 kbps
+>2 byte&0xF0 0x40 \b, 64 kbps
+>2 byte&0xF0 0x50 \b, 80 kbps
+>2 byte&0xF0 0x60 \b, 96 kbps
+>2 byte&0xF0 0x70 \b, 112 kbps
+>2 byte&0xF0 0x80 \b, 128 kbps
+>2 byte&0xF0 0x90 \b, 144 kbps
+>2 byte&0xF0 0xA0 \b, 160 kbps
+>2 byte&0xF0 0xB0 \b, 176 kbps
+>2 byte&0xF0 0xC0 \b, 192 kbps
+>2 byte&0xF0 0xD0 \b, 224 kbps
+>2 byte&0xF0 0xE0 \b, 256 kbps
+# timing
+>2 byte&0x0C 0x00 \b, 22.05 kHz
+>2 byte&0x0C 0x04 \b, 24 kHz
+>2 byte&0x0C 0x08 \b, 16 kHz
+# channels/options
+>3 byte&0xC0 0x00 \b, Stereo
+>3 byte&0xC0 0x40 \b, JntStereo
+>3 byte&0xC0 0x80 \b, 2x Monaural
+>3 byte&0xC0 0xC0 \b, Monaural
+#>1 byte ^0x01 \b, Data Verify
+#>2 byte &0x02 \b, Packet Pad
+#>2 byte &0x01 \b, Custom Flag
+#>3 byte &0x08 \b, Copyrighted
+#>3 byte &0x04 \b, Original Source
+#>3 byte&0x03 1 \b, NR: 50/15 ms
+#>3 byte&0x03 3 \b, NR: CCIT J.17
+
+# MP3, M25A
+0 beshort&0xFFFE 0xFFE2 MPEG ADTS, layer III, v2.5
+!:mime audio/mpeg
+# rate
+>2 byte&0xF0 0x10 \b, 8 kbps
+>2 byte&0xF0 0x20 \b, 16 kbps
+>2 byte&0xF0 0x30 \b, 24 kbps
+>2 byte&0xF0 0x40 \b, 32 kbps
+>2 byte&0xF0 0x50 \b, 40 kbps
+>2 byte&0xF0 0x60 \b, 48 kbps
+>2 byte&0xF0 0x70 \b, 56 kbps
+>2 byte&0xF0 0x80 \b, 64 kbps
+>2 byte&0xF0 0x90 \b, 80 kbps
+>2 byte&0xF0 0xA0 \b, 96 kbps
+>2 byte&0xF0 0xB0 \b, 112 kbps
+>2 byte&0xF0 0xC0 \b, 128 kbps
+>2 byte&0xF0 0xD0 \b, 144 kbps
+>2 byte&0xF0 0xE0 \b, 160 kbps
+# timing
+>2 byte&0x0C 0x00 \b, 11.025 kHz
+>2 byte&0x0C 0x04 \b, 12 kHz
+>2 byte&0x0C 0x08 \b, 8 kHz
+# channels/options
+>3 byte&0xC0 0x00 \b, Stereo
+>3 byte&0xC0 0x40 \b, JntStereo
+>3 byte&0xC0 0x80 \b, 2x Monaural
+>3 byte&0xC0 0xC0 \b, Monaural
+#>1 byte ^0x01 \b, Data Verify
+#>2 byte &0x02 \b, Packet Pad
+#>2 byte &0x01 \b, Custom Flag
+#>3 byte &0x08 \b, Copyrighted
+#>3 byte &0x04 \b, Original Source
+#>3 byte&0x03 1 \b, NR: 50/15 ms
+#>3 byte&0x03 3 \b, NR: CCIT J.17
+
+# AAC (aka MPEG-2 NBC audio) and MPEG-4 audio
+
+# Stored AAC streams (instead of the MP4 format)
+0 string ADIF MPEG ADIF, AAC
+!:mime audio/x-hx-aac-adif
+>4 byte &0x80
+>>13 byte &0x10 \b, VBR
+>>13 byte ^0x10 \b, CBR
+>>16 byte&0x1E 0x02 \b, single stream
+>>16 byte&0x1E 0x04 \b, 2 streams
+>>16 byte&0x1E 0x06 \b, 3 streams
+>>16 byte &0x08 \b, 4 or more streams
+>>16 byte &0x10 \b, 8 or more streams
+>>4 byte &0x80 \b, Copyrighted
+>>13 byte &0x40 \b, Original Source
+>>13 byte &0x20 \b, Home Flag
+>4 byte ^0x80
+>>4 byte &0x10 \b, VBR
+>>4 byte ^0x10 \b, CBR
+>>7 byte&0x1E 0x02 \b, single stream
+>>7 byte&0x1E 0x04 \b, 2 streams
+>>7 byte&0x1E 0x06 \b, 3 streams
+>>7 byte &0x08 \b, 4 or more streams
+>>7 byte &0x10 \b, 8 or more streams
+>>4 byte &0x40 \b, Original Stream(s)
+>>4 byte &0x20 \b, Home Source
+
+# Live or stored single AAC stream (used with MPEG-2 systems)
+0 beshort&0xFFF6 0xFFF0 MPEG ADTS, AAC
+!:mime audio/x-hx-aac-adts
+>1 byte &0x08 \b, v2
+>1 byte ^0x08 \b, v4
+# profile
+>>2 byte &0xC0 \b LTP
+>2 byte&0xc0 0x00 \b Main
+>2 byte&0xc0 0x40 \b LC
+>2 byte&0xc0 0x80 \b SSR
+# timing
+>2 byte&0x3c 0x00 \b, 96 kHz
+>2 byte&0x3c 0x04 \b, 88.2 kHz
+>2 byte&0x3c 0x08 \b, 64 kHz
+>2 byte&0x3c 0x0c \b, 48 kHz
+>2 byte&0x3c 0x10 \b, 44.1 kHz
+>2 byte&0x3c 0x14 \b, 32 kHz
+>2 byte&0x3c 0x18 \b, 24 kHz
+>2 byte&0x3c 0x1c \b, 22.05 kHz
+>2 byte&0x3c 0x20 \b, 16 kHz
+>2 byte&0x3c 0x24 \b, 12 kHz
+>2 byte&0x3c 0x28 \b, 11.025 kHz
+>2 byte&0x3c 0x2c \b, 8 kHz
+# channels
+>2 beshort&0x01c0 0x0040 \b, monaural
+>2 beshort&0x01c0 0x0080 \b, stereo
+>2 beshort&0x01c0 0x00c0 \b, stereo + center
+>2 beshort&0x01c0 0x0100 \b, stereo+center+LFE
+>2 beshort&0x01c0 0x0140 \b, surround
+>2 beshort&0x01c0 0x0180 \b, surround + LFE
+>2 beshort &0x01C0 \b, surround + side
+#>1 byte ^0x01 \b, Data Verify
+#>2 byte &0x02 \b, Custom Flag
+#>3 byte &0x20 \b, Original Stream
+#>3 byte &0x10 \b, Home Source
+#>3 byte &0x08 \b, Copyrighted
+
+# Live MPEG-4 audio streams (instead of RTP FlexMux)
+0 beshort&0xFFE0 0x56E0 MPEG-4 LOAS
+!:mime audio/x-mp4a-latm
+#>1 beshort&0x1FFF x \b, %u byte packet
+>3 byte&0xE0 0x40
+>>4 byte&0x3C 0x04 \b, single stream
+>>4 byte&0x3C 0x08 \b, 2 streams
+>>4 byte&0x3C 0x0C \b, 3 streams
+>>4 byte &0x08 \b, 4 or more streams
+>>4 byte &0x20 \b, 8 or more streams
+>3 byte&0xC0 0
+>>4 byte&0x78 0x08 \b, single stream
+>>4 byte&0x78 0x10 \b, 2 streams
+>>4 byte&0x78 0x18 \b, 3 streams
+>>4 byte &0x20 \b, 4 or more streams
+>>4 byte &0x40 \b, 8 or more streams
+# This magic isn't strong enough (matches plausible ISO-8859-1 text)
+#0 beshort 0x4DE1 MPEG-4 LO-EP audio stream
+#!:mime audio/x-mp4a-latm
+
+# Summary: FLI animation format
+# Created by: Daniel Quinlan <quinlan at yggdrasil.com>
+# Modified by (1): Abel Cheung <abelcheung at gmail.com> (avoid over-generic detection)
+4 leshort 0xAF11
+# standard FLI always has 320x200 resolution and 8 bit color
+>8 leshort 320
+>>10 leshort 200
+>>>12 leshort 8 FLI animation, 320x200x8
+!:mime video/x-fli
+>>>>6 leshort x \b, %d frames
+# frame speed is multiple of 1/70s
+>>>>16 leshort x \b, %d/70s per frame
+
+# Summary: FLC animation format
+# Created by: Daniel Quinlan <quinlan at yggdrasil.com>
+# Modified by (1): Abel Cheung <abelcheung at gmail.com> (avoid over-generic detection)
+4 leshort 0xAF12
+# standard FLC always uses 8 bit color
+>12 leshort 8 FLC animation
+!:mime video/x-flc
+>>8 leshort x \b, %d
+>>10 leshort x \bx%dx8
+>>6 uleshort x \b, %d frames
+>>16 uleshort x \b, %dms per frame
+
+# DL animation format
+# XXX - collision with most `mips' magic
+#
+# I couldn't find a real magic number for these, however, this
+# -appears- to work. Note that it might catch other files, too, so be
+# careful!
+#
+# Note that title and author appear in the two 20-byte chunks
+# at decimal offsets 2 and 22, respectively, but they are XOR'ed with
+# 255 (hex FF)! The DL format is really bad.
+#
+#0 byte 1 DL version 1, medium format (160x100, 4 images/screen)
+#!:mime video/x-unknown
+#>42 byte x - %d screens,
+#>43 byte x %d commands
+#0 byte 2 DL version 2
+#!:mime video/x-unknown
+#>1 byte 1 - large format (320x200,1 image/screen),
+#>1 byte 2 - medium format (160x100,4 images/screen),
+#>1 byte >2 - unknown format,
+#>42 byte x %d screens,
+#>43 byte x %d commands
+# Based on empirical evidence, DL version 3 files have several nulls following the
+# \003. Most of them start with non-null values at hex offset 0x34 or so.
+#0 string \3\0\0\0\0\0\0\0\0\0\0\0 DL version 3
+
+# iso 13818 transport stream
+#
+# from Oskar Schirmer <schirmer at scara.com> Feb 3, 2001 (ISO 13818.1)
+# (the following is a little bit restrictive and works fine for a stream
+# that properly starts with a PAT. It won't work for stream data that was
+# cut from an input device's data right in the middle, but this shouldn't
+# be a problem)
+# syncbyte 8 bit 0x47
+# error_ind 1 bit -
+# payload_start 1 bit 1
+# priority 1 bit -
+# PID 13 bit 0x0000
+# scrambling 2 bit -
+# adaptfld_ctrl 2 bit 1 or 3
+# conti_count 4 bit 0
+0 belong&0xFF5FFF1F 0x47400010 MPEG transport stream data
+>188 byte !0x47 CORRUPTED
+
+# DIF digital video file format <mpruett at sgi.com>
+0 belong&0xffffff00 0x1f070000 DIF
+>4 byte &0x01 (DVCPRO) movie file
+>4 byte ^0x01 (DV) movie file
+>3 byte &0x80 (PAL)
+>3 byte ^0x80 (NTSC)
+
+# Microsoft Advanced Streaming Format (ASF) <mpruett at sgi.com>
+0 belong 0x3026b275 Microsoft ASF
+!:mime video/x-ms-asf
+
+# MNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
+0 string \x8aMNG MNG video data,
+!:mime video/x-mng
+>4 belong !0x0d0a1a0a CORRUPTED,
+>4 belong 0x0d0a1a0a
+>>16 belong x %ld x
+>>20 belong x %ld
+
+# JNG Video Format, <URL:http://www.libpng.org/pub/mng/spec/>
+0 string \x8bJNG JNG video data,
+!:mime video/x-jng
+>4 belong !0x0d0a1a0a CORRUPTED,
+>4 belong 0x0d0a1a0a
+>>16 belong x %ld x
+>>20 belong x %ld
+
+# Vivo video (Wolfram Kleff)
+3 string \x0D\x0AVersion:Vivo Vivo video data
+
+# VRML (Virtual Reality Modelling Language)
+0 string/w #VRML\ V1.0\ ascii VRML 1 file
+!:mime model/vrml
+0 string/w #VRML\ V2.0\ utf8 ISO/IEC 14772 VRML 97 file
+!:mime model/vrml
+
+# X3D (Extensible 3D) [http://www.web3d.org/specifications/x3d-3.0.dtd]
+# From Michel Briand <michelbriand at free.fr>
+0 string \<?xml\ version="
+!:strength +1
+>20 search/1000/cw \<!DOCTYPE\ X3D X3D (Extensible 3D) model xml text
+!:mime model/x3d
+
+#---------------------------------------------------------------------------
+# HVQM4: compressed movie format designed by Hudson for Nintendo GameCube
+# From Mark Sheppard <msheppard at climax.co.uk>, 2002-10-03
+#
+0 string HVQM4 %s
+>6 string >\0 v%s
+>0 byte x GameCube movie,
+>0x34 ubeshort x %d x
+>0x36 ubeshort x %d,
+>0x26 ubeshort x %dµs,
+>0x42 ubeshort 0 no audio
+>0x42 ubeshort >0 %dHz audio
+
+# From: "Stefan A. Haubenthal" <polluks at web.de>
+0 string DVDVIDEO-VTS Video title set,
+>0x21 byte x v%x
+0 string DVDVIDEO-VMG Video manager,
+>0x21 byte x v%x
+
+# From: Behan Webster <behanw at websterwood.com>
+# NuppelVideo used by Mythtv (*.nuv)
+# Note: there are two identical stanzas here differing only in the
+# initial string matched. It used to be done with a regex, but we're
+# trying to get rid of those.
+0 string NuppelVideo MythTV NuppelVideo
+>12 string x v%s
+>20 lelong x (%d
+>24 lelong x \bx%d),
+>36 string P \bprogressive,
+>36 string I \binterlaced,
+>40 ledouble x \baspect:%.2f,
+>48 ledouble x \bfps:%.2f
+0 string MythTV MythTV NuppelVideo
+>12 string x v%s
+>20 lelong x (%d
+>24 lelong x \bx%d),
+>36 string P \bprogressive,
+>36 string I \binterlaced,
+>40 ledouble x \baspect:%.2f,
+>48 ledouble x \bfps:%.2f
+
+# MPEG file
+# MPEG sequences
+# FIXME: This section is from the old magic.mime file and needs integrating with the rest
+0 belong 0x000001BA
+>4 byte &0x40
+!:mime video/mp2p
+>4 byte ^0x40
+!:mime video/mpeg
+0 belong 0x000001BB
+!:mime video/mpeg
+0 belong 0x000001B0
+!:mime video/mp4v-es
+0 belong 0x000001B5
+!:mime video/mp4v-es
+0 belong 0x000001B3
+!:mime video/mpv
+0 belong&0xFF5FFF1F 0x47400010
+!:mime video/mp2t
+0 belong 0x00000001
+>4 byte&0x1F 0x07
+!:mime video/h264
+
+# Type: Bink Video
+# URL: http://wiki.multimedia.cx/index.php?title=Bink_Container
+# From: <hoehle at users.sourceforge.net> 2008-07-18
+0 string BIK Bink Video
+>3 regex =[a-z] rev.%s
+#>4 ulelong x size %d
+>20 ulelong x \b, %d
+>24 ulelong x \bx%d
+>8 ulelong x \b, %d frames
+>32 ulelong x at rate %d/
+>28 ulelong >1 \b%d
+>40 ulelong =0 \b, no audio
+>40 ulelong !0 \b, %d audio track
+>>40 ulelong !1 \bs
+# follow properties of the first audio track only
+>>48 uleshort x %dHz
+>>51 byte&0x20 0 mono
+>>51 byte&0x20 !0 stereo
+#>>51 byte&0x10 0 FFT
+#>>51 byte&0x10 !0 DCT
+
+#------------------------------------------------------------------------------
+# $File: apl,v 1.6 2009/09/19 16:28:07 christos Exp $
+# apl: file(1) magic for APL (see also "pdp" and "vax" for other APL
+# workspaces)
+#
+0 long 0100554 APL workspace (Ken's original?)
+
+#------------------------------------------------------------------------------
+# $File: apple,v 1.23 2009/09/19 16:28:08 christos Exp $
+# apple: file(1) magic for Apple file formats
+#
+0 search/1 FiLeStArTfIlEsTaRt binscii (apple ][) text
+0 string \x0aGL Binary II (apple ][) data
+0 string \x76\xff Squeezed (apple ][) data
+0 string NuFile NuFile archive (apple ][) data
+0 string N\xf5F\xe9l\xe5 NuFile archive (apple ][) data
+0 belong 0x00051600 AppleSingle encoded Macintosh file
+0 belong 0x00051607 AppleDouble encoded Macintosh file
+
+# Type: Apple Emulator 2IMG format
+# From: Radek Vokal <rvokal at redhat.com>
+0 string 2IMG Apple ][ 2IMG Disk Image
+>4 string XGS! \b, XGS
+>4 string CTKG \b, Catakig
+>4 string ShIm \b, Sheppy's ImageMaker
+>4 string WOOF \b, Sweet 16
+>4 string B2TR \b, Bernie ][ the Rescue
+>4 string !nfc \b, ASIMOV2
+>4 string x \b, Unknown Format
+>0xc byte 00 \b, DOS 3.3 sector order
+>>0x10 byte 00 \b, Volume 254
+>>0x10 byte&0x7f x \b, Volume %u
+>0xc byte 01 \b, ProDOS sector order
+>>0x14 short x \b, %u Blocks
+>0xc byte 02 \b, NIB data
+
+# magic for Newton PDA package formats
+# from Ruda Moura <ruda at helllabs.org>
+0 string package0 Newton package, NOS 1.x,
+>12 belong &0x80000000 AutoRemove,
+>12 belong &0x40000000 CopyProtect,
+>12 belong &0x10000000 NoCompression,
+>12 belong &0x04000000 Relocation,
+>12 belong &0x02000000 UseFasterCompression,
+>16 belong x version %d
+
+0 string package1 Newton package, NOS 2.x,
+>12 belong &0x80000000 AutoRemove,
+>12 belong &0x40000000 CopyProtect,
+>12 belong &0x10000000 NoCompression,
+>12 belong &0x04000000 Relocation,
+>12 belong &0x02000000 UseFasterCompression,
+>16 belong x version %d
+
+0 string package4 Newton package,
+>8 byte 8 NOS 1.x,
+>8 byte 9 NOS 2.x,
+>12 belong &0x80000000 AutoRemove,
+>12 belong &0x40000000 CopyProtect,
+>12 belong &0x10000000 NoCompression,
+
+# The following entries for the Apple II are for files that have
+# been transferred as raw binary data from an Apple, without having
+# been encapsulated by any of the above archivers.
+#
+# In general, Apple II formats are hard to identify because Apple DOS
+# and especially Apple ProDOS have strong typing in the file system and
+# therefore programmers never felt much need to include type information
+# in the files themselves.
+#
+# Eric Fischer <enf at pobox.com>
+
+# AppleWorks word processor:
+#
+# This matches the standard tab stops for an AppleWorks file, but if
+# a file has a tab stop set in the first four columns this will fail.
+#
+# The "O" is really the magic number, but that's so common that it's
+# necessary to check the tab stops that follow it to avoid false positives.
+
+4 string O==== AppleWorks word processor data
+>85 byte&0x01 >0 \b, zoomed
+>90 byte&0x01 >0 \b, paginated
+>92 byte&0x01 >0 \b, with mail merge
+#>91 byte x \b, left margin %d
+
+# AppleWorks database:
+#
+# This isn't really a magic number, but it's the closest thing to one
+# that I could find. The 1 and 2 really mean "order in which you defined
+# categories" and "left to right, top to bottom," respectively; the D and R
+# mean that the cursor should move either down or right when you press Return.
+
+#30 string \x01D AppleWorks database data
+#30 string \x02D AppleWorks database data
+#30 string \x01R AppleWorks database data
+#30 string \x02R AppleWorks database data
+
+# AppleWorks spreadsheet:
+#
+# Likewise, this isn't really meant as a magic number. The R or C means
+# row- or column-order recalculation; the A or M means automatic or manual
+# recalculation.
+
+#131 string RA AppleWorks spreadsheet data
+#131 string RM AppleWorks spreadsheet data
+#131 string CA AppleWorks spreadsheet data
+#131 string CM AppleWorks spreadsheet data
+
+# Applesoft BASIC:
+#
+# This is incredibly sloppy, but will be true if the program was
+# written at its usual memory location of 2048 and its first line
+# number is less than 256. Yuck.
+
+0 belong&0xff00ff 0x80000 Applesoft BASIC program data
+#>2 leshort x \b, first line number %d
+
+# ORCA/EZ assembler:
+#
+# This will not identify ORCA/M source files, since those have
+# some sort of date code instead of the two zero bytes at 6 and 7
+# XXX Conflicts with ELF
+#4 belong&0xff00ffff 0x01000000 ORCA/EZ assembler source data
+#>5 byte x \b, build number %d
+
+# Broderbund Fantavision
+#
+# I don't know what these values really mean, but they seem to recur.
+# Will they cause too many conflicts?
+
+# Probably :-)
+#2 belong&0xFF00FF 0x040008 Fantavision movie data
+
+# Some attempts at images.
+#
+# These are actually just bit-for-bit dumps of the frame buffer, so
+# there's really no reasonable way to distinguish them except for their
+# address (if preserved) -- 8192 or 16384 -- and their length -- 8192
+# or, occasionally, 8184.
+#
+# Nevertheless this will manage to catch a lot of images that happen
+# to have a solid-colored line at the bottom of the screen.
+
+# GRR: Magic too weak
+#8144 string \x7F\x7F\x7F\x7F\x7F\x7F\x7F\x7F Apple II image with white background
+#8144 string \x55\x2A\x55\x2A\x55\x2A\x55\x2A Apple II image with purple background
+#8144 string \x2A\x55\x2A\x55\x2A\x55\x2A\x55 Apple II image with green background
+#8144 string \xD5\xAA\xD5\xAA\xD5\xAA\xD5\xAA Apple II image with blue background
+#8144 string \xAA\xD5\xAA\xD5\xAA\xD5\xAA\xD5 Apple II image with orange background
+
+# Beagle Bros. Apple Mechanic fonts
+
+0 belong&0xFF00FFFF 0x6400D000 Apple Mechanic font
+
+# Apple Universal Disk Image Format (UDIF) - dmg files.
+# From Johan Gade.
+# These entries are disabled for now until we fix the following issues.
+#
+# Note there might be some problems with the "VAX COFF executable"
+# entry. Note this entry should be placed before the mac filesystem section,
+# particularly the "Apple Partition data" entry.
+#
+# The intended meaning of these tests is that the file is only of the
+# specified type if both of the lines are correct - i.e. if the first
+# line matches and the second doesn't then it is not of that type.
+#
+#0 long 0x7801730d
+#>4 long 0x62626060 UDIF read-only zlib-compressed image (UDZO)
+#
+# Note that this entry is recognized correctly by the "Apple Partition
+# data" entry - however since this entry is more specific - this
+# information seems to be more useful.
+#0 long 0x45520200
+#>0x410 string disk\ image UDIF read/write image (UDRW)
+
+# From: Toby Peterson <toby at apple.com>
+0 string bplist00 Apple binary property list
+
+# Apple binary property list (bplist)
+# Assumes version bytes are hex.
+# Provides content hints for version 0 files. Assumes that the root
+# object is the first object (true for CoreFoundation implementation).
+# From: David Remahl <dremahl at apple.com>
+0 string bplist
+>6 byte x \bCoreFoundation binary property list data, version 0x%c
+>>7 byte x \b%c
+>6 string 00 \b
+>>8 byte&0xF0 0x00 \b
+>>>8 byte&0x0F 0x00 \b, root type: null
+>>>8 byte&0x0F 0x08 \b, root type: false boolean
+>>>8 byte&0x0F 0x09 \b, root type: true boolean
+>>8 byte&0xF0 0x10 \b, root type: integer
+>>8 byte&0xF0 0x20 \b, root type: real
+>>8 byte&0xF0 0x30 \b, root type: date
+>>8 byte&0xF0 0x40 \b, root type: data
+>>8 byte&0xF0 0x50 \b, root type: ascii string
+>>8 byte&0xF0 0x60 \b, root type: unicode string
+>>8 byte&0xF0 0x80 \b, root type: uid (CORRUPT)
+>>8 byte&0xF0 0xa0 \b, root type: array
+>>8 byte&0xF0 0xd0 \b, root type: dictionary
+
+# Apple/NeXT typedstream data
+# Serialization format used by NeXT and Apple for various
+# purposes in YellowStep/Cocoa, including some nib files.
+# From: David Remahl <dremahl at apple.com>
+2 string typedstream NeXT/Apple typedstream data, big endian
+>0 byte x \b, version %hhd
+>0 byte <5 \b
+>>13 byte 0x81 \b
+>>>14 ubeshort x \b, system %hd
+2 string streamtyped NeXT/Apple typedstream data, little endian
+>0 byte x \b, version %hhd
+>0 byte <5 \b
+>>13 byte 0x81 \b
+>>>14 uleshort x \b, system %hd
+
+#------------------------------------------------------------------------------
+# CAF: Apple CoreAudio File Format
+#
+# Container format for high-end audio purposes.
+# From: David Remahl <dremahl at apple.com>
+#
+0 string caff CoreAudio Format audio file
+>4 beshort <10 version %d
+>6 beshort x
+
+
+#------------------------------------------------------------------------------
+# Keychain database files
+0 string kych Mac OS X Keychain File
+
+#------------------------------------------------------------------------------
+# Code Signing related file types
+0 belong 0xfade0c00 Mac OS X Code Requirement
+>8 belong 1 (opExpr)
+>4 belong x - %d bytes
+
+0 belong 0xfade0c01 Mac OS X Code Requirement Set
+>8 belong >1 containing %d items
+>4 belong x - %d bytes
+
+0 belong 0xfade0c02 Mac OS X Code Directory
+>8 belong x version %x
+>12 belong >0 flags 0x%x
+>4 belong x - %d bytes
+
+0 belong 0xfade0cc0 Mac OS X Detached Code Signature (non-executable)
+>4 belong x - %d bytes
+
+0 belong 0xfade0cc1 Mac OS X Detached Code Signature
+>8 belong >1 (%d elements)
+>4 belong x - %d bytes
+
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+# .vdi
+4 string innotek\ VirtualBox\ Disk\ Image %s
+
+#------------------------------------------------------------------------------
+# $File: applix,v 1.5 2009/09/19 16:28:08 christos Exp $
+# applix: file(1) magic for Applixware
+# From: Peter Soos <sp at osb.hu>
+#
+0 string *BEGIN Applixware
+>7 string WORDS Words Document
+>7 string GRAPHICS Graphic
+>7 string RASTER Bitmap
+>7 string SPREADSHEETS Spreadsheet
+>7 string MACRO Macro
+>7 string BUILDER Builder Object
+
+#------------------------------------------------------------------------------
+# $File: archive,v 1.55 2009/12/04 15:00:47 christos Exp $
+# archive: file(1) magic for archive formats (see also "msdos" for self-
+# extracting compressed archives)
+#
+# cpio, ar, arc, arj, hpack, lha/lharc, rar, squish, uc2, zip, zoo, etc.
+# pre-POSIX "tar" archives are handled in the C code.
+
+# POSIX tar archives
+257 string ustar\0 POSIX tar archive
+!:mime application/x-tar # encoding: posix
+257 string ustar\040\040\0 GNU tar archive
+!:mime application/x-tar # encoding: gnu
+
+# cpio archives
+#
+# Yes, the top two "cpio archive" formats *are* supposed to just be "short".
+# The idea is to indicate archives produced on machines with the same
+# byte order as the machine running "file" with "cpio archive", and
+# to indicate archives produced on machines with the opposite byte order
+# from the machine running "file" with "byte-swapped cpio archive".
+#
+# The SVR4 "cpio(4)" hints that there are additional formats, but they
+# are defined as "short"s; I think all the new formats are
+# character-header formats and thus are strings, not numbers.
+0 short 070707 cpio archive
+!:mime application/x-cpio
+0 short 0143561 byte-swapped cpio archive
+!:mime application/x-cpio # encoding: swapped
+0 string 070707 ASCII cpio archive (pre-SVR4 or odc)
+0 string 070701 ASCII cpio archive (SVR4 with no CRC)
+0 string 070702 ASCII cpio archive (SVR4 with CRC)
+
+# Debian package (needs to go before regular portable archives)
+#
+0 string =!<arch>\ndebian
+!:mime application/x-debian-package
+>8 string debian-split part of multipart Debian package
+>8 string debian-binary Debian binary package
+>8 string !debian
+>68 string >\0 (format %s)
+# These next two lines do not work, because a bzip2 Debian archive
+# still uses gzip for the control.tar (first in the archive). Only
+# data.tar varies, and the location of its filename varies too.
+# file/libmagic does not currently have support for ascii-string based
+# (offsets) as of 2005-09-15.
+#>81 string bz2 \b, uses bzip2 compression
+#>84 string gz \b, uses gzip compression
+#>136 ledate x created: %s
+
+# other archives
+0 long 0177555 very old archive
+0 short 0177555 very old PDP-11 archive
+0 long 0177545 old archive
+0 short 0177545 old PDP-11 archive
+0 long 0100554 apl workspace
+0 string =<ar> archive
+!:mime application/x-archive
+
+# MIPS archive (needs to go before regular portable archives)
+#
+0 string =!<arch>\n__________E MIPS archive
+>20 string U with MIPS Ucode members
+>21 string L with MIPSEL members
+>21 string B with MIPSEB members
+>19 string L and an EL hash table
+>19 string B and an EB hash table
+>22 string X -- out of date
+
+0 search/1 -h- Software Tools format archive text
+
+#
+# XXX - why are there multiple <ar> thingies? Note that 0x213c6172 is
+# "!<ar", so, for new-style (4.xBSD/SVR2andup) archives, we have:
+#
+# 0 string =!<arch> current ar archive
+# 0 long 0x213c6172 archive file
+#
+# and for SVR1 archives, we have:
+#
+# 0 string \<ar> System V Release 1 ar archive
+# 0 string =<ar> archive
+#
+# XXX - did Aegis really store shared libraries, breakpointed modules,
+# and absolute code program modules in the same format as new-style
+# "ar" archives?
+#
+0 string =!<arch> current ar archive
+!:mime application/x-archive
+>8 string __.SYMDEF random library
+>0 belong =65538 - pre SR9.5
+>0 belong =65539 - post SR9.5
+>0 beshort 2 - object archive
+>0 beshort 3 - shared library module
+>0 beshort 4 - debug break-pointed module
+>0 beshort 5 - absolute code program module
+0 string \<ar> System V Release 1 ar archive
+0 string =<ar> archive
+#
+# XXX - from "vax", which appears to collect a bunch of byte-swapped
+# thingies, to help you recognize VAX files on big-endian machines;
+# with "leshort", "lelong", and "string", that's no longer necessary....
+#
+0 belong 0x65ff0000 VAX 3.0 archive
+0 belong 0x3c61723e VAX 5.0 archive
+#
+0 long 0x213c6172 archive file
+0 lelong 0177555 very old VAX archive
+0 leshort 0177555 very old PDP-11 archive
+#
+# XXX - "pdp" claims that 0177545 can have an __.SYMDEF member and thus
+# be a random library (it said 0xff65 rather than 0177545).
+#
+0 lelong 0177545 old VAX archive
+>8 string __.SYMDEF random library
+0 leshort 0177545 old PDP-11 archive
+>8 string __.SYMDEF random library
+#
+# From "pdp" (but why a 4-byte quantity?)
+#
+0 lelong 0x39bed PDP-11 old archive
+0 lelong 0x39bee PDP-11 4.0 archive
+
+# ARC archiver, from Daniel Quinlan (quinlan at yggdrasil.com)
+#
+# The first byte is the magic (0x1a), byte 2 is the compression type for
+# the first file (0x01 through 0x09), and bytes 3 to 15 are the MS-DOS
+# filename of the first file (null terminated). Since some types collide
+# we only test some types on basis of frequency: 0x08 (83%), 0x09 (5%),
+# 0x02 (5%), 0x03 (3%), 0x04 (2%), 0x06 (2%). 0x01 collides with terminfo.
+0 lelong&0x8080ffff 0x0000081a ARC archive data, dynamic LZW
+!:mime application/x-arc
+0 lelong&0x8080ffff 0x0000091a ARC archive data, squashed
+!:mime application/x-arc
+0 lelong&0x8080ffff 0x0000021a ARC archive data, uncompressed
+!:mime application/x-arc
+0 lelong&0x8080ffff 0x0000031a ARC archive data, packed
+!:mime application/x-arc
+0 lelong&0x8080ffff 0x0000041a ARC archive data, squeezed
+!:mime application/x-arc
+0 lelong&0x8080ffff 0x0000061a ARC archive data, crunched
+!:mime application/x-arc
+# [JW] stuff taken from idarc, obviously ARC successors:
+0 lelong&0x8080ffff 0x00000a1a PAK archive data
+!:mime application/x-arc
+0 lelong&0x8080ffff 0x0000141a ARC+ archive data
+!:mime application/x-arc
+0 lelong&0x8080ffff 0x0000481a HYP archive data
+!:mime application/x-arc
+
+# Acorn archive formats (Disaster prone simpleton, m91dps at ecs.ox.ac.uk)
+# I can't create either SPARK or ArcFS archives so I have not tested this stuff
+# [GRR: the original entries collide with ARC, above; replaced with combined
+# version (not tested)]
+#0 byte 0x1a RISC OS archive (spark format)
+0 string \032archive RISC OS archive (ArcFS format)
+0 string Archive\000 RISC OS archive (ArcFS format)
+
+# All these were taken from idarc, many could not be verified. Unfortunately,
+# there were many low-quality sigs, i.e. easy to trigger false positives.
+# Please notify me of any real-world fishy/ambiguous signatures and I'll try
+# to get my hands on the actual archiver and see if I find something better. [JW]
+# probably many can be enhanced by finding some 0-byte or control char near the start
+
+# idarc calls this Crush/Uncompressed... *shrug*
+0 string CRUSH Crush archive data
+# Squeeze It (.sqz)
+0 string HLSQZ Squeeze It archive data
+# SQWEZ
+0 string SQWEZ SQWEZ archive data
+# HPack (.hpk)
+0 string HPAK HPack archive data
+# HAP
+0 string \x91\x33HF HAP archive data
+# MD/MDCD
+0 string MDmd MDCD archive data
+# LIM
+0 string LIM\x1a LIM archive data
+# SAR
+3 string LH5 SAR archive data
+# BSArc/BS2
+0 string \212\3SB \0 BSArc/BS2 archive data
+# MAR
+2 string =-ah MAR archive data
+# ACB
+0 belong&0x00f800ff 0x00800000 ACB archive data
+# CPZ
+# TODO, this is what idarc says: 0 string \0\0\0 CPZ archive data
+# JRC
+0 string JRchive JRC archive data
+# Quantum
+0 string DS\0 Quantum archive data
+# ReSOF
+0 string PK\3\6 ReSOF archive data
+# QuArk
+0 string 7\4 QuArk archive data
+# YAC
+14 string YC YAC archive data
+# X1
+0 string X1 X1 archive data
+0 string XhDr X1 archive data
+# CDC Codec (.dqt)
+0 belong&0xffffe000 0x76ff2000 CDC Codec archive data
+# AMGC
+0 string \xad6" AMGC archive data
+# NuLIB
+0 string NõFélå NuLIB archive data
+# PakLeo
+0 string LEOLZW PAKLeo archive data
+# ChArc
+0 string SChF ChArc archive data
+# PSA
+0 string PSA PSA archive data
+# CrossePAC
+0 string DSIGDCC CrossePAC archive data
+# Freeze
+0 string \x1f\x9f\x4a\x10\x0a Freeze archive data
+# KBoom
+0 string ¨MP¨ KBoom archive data
+# NSQ, must go after CDC Codec
+0 string \x76\xff NSQ archive data
+# DPA
+0 string Dirk\ Paehl DPA archive data
+# BA
+# TODO: idarc says "bytes 0-2 == bytes 3-5"
+# TTComp
+0 string \0\6 TTComp archive data
+# ESP, could this conflict with Easy Software Products' (e.g.ESP ghostscript) documentation?
+0 string ESP ESP archive data
+# ZPack
+0 string \1ZPK\1 ZPack archive data
+# Sky
+0 string \xbc\x40 Sky archive data
+# UFA
+0 string UFA UFA archive data
+# Dry
+0 string =-H2O DRY archive data
+# FoxSQZ
+0 string FOXSQZ FoxSQZ archive data
+# AR7
+0 string ,AR7 AR7 archive data
+# PPMZ
+0 string PPMZ PPMZ archive data
+# MS Compress
+4 string \x88\xf0\x27 MS Compress archive data
+# updated by Joerg Jenderek
+>9 string \0
+>>0 string KWAJ
+>>>7 string \321\003 MS Compress archive data
+>>>>14 ulong >0 \b, original size: %ld bytes
+>>>>18 ubyte >0x65
+>>>>>18 string x \b, was %.8s
+>>>>>(10.b-4) string x \b.%.3s
+# MP3 (archiver, not lossy audio compression)
+0 string MP3\x1a MP3-Archiver archive data
+# ZET
+0 string OZÝ ZET archive data
+# TSComp
+0 string \x65\x5d\x13\x8c\x08\x01\x03\x00 TSComp archive data
+# ARQ
+0 string gW\4\1 ARQ archive data
+# Squash
+3 string OctSqu Squash archive data
+# Terse
+0 string \5\1\1\0 Terse archive data
+# PUCrunch
+0 string \x01\x08\x0b\x08\xef\x00\x9e\x32\x30\x36\x31 PUCrunch archive data
+# UHarc
+0 string UHA UHarc archive data
+# ABComp
+0 string \2AB ABComp archive data
+0 string \3AB2 ABComp archive data
+# CMP
+0 string CO\0 CMP archive data
+# Splint
+0 string \x93\xb9\x06 Splint archive data
+# InstallShield
+0 string \x13\x5d\x65\x8c InstallShield Z archive Data
+# Gather
+1 string GTH Gather archive data
+# BOA
+0 string BOA BOA archive data
+# RAX
+0 string ULEB\xa RAX archive data
+# Xtreme
+0 string ULEB\0 Xtreme archive data
+# Pack Magic
+0 string @â\1\0 Pack Magic archive data
+# BTS
+0 belong&0xfeffffff 0x1a034465 BTS archive data
+# ELI 5750
+0 string Ora\ ELI 5750 archive data
+# QFC
+0 string \x1aFC\x1a QFC archive data
+0 string \x1aQF\x1a QFC archive data
+# PRO-PACK
+0 string RNC PRO-PACK archive data
+# 777
+0 string 777 777 archive data
+# LZS221
+0 string sTaC LZS221 archive data
+# HPA
+0 string HPA HPA archive data
+# Arhangel
+0 string LG Arhangel archive data
+# EXP1, uses bzip2
+0 string 0123456789012345BZh EXP1 archive data
+# IMP
+0 string IMP\xa IMP archive data
+# NRV
+0 string \x00\x9E\x6E\x72\x76\xFF NRV archive data
+# Squish
+0 string \x73\xb2\x90\xf4 Squish archive data
+# Par
+0 string PHILIPP Par archive data
+0 string PAR Par archive data
+# HIT
+0 string UB HIT archive data
+# SBX
+0 belong&0xfffff000 0x53423000 SBX archive data
+# NaShrink
+0 string NSK NaShrink archive data
+# SAPCAR
+0 string #\ CAR\ archive\ header SAPCAR archive data
+0 string CAR\ 2.00RG SAPCAR archive data
+# Disintegrator
+0 string DST Disintegrator archive data
+# ASD
+0 string ASD ASD archive data
+# InstallShield CAB
+0 string ISc( InstallShield CAB
+# TOP4
+0 string T4\x1a TOP4 archive data
+# BatComp left out: sig looks like COM executable
+# so TODO: get real 4dos batcomp file and find sig
+# BlakHole
+0 string BH\5\7 BlakHole archive data
+# BIX
+0 string BIX0 BIX archive data
+# ChiefLZA
+0 string ChfLZ ChiefLZA archive data
+# Blink
+0 string Blink Blink archive data
+# Logitech Compress
+0 string \xda\xfa Logitech Compress archive data
+# ARS-Sfx (FIXME: really a SFX? then goto COM/EXE)
+1 string (C)\ STEPANYUK ARS-Sfx archive data
+# AKT/AKT32
+0 string AKT32 AKT32 archive data
+0 string AKT AKT archive data
+# NPack
+0 string MSTSM NPack archive data
+# PFT
+0 string \0\x50\0\x14 PFT archive data
+# SemOne
+0 string SEM SemOne archive data
+# PPMD
+0 string \x8f\xaf\xac\x84 PPMD archive data
+# FIZ
+0 string FIZ FIZ archive data
+# MSXiE
+0 belong&0xfffff0f0 0x4d530000 MSXiE archive data
+# DeepFreezer
+0 belong&0xfffffff0 0x797a3030 DeepFreezer archive data
+# DC
+0 string =<DC- DC archive data
+# TPac
+0 string \4TPAC\3 TPac archive data
+# Ai
+0 string Ai\1\1\0 Ai archive data
+0 string Ai\1\0\0 Ai archive data
+# Ai32
+0 string Ai\2\0 Ai32 archive data
+0 string Ai\2\1 Ai32 archive data
+# SBC
+0 string SBC SBC archive data
+# Ybs
+0 string YBS Ybs archive data
+# DitPack
+0 string \x9e\0\0 DitPack archive data
+# DMS
+0 string DMS! DMS archive data
+# EPC
+0 string \x8f\xaf\xac\x8c EPC archive data
+# VSARC
+0 string VS\x1a VSARC archive data
+# PDZ
+0 string PDZ PDZ archive data
+# ReDuq
+0 string rdqx ReDuq archive data
+# GCA
+0 string GCAX GCA archive data
+# PPMN
+0 string pN PPMN archive data
+# WinImage
+3 string WINIMAGE WinImage archive data
+# Compressia
+0 string CMP0CMP Compressia archive data
+# UHBC
+0 string UHB UHBC archive data
+# WinHKI
+0 string \x61\x5C\x04\x05 WinHKI archive data
+# WWPack data file
+0 string WWP WWPack archive data
+# BSN (BSA, PTS-DOS)
+0 string \xffBSG BSN archive data
+1 string \xffBSG BSN archive data
+3 string \xffBSG BSN archive data
+1 string \0\xae\2 BSN archive data
+1 string \0\xae\3 BSN archive data
+1 string \0\xae\7 BSN archive data
+# AIN
+0 string \x33\x18 AIN archive data
+0 string \x33\x17 AIN archive data
+# XPA32
+0 string xpa\0\1 XPA32 archive data
+# SZip (TODO: doesn't catch all versions)
+0 string SZ\x0a\4 SZip archive data
+# XPack DiskImage
+0 string jm XPack DiskImage archive data
+# XPack Data
+0 string xpa XPack archive data
+# XPack Single Data
+0 string Í\ jm XPack single archive data
+
+# TODO: missing due to unknown magic/magic at end of file:
+#DWC
+#ARG
+#ZAR
+#PC/3270
+#InstallIt
+#RKive
+#RK
+#XPack Diskimage
+
+# These were inspired by idarc, but actually verified
+# Dzip archiver (.dz)
+0 string DZ Dzip archive data
+>2 byte x \b, version %i
+>3 byte x \b.%i
+# ZZip archiver (.zz)
+0 string ZZ\ \0\0 ZZip archive data
+0 string ZZ0 ZZip archive data
+# PAQ archiver (.paq)
+0 string \xaa\x40\x5f\x77\x1f\xe5\x82\x0d PAQ archive data
+0 string PAQ PAQ archive data
+>3 byte&0xf0 0x30
+>>3 byte x (v%c)
+# JAR archiver (.j), this is the successor to ARJ, not Java's JAR (which is essentially ZIP)
+0xe string \x1aJar\x1b JAR (ARJ Software, Inc.) archive data
+0 string JARCS JAR (ARJ Software, Inc.) archive data
+
+# ARJ archiver (jason at jarthur.Claremont.EDU)
+0 leshort 0xea60 ARJ archive data
+!:mime application/x-arj
+>5 byte x \b, v%d,
+>8 byte &0x04 multi-volume,
+>8 byte &0x10 slash-switched,
+>8 byte &0x20 backup,
+>34 string x original name: %s,
+>7 byte 0 os: MS-DOS
+>7 byte 1 os: PRIMOS
+>7 byte 2 os: Unix
+>7 byte 3 os: Amiga
+>7 byte 4 os: Macintosh
+>7 byte 5 os: OS/2
+>7 byte 6 os: Apple ][ GS
+>7 byte 7 os: Atari ST
+>7 byte 8 os: NeXT
+>7 byte 9 os: VAX/VMS
+>3 byte >0 %d]
+# [JW] idarc says this is also possible
+2 leshort 0xea60 ARJ archive data
+
+# HA archiver (Greg Roelofs, newt at uchicago.edu)
+# This is a really bad format. A file containing HAWAII will match this...
+#0 string HA HA archive data,
+#>2 leshort =1 1 file,
+#>2 leshort >1 %u files,
+#>4 byte&0x0f =0 first is type CPY
+#>4 byte&0x0f =1 first is type ASC
+#>4 byte&0x0f =2 first is type HSC
+#>4 byte&0x0f =0x0e first is type DIR
+#>4 byte&0x0f =0x0f first is type SPECIAL
+# suggestion: at least identify small archives (<1024 files)
+0 belong&0xffff00fc 0x48410000 HA archive data
+>2 leshort =1 1 file,
+>2 leshort >1 %u files,
+>4 byte&0x0f =0 first is type CPY
+>4 byte&0x0f =1 first is type ASC
+>4 byte&0x0f =2 first is type HSC
+>4 byte&0x0f =0x0e first is type DIR
+>4 byte&0x0f =0x0f first is type SPECIAL
+
+# HPACK archiver (Peter Gutmann, pgut1 at cs.aukuni.ac.nz)
+0 string HPAK HPACK archive data
+
+# JAM Archive volume format, by Dmitry.Kohmanyuk at UA.net
+0 string \351,\001JAM\ JAM archive,
+>7 string >\0 version %.4s
+>0x26 byte =0x27 -
+>>0x2b string >\0 label %.11s,
+>>0x27 lelong x serial %08x,
+>>0x36 string >\0 fstype %.8s
+
+# LHARC/LHA archiver (Greg Roelofs, newt at uchicago.edu)
+2 string -lh0- LHarc 1.x/ARX archive data [lh0]
+!:mime application/x-lharc
+2 string -lh1- LHarc 1.x/ARX archive data [lh1]
+!:mime application/x-lharc
+2 string -lz4- LHarc 1.x archive data [lz4]
+!:mime application/x-lharc
+2 string -lz5- LHarc 1.x archive data [lz5]
+!:mime application/x-lharc
+# [never seen any but the last; -lh4- reported in comp.compression:]
+2 string -lzs- LHa/LZS archive data [lzs]
+!:mime application/x-lha
+2 string -lh\40- LHa 2.x? archive data [lh ]
+!:mime application/x-lha
+2 string -lhd- LHa 2.x? archive data [lhd]
+!:mime application/x-lha
+2 string -lh2- LHa 2.x? archive data [lh2]
+!:mime application/x-lha
+2 string -lh3- LHa 2.x? archive data [lh3]
+!:mime application/x-lha
+2 string -lh4- LHa (2.x) archive data [lh4]
+!:mime application/x-lha
+2 string -lh5- LHa (2.x) archive data [lh5]
+!:mime application/x-lha
+2 string -lh6- LHa (2.x) archive data [lh6]
+!:mime application/x-lha
+2 string -lh7- LHa (2.x)/LHark archive data [lh7]
+!:mime application/x-lha
+>20 byte x - header level %d
+# taken from idarc [JW]
+2 string -lZ PUT archive data
+2 string -lz LZS archive data
+2 string -sw1- Swag archive data
+
+# RAR archiver (Greg Roelofs, newt at uchicago.edu)
+0 string Rar! RAR archive data,
+!:mime application/x-rar
+>44 byte x v%0x,
+>10 byte >0 flags:
+>>10 byte &0x01 Archive volume,
+>>10 byte &0x02 Commented,
+>>10 byte &0x04 Locked,
+>>10 byte &0x08 Solid,
+>>10 byte &0x20 Authenticated,
+>35 byte 0 os: MS-DOS
+>35 byte 1 os: OS/2
+>35 byte 2 os: Win32
+>35 byte 3 os: Unix
+# some old version? idarc says:
+0 string RE\x7e\x5e RAR archive data
+
+# SQUISH archiver (Greg Roelofs, newt at uchicago.edu)
+0 string SQSH squished archive data (Acorn RISCOS)
+
+# UC2 archiver (Greg Roelofs, newt at uchicago.edu)
+# [JW] see exe section for self-extracting version
+0 string UC2\x1a UC2 archive data
+
+# PKZIP multi-volume archive
+0 string PK\x07\x08PK\x03\x04 Zip multi-volume archive data, at least PKZIP v2.50 to extract
+!:mime application/zip
+
+# ZIP archives (Greg Roelofs, c/o zip-bugs at wkuvx1.wku.edu)
+0 string PK\003\004
+>30 ubelong !0x6d696d65
+>>4 byte 0x00 Zip archive data
+!:mime application/zip
+>>4 byte 0x09 Zip archive data, at least v0.9 to extract
+!:mime application/zip
+>>4 byte 0x0a Zip archive data, at least v1.0 to extract
+!:mime application/zip
+>>4 byte 0x0b Zip archive data, at least v1.1 to extract
+!:mime application/zip
+>>0x161 string WINZIP Zip archive data, WinZIP self-extracting
+!:mime application/zip
+>>4 byte 0x14 Zip archive data, at least v2.0 to extract
+!:mime application/zip
+>>4 byte 0x2d Zip64 archive data, at least v3.0 to extract
+!:mime application/zip
+
+# OpenOffice.org / KOffice / StarOffice documents
+# Listed here because they ARE zip files
+#
+# From: Abel Cheung <abel at oaka.org>
+>30 string mimetype
+
+# KOffice (1.2 or above) formats
+>>50 string vnd.kde. KOffice (>=1.2)
+>>>58 string karbon Karbon document
+>>>58 string kchart KChart document
+>>>58 string kformula KFormula document
+>>>58 string kivio Kivio document
+>>>58 string kontour Kontour document
+>>>58 string kpresenter KPresenter document
+>>>58 string kspread KSpread document
+>>>58 string kword KWord document
+
+# OpenOffice formats (for OpenOffice 1.x / StarOffice 6/7)
+>>50 string vnd.sun.xml. OpenOffice.org 1.x
+>>>62 string writer Writer
+>>>>68 byte !0x2e document
+>>>>68 string .template template
+>>>>68 string .global global document
+>>>62 string calc Calc
+>>>>66 byte !0x2e spreadsheet
+>>>>66 string .template template
+>>>62 string draw Draw
+>>>>66 byte !0x2e document
+>>>>66 string .template template
+>>>62 string impress Impress
+>>>>69 byte !0x2e presentation
+>>>>69 string .template template
+>>>62 string math Math document
+>>>62 string base Database file
+
+# OpenDocument formats (for OpenOffice 2.x / StarOffice >= 8)
+# http://lists.oasis-open.org/archives/office/200505/msg00006.html
+>>50 string vnd.oasis.opendocument. OpenDocument
+>>>73 string text
+>>>>77 byte !0x2d Text
+!:mime application/vnd.oasis.opendocument.text
+>>>>77 string -template Text Template
+!:mime application/vnd.oasis.opendocument.text-template
+>>>>77 string -web HTML Document Template
+!:mime application/vnd.oasis.opendocument.text-web
+>>>>77 string -master Master Document
+!:mime application/vnd.oasis.opendocument.text-master
+>>>73 string graphics
+>>>>81 byte !0x2d Drawing
+!:mime application/vnd.oasis.opendocument.graphics
+>>>>81 string -template Template
+!:mime application/vnd.oasis.opendocument.graphics-template
+>>>73 string presentation
+>>>>85 byte !0x2d Presentation
+!:mime application/vnd.oasis.opendocument.presentation
+>>>>85 string -template Template
+!:mime application/vnd.oasis.opendocument.presentation-template
+>>>73 string spreadsheet
+>>>>84 byte !0x2d Spreadsheet
+!:mime application/vnd.oasis.opendocument.spreadsheet
+>>>>84 string -template Template
+!:mime application/vnd.oasis.opendocument.spreadsheet-template
+>>>73 string chart
+>>>>78 byte !0x2d Chart
+!:mime application/vnd.oasis.opendocument.chart
+>>>>78 string -template Template
+!:mime application/vnd.oasis.opendocument.chart-template
+>>>73 string formula
+>>>>80 byte !0x2d Formula
+!:mime application/vnd.oasis.opendocument.formula
+>>>>80 string -template Template
+!:mime application/vnd.oasis.opendocument.formula-template
+>>>73 string database Database
+!:mime application/vnd.oasis.opendocument.database
+>>>73 string image
+>>>>78 byte !0x2d Image
+!:mime application/vnd.oasis.opendocument.image
+>>>>78 string -template Template
+!:mime application/vnd.oasis.opendocument.image-template
+
+# Zoo archiver
+20 lelong 0xfdc4a7dc Zoo archive data
+!:mime application/x-zoo
+>4 byte >48 \b, v%c.
+>>6 byte >47 \b%c
+>>>7 byte >47 \b%c
+>32 byte >0 \b, modify: v%d
+>>33 byte x \b.%d+
+>42 lelong 0xfdc4a7dc \b,
+>>70 byte >0 extract: v%d
+>>>71 byte x \b.%d+
+
+# Shell archives
+10 string #\ This\ is\ a\ shell\ archive shell archive text
+!:mime application/octet-stream
+
+#
+# LBR. NB: May conflict with the questionable
+# "binary Computer Graphics Metafile" format.
+#
+0 string \0\ \ \ \ \ \ \ \ \ \ \ \0\0 LBR archive data
+#
+# PMA (CP/M derivative of LHA)
+#
+2 string -pm0- PMarc archive data [pm0]
+2 string -pm1- PMarc archive data [pm1]
+2 string -pm2- PMarc archive data [pm2]
+2 string -pms- PMarc SFX archive (CP/M, DOS)
+5 string -pc1- PopCom compressed executable (CP/M)
+
+# From Rafael Laboissiere <rafael at laboissiere.net>
+# The Project Revision Control System (see
+# http://prcs.sourceforge.net) generates a packaged project
+# file which is recognized by the following entry:
+0 leshort 0xeb81 PRCS packaged project
+
+# Microsoft cabinets
+# by David Necas (Yeti) <yeti at physics.muni.cz>
+#0 string MSCF\0\0\0\0 Microsoft cabinet file data,
+#>25 byte x v%d
+#>24 byte x \b.%d
+# MPi: All CABs have version 1.3, so this is pointless.
+# Better magic in debian-additions.
+
+# GTKtalog catalogs
+# by David Necas (Yeti) <yeti at physics.muni.cz>
+4 string gtktalog\ GTKtalog catalog data,
+>13 string 3 version 3
+>>14 beshort 0x677a (gzipped)
+>>14 beshort !0x677a (not gzipped)
+>13 string >3 version %s
+
+############################################################################
+# Parity archive reconstruction file, the 'par' file format now used on Usenet.
+0 string PAR\0 PARity archive data
+>48 leshort =0 - Index file
+>48 leshort >0 - file number %d
+
+# Felix von Leitner <felix-file at fefe.de>
+0 string d8:announce BitTorrent file
+!:mime application/x-bittorrent
+
+# Atari MSA archive - Teemu Hukkanen <tjhukkan at iki.fi>
+0 beshort 0x0e0f Atari MSA archive data
+>2 beshort x \b, %d sectors per track
+>4 beshort 0 \b, 1 sided
+>4 beshort 1 \b, 2 sided
+>6 beshort x \b, starting track: %d
+>8 beshort x \b, ending track: %d
+
+# Alternate ZIP string (amc at arwen.cs.berkeley.edu)
+0 string PK00PK\003\004 Zip archive data
+
+# ACE archive (from http://www.wotsit.org/download.asp?f=ace)
+# by Stefan `Sec` Zehl <sec at 42.org>
+7 string **ACE** ACE archive data
+>15 byte >0 version %d
+>16 byte =0x00 \b, from MS-DOS
+>16 byte =0x01 \b, from OS/2
+>16 byte =0x02 \b, from Win/32
+>16 byte =0x03 \b, from Unix
+>16 byte =0x04 \b, from MacOS
+>16 byte =0x05 \b, from WinNT
+>16 byte =0x06 \b, from Primos
+>16 byte =0x07 \b, from AppleGS
+>16 byte =0x08 \b, from Atari
+>16 byte =0x09 \b, from Vax/VMS
+>16 byte =0x0A \b, from Amiga
+>16 byte =0x0B \b, from Next
+>14 byte x \b, version %d to extract
+>5 leshort &0x0080 \b, multiple volumes,
+>>17 byte x \b (part %d),
+>5 leshort &0x0002 \b, contains comment
+>5 leshort &0x0200 \b, sfx
+>5 leshort &0x0400 \b, small dictionary
+>5 leshort &0x0800 \b, multi-volume
+>5 leshort &0x1000 \b, contains AV-String
+>>30 string \x16*UNREGISTERED\x20VERSION* (unregistered)
+>5 leshort &0x2000 \b, with recovery record
+>5 leshort &0x4000 \b, locked
+>5 leshort &0x8000 \b, solid
+# Date in MS-DOS format (whatever that is)
+#>18 lelong x Created on
+
+# sfArk : compression program for Soundfonts (sf2) by Dirk Jagdmann
+# <doj at cubic.org>
+0x1A string sfArk sfArk compressed Soundfont
+>0x15 string 2
+>>0x1 string >\0 Version %s
+>>0x2A string >\0 : %s
+
+# DR-DOS 7.03 Packed File *.??_
+0 string Packed\ File\ Personal NetWare Packed File
+>12 string x \b, was "%.12s"
+
+# EET archive
+# From: Tilman Sauerbeck <tilman at code-monkey.de>
+0 belong 0x1ee7ff00 EET archive
+!:mime application/x-eet
+
+# rzip archives
+0 string RZIP rzip compressed data
+>4 byte x - version %d
+>5 byte x \b.%d
+>6 belong x (%d bytes)
+
+# From: "Robert Dale" <robdale at gmail.com>
+0 belong 123 dar archive,
+>4 belong x label "%.8x
+>>8 belong x %.8x
+>>>12 beshort x %.4x"
+>14 byte 0x54 end slice
+>14 beshort 0x4e4e multi-part
+>14 beshort 0x4e53 multi-part, with -S
+
+# Symbian installation files
+# http://www.thouky.co.uk/software/psifs/sis.html
+# http://developer.symbian.com/main/downloads/papers/SymbianOSv91/softwareinstallsis.pdf
+8 lelong 0x10000419 Symbian installation file
+!:mime application/vnd.symbian.install
+>4 lelong 0x1000006D (EPOC release 3/4/5)
+>4 lelong 0x10003A12 (EPOC release 6)
+0 lelong 0x10201A7A Symbian installation file (Symbian OS 9.x)
+!:mime x-epoc/x-sisx-app
+
+# From "Nelson A. de Oliveira" <naoliv at gmail.com>
+0 string MPQ\032 MoPaQ (MPQ) archive
+
+# From: Dirk Jagdmann <doj at cubic.org>
+# xar archive format: http://code.google.com/p/xar/
+0 string xar! xar archive
+>6 beshort x - version %ld
+
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+# .kgb
+0 string KGB_arch KGB Archiver file
+>10 string x with compression level %.1s
+
+# xar (eXtensible ARchiver) archive
+# From: "David Remahl" <dremahl at apple.com>
+0 string xar! xar archive
+#>4 beshort x header size %d
+>6 beshort x version %d,
+#>8 quad x compressed TOC: %d,
+#>16 quad x uncompressed TOC: %d,
+>24 belong 0 no checksum
+>24 belong 1 SHA-1 checksum
+>24 belong 2 MD5 checksum
+
+#------------------------------------------------------------------------------
+# $File: asterix,v 1.5 2009/09/19 16:28:08 christos Exp $
+# asterix: file(1) magic for Aster*x; SunOS 5.5.1 gave the 4-character
+# strings as "long" - we assume they're just strings:
+# From: guy at netapp.com (Guy Harris)
+#
+0 string *STA Aster*x
+>7 string WORD Words Document
+>7 string GRAP Graphic
+>7 string SPRE Spreadsheet
+>7 string MACR Macro
+0 string 2278 Aster*x Version 2
+>29 byte 0x36 Words Document
+>29 byte 0x35 Graphic
+>29 byte 0x32 Spreadsheet
+>29 byte 0x38 Macro
+
+
+#------------------------------------------------------------------------------
+# $File: att3b,v 1.8 2009/09/19 16:28:08 christos Exp $
+# att3b: file(1) magic for AT&T 3B machines
+#
+# The `versions' should be un-commented if they work for you.
+# (Was the problem just one of endianness?)
+#
+# 3B20
+#
+# The 3B20 conflicts with SCCS.
+#0 beshort 0550 3b20 COFF executable
+#>12 belong >0 not stripped
+#>22 beshort >0 - version %ld
+#0 beshort 0551 3b20 COFF executable (TV)
+#>12 belong >0 not stripped
+#>22 beshort >0 - version %ld
+#
+# WE32K
+#
+0 beshort 0560 WE32000 COFF
+>18 beshort ^00000020 object
+>18 beshort &00000020 executable
+>12 belong >0 not stripped
+>18 beshort ^00010000 N/A on 3b2/300 w/paging
+>18 beshort &00020000 32100 required
+>18 beshort &00040000 and MAU hardware required
+>20 beshort 0407 (impure)
+>20 beshort 0410 (pure)
+>20 beshort 0413 (demand paged)
+>20 beshort 0443 (target shared library)
+>22 beshort >0 - version %ld
+0 beshort 0561 WE32000 COFF executable (TV)
+>12 belong >0 not stripped
+#>18 beshort &00020000 - 32100 required
+#>18 beshort &00040000 and MAU hardware required
+#>22 beshort >0 - version %ld
+#
+# core file for 3b2
+0 string \000\004\036\212\200 3b2 core file
+>364 string >\0 of '%s'
+
+#------------------------------------------------------------------------------
+# $File: audio,v 1.59 2009/11/04 17:27:37 christos Exp $
+# audio: file(1) magic for sound formats (see also "iff")
+#
+# Jan Nicolai Langfeldt (janl at ifi.uio.no), Dan Quinlan (quinlan at yggdrasil.com),
+# and others
+#
+
+# Sun/NeXT audio data
+0 string .snd Sun/NeXT audio data:
+>12 belong 1 8-bit ISDN mu-law,
+!:mime audio/basic
+>12 belong 2 8-bit linear PCM [REF-PCM],
+!:mime audio/basic
+>12 belong 3 16-bit linear PCM,
+!:mime audio/basic
+>12 belong 4 24-bit linear PCM,
+!:mime audio/basic
+>12 belong 5 32-bit linear PCM,
+!:mime audio/basic
+>12 belong 6 32-bit IEEE floating point,
+!:mime audio/basic
+>12 belong 7 64-bit IEEE floating point,
+!:mime audio/basic
+>12 belong 8 Fragmented sample data,
+>12 belong 10 DSP program,
+>12 belong 11 8-bit fixed point,
+>12 belong 12 16-bit fixed point,
+>12 belong 13 24-bit fixed point,
+>12 belong 14 32-bit fixed point,
+>12 belong 18 16-bit linear with emphasis,
+>12 belong 19 16-bit linear compressed,
+>12 belong 20 16-bit linear with emphasis and compression,
+>12 belong 21 Music kit DSP commands,
+>12 belong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
+!:mime audio/x-adpcm
+>12 belong 24 compressed (8-bit CCITT G.722 ADPCM)
+>12 belong 25 compressed (3-bit CCITT G.723.3 ADPCM),
+>12 belong 26 compressed (5-bit CCITT G.723.5 ADPCM),
+>12 belong 27 8-bit A-law (CCITT G.711),
+>20 belong 1 mono,
+>20 belong 2 stereo,
+>20 belong 4 quad,
+>16 belong >0 %d Hz
+
+# DEC systems (e.g. DECstation 5000) use a variant of the Sun/NeXT format
+# that uses little-endian encoding and has a different magic number
+0 lelong 0x0064732E DEC audio data:
+>12 lelong 1 8-bit ISDN mu-law,
+!:mime audio/x-dec-basic
+>12 lelong 2 8-bit linear PCM [REF-PCM],
+!:mime audio/x-dec-basic
+>12 lelong 3 16-bit linear PCM,
+!:mime audio/x-dec-basic
+>12 lelong 4 24-bit linear PCM,
+!:mime audio/x-dec-basic
+>12 lelong 5 32-bit linear PCM,
+!:mime audio/x-dec-basic
+>12 lelong 6 32-bit IEEE floating point,
+!:mime audio/x-dec-basic
+>12 lelong 7 64-bit IEEE floating point,
+!:mime audio/x-dec-basic
+>12 belong 8 Fragmented sample data,
+>12 belong 10 DSP program,
+>12 belong 11 8-bit fixed point,
+>12 belong 12 16-bit fixed point,
+>12 belong 13 24-bit fixed point,
+>12 belong 14 32-bit fixed point,
+>12 belong 18 16-bit linear with emphasis,
+>12 belong 19 16-bit linear compressed,
+>12 belong 20 16-bit linear with emphasis and compression,
+>12 belong 21 Music kit DSP commands,
+>12 lelong 23 8-bit ISDN mu-law compressed (CCITT G.721 ADPCM voice enc.),
+!:mime audio/x-dec-basic
+>12 belong 24 compressed (8-bit CCITT G.722 ADPCM)
+>12 belong 25 compressed (3-bit CCITT G.723.3 ADPCM),
+>12 belong 26 compressed (5-bit CCITT G.723.5 ADPCM),
+>12 belong 27 8-bit A-law (CCITT G.711),
+>20 lelong 1 mono,
+>20 lelong 2 stereo,
+>20 lelong 4 quad,
+>16 lelong >0 %d Hz
+
+# Creative Labs AUDIO stuff
+0 string MThd Standard MIDI data
+!:mime audio/midi
+>8 beshort x (format %d)
+>10 beshort x using %d track
+>10 beshort >1 \bs
+>12 beshort&0x7fff x at 1/%d
+>12 beshort&0x8000 >0 SMPTE
+
+0 string CTMF Creative Music (CMF) data
+!:mime audio/x-unknown
+0 string SBI SoundBlaster instrument data
+!:mime audio/x-unknown
+0 string Creative\ Voice\ File Creative Labs voice data
+!:mime audio/x-unknown
+# is this next line right? it came this way...
+>19 byte 0x1A
+>23 byte >0 - version %d
+>22 byte >0 \b.%d
+
+# first entry is also the string "NTRK"
+0 belong 0x4e54524b MultiTrack sound data
+>4 belong x - version %ld
+
+# Extended MOD format (*.emd) (Greg Roelofs, newt at uchicago.edu); NOT TESTED
+# [based on posting 940824 by "Dirk/Elastik", husberg at lehtori.cc.tut.fi]
+0 string EMOD Extended MOD sound data,
+>4 byte&0xf0 x version %d
+>4 byte&0x0f x \b.%d,
+>45 byte x %d instruments
+>83 byte 0 (module)
+>83 byte 1 (song)
+
+# Real Audio (Magic .ra\0375)
+0 belong 0x2e7261fd RealAudio sound file
+!:mime audio/x-pn-realaudio
+0 string .RMF RealMedia file
+!:mime application/vnd.rn-realmedia
+#video/x-pn-realvideo
+#video/vnd.rn-realvideo
+#application/vnd.rn-realmedia
+# sigh, there are many mimes for that but the above are the most common.
+
+# MTM/669/FAR/S3M/ULT/XM format checking [Aaron Eppert, aeppert at dialin.ind.net]
+# Oct 31, 1995
+# fixed by <doj at cubic.org> 2003-06-24
+# Too short...
+#0 string MTM MultiTracker Module sound file
+#0 string if Composer 669 Module sound data
+#0 string JN Composer 669 Module sound data (extended format)
+0 string MAS_U ULT(imate) Module sound data
+
+#0 string FAR Module sound data
+#>4 string >\15 Title: "%s"
+
+0x2c string SCRM ScreamTracker III Module sound data
+>0 string >\0 Title: "%s"
+
+# Gravis UltraSound patches
+# From <ache at nagual.ru>
+
+0 string GF1PATCH110\0ID#000002\0 GUS patch
+0 string GF1PATCH100\0ID#000002\0 Old GUS patch
+
+# mime types according to http://www.geocities.com/nevilo/mod.htm:
+# audio/it .it
+# audio/x-zipped-it .itz
+# audio/xm fasttracker modules
+# audio/x-s3m screamtracker modules
+# audio/s3m screamtracker modules
+# audio/x-zipped-mod mdz
+# audio/mod mod
+# audio/x-mod All modules (mod, s3m, 669, mtm, med, xm, it, mdz, stm, itz, xmz, s3z)
+
+#
+# Taken from loader code from mikmod version 2.14
+# by Steve McIntyre (stevem at chiark.greenend.org.uk)
+# <doj at cubic.org> added title printing on 2003-06-24
+0 string MAS_UTrack_V00
+>14 string >/0 ultratracker V1.%.1s module sound data
+!:mime audio/x-mod
+#audio/x-tracker-module
+
+0 string UN05 MikMod UNI format module sound data
+
+0 string Extended\ Module: Fasttracker II module sound data
+!:mime audio/x-mod
+#audio/x-tracker-module
+>17 string >\0 Title: "%s"
+
+21 string/c =!SCREAM! Screamtracker 2 module sound data
+!:mime audio/x-mod
+#audio/x-screamtracker-module
+21 string BMOD2STM Screamtracker 2 module sound data
+!:mime audio/x-mod
+#audio/x-screamtracker-module
+1080 string M.K. 4-channel Protracker module sound data
+!:mime audio/x-mod
+#audio/x-protracker-module
+>0 string >\0 Title: "%s"
+1080 string M!K! 4-channel Protracker module sound data
+!:mime audio/x-mod
+#audio/x-protracker-module
+>0 string >\0 Title: "%s"
+1080 string FLT4 4-channel Startracker module sound data
+!:mime audio/x-mod
+#audio/x-startracker-module
+>0 string >\0 Title: "%s"
+1080 string FLT8 8-channel Startracker module sound data
+!:mime audio/x-mod
+#audio/x-startracker-module
+>0 string >\0 Title: "%s"
+1080 string 4CHN 4-channel Fasttracker module sound data
+!:mime audio/x-mod
+#audio/x-fasttracker-module
+>0 string >\0 Title: "%s"
+1080 string 6CHN 6-channel Fasttracker module sound data
+!:mime audio/x-mod
+#audio/x-fasttracker-module
+>0 string >\0 Title: "%s"
+1080 string 8CHN 8-channel Fasttracker module sound data
+!:mime audio/x-mod
+#audio/x-fasttracker-module
+>0 string >\0 Title: "%s"
+1080 string CD81 8-channel Octalyser module sound data
+!:mime audio/x-mod
+#audio/x-octalysertracker-module
+>0 string >\0 Title: "%s"
+1080 string OKTA 8-channel Octalyzer module sound data
+!:mime audio/x-mod
+#audio/x-octalysertracker-module
+>0 string >\0 Title: "%s"
+# Not good enough.
+#1082 string CH
+#>1080 string >/0 %.2s-channel Fasttracker "oktalyzer" module sound data
+1080 string 16CN 16-channel Taketracker module sound data
+!:mime audio/x-mod
+#audio/x-taketracker-module
+>0 string >\0 Title: "%s"
+1080 string 32CN 32-channel Taketracker module sound data
+!:mime audio/x-mod
+#audio/x-taketracker-module
+>0 string >\0 Title: "%s"
+
+# TOC sound files -Trevor Johnson <trevor at jpj.net>
+#
+0 string TOC TOC sound file
+
+# sidfiles <pooka at iki.fi>
+# added name,author,(c) and new RSID type by <doj at cubic.org> 2003-06-24
+0 string SIDPLAY\ INFOFILE Sidplay info file
+
+0 string PSID PlaySID v2.2+ (AMIGA) sidtune
+>4 beshort >0 w/ header v%d,
+>14 beshort =1 single song,
+>14 beshort >1 %d songs,
+>16 beshort >0 default song: %d
+>0x16 string >\0 name: "%s"
+>0x36 string >\0 author: "%s"
+>0x56 string >\0 copyright: "%s"
+
+0 string RSID RSID sidtune PlaySID compatible
+>4 beshort >0 w/ header v%d,
+>14 beshort =1 single song,
+>14 beshort >1 %d songs,
+>16 beshort >0 default song: %d
+>0x16 string >\0 name: "%s"
+>0x36 string >\0 author: "%s"
+>0x56 string >\0 copyright: "%s"
+
+# IRCAM <mpruett at sgi.com>
+# VAX and MIPS files are little-endian; Sun and NeXT are big-endian
+0 belong 0x64a30100 IRCAM file (VAX)
+0 belong 0x64a30200 IRCAM file (Sun)
+0 belong 0x64a30300 IRCAM file (MIPS little-endian)
+0 belong 0x64a30400 IRCAM file (NeXT)
+
+# NIST SPHERE <mpruett at sgi.com>
+0 string NIST_1A\n\ \ \ 1024\n NIST SPHERE file
+
+# Sample Vision <mpruett at sgi.com>
+0 string SOUND\ SAMPLE\ DATA\ Sample Vision file
+
+# Audio Visual Research <tonigonenstein at users.sourceforge.net>
+0 string 2BIT Audio Visual Research file,
+>12 beshort =0 mono,
+>12 beshort =-1 stereo,
+>14 beshort x %d bits
+>16 beshort =0 unsigned,
+>16 beshort =-1 signed,
+>22 belong&0x00ffffff x %d Hz,
+>18 beshort =0 no loop,
+>18 beshort =-1 loop,
+>21 ubyte <128 note %d,
+>22 byte =0 replay 5.485 KHz
+>22 byte =1 replay 8.084 KHz
+>22 byte =2 replay 10.971 KHz
+>22 byte =3 replay 16.168 KHz
+>22 byte =4 replay 21.942 KHz
+>22 byte =5 replay 32.336 KHz
+>22 byte =6 replay 43.885 KHz
+>22 byte =7 replay 47.261 KHz
+
+# SGI SoundTrack <mpruett at sgi.com>
+0 string _SGI_SoundTrack SGI SoundTrack project file
+# ID3 version 2 tags <waschk at informatik.uni-rostock.de>
+0 string ID3 Audio file with ID3 version 2
+>3 byte x \b.%d
+>4 byte x \b.%d
+>>5 byte &0x80 \b, unsynchronized frames
+>>5 byte &0x40 \b, extended header
+>>5 byte &0x20 \b, experimental
+>>5 byte &0x10 \b, footer present
+>(6.I) indirect x \b, contains:
+
+# NSF (NES sound file) magic
+0 string NESM\x1a NES Sound File
+>14 string >\0 ("%s" by
+>46 string >\0 %s, copyright
+>78 string >\0 %s),
+>5 byte x version %d,
+>6 byte x %d tracks,
+>122 byte&0x2 =1 dual PAL/NTSC
+>122 byte&0x1 =1 PAL
+>122 byte&0x1 =0 NTSC
+
+# Impulse tracker module (audio/x-it)
+0 string IMPM Impulse Tracker module sound data -
+!:mime audio/x-mod
+>4 string >\0 "%s"
+>40 leshort !0 compatible w/ITv%x
+>42 leshort !0 created w/ITv%x
+
+# Imago Orpheus module (audio/x-imf)
+60 string IM10 Imago Orpheus module sound data -
+>0 string >\0 "%s"
+
+# From <collver1 at attbi.com>
+# These are the /etc/magic entries to decode modules, instruments, and
+# samples in Impulse Tracker's native format.
+
+0 string IMPS Impulse Tracker Sample
+>18 byte &2 16 bit
+>18 byte ^2 8 bit
+>18 byte &4 stereo
+>18 byte ^4 mono
+0 string IMPI Impulse Tracker Instrument
+>28 leshort !0 ITv%x
+>30 byte !0 %d samples
+
+# Yamaha TX Wave: file(1) magic for Yamaha TX Wave audio files
+# From <collver1 at attbi.com>
+0 string LM8953 Yamaha TX Wave
+>22 byte 0x49 looped
+>22 byte 0xC9 non-looped
+>23 byte 1 33kHz
+>23 byte 2 50kHz
+>23 byte 3 16kHz
+
+# scream tracker: file(1) magic for Scream Tracker sample files
+#
+# From <collver1 at attbi.com>
+76 string SCRS Scream Tracker Sample
+>0 byte 1 sample
+>0 byte 2 adlib melody
+>0 byte >2 adlib drum
+>31 byte &2 stereo
+>31 byte ^2 mono
+>31 byte &4 16bit little endian
+>31 byte ^4 8bit
+>30 byte 0 unpacked
+>30 byte 1 packed
+
+# audio
+# From: Cory Dikkers <cdikkers at swbell.net>
+0 string MMD0 MED music file, version 0
+0 string MMD1 OctaMED Pro music file, version 1
+0 string MMD3 OctaMED Soundstudio music file, version 3
+0 string OctaMEDCmpr OctaMED Soundstudio compressed file
+0 string MED MED_Song
+0 string SymM Symphonie SymMOD music file
+#
+0 string THX AHX version
+>3 byte =0 1 module data
+>3 byte =1 2 module data
+#
+0 string OKTASONG Oktalyzer module data
+#
+0 string DIGI\ Booster\ module\0 %s
+>20 byte >0 %c
+>>21 byte >0 \b%c
+>>>22 byte >0 \b%c
+>>>>23 byte >0 \b%c
+>610 string >\0 \b, "%s"
+#
+0 string DBM0 DIGI Booster Pro Module
+>4 byte >0 V%X.
+>>5 byte x \b%02X
+>16 string >\0 \b, "%s"
+#
+0 string FTMN FaceTheMusic module
+>16 string >\0d \b, "%s"
+
+# From: <doj at cubic.org> 2003-06-24
+0 string AMShdr\32 Velvet Studio AMS Module v2.2
+0 string Extreme Extreme Tracker AMS Module v1.3
+0 string DDMF Xtracker DMF Module
+>4 byte x v%i
+>0xD string >\0 Title: "%s"
+>0x2B string >\0 Composer: "%s"
+0 string DSM\32 Dynamic Studio Module DSM
+0 string SONG DigiTrekker DTM Module
+0 string DMDL DigiTrakker MDL Module
+0 string PSM\32 Protracker Studio PSM Module
+44 string PTMF Poly Tracker PTM Module
+>0 string >\32 Title: "%s"
+0 string MT20 MadTracker 2.0 Module MT2
+0 string RAD\40by\40REALiTY!! RAD Adlib Tracker Module RAD
+0 string RTMM RTM Module
+0x426 string MaDoKaN96 XMS Adlib Module
+>0 string >\0 Composer: "%s"
+0 string AMF AMF Module
+>4 string >\0 Title: "%s"
+0 string MODINFO1 Open Cubic Player Module Inforation MDZ
+0 string Extended\40Instrument: Fast Tracker II Instrument
+
+# From: Takeshi Hamasaki <hma at syd.odn.ne.jp>
+# NOA Nancy Codec file
+0 string \210NOA\015\012\032 NOA Nancy Codec Movie file
+# Yamaha SMAF format
+0 string MMMD Yamaha SMAF file
+# Sharp Jisaku Melody format for PDC
+0 string \001Sharp\040JisakuMelody SHARP Cell-Phone ringing Melody
+>20 string Ver01.00 Ver. 1.00
+>>32 byte x , %d tracks
+
+# Free lossless audio codec <http://flac.sourceforge.net>
+# From: Przemyslaw Augustyniak <silvathraec at rpg.pl>
+0 string fLaC FLAC audio bitstream data
+!:mime audio/x-flac
+>4 byte&0x7f >0 \b, unknown version
+>4 byte&0x7f 0 \b
+# some common bits/sample values
+>>20 beshort&0x1f0 0x030 \b, 4 bit
+>>20 beshort&0x1f0 0x050 \b, 6 bit
+>>20 beshort&0x1f0 0x070 \b, 8 bit
+>>20 beshort&0x1f0 0x0b0 \b, 12 bit
+>>20 beshort&0x1f0 0x0f0 \b, 16 bit
+>>20 beshort&0x1f0 0x170 \b, 24 bit
+>>20 byte&0xe 0x0 \b, mono
+>>20 byte&0xe 0x2 \b, stereo
+>>20 byte&0xe 0x4 \b, 3 channels
+>>20 byte&0xe 0x6 \b, 4 channels
+>>20 byte&0xe 0x8 \b, 5 channels
+>>20 byte&0xe 0xa \b, 6 channels
+>>20 byte&0xe 0xc \b, 7 channels
+>>20 byte&0xe 0xe \b, 8 channels
+# some common sample rates
+>>17 belong&0xfffff0 0x0ac440 \b, 44.1 kHz
+>>17 belong&0xfffff0 0x0bb800 \b, 48 kHz
+>>17 belong&0xfffff0 0x07d000 \b, 32 kHz
+>>17 belong&0xfffff0 0x056220 \b, 22.05 kHz
+>>17 belong&0xfffff0 0x05dc00 \b, 24 kHz
+>>17 belong&0xfffff0 0x03e800 \b, 16 kHz
+>>17 belong&0xfffff0 0x02b110 \b, 11.025 kHz
+>>17 belong&0xfffff0 0x02ee00 \b, 12 kHz
+>>17 belong&0xfffff0 0x01f400 \b, 8 kHz
+>>17 belong&0xfffff0 0x177000 \b, 96 kHz
+>>17 belong&0xfffff0 0x0fa000 \b, 64 kHz
+>>21 byte&0xf >0 \b, >4G samples
+>>21 byte&0xf 0 \b
+>>>22 belong >0 \b, %u samples
+>>>22 belong 0 \b, length unknown
+
+# (ISDN) VBOX voice message file (Wolfram Kleff)
+0 string VBOX VBOX voice message data
+
+# ReBorn Song Files (.rbs)
+# David J. Singer <doc at deadvirgins.org.uk>
+8 string RB40 RBS Song file
+>29 string ReBorn created by ReBorn
+>37 string Propellerhead created by ReBirth
+
+# Synthesizer Generator and Kimwitu share their file format
+0 string A#S#C#S#S#L#V#3 Synthesizer Generator or Kimwitu data
+# Kimwitu++ uses a slightly different magic
+0 string A#S#C#S#S#L#HUB Kimwitu++ data
+
+# From "Simon Hosie"
+0 string TFMX-SONG TFMX module sound data
+
+# Monkey's Audio compressed audio format (.ape)
+# From danny.milo at gmx.net (Danny Milosavljevic)
+# New version from Abel Cheung <abel (@) oaka.org>
+0 string MAC\040 Monkey's Audio compressed format
+>4 uleshort >0x0F8B version %d
+>>(0x08.l) uleshort =1000 with fast compression
+>>(0x08.l) uleshort =2000 with normal compression
+>>(0x08.l) uleshort =3000 with high compression
+>>(0x08.l) uleshort =4000 with extra high compression
+>>(0x08.l) uleshort =5000 with insane compression
+>>(0x08.l+18) uleshort =1 \b, mono
+>>(0x08.l+18) uleshort =2 \b, stereo
+>>(0x08.l+20) ulelong x \b, sample rate %d
+>4 uleshort <0x0F8C version %d
+>>6 uleshort =1000 with fast compression
+>>6 uleshort =2000 with normal compression
+>>6 uleshort =3000 with high compression
+>>6 uleshort =4000 with extra high compression
+>>6 uleshort =5000 with insane compression
+>>10 uleshort =1 \b, mono
+>>10 uleshort =2 \b, stereo
+>>12 ulelong x \b, sample rate %d
+
+# adlib sound files
+# From Gürkan Sengün <gurkan at linuks.mine.nu>, http://www.linuks.mine.nu
+0 string RAWADATA RdosPlay RAW
+
+1068 string RoR AMUSIC Adlib Tracker
+
+0 string JCH EdLib
+
+0 string mpu401tr MPU-401 Trakker
+
+0 string SAdT Surprise! Adlib Tracker
+>4 byte x Version %d
+
+0 string XAD! eXotic ADlib
+
+0 string ofTAZ! eXtra Simple Music
+
+# Spectrum 128 tunes (.ay files).
+# From: Emanuel Haupt <ehaupt at critical.ch>
+0 string ZXAYEMUL Spectrum 128 tune
+
+0 string \0BONK BONK,
+#>5 byte x version %d
+>14 byte x %d channel(s),
+>15 byte =1 lossless,
+>15 byte =0 lossy,
+>16 byte x mid-side
+
+384 string LockStream LockStream Embedded file (mostly MP3 on old Nokia phones)
+
+# format VQF (proprietary codec for sound)
+# some information about the file header is available at:
+# http://www.twinvq.org/english/technology_format.html
+0 string TWIN97012000 VQF data
+>27 short 0 \b, Mono
+>27 short 1 \b, Stereo
+>31 short >0 \b, %d kbit/s
+>35 short >0 \b, %d kHz
+
+# Nelson A. de Oliveira (naoliv at gmail.com)
+# .eqf
+0 string Winamp\ EQ\ library\ file %s
+# it will match only versions like v<digit>.<digit>
+# Since I saw only eqf files with version v1.1 I think that it's OK
+>23 string x \b%.4s
+# .preset
+0 string [Equalizer\ preset] XMMS equalizer preset
+# .m3u
+0 search/1 #EXTM3U M3U playlist text
+# .pls
+0 search/1 [playlist] PLS playlist text
+# licq.conf
+1 string [licq] LICQ configuration file
+
+# Atari ST audio files by Dirk Jagdmann <doj at cubic.org>
+0 string ICE! SNDH Atari ST music
+0 string SC68\ Music-file\ /\ (c)\ (BeN)jami sc68 Atari ST music
+
+# musepack support From: "Jiri Pejchal" <jiri.pejchal at gmail.com>
+0 string MP+ Musepack audio
+>3 byte 255 \b, SV pre8
+>3 byte&0xF 0x6 \b, SV 6
+>3 byte&0xF 0x8 \b, SV 8
+>3 byte&0xF 0x7 \b, SV 7
+>>3 byte&0xF0 0x0 \b.0
+>>3 byte&0xF0 0x10 \b.1
+>>3 byte&0xF0 240 \b.15
+>>10 byte&0xF0 0x0 \b, no profile
+>>10 byte&0xF0 0x10 \b, profile 'Unstable/Experimental'
+>>10 byte&0xF0 0x50 \b, quality 0
+>>10 byte&0xF0 0x60 \b, quality 1
+>>10 byte&0xF0 0x70 \b, quality 2 (Telephone)
+>>10 byte&0xF0 0x80 \b, quality 3 (Thumb)
+>>10 byte&0xF0 0x90 \b, quality 4 (Radio)
+>>10 byte&0xF0 0xA0 \b, quality 5 (Standard)
+>>10 byte&0xF0 0xB0 \b, quality 6 (Xtreme)
+>>10 byte&0xF0 0xC0 \b, quality 7 (Insane)
+>>10 byte&0xF0 0xD0 \b, quality 8 (BrainDead)
+>>10 byte&0xF0 0xE0 \b, quality 9
+>>10 byte&0xF0 0xF0 \b, quality 10
+>>27 byte 0x0 \b, Buschmann 1.7.0-9, Klemm 0.90-1.05
+>>27 byte 102 \b, Beta 1.02
+>>27 byte 104 \b, Beta 1.04
+>>27 byte 105 \b, Alpha 1.05
+>>27 byte 106 \b, Beta 1.06
+>>27 byte 110 \b, Release 1.1
+>>27 byte 111 \b, Alpha 1.11
+>>27 byte 112 \b, Beta 1.12
+>>27 byte 113 \b, Alpha 1.13
+>>27 byte 114 \b, Beta 1.14
+>>27 byte 115 \b, Alpha 1.15
+
+# IMY
+# from http://filext.com/detaillist.php?extdetail=IMY
+# http://cellphones.about.com/od/cellularfaqs/f/rf_imelody.htm
+# http://download.ncl.ie/doc/api/ie/ncl/media/music/IMelody.html
+# http://www.wx800.com/msg/download/irda/iMelody.pdf
+0 string BEGIN:IMELODY iMelody Ringtone Format
+
+# From: "Mateus Caruccio" <mateus at caruccio.com>
+# guitar pro v3,4,5 from http://filext.com/file-extension/gp3
+0 string \030FICHIER\ GUITAR\ PRO\ v3. Guitar Pro Ver. 3 Tablature
+
+# From: "Leslie P. Polzer" <leslie.polzer at gmx.net>
+60 string SONG SoundFX Module sound file
+
+# Type: Adaptive Multi-Rate Codec
+# URL: http://filext.com/detaillist.php?extdetail=AMR
+# From: Russell Coker <russell at coker.com.au>
+0 string #!AMR Adaptive Multi-Rate Codec (GSM telephony)
+
+#----------------------------------------------------------------
+# $File: basis,v 1.4 2009/09/19 16:28:08 christos Exp $
+# basis: file(1) magic for BBx/Pro5-files
+# Oliver Dammer <dammer at olida.de> 2005/11/07
+# http://www.basis.com business-basic-files.
+#
+0 string \074\074bbx\076\076 BBx
+>7 string \000 indexed file
+>7 string \001 serial file
+>7 string \002 keyed file
+>>13 short 0 (sort)
+>7 string \004 program
+>>18 byte x (LEVEL %d)
+>>>23 string >\000 psaved
+>7 string \006 mkeyed file
+>>13 short 0 (sort)
+>>8 string \000 (mkey)
+
+#------------------------------------------------------------------------------
+# $File: bflt,v 1.4 2009/09/19 16:28:08 christos Exp $
+# bFLT: file(1) magic for BFLT uclinux binary files
+#
+# From Philippe De Muyter <phdm at macqel.be>
+#
+0 string bFLT BFLT executable
+>4 belong x - version %ld
+>4 belong 4
+>>36 belong&0x1 0x1 ram
+>>36 belong&0x2 0x2 gotpic
+>>36 belong&0x4 0x4 gzip
+>>36 belong&0x8 0x8 gzdata
+
+#------------------------------------------------------------------------------
+# $File: blender,v 1.5 2009/09/19 16:28:08 christos Exp $
+# blender: file(1) magic for Blender 3D related files
+#
+# Native format rule v1.2. For questions use the developers list
+# http://lists.blender.org/mailman/listinfo/bf-committers
+# GLOB chunk was moved near start and provides subversion info since 2.42
+
+0 string =BLENDER Blender3D,
+>7 string =_ saved as 32-bits
+>>8 string =v little endian
+>>>9 byte x with version %c.
+>>>10 byte x \b%c
+>>>11 byte x \b%c
+>>>0x40 string =GLOB \b.
+>>>>0x58 leshort x \b%.4d
+>>8 string =V big endian
+>>>9 byte x with version %c.
+>>>10 byte x \b%c
+>>>11 byte x \b%c
+>>>0x40 string =GLOB \b.
+>>>>0x58 beshort x \b%.4d
+>7 string =- saved as 64-bits
+>>8 string =v little endian
+>>9 byte x with version %c.
+>>10 byte x \b%c
+>>11 byte x \b%c
+>>0x44 string =GLOB \b.
+>>>0x60 leshort x \b%.4d
+>>8 string =V big endian
+>>>9 byte x with version %c.
+>>>10 byte x \b%c
+>>>11 byte x \b%c
+>>>0x44 string =GLOB \b.
+>>>>0x60 beshort x \b%.4d
+
+# Scripts that run in the embedded Python interpreter
+0 string #!BPY Blender3D BPython script
+
+#------------------------------------------------------------------------------
+# $File: blit,v 1.8 2009/09/19 16:28:08 christos Exp $
+# blit: file(1) magic for 68K Blit stuff as seen from 680x0 machine
+#
+# Note that this 0407 conflicts with several other a.out formats...
+#
+# XXX - should this be redone with "be" and "le", so that it works on
+# little-endian machines as well? If so, what's the deal with
+# "VAX-order" and "VAX-order2"?
+#
+#0 long 0407 68K Blit (standalone) executable
+#0 short 0407 VAX-order2 68K Blit (standalone) executable
+0 short 03401 VAX-order 68K Blit (standalone) executable
+0 long 0406 68k Blit mpx/mux executable
+0 short 0406 VAX-order2 68k Blit mpx/mux executable
+0 short 03001 VAX-order 68k Blit mpx/mux executable
+# Need more values for WE32 DMD executables.
+# Note that 0520 is the same as COFF
+#0 short 0520 tty630 layers executable
+
+#------------------------------------------------------------------------------
+# $File: bout,v 1.5 2009/09/19 16:28:08 christos Exp $
+# i80960 b.out objects and archives
+#
+0 long 0x10d i960 b.out relocatable object
+>16 long >0 not stripped
+#
+# b.out archive (hp-rt on i960)
+0 string =!<bout> b.out archive
+>8 string __.SYMDEF random library
+
+#------------------------------------------------------------------------------
+# $File: bsdi,v 1.5 2009/09/19 16:28:08 christos Exp $
+# bsdi: file(1) magic for BSD/OS (from BSDI) objects
+#
+
+0 lelong 0314 386 compact demand paged pure executable
+>16 lelong >0 not stripped
+>32 byte 0x6a (uses shared libs)
+
+0 lelong 0407 386 executable
+>16 lelong >0 not stripped
+>32 byte 0x6a (uses shared libs)
+
+0 lelong 0410 386 pure executable
+>16 lelong >0 not stripped
+>32 byte 0x6a (uses shared libs)
+
+0 lelong 0413 386 demand paged pure executable
+>16 lelong >0 not stripped
+>32 byte 0x6a (uses shared libs)
+
+# same as in SunOS 4.x, except for static shared libraries
+0 belong&077777777 0600413 sparc demand paged
+>0 byte &0x80
+>>20 belong <4096 shared library
+>>20 belong =4096 dynamically linked executable
+>>20 belong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+>36 belong 0xb4100001 (uses shared libs)
+
+0 belong&077777777 0600410 sparc pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+>36 belong 0xb4100001 (uses shared libs)
+
+0 belong&077777777 0600407 sparc
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+>36 belong 0xb4100001 (uses shared libs)
+
+#------------------------------------------------------------------------------
+# $File: btsnoop,v 1.5 2009/09/19 16:28:08 christos Exp $
+# BTSnoop: file(1) magic for BTSnoop files
+#
+# From <marcel at holtmann.org>
+0 string btsnoop\0 BTSnoop
+>8 belong x version %d,
+>12 belong 1001 Unencapsulated HCI
+>12 belong 1002 HCI UART (H4)
+>12 belong 1003 HCI BCSP
+>12 belong 1004 HCI Serial (H5)
+>>12 belong x type %d
+
+#------------------------------------------------------------------------------
+# $File: c-lang,v 1.14 2009/09/19 16:28:08 christos Exp $
+# c-lang: file(1) magic for C programs (or REXX)
+#
+
+# XPM icons (Greg Roelofs, newt at uchicago.edu)
+# if you uncomment "/*" for C/REXX below, also uncomment this entry
+#0 string /*\ XPM\ */ X pixmap image data
+#!:mime image/x-xpmi
+
+# 3DS (3d Studio files) Conflicts with diff output 0x3d '='
+#16 beshort 0x3d3d image/x-3ds
+
+# this first will upset you if you're a PL/1 shop...
+# in which case rm it; ascmagic will catch real C programs
+#0 search/1 /* C or REXX program text
+#0 search/1 // C++ program text
+
+# From: Mikhail Teterin <mi at aldan.algebra.com>
+0 string cscope cscope reference data
+>7 string x version %.2s
+# We skip the path here, because it is often long (so file will
+# truncate it) and mostly redundant.
+# The inverted index functionality was added some time between
+# versions 11 and 15, so look for -q if version is above 14:
+>7 string >14
+>>10 search/100 \ -q\ with inverted index
+>10 search/100 \ -c\ text (non-compressed)
+
+#------------------------------------------------------------------------------
+# $File: c64,v 1.5 2009/09/19 16:28:08 christos Exp $
+# c64: file(1) magic for various commodore 64 related files
+#
+# From: Dirk Jagdmann <doj at cubic.org>
+
+0x16500 belong 0x12014100 D64 Image
+0x16500 belong 0x12014180 D71 Image
+0x61800 belong 0x28034400 D81 Image
+0 string C64\40CARTRIDGE CCS C64 Emultar Cartridge Image
+0 belong 0x43154164 X64 Image
+
+0 string GCR-1541 GCR Image
+>8 byte x version: %i
+>9 byte x tracks: %i
+
+9 string PSUR ARC archive (c64)
+2 string -LH1- LHA archive (c64)
+
+0 string C64File PC64 Emulator file
+>8 string >\0 "%s"
+0 string C64Image PC64 Freezer Image
+
+0 beshort 0x38CD C64 PCLink Image
+0 string CBM\144\0\0 Power 64 C64 Emulator Snapshot
+
+0 belong 0xFF424CFF WRAptor packer (c64)
+
+0 string C64S\x20tape\x20file T64 tape Image
+>32 leshort x Version:0x%x
+>36 leshort !0 Entries:%i
+>40 string x Name:%.24s
+
+0 string C64\x20tape\x20image\x20file\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0 T64 tape Image
+>32 leshort x Version:0x%x
+>36 leshort !0 Entries:%i
+>40 string x Name:%.24s
+
+0 string C64S\x20tape\x20image\x20file\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0 T64 tape Image
+>32 leshort x Version:0x%x
+>36 leshort !0 Entries:%i
+>40 string x Name:%.24s
+
+#------------------------------------------------------------------------------
+# $File: cad,v 1.9 2009/09/19 16:28:08 christos Exp $
+# autocad: file(1) magic for cad files
+#
+
+# AutoCAD DWG versions R13/R14 (www.autodesk.com)
+# Written December 01, 2003 by Lester Hightower
+# Based on the DWG File Format Specifications at http://www.opendwg.org/
+0 string \101\103\061\060\061 AutoCAD
+>5 string \062\000\000\000\000 DWG ver. R13
+>5 string \064\000\000\000\000 DWG ver. R14
+
+# Microstation DGN/CIT Files (www.bentley.com)
+# Last updated July 29, 2005 by Lester Hightower
+# DGN is the default file extension of Microstation/Intergraph CAD files.
+# CIT is the proprietary raster format (similar to TIFF) used to attach
+# raster underlays to Microstation DGN (vector) drawings.
+#
+# http://www.wotsit.org/search.asp
+# http://filext.com/detaillist.php?extdetail=DGN
+# http://filext.com/detaillist.php?extdetail=CIT
+#
+# http://www.bentley.com/products/default.cfm?objectid=97F351F5-9C35-4E5E-89C2
+# 3F86C928&method=display&p_objectid=97F351F5-9C35-4E5E-89C280A93F86C928
+# http://www.bentley.com/products/default.cfm?objectid=A5C2FD43-3AC9-4C71-B682
+# 721C479F&method=display&p_objectid=A5C2FD43-3AC9-4C71-B682C7BE721C479F
+0 string \010\011\376 Microstation
+>3 string \002
+>>30 string \026\105 DGNFile
+>>30 string \034\105 DGNFile
+>>30 string \073\107 DGNFile
+>>30 string \073\110 DGNFile
+>>30 string \106\107 DGNFile
+>>30 string \110\103 DGNFile
+>>30 string \120\104 DGNFile
+>>30 string \172\104 DGNFile
+>>30 string \172\105 DGNFile
+>>30 string \172\106 DGNFile
+>>30 string \234\106 DGNFile
+>>30 string \273\105 DGNFile
+>>30 string \306\106 DGNFile
+>>30 string \310\104 DGNFile
+>>30 string \341\104 DGNFile
+>>30 string \372\103 DGNFile
+>>30 string \372\104 DGNFile
+>>30 string \372\106 DGNFile
+>>30 string \376\103 DGNFile
+>4 string \030\000\000 CITFile
+>4 string \030\000\003 CITFile
+
+# AutoCad, from Nahuel Greco
+# AutoCAD DWG versions R12/R13/R14 (www.autodesk.com)
+0 string AC1012 AutoCad (release 12)
+0 string AC1013 AutoCad (release 13)
+0 string AC1014 AutoCad (release 14)
+
+# CAD: file(1) magic for computer aided design files
+# Phillip Griffith <phillip dot griffith at gmail dot com>
+# AutoCAD magic taken from the Open Design Alliance's OpenDWG specifications.
+#
+0 belong 0x08051700 Bentley/Intergraph MicroStation DGN cell library
+0 belong 0x0809fe02 Bentley/Intergraph MicroStation DGN vector CAD
+0 belong 0xc809fe02 Bentley/Intergraph MicroStation DGN vector CAD
+0 beshort 0x0809 Bentley/Intergraph MicroStation
+>0x02 byte 0xfe
+>>0x04 beshort 0x1800 CIT raster CAD
+0 string AC1012 AutoDesk AutoCAD R13
+0 string AC1014 AutoDesk AutoCAD R14
+0 string AC1015 AutoDesk AutoCAD R2000
+
+#------------------------------------------------------------------------------
+# $File: cafebabe,v 1.8 2009/09/19 16:28:08 christos Exp $
+# Cafe Babes unite!
+#
+# Since Java bytecode and Mach-O fat-files have the same magic number, the test
+# must be performed in the same "magic" sequence to get both right. The long
+# at offset 4 in a mach-O fat file tells the number of architectures; the short at
+# offset 4 in a Java bytecode file is the JVM minor version and the
+# short at offset 6 is the JVM major version. Since there are
+# only 18 labeled Mach-O architectures at present, and the first released
+# Java class format was version 43.0, we can safely choose any number
+# between 18 and 39 to test the number of architectures against
+# (and use as a hack). Let's not use 18, because the Mach-O people
+# might add another one or two as time goes by...
+#
+0 belong 0xcafebabe
+!:mime application/x-java-applet
+>4 belong >30 compiled Java class data,
+>>6 beshort x version %d.
+>>4 beshort x \b%d
+# Which is which?
+#>>4 belong 0x032d (Java 1.0)
+#>>4 belong 0x032d (Java 1.1)
+>>4 belong 0x002e (Java 1.2)
+>>4 belong 0x002f (Java 1.3)
+>>4 belong 0x0030 (Java 1.4)
+>>4 belong 0x0031 (Java 1.5)
+>>4 belong 0x0032 (Java 1.6)
+
+
+0 belong 0xcafebabe
+>4 belong 1 Mach-O fat file with 1 architecture
+>4 belong >1
+>>4 belong <20 Mach-O fat file with %ld architectures
+
+0 belong 0xcafed00d JAR compressed with pack200,
+>>5 byte x version %d.
+>>4 byte x \b%d
+!:mime application/x-java-pack200
+
+#------------------------------------------------------------------------------
+# $File: cddb,v 1.4 2009/09/19 16:28:08 christos Exp $
+# CDDB: file(1) magic for CDDB(tm) format CD text data files
+#
+# From <steve at gracenote.com>
+#
+# This is the /etc/magic entry to decode datafiles as used by
+# CDDB-enabled CD player applications.
+#
+
+0 search/1/w #\040xmcd CDDB(tm) format CD text data
+
+#------------------------------------------------------------------------------
+# $File: chord,v 1.4 2009/09/19 16:28:08 christos Exp $
+# chord: file(1) magic for Chord music sheet typesetting utility input files
+#
+# From Philippe De Muyter <phdm at macqel.be>
+# File format is actually free, but many distributed files begin with `{title'
+#
+0 string {title Chord text file
+
+
+#------------------------------------------------------------------------------
+# $File: cisco,v 1.4 2009/09/19 16:28:08 christos Exp $
+# cisco: file(1) magic for cisco Systems routers
+#
+# Most cisco file-formats are covered by the generic elf code
+#
+# Microcode files are non-ELF, 0x8501 conflicts with NetBSD/alpha.
+0 belong&0xffffff00 0x85011400 cisco IOS microcode
+>7 string >\0 for '%s'
+0 belong&0xffffff00 0x8501cb00 cisco IOS experimental microcode
+>7 string >\0 for '%s'
+
+#------------------------------------------------------------------------------
+# $File: citrus,v 1.4 2009/09/19 16:28:08 christos Exp $
+# citrus locale declaration
+#
+
+0 string RuneCT Citrus locale declaration for LC_CTYPE
+
+
+#------------------------------------------------------------------------------
+# $File: clarion,v 1.4 2009/09/19 16:28:08 christos Exp $
+# clarion: file(1) magic for Clarion Personal/Professional Developer
+# (v2 and above)
+# From: Julien Blache <jb at jblache.org>
+
+# Database files
+# signature
+0 leshort 0x3343 Clarion Developer (v2 and above) data file
+# attributes
+>2 leshort &0x0001 \b, locked
+>2 leshort &0x0004 \b, encrypted
+>2 leshort &0x0008 \b, memo file exists
+>2 leshort &0x0010 \b, compressed
+>2 leshort &0x0040 \b, read only
+# number of records
+>5 lelong x \b, %ld records
+
+# Memo files
+0 leshort 0x334d Clarion Developer (v2 and above) memo data
+
+# Key/Index files
+# No magic? :(
+
+# Help files
+0 leshort 0x49e0 Clarion Developer (v2 and above) help data
+
+#------------------------------------------------------------------------------
+# $File: claris,v 1.5 2009/09/19 16:28:08 christos Exp $
+# claris: file(1) magic for claris
+# "H. Nanosecond" <aldomel at ix.netcom.com>
+# Claris Works a word processor, etc.
+# Version 3.0
+
+# .pct claris works clip art files
+#0000000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000 000
+#*
+#0001000 #010 250 377 377 377 377 000 213 000 230 000 021 002 377 014 000
+#null to byte 1000 octal
+514 string \377\377\377\377\000 Claris clip art?
+>0 string \0\0\0\0\0\0\0\0\0\0\0\0\0 yes.
+514 string \377\377\377\377\001 Claris clip art?
+>0 string \0\0\0\0\0\0\0\0\0\0\0\0\0 yes.
+
+# Claris works files
+# .cwk
+0 string \002\000\210\003\102\117\102\117\000\001\206 Claris works document
+# .plt
+0 string \020\341\000\000\010\010 Claris Works pallete files .plt
+
+# .msp: a dictionary file. I am not sure about this; I have only one .msp file.
+0 string \002\271\262\000\040\002\000\164 Claris works dictionary
+
+# .usp are user dictionary bits
+# I am not sure about a magic header:
+#0000000 001 123 160 146 070 125 104 040 136 123 015 012 160 157 144 151
+# soh S p f 8 U D sp ^ S cr nl p o d i
+#0000020 141 164 162 151 163 164 040 136 123 015 012 144 151 166 040 043
+# a t r i s t sp ^ S cr nl d i v sp #
+
+# .mth Thesaurus
+# starts with \0 but no magic header
+
+# .chy Hyphenation file
+# I am not sure: 000 210 034 000 000
+
+# other claris files
+#./windows/claris/useng.ndx: data
+#./windows/claris/xtndtran.l32: data
+#./windows/claris/xtndtran.lst: data
+#./windows/claris/clworks.lbl: data
+#./windows/claris/clworks.prf: data
+#./windows/claris/userd.spl: data
+
+#------------------------------------------------------------------------------
+# $File: clipper,v 1.6 2009/09/19 16:28:08 christos Exp $
+# clipper: file(1) magic for Intergraph (formerly Fairchild) Clipper.
+#
+# XXX - what byte order does the Clipper use?
+#
+# XXX - what's the "!" stuff:
+#
+# >18 short !074000,000000 C1 R1
+# >18 short !074000,004000 C2 R1
+# >18 short !074000,010000 C3 R1
+# >18 short !074000,074000 TEST
+#
+# I shall assume it's ANDing the field with the first value and
+# comparing it with the second, and rewrite it as:
+#
+# >18 short&074000 000000 C1 R1
+# >18 short&074000 004000 C2 R1
+# >18 short&074000 010000 C3 R1
+# >18 short&074000 074000 TEST
+#
+# as SVR3.1's "file" doesn't support anything of the "!074000,000000"
+# sort, nor does SunOS 4.x, so either it's something Intergraph added
+# in CLIX, or something AT&T added in SVR3.2 or later, or something
+# somebody else thought was a good idea; it's not documented in the
+# man page for this version of "magic", nor does it appear to be
+# implemented (at least not after I blew off the bogus code to turn
+# old-style "&"s into new-style "&"s, which just didn't work at all).
+#
+0 short 0575 CLIPPER COFF executable (VAX #)
+>20 short 0407 (impure)
+>20 short 0410 (5.2 compatible)
+>20 short 0411 (pure)
+>20 short 0413 (demand paged)
+>20 short 0443 (target shared library)
+>12 long >0 not stripped
+>22 short >0 - version %ld
+0 short 0577 CLIPPER COFF executable
+>18 short&074000 000000 C1 R1
+>18 short&074000 004000 C2 R1
+>18 short&074000 010000 C3 R1
+>18 short&074000 074000 TEST
+>20 short 0407 (impure)
+>20 short 0410 (pure)
+>20 short 0411 (separate I&D)
+>20 short 0413 (paged)
+>20 short 0443 (target shared library)
+>12 long >0 not stripped
+>22 short >0 - version %ld
+>48 long&01 01 alignment trap enabled
+>52 byte 1 -Ctnc
+>52 byte 2 -Ctsw
+>52 byte 3 -Ctpw
+>52 byte 4 -Ctcb
+>53 byte 1 -Cdnc
+>53 byte 2 -Cdsw
+>53 byte 3 -Cdpw
+>53 byte 4 -Cdcb
+>54 byte 1 -Csnc
+>54 byte 2 -Cssw
+>54 byte 3 -Cspw
+>54 byte 4 -Cscb
+4 string pipe CLIPPER instruction trace
+4 string prof CLIPPER instruction profile
+
+#------------------------------------------------------------------------------
+# $File: commands,v 1.36 2010/01/24 18:41:11 christos Exp $
+# commands: file(1) magic for various shells and interpreters
+#
+#0 string : shell archive or script for antique kernel text
+0 string/w #!\ /bin/sh POSIX shell script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /bin/csh C shell script text executable
+!:mime text/x-shellscript
+# korn shell magic, sent by George Wu, gwu at clyde.att.com
+0 string/w #!\ /bin/ksh Korn shell script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /bin/tcsh Tenex C shell script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /usr/local/tcsh Tenex C shell script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /usr/local/bin/tcsh Tenex C shell script text executable
+!:mime text/x-shellscript
+
+#
+# zsh/ash/ae/nawk/gawk magic from cameron at cs.unsw.oz.au (Cameron Simpson)
+0 string/w #!\ /bin/zsh Paul Falstad's zsh script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /usr/bin/zsh Paul Falstad's zsh script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /usr/local/bin/zsh Paul Falstad's zsh script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /usr/local/bin/ash Neil Brown's ash script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /usr/local/bin/ae Neil Brown's ae script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /bin/nawk new awk script text executable
+!:mime text/x-nawk
+0 string/w #!\ /usr/bin/nawk new awk script text executable
+!:mime text/x-nawk
+0 string/w #!\ /usr/local/bin/nawk new awk script text executable
+!:mime text/x-nawk
+0 string/w #!\ /bin/gawk GNU awk script text executable
+!:mime text/x-gawk
+0 string/w #!\ /usr/bin/gawk GNU awk script text executable
+!:mime text/x-gawk
+0 string/w #!\ /usr/local/bin/gawk GNU awk script text executable
+!:mime text/x-gawk
+#
+0 string/w #!\ /bin/awk awk script text executable
+!:mime text/x-awk
+0 string/w #!\ /usr/bin/awk awk script text executable
+!:mime text/x-awk
+# update to distinguish from *.vcf files
+# this is broken because postscript has /EBEGIN{ for example.
+#0 search/Ww BEGIN { awk script text
+
+# AT&T Bell Labs' Plan 9 shell
+0 string/w #!\ /bin/rc Plan 9 rc shell script text executable
+
+# bash shell magic, from Peter Tobias (tobias at server.et-inf.fho-emden.de)
+0 string/w #!\ /bin/bash Bourne-Again shell script text executable
+!:mime text/x-shellscript
+0 string/w #!\ /usr/local/bin/bash Bourne-Again shell script text executable
+!:mime text/x-shellscript
+
+# using env
+0 string/t #!/usr/bin/env a
+>15 string/t >\0 %s script text executable
+0 string/t #!\ /usr/bin/env a
+>16 string/t >\0 %s script text executable
+
+# PHP scripts
+# Ulf Harnhammar <ulfh at update.uu.se>
+0 search/1/c =<?php PHP script text
+!:mime text/x-php
+0 search/1 =<?\n PHP script text
+!:mime text/x-php
+0 search/1 =<?\r PHP script text
+!:mime text/x-php
+0 search/1/w #!\ /usr/local/bin/php PHP script text executable
+!:mime text/x-php
+0 search/1/w #!\ /usr/bin/php PHP script text executable
+!:mime text/x-php
+
+0 string Zend\x00 PHP script Zend Optimizer data
+
+0 string $! DCL command file
+
+#----------------------------------------------------------------------------
+# $File: communications,v 1.5 2009/09/19 16:28:08 christos Exp $
+# communication
+
+# TTCN is the Tree and Tabular Combined Notation described in ISO 9646-3.
+# It is used for conformance testing of communication protocols.
+# Added by W. Borgert <debacle at debian.org>.
+0 string $Suite TTCN Abstract Test Suite
+>&1 string $SuiteId
+>>&1 string >\n %s
+>&2 string $SuiteId
+>>&1 string >\n %s
+>&3 string $SuiteId
+>>&1 string >\n %s
+
+# MSC (message sequence charts) are a formal description technique,
+# described in ITU-T Z.120, mainly used for communication protocols.
+# Added by W. Borgert <debacle at debian.org>.
+0 string mscdocument Message Sequence Chart (document)
+0 string msc Message Sequence Chart (chart)
+0 string submsc Message Sequence Chart (subchart)
+
+#------------------------------------------------------------------------------
+# $File: compress,v 1.42 2009/09/19 16:28:08 christos Exp $
+# compress: file(1) magic for pure-compression formats (no archives)
+#
+# compress, gzip, pack, compact, huf, squeeze, crunch, freeze, yabba, etc.
+#
+# Formats for various forms of compressed data
+# Formats for "compress" proper have been moved into "compress.c",
+# because it tries to uncompress it to figure out what's inside.
+
+# standard unix compress
+0 string \037\235 compress'd data
+!:mime application/x-compress
+!:apple LZIVZIVU
+>2 byte&0x80 >0 block compressed
+>2 byte&0x1f x %d bits
+
+# gzip (GNU zip, not to be confused with Info-ZIP or PKWARE zip archiver)
+# Edited by Chris Chittleborough <cchittleborough at yahoo.com.au>, March 2002
+# * Original filename is only at offset 10 if "extra field" absent
+# * Produce shorter output - notably, only report compression methods
+# other than 8 ("deflate", the only method defined in RFC 1952).
+0 string \037\213 gzip compressed data
+!:mime application/x-gzip
+>2 byte <8 \b, reserved method
+>2 byte >8 \b, unknown method
+>3 byte &0x01 \b, ASCII
+>3 byte &0x02 \b, has CRC
+>3 byte &0x04 \b, extra field
+>3 byte&0xC =0x08
+>>10 string x \b, was "%s"
+>3 byte &0x10 \b, has comment
+>9 byte =0x00 \b, from FAT filesystem (MS-DOS, OS/2, NT)
+>9 byte =0x01 \b, from Amiga
+>9 byte =0x02 \b, from VMS
+>9 byte =0x03 \b, from Unix
+>9 byte =0x04 \b, from VM/CMS
+>9 byte =0x05 \b, from Atari
+>9 byte =0x06 \b, from HPFS filesystem (OS/2, NT)
+>9 byte =0x07 \b, from MacOS
+>9 byte =0x08 \b, from Z-System
+>9 byte =0x09 \b, from CP/M
+>9 byte =0x0A \b, from TOPS/20
+>9 byte =0x0B \b, from NTFS filesystem (NT)
+>9 byte =0x0C \b, from QDOS
+>9 byte =0x0D \b, from Acorn RISCOS
+>3 byte &0x10 \b, comment
+>3 byte &0x20 \b, encrypted
+>4 ledate >0 \b, last modified: %s
+>8 byte 2 \b, max compression
+>8 byte 4 \b, max speed
+
+# packed data, Huffman (minimum redundancy) codes on a byte-by-byte basis
+0 string \037\036 packed data
+!:mime application/octet-stream
+>2 belong >1 \b, %d characters originally
+>2 belong =1 \b, %d character originally
+#
+# This magic number is byte-order-independent.
+0 short 0x1f1f old packed data
+!:mime application/octet-stream
+
+# XXX - why *two* entries for "compacted data", one of which is
+# byte-order independent, and one of which is byte-order dependent?
+#
+0 short 0x1fff compacted data
+!:mime application/octet-stream
+# This string is valid for SunOS (BE) and a matching "short" is listed
+# in the Ultrix (LE) magic file.
+0 string \377\037 compacted data
+!:mime application/octet-stream
+0 short 0145405 huf output
+!:mime application/octet-stream
+
+# bzip2
+0 string BZh bzip2 compressed data
+!:mime application/x-bzip2
+>3 byte >47 \b, block size = %c00k
+
+# lzip
+0 string LZIP lzip compressed data
+!:mime application/x-lzip
+>4 byte x \b, version: %d
+
+# squeeze and crunch
+# Michael Haardt <michael at cantor.informatik.rwth-aachen.de>
+0 beshort 0x76FF squeezed data,
+>4 string x original name %s
+0 beshort 0x76FE crunched data,
+>2 string x original name %s
+0 beshort 0x76FD LZH compressed data,
+>2 string x original name %s
+
+# Freeze
+0 string \037\237 frozen file 2.1
+0 string \037\236 frozen file 1.0 (or gzip 0.5)
+
+# SCO compress -H (LZH)
+0 string \037\240 SCO compress -H (LZH) data
+
+# European GSM 06.10 is a provisional standard for full-rate speech
+# transcoding, prI-ETS 300 036, which uses RPE/LTP (residual pulse
+# excitation/long term prediction) coding at 13 kbit/s.
+#
+# There's only a magic nibble (4 bits); that nibble repeats every 33
+# bytes. This isn't suited for use, but maybe we can use it someday.
+#
+# This will cause very short GSM files to be declared as data and
+# mismatches to be declared as data too!
+#0 byte&0xF0 0xd0 data
+#>33 byte&0xF0 0xd0
+#>66 byte&0xF0 0xd0
+#>99 byte&0xF0 0xd0
+#>132 byte&0xF0 0xd0 GSM 06.10 compressed audio
+
+# bzip a block-sorting file compressor
+# by Julian Seward <sewardj at cs.man.ac.uk> and others
+#
+#0 string BZ bzip compressed data
+#>2 byte x \b, version: %c
+#>3 string =1 \b, compression block size 100k
+#>3 string =2 \b, compression block size 200k
+#>3 string =3 \b, compression block size 300k
+#>3 string =4 \b, compression block size 400k
+#>3 string =5 \b, compression block size 500k
+#>3 string =6 \b, compression block size 600k
+#>3 string =7 \b, compression block size 700k
+#>3 string =8 \b, compression block size 800k
+#>3 string =9 \b, compression block size 900k
+
+# lzop from <markus.oberhumer at jk.uni-linz.ac.at>
+0 string \x89\x4c\x5a\x4f\x00\x0d\x0a\x1a\x0a lzop compressed data
+>9 beshort <0x0940
+>>9 byte&0xf0 =0x00 - version 0.
+>>9 beshort&0x0fff x \b%03x,
+>>13 byte 1 LZO1X-1,
+>>13 byte 2 LZO1X-1(15),
+>>13 byte 3 LZO1X-999,
+## >>22 bedate >0 last modified: %s,
+>>14 byte =0x00 os: MS-DOS
+>>14 byte =0x01 os: Amiga
+>>14 byte =0x02 os: VMS
+>>14 byte =0x03 os: Unix
+>>14 byte =0x05 os: Atari
+>>14 byte =0x06 os: OS/2
+>>14 byte =0x07 os: MacOS
+>>14 byte =0x0A os: Tops/20
+>>14 byte =0x0B os: WinNT
+>>14 byte =0x0E os: Win32
+>9 beshort >0x0939
+>>9 byte&0xf0 =0x00 - version 0.
+>>9 byte&0xf0 =0x10 - version 1.
+>>9 byte&0xf0 =0x20 - version 2.
+>>9 beshort&0x0fff x \b%03x,
+>>15 byte 1 LZO1X-1,
+>>15 byte 2 LZO1X-1(15),
+>>15 byte 3 LZO1X-999,
+## >>25 bedate >0 last modified: %s,
+>>17 byte =0x00 os: MS-DOS
+>>17 byte =0x01 os: Amiga
+>>17 byte =0x02 os: VMS
+>>17 byte =0x03 os: Unix
+>>17 byte =0x05 os: Atari
+>>17 byte =0x06 os: OS/2
+>>17 byte =0x07 os: MacOS
+>>17 byte =0x0A os: Tops/20
+>>17 byte =0x0B os: WinNT
+>>17 byte =0x0E os: Win32
+
+# 4.3BSD-Quasijarus Strong Compression
+# http://minnie.tuhs.org/Quasijarus/compress.html
+0 string \037\241 Quasijarus strong compressed data
+
+# From: Cory Dikkers <cdikkers at swbell.net>
+0 string XPKF Amiga xpkf.library compressed data
+0 string PP11 Power Packer 1.1 compressed data
+0 string PP20 Power Packer 2.0 compressed data,
+>4 belong 0x09090909 fast compression
+>4 belong 0x090A0A0A mediocre compression
+>4 belong 0x090A0B0B good compression
+>4 belong 0x090A0C0C very good compression
+>4 belong 0x090A0C0D best compression
+
+# 7-zip archiver, from Thomas Klausner (wiz at danbala.tuwien.ac.at)
+# http://www.7-zip.org or DOC/7zFormat.txt
+#
+0 string 7z\274\257\047\034 7-zip archive data,
+>6 byte x version %d
+>7 byte x \b.%d
+
+# Type: LZMA
+# URL: http://www.7-zip.org/sdk.html
+# From: Robert Millan <rmh at aybabtu.com> and Reuben Thomas <rrt at sc3d.org>
+# Commented out because apparently not reliable (according to Debian
+# bug #364260)
+#0 string ]\000\000\200\000 LZMA compressed data
+
+# Type: LZMA
+0 lelong&0xffffff =0x5d
+>12 leshort =0xff LZMA compressed data,
+>>5 lequad =0xffffffffffffffff streamed
+>>5 lequad !0xffffffffffffffff non-streamed, size %lld
+!:mime application/x-lzma
+
+# http://tukaani.org/xz/xz-file-format.txt
+0 ustring \xFD7zXZ\x00 xz compressed data
+!:mime application/x-xz
+
+# AFX compressed files (Wolfram Kleff)
+2 string -afx- AFX compressed file data
+
+# Supplementary magic data for the file(1) command to support
+# rzip(1). The format is described in magic(5).
+#
+# Copyright (C) 2003 by Andrew Tridgell. You may do whatever you want with
+# this file.
+#
+0 string RZIP rzip compressed data
+>4 byte x - version %d
+>5 byte x \b.%d
+>6 belong x (%d bytes)
+
+# Type: XZ
+# URL: http://tukaani.org/xz/
+0 string \xfd\x37\x7a\x58\x5a\x00 XZ compressed data
+!:mime application/x-xz
+
+#------------------------------------------------------------------------------
+# $File: console,v 1.16 2009/09/19 16:28:08 christos Exp $
+# Console game magic
+# Toby Deshane <hac at shoelace.digivill.net>
+# ines: file(1) magic for Marat's iNES Nintendo Entertainment System
+# ROM dump format
+
+0 string NES\032 iNES ROM dump,
+>4 byte x %dx16k PRG
+>5 byte x \b, %dx8k CHR
+>6 byte&0x01 =0x1 \b, [Vert.]
+>6 byte&0x01 =0x0 \b, [Horiz.]
+>6 byte&0x02 =0x2 \b, [SRAM]
+>6 byte&0x04 =0x4 \b, [Trainer]
+>6 byte&0x04 =0x8 \b, [4-Scr]
+
+#------------------------------------------------------------------------------
+# gameboy: file(1) magic for the Nintendo (Color) Gameboy raw ROM format
+#
+0x104 belong 0xCEED6666 Gameboy ROM:
+>0x134 string >\0 "%.16s"
+>0x146 byte 0x03 \b,[SGB]
+>0x147 byte 0x00 \b, [ROM ONLY]
+>0x147 byte 0x01 \b, [ROM+MBC1]
+>0x147 byte 0x02 \b, [ROM+MBC1+RAM]
+>0x147 byte 0x03 \b, [ROM+MBC1+RAM+BATT]
+>0x147 byte 0x05 \b, [ROM+MBC2]
+>0x147 byte 0x06 \b, [ROM+MBC2+BATTERY]
+>0x147 byte 0x08 \b, [ROM+RAM]
+>0x147 byte 0x09 \b, [ROM+RAM+BATTERY]
+>0x147 byte 0x0B \b, [ROM+MMM01]
+>0x147 byte 0x0C \b, [ROM+MMM01+SRAM]
+>0x147 byte 0x0D \b, [ROM+MMM01+SRAM+BATT]
+>0x147 byte 0x0F \b, [ROM+MBC3+TIMER+BATT]
+>0x147 byte 0x10 \b, [ROM+MBC3+TIMER+RAM+BATT]
+>0x147 byte 0x11 \b, [ROM+MBC3]
+>0x147 byte 0x12 \b, [ROM+MBC3+RAM]
+>0x147 byte 0x13 \b, [ROM+MBC3+RAM+BATT]
+>0x147 byte 0x19 \b, [ROM+MBC5]
+>0x147 byte 0x1A \b, [ROM+MBC5+RAM]
+>0x147 byte 0x1B \b, [ROM+MBC5+RAM+BATT]
+>0x147 byte 0x1C \b, [ROM+MBC5+RUMBLE]
+>0x147 byte 0x1D \b, [ROM+MBC5+RUMBLE+SRAM]
+>0x147 byte 0x1E \b, [ROM+MBC5+RUMBLE+SRAM+BATT]
+>0x147 byte 0x1F \b, [Pocket Camera]
+>0x147 byte 0xFD \b, [Bandai TAMA5]
+>0x147 byte 0xFE \b, [Hudson HuC-3]
+>0x147 byte 0xFF \b, [Hudson HuC-1]
+
+>0x148 byte 0 \b, ROM: 256Kbit
+>0x148 byte 1 \b, ROM: 512Kbit
+>0x148 byte 2 \b, ROM: 1Mbit
+>0x148 byte 3 \b, ROM: 2Mbit
+>0x148 byte 4 \b, ROM: 4Mbit
+>0x148 byte 5 \b, ROM: 8Mbit
+>0x148 byte 6 \b, ROM: 16Mbit
+>0x148 byte 0x52 \b, ROM: 9Mbit
+>0x148 byte 0x53 \b, ROM: 10Mbit
+>0x148 byte 0x54 \b, ROM: 12Mbit
+
+>0x149 byte 1 \b, RAM: 16Kbit
+>0x149 byte 2 \b, RAM: 64Kbit
+>0x149 byte 3 \b, RAM: 128Kbit
+>0x149 byte 4 \b, RAM: 1Mbit
+
+#>0x14e long x \b, CRC: %x
+
+#------------------------------------------------------------------------------
+# genesis: file(1) magic for the Sega MegaDrive/Genesis raw ROM format
+#
+0x100 string SEGA Sega MegaDrive/Genesis raw ROM dump
+>0x120 string >\0 Name: "%.16s"
+>0x110 string >\0 %.16s
+>0x1B0 string RA with SRAM
+
+#------------------------------------------------------------------------------
+# genesis: file(1) magic for the Super MagicDrive ROM dump format
+#
+0x280 string EAGN Super MagicDrive ROM dump
+>0 byte x %dx16k blocks
+>2 byte 0 \b, last in series or standalone
+>2 byte >0 \b, split ROM
+>8 byte 0xAA
+>9 byte 0xBB
+
+#------------------------------------------------------------------------------
+# genesis: file(1) alternate magic for the Super MagicDrive ROM dump format
+#
+0x280 string EAMG Super MagicDrive ROM dump
+>0 byte x %dx16k blocks
+>2 byte x \b, last in series or standalone
+>8 byte 0xAA
+>9 byte 0xBB
+
+#------------------------------------------------------------------------------
+# smsgg: file(1) magic for Sega Master System and Game Gear ROM dumps
+#
+# Does not detect all images. Very preliminary guesswork. Need more data
+# on format.
+#
+# FIXME: need a little more info...;P
+#
+#0 byte 0xF3
+#>1 byte 0xED Sega Master System/Game Gear ROM dump
+#>1 byte 0x31 Sega Master System/Game Gear ROM dump
+#>1 byte 0xDB Sega Master System/Game Gear ROM dump
+#>1 byte 0xAF Sega Master System/Game Gear ROM dump
+#>1 byte 0xC3 Sega Master System/Game Gear ROM dump
+
+#------------------------------------------------------------------------------
+# dreamcast: file(1) uncertain magic for the Sega Dreamcast VMU image format
+#
+0 belong 0x21068028 Sega Dreamcast VMU game image
+0 string LCDi Dream Animator file
+
+#------------------------------------------------------------------------------
+# v64: file(1) uncertain magic for the V64 format N64 ROM dumps
+#
+0 belong 0x37804012 V64 Nintendo 64 ROM dump
+
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+# Nintendo .nds
+192 string \044\377\256Qi\232 Nintendo DS Game ROM Image
+# Nintendo .gba
+0 string \056\000\000\352$\377\256Qi Nintendo Game Boy Advance ROM Image
+
+#------------------------------------------------------------------------------
+# msx: file(1) magic for MSX game cartridge dumps
+# Too simple - MPi
+#0 beshort 0x4142 MSX game cartridge dump
+
+#------------------------------------------------------------------------------
+# Sony Playstation executables (Adam Sjoegren <asjo at diku.dk>) :
+0 string PS-X\ EXE Sony Playstation executable
+# Area:
+>113 string x (%s)
+
+#------------------------------------------------------------------------------
+# Microsoft Xbox executables .xbe (Esa Hyytiä <ehyytia at cc.hut.fi>)
+0 string XBEH XBE, Microsoft Xbox executable
+# probabilistic checks whether signed or not
+>0x0004 ulelong =0x0
+>>&2 ulelong =0x0
+>>>&2 ulelong =0x0 \b, not signed
+>0x0004 ulelong >0
+>>&2 ulelong >0
+>>>&2 ulelong >0 \b, signed
+# expect base address of 0x10000
+>0x0104 ulelong =0x10000
+>>(0x0118-0x0FF60) ulelong&0x80000007 0x80000007 \b, all regions
+>>(0x0118-0x0FF60) ulelong&0x80000007 !0x80000007
+>>>(0x0118-0x0FF60) ulelong >0 (regions:
+>>>>(0x0118-0x0FF60) ulelong &0x00000001 NA
+>>>>(0x0118-0x0FF60) ulelong &0x00000002 Japan
+>>>>(0x0118-0x0FF60) ulelong &0x00000004 Rest_of_World
+>>>>(0x0118-0x0FF60) ulelong &0x80000000 Manufacturer
+>>>(0x0118-0x0FF60) ulelong >0 \b)
+
+# --------------------------------
+# Microsoft Xbox data file formats
+0 string XIP0 XIP, Microsoft Xbox data
+0 string XTF0 XTF, Microsoft Xbox data
+
+# Atari Lynx cartridge dump (EXE/BLL header)
+# From: "Stefan A. Haubenthal" <polluks at web.de>
+
+0 beshort 0x8008 Lynx cartridge,
+>2 beshort x RAM start $%04x
+>6 string BS93
+
+# Opera file system that is used on the 3DO console
+# From: Serge van den Boom <svdb at stack.nl>
+0 string \x01ZZZZZ\x01 3DO "Opera" file system
+
+# From Gürkan Sengün <gurkan at linuks.mine.nu>, www.linuks.mine.nu
+0 string GBS Nintendo Gameboy Music/Audio Data
+12 string GameBoy\ Music\ Module Nintendo Gameboy Music Module
+
+# Playstation Patch Files, from: Thomas Klausner <tk at giga.or.at>
+0 string PPF30 Playstation Patch File version 3.0
+>5 byte 0 \b, PPF 1.0 patch
+>5 byte 1 \b, PPF 2.0 patch
+>5 byte 2 \b, PPF 3.0 patch
+>>56 byte 0 \b, Imagetype BIN (any)
+>>56 byte 1 \b, Imagetype GI (PrimoDVD)
+>>57 byte 0 \b, Blockcheck disabled
+>>57 byte 1 \b, Blockcheck enabled
+>>58 byte 0 \b, Undo data not available
+>>58 byte 1 \b, Undo data available
+>6 string x \b, description: %s
+
+0 string PPF20 Playstation Patch File version 2.0
+>5 byte 0 \b, PPF 1.0 patch
+>5 byte 1 \b, PPF 2.0 patch
+>>56 lelong >0 \b, size of file to patch %d
+>6 string x \b, description: %s
+
+0 string PPF10 Playstation Patch File version 1.0
+>5 byte 0 \b, Simple Encoding
+>6 string x \b, description: %s
+
+# From: Daniel Dawson <ddawson at icehouse.net>
+# SNES9x .smv "movie" file format.
+0 string SMV\x1A SNES9x input recording
+>0x4 lelong x \b, version %d
+# version 4 is latest so far
+>0x4 lelong <5
+>>0x8 ledate x \b, recorded at %s
+>>0xc lelong >0 \b, rerecorded %d times
+>>0x10 lelong x \b, %d frames long
+>>0x14 byte >0 \b, data for controller(s):
+>>>0x14 byte &0x1 #1
+>>>0x14 byte &0x2 #2
+>>>0x14 byte &0x4 #3
+>>>0x14 byte &0x8 #4
+>>>0x14 byte &0x10 #5
+>>0x15 byte ^0x1 \b, begins from snapshot
+>>0x15 byte &0x1 \b, begins from reset
+>>0x15 byte ^0x2 \b, NTSC standard
+>>0x15 byte &0x2 \b, PAL standard
+>>0x17 byte &0x1 \b, settings:
+# WIP1Timing not used as of version 4
+>>>0x4 lelong <4
+>>>>0x17 byte &0x2 WIP1Timing
+>>>0x17 byte &0x4 Left+Right
+>>>0x17 byte &0x8 VolumeEnvX
+>>>0x17 byte &0x10 FakeMute
+>>>0x17 byte &0x20 SyncSound
+# New flag as of version 4
+>>>0x4 lelong >3
+>>>>0x17 byte &0x80 NoCPUShutdown
+>>0x4 lelong <4
+>>>0x18 lelong >0x23
+>>>>0x20 leshort !0
+>>>>>0x20 lestring16 x \b, metadata: "%s"
+>>0x4 lelong >3
+>>>0x24 byte >0 \b, port 1:
+>>>>0x24 byte 1 joypad
+>>>>0x24 byte 2 mouse
+>>>>0x24 byte 3 SuperScope
+>>>>0x24 byte 4 Justifier
+>>>>0x24 byte 5 multitap
+>>>0x24 byte >0 \b, port 2:
+>>>>0x25 byte 1 joypad
+>>>>0x25 byte 2 mouse
+>>>>0x25 byte 3 SuperScope
+>>>>0x25 byte 4 Justifier
+>>>>0x25 byte 5 multitap
+>>>0x18 lelong >0x43
+>>>>0x40 leshort !0
+>>>>>0x40 lestring16 x \b, metadata: "%s"
+>>0x17 byte &0x40 \b, ROM:
+>>>(0x18.l-26) lelong x CRC32 0x%08x
+>>>(0x18.l-23) string x "%s"
+
+
+#------------------------------------------------------------------------------
+# $File: convex,v 1.7 2009/09/19 16:28:08 christos Exp $
+# convex: file(1) magic for Convex boxes
+#
+# Convexes are big-endian.
+#
+# /*\
+# * Below are the magic numbers and tests added for Convex.
+# * Added at beginning, because they are expected to be used most.
+# \*/
+0 belong 0507 Convex old-style object
+>16 belong >0 not stripped
+0 belong 0513 Convex old-style demand paged executable
+>16 belong >0 not stripped
+0 belong 0515 Convex old-style pre-paged executable
+>16 belong >0 not stripped
+0 belong 0517 Convex old-style pre-paged, non-swapped executable
+>16 belong >0 not stripped
+0 belong 0x011257 Core file
+#
+# The following are a series of dump format magic numbers. Each one
+# corresponds to a drastically different dump format. The first one is
+# the original dump format on a 4.1 BSD or earlier file system. The
+# second marks the change between the 4.1 file system and the 4.2 file
+# system. The third marks the changing of the block size from 1K
+# to 2K to be compatible with an IDC file system. The fourth indicates
+# a dump that is dependent on Convex Storage Manager, because data in
+# secondary storage is not physically contained within the dump.
+# The restore program uses these numbers to determine how the data is
+# to be extracted.
+#
+24 belong =60013 dump format, 4.2 or 4.3 BSD (IDC compatible)
+24 belong =60014 dump format, Convex Storage Manager by-reference dump
+#
+# what follows is a bunch of bit-mask checks on the flags field of the opthdr.
+# If there is no `=' sign, assume just checking for whether the bit is set?
+#
+0 belong 0601 Convex SOFF
+>88 belong&0x000f0000 =0x00000000 c1
+>88 belong &0x00010000 c2
+>88 belong &0x00020000 c2mp
+>88 belong &0x00040000 parallel
+>88 belong &0x00080000 intrinsic
+>88 belong &0x00000001 demand paged
+>88 belong &0x00000002 pre-paged
+>88 belong &0x00000004 non-swapped
+>88 belong &0x00000008 POSIX
+#
+>84 belong &0x80000000 executable
+>84 belong &0x40000000 object
+>84 belong&0x20000000 =0 not stripped
+>84 belong&0x18000000 =0x00000000 native fpmode
+>84 belong&0x18000000 =0x10000000 ieee fpmode
+>84 belong&0x18000000 =0x18000000 undefined fpmode
+#
+0 belong 0605 Convex SOFF core
+#
+0 belong 0607 Convex SOFF checkpoint
+>88 belong&0x000f0000 =0x00000000 c1
+>88 belong &0x00010000 c2
+>88 belong &0x00020000 c2mp
+>88 belong &0x00040000 parallel
+>88 belong &0x00080000 intrinsic
+>88 belong &0x00000008 POSIX
+#
+>84 belong&0x18000000 =0x00000000 native fpmode
+>84 belong&0x18000000 =0x10000000 ieee fpmode
+>84 belong&0x18000000 =0x18000000 undefined fpmode
+
+#------------------------------------------------------------------------------
+# $File: cracklib,v 1.7 2009/09/19 16:28:08 christos Exp $
+# cracklib: file (1) magic for cracklib v2.7
+
+0 lelong 0x70775631 Cracklib password index, little endian
+>4 long >0 (%i words)
+>4 long 0 ("64-bit")
+>>8 long >-1 (%i words)
+0 belong 0x70775631 Cracklib password index, big endian
+>4 belong >-1 (%i words)
+# really bequad 0x0000000070775631
+0 search/1 \0\0\0\0pwV1 Cracklib password index, big endian ("64-bit")
+>12 belong >0 (%i words)
+
+# ----------------------------------------------------------------------------
+# $File: ctags,v 1.6 2009/09/19 16:28:08 christos Exp $
+# ctags: file (1) magic for Exuberant Ctags files
+# From: Alexander Mai <mai at migdal.ikp.physik.tu-darmstadt.de>
+0 search/1 =!_TAG Exuberant Ctags tag file text
+
+#------------------------------------------------------------------------------
+# $File: dact,v 1.4 2009/09/19 16:28:08 christos Exp $
+# dact: file(1) magic for DACT compressed files
+#
+0 long 0x444354C3 DACT compressed data
+>4 byte >-1 (version %i.
+>5 byte >-1 $BS%i.
+>6 byte >-1 $BS%i)
+>7 long >0 $BS, original size: %i bytes
+>15 long >30 $BS, block size: %i bytes
+
+#------------------------------------------------------------------------------
+# $File: database,v 1.24 2009/09/19 16:28:08 christos Exp $
+# database: file(1) magic for various databases
+#
+# extracted from header/code files by Graeme Wilford (eep2gw at ee.surrey.ac.uk)
+#
+#
+# GDBM magic numbers
+# Will be maintained as part of the GDBM distribution in the future.
+# <downsj at teeny.org>
+0 belong 0x13579ace GNU dbm 1.x or ndbm database, big endian
+!:mime application/x-gdbm
+0 lelong 0x13579ace GNU dbm 1.x or ndbm database, little endian
+!:mime application/x-gdbm
+0 string GDBM GNU dbm 2.x database
+!:mime application/x-gdbm
+#
+# Berkeley DB
+#
+# Ian Darwin's file /etc/magic files: big/little-endian version.
+#
+# Hash 1.85/1.86 databases store metadata in network byte order.
+# Btree 1.85/1.86 databases store the metadata in host byte order.
+# Hash and Btree 2.X and later databases store the metadata in host byte order.
+
+0 long 0x00061561 Berkeley DB
+!:mime application/x-dbm
+>8 belong 4321
+>>4 belong >2 1.86
+>>4 belong <3 1.85
+>>4 belong >0 (Hash, version %d, native byte-order)
+>8 belong 1234
+>>4 belong >2 1.86
+>>4 belong <3 1.85
+>>4 belong >0 (Hash, version %d, little-endian)
+
+0 belong 0x00061561 Berkeley DB
+>8 belong 4321
+>>4 belong >2 1.86
+>>4 belong <3 1.85
+>>4 belong >0 (Hash, version %d, big-endian)
+>8 belong 1234
+>>4 belong >2 1.86
+>>4 belong <3 1.85
+>>4 belong >0 (Hash, version %d, native byte-order)
+
+0 long 0x00053162 Berkeley DB 1.85/1.86
+>4 long >0 (Btree, version %d, native byte-order)
+0 belong 0x00053162 Berkeley DB 1.85/1.86
+>4 belong >0 (Btree, version %d, big-endian)
+0 lelong 0x00053162 Berkeley DB 1.85/1.86
+>4 lelong >0 (Btree, version %d, little-endian)
+
+12 long 0x00061561 Berkeley DB
+>16 long >0 (Hash, version %d, native byte-order)
+12 belong 0x00061561 Berkeley DB
+>16 belong >0 (Hash, version %d, big-endian)
+12 lelong 0x00061561 Berkeley DB
+>16 lelong >0 (Hash, version %d, little-endian)
+
+12 long 0x00053162 Berkeley DB
+>16 long >0 (Btree, version %d, native byte-order)
+12 belong 0x00053162 Berkeley DB
+>16 belong >0 (Btree, version %d, big-endian)
+12 lelong 0x00053162 Berkeley DB
+>16 lelong >0 (Btree, version %d, little-endian)
+
+12 long 0x00042253 Berkeley DB
+>16 long >0 (Queue, version %d, native byte-order)
+12 belong 0x00042253 Berkeley DB
+>16 belong >0 (Queue, version %d, big-endian)
+12 lelong 0x00042253 Berkeley DB
+>16 lelong >0 (Queue, version %d, little-endian)
+
+# From Max Bowsher.
+12 long 0x00040988 Berkeley DB
+>16 long >0 (Log, version %d, native byte-order)
+12 belong 0x00040988 Berkeley DB
+>16 belong >0 (Log, version %d, big-endian)
+12 lelong 0x00040988 Berkeley DB
+>16 lelong >0 (Log, version %d, little-endian)
+
+#
+#
+# Round Robin Database Tool by Tobias Oetiker <oetiker at ee.ethz.ch>
+0 string/b RRD\0 RRDTool DB
+>4 string/b x version %s
+#----------------------------------------------------------------------
+# ROOT: file(1) magic for ROOT databases
+#
+0 string root\0 ROOT file
+>4 belong x Version %d
+>33 belong x (Compression: %d)
+
+# XXX: Weak magic.
+# Alex Ott <ott at jet.msk.su>
+## Paradox file formats
+#2 leshort 0x0800 Paradox
+#>0x39 byte 3 v. 3.0
+#>0x39 byte 4 v. 3.5
+#>0x39 byte 9 v. 4.x
+#>0x39 byte 10 v. 5.x
+#>0x39 byte 11 v. 5.x
+#>0x39 byte 12 v. 7.x
+#>>0x04 byte 0 indexed .DB data file
+#>>0x04 byte 1 primary index .PX file
+#>>0x04 byte 2 non-indexed .DB data file
+#>>0x04 byte 3 non-incrementing secondary index .Xnn file
+#>>0x04 byte 4 secondary index .Ynn file
+#>>0x04 byte 5 incrementing secondary index .Xnn file
+#>>0x04 byte 6 non-incrementing secondary index .XGn file
+#>>0x04 byte 7 secondary index .YGn file
+#>>>0x04 byte 8 incrementing secondary index .XGn file
+
+## XBase database files
+#0 byte 0x02
+#>8 leshort >0
+#>>12 leshort 0 FoxBase
+#!:mime application/x-dbf
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x03
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 FoxBase+, FoxPro, dBaseIII+, dBaseIV, no memo
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x04
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 dBASE IV no memo file
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x05
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 dBASE V no memo file
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x30
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 Visual FoxPro
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x43
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 FlagShip with memo var size
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x7b
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 dBASEIV with memo
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x83
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 FoxBase+, dBaseIII+ with memo
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x8b
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 dBaseIV with memo
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0x8e
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 dBaseIV with SQL Table
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0xb3
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 FlagShip with .dbt memo
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 byte 0xf5
+#!:mime application/x-dbf
+#>8 leshort >0
+#>>12 leshort 0 FoxPro with memo
+#>>>0x04 lelong 0 (no records)
+#>>>0x04 lelong >0 (%ld records)
+#
+#0 leshort 0x0006 DBase 3 index file
+
+# MS Access database
+4 string Standard\ Jet\ DB Microsoft Access Database
+!:mime application/x-msaccess
+
+# TDB database from Samba et al - Martin Pool <mbp at samba.org>
+0 string TDB\ file TDB database
+>32 lelong 0x2601196D version 6, little-endian
+>>36 lelong x hash size %d bytes
+
+# SE Linux policy database
+0 lelong 0xf97cff8c SE Linux policy
+>16 lelong x v%d
+>20 lelong 1 MLS
+>24 lelong x %d symbols
+>28 lelong x %d ocons
+
+# ICE authority file data (Wolfram Kleff)
+2 string ICE ICE authority data
+
+# X11 Xauthority file (Wolfram Kleff)
+10 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+11 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+12 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+13 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+14 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+15 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+16 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+17 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+18 string MIT-MAGIC-COOKIE-1 X11 Xauthority data
+
+# From: Maxime Henrion <mux at FreeBSD.org>
+# PostgreSQL's custom dump format, Maxime Henrion <mux at FreeBSD.org>
+0 string PGDMP PostgreSQL custom database dump
+>5 byte x - v%d
+>6 byte x \b.%d
+>5 beshort <0x101 \b-0
+>5 beshort >0x100
+>>7 byte x \b-%d
+
+# Type: Advanced Data Format (ADF) database
+# URL: http://www.grc.nasa.gov/WWW/cgns/adf/
+# From: Nicolas Chauvat <nicolas.chauvat at logilab.fr>
+0 string @(#)ADF\ Database CGNS Advanced Data Format
+
+# Tokyo Cabinet magic data
+# http://tokyocabinet.sourceforge.net/index.html
+0 string ToKyO\ CaBiNeT\n Tokyo Cabinet
+>14 string x \b (%s)
+>32 byte 0 \b, Hash
+!:mime application/x-tokyocabinet-hash
+>32 byte 1 \b, B+ tree
+!:mime application/x-tokyocabinet-btree
+>32 byte 2 \b, Fixed-length
+!:mime application/x-tokyocabinet-fixed
+>32 byte 3 \b, Table
+!:mime application/x-tokyocabinet-table
+>33 byte &1 \b, [open]
+>33 byte &2 \b, [fatal]
+>34 byte x \b, apow=%d
+>35 byte x \b, fpow=%d
+>36 byte &0x01 \b, [large]
+>36 byte &0x02 \b, [deflate]
+>36 byte &0x04 \b, [bzip]
+>36 byte &0x08 \b, [tcbs]
+>36 byte &0x10 \b, [excodec]
+>40 lequad x \b, bnum=%lld
+>48 lequad x \b, rnum=%lld
+>56 lequad x \b, fsiz=%lld
+
+#------------------------------------------------------------------------------
+# $File: diamond,v 1.7 2009/09/19 16:28:08 christos Exp $
+# diamond: file(1) magic for Diamond system
+#
+# ... diamond is a multi-media mail and electronic conferencing system....
+#
+# XXX - I think it was either renamed Slate, or replaced by Slate....
+#
+# The full deal is too long...
+#0 string <list>\n<protocol\ bbn-multimedia-format> Diamond Multimedia Document
+0 string =<list>\n<protocol\ bbn-m Diamond Multimedia Document
+
+#------------------------------------------------------------------------------
+# $File: diff,v 1.10 2009/09/19 16:28:08 christos Exp $
+# diff: file(1) magic for diff(1) output
+#
+0 search/1 diff\ diff output text
+!:mime text/x-diff
+0 search/1 ***\ diff output text
+!:mime text/x-diff
+0 search/1 Only\ in\ diff output text
+!:mime text/x-diff
+0 search/1 Common\ subdirectories:\ diff output text
+!:mime text/x-diff
+
+0 search/1 Index: RCS/CVS diff output text
+!:mime text/x-diff
+
+# bsdiff: file(1) magic for bsdiff(1) output
+0 string BSDIFF40 bsdiff(1) patch file
+
+# unified diff
+0 search/4096 ---\
+>&0 search/1024 \n
+>>&0 search/1 +++\
+>>>&0 search/1024 \n
+>>>>&0 search/1 @@ unified diff output text
+!:mime text/x-diff
+!:strength + 30
+#------------------------------------------------------------------------------
+# $File: digital,v 1.8 2009/09/19 16:28:08 christos Exp $
+# Digital UNIX - Info
+#
+0 string =!<arch>\n________64E Alpha archive
+>22 string X -- out of date
+#
+# Alpha COFF Based Executables
+# The stripped stuff really needs to be an 8 byte (64 bit) compare,
+# but this works
+0 leshort 0x183 COFF format alpha
+>22 leshort&020000 &010000 sharable library,
+>22 leshort&020000 ^010000 dynamically linked,
+>24 leshort 0410 pure
+>24 leshort 0413 demand paged
+>8 lelong >0 executable or object module, not stripped
+>8 lelong 0
+>>12 lelong 0 executable or object module, stripped
+>>12 lelong >0 executable or object module, not stripped
+>27 byte >0 - version %d.
+>26 byte >0 %d-
+>28 leshort >0 %d
+#
+# The next is incomplete, we could tell more about this format,
+# but it's not worth it.
+0 leshort 0x188 Alpha compressed COFF
+0 leshort 0x18f Alpha u-code object
+#
+#
+# Some other interesting Digital formats,
+0 string \377\377\177 ddis/ddif
+0 string \377\377\174 ddis/dots archive
+0 string \377\377\176 ddis/dtif table data
+0 string \033c\033 LN03 output
+0 long 04553207 X image
+#
+0 string =!<PDF>!\n profiling data file
+#
+# Locale data tables (MIPS and Alpha).
+#
+0 short 0x0501 locale data table
+>6 short 0x24 for MIPS
+>6 short 0x40 for Alpha
+
+#------------------------------------------------------------------------------
+# $File: dolby,v 1.5 2009/09/19 16:28:08 christos Exp $
+# ATSC A/52 aka AC-3 aka Dolby Digital <ashitaka at gmx.at>
+# from http://www.atsc.org/standards/a_52a.pdf
+# corrections, additions, etc. are always welcome!
+#
+# syncword
+0 beshort 0x0b77 ATSC A/52 aka AC-3 aka Dolby Digital stream,
+# fscod
+>4 byte&0xc0 0x00 48 kHz,
+>4 byte&0xc0 0x40 44.1 kHz,
+>4 byte&0xc0 0x80 32 kHz,
+# is this one used for 96 kHz?
+>4 byte&0xc0 0xc0 reserved frequency,
+#
+>5 byte&7 = 0 \b, complete main (CM)
+>5 byte&7 = 1 \b, music and effects (ME)
+>5 byte&7 = 2 \b, visually impaired (VI)
+>5 byte&7 = 3 \b, hearing impaired (HI)
+>5 byte&7 = 4 \b, dialogue (D)
+>5 byte&7 = 5 \b, commentary (C)
+>5 byte&7 = 6 \b, emergency (E)
+# acmod
+>6 byte&0xe0 0x00 1+1 front,
+>6 byte&0xe0 0x20 1 front/0 rear,
+>6 byte&0xe0 0x40 2 front/0 rear,
+>6 byte&0xe0 0x60 3 front/0 rear,
+>6 byte&0xe0 0x80 2 front/1 rear,
+>6 byte&0xe0 0xa0 3 front/1 rear,
+>6 byte&0xe0 0xc0 2 front/2 rear,
+>6 byte&0xe0 0xe0 3 front/2 rear,
+# lfeon (these may be incorrect)
+>7 byte&0x40 0x00 LFE off,
+>7 byte&0x40 0x40 LFE on,
+#
+>4 byte&0x3e = 0x00 \b, 32 kbit/s
+>4 byte&0x3e = 0x02 \b, 40 kbit/s
+>4 byte&0x3e = 0x04 \b, 48 kbit/s
+>4 byte&0x3e = 0x06 \b, 56 kbit/s
+>4 byte&0x3e = 0x08 \b, 64 kbit/s
+>4 byte&0x3e = 0x0a \b, 80 kbit/s
+>4 byte&0x3e = 0x0c \b, 96 kbit/s
+>4 byte&0x3e = 0x0e \b, 112 kbit/s
+>4 byte&0x3e = 0x10 \b, 128 kbit/s
+>4 byte&0x3e = 0x12 \b, 160 kbit/s
+>4 byte&0x3e = 0x14 \b, 192 kbit/s
+>4 byte&0x3e = 0x16 \b, 224 kbit/s
+>4 byte&0x3e = 0x18 \b, 256 kbit/s
+>4 byte&0x3e = 0x1a \b, 320 kbit/s
+>4 byte&0x3e = 0x1c \b, 384 kbit/s
+>4 byte&0x3e = 0x1e \b, 448 kbit/s
+>4 byte&0x3e = 0x20 \b, 512 kbit/s
+>4 byte&0x3e = 0x22 \b, 576 kbit/s
+>4 byte&0x3e = 0x24 \b, 640 kbit/s
+# dsurmod (these may be incorrect)
+>6 beshort&0x0180 0x0000 Dolby Surround not indicated
+>6 beshort&0x0180 0x0080 not Dolby Surround encoded
+>6 beshort&0x0180 0x0100 Dolby Surround encoded
+>6 beshort&0x0180 0x0180 reserved Dolby Surround mode
+
+#------------------------------------------------------------------------------
+# $File: dump,v 1.11 2009/09/19 16:28:09 christos Exp $
+# dump: file(1) magic for dump file format--for new and old dump filesystems
+#
+# We specify both byte orders in order to recognize byte-swapped dumps.
+#
+24 belong 60012 new-fs dump file (big endian),
+>4 bedate x This dump %s,
+>8 bedate x Previous dump %s,
+>12 belong >0 Volume %ld,
+>692 belong 0 Level zero, type:
+>692 belong >0 Level %d, type:
+>0 belong 1 tape header,
+>0 belong 2 beginning of file record,
+>0 belong 3 map of inodes on tape,
+>0 belong 4 continuation of file record,
+>0 belong 5 end of volume,
+>0 belong 6 map of inodes deleted,
+>0 belong 7 end of medium (for floppy),
+>676 string >\0 Label %s,
+>696 string >\0 Filesystem %s,
+>760 string >\0 Device %s,
+>824 string >\0 Host %s,
+>888 belong >0 Flags %x
+
+24 belong 60011 old-fs dump file (big endian),
+#>4 bedate x Previous dump %s,
+#>8 bedate x This dump %s,
+>12 belong >0 Volume %ld,
+>692 belong 0 Level zero, type:
+>692 belong >0 Level %d, type:
+>0 belong 1 tape header,
+>0 belong 2 beginning of file record,
+>0 belong 3 map of inodes on tape,
+>0 belong 4 continuation of file record,
+>0 belong 5 end of volume,
+>0 belong 6 map of inodes deleted,
+>0 belong 7 end of medium (for floppy),
+>676 string >\0 Label %s,
+>696 string >\0 Filesystem %s,
+>760 string >\0 Device %s,
+>824 string >\0 Host %s,
+>888 belong >0 Flags %x
+
+24 lelong 60012 new-fs dump file (little endian),
+>4 ledate x This dump %s,
+>8 ledate x Previous dump %s,
+>12 lelong >0 Volume %ld,
+>692 lelong 0 Level zero, type:
+>692 lelong >0 Level %d, type:
+>0 lelong 1 tape header,
+>0 lelong 2 beginning of file record,
+>0 lelong 3 map of inodes on tape,
+>0 lelong 4 continuation of file record,
+>0 lelong 5 end of volume,
+>0 lelong 6 map of inodes deleted,
+>0 lelong 7 end of medium (for floppy),
+>676 string >\0 Label %s,
+>696 string >\0 Filesystem %s,
+>760 string >\0 Device %s,
+>824 string >\0 Host %s,
+>888 lelong >0 Flags %x
+
+24 lelong 60011 old-fs dump file (little endian),
+#>4 ledate x Previous dump %s,
+#>8 ledate x This dump %s,
+>12 lelong >0 Volume %ld,
+>692 lelong 0 Level zero, type:
+>692 lelong >0 Level %d, type:
+>0 lelong 1 tape header,
+>0 lelong 2 beginning of file record,
+>0 lelong 3 map of inodes on tape,
+>0 lelong 4 continuation of file record,
+>0 lelong 5 end of volume,
+>0 lelong 6 map of inodes deleted,
+>0 lelong 7 end of medium (for floppy),
+>676 string >\0 Label %s,
+>696 string >\0 Filesystem %s,
+>760 string >\0 Device %s,
+>824 string >\0 Host %s,
+>888 lelong >0 Flags %x
+
+18 leshort 60011 old-fs dump file (16-bit, assuming PDP-11 endianness),
+>2 medate x This dump %s,
+>6 medate x Previous dump %s,
+>10 leshort >0 Volume %ld,
+>0 leshort 1 tape header.
+>0 leshort 2 beginning of file record.
+>0 leshort 3 map of inodes on tape.
+>0 leshort 4 continuation of file record.
+>0 leshort 5 end of volume.
+>0 leshort 6 map of inodes deleted.
+>0 leshort 7 end of medium (for floppy).
+
+24 belong 0x19540119 new-fs dump file (ufs2, big endian),
+>896 beqdate x This dump %s,
+>904 beqdate x Previous dump %s,
+>12 belong >0 Volume %ld,
+>692 belong 0 Level zero, type:
+>692 belong >0 Level %d, type:
+>0 belong 1 tape header,
+>0 belong 2 beginning of file record,
+>0 belong 3 map of inodes on tape,
+>0 belong 4 continuation of file record,
+>0 belong 5 end of volume,
+>0 belong 6 map of inodes deleted,
+>0 belong 7 end of medium (for floppy),
+>676 string >\0 Label %s,
+>696 string >\0 Filesystem %s,
+>760 string >\0 Device %s,
+>824 string >\0 Host %s,
+>888 belong >0 Flags %x
+
+24 lelong 0x19540119 new-fs dump file (ufs2, little endian),
+>896 leqdate x This dump %s,
+>904 leqdate x Previous dump %s,
+>12 lelong >0 Volume %ld,
+>692 lelong 0 Level zero, type:
+>692 lelong >0 Level %d, type:
+>0 lelong 1 tape header,
+>0 lelong 2 beginning of file record,
+>0 lelong 3 map of inodes on tape,
+>0 lelong 4 continuation of file record,
+>0 lelong 5 end of volume,
+>0 lelong 6 map of inodes deleted,
+>0 lelong 7 end of medium (for floppy),
+>676 string >\0 Label %s,
+>696 string >\0 Filesystem %s,
+>760 string >\0 Device %s,
+>824 string >\0 Host %s,
+>888 lelong >0 Flags %x
+
+#------------------------------------------------------------------------------
+# $File: dyadic,v 1.4 2009/09/19 16:28:09 christos Exp $
+# Dyadic: file(1) magic for Dyalog APL.
+#
+0 byte 0xaa
+>1 byte <4 Dyalog APL
+>>1 byte 0x00 incomplete workspace
+>>1 byte 0x01 component file
+>>1 byte 0x02 external variable
+>>1 byte 0x03 workspace
+>>2 byte x version %d
+>>3 byte x .%d
+
+#------------------------------------------------------------------------------
+# $File: editors,v 1.8 2009/09/19 16:28:09 christos Exp $
+# T602 editor documents
+# by David Necas <yeti at physics.muni.cz>
+0 string @CT\ T602 document data,
+>4 string 0 Kamenicky
+>4 string 1 CP 852
+>4 string 2 KOI8-CS
+>4 string >2 unknown encoding
+
+# Vi IMproved Encrypted file
+# by David Necas <yeti at physics.muni.cz>
+0 string VimCrypt~ Vim encrypted file data
+# Vi IMproved Swap file
+# by Sven Wegener <swegener at gentoo.org>
+0 string b0VIM\ Vim swap file
+>&0 string >\0 \b, version %s
+
+#------------------------------------------------------------------------------
+# $File: efi,v 1.4 2009/09/19 16:28:09 christos Exp $
+# efi: file(1) magic for Universal EFI binaries
+
+0 lelong 0x0ef1fab9
+>4 lelong 1 Universal EFI binary with 1 architecture
+>>&0 lelong 7 \b, i386
+>>&0 lelong 0x01000007 \b, x86_64
+>4 lelong 2 Universal EFI binary with 2 architectures
+>>&0 lelong 7 \b, i386
+>>&0 lelong 0x01000007 \b, x86_64
+>>&20 lelong 7 \b, i386
+>>&20 lelong 0x01000007 \b, x86_64
+>4 lelong >2 Universal EFI binary with %ld architectures
+
+#------------------------------------------------------------------------------
+# $File: elf,v 1.53 2009/09/19 16:28:09 christos Exp $
+# elf: file(1) magic for ELF executables
+#
+# We have to check the byte order flag to see what byte order all the
+# other stuff in the header is in.
+#
+# What're the correct byte orders for the nCUBE and the Fujitsu VPP500?
+#
+# Created by: unknown
+# Modified by (1): Daniel Quinlan <quinlan at yggdrasil.com>
+# Modified by (2): Peter Tobias <tobias at server.et-inf.fho-emden.de> (core support)
+# Modified by (3): Christian 'Dr. Disk' Hechelmann <drdisk at ds9.au.s.shuttle.de> (fix of core support)
+# Modified by (4): <gerardo.cacciari at gmail.com> (VMS Itanium)
+# Modified by (5): Matthias Urlichs <smurf at debian.org> (Listing of many architectures)
+0 string \177ELF ELF
+>4 byte 0 invalid class
+>4 byte 1 32-bit
+>4 byte 2 64-bit
+>5 byte 0 invalid byte order
+>5 byte 1 LSB
+>>16 leshort 0 no file type,
+!:strength *2
+!:mime application/octet-stream
+>>16 leshort 1 relocatable,
+!:mime application/x-object
+>>16 leshort 2 executable,
+!:mime application/x-executable
+>>16 leshort 3 shared object,
+!:mime application/x-sharedlib
+>>16 leshort 4 core file
+!:mime application/x-coredump
+# Core file detection is not reliable.
+#>>>(0x38+0xcc) string >\0 of '%s'
+#>>>(0x38+0x10) lelong >0 (signal %d),
+>>16 leshort &0xff00 processor-specific,
+>>18 leshort 0 no machine,
+>>18 leshort 1 AT&T WE32100 - invalid byte order,
+>>18 leshort 2 SPARC - invalid byte order,
+>>18 leshort 3 Intel 80386,
+>>18 leshort 4 Motorola
+>>>36 lelong &0x01000000 68000 - invalid byte order,
+>>>36 lelong &0x00810000 CPU32 - invalid byte order,
+>>>36 lelong 0 68020 - invalid byte order,
+>>18 leshort 5 Motorola 88000 - invalid byte order,
+>>18 leshort 6 Intel 80486,
+>>18 leshort 7 Intel 80860,
+# The official e_machine number for MIPS is now #8, regardless of endianness.
+# The second number (#10) will be deprecated later. For now, we still
+# say something if #10 is encountered, but only gory details for #8.
+>>18 leshort 8 MIPS,
+>>>36 lelong &0x20 N32
+>>18 leshort 10 MIPS,
+>>>36 lelong &0x20 N32
+>>18 leshort 8
+# only for 32-bit
+>>>4 byte 1
+>>>>36 lelong&0xf0000000 0x00000000 MIPS-I
+>>>>36 lelong&0xf0000000 0x10000000 MIPS-II
+>>>>36 lelong&0xf0000000 0x20000000 MIPS-III
+>>>>36 lelong&0xf0000000 0x30000000 MIPS-IV
+>>>>36 lelong&0xf0000000 0x40000000 MIPS-V
+>>>>36 lelong&0xf0000000 0x50000000 MIPS32
+>>>>36 lelong&0xf0000000 0x60000000 MIPS64
+>>>>36 lelong&0xf0000000 0x70000000 MIPS32 rel2
+>>>>36 lelong&0xf0000000 0x80000000 MIPS64 rel2
+# only for 64-bit
+>>>4 byte 2
+>>>>48 lelong&0xf0000000 0x00000000 MIPS-I
+>>>>48 lelong&0xf0000000 0x10000000 MIPS-II
+>>>>48 lelong&0xf0000000 0x20000000 MIPS-III
+>>>>48 lelong&0xf0000000 0x30000000 MIPS-IV
+>>>>48 lelong&0xf0000000 0x40000000 MIPS-V
+>>>>48 lelong&0xf0000000 0x50000000 MIPS32
+>>>>48 lelong&0xf0000000 0x60000000 MIPS64
+>>>>48 lelong&0xf0000000 0x70000000 MIPS32 rel2
+>>>>48 lelong&0xf0000000 0x80000000 MIPS64 rel2
+>>18 leshort 9 Amdahl - invalid byte order,
+>>18 leshort 10 MIPS (deprecated),
+>>18 leshort 11 RS6000 - invalid byte order,
+>>18 leshort 15 PA-RISC - invalid byte order,
+>>>50 leshort 0x0214 2.0
+>>>48 leshort &0x0008 (LP64),
+>>18 leshort 16 nCUBE,
+>>18 leshort 17 Fujitsu VPP500,
+>>18 leshort 18 SPARC32PLUS - invalid byte order,
+>>18 leshort 20 PowerPC,
+>>18 leshort 22 IBM S/390,
+>>18 leshort 36 NEC V800,
+>>18 leshort 37 Fujitsu FR20,
+>>18 leshort 38 TRW RH-32,
+>>18 leshort 39 Motorola RCE,
+>>18 leshort 40 ARM,
+>>18 leshort 41 Alpha,
+>>18 leshort 0xa390 IBM S/390 (obsolete),
+>>18 leshort 42 Renesas SH,
+>>18 leshort 43 SPARC V9 - invalid byte order,
+>>18 leshort 44 Siemens Tricore Embedded Processor,
+>>18 leshort 45 Argonaut RISC Core, Argonaut Technologies Inc.,
+>>18 leshort 46 Renesas H8/300,
+>>18 leshort 47 Renesas H8/300H,
+>>18 leshort 48 Renesas H8S,
+>>18 leshort 49 Renesas H8/500,
+>>18 leshort 50 IA-64,
+>>18 leshort 51 Stanford MIPS-X,
+>>18 leshort 52 Motorola Coldfire,
+>>18 leshort 53 Motorola M68HC12,
+>>18 leshort 54 Fujitsu MMA,
+>>18 leshort 55 Siemens PCP,
+>>18 leshort 56 Sony nCPU,
+>>18 leshort 57 Denso NDR1,
+>>18 leshort 58 Start*Core,
+>>18 leshort 59 Toyota ME16,
+>>18 leshort 60 ST100,
+>>18 leshort 61 Tinyj emb.,
+>>18 leshort 62 x86-64,
+>>18 leshort 63 Sony DSP,
+>>18 leshort 66 FX66,
+>>18 leshort 67 ST9+ 8/16 bit,
+>>18 leshort 68 ST7 8 bit,
+>>18 leshort 69 MC68HC16,
+>>18 leshort 70 MC68HC11,
+>>18 leshort 71 MC68HC08,
+>>18 leshort 72 MC68HC05,
+>>18 leshort 73 SGI SVx,
+>>18 leshort 74 ST19 8 bit,
+>>18 leshort 75 Digital VAX,
+>>18 leshort 76 Axis cris,
+>>18 leshort 77 Infineon 32-bit embedded,
+>>18 leshort 78 Element 14 64-bit DSP,
+>>18 leshort 79 LSI Logic 16-bit DSP,
+>>18 leshort 80 MMIX,
+>>18 leshort 81 Harvard machine-independent,
+>>18 leshort 82 SiTera Prism,
+>>18 leshort 83 Atmel AVR 8-bit,
+>>18 leshort 84 Fujitsu FR30,
+>>18 leshort 85 Mitsubishi D10V,
+>>18 leshort 86 Mitsubishi D30V,
+>>18 leshort 87 NEC v850,
+>>18 leshort 88 Renesas M32R,
+>>18 leshort 89 Matsushita MN10300,
+>>18 leshort 90 Matsushita MN10200,
+>>18 leshort 91 picoJava,
+>>18 leshort 92 OpenRISC,
+>>18 leshort 93 ARC Cores Tangent-A5,
+>>18 leshort 94 Tensilica Xtensa,
+>>18 leshort 97 NatSemi 32k,
+>>18 leshort 106 Analog Devices Blackfin,
+>>18 leshort 113 Altera Nios II,
+>>18 leshort 0xae META,
+>>18 leshort 0x3426 OpenRISC (obsolete),
+>>18 leshort 0x8472 OpenRISC (obsolete),
+>>18 leshort 0x9026 Alpha (unofficial),
+>>20 lelong 0 invalid version
+>>20 lelong 1 version 1
+>>36 lelong 1 MathCoPro/FPU/MAU Required
+>5 byte 2 MSB
+>>16 beshort 0 no file type,
+!:mime application/octet-stream
+>>16 beshort 1 relocatable,
+!:mime application/x-object
+>>16 beshort 2 executable,
+!:mime application/x-executable
+>>16 beshort 3 shared object,
+!:mime application/x-sharedlib
+>>16 beshort 4 core file,
+!:mime application/x-coredump
+#>>>(0x38+0xcc) string >\0 of '%s'
+#>>>(0x38+0x10) belong >0 (signal %d),
+>>16 beshort &0xff00 processor-specific,
+>>18 beshort 0 no machine,
+>>18 beshort 1 AT&T WE32100,
+>>18 beshort 2 SPARC,
+>>18 beshort 3 Intel 80386 - invalid byte order,
+>>18 beshort 4 Motorola
+>>>36 belong &0x01000000 68000,
+>>>36 belong &0x00810000 CPU32,
+>>>36 belong 0 68020,
+>>18 beshort 5 Motorola 88000,
+>>18 beshort 6 Intel 80486 - invalid byte order,
+>>18 beshort 7 Intel 80860,
+# only for MIPS - see comment in little-endian section above.
+>>18 beshort 8 MIPS,
+>>>36 belong &0x20 N32
+>>18 beshort 10 MIPS,
+>>>36 belong &0x20 N32
+>>18 beshort 8
+# only for 32-bit
+>>>4 byte 1
+>>>>36 belong&0xf0000000 0x00000000 MIPS-I
+>>>>36 belong&0xf0000000 0x10000000 MIPS-II
+>>>>36 belong&0xf0000000 0x20000000 MIPS-III
+>>>>36 belong&0xf0000000 0x30000000 MIPS-IV
+>>>>36 belong&0xf0000000 0x40000000 MIPS-V
+>>>>36 belong&0xf0000000 0x50000000 MIPS32
+>>>>36 belong&0xf0000000 0x60000000 MIPS64
+>>>>36 belong&0xf0000000 0x70000000 MIPS32 rel2
+>>>>36 belong&0xf0000000 0x80000000 MIPS64 rel2
+# only for 64-bit
+>>>4 byte 2
+>>>>48 belong&0xf0000000 0x00000000 MIPS-I
+>>>>48 belong&0xf0000000 0x10000000 MIPS-II
+>>>>48 belong&0xf0000000 0x20000000 MIPS-III
+>>>>48 belong&0xf0000000 0x30000000 MIPS-IV
+>>>>48 belong&0xf0000000 0x40000000 MIPS-V
+>>>>48 belong&0xf0000000 0x50000000 MIPS32
+>>>>48 belong&0xf0000000 0x60000000 MIPS64
+>>>>48 belong&0xf0000000 0x70000000 MIPS32 rel2
+>>>>48 belong&0xf0000000 0x80000000 MIPS64 rel2
+>>18 beshort 9 Amdahl,
+>>18 beshort 10 MIPS (deprecated),
+>>18 beshort 11 RS6000,
+>>18 beshort 15 PA-RISC
+>>>50 beshort 0x0214 2.0
+>>>48 beshort &0x0008 (LP64)
+>>18 beshort 16 nCUBE,
+>>18 beshort 17 Fujitsu VPP500,
+>>18 beshort 18 SPARC32PLUS,
+>>>36 belong&0xffff00 0x000100 V8+ Required,
+>>>36 belong&0xffff00 0x000200 Sun UltraSPARC1 Extensions Required,
+>>>36 belong&0xffff00 0x000400 HaL R1 Extensions Required,
+>>>36 belong&0xffff00 0x000800 Sun UltraSPARC3 Extensions Required,
+>>18 beshort 20 PowerPC or cisco 4500,
+>>18 beshort 21 64-bit PowerPC or cisco 7500,
+>>18 beshort 22 IBM S/390,
+>>18 beshort 23 Cell SPU,
+>>18 beshort 24 cisco SVIP,
+>>18 beshort 25 cisco 7200,
+>>18 beshort 36 NEC V800 or cisco 12000,
+>>18 beshort 37 Fujitsu FR20,
+>>18 beshort 38 TRW RH-32,
+>>18 beshort 39 Motorola RCE,
+>>18 beshort 40 ARM,
+>>18 beshort 41 Alpha,
+>>18 beshort 42 Renesas SH,
+>>18 beshort 43 SPARC V9,
+>>>48 belong&0xffff00 0x000200 Sun UltraSPARC1 Extensions Required,
+>>>48 belong&0xffff00 0x000400 HaL R1 Extensions Required,
+>>>48 belong&0xffff00 0x000800 Sun UltraSPARC3 Extensions Required,
+>>>48 belong&0x3 0 total store ordering,
+>>>48 belong&0x3 1 partial store ordering,
+>>>48 belong&0x3 2 relaxed memory ordering,
+>>18 beshort 44 Siemens Tricore Embedded Processor,
+>>18 beshort 45 Argonaut RISC Core, Argonaut Technologies Inc.,
+>>18 beshort 46 Renesas H8/300,
+>>18 beshort 47 Renesas H8/300H,
+>>18 beshort 48 Renesas H8S,
+>>18 beshort 49 Renesas H8/500,
+>>18 beshort 50 IA-64,
+>>18 beshort 51 Stanford MIPS-X,
+>>18 beshort 52 Motorola Coldfire,
+>>18 beshort 53 Motorola M68HC12,
+>>18 beshort 73 Cray NV1,
+>>18 beshort 75 Digital VAX,
+>>18 beshort 88 Renesas M32R,
+>>18 beshort 92 OpenRISC,
+>>18 beshort 0x3426 OpenRISC (obsolete),
+>>18 beshort 0x8472 OpenRISC (obsolete),
+>>18 beshort 94 Tensilica Xtensa,
+>>18 beshort 97 NatSemi 32k,
+>>18 beshort 0x18ad AVR32 (unofficial),
+>>18 beshort 0x9026 Alpha (unofficial),
+>>18 beshort 0xa390 IBM S/390 (obsolete),
+>>20 belong 0 invalid version
+>>20 belong 1 version 1
+>>36 belong 1 MathCoPro/FPU/MAU Required
+# Up to now only 0, 1 and 2 are defined; I've seen a file with 0x83, it seemed
+# like proper ELF, but extracting the string had bad results.
+>4 byte <0x80
+>>8 string >\0 (%s)
+>8 string \0
+>>7 byte 0 (SYSV)
+>>7 byte 1 (HP-UX)
+>>7 byte 2 (NetBSD)
+>>7 byte 3 (GNU/Linux)
+>>7 byte 4 (GNU/Hurd)
+>>7 byte 5 (86Open)
+>>7 byte 6 (Solaris)
+>>7 byte 7 (Monterey)
+>>7 byte 8 (IRIX)
+>>7 byte 9 (FreeBSD)
+>>7 byte 10 (Tru64)
+>>7 byte 11 (Novell Modesto)
+>>7 byte 12 (OpenBSD)
+>8 string \2
+>>7 byte 13 (OpenVMS)
+>>7 byte 97 (ARM)
+>>7 byte 255 (embedded)
+
+#------------------------------------------------------------------------------
+# $File: encore,v 1.6 2009/09/19 16:28:09 christos Exp $
+# encore: file(1) magic for Encore machines
+#
+# XXX - needs to have the byte order specified (NS32K was little-endian,
+# dunno whether they run the 88K in little-endian mode or not).
+#
+0 short 0x154 Encore
+>20 short 0x107 executable
+>20 short 0x108 pure executable
+>20 short 0x10b demand-paged executable
+>20 short 0x10f unsupported executable
+>12 long >0 not stripped
+>22 short >0 - version %ld
+>22 short 0 -
+#>4 date x stamp %s
+0 short 0x155 Encore unsupported executable
+>12 long >0 not stripped
+>22 short >0 - version %ld
+>22 short 0 -
+#>4 date x stamp %s
+
+#------------------------------------------------------------------------------
+# $File: epoc,v 1.7 2009/09/19 16:28:09 christos Exp $
+# EPOC : file(1) magic for EPOC documents [Psion Series 5/Osaris/Geofox 1]
+# Stefan Praszalowicz <hpicollo at worldnet.fr> and Peter Breitenlohner <peb at mppmu.mpg.de>
+# Useful information for improving this file can be found at:
+# http://software.frodo.looijaard.name/psiconv/formats/Index.html
+#------------------------------------------------------------------------------
+0 lelong 0x10000037 Psion Series 5
+>4 lelong 0x10000039 font file
+>4 lelong 0x1000003A printer driver
+>4 lelong 0x1000003B clipboard
+>4 lelong 0x10000042 multi-bitmap image
+!:mime image/x-epoc-mbm
+>4 lelong 0x1000006A application information file
+>4 lelong 0x1000006D
+>>8 lelong 0x1000007D Sketch image
+!:mime image/x-epoc-sketch
+>>8 lelong 0x1000007E voice note
+>>8 lelong 0x1000007F Word file
+!:mime application/x-epoc-word
+>>8 lelong 0x10000085 OPL program (TextEd)
+!:mime application/x-epoc-opl
+>>8 lelong 0x10000088 Sheet file
+!:mime application/x-epoc-sheet
+>>8 lelong 0x100001C4 EasyFax initialisation file
+>4 lelong 0x10000073 OPO module
+!:mime application/x-epoc-opo
+>4 lelong 0x10000074 OPL application
+!:mime application/x-epoc-app
+>4 lelong 0x1000008A exported multi-bitmap image
+
+0 lelong 0x10000041 Psion Series 5 ROM multi-bitmap image
+
+0 lelong 0x10000050 Psion Series 5
+>4 lelong 0x1000006D database
+>4 lelong 0x100000E4 ini file
+
+0 lelong 0x10000079 Psion Series 5 binary:
+>4 lelong 0x00000000 DLL
+>4 lelong 0x10000049 comms hardware library
+>4 lelong 0x1000004A comms protocol library
+>4 lelong 0x1000005D OPX
+>4 lelong 0x1000006C application
+>4 lelong 0x1000008D DLL
+>4 lelong 0x100000AC logical device driver
+>4 lelong 0x100000AD physical device driver
+>4 lelong 0x100000E5 file transfer protocol
+# 0x100000E5 is tested only once: a repeated test would print its text twice
+>4 lelong 0x10000140 printer definition
+>4 lelong 0x10000141 printer definition
+
+0 lelong 0x1000007A Psion Series 5 executable
+
+#------------------------------------------------------------------------------
+# $File: erlang,v 1.5 2009/09/19 16:28:09 christos Exp $
+# erlang: file(1) magic for Erlang JAM and BEAM files
+# URL: http://www.erlang.org/faq/x779.html#AEN812
+
+# OTP R3-R4: magic is DEL (octal \177) followed by "BEAM!", 6 bytes in all
+0 string \177BEAM! Old Erlang BEAM file
+>6 short >0 - version %d
+
+# OTP R5 and onwards
+0 string FOR1
+>8 string BEAM Erlang BEAM file
+
+# 4.2 version may have a copyright notice!
+4 string Tue\ Jan\ 22\ 14:32:44\ MET\ 1991 Erlang JAM file - version 4.2
+79 string Tue\ Jan\ 22\ 14:32:44\ MET\ 1991 Erlang JAM file - version 4.2
+
+4 string 1.0\ Fri\ Feb\ 3\ 09:55:56\ MET\ 1995 Erlang JAM file - version 4.3
+
+#------------------------------------------------------------------------------
+# $File: esri,v 1.4 2009/09/19 16:28:09 christos Exp $
+# ESRI Shapefile format (.shp .shx .dbf=DBaseIII)
+# Based on info from
+# <URL:http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf>
+0 belong 9994 ESRI Shapefile
+>4 belong =0
+>8 belong =0
+>12 belong =0
+>16 belong =0
+>20 belong =0
+>28 lelong x version %d
+>24 belong x length %d
+>32 lelong =0 type Null Shape
+>32 lelong =1 type Point
+>32 lelong =3 type PolyLine
+>32 lelong =5 type Polygon
+>32 lelong =8 type MultiPoint
+>32 lelong =11 type PointZ
+>32 lelong =13 type PolyLineZ
+>32 lelong =15 type PolygonZ
+>32 lelong =18 type MultiPointZ
+>32 lelong =21 type PointM
+>32 lelong =23 type PolyLineM
+>32 lelong =25 type PolygonM
+>32 lelong =28 type MultiPointM
+>32 lelong =31 type MultiPatch
+
+#------------------------------------------------------------------------------
+# $File: fcs,v 1.4 2009/09/19 16:28:09 christos Exp $
+# fcs: file(1) magic for FCS (Flow Cytometry Standard) data files
+# From Roger Leigh <roger at whinlatter.uklinux.net>
+0 string FCS1.0 Flow Cytometry Standard (FCS) data, version 1.0
+0 string FCS2.0 Flow Cytometry Standard (FCS) data, version 2.0
+0 string FCS3.0 Flow Cytometry Standard (FCS) data, version 3.0
+
+
+#------------------------------------------------------------------------------
+# $File: filesystems,v 1.55 2010/01/16 17:45:12 chl Exp $
+# filesystems: file(1) magic for different filesystems
+#
+0 string \366\366\366\366 PC formatted floppy with no filesystem
+# Sun disk labels
+# From /usr/include/sun/dklabel.h:
+0774 beshort 0xdabe
+# modified by Joerg Jenderek, because original test
+# succeeds for Cabinet archive dao360.dl_ with negative blocks
+>0770 long >0 Sun disk label
+>>0 string x '%s
+>>>31 string >\0 \b%s
+>>>>63 string >\0 \b%s
+>>>>>95 string >\0 \b%s
+>>0 string x \b'
+>>0734 short >0 %d rpm,
+>>0736 short >0 %d phys cys,
+>>0740 short >0 %d alts/cyl,
+>>0746 short >0 %d interleave,
+>>0750 short >0 %d data cyls,
+>>0752 short >0 %d alt cyls,
+>>0754 short >0 %d heads/partition,
+>>0756 short >0 %d sectors/track,
+>>0764 long >0 start cyl %ld,
+>>0770 long x %ld blocks
+# Is there a boot block written 1 sector in?
+>512 belong&077777777 0600407 \b, boot block present
+# Joerg Jenderek: Smart Boot Manager backup file is 41 byte header + first sectors of disc
+# (http://btmgr.sourceforge.net/docs/user-guide-3.html)
+0 string SBMBAKUP_ Smart Boot Manager backup file
+>9 string x \b, version %-5.5s
+>>14 string =_
+>>>15 string x %-.1s
+>>>>16 string =_ \b.
+>>>>>17 string x \b%-.1s
+>>>>>>18 string =_ \b.
+>>>>>>>19 string x \b%-.1s
+>>>22 ubyte 0
+>>>>21 ubyte x \b, from drive 0x%x
+>>>22 ubyte >0
+>>>>21 string x \b, from drive %s
+
+# Joerg Jenderek
+# DOS Emulator image is 128 byte, null right padded header + harddisc image
+0 string DOSEMU\0
+>0x27E leshort 0xAA55
+#offset is 128
+>>19 ubyte 128
+>>>(19.b-1) ubyte 0x0 DOS Emulator image
+>>>>7 ulelong >0 \b, %u heads
+>>>>11 ulelong >0 \b, %d sectors/track
+>>>>15 ulelong >0 \b, %d cylinders
+
+# updated by Joerg Jenderek at Sep 2007
+# only for sector sizes with 512 or more Bytes
+0x1FE leshort 0xAA55 x86 boot sector
+# TODO: also handle sectors smaller than 512 bytes and some other files, GRR
+#30 search/481 \x55\xAA x86 boot sector
+# not for BeOS floppy 1440k, MBRs
+#(11.s-2) uleshort 0xAA55 x86 boot sector
+>2 string OSBS \b, OS/BS MBR
+# J\xf6rg Jenderek <joerg dot jenderek at web dot de>
+>0x8C string Invalid\ partition\ table \b, MS-DOS MBR
+# dr-dos with some upper-, lowercase variants
+>0x9D string Invalid\ partition\ table$
+>>181 string No\ Operating\ System$
+>>>201 string Operating\ System\ load\ error$ \b, DR-DOS MBR, Version 7.01 to 7.03
+>0x9D string Invalid\ partition\ table$
+>>181 string No\ operating\ system$
+>>>201 string Operating\ system\ load\ error$ \b, DR-DOS MBR, Version 7.01 to 7.03
+>342 string Invalid\ partition\ table$
+>>366 string No\ operating\ system$
+>>>386 string Operating\ system\ load\ error$ \b, DR-DOS MBR, version 7.01 to 7.03
+>295 string NEWLDR\0
+>>302 string Bad\ PT\ $
+>>>310 string No\ OS\ $
+>>>>317 string OS\ load\ err$
+>>>>>329 string Moved\ or\ missing\ IBMBIO.LDR\n\r
+>>>>>>358 string Press\ any\ key\ to\ continue.\n\r$
+>>>>>>>387 string Copyright\ (c)\ 1984,1998
+>>>>>>>>411 string Caldera\ Inc.\0 \b, DR-DOS MBR (IBMBIO.LDR)
+>0x10F string Ung\201ltige\ Partitionstabelle \b, MS-DOS MBR, german version 4.10.1998, 4.10.2222
+>>0x1B8 ubelong >0 \b, Serial 0x%-.4x
+>0x8B string Ung\201ltige\ Partitionstabelle \b, MS-DOS MBR, german version 5.00 to 4.00.950
+>271 string Invalid\ partition\ table\0
+>>295 string Error\ loading\ operating\ system\0
+>>>326 string Missing\ operating\ system\0 \b, mbr
+#
+>139 string Invalid\ partition\ table\0
+>>163 string Error\ loading\ operating\ system\0
+>>>194 string Missing\ operating\ system\0 \b, Microsoft Windows XP mbr
+# http://www.heise.de/ct/05/09/006/ page 184
+#HKEY_LOCAL_MACHINE\SYSTEM\MountedDevices\DosDevices\?:=Serial4Bytes+8Bytes
+>>>>0x1B8 ulelong >0 \b,Serial 0x%-.4x
+>300 string Invalid\ partition\ table\0
+>>324 string Error\ loading\ operating\ system\0
+>>>355 string Missing\ operating\ system\0 \b, Microsoft Windows XP MBR
+#??>>>389 string Invalid\ system\ disk
+>>>>0x1B8 ulelong >0 \b, Serial 0x%-.4x
+>300 string Ung\201ltige\ Partitionstabelle
+#split string to avoid error: String too long
+>>328 string Fehler\ beim\ Laden\
+>>>346 string des\ Betriebssystems
+>>>>366 string Betriebssystem\ nicht\ vorhanden \b, Microsoft Windows XP MBR (german)
+>>>>>0x1B8 ulelong >0 \b, Serial 0x%-.4x
+#>0x145 string Default:\ F \b, FREE-DOS MBR
+#>0x14B string Default:\ F \b, FREE-DOS 1.0 MBR
+>0x145 search/7 Default:\ F \b, FREE-DOS MBR
+#>>313 string F0\ .\ .\ .
+#>>>322 string disk\ 1
+#>>>>382 string FAT3
+>64 string no\ active\ partition\ found
+>>96 string read\ error\ while\ reading\ drive \b, FREE-DOS Beta 0.9 MBR
+# Ranish Partition Manager http://www.ranish.com/part/
+>387 search/4 \0\ Error!\r
+>>378 search/7 Virus!
+>>>397 search/4 Booting\
+>>>>408 search/4 HD1/\0 \b, Ranish MBR (
+>>>>>416 string Writing\ changes... \b2.37
+>>>>>>438 ubyte x \b,0x%x dots
+>>>>>>440 ubyte >0 \b,virus check
+>>>>>>441 ubyte >0 \b,partition %c
+#2.38,2.42,2.44
+>>>>>416 string !Writing\ changes... \b
+>>>>>>418 ubyte 1 \bvirus check,
+>>>>>>419 ubyte x \b0x%x seconds
+>>>>>>420 ubyte&0x0F >0 \b,partition
+>>>>>>>420 ubyte&0x0F <5 \b %x
+>>>>>>>420 ubyte&0x0F 0Xf \b ask
+>>>>>420 ubyte x \b)
+#
+>271 string Operating\ system\ loading
+>>296 string error\r \b, SYSLINUX MBR (2.10)
+# http://www.acronis.de/
+>362 string MBR\ Error\ \0\r
+>>376 string ress\ any\ key\ to\
+>>>392 string boot\ from\ floppy...\0 \b, Acronis MBR
+# added by Joerg Jenderek
+# http://www.visopsys.org/
+# http://partitionlogic.org.uk/
+>309 string No\ bootable\ partition\ found\r
+>>339 string I/O\ Error\ reading\ boot\ sector\r \b, Visopsys MBR
+>349 string No\ bootable\ partition\ found\r
+>>379 string I/O\ Error\ reading\ boot\ sector\r \b, simple Visopsys MBR
+# bootloader, bootmanager
+>0x40 string SBML
+# label with 11 characters of FAT 12 bit filesystem
+>>43 string SMART\ BTMGR
+>>>430 string SBMK\ Bad!\r \b, Smart Boot Manager
+# OEM-ID not always "SBM"
+#>>>>3 strings SBM
+>>>>6 string >\0 \b, version %s
+>382 string XOSLLOADXCF \b, eXtended Operating System Loader
+>6 string LILO \b, LInux i386 boot LOader
+>>120 string LILO \b, version 22.3.4 SuSe
+>>172 string LILO \b, version 22.5.8 Debian
+# updated by Joerg Jenderek at Oct 2008
+# variables according to grub-0.97/stage1/stage1.S or
+# http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
+# tests for the usual values are commented out, so that only the details of unusual GRUB loaders are printed
+>342 search/60 \0Geom\0
+#>0 ulelong x %x=0x009048EB , 0x2a9048EB 0
+>>0x41 ubyte <2
+>>>0x3E ubyte >2 \b; GRand Unified Bootloader
+# 0x3 for 0.5.95,0.93,0.94,0.96 0x4 for 1.90
+>>>>0x3E ubyte x \b, stage1 version 0x%x
+#If it is 0xFF, use a drive passed by BIOS
+>>>>0x40 ubyte <0xFF \b, boot drive 0x%x
+# in most cases 0,1,0x2e for GRUB 0.5.95
+>>>>0x41 ubyte >0 \b, LBA flag 0x%x
+>>>>0x42 uleshort <0x8000 \b, stage2 address 0x%x
+#>>>>0x42 uleshort =0x8000 \b, stage2 address 0x%x (usual)
+>>>>0x42 uleshort >0x8000 \b, stage2 address 0x%x
+#>>>>0x44 ulelong =1 \b, 1st sector stage2 0x%x (default)
+>>>>0x44 ulelong >1 \b, 1st sector stage2 0x%x
+>>>>0x48 uleshort <0x800 \b, stage2 segment 0x%x
+#>>>>0x48 uleshort =0x800 \b, stage2 segment 0x%x (usual)
+>>>>0x48 uleshort >0x800 \b, stage2 segment 0x%x
+>>>>402 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>394 string stage1 \b, GRUB version 0.5.95
+>>>>382 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>376 string GRUB\ \0 \b, GRUB version 0.93 or 1.94
+>>>>383 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>377 string GRUB\ \0 \b, GRUB version 0.94
+>>>>385 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>379 string GRUB\ \0 \b, GRUB version 0.95 or 0.96
+>>>>391 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>385 string GRUB\ \0 \b, GRUB version 0.97
+#unknown version
+>>>343 string Geom\0Read\0\ Error\0
+>>>>321 string Loading\ stage1.5 \b, GRUB version x.y
+>>>380 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>374 string GRUB\ \0 \b, GRUB version n.m
+# http://syslinux.zytor.com/
+>478 string Boot\ failed\r
+>>495 string LDLINUX\ SYS \b, SYSLINUX bootloader (1.62)
+>480 string Boot\ failed\r
+>>495 string LDLINUX\ SYS \b, SYSLINUX bootloader (2.06 or 2.11)
+>484 string Boot\ error\r \b, SYSLINUX bootloader (3.11)
+>395 string chksum\0\ ERROR!\0 \b, Gujin bootloader
+# http://www.bcdwb.de/bcdw/index_e.htm
+>3 string BCDL
+>>498 string BCDL\ \ \ \ BIN \b, Bootable CD Loader (1.50Z)
+# mbr partition table entries
+# OEM-ID does not contain MicroSoft,NEWLDR,DOS,SYSLINUX,or MTOOLs
+>3 string !MS
+>>3 string !SYSLINUX
+>>>3 string !MTOOL
+>>>>3 string !NEWLDR
+>>>>>5 string !DOS
+# not FAT (32 bit)
+>>>>>>82 string !FAT32
+#not Linux kernel
+>>>>>>>514 string !HdrS
+#not BeOS
+>>>>>>>>422 string !Be\ Boot\ Loader
+# active flag 0 or 0x80 and type > 0
+>>>>>>>>>446 ubyte <0x81
+>>>>>>>>>>446 ubyte&0x7F 0
+>>>>>>>>>>>450 ubyte >0 \b; partition 1: ID=0x%x
+>>>>>>>>>>>>446 ubyte 0x80 \b, active
+>>>>>>>>>>>>447 ubyte x \b, starthead %u
+#>>>>>>>>>>>>448 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>448 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>454 ulelong x \b, startsector %u
+>>>>>>>>>>>>458 ulelong x \b, %u sectors
+#
+>>>>>>>>>462 ubyte <0x81
+>>>>>>>>>>462 ubyte&0x7F 0
+>>>>>>>>>>>466 ubyte >0 \b; partition 2: ID=0x%x
+>>>>>>>>>>>>462 ubyte 0x80 \b, active
+>>>>>>>>>>>>463 ubyte x \b, starthead %u
+#>>>>>>>>>>>>464 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>464 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>470 ulelong x \b, startsector %u
+>>>>>>>>>>>>474 ulelong x \b, %u sectors
+#
+>>>>>>>>>478 ubyte <0x81
+>>>>>>>>>>478 ubyte&0x7F 0
+>>>>>>>>>>>482 ubyte >0 \b; partition 3: ID=0x%x
+>>>>>>>>>>>>478 ubyte 0x80 \b, active
+>>>>>>>>>>>>479 ubyte x \b, starthead %u
+#>>>>>>>>>>>>480 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>481 ubyte x \b, start C2S: 0x%x
+#>>>>>>>>>>>>480 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>486 ulelong x \b, startsector %u
+>>>>>>>>>>>>490 ulelong x \b, %u sectors
+#
+>>>>>>>>>494 ubyte <0x81
+>>>>>>>>>>494 ubyte&0x7F 0
+>>>>>>>>>>>498 ubyte >0 \b; partition 4: ID=0x%x
+>>>>>>>>>>>>494 ubyte 0x80 \b, active
+>>>>>>>>>>>>495 ubyte x \b, starthead %u
+#>>>>>>>>>>>>496 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>496 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>502 ulelong x \b, startsector %u
+>>>>>>>>>>>>506 ulelong x \b, %u sectors
+# mbr partition table entries end
+# http://www.acronis.de/
+#FAT label=ACRONIS\ SZ
+#OEM-ID=BOOTWIZ0
+>442 string Non-system\ disk,\
+>>459 string press\ any\ key...\x7\0 \b, Acronis Startup Recovery Loader
+# DOS names like F11.SYS are 8 right space padded bytes+3 bytes
+>>>477 ubyte&0xDF >0
+>>>>477 string x \b %-.3s
+>>>>>480 ubyte&0xDF >0
+>>>>>>480 string x \b%-.5s
+>>>>485 ubyte&0xDF >0
+>>>>>485 string x \b.%-.3s
+#
+>185 string FDBOOT\ Version\
+>>204 string \rNo\ Systemdisk.\
+>>>220 string Booting\ from\ harddisk.\n\r
+>>>245 string Cannot\ load\ from\ harddisk.\n\r
+>>>>273 string Insert\ Systemdisk\
+>>>>>291 string and\ press\ any\ key.\n\r \b, FDBOOT harddisk Bootloader
+>>>>>>200 string >\0 \b, version %-3s
+>242 string Bootsector\ from\ C.H.\ Hochst\204
+>>278 string No\ Systemdisk.\
+>>>293 string Booting\ from\ harddisk.\n\r
+>>>441 string Cannot\ load\ from\ harddisk.\n\r
+>>>>469 string Insert\ Systemdisk\
+>>>>>487 string and\ press\ any\ key.\n\r \b, WinImage harddisk Bootloader
+>>>>>>209 string >\0 \b, version %-4.4s
+>(1.b+2) ubyte 0xe
+>>(1.b+3) ubyte 0x1f
+>>>(1.b+4) ubyte 0xbe
+>>>>(1.b+5) ubyte 0x77
+>>>>(1.b+6) ubyte 0x7c
+>>>>>(1.b+7) ubyte 0xac
+>>>>>>(1.b+8) ubyte 0x22
+>>>>>>>(1.b+9) ubyte 0xc0
+>>>>>>>>(1.b+10) ubyte 0x74
+>>>>>>>>>(1.b+11) ubyte 0xb
+>>>>>>>>>>(1.b+12) ubyte 0x56
+>>>>>>>>>>(1.b+13) ubyte 0xb4 \b, mkdosfs boot message display
+>214 string Please\ try\ to\ install\ FreeDOS\ \b, DOS Emulator boot message display
+#>>244 string from\ dosemu-freedos-*-bin.tgz\r
+#>>>170 string Sorry,\ could\ not\ load\ an\
+#>>>>195 string operating\ system.\r\n
+#
+>103 string This\ is\ not\ a\ bootable\ disk.\
+>>132 string Please\ insert\ a\ bootable\
+>>>157 string floppy\ and\r\n
+>>>>169 string press\ any\ key\ to\ try\ again...\r \b, FREE-DOS message display
+#
+>66 string Solaris\ Boot\ Sector
+>>99 string Incomplete\ MDBoot\ load.
+>>>89 string Version \b, Sun Solaris Bootloader
+>>>>97 byte x version %c
+#
+>408 string OS/2\ !!\ SYS01475\r\0
+>>429 string OS/2\ !!\ SYS02025\r\0
+>>>450 string OS/2\ !!\ SYS02027\r\0
+>>>469 string OS2BOOT\ \ \ \ \b, IBM OS/2 Warp bootloader
+#
+>409 string OS/2\ !!\ SYS01475\r\0
+>>430 string OS/2\ !!\ SYS02025\r\0
+>>>451 string OS/2\ !!\ SYS02027\r\0
+>>>470 string OS2BOOT\ \ \ \ \b, IBM OS/2 Warp Bootloader
+>112 string This\ disk\ is\ not\ bootable\r
+>>142 string If\ you\ wish\ to\ make\ it\ bootable
+>>>176 string run\ the\ DOS\ program\ SYS\
+>>>200 string after\ the\r
+>>>>216 string system\ has\ been\ loaded\r\n
+>>>>>242 string Please\ insert\ a\ DOS\ diskette\
+>>>>>271 string into\r\n\ the\ drive\ and\
+>>>>>>292 string strike\ any\ key...\0 \b, IBM OS/2 Warp message display
+# XP
+>430 string NTLDR\ is\ missing\xFF\r\n
+>>449 string Disk\ error\xFF\r\n
+>>>462 string Press\ any\ key\ to\ restart\r \b, Microsoft Windows XP Bootloader
+# DOS names like NTLDR,CMLDR,$LDR$ are 8 right space padded bytes+3 bytes
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+#
+>>>>371 ubyte >0x20
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+#
+>430 string NTLDR\ nicht\ gefunden\xFF\r\n
+>>453 string Datentr\204gerfehler\xFF\r\n
+>>>473 string Neustart\ mit\ beliebiger\ Taste\r \b, Microsoft Windows XP Bootloader (german)
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+# offset variant
+>>>>379 string \0
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+#
+>430 string NTLDR\ fehlt\xFF\r\n
+>>444 string Datentr\204gerfehler\xFF\r\n
+>>>464 string Neustart\ mit\ beliebiger\ Taste\r \b, Microsoft Windows XP Bootloader (2.german)
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+# variant
+>>>>371 ubyte >0x20
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+#
+>430 string NTLDR\ fehlt\xFF\r\n
+>>444 string Medienfehler\xFF\r\n
+>>>459 string Neustart:\ Taste\ dr\201cken\r \b, Microsoft Windows XP Bootloader (3.german)
+>>>>371 ubyte >0x20
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+# variant
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+#
+>430 string Datentr\204ger\ entfernen\xFF\r\n
+>>454 string Medienfehler\xFF\r\n
+>>>469 string Neustart:\ Taste\ dr\201cken\r \b, Microsoft Windows XP Bootloader (4.german)
+>>>>379 string \0
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+# variant
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+#
+
+#>3 string NTFS\ \ \ \
+>389 string Fehler\ beim\ Lesen\
+>>407 string des\ Datentr\204gers
+>>>426 string NTLDR\ fehlt
+>>>>440 string NTLDR\ ist\ komprimiert
+>>>>>464 string Neustart\ mit\ Strg+Alt+Entf\r \b, Microsoft Windows XP Bootloader NTFS (german)
+#>3 string NTFS\ \ \ \
+>313 string A\ disk\ read\ error\ occurred.\r
+>>345 string A\ kernel\ file\ is\ missing\
+>>>370 string from\ the\ disk.\r
+>>>>484 string NTLDR\ is\ compressed
+>>>>>429 string Insert\ a\ system\ diskette\
+>>>>>>454 string and\ restart\r\nthe\ system.\r \b, Microsoft Windows XP Bootloader NTFS
+# DOS loader variants different languages,offsets
+>472 ubyte&0xDF >0
+>>389 string Invalid\ system\ disk\xFF\r\n
+>>>411 string Disk\ I/O\ error
+>>>>428 string Replace\ the\ disk,\ and\
+>>>>>455 string press\ any\ key \b, Microsoft Windows 98 Bootloader
+#IO.SYS
+>>>>>>472 ubyte&0xDF >0
+>>>>>>>472 string x \b %-.2s
+>>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>>474 string x \b%-.5s
+>>>>>>>>>>479 ubyte&0xDF >0
+>>>>>>>>>>>479 string x \b%-.1s
+>>>>>>>480 ubyte&0xDF >0
+>>>>>>>>480 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>>483 string x \b%-.5s
+>>>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>>>488 string x \b%-.3s
+>>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>>491 string x \b.%-.3s
+#
+>>390 string Invalid\ system\ disk\xFF\r\n
+>>>412 string Disk\ I/O\ error\xFF\r\n
+>>>>429 string Replace\ the\ disk,\ and\
+>>>>>451 string then\ press\ any\ key\r \b, Microsoft Windows 98 Bootloader
+>>388 string Ungueltiges\ System\ \xFF\r\n
+>>>410 string E/A-Fehler\ \ \ \ \xFF\r\n
+>>>>427 string Datentraeger\ wechseln\ und\
+>>>>>453 string Taste\ druecken\r \b, Microsoft Windows 95/98/ME Bootloader (german)
+#WINBOOT.SYS only not spaces (0xDF)
+>>>>>>497 ubyte&0xDF >0
+>>>>>>>497 string x %-.5s
+>>>>>>>>502 ubyte&0xDF >0
+>>>>>>>>>502 string x \b%-.1s
+>>>>>>>>>>503 ubyte&0xDF >0
+>>>>>>>>>>>503 string x \b%-.1s
+>>>>>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>>>>>504 string x \b%-.1s
+>>>>>>505 ubyte&0xDF >0
+>>>>>>>505 string x \b.%-.3s
+#IO.SYS
+>>>>>>472 ubyte&0xDF >0 or
+>>>>>>>472 string x \b %-.2s
+>>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>>474 string x \b%-.5s
+>>>>>>>>>>479 ubyte&0xDF >0
+>>>>>>>>>>>479 string x \b%-.1s
+>>>>>>>480 ubyte&0xDF >0
+>>>>>>>>480 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>>483 string x \b%-.5s
+>>>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>>>488 string x \b%-.3s
+>>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>>491 string x \b.%-.3s
+#
+>>390 string Ungueltiges\ System\ \xFF\r\n
+>>>412 string E/A-Fehler\ \ \ \ \xFF\r\n
+>>>>429 string Datentraeger\ wechseln\ und\
+>>>>>455 string Taste\ druecken\r \b, Microsoft Windows 95/98/ME Bootloader (German)
+#WINBOOT.SYS only not spaces (0xDF)
+>>>>>>497 ubyte&0xDF >0
+>>>>>>>497 string x %-.7s
+>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>504 string x \b%-.1s
+>>>>>>505 ubyte&0xDF >0
+>>>>>>>505 string x \b.%-.3s
+#IO.SYS
+>>>>>>472 ubyte&0xDF >0 or
+>>>>>>>472 string x \b %-.2s
+>>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>>474 string x \b%-.6s
+>>>>>>>480 ubyte&0xDF >0
+>>>>>>>>480 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>>483 string x \b%-.5s
+>>>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>>>488 string x \b%-.3s
+>>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>>491 string x \b.%-.3s
+#
+>>389 string Ungueltiges\ System\ \xFF\r\n
+>>>411 string E/A-Fehler\ \ \ \ \xFF\r\n
+>>>>428 string Datentraeger\ wechseln\ und\
+>>>>>454 string Taste\ druecken\r \b, Microsoft Windows 95/98/ME Bootloader (GERMAN)
+# DOS names like IO.SYS,WINBOOT.SYS,MSDOS.SYS,WINBOOT.INI are stored as an 8-byte name right-padded with spaces plus a 3-byte extension
+>>>>>>472 string x %-.2s
+>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>474 string x \b%-.5s
+>>>>>>>>479 ubyte&0xDF >0
+>>>>>>>>>479 string x \b%-.1s
+>>>>>>480 ubyte&0xDF >0
+>>>>>>>480 string x \b.%-.3s
+>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>483 string x \b%-.5s
+>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>488 string x \b%-.2s
+>>>>>>>>490 ubyte&0xDF >0
+>>>>>>>>>490 string x \b%-.1s
+>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>491 string x \b.%-.3s
+>479 ubyte&0xDF >0
+>>416 string Kein\ System\ oder\
+>>>433 string Laufwerksfehler
+>>>>450 string Wechseln\ und\ Taste\ dr\201cken \b, Microsoft DOS Bootloader (german)
+#IO.SYS
+>>>>>479 string x \b %-.2s
+>>>>>>481 ubyte&0xDF >0
+>>>>>>>481 string x \b%-.6s
+>>>>>487 ubyte&0xDF >0
+>>>>>>487 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>490 ubyte&0xDF >0 \b+
+>>>>>>>490 string x \b%-.5s
+>>>>>>>>495 ubyte&0xDF >0
+>>>>>>>>>495 string x \b%-.3s
+>>>>>>>498 ubyte&0xDF >0
+>>>>>>>>498 string x \b.%-.3s
+#
+>376 search/41 Non-System\ disk\ or\
+>>395 search/41 disk\ error\r
+>>>407 search/41 Replace\ and\
+>>>>419 search/41 press\ \b,
+>>>>419 search/41 strike\ \b, old
+>>>>426 search/41 any\ key\ when\ ready\r MS or PC-DOS bootloader
+#449 Disk\ Boot\ failure\r MS 3.21
+#466 Boot\ Failure\r MS 3.30
+>>>>>468 search/18 \0
+#IO.SYS,IBMBIO.COM
+>>>>>>&0 string x \b %-.2s
+>>>>>>>&-20 ubyte&0xDF >0
+>>>>>>>>&-1 string x \b%-.4s
+>>>>>>>>>&-16 ubyte&0xDF >0
+>>>>>>>>>>&-1 string x \b%-.2s
+>>>>>>&8 ubyte&0xDF >0 \b.
+>>>>>>>&-1 string x \b%-.3s
+#MSDOS.SYS,IBMDOS.COM
+>>>>>>&11 ubyte&0xDF >0 \b+
+>>>>>>>&-1 string x \b%-.5s
+>>>>>>>>&-6 ubyte&0xDF >0
+>>>>>>>>>&-1 string x \b%-.1s
+>>>>>>>>>>&-5 ubyte&0xDF >0
+>>>>>>>>>>>&-1 string x \b%-.2s
+>>>>>>>&7 ubyte&0xDF >0 \b.
+>>>>>>>>&-1 string x \b%-.3s
+>441 string Cannot\ load\ from\ harddisk.\n\r
+>>469 string Insert\ Systemdisk\
+>>>487 string and\ press\ any\ key.\n\r \b, MS (2.11) DOS bootloader
+#>43 string \224R-LOADER\ \ SYS =label
+>54 string SYS
+>>324 string VASKK
+>>>495 string NEWLDR\0 \b, DR-DOS Bootloader (LOADER.SYS)
+#
+>98 string Press\ a\ key\ to\ retry\0\r
+>>120 string Cannot\ find\ file\ \0\r
+>>>139 string Disk\ read\ error\0\r
+>>>>156 string Loading\ ...\0 \b, DR-DOS (3.41) Bootloader
+#DRBIOS.SYS
+>>>>>44 ubyte&0xDF >0
+>>>>>>44 string x \b %-.6s
+>>>>>>>50 ubyte&0xDF >0
+>>>>>>>>50 string x \b%-.2s
+>>>>>>52 ubyte&0xDF >0
+>>>>>>>52 string x \b.%-.3s
+#
+>70 string IBMBIO\ \ COM
+>>472 string Cannot\ load\ DOS!\
+>>>489 string Any\ key\ to\ retry \b, DR-DOS Bootloader
+>>471 string Cannot\ load\ DOS\
+>>487 string press\ key\ to\ retry \b, Open-DOS Bootloader
+#??
+>444 string KERNEL\ \ SYS
+>>314 string BOOT\ error! \b, FREE-DOS Bootloader
+>499 string KERNEL\ \ SYS
+>>305 string BOOT\ err!\0 \b, Free-DOS Bootloader
+>449 string KERNEL\ \ SYS
+>>319 string BOOT\ error! \b, FREE-DOS 0.5 Bootloader
+#
+>449 string Loading\ FreeDOS
+>>0x1AF ulelong >0 \b, FREE-DOS 0.95,1.0 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+#
+>331 string Error!.0 \b, FREE-DOS 1.0 bootloader
+#
+>125 string Loading\ FreeDOS...\r
+>>311 string BOOT\ error!\r \b, FREE-DOS bootloader
+>>>441 ubyte&0xDF >0
+>>>>441 string x \b %-.6s
+>>>>>447 ubyte&0xDF >0
+>>>>>>447 string x \b%-.1s
+>>>>>>>448 ubyte&0xDF >0
+>>>>>>>>448 string x \b%-.1s
+>>>>449 ubyte&0xDF >0
+>>>>>449 string x \b.%-.3s
+>124 string FreeDOS\0
+>>331 string \ err\0 \b, FREE-DOS BETa 0.9 Bootloader
+# DOS names like KERNEL.SYS,KERNEL16.SYS,KERNEL32.SYS,METAKERN.SYS are stored as an 8-byte name right-padded with spaces plus a 3-byte extension
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+>>333 string \ err\0 \b, FREE-DOS BEta 0.9 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+>>334 string \ err\0 \b, FREE-DOS Beta 0.9 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+>336 string Error!\
+>>343 string Hit\ a\ key\ to\ reboot. \b, FREE-DOS Beta 0.9sr1 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+# added by Joerg Jenderek
+# http://www.visopsys.org/
+# http://partitionlogic.org.uk/
+# OEM-ID=Visopsys
+>478 ulelong 0
+>>(1.b+326) string I/O\ Error\ reading\
+>>>(1.b+344) string Visopsys\ loader\r
+>>>>(1.b+361) string Press\ any\ key\ to\ continue.\r \b, Visopsys loader
+# http://alexfru.chat.ru/epm.html#bootprog
+>494 ubyte >0x4D
+>>495 string >E
+>>>495 string <S
+#OEM-ID is not reliable
+>>>>3 string BootProg
+# It just looks for a program file name in the root directory
+# and then loads and executes the corresponding file.
+# DOS names like STARTUP.BIN,STARTUPC.COM,STARTUPE.EXE are stored as an 8-byte name right-padded with spaces plus a 3-byte extension
+>>>>499 ubyte&0xDF >0 \b, COM/EXE Bootloader
+>>>>>499 string x \b %-.1s
+>>>>>>500 ubyte&0xDF >0
+>>>>>>>500 string x \b%-.1s
+>>>>>>>>501 ubyte&0xDF >0
+>>>>>>>>>501 string x \b%-.1s
+>>>>>>>>>>502 ubyte&0xDF >0
+>>>>>>>>>>>502 string x \b%-.1s
+>>>>>>>>>>>>503 ubyte&0xDF >0
+>>>>>>>>>>>>>503 string x \b%-.1s
+>>>>>>>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>>>>>>>504 string x \b%-.1s
+>>>>>>>>>>>>>>>>505 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>505 string x \b%-.1s
+>>>>>>>>>>>>>>>>>>506 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>>>506 string x \b%-.1s
+#name extension
+>>>>>507 ubyte&0xDF >0 \b.
+>>>>>>507 string x \b%-.1s
+>>>>>>>508 ubyte&0xDF >0
+>>>>>>>>508 string x \b%-.1s
+>>>>>>>>>509 ubyte&0xDF >0
+>>>>>>>>>>509 string x \b%-.1s
+#If the boot sector fails to read any other sector,
+#it prints a very short message ("RE") to the screen and hangs the computer.
+#If the boot sector fails to find needed program in the root directory,
+#it also hangs with another message ("NF").
+>>>>>492 string RENF \b, FAT (12 bit)
+>>>>>495 string RENF \b, FAT (16 bit)
+# http://alexfru.chat.ru/epm.html#bootprog
+>494 ubyte >0x4D
+>>495 string >E
+>>>495 string <S
+#OEM-ID is not reliable
+>>>>3 string BootProg
+# It just looks for a program file name in the root directory
+# and then loads and executes the corresponding file.
+# DOS names like STARTUP.BIN,STARTUPC.COM,STARTUPE.EXE are stored as an 8-byte name right-padded with spaces plus a 3-byte extension
+>>>>499 ubyte&0xDF >0 \b, COM/EXE Bootloader
+>>>>>499 string x \b %-.1s
+>>>>>>500 ubyte&0xDF >0
+>>>>>>>500 string x \b%-.1s
+>>>>>>>>501 ubyte&0xDF >0
+>>>>>>>>>501 string x \b%-.1s
+>>>>>>>>>>502 ubyte&0xDF >0
+>>>>>>>>>>>502 string x \b%-.1s
+>>>>>>>>>>>>503 ubyte&0xDF >0
+>>>>>>>>>>>>>503 string x \b%-.1s
+>>>>>>>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>>>>>>>504 string x \b%-.1s
+>>>>>>>>>>>>>>>>505 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>505 string x \b%-.1s
+>>>>>>>>>>>>>>>>>>506 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>>>506 string x \b%-.1s
+#name extension
+>>>>>507 ubyte&0xDF >0 \b.
+>>>>>>507 string x \b%-.1s
+>>>>>>>508 ubyte&0xDF >0
+>>>>>>>>508 string x \b%-.1s
+>>>>>>>>>509 ubyte&0xDF >0
+>>>>>>>>>>509 string x \b%-.1s
+#If the boot sector fails to read any other sector,
+#it prints a very short message ("RE") to the screen and hangs the computer.
+#If the boot sector fails to find needed program in the root directory,
+#it also hangs with another message ("NF").
+>>>>>492 string RENF \b, FAT (12 bit)
+>>>>>495 string RENF \b, FAT (16 bit)
+# x86 bootloader end
+# updated by Joerg Jenderek at Sep 2007
+>3 ubyte 0
+#no active flag
+>>446 ubyte 0
+# partition 1 not empty
+>>>450 ubyte >0
+# partitions 3,4 empty
+>>>>482 ubyte 0
+>>>>>498 ubyte 0
+# partition 2 ID=0,5,15
+>>>>>>466 ubyte <0x10
+>>>>>>>466 ubyte 0x05 \b, extended partition table
+>>>>>>>466 ubyte 0x0F \b, extended partition table (LBA)
+>>>>>>>466 ubyte 0x0 \b, extended partition table (last)
+# JuMP short bootcodeoffset NOP assembler instructions will usually be EB xx 90
+# http://mirror.href.com/thestarman/asm/2bytejumps.htm#FWD
+# older drives may use Near JuMP instruction E9 xx xx
+>0 lelong&0x009000EB 0x009000EB
+>0 lelong&0x000000E9 0x000000E9
+# smallest short forward jump found so far is 0x3c (?)
+# largest possible short forward jump is 0x7f
+>1 ubyte <0xff \b, code offset 0x%x
+# mtools-3.9.8/msdos.h
+# usual values are commented out so that only information about unusual FAT systems is printed
+# valid sectorsize must be a power of 2 from 32 to 32768
+>>11 uleshort&0x000f x
+>>>11 uleshort <32769
+>>>>11 uleshort >31
+>>>>>21 ubyte&0xf0 0xF0
+>>>>>>3 string >\0 \b, OEM-ID "%8.8s"
+#http://mirror.href.com/thestarman/asm/debug/debug2.htm#IHC
+>>>>>>>8 string IHC \b cached by Windows 9M
+>>>>>>11 uleshort >512 \b, Bytes/sector %u
+#>>>>>>11 uleshort =512 \b, Bytes/sector %u=512 (usual)
+>>>>>>11 uleshort <512 \b, Bytes/sector %u
+>>>>>>13 ubyte >1 \b, sectors/cluster %u
+#>>>>>>13 ubyte =1 \b, sectors/cluster %u (usual on Floppies)
+>>>>>>14 uleshort >32 \b, reserved sectors %u
+#>>>>>>14 uleshort =32 \b, reserved sectors %u (usual Fat32)
+#>>>>>>14 uleshort >1 \b, reserved sectors %u
+#>>>>>>14 uleshort =1 \b, reserved sectors %u (usual FAT12,FAT16)
+>>>>>>14 uleshort <1 \b, reserved sectors %u
+>>>>>>16 ubyte >2 \b, FATs %u
+#>>>>>>16 ubyte =2 \b, FATs %u (usual)
+>>>>>>16 ubyte =1 \b, FAT %u
+>>>>>>16 ubyte >0
+>>>>>>17 uleshort >0 \b, root entries %u
+#>>>>>>17 uleshort =0 \b, root entries %u=0 (usual Fat32)
+>>>>>>19 uleshort >0 \b, sectors %u (volumes <=32 MB)
+#>>>>>>19 uleshort =0 \b, sectors %u=0 (usual Fat32)
+>>>>>>21 ubyte >0xF0 \b, Media descriptor 0x%x
+#>>>>>>21 ubyte =0xF0 \b, Media descriptor 0x%x (usual floppy)
+>>>>>>21 ubyte <0xF0 \b, Media descriptor 0x%x
+>>>>>>22 uleshort >0 \b, sectors/FAT %u
+#>>>>>>22 uleshort =0 \b, sectors/FAT %u=0 (usual Fat32)
+>>>>>>26 ubyte >2 \b, heads %u
+#>>>>>>26 ubyte =2 \b, heads %u (usual floppy)
+>>>>>>26 ubyte =1 \b, heads %u
+#skip for Digital Research DOS (version 3.41) 1440 kB Bootdisk
+>>>>>>38 ubyte !0x70
+>>>>>>>28 ulelong >0 \b, hidden sectors %u
+#>>>>>>>28 ulelong =0 \b, hidden sectors %u (usual floppy)
+>>>>>>>32 ulelong >0 \b, sectors %u (volumes > 32 MB)
+#>>>>>>>32 ulelong =0 \b, sectors %u (volumes > 32 MB)
+# FAT<32 specific
+>>>>>>82 string !FAT32
+#>>>>>>>36 ubyte 0x80 \b, physical drive 0x%x=0x80 (usual harddisk)
+#>>>>>>>36 ubyte 0 \b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>36 ubyte !0x80
+>>>>>>>>36 ubyte !0 \b, physical drive 0x%x
+>>>>>>>37 ubyte >0 \b, reserved 0x%x
+#>>>>>>>37 ubyte =0 \b, reserved 0x%x
+>>>>>>>38 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38 ubyte =0x29
+>>>>>>>>39 ulelong x \b, serial number 0x%x
+>>>>>>>>43 string <NO\ NAME \b, label: "%11.11s"
+>>>>>>>>43 string >NO\ NAME \b, label: "%11.11s"
+>>>>>>>>43 string =NO\ NAME \b, unlabeled
+>>>>>>>54 string FAT \b, FAT
+>>>>>>>>54 string FAT12 \b (12 bit)
+>>>>>>>>54 string FAT16 \b (16 bit)
+# FAT32 specific
+>>>>>>82 string FAT32 \b, FAT (32 bit)
+>>>>>>>36 ulelong x \b, sectors/FAT %u
+>>>>>>>40 uleshort >0 \b, extension flags %u
+#>>>>>>>40 uleshort =0 \b, extension flags %u
+>>>>>>>42 uleshort >0 \b, fsVersion %u
+#>>>>>>>42 uleshort =0 \b, fsVersion %u (usual)
+>>>>>>>44 ulelong >2 \b, rootdir cluster %u
+#>>>>>>>44 ulelong =2 \b, rootdir cluster %u
+#>>>>>>>44 ulelong =1 \b, rootdir cluster %u
+>>>>>>>48 uleshort >1 \b, infoSector %u
+#>>>>>>>48 uleshort =1 \b, infoSector %u (usual)
+>>>>>>>48 uleshort <1 \b, infoSector %u
+>>>>>>>50 uleshort >6 \b, Backup boot sector %u
+#>>>>>>>50 uleshort =6 \b, Backup boot sector %u (usual)
+>>>>>>>50 uleshort <6 \b, Backup boot sector %u
+>>>>>>>54 ulelong >0 \b, reserved1 0x%x
+>>>>>>>58 ulelong >0 \b, reserved2 0x%x
+>>>>>>>62 ulelong >0 \b, reserved3 0x%x
+# same structure as FAT1X
+>>>>>>>64 ubyte >0x80 \b, physical drive 0x%x
+#>>>>>>>64 ubyte =0x80 \b, physical drive 0x%x=80 (usual harddisk)
+>>>>>>>64 ubyte&0x7F >0 \b, physical drive 0x%x
+#>>>>>>>64 ubyte =0 \b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>65 ubyte >0 \b, reserved 0x%x
+>>>>>>>66 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66 ubyte =0x29
+>>>>>>>>67 ulelong x \b, serial number 0x%x
+>>>>>>>>71 string <NO\ NAME \b, label: "%11.11s"
+>>>>>>>71 string >NO\ NAME \b, label: "%11.11s"
+>>>>>>>71 string =NO\ NAME \b, unlabeled
+### FATs end
+>0x200 lelong 0x82564557 \b, BSD disklabel
+# FATX
+0 string FATX FATX filesystem data
+
+
+# Minix filesystems - Juan Cespedes <cespedes at debian.org>
+0x410 leshort 0x137f
+!:strength / 2
+>0x402 beshort < 100
+>0x402 beshort > -1 Minix filesystem, V1, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x137f
+!:strength / 2
+>0x402 beshort < 100
+>0x402 beshort > -1 Minix filesystem, V1 (big endian), %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x138f
+!:strength / 2
+>0x402 beshort < 100
+>0x402 beshort > -1 Minix filesystem, V1, 30 char names, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x138f
+!:strength / 2
+>0x402 beshort < 100
+>0x402 beshort > -1 Minix filesystem, V1, 30 char names (big endian), %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x2468
+>0x402 beshort < 100
+>>0x402 beshort > -1 Minix filesystem, V2, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x2468
+>0x402 beshort < 100
+>0x402 beshort > -1 Minix filesystem, V2 (big endian), %d zones
+>0x1e string minix \b, bootable
+
+0x410 leshort 0x2478
+>0x402 beshort < 100
+>0x402 beshort > -1 Minix filesystem, V2, 30 char names, %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x2478
+>0x402 beshort < 100
+>0x402 beshort > -1 Minix filesystem, V2, 30 char names, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x2478
+>0x402 beshort !0 Minix filesystem, V2, 30 char names (big endian), %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x4d5a
+>0x402 beshort !0 Minix filesystem, V3, %d zones
+>0x1e string minix \b, bootable
+
+# romfs filesystems - Juan Cespedes <cespedes at debian.org>
+0 string -rom1fs- romfs filesystem, version 1
+>8 belong x %d bytes,
+>16 string x named %s.
+
+# netboot image - Juan Cespedes <cespedes at debian.org>
+0 lelong 0x1b031336L Netboot image,
+>4 lelong&0xFFFFFF00 0
+>>4 lelong&0x100 0x000 mode 2
+>>4 lelong&0x100 0x100 mode 3
+>4 lelong&0xFFFFFF00 !0 unknown mode
+
+0x18b string OS/2 OS/2 Boot Manager
+
+# updated by Joerg Jenderek at Oct 2008!!
+# http://syslinux.zytor.com/iso.php
+0 ulelong 0x7c40eafa isolinux Loader
+# http://syslinux.zytor.com/pxe.php
+0 ulelong 0x007c05ea pxelinux Loader
+0 ulelong 0x60669c66 pxelinux Loader
+
+# added by Joerg Jenderek
+# In the second sector (+0x200) are variables according to grub-0.97/stage2/asm.S or
+# grub-1.94/kern/i386/pc/startup.S
+# http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
+# usual values are commented out so that only information about unusual GRUB loaders is printed
+0x200 uleshort 0x70EA
+# found only version 3.{1,2}
+>0x206 ubeshort >0x0300
+# GRUB version (0.5.)95,0.93,0.94,0.96,0.97 > "00"
+>>0x212 ubyte >0x29
+>>>0x213 ubyte >0x29
+# not iso9660_stage1_5
+#>>>0 ulelong&0x00BE5652 0x00BE5652
+>>>>0x213 ubyte >0x29 GRand Unified Bootloader
+# config_file for stage1_5 is 0xffffffff + default "/boot/grub/stage2"
+>>>>0x217 ubyte 0xFF stage1_5
+>>>>0x217 ubyte <0xFF stage2
+>>>>0x206 ubyte x \b version %u
+>>>>0x207 ubyte x \b.%u
+# module_size for 1.94
+>>>>0x208 ulelong <0xffffff \b, installed partition %u
+#>>>>0x208 ulelong =0xffffff \b, %u (default)
+>>>>0x208 ulelong >0xffffff \b, installed partition %u
+# GRUB 0.5.95 unofficial
+>>>>0x20C ulelong&0x2E300000 0x2E300000
+# 0=stage2 1=ffs 2=e2fs 3=fat 4=minix 5=reiserfs
+>>>>>0x20C ubyte x \b, identifier 0x%x
+#>>>>>0x20D ubyte =0 \b, LBA flag 0x%x (default)
+>>>>>0x20D ubyte >0 \b, LBA flag 0x%x
+# GRUB version as string
+>>>>>0x20E string >\0 \b, GRUB version %-s
+# for stage1_5 is 0xffffffff + config_file "/boot/grub/stage2" default
+>>>>>>0x215 ulong 0xffffffff
+>>>>>>>0x219 string >\0 \b, configuration file %-s
+>>>>>>0x215 ulong !0xffffffff
+>>>>>>>0x215 string >\0 \b, configuration file %-s
+# newer GRUB versions
+>>>>0x20C ulelong&0x2E300000 !0x2E300000
+##>>>>>0x20C ulelong =0 \b, saved entry %d (usual)
+>>>>>0x20C ulelong >0 \b, saved entry %d
+# for 1.94 contains kernel image size
+# for 0.93,0.94,0.96,0.97
+# 0=stage2 1=ffs 2=e2fs 3=fat 4=minix 5=reiserfs 6=vstafs 7=jfs 8=xfs 9=iso9660 a=ufs2
+>>>>>0x210 ubyte x \b, identifier 0x%x
+# The flag for LBA forcing is in most cases 0
+#>>>>>0x211 ubyte =0 \b, LBA flag 0x%x (default)
+>>>>>0x211 ubyte >0 \b, LBA flag 0x%x
+# GRUB version as string
+>>>>>0x212 string >\0 \b, GRUB version %-s
+# for stage1_5 is 0xffffffff + config_file "/boot/grub/stage2" default
+>>>>>0x217 ulong 0xffffffff
+>>>>>>0x21b string >\0 \b, configuration file %-s
+>>>>>0x217 ulong !0xffffffff
+>>>>>>0x217 string >\0 \b, configuration file %-s
+
+9564 lelong 0x00011954 Unix Fast File system [v1] (little-endian),
+>8404 string x last mounted on %s,
+#>9504 ledate x last checked at %s,
+>8224 ledate x last written at %s,
+>8401 byte x clean flag %d,
+>8228 lelong x number of blocks %d,
+>8232 lelong x number of data blocks %d,
+>8236 lelong x number of cylinder groups %d,
+>8240 lelong x block size %d,
+>8244 lelong x fragment size %d,
+>8252 lelong x minimum percentage of free blocks %d,
+>8256 lelong x rotational delay %dms,
+>8260 lelong x disk rotational speed %drps,
+>8320 lelong 0 TIME optimization
+>8320 lelong 1 SPACE optimization
+
+42332 lelong 0x19540119 Unix Fast File system [v2] (little-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 leqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 lequad x number of blocks %lld,
+>&-288 lequad x number of data blocks %lld,
+>&-1332 lelong x number of cylinder groups %d,
+>&-1328 lelong x block size %d,
+>&-1324 lelong x fragment size %d,
+>&-180 lelong x average file size %d,
+>&-176 lelong x average number of files in dir %d,
+>&-272 lequad x pending blocks to free %lld,
+>&-264 lelong x pending inodes to free %ld,
+>&-664 lequad x system-wide uuid %0llx,
+>&-1316 lelong x minimum percentage of free blocks %d,
+>&-1248 lelong 0 TIME optimization
+>&-1248 lelong 1 SPACE optimization
+
+66908 lelong 0x19540119 Unix Fast File system [v2] (little-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 leqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 lequad x number of blocks %lld,
+>&-288 lequad x number of data blocks %lld,
+>&-1332 lelong x number of cylinder groups %d,
+>&-1328 lelong x block size %d,
+>&-1324 lelong x fragment size %d,
+>&-180 lelong x average file size %d,
+>&-176 lelong x average number of files in dir %d,
+>&-272 lequad x pending blocks to free %lld,
+>&-264 lelong x pending inodes to free %ld,
+>&-664 lequad x system-wide uuid %0llx,
+>&-1316 lelong x minimum percentage of free blocks %d,
+>&-1248 lelong 0 TIME optimization
+>&-1248 lelong 1 SPACE optimization
+
+9564 belong 0x00011954 Unix Fast File system [v1] (big-endian),
+>7168 belong 0x4c41424c Apple UFS Volume
+>>7186 string x named %s,
+>>7176 belong x volume label version %d,
+>>7180 bedate x created on %s,
+>8404 string x last mounted on %s,
+#>9504 bedate x last checked at %s,
+>8224 bedate x last written at %s,
+>8401 byte x clean flag %d,
+>8228 belong x number of blocks %d,
+>8232 belong x number of data blocks %d,
+>8236 belong x number of cylinder groups %d,
+>8240 belong x block size %d,
+>8244 belong x fragment size %d,
+>8252 belong x minimum percentage of free blocks %d,
+>8256 belong x rotational delay %dms,
+>8260 belong x disk rotational speed %drps,
+>8320 belong 0 TIME optimization
+>8320 belong 1 SPACE optimization
+
+42332 belong 0x19540119 Unix Fast File system [v2] (big-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 beqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 bequad x number of blocks %lld,
+>&-288 bequad x number of data blocks %lld,
+>&-1332 belong x number of cylinder groups %d,
+>&-1328 belong x block size %d,
+>&-1324 belong x fragment size %d,
+>&-180 belong x average file size %d,
+>&-176 belong x average number of files in dir %d,
+>&-272 bequad x pending blocks to free %lld,
+>&-264 belong x pending inodes to free %ld,
+>&-664 bequad x system-wide uuid %0llx,
+>&-1316 belong x minimum percentage of free blocks %d,
+>&-1248 belong 0 TIME optimization
+>&-1248 belong 1 SPACE optimization
+
+66908 belong 0x19540119 Unix Fast File system [v2] (big-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 beqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 bequad x number of blocks %lld,
+>&-288 bequad x number of data blocks %lld,
+>&-1332 belong x number of cylinder groups %d,
+>&-1328 belong x block size %d,
+>&-1324 belong x fragment size %d,
+>&-180 belong x average file size %d,
+>&-176 belong x average number of files in dir %d,
+>&-272 bequad x pending blocks to free %lld,
+>&-264 belong x pending inodes to free %ld,
+>&-664 bequad x system-wide uuid %0llx,
+>&-1316 belong x minimum percentage of free blocks %d,
+>&-1248 belong 0 TIME optimization
+>&-1248 belong 1 SPACE optimization
+
+# ext2/ext3 filesystems - Andreas Dilger <adilger at dilger.ca>
+# ext4 filesystem - Eric Sandeen <sandeen at sandeen.net>
+0x438 leshort 0xEF53 Linux
+>0x44c lelong x rev %d
+>0x43e leshort x \b.%d
+# No journal? ext2
+>0x45c lelong ^0x0000004 ext2 filesystem data
+>>0x43a leshort ^0x0000001 (mounted or unclean)
+# Has a journal? ext3 or ext4
+>0x45c lelong &0x0000004
+# and small INCOMPAT?
+>>0x460 lelong <0x0000040
+# and small RO_COMPAT?
+>>>0x464 lelong <0x0000008 ext3 filesystem data
+# else large RO_COMPAT?
+>>>0x464 lelong >0x0000007 ext4 filesystem data
+# else large INCOMPAT?
+>>0x460 lelong >0x000003f ext4 filesystem data
+# General flags for any ext* fs
+>0x460 lelong &0x0000004 (needs journal recovery)
+>0x43a leshort &0x0000002 (errors)
+# INCOMPAT flags
+>0x460 lelong &0x0000001 (compressed)
+#>0x460 lelong &0x0000002 (filetype)
+#>0x460 lelong &0x0000010 (meta bg)
+>0x460 lelong &0x0000040 (extents)
+>0x460 lelong &0x0000080 (64bit)
+#>0x460 lelong &0x0000100 (mmp)
+#>0x460 lelong &0x0000200 (flex bg)
+# RO_COMPAT flags
+#>0x464 lelong &0x0000001 (sparse super)
+>0x464 lelong &0x0000002 (large files)
+>0x464 lelong &0x0000008 (huge files)
+#>0x464 lelong &0x0000010 (gdt checksum)
+#>0x464 lelong &0x0000020 (many subdirs)
+#>0x464 lelong &0x0000040 (extra isize)
+
+# SGI disk labels - Nathan Scott <nathans at debian.org>
+0 belong 0x0BE5A941 SGI disk label (volume header)
+
+# SGI XFS filesystem - Nathan Scott <nathans at debian.org>
+0 belong 0x58465342 SGI XFS filesystem data
+>0x4 belong x (blksz %d,
+>0x68 beshort x inosz %d,
+>0x64 beshort ^0x2004 v1 dirs)
+>0x64 beshort &0x2004 v2 dirs)
+
+############################################################################
+# Minix-ST kernel floppy
+0x800 belong 0x46fc2700 Atari-ST Minix kernel image
+>19 string \240\5\371\5\0\011\0\2\0 \b, 720k floppy
+>19 string \320\2\370\5\0\011\0\1\0 \b, 360k floppy
+
+############################################################################
+# Hmmm, is this a better way of detecting _standard_ floppy images ?
+19 string \320\2\360\3\0\011\0\1\0 DOS floppy 360k
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+19 string \240\5\371\3\0\011\0\2\0 DOS floppy 720k
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+19 string \100\013\360\011\0\022\0\2\0 DOS floppy 1440k
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+
+19 string \240\5\371\5\0\011\0\2\0 DOS floppy 720k, IBM
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+19 string \100\013\371\5\0\011\0\2\0 DOS floppy 1440k, mkdosfs
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+
+19 string \320\2\370\5\0\011\0\1\0 Atari-ST floppy 360k
+19 string \240\5\371\5\0\011\0\2\0 Atari-ST floppy 720k
+
+# Valid media descriptor bytes for MS-DOS:
+#
+# Byte Capacity Media Size and Type
+# -------------------------------------------------
+#
+# F0 2.88 MB 3.5-inch, 2-sided, 36-sector
+# F0 1.44 MB 3.5-inch, 2-sided, 18-sector
+# F9 720K 3.5-inch, 2-sided, 9-sector
+# F9 1.2 MB 5.25-inch, 2-sided, 15-sector
+# FD 360K 5.25-inch, 2-sided, 9-sector
+# FF 320K 5.25-inch, 2-sided, 8-sector
+# FC 180K 5.25-inch, 1-sided, 9-sector
+# FE 160K 5.25-inch, 1-sided, 8-sector
+# FE 250K 8-inch, 1-sided, single-density
+# FD 500K 8-inch, 2-sided, single-density
+# FE 1.2 MB 8-inch, 2-sided, double-density
+# F8 ----- Fixed disk
+#
+# FC xxxK Apricot 70x1x9 boot disk.
+#
+# Originally a bitmap:
+# xxxxxxx0 Not two sided
+# xxxxxxx1 Double sided
+# xxxxxx0x Not 8 SPT
+# xxxxxx1x 8 SPT
+# xxxxx0xx Not Removable drive
+# xxxxx1xx Removable drive
+# 11111xxx Must be one.
+#
+# But now it's rather random:
+# 111111xx Low density disk
+# 00 SS, Not 8 SPT
+# 01 DS, Not 8 SPT
+# 10 SS, 8 SPT
+# 11 DS, 8 SPT
+#
+# 11111001 Double density 3.5-inch floppy disk, high density 5.25-inch
+# 11110000 High density 3.5-inch floppy disk
+# 11111000 Hard disk any format
+#
+
+# CDROM Filesystems
+# Modified for UDF by gerardo.cacciari at gmail.com
+32769 string CD001
+!:mime application/x-iso9660-image
+>38913 string !NSR0 ISO 9660 CD-ROM filesystem data
+>38913 string NSR0 UDF filesystem data
+>>38917 string 1 (version 1.0)
+>>38917 string 2 (version 1.5)
+>>38917 string 3 (version 2.0)
+>>38917 byte >0x33 (unknown version, ID 0x%X)
+>>38917 byte <0x31 (unknown version, ID 0x%X)
+# "application id" which appears to be used as a volume label
+#>32808 string/T >\0 '%s'
+>34816 string \000CD001\001EL\ TORITO\ SPECIFICATION (bootable)
+37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors)
+!:mime application/x-iso9660-image
+32776 string CDROM High Sierra CD-ROM filesystem data
+
+# .cso files
+0 string CISO Compressed ISO CD image
+
+# cramfs filesystem - russell at coker.com.au
+0 lelong 0x28cd3d45 Linux Compressed ROM File System data, little endian
+>4 lelong x size %lu
+>8 lelong &1 version #2
+>8 lelong &2 sorted_dirs
+>8 lelong &4 hole_support
+>32 lelong x CRC 0x%x,
+>36 lelong x edition %lu,
+>40 lelong x %lu blocks,
+>44 lelong x %lu files
+
+0 belong 0x28cd3d45 Linux Compressed ROM File System data, big endian
+>4 belong x size %lu
+>8 belong &1 version #2
+>8 belong &2 sorted_dirs
+>8 belong &4 hole_support
+>32 belong x CRC 0x%x,
+>36 belong x edition %lu,
+>40 belong x %lu blocks,
+>44 belong x %lu files
+
+# reiserfs - russell at coker.com.au
+0x10034 string ReIsErFs ReiserFS V3.5
+0x10034 string ReIsEr2Fs ReiserFS V3.6
+>0x1002c leshort x block size %d
+>0x10032 leshort &2 (mounted or unclean)
+>0x10000 lelong x num blocks %d
+>0x10040 lelong 1 tea hash
+>0x10040 lelong 2 yura hash
+>0x10040 lelong 3 r5 hash
+
+# JFFS - russell at coker.com.au
+0 lelong 0x34383931 Linux Journalled Flash File system, little endian
+0 belong 0x34383931 Linux Journalled Flash File system, big endian
+
+# EST flat binary format (which isn't, but anyway)
+# From: Mark Brown <broonie at sirena.org.uk>
+0 string ESTFBINR EST flat binary
+
+# Aculab VoIP firmware
+# From: Mark Brown <broonie at sirena.org.uk>
+0 string VoIP\ Startup\ and Aculab VoIP firmware
+>35 string x format %s
+
+# From: Mark Brown <broonie at sirena.org.uk> [old]
+# From: Behan Webster <behanw at websterwood.com>
+0 belong 0x27051956 u-boot legacy uImage,
+>32 string x %s,
+>28 byte 0 Invalid os/
+>28 byte 1 OpenBSD/
+>28 byte 2 NetBSD/
+>28 byte 3 FreeBSD/
+>28 byte 4 4.4BSD/
+>28 byte 5 Linux/
+>28 byte 6 SVR4/
+>28 byte 7 Esix/
+>28 byte 8 Solaris/
+>28 byte 9 Irix/
+>28 byte 10 SCO/
+>28 byte 11 Dell/
+>28 byte 12 NCR/
+>28 byte 13 LynxOS/
+>28 byte 14 VxWorks/
+>28 byte 15 pSOS/
+>28 byte 16 QNX/
+>28 byte 17 Firmware/
+>28 byte 18 RTEMS/
+>28 byte 19 ARTOS/
+>28 byte 20 Unity OS/
+>28 byte 21 INTEGRITY/
+>29 byte 0 \bInvalid CPU,
+>29 byte 1 \bAlpha,
+>29 byte 2 \bARM,
+>29 byte 3 \bIntel x86,
+>29 byte 4 \bIA64,
+>29 byte 5 \bMIPS,
+>29 byte 6 \bMIPS 64-bit,
+>29 byte 7 \bPowerPC,
+>29 byte 8 \bIBM S390,
+>29 byte 9 \bSuperH,
+>29 byte 10 \bSparc,
+>29 byte 11 \bSparc 64-bit,
+>29 byte 12 \bM68K,
+>29 byte 13 \bNios-32,
+>29 byte 14 \bMicroBlaze,
+>29 byte 15 \bNios-II,
+>29 byte 16 \bBlackfin,
+>29 byte 17 \bAVR32,
+>29 byte 18 \bSTMicroelectronics ST200,
+>30 byte 0 Invalid Image
+>30 byte 1 Standalone Program
+>30 byte 2 OS Kernel Image
+>30 byte 3 RAMDisk Image
+>30 byte 4 Multi-File Image
+>30 byte 5 Firmware Image
+>30 byte 6 Script File
+>30 byte 7 Filesystem Image (any type)
+>30 byte 8 Binary Flat Device Tree BLOB
+>31 byte 0 (Not compressed),
+>31 byte 1 (gzip),
+>31 byte 2 (bzip2),
+>31 byte 3 (lzma),
+>12 belong x %d bytes,
+>8 bedate x %s,
+>16 belong x Load Address: 0x%08X,
+>20 belong x Entry Point: 0x%08X,
+>4 belong x Header CRC: 0x%08X,
+>24 belong x Data CRC: 0x%08X
+
+# JFFS2 file system
+0 leshort 0x1984 Linux old jffs2 filesystem data little endian
+0 leshort 0x1985 Linux jffs2 filesystem data little endian
+
+# Squashfs
+0 string sqsh Squashfs filesystem, big endian,
+>28 beshort x version %d.
+>30 beshort x \b%d,
+>28 beshort <3
+>>8 belong x %d bytes,
+>28 beshort >2
+>>28 beshort <4
+>>>63 bequad x %lld bytes,
+>>28 beshort >3
+>>>40 bequad x %lld bytes,
+#>>67 belong x %d bytes,
+>4 belong x %d inodes,
+>28 beshort <2
+>>32 beshort x blocksize: %d bytes,
+>28 beshort >1
+>>28 beshort <4
+>>>51 belong x blocksize: %d bytes,
+>>28 beshort >3
+>>>12 belong x blocksize: %d bytes,
+>28 beshort <4
+>>39 bedate x created: %s
+>28 beshort >3
+>>8 bedate x created: %s
+0 string hsqs Squashfs filesystem, little endian,
+>28 leshort x version %d.
+>30 leshort x \b%d,
+>28 leshort <3
+>>8 lelong x %d bytes,
+>28 leshort >2
+>>28 leshort <4
+>>>63 lequad x %lld bytes,
+>>28 leshort >3
+>>>40 lequad x %lld bytes,
+#>>63 lelong x %d bytes,
+>4 lelong x %d inodes,
+>28 leshort <2
+>>32 leshort x blocksize: %d bytes,
+>28 leshort >1
+>>28 leshort <4
+>>>51 lelong x blocksize: %d bytes,
+>>28 leshort >3
+>>>12 lelong x blocksize: %d bytes,
+>28 leshort <4
+>>39 ledate x created: %s
+>28 leshort >3
+>>8 ledate x created: %s
+
+0 string td\000 floppy image data (TeleDisk)
+
+# AFS Dump Magic
+# From: Ty Sarna <tsarna at sarna.org>
+0 string \x01\xb3\xa1\x13\x22 AFS Dump
+>&0 belong x (v%d)
+>>&0 byte 0x76
+>>>&0 belong x Vol %d,
+>>>>&0 byte 0x6e
+>>>>>&0 string x %s
+>>>>>>&1 byte 0x74
+>>>>>>>&0 beshort 2
+>>>>>>>>&4 bedate x on: %s
+>>>>>>>>&0 bedate =0 full dump
+>>>>>>>>&0 bedate !0 incremental since: %s
+
+#----------------------------------------------------------
+#delta ISO Daniel Novotny (dnovotny at redhat.com)
+0 string DISO Delta ISO data
+>4 belong x version %d
+
+# VMS backup savesets - gerardo.cacciari at gmail.com
+#
+4 string \x01\x00\x01\x00\x01\x00
+>(0.s+16) string \x01\x01
+>>&(&0.b+8) byte 0x42 OpenVMS backup saveset data
+>>>40 lelong x (block size %d,
+>>>49 string >\0 original name '%s',
+>>>2 short 1024 VAX generated)
+>>>2 short 2048 AXP generated)
+>>>2 short 4096 I64 generated)
+
+# Summary: Oracle Clustered Filesystem
+# Created by: Aaron Botsis <redhat at digitalmafia.org>
+8 string OracleCFS Oracle Clustered Filesystem,
+>4 long x rev %d
+>0 long x \b.%d,
+>560 string x label: %.64s,
+>136 string x mountpoint: %.128s
+
+# Summary: Oracle ASM tagged volume
+# Created by: Aaron Botsis <redhat at digitalmafia.org>
+32 string ORCLDISK Oracle ASM Volume,
+>40 string x Disk Name: %0.12s
+32 string ORCLCLRD Oracle ASM Volume (cleared),
+>40 string x Disk Name: %0.12s
+
+# Oracle Clustered Filesystem - Aaron Botsis <redhat at digitalmafia.org>
+8 string OracleCFS Oracle Clustered Filesystem,
+>4 long x rev %d
+>0 long x \b.%d,
+>560 string x label: %.64s,
+>136 string x mountpoint: %.128s
+
+# Oracle ASM tagged volume - Aaron Botsis <redhat at digitalmafia.org>
+32 string ORCLDISK Oracle ASM Volume,
+>40 string x Disk Name: %0.12s
+32 string ORCLCLRD Oracle ASM Volume (cleared),
+>40 string x Disk Name: %0.12s
+
+# Compaq/HP RILOE floppy image
+# From: Dirk Jagdmann <doj at cubic.org>
+0 string CPQRFBLO Compaq/HP RILOE floppy image
+
+#------------------------------------------------------------------------------
+# Files-11 On-Disk Structure (OpenVMS file system) - gerardo.cacciari at gmail.com
+# These bits come from LBN 1 (home block) of ODS-2 and ODS-5 volumes, which is
+# mapped to VBN 2 of [000000]INDEXF.SYS;1
+#
+1008 string DECFILE11B Files-11 On-Disk Structure
+>525 byte x Level %d
+>525 byte x (ODS-%d OpenVMS file system),
+>984 string x volume label is '%-12.12s'
+
+# From: Thomas Klausner <wiz at NetBSD.org>
+# http://filext.com/file-extension/DAA
+# describes the daa file format. The magic would be:
+0 string DAA\x0\x0\x0\x0\x0 PowerISO Direct-Access-Archive
+
+# From Albert Cahalan <acahalan at gmail.com>
+# really le32 operation,destination,payloadsize (but quite predictable)
+# 01 00 00 00 00 00 00 c0 00 02 00 00
+0 string \1\0\0\0\0\0\0\300\0\2\0\0 Marvell Libertas firmware
+
+# From Eric Sandeen
+# GFS (GFS1 and GFS2)
+0x10000 belong 0x01161970 Linux
+>0x10018 belong 0x0000051d GFS1 Filesystem
+>>0x10024 belong x (blocksize %d,
+>>0x10060 string >\0 lockproto %s)
+>0x10018 belong 0x00000709 GFS2 Filesystem
+>>0x10024 belong x (blocksize %d,
+>>0x10060 string >\0 lockproto %s)
+
+# BTRFS
+0x10040 string _BHRfS_M BTRFS Filesystem
+>0x1012b string >\0 (label "%s",
+>0x10090 lelong x sectorsize %d,
+>0x10094 lelong x nodesize %d,
+>0x10098 lelong x leafsize %d)
+
+
+# dvdisaster's .ecc
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+0 string *dvdisaster* dvdisaster error correction file
+
+# xfs metadump image
+# mb_magic XFSM at 0; superblock magic XFSB at 1 << mb_blocklog
+# but can we do the << ? For now it's always 512 (0x200) anyway.
+0 string XFSM
+>0x200 string XFSB XFS filesystem metadump image
+
+
+#------------------------------------------------------------------------------
+# $File: filesystems,v 1.55 2010/01/16 17:45:12 chl Exp $
+# filesystems: file(1) magic for different filesystems
+#
+0 string \366\366\366\366 PC formatted floppy with no filesystem
+# Sun disk labels
+# From /usr/include/sun/dklabel.h:
+0774 beshort 0xdabe
+# modified by Joerg Jenderek, because original test
+# succeeds for Cabinet archive dao360.dl_ with negative blocks
+>0770 long >0 Sun disk label
+>>0 string x '%s
+>>>31 string >\0 \b%s
+>>>>63 string >\0 \b%s
+>>>>>95 string >\0 \b%s
+>>0 string x \b'
+>>0734 short >0 %d rpm,
+>>0736 short >0 %d phys cys,
+>>0740 short >0 %d alts/cyl,
+>>0746 short >0 %d interleave,
+>>0750 short >0 %d data cyls,
+>>0752 short >0 %d alt cyls,
+>>0754 short >0 %d heads/partition,
+>>0756 short >0 %d sectors/track,
+>>0764 long >0 start cyl %ld,
+>>0770 long x %ld blocks
+# Is there a boot block written 1 sector in?
+>512 belong&077777777 0600407 \b, boot block present
+# Joerg Jenderek: Smart Boot Manager backup file is 41 byte header + first sectors of disc
+# (http://btmgr.sourceforge.net/docs/user-guide-3.html)
+0 string SBMBAKUP_ Smart Boot Manager backup file
+>9 string x \b, version %-5.5s
+>>14 string =_
+>>>15 string x %-.1s
+>>>>16 string =_ \b.
+>>>>>17 string x \b%-.1s
+>>>>>>18 string =_ \b.
+>>>>>>>19 string x \b%-.1s
+>>>22 ubyte 0
+>>>>21 ubyte x \b, from drive 0x%x
+>>>22 ubyte >0
+>>>>21 string x \b, from drive %s
+
+# Joerg Jenderek
+# DOS Emulator image is 128 byte, null right padded header + harddisc image
+0 string DOSEMU\0
+>0x27E leshort 0xAA55
+#offset is 128
+>>19 ubyte 128
+>>>(19.b-1) ubyte 0x0 DOS Emulator image
+>>>>7 ulelong >0 \b, %u heads
+>>>>11 ulelong >0 \b, %d sectors/track
+>>>>15 ulelong >0 \b, %d cylinders
+
+# updated by Joerg Jenderek at Sep 2007
+# only for sector sizes with 512 or more Bytes
+0x1FE leshort 0xAA55 x86 boot sector
+# to do: also handle sectors smaller than 512 Bytes and some other files, GRR
+#30 search/481 \x55\xAA x86 boot sector
+# not for BeOS floppy 1440k, MBRs
+#(11.s-2) uleshort 0xAA55 x86 boot sector
+>2 string OSBS \b, OS/BS MBR
+# J\xf6rg Jenderek <joerg dot jenderek at web dot de>
+>0x8C string Invalid\ partition\ table \b, MS-DOS MBR
+# dr-dos with some upper-, lowercase variants
+>0x9D string Invalid\ partition\ table$
+>>181 string No\ Operating\ System$
+>>>201 string Operating\ System\ load\ error$ \b, DR-DOS MBR, Version 7.01 to 7.03
+>0x9D string Invalid\ partition\ table$
+>>181 string No\ operating\ system$
+>>>201 string Operating\ system\ load\ error$ \b, DR-DOS MBR, Version 7.01 to 7.03
+>342 string Invalid\ partition\ table$
+>>366 string No\ operating\ system$
+>>>386 string Operating\ system\ load\ error$ \b, DR-DOS MBR, version 7.01 to 7.03
+>295 string NEWLDR\0
+>>302 string Bad\ PT\ $
+>>>310 string No\ OS\ $
+>>>>317 string OS\ load\ err$
+>>>>>329 string Moved\ or\ missing\ IBMBIO.LDR\n\r
+>>>>>>358 string Press\ any\ key\ to\ continue.\n\r$
+>>>>>>>387 string Copyright\ (c)\ 1984,1998
+>>>>>>>>411 string Caldera\ Inc.\0 \b, DR-DOS MBR (IBMBIO.LDR)
+>0x10F string Ung\201ltige\ Partitionstabelle \b, MS-DOS MBR, german version 4.10.1998, 4.10.2222
+>>0x1B8 ubelong >0 \b, Serial 0x%-.4x
+>0x8B string Ung\201ltige\ Partitionstabelle \b, MS-DOS MBR, german version 5.00 to 4.00.950
+>271 string Invalid\ partition\ table\0
+>>295 string Error\ loading\ operating\ system\0
+>>>326 string Missing\ operating\ system\0 \b, mbr
+#
+>139 string Invalid\ partition\ table\0
+>>163 string Error\ loading\ operating\ system\0
+>>>194 string Missing\ operating\ system\0 \b, Microsoft Windows XP mbr
+# http://www.heise.de/ct/05/09/006/ page 184
+#HKEY_LOCAL_MACHINE\SYSTEM\MountedDevices\DosDevices\?:=Serial4Bytes+8Bytes
+>>>>0x1B8 ulelong >0 \b,Serial 0x%-.4x
+>300 string Invalid\ partition\ table\0
+>>324 string Error\ loading\ operating\ system\0
+>>>355 string Missing\ operating\ system\0 \b, Microsoft Windows XP MBR
+#??>>>389 string Invalid\ system\ disk
+>>>>0x1B8 ulelong >0 \b, Serial 0x%-.4x
+>300 string Ung\201ltige\ Partitionstabelle
+#split string to avoid error: String too long
+>>328 string Fehler\ beim\ Laden\
+>>>346 string des\ Betriebssystems
+>>>>366 string Betriebssystem\ nicht\ vorhanden \b, Microsoft Windows XP MBR (german)
+>>>>>0x1B8 ulelong >0 \b, Serial 0x%-.4x
+#>0x145 string Default:\ F \b, FREE-DOS MBR
+#>0x14B string Default:\ F \b, FREE-DOS 1.0 MBR
+>0x145 search/7 Default:\ F \b, FREE-DOS MBR
+#>>313 string F0\ .\ .\ .
+#>>>322 string disk\ 1
+#>>>>382 string FAT3
+>64 string no\ active\ partition\ found
+>>96 string read\ error\ while\ reading\ drive \b, FREE-DOS Beta 0.9 MBR
+# Ranish Partition Manager http://www.ranish.com/part/
+>387 search/4 \0\ Error!\r
+>>378 search/7 Virus!
+>>>397 search/4 Booting\
+>>>>408 search/4 HD1/\0 \b, Ranish MBR (
+>>>>>416 string Writing\ changes... \b2.37
+>>>>>>438 ubyte x \b,0x%x dots
+>>>>>>440 ubyte >0 \b,virus check
+>>>>>>441 ubyte >0 \b,partition %c
+#2.38,2.42,2.44
+>>>>>416 string !Writing\ changes... \b
+>>>>>>418 ubyte 1 \bvirus check,
+>>>>>>419 ubyte x \b0x%x seconds
+>>>>>>420 ubyte&0x0F >0 \b,partition
+>>>>>>>420 ubyte&0x0F <5 \b %x
+>>>>>>>420 ubyte&0x0F 0Xf \b ask
+>>>>>420 ubyte x \b)
+#
+>271 string Operating\ system\ loading
+>>296 string error\r \b, SYSLINUX MBR (2.10)
+# http://www.acronis.de/
+>362 string MBR\ Error\ \0\r
+>>376 string ress\ any\ key\ to\
+>>>392 string boot\ from\ floppy...\0 \b, Acronis MBR
+# added by Joerg Jenderek
+# http://www.visopsys.org/
+# http://partitionlogic.org.uk/
+>309 string No\ bootable\ partition\ found\r
+>>339 string I/O\ Error\ reading\ boot\ sector\r \b, Visopsys MBR
+>349 string No\ bootable\ partition\ found\r
+>>379 string I/O\ Error\ reading\ boot\ sector\r \b, simple Visopsys MBR
+# bootloader, bootmanager
+>0x40 string SBML
+# label with 11 characters of FAT 12 bit filesystem
+>>43 string SMART\ BTMGR
+>>>430 string SBMK\ Bad!\r \b, Smart Boot Manager
+# OEM-ID not always "SBM"
+#>>>>3 strings SBM
+>>>>6 string >\0 \b, version %s
+>382 string XOSLLOADXCF \b, eXtended Operating System Loader
+>6 string LILO \b, LInux i386 boot LOader
+>>120 string LILO \b, version 22.3.4 SuSe
+>>172 string LILO \b, version 22.5.8 Debian
+# updated by Joerg Jenderek at Oct 2008
+# variables according to grub-0.97/stage1/stage1.S or
+# http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
+# usual values are marked with comments to get only information about strange GRUB loaders
+>342 search/60 \0Geom\0
+#>0 ulelong x %x=0x009048EB , 0x2a9048EB 0
+>>0x41 ubyte <2
+>>>0x3E ubyte >2 \b; GRand Unified Bootloader
+# 0x3 for 0.5.95,0.93,0.94,0.96 0x4 for 1.90
+>>>>0x3E ubyte x \b, stage1 version 0x%x
+#If it is 0xFF, use a drive passed by BIOS
+>>>>0x40 ubyte <0xFF \b, boot drive 0x%x
+# in most cases 0,1,0x2e for GRUB 0.5.95
+>>>>0x41 ubyte >0 \b, LBA flag 0x%x
+>>>>0x42 uleshort <0x8000 \b, stage2 address 0x%x
+#>>>>0x42 uleshort =0x8000 \b, stage2 address 0x%x (usual)
+>>>>0x42 uleshort >0x8000 \b, stage2 address 0x%x
+#>>>>0x44 ulelong =1 \b, 1st sector stage2 0x%x (default)
+>>>>0x44 ulelong >1 \b, 1st sector stage2 0x%x
+>>>>0x48 uleshort <0x800 \b, stage2 segment 0x%x
+#>>>>0x48 uleshort =0x800 \b, stage2 segment 0x%x (usual)
+>>>>0x48 uleshort >0x800 \b, stage2 segment 0x%x
+>>>>402 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>394 string stage1 \b, GRUB version 0.5.95
+>>>>382 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>376 string GRUB\ \0 \b, GRUB version 0.93 or 1.94
+>>>>383 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>377 string GRUB\ \0 \b, GRUB version 0.94
+>>>>385 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>379 string GRUB\ \0 \b, GRUB version 0.95 or 0.96
+>>>>391 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>>385 string GRUB\ \0 \b, GRUB version 0.97
+#unknown version
+>>>343 string Geom\0Read\0\ Error\0
+>>>>321 string Loading\ stage1.5 \b, GRUB version x.y
+>>>380 string Geom\0Hard\ Disk\0Read\0\ Error\0
+>>>>374 string GRUB\ \0 \b, GRUB version n.m
+# http://syslinux.zytor.com/
+>478 string Boot\ failed\r
+>>495 string LDLINUX\ SYS \b, SYSLINUX bootloader (1.62)
+>480 string Boot\ failed\r
+>>495 string LDLINUX\ SYS \b, SYSLINUX bootloader (2.06 or 2.11)
+>484 string Boot\ error\r \b, SYSLINUX bootloader (3.11)
+>395 string chksum\0\ ERROR!\0 \b, Gujin bootloader
+# http://www.bcdwb.de/bcdw/index_e.htm
+>3 string BCDL
+>>498 string BCDL\ \ \ \ BIN \b, Bootable CD Loader (1.50Z)
+# mbr partition table entries
+# OEM-ID does not contain MicroSoft,NEWLDR,DOS,SYSLINUX,or MTOOLs
+>3 string !MS
+>>3 string !SYSLINUX
+>>>3 string !MTOOL
+>>>>3 string !NEWLDR
+>>>>>5 string !DOS
+# not FAT (32 bit)
+>>>>>>82 string !FAT32
+#not Linux kernel
+>>>>>>>514 string !HdrS
+#not BeOS
+>>>>>>>>422 string !Be\ Boot\ Loader
+# active flag 0 or 0x80 and type > 0
+>>>>>>>>>446 ubyte <0x81
+>>>>>>>>>>446 ubyte&0x7F 0
+>>>>>>>>>>>450 ubyte >0 \b; partition 1: ID=0x%x
+>>>>>>>>>>>>446 ubyte 0x80 \b, active
+>>>>>>>>>>>>447 ubyte x \b, starthead %u
+#>>>>>>>>>>>>448 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>448 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>454 ulelong x \b, startsector %u
+>>>>>>>>>>>>458 ulelong x \b, %u sectors
+#
+>>>>>>>>>462 ubyte <0x81
+>>>>>>>>>>462 ubyte&0x7F 0
+>>>>>>>>>>>466 ubyte >0 \b; partition 2: ID=0x%x
+>>>>>>>>>>>>462 ubyte 0x80 \b, active
+>>>>>>>>>>>>463 ubyte x \b, starthead %u
+#>>>>>>>>>>>>464 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>464 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>470 ulelong x \b, startsector %u
+>>>>>>>>>>>>474 ulelong x \b, %u sectors
+#
+>>>>>>>>>478 ubyte <0x81
+>>>>>>>>>>478 ubyte&0x7F 0
+>>>>>>>>>>>482 ubyte >0 \b; partition 3: ID=0x%x
+>>>>>>>>>>>>478 ubyte 0x80 \b, active
+>>>>>>>>>>>>479 ubyte x \b, starthead %u
+#>>>>>>>>>>>>480 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>481 ubyte x \b, start C2S: 0x%x
+#>>>>>>>>>>>>480 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>486 ulelong x \b, startsector %u
+>>>>>>>>>>>>490 ulelong x \b, %u sectors
+#
+>>>>>>>>>494 ubyte <0x81
+>>>>>>>>>>494 ubyte&0x7F 0
+>>>>>>>>>>>498 ubyte >0 \b; partition 4: ID=0x%x
+>>>>>>>>>>>>494 ubyte 0x80 \b, active
+>>>>>>>>>>>>495 ubyte x \b, starthead %u
+#>>>>>>>>>>>>496 ubyte x \b, start C_S: 0x%x
+#>>>>>>>>>>>>496 ubeshort&1023 x \b, startcylinder? %d
+>>>>>>>>>>>>502 ulelong x \b, startsector %u
+>>>>>>>>>>>>506 ulelong x \b, %u sectors
+# mbr partition table entries end
+# http://www.acronis.de/
+#FAT label=ACRONIS\ SZ
+#OEM-ID=BOOTWIZ0
+>442 string Non-system\ disk,\
+>>459 string press\ any\ key...\x7\0 \b, Acronis Startup Recovery Loader
+# DOS names like F11.SYS are 8 right space padded bytes+3 bytes
+>>>477 ubyte&0xDF >0
+>>>>477 string x \b %-.3s
+>>>>>480 ubyte&0xDF >0
+>>>>>>480 string x \b%-.5s
+>>>>485 ubyte&0xDF >0
+>>>>>485 string x \b.%-.3s
+#
+>185 string FDBOOT\ Version\
+>>204 string \rNo\ Systemdisk.\
+>>>220 string Booting\ from\ harddisk.\n\r
+>>>245 string Cannot\ load\ from\ harddisk.\n\r
+>>>>273 string Insert\ Systemdisk\
+>>>>>291 string and\ press\ any\ key.\n\r \b, FDBOOT harddisk Bootloader
+>>>>>>200 string >\0 \b, version %-3s
+>242 string Bootsector\ from\ C.H.\ Hochst\204
+>>278 string No\ Systemdisk.\
+>>>293 string Booting\ from\ harddisk.\n\r
+>>>441 string Cannot\ load\ from\ harddisk.\n\r
+>>>>469 string Insert\ Systemdisk\
+>>>>>487 string and\ press\ any\ key.\n\r \b, WinImage harddisk Bootloader
+>>>>>>209 string >\0 \b, version %-4.4s
+>(1.b+2) ubyte 0xe
+>>(1.b+3) ubyte 0x1f
+>>>(1.b+4) ubyte 0xbe
+>>>>(1.b+5) ubyte 0x77
+>>>>(1.b+6) ubyte 0x7c
+>>>>>(1.b+7) ubyte 0xac
+>>>>>>(1.b+8) ubyte 0x22
+>>>>>>>(1.b+9) ubyte 0xc0
+>>>>>>>>(1.b+10) ubyte 0x74
+>>>>>>>>>(1.b+11) ubyte 0xb
+>>>>>>>>>>(1.b+12) ubyte 0x56
+>>>>>>>>>>(1.b+13) ubyte 0xb4 \b, mkdosfs boot message display
+>214 string Please\ try\ to\ install\ FreeDOS\ \b, DOS Emulator boot message display
+#>>244 string from\ dosemu-freedos-*-bin.tgz\r
+#>>>170 string Sorry,\ could\ not\ load\ an\
+#>>>>195 string operating\ system.\r\n
+#
+>103 string This\ is\ not\ a\ bootable\ disk.\
+>>132 string Please\ insert\ a\ bootable\
+>>>157 string floppy\ and\r\n
+>>>>169 string press\ any\ key\ to\ try\ again...\r \b, FREE-DOS message display
+#
+>66 string Solaris\ Boot\ Sector
+>>99 string Incomplete\ MDBoot\ load.
+>>>89 string Version \b, Sun Solaris Bootloader
+>>>>97 byte x version %c
+#
+>408 string OS/2\ !!\ SYS01475\r\0
+>>429 string OS/2\ !!\ SYS02025\r\0
+>>>450 string OS/2\ !!\ SYS02027\r\0
+>>>469 string OS2BOOT\ \ \ \ \b, IBM OS/2 Warp bootloader
+#
+>409 string OS/2\ !!\ SYS01475\r\0
+>>430 string OS/2\ !!\ SYS02025\r\0
+>>>451 string OS/2\ !!\ SYS02027\r\0
+>>>470 string OS2BOOT\ \ \ \ \b, IBM OS/2 Warp Bootloader
+>112 string This\ disk\ is\ not\ bootable\r
+>>142 string If\ you\ wish\ to\ make\ it\ bootable
+>>>176 string run\ the\ DOS\ program\ SYS\
+>>>200 string after\ the\r
+>>>>216 string system\ has\ been\ loaded\r\n
+>>>>>242 string Please\ insert\ a\ DOS\ diskette\
+>>>>>271 string into\r\n\ the\ drive\ and\
+>>>>>>292 string strike\ any\ key...\0 \b, IBM OS/2 Warp message display
+# XP
+>430 string NTLDR\ is\ missing\xFF\r\n
+>>449 string Disk\ error\xFF\r\n
+>>>462 string Press\ any\ key\ to\ restart\r \b, Microsoft Windows XP Bootloader
+# DOS names like NTLDR,CMLDR,$LDR$ are 8 right space padded bytes+3 bytes
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+#
+>>>>371 ubyte >0x20
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+#
+>430 string NTLDR\ nicht\ gefunden\xFF\r\n
+>>453 string Datentr\204gerfehler\xFF\r\n
+>>>473 string Neustart\ mit\ beliebiger\ Taste\r \b, Microsoft Windows XP Bootloader (german)
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+# offset variant
+>>>>379 string \0
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+#
+>430 string NTLDR\ fehlt\xFF\r\n
+>>444 string Datentr\204gerfehler\xFF\r\n
+>>>464 string Neustart\ mit\ beliebiger\ Taste\r \b, Microsoft Windows XP Bootloader (2.german)
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+# variant
+>>>>371 ubyte >0x20
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+#
+>430 string NTLDR\ fehlt\xFF\r\n
+>>444 string Medienfehler\xFF\r\n
+>>>459 string Neustart:\ Taste\ dr\201cken\r \b, Microsoft Windows XP Bootloader (3.german)
+>>>>371 ubyte >0x20
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+# variant
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+#
+>430 string Datentr\204ger\ entfernen\xFF\r\n
+>>454 string Medienfehler\xFF\r\n
+>>>469 string Neustart:\ Taste\ dr\201cken\r \b, Microsoft Windows XP Bootloader (4.german)
+>>>>379 string \0
+>>>>>368 ubyte&0xDF >0
+>>>>>>368 string x %-.5s
+>>>>>>>373 ubyte&0xDF >0
+>>>>>>>>373 string x \b%-.3s
+>>>>>>376 ubyte&0xDF >0
+>>>>>>>376 string x \b.%-.3s
+# variant
+>>>>417 ubyte&0xDF >0
+>>>>>417 string x %-.5s
+>>>>>>422 ubyte&0xDF >0
+>>>>>>>422 string x \b%-.3s
+>>>>>425 ubyte&0xDF >0
+>>>>>>425 string >\ \b.%-.3s
+#
+
+#>3 string NTFS\ \ \ \
+>389 string Fehler\ beim\ Lesen\
+>>407 string des\ Datentr\204gers
+>>>426 string NTLDR\ fehlt
+>>>>440 string NTLDR\ ist\ komprimiert
+>>>>>464 string Neustart\ mit\ Strg+Alt+Entf\r \b, Microsoft Windows XP Bootloader NTFS (german)
+#>3 string NTFS\ \ \ \
+>313 string A\ disk\ read\ error\ occurred.\r
+>>345 string A\ kernel\ file\ is\ missing\
+>>>370 string from\ the\ disk.\r
+>>>>484 string NTLDR\ is\ compressed
+>>>>>429 string Insert\ a\ system\ diskette\
+>>>>>>454 string and\ restart\r\nthe\ system.\r \b, Microsoft Windows XP Bootloader NTFS
+# DOS loader variants different languages,offsets
+>472 ubyte&0xDF >0
+>>389 string Invalid\ system\ disk\xFF\r\n
+>>>411 string Disk\ I/O\ error
+>>>>428 string Replace\ the\ disk,\ and\
+>>>>>455 string press\ any\ key \b, Microsoft Windows 98 Bootloader
+#IO.SYS
+>>>>>>472 ubyte&0xDF >0
+>>>>>>>472 string x \b %-.2s
+>>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>>474 string x \b%-.5s
+>>>>>>>>>>479 ubyte&0xDF >0
+>>>>>>>>>>>479 string x \b%-.1s
+>>>>>>>480 ubyte&0xDF >0
+>>>>>>>>480 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>>483 string x \b%-.5s
+>>>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>>>488 string x \b%-.3s
+>>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>>491 string x \b.%-.3s
+#
+>>390 string Invalid\ system\ disk\xFF\r\n
+>>>412 string Disk\ I/O\ error\xFF\r\n
+>>>>429 string Replace\ the\ disk,\ and\
+>>>>>451 string then\ press\ any\ key\r \b, Microsoft Windows 98 Bootloader
+>>388 string Ungueltiges\ System\ \xFF\r\n
+>>>410 string E/A-Fehler\ \ \ \ \xFF\r\n
+>>>>427 string Datentraeger\ wechseln\ und\
+>>>>>453 string Taste\ druecken\r \b, Microsoft Windows 95/98/ME Bootloader (german)
+#WINBOOT.SYS; print only bytes that are neither space nor NUL (mask 0xDF clears the 0x20 bit)
+>>>>>>497 ubyte&0xDF >0
+>>>>>>>497 string x %-.5s
+>>>>>>>>502 ubyte&0xDF >0
+>>>>>>>>>502 string x \b%-.1s
+>>>>>>>>>>503 ubyte&0xDF >0
+>>>>>>>>>>>503 string x \b%-.1s
+>>>>>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>>>>>504 string x \b%-.1s
+>>>>>>505 ubyte&0xDF >0
+>>>>>>>505 string x \b.%-.3s
+#IO.SYS
+>>>>>>472 ubyte&0xDF >0 or
+>>>>>>>472 string x \b %-.2s
+>>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>>474 string x \b%-.5s
+>>>>>>>>>>479 ubyte&0xDF >0
+>>>>>>>>>>>479 string x \b%-.1s
+>>>>>>>480 ubyte&0xDF >0
+>>>>>>>>480 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>>483 string x \b%-.5s
+>>>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>>>488 string x \b%-.3s
+>>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>>491 string x \b.%-.3s
+#
+>>390 string Ungueltiges\ System\ \xFF\r\n
+>>>412 string E/A-Fehler\ \ \ \ \xFF\r\n
+>>>>429 string Datentraeger\ wechseln\ und\
+>>>>>455 string Taste\ druecken\r \b, Microsoft Windows 95/98/ME Bootloader (German)
+#WINBOOT.SYS; print only bytes that are neither space nor NUL (mask 0xDF clears the 0x20 bit)
+>>>>>>497 ubyte&0xDF >0
+>>>>>>>497 string x %-.7s
+>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>504 string x \b%-.1s
+>>>>>>505 ubyte&0xDF >0
+>>>>>>>505 string x \b.%-.3s
+#IO.SYS
+>>>>>>472 ubyte&0xDF >0 or
+>>>>>>>472 string x \b %-.2s
+>>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>>474 string x \b%-.6s
+>>>>>>>480 ubyte&0xDF >0
+>>>>>>>>480 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>>483 string x \b%-.5s
+>>>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>>>488 string x \b%-.3s
+>>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>>491 string x \b.%-.3s
+#
+>>389 string Ungueltiges\ System\ \xFF\r\n
+>>>411 string E/A-Fehler\ \ \ \ \xFF\r\n
+>>>>428 string Datentraeger\ wechseln\ und\
+>>>>>454 string Taste\ druecken\r \b, Microsoft Windows 95/98/ME Bootloader (GERMAN)
+# DOS names like IO.SYS,WINBOOT.SYS,MSDOS.SYS,WINBOOT.INI are 8 right space padded bytes+3 bytes
+>>>>>>472 string x %-.2s
+>>>>>>>474 ubyte&0xDF >0
+>>>>>>>>474 string x \b%-.5s
+>>>>>>>>479 ubyte&0xDF >0
+>>>>>>>>>479 string x \b%-.1s
+>>>>>>480 ubyte&0xDF >0
+>>>>>>>480 string x \b.%-.3s
+>>>>>>483 ubyte&0xDF >0 \b+
+>>>>>>>483 string x \b%-.5s
+>>>>>>>488 ubyte&0xDF >0
+>>>>>>>>488 string x \b%-.2s
+>>>>>>>>490 ubyte&0xDF >0
+>>>>>>>>>490 string x \b%-.1s
+>>>>>>>491 ubyte&0xDF >0
+>>>>>>>>491 string x \b.%-.3s
+>479 ubyte&0xDF >0
+>>416 string Kein\ System\ oder\
+>>>433 string Laufwerksfehler
+>>>>450 string Wechseln\ und\ Taste\ dr\201cken \b, Microsoft DOS Bootloader (german)
+#IO.SYS
+>>>>>479 string x \b %-.2s
+>>>>>>481 ubyte&0xDF >0
+>>>>>>>481 string x \b%-.6s
+>>>>>487 ubyte&0xDF >0
+>>>>>>487 string x \b.%-.3s
+#MSDOS.SYS
+>>>>>>490 ubyte&0xDF >0 \b+
+>>>>>>>490 string x \b%-.5s
+>>>>>>>>495 ubyte&0xDF >0
+>>>>>>>>>495 string x \b%-.3s
+>>>>>>>498 ubyte&0xDF >0
+>>>>>>>>498 string x \b.%-.3s
+#
+>376 search/41 Non-System\ disk\ or\
+>>395 search/41 disk\ error\r
+>>>407 search/41 Replace\ and\
+>>>>419 search/41 press\ \b,
+>>>>419 search/41 strike\ \b, old
+>>>>426 search/41 any\ key\ when\ ready\r MS or PC-DOS bootloader
+#449 Disk\ Boot\ failure\r MS 3.21
+#466 Boot\ Failure\r MS 3.30
+>>>>>468 search/18 \0
+#IO.SYS,IBMBIO.COM
+>>>>>>&0 string x \b %-.2s
+>>>>>>>&-20 ubyte&0xDF >0
+>>>>>>>>&-1 string x \b%-.4s
+>>>>>>>>>&-16 ubyte&0xDF >0
+>>>>>>>>>>&-1 string x \b%-.2s
+>>>>>>&8 ubyte&0xDF >0 \b.
+>>>>>>>&-1 string x \b%-.3s
+#MSDOS.SYS,IBMDOS.COM
+>>>>>>&11 ubyte&0xDF >0 \b+
+>>>>>>>&-1 string x \b%-.5s
+>>>>>>>>&-6 ubyte&0xDF >0
+>>>>>>>>>&-1 string x \b%-.1s
+>>>>>>>>>>&-5 ubyte&0xDF >0
+>>>>>>>>>>>&-1 string x \b%-.2s
+>>>>>>>&7 ubyte&0xDF >0 \b.
+>>>>>>>>&-1 string x \b%-.3s
+>441 string Cannot\ load\ from\ harddisk.\n\r
+>>469 string Insert\ Systemdisk\
+>>>487 string and\ press\ any\ key.\n\r \b, MS (2.11) DOS bootloader
+#>43 string \224R-LOADER\ \ SYS =label
+>54 string SYS
+>>324 string VASKK
+>>>495 string NEWLDR\0 \b, DR-DOS Bootloader (LOADER.SYS)
+#
+>98 string Press\ a\ key\ to\ retry\0\r
+>>120 string Cannot\ find\ file\ \0\r
+>>>139 string Disk\ read\ error\0\r
+>>>>156 string Loading\ ...\0 \b, DR-DOS (3.41) Bootloader
+#DRBIOS.SYS
+>>>>>44 ubyte&0xDF >0
+>>>>>>44 string x \b %-.6s
+>>>>>>>50 ubyte&0xDF >0
+>>>>>>>>50 string x \b%-.2s
+>>>>>>52 ubyte&0xDF >0
+>>>>>>>52 string x \b.%-.3s
+#
+>70 string IBMBIO\ \ COM
+>>472 string Cannot\ load\ DOS!\
+>>>489 string Any\ key\ to\ retry \b, DR-DOS Bootloader
+>>471 string Cannot\ load\ DOS\
+>>487 string press\ key\ to\ retry \b, Open-DOS Bootloader
+#??
+>444 string KERNEL\ \ SYS
+>>314 string BOOT\ error! \b, FREE-DOS Bootloader
+>499 string KERNEL\ \ SYS
+>>305 string BOOT\ err!\0 \b, Free-DOS Bootloader
+>449 string KERNEL\ \ SYS
+>>319 string BOOT\ error! \b, FREE-DOS 0.5 Bootloader
+#
+>449 string Loading\ FreeDOS
+>>0x1AF ulelong >0 \b, FREE-DOS 0.95,1.0 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+#
+>331 string Error!.0 \b, FREE-DOS 1.0 bootloader
+#
+>125 string Loading\ FreeDOS...\r
+>>311 string BOOT\ error!\r \b, FREE-DOS bootloader
+>>>441 ubyte&0xDF >0
+>>>>441 string x \b %-.6s
+>>>>>447 ubyte&0xDF >0
+>>>>>>447 string x \b%-.1s
+>>>>>>>448 ubyte&0xDF >0
+>>>>>>>>448 string x \b%-.1s
+>>>>449 ubyte&0xDF >0
+>>>>>449 string x \b.%-.3s
+>124 string FreeDOS\0
+>>331 string \ err\0 \b, FREE-DOS BETa 0.9 Bootloader
+# DOS names like KERNEL.SYS,KERNEL16.SYS,KERNEL32.SYS,METAKERN.SYS are 8 right space padded bytes+3 bytes
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+>>333 string \ err\0 \b, FREE-DOS BEta 0.9 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+>>334 string \ err\0 \b, FREE-DOS Beta 0.9 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+>336 string Error!\
+>>343 string Hit\ a\ key\ to\ reboot. \b, FREE-DOS Beta 0.9sr1 Bootloader
+>>>497 ubyte&0xDF >0
+>>>>497 string x \b %-.6s
+>>>>>503 ubyte&0xDF >0
+>>>>>>503 string x \b%-.1s
+>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>504 string x \b%-.1s
+>>>>505 ubyte&0xDF >0
+>>>>>505 string x \b.%-.3s
+# added by Joerg Jenderek
+# http://www.visopsys.org/
+# http://partitionlogic.org.uk/
+# OEM-ID=Visopsys
+>478 ulelong 0
+>>(1.b+326) string I/O\ Error\ reading\
+>>>(1.b+344) string Visopsys\ loader\r
+>>>>(1.b+361) string Press\ any\ key\ to\ continue.\r \b, Visopsys loader
+# http://alexfru.chat.ru/epm.html#bootprog
+>494 ubyte >0x4D
+>>495 string >E
+>>>495 string <S
+#OEM-ID is not reliable
+>>>>3 string BootProg
+# It just looks for a program file name in the root directory,
+# loads the corresponding file and then executes it.
+# DOS names like STARTUP.BIN,STARTUPC.COM,STARTUPE.EXE are 8 right space padded bytes+3 bytes
+>>>>499 ubyte&0xDF >0 \b, COM/EXE Bootloader
+>>>>>499 string x \b %-.1s
+>>>>>>500 ubyte&0xDF >0
+>>>>>>>500 string x \b%-.1s
+>>>>>>>>501 ubyte&0xDF >0
+>>>>>>>>>501 string x \b%-.1s
+>>>>>>>>>>502 ubyte&0xDF >0
+>>>>>>>>>>>502 string x \b%-.1s
+>>>>>>>>>>>>503 ubyte&0xDF >0
+>>>>>>>>>>>>>503 string x \b%-.1s
+>>>>>>>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>>>>>>>504 string x \b%-.1s
+>>>>>>>>>>>>>>>>505 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>505 string x \b%-.1s
+>>>>>>>>>>>>>>>>>>506 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>>>506 string x \b%-.1s
+#name extension
+>>>>>507 ubyte&0xDF >0 \b.
+>>>>>>507 string x \b%-.1s
+>>>>>>>508 ubyte&0xDF >0
+>>>>>>>>508 string x \b%-.1s
+>>>>>>>>>509 ubyte&0xDF >0
+>>>>>>>>>>509 string x \b%-.1s
+#If the boot sector fails to read any other sector,
+#it prints a very short message ("RE") to the screen and hangs the computer.
+#If the boot sector fails to find needed program in the root directory,
+#it also hangs with another message ("NF").
+>>>>>492 string RENF \b, FAT (12 bit)
+>>>>>495 string RENF \b, FAT (16 bit)
+# http://alexfru.chat.ru/epm.html#bootprog
+>494 ubyte >0x4D
+>>495 string >E
+>>>495 string <S
+#OEM-ID is not reliable
+>>>>3 string BootProg
+# It just looks for a program file name in the root directory,
+# loads the corresponding file and then executes it.
+# DOS names like STARTUP.BIN,STARTUPC.COM,STARTUPE.EXE are 8 right space padded bytes+3 bytes
+>>>>499 ubyte&0xDF >0 \b, COM/EXE Bootloader
+>>>>>499 string x \b %-.1s
+>>>>>>500 ubyte&0xDF >0
+>>>>>>>500 string x \b%-.1s
+>>>>>>>>501 ubyte&0xDF >0
+>>>>>>>>>501 string x \b%-.1s
+>>>>>>>>>>502 ubyte&0xDF >0
+>>>>>>>>>>>502 string x \b%-.1s
+>>>>>>>>>>>>503 ubyte&0xDF >0
+>>>>>>>>>>>>>503 string x \b%-.1s
+>>>>>>>>>>>>>>504 ubyte&0xDF >0
+>>>>>>>>>>>>>>>504 string x \b%-.1s
+>>>>>>>>>>>>>>>>505 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>505 string x \b%-.1s
+>>>>>>>>>>>>>>>>>>506 ubyte&0xDF >0
+>>>>>>>>>>>>>>>>>>>506 string x \b%-.1s
+#name extension
+>>>>>507 ubyte&0xDF >0 \b.
+>>>>>>507 string x \b%-.1s
+>>>>>>>508 ubyte&0xDF >0
+>>>>>>>>508 string x \b%-.1s
+>>>>>>>>>509 ubyte&0xDF >0
+>>>>>>>>>>509 string x \b%-.1s
+#If the boot sector fails to read any other sector,
+#it prints a very short message ("RE") to the screen and hangs the computer.
+#If the boot sector fails to find needed program in the root directory,
+#it also hangs with another message ("NF").
+>>>>>492 string RENF \b, FAT (12 bit)
+>>>>>495 string RENF \b, FAT (16 bit)
+# x86 bootloader end
+# updated by Joerg Jenderek at Sep 2007
+>3 ubyte 0
+#no active flag
+>>446 ubyte 0
+# partition 1 not empty
+>>>450 ubyte >0
+# partitions 3,4 empty
+>>>>482 ubyte 0
+>>>>>498 ubyte 0
+# partition 2 ID=0,5,15
+>>>>>>466 ubyte <0x10
+>>>>>>>466 ubyte 0x05 \b, extended partition table
+>>>>>>>466 ubyte 0x0F \b, extended partition table (LBA)
+>>>>>>>466 ubyte 0x0 \b, extended partition table (last)
+# JuMP short bootcodeoffset NOP assembler instructions will usually be EB xx 90
+# http://mirror.href.com/thestarman/asm/2bytejumps.htm#FWD
+# older drives may use Near JuMP instruction E9 xx xx
+>0 lelong&0x009000EB 0x009000EB
+>0 lelong&0x000000E9 0x000000E9
+# minimal short forward jump found 03cx??
+# maximal short forward jump is 07fx
+>1 ubyte <0xff \b, code offset 0x%x
+# mtools-3.9.8/msdos.h
+# usual values are marked with comments to get only information about strange FAT systems
+# valid sectorsize must be a power of 2 from 32 to 32768
+>>11 uleshort&0x000f x
+>>>11 uleshort <32769
+>>>>11 uleshort >31
+>>>>>21 ubyte&0xf0 0xF0
+>>>>>>3 string >\0 \b, OEM-ID "%8.8s"
+#http://mirror.href.com/thestarman/asm/debug/debug2.htm#IHC
+>>>>>>>8 string IHC \b cached by Windows 9M
+>>>>>>11 uleshort >512 \b, Bytes/sector %u
+#>>>>>>11 uleshort =512 \b, Bytes/sector %u=512 (usual)
+>>>>>>11 uleshort <512 \b, Bytes/sector %u
+>>>>>>13 ubyte >1 \b, sectors/cluster %u
+#>>>>>>13 ubyte =1 \b, sectors/cluster %u (usual on Floppies)
+>>>>>>14 uleshort >32 \b, reserved sectors %u
+#>>>>>>14 uleshort =32 \b, reserved sectors %u (usual Fat32)
+#>>>>>>14 uleshort >1 \b, reserved sectors %u
+#>>>>>>14 uleshort =1 \b, reserved sectors %u (usual FAT12,FAT16)
+>>>>>>14 uleshort <1 \b, reserved sectors %u
+>>>>>>16 ubyte >2 \b, FATs %u
+#>>>>>>16 ubyte =2 \b, FATs %u (usual)
+>>>>>>16 ubyte =1 \b, FAT %u
+>>>>>>16 ubyte >0
+>>>>>>17 uleshort >0 \b, root entries %u
+#>>>>>>17 uleshort =0 \b, root entries %u=0 (usual Fat32)
+>>>>>>19 uleshort >0 \b, sectors %u (volumes <=32 MB)
+#>>>>>>19 uleshort =0 \b, sectors %u=0 (usual Fat32)
+>>>>>>21 ubyte >0xF0 \b, Media descriptor 0x%x
+#>>>>>>21 ubyte =0xF0 \b, Media descriptor 0x%x (usual floppy)
+>>>>>>21 ubyte <0xF0 \b, Media descriptor 0x%x
+>>>>>>22 uleshort >0 \b, sectors/FAT %u
+#>>>>>>22 uleshort =0 \b, sectors/FAT %u=0 (usual Fat32)
+>>>>>>26 ubyte >2 \b, heads %u
+#>>>>>>26 ubyte =2 \b, heads %u (usual floppy)
+>>>>>>26 ubyte =1 \b, heads %u
+#skip for Digital Research DOS (version 3.41) 1440 kB Bootdisk
+>>>>>>38 ubyte !0x70
+>>>>>>>28 ulelong >0 \b, hidden sectors %u
+#>>>>>>>28 ulelong =0 \b, hidden sectors %u (usual floppy)
+>>>>>>>32 ulelong >0 \b, sectors %u (volumes > 32 MB)
+#>>>>>>>32 ulelong =0 \b, sectors %u (volumes > 32 MB)
+# FAT<32 specific
+>>>>>>82 string !FAT32
+#>>>>>>>36 ubyte 0x80 \b, physical drive 0x%x=0x80 (usual harddisk)
+#>>>>>>>36 ubyte 0 \b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>36 ubyte !0x80
+>>>>>>>>36 ubyte !0 \b, physical drive 0x%x
+>>>>>>>37 ubyte >0 \b, reserved 0x%x
+#>>>>>>>37 ubyte =0 \b, reserved 0x%x
+>>>>>>>38 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>38 ubyte =0x29
+>>>>>>>>39 ulelong x \b, serial number 0x%x
+>>>>>>>>43 string <NO\ NAME \b, label: "%11.11s"
+>>>>>>>>43 string >NO\ NAME \b, label: "%11.11s"
+>>>>>>>>43 string =NO\ NAME \b, unlabeled
+>>>>>>>54 string FAT \b, FAT
+>>>>>>>>54 string FAT12 \b (12 bit)
+>>>>>>>>54 string FAT16 \b (16 bit)
+# FAT32 specific
+>>>>>>82 string FAT32 \b, FAT (32 bit)
+>>>>>>>36 ulelong x \b, sectors/FAT %u
+>>>>>>>40 uleshort >0 \b, extension flags %u
+#>>>>>>>40 uleshort =0 \b, extension flags %u
+>>>>>>>42 uleshort >0 \b, fsVersion %u
+#>>>>>>>42 uleshort =0 \b, fsVersion %u (usual)
+>>>>>>>44 ulelong >2 \b, rootdir cluster %u
+#>>>>>>>44 ulelong =2 \b, rootdir cluster %u
+#>>>>>>>44 ulelong =1 \b, rootdir cluster %u
+>>>>>>>48 uleshort >1 \b, infoSector %u
+#>>>>>>>48 uleshort =1 \b, infoSector %u (usual)
+>>>>>>>48 uleshort <1 \b, infoSector %u
+>>>>>>>50 uleshort >6 \b, Backup boot sector %u
+#>>>>>>>50 uleshort =6 \b, Backup boot sector %u (usual)
+>>>>>>>50 uleshort <6 \b, Backup boot sector %u
+>>>>>>>54 ulelong >0 \b, reserved1 0x%x
+>>>>>>>58 ulelong >0 \b, reserved2 0x%x
+>>>>>>>62 ulelong >0 \b, reserved3 0x%x
+# same structure as FAT1X
+>>>>>>>64 ubyte >0x80 \b, physical drive 0x%x
+#>>>>>>>64 ubyte =0x80 \b, physical drive 0x%x=80 (usual harddisk)
+>>>>>>>64 ubyte&0x7F >0 \b, physical drive 0x%x
+#>>>>>>>64 ubyte =0 \b, physical drive 0x%x=0 (usual floppy)
+>>>>>>>65 ubyte >0 \b, reserved 0x%x
+>>>>>>>66 ubyte >0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66 ubyte <0x29 \b, dos < 4.0 BootSector (0x%x)
+>>>>>>>66 ubyte =0x29
+>>>>>>>>67 ulelong x \b, serial number 0x%x
+>>>>>>>>71 string <NO\ NAME \b, label: "%11.11s"
+>>>>>>>71 string >NO\ NAME \b, label: "%11.11s"
+>>>>>>>71 string =NO\ NAME \b, unlabeled
+### FATs end
+>0x200 lelong 0x82564557 \b, BSD disklabel
+# FATX
+0 string FATX FATX filesystem data
+
+
+# Minix filesystems - Juan Cespedes <cespedes at debian.org>
+0x410 leshort 0x137f
+!:strength / 2
+>0x402 beshort < 100 Minix filesystem, V1, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x137f
+!:strength / 2
+>0x402 beshort < 100 Minix filesystem, V1 (big endian), %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x138f
+!:strength / 2
+>0x402 beshort < 100 Minix filesystem, V1, 30 char names, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x138f
+!:strength / 2
+>0x402 beshort < 100 Minix filesystem, V1, 30 char names (big endian), %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x2468
+>0x402 beshort < 100 Minix filesystem, V2, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x2468
+>0x402 beshort < 100 Minix filesystem, V2 (big endian), %d zones
+>0x1e string minix \b, bootable
+
+0x410 leshort 0x2478
+>0x402 beshort < 100 Minix filesystem, V2, 30 char names, %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x2478
+>0x402 beshort < 100 Minix filesystem, V2, 30 char names, %d zones
+>0x1e string minix \b, bootable
+0x410 beshort 0x2478
+>0x402 beshort !0 Minix filesystem, V2, 30 char names (big endian), %d zones
+>0x1e string minix \b, bootable
+0x410 leshort 0x4d5a
+>0x402 beshort !0 Minix filesystem, V3, %d zones
+>0x1e string minix \b, bootable
+
+# romfs filesystems - Juan Cespedes <cespedes at debian.org>
+0 string -rom1fs- romfs filesystem, version 1
+>8 belong x %d bytes,
+>16 string x named %s.
+
+# netboot image - Juan Cespedes <cespedes at debian.org>
+0 lelong 0x1b031336L Netboot image,
+>4 lelong&0xFFFFFF00 0
+>>4 lelong&0x100 0x000 mode 2
+>>4 lelong&0x100 0x100 mode 3
+>4 lelong&0xFFFFFF00 !0 unknown mode
+
+0x18b string OS/2 OS/2 Boot Manager
+
+# updated by Joerg Jenderek at Oct 2008!!
+# http://syslinux.zytor.com/iso.php
+0 ulelong 0x7c40eafa isolinux Loader
+# http://syslinux.zytor.com/pxe.php
+0 ulelong 0x007c05ea pxelinux Loader
+0 ulelong 0x60669c66 pxelinux Loader
+
+# added by Joerg Jenderek
+# In the second sector (+0x200) are variables according to grub-0.97/stage2/asm.S or
+# grub-1.94/kern/i386/pc/startup.S
+# http://www.gnu.org/software/grub/manual/grub.html#Embedded-data
+# usual values are commented out so that only information about unusual GRUB loaders is reported
+0x200 uleshort 0x70EA
+# found only version 3.{1,2}
+>0x206 ubeshort >0x0300
+# GRUB version (0.5.)95,0.93,0.94,0.96,0.97 > "00"
+>>0x212 ubyte >0x29
+>>>0x213 ubyte >0x29
+# not iso9660_stage1_5
+#>>>0 ulelong&0x00BE5652 0x00BE5652
+>>>>0x213 ubyte >0x29 GRand Unified Bootloader
+# config_file for stage1_5 is 0xffffffff + default "/boot/grub/stage2"
+>>>>0x217 ubyte 0xFF stage1_5
+>>>>0x217 ubyte <0xFF stage2
+>>>>0x206 ubyte x \b version %u
+>>>>0x207 ubyte x \b.%u
+# module_size for 1.94
+>>>>0x208 ulelong <0xffffff \b, installed partition %u
+#>>>>0x208 ulelong =0xffffff \b, %u (default)
+>>>>0x208 ulelong >0xffffff \b, installed partition %u
+# GRUB 0.5.95 unofficial
+>>>>0x20C ulelong&0x2E300000 0x2E300000
+# 0=stage2 1=ffs 2=e2fs 3=fat 4=minix 5=reiserfs
+>>>>>0x20C ubyte x \b, identifier 0x%x
+#>>>>>0x20D ubyte =0 \b, LBA flag 0x%x (default)
+>>>>>0x20D ubyte >0 \b, LBA flag 0x%x
+# GRUB version as string
+>>>>>0x20E string >\0 \b, GRUB version %-s
+# for stage1_5 is 0xffffffff + config_file "/boot/grub/stage2" default
+>>>>>>0x215 ulong 0xffffffff
+>>>>>>>0x219 string >\0 \b, configuration file %-s
+>>>>>>0x215 ulong !0xffffffff
+>>>>>>>0x215 string >\0 \b, configuration file %-s
+# newer GRUB versions
+>>>>0x20C ulelong&0x2E300000 !0x2E300000
+##>>>>>0x20C ulelong =0 \b, saved entry %d (usual)
+>>>>>0x20C ulelong >0 \b, saved entry %d
+# for 1.94 contains kernel image size
+# for 0.93,0.94,0.96,0.97
+# 0=stage2 1=ffs 2=e2fs 3=fat 4=minix 5=reiserfs 6=vstafs 7=jfs 8=xfs 9=iso9660 a=ufs2
+>>>>>0x210 ubyte x \b, identifier 0x%x
+# The flag for LBA forcing is in most cases 0
+#>>>>>0x211 ubyte =0 \b, LBA flag 0x%x (default)
+>>>>>0x211 ubyte >0 \b, LBA flag 0x%x
+# GRUB version as string
+>>>>>0x212 string >\0 \b, GRUB version %-s
+# for stage1_5 is 0xffffffff + config_file "/boot/grub/stage2" default
+>>>>>0x217 ulong 0xffffffff
+>>>>>>0x21b string >\0 \b, configuration file %-s
+>>>>>0x217 ulong !0xffffffff
+>>>>>>0x217 string >\0 \b, configuration file %-s
+
+9564 lelong 0x00011954 Unix Fast File system [v1] (little-endian),
+>8404 string x last mounted on %s,
+#>9504 ledate x last checked at %s,
+>8224 ledate x last written at %s,
+>8401 byte x clean flag %d,
+>8228 lelong x number of blocks %d,
+>8232 lelong x number of data blocks %d,
+>8236 lelong x number of cylinder groups %d,
+>8240 lelong x block size %d,
+>8244 lelong x fragment size %d,
+>8252 lelong x minimum percentage of free blocks %d,
+>8256 lelong x rotational delay %dms,
+>8260 lelong x disk rotational speed %drps,
+>8320 lelong 0 TIME optimization
+>8320 lelong 1 SPACE optimization
+
+42332 lelong 0x19540119 Unix Fast File system [v2] (little-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 leqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 lequad x number of blocks %lld,
+>&-288 lequad x number of data blocks %lld,
+>&-1332 lelong x number of cylinder groups %d,
+>&-1328 lelong x block size %d,
+>&-1324 lelong x fragment size %d,
+>&-180 lelong x average file size %d,
+>&-176 lelong x average number of files in dir %d,
+>&-272 lequad x pending blocks to free %lld,
+>&-264 lelong x pending inodes to free %ld,
+>&-664 lequad x system-wide uuid %0llx,
+>&-1316 lelong x minimum percentage of free blocks %d,
+>&-1248 lelong 0 TIME optimization
+>&-1248 lelong 1 SPACE optimization
+
+66908 lelong 0x19540119 Unix Fast File system [v2] (little-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 leqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 lequad x number of blocks %lld,
+>&-288 lequad x number of data blocks %lld,
+>&-1332 lelong x number of cylinder groups %d,
+>&-1328 lelong x block size %d,
+>&-1324 lelong x fragment size %d,
+>&-180 lelong x average file size %d,
+>&-176 lelong x average number of files in dir %d,
+>&-272 lequad x pending blocks to free %lld,
+>&-264 lelong x pending inodes to free %ld,
+>&-664 lequad x system-wide uuid %0llx,
+>&-1316 lelong x minimum percentage of free blocks %d,
+>&-1248 lelong 0 TIME optimization
+>&-1248 lelong 1 SPACE optimization
+
+9564 belong 0x00011954 Unix Fast File system [v1] (big-endian),
+>7168 belong 0x4c41424c Apple UFS Volume
+>>7186 string x named %s,
+>>7176 belong x volume label version %d,
+>>7180 bedate x created on %s,
+>8404 string x last mounted on %s,
+#>9504 bedate x last checked at %s,
+>8224 bedate x last written at %s,
+>8401 byte x clean flag %d,
+>8228 belong x number of blocks %d,
+>8232 belong x number of data blocks %d,
+>8236 belong x number of cylinder groups %d,
+>8240 belong x block size %d,
+>8244 belong x fragment size %d,
+>8252 belong x minimum percentage of free blocks %d,
+>8256 belong x rotational delay %dms,
+>8260 belong x disk rotational speed %drps,
+>8320 belong 0 TIME optimization
+>8320 belong 1 SPACE optimization
+
+42332 belong 0x19540119 Unix Fast File system [v2] (big-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 beqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 bequad x number of blocks %lld,
+>&-288 bequad x number of data blocks %lld,
+>&-1332 belong x number of cylinder groups %d,
+>&-1328 belong x block size %d,
+>&-1324 belong x fragment size %d,
+>&-180 belong x average file size %d,
+>&-176 belong x average number of files in dir %d,
+>&-272 bequad x pending blocks to free %lld,
+>&-264 belong x pending inodes to free %ld,
+>&-664 bequad x system-wide uuid %0llx,
+>&-1316 belong x minimum percentage of free blocks %d,
+>&-1248 belong 0 TIME optimization
+>&-1248 belong 1 SPACE optimization
+
+66908 belong 0x19540119 Unix Fast File system [v2] (big-endian)
+>&-1164 string x last mounted on %s,
+>&-696 string >\0 volume name %s,
+>&-304 beqldate x last written at %s,
+>&-1167 byte x clean flag %d,
+>&-1168 byte x readonly flag %d,
+>&-296 bequad x number of blocks %lld,
+>&-288 bequad x number of data blocks %lld,
+>&-1332 belong x number of cylinder groups %d,
+>&-1328 belong x block size %d,
+>&-1324 belong x fragment size %d,
+>&-180 belong x average file size %d,
+>&-176 belong x average number of files in dir %d,
+>&-272 bequad x pending blocks to free %lld,
+>&-264 belong x pending inodes to free %ld,
+>&-664 bequad x system-wide uuid %0llx,
+>&-1316 belong x minimum percentage of free blocks %d,
+>&-1248 belong 0 TIME optimization
+>&-1248 belong 1 SPACE optimization
+
+# ext2/ext3 filesystems - Andreas Dilger <adilger at dilger.ca>
+# ext4 filesystem - Eric Sandeen <sandeen at sandeen.net>
+0x438 leshort 0xEF53 Linux
+>0x44c lelong x rev %d
+>0x43e leshort x \b.%d
+# No journal? ext2
+>0x45c lelong ^0x0000004 ext2 filesystem data
+>>0x43a leshort ^0x0000001 (mounted or unclean)
+# Has a journal? ext3 or ext4
+>0x45c lelong &0x0000004
+# and small INCOMPAT?
+>>0x460 lelong <0x0000040
+# and small RO_COMPAT?
+>>>0x464 lelong <0x0000008 ext3 filesystem data
+# else large RO_COMPAT?
+>>>0x464 lelong >0x0000007 ext4 filesystem data
+# else large INCOMPAT?
+>>0x460 lelong >0x000003f ext4 filesystem data
+# General flags for any ext* fs
+>0x460 lelong &0x0000004 (needs journal recovery)
+>0x43a leshort &0x0000002 (errors)
+# INCOMPAT flags
+>0x460 lelong &0x0000001 (compressed)
+#>0x460 lelong &0x0000002 (filetype)
+#>0x460 lelong &0x0000010 (meta bg)
+>0x460 lelong &0x0000040 (extents)
+>0x460 lelong &0x0000080 (64bit)
+#>0x460 lelong &0x0000100 (mmp)
+#>0x460 lelong &0x0000200 (flex bg)
+# RO_COMPAT flags
+#>0x464 lelong &0x0000001 (sparse super)
+>0x464 lelong &0x0000002 (large files)
+>0x464 lelong &0x0000008 (huge files)
+#>0x464 lelong &0x0000010 (gdt checksum)
+#>0x464 lelong &0x0000020 (many subdirs)
+#>0x464 lelong &0x0000040 (extra isize)
+
+# SGI disk labels - Nathan Scott <nathans at debian.org>
+0 belong 0x0BE5A941 SGI disk label (volume header)
+
+# SGI XFS filesystem - Nathan Scott <nathans at debian.org>
+0 belong 0x58465342 SGI XFS filesystem data
+>0x4 belong x (blksz %d,
+>0x68 beshort x inosz %d,
+>0x64 beshort ^0x2004 v1 dirs)
+>0x64 beshort &0x2004 v2 dirs)
+
+############################################################################
+# Minix-ST kernel floppy
+0x800 belong 0x46fc2700 Atari-ST Minix kernel image
+>19 string \240\5\371\5\0\011\0\2\0 \b, 720k floppy
+>19 string \320\2\370\5\0\011\0\1\0 \b, 360k floppy
+
+############################################################################
+# Hmmm, is this a better way of detecting _standard_ floppy images ?
+19 string \320\2\360\3\0\011\0\1\0 DOS floppy 360k
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+19 string \240\5\371\3\0\011\0\2\0 DOS floppy 720k
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+19 string \100\013\360\011\0\022\0\2\0 DOS floppy 1440k
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+
+19 string \240\5\371\5\0\011\0\2\0 DOS floppy 720k, IBM
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+19 string \100\013\371\5\0\011\0\2\0 DOS floppy 1440k, mkdosfs
+>0x1FE leshort 0xAA55 \b, x86 hard disk boot sector
+
+19 string \320\2\370\5\0\011\0\1\0 Atari-ST floppy 360k
+19 string \240\5\371\5\0\011\0\2\0 Atari-ST floppy 720k
+
+# Valid media descriptor bytes for MS-DOS:
+#
+# Byte Capacity Media Size and Type
+# -------------------------------------------------
+#
+# F0 2.88 MB 3.5-inch, 2-sided, 36-sector
+# F0 1.44 MB 3.5-inch, 2-sided, 18-sector
+# F9 720K 3.5-inch, 2-sided, 9-sector
+# F9 1.2 MB 5.25-inch, 2-sided, 15-sector
+# FD 360K 5.25-inch, 2-sided, 9-sector
+# FF 320K 5.25-inch, 2-sided, 8-sector
+# FC 180K 5.25-inch, 1-sided, 9-sector
+# FE 160K 5.25-inch, 1-sided, 8-sector
+# FE 250K 8-inch, 1-sided, single-density
+# FD 500K 8-inch, 2-sided, single-density
+# FE 1.2 MB 8-inch, 2-sided, double-density
+# F8 ----- Fixed disk
+#
+# FC xxxK Apricot 70x1x9 boot disk.
+#
+# Originally a bitmap:
+# xxxxxxx0 Not two sided
+# xxxxxxx1 Double sided
+# xxxxxx0x Not 8 SPT
+# xxxxxx1x 8 SPT
+# xxxxx0xx Not Removable drive
+# xxxxx1xx Removable drive
+# 11111xxx Must be one.
+#
+# But now it's rather random:
+# 111111xx Low density disk
+# 00 SS, Not 8 SPT
+# 01 DS, Not 8 SPT
+# 10 SS, 8 SPT
+# 11 DS, 8 SPT
+#
+# 11111001 Double density 3.5-inch floppy disk, high density 5.25-inch
+# 11110000 High density 3.5-inch floppy disk
+# 11111000 Hard disk any format
+#
+
+# CDROM Filesystems
+# Modified for UDF by gerardo.cacciari at gmail.com
+32769 string CD001
+!:mime application/x-iso9660-image
+>38913 string !NSR0 ISO 9660 CD-ROM filesystem data
+>38913 string NSR0 UDF filesystem data
+>>38917 string 1 (version 1.0)
+>>38917 string 2 (version 1.5)
+>>38917 string 3 (version 2.0)
+>>38917 byte >0x33 (unknown version, ID 0x%X)
+>>38917 byte <0x31 (unknown version, ID 0x%X)
+# "application id" which appears to be used as a volume label
+>32808 string >\0 '%s'
+>34816 string \000CD001\001EL\ TORITO\ SPECIFICATION (bootable)
+37633 string CD001 ISO 9660 CD-ROM filesystem data (raw 2352 byte sectors)
+!:mime application/x-iso9660-image
+32776 string CDROM High Sierra CD-ROM filesystem data
+
+# .cso files
+0 string CISO Compressed ISO CD image
+
+# cramfs filesystem - russell at coker.com.au
+0 lelong 0x28cd3d45 Linux Compressed ROM File System data, little endian
+>4 lelong x size %lu
+>8 lelong &1 version #2
+>8 lelong &2 sorted_dirs
+>8 lelong &4 hole_support
+>32 lelong x CRC 0x%x,
+>36 lelong x edition %lu,
+>40 lelong x %lu blocks,
+>44 lelong x %lu files
+
+0 belong 0x28cd3d45 Linux Compressed ROM File System data, big endian
+>4 belong x size %lu
+>8 belong &1 version #2
+>8 belong &2 sorted_dirs
+>8 belong &4 hole_support
+>32 belong x CRC 0x%x,
+>36 belong x edition %lu,
+>40 belong x %lu blocks,
+>44 belong x %lu files
+
+# reiserfs - russell at coker.com.au
+0x10034 string ReIsErFs ReiserFS V3.5
+0x10034 string ReIsEr2Fs ReiserFS V3.6
+>0x1002c leshort x block size %d
+>0x10032 leshort &2 (mounted or unclean)
+>0x10000 lelong x num blocks %d
+>0x10040 lelong 1 tea hash
+>0x10040 lelong 2 yura hash
+>0x10040 lelong 3 r5 hash
+
+# JFFS - russell at coker.com.au
+0 lelong 0x34383931 Linux Journalled Flash File system, little endian
+0 belong 0x34383931 Linux Journalled Flash File system, big endian
+
+# EST flat binary format (which isn't, but anyway)
+# From: Mark Brown <broonie at sirena.org.uk>
+0 string ESTFBINR EST flat binary
+
+# Aculab VoIP firmware
+# From: Mark Brown <broonie at sirena.org.uk>
+0 string VoIP\ Startup\ and Aculab VoIP firmware
+>35 string x format %s
+
+# From: Mark Brown <broonie at sirena.org.uk> [old]
+# From: Behan Webster <behanw at websterwood.com>
+0 belong 0x27051956 u-boot legacy uImage,
+>32 string x %s,
+>28 byte 0 Invalid os/
+>28 byte 1 OpenBSD/
+>28 byte 2 NetBSD/
+>28 byte 3 FreeBSD/
+>28 byte 4 4.4BSD/
+>28 byte 5 Linux/
+>28 byte 6 SVR4/
+>28 byte 7 Esix/
+>28 byte 8 Solaris/
+>28 byte 9 Irix/
+>28 byte 10 SCO/
+>28 byte 11 Dell/
+>28 byte 12 NCR/
+>28 byte 13 LynxOS/
+>28 byte 14 VxWorks/
+>28 byte 15 pSOS/
+>28 byte 16 QNX/
+>28 byte 17 Firmware/
+>28 byte 18 RTEMS/
+>28 byte 19 ARTOS/
+>28 byte 20 Unity OS/
+>28 byte 21 INTEGRITY/
+>29 byte 0 \bInvalid CPU,
+>29 byte 1 \bAlpha,
+>29 byte 2 \bARM,
+>29 byte 3 \bIntel x86,
+>29 byte 4 \bIA64,
+>29 byte 5 \bMIPS,
+>29 byte 6 \bMIPS 64-bit,
+>29 byte 7 \bPowerPC,
+>29 byte 8 \bIBM S390,
+>29 byte 9 \bSuperH,
+>29 byte 10 \bSparc,
+>29 byte 11 \bSparc 64-bit,
+>29 byte 12 \bM68K,
+>29 byte 13 \bNios-32,
+>29 byte 14 \bMicroBlaze,
+>29 byte 15 \bNios-II,
+>29 byte 16 \bBlackfin,
+>29 byte 17 \bAVR32,
+>29 byte 18 \bSTMicroelectronics ST200,
+>30 byte 0 Invalid Image
+>30 byte 1 Standalone Program
+>30 byte 2 OS Kernel Image
+>30 byte 3 RAMDisk Image
+>30 byte 4 Multi-File Image
+>30 byte 5 Firmware Image
+>30 byte 6 Script File
+>30 byte 7 Filesystem Image (any type)
+>30 byte 8 Binary Flat Device Tree BLOB
+>31 byte 0 (Not compressed),
+>31 byte 1 (gzip),
+>31 byte 2 (bzip2),
+>31 byte 3 (lzma),
+>12 belong x %d bytes,
+>8 bedate x %s,
+>16 belong x Load Address: 0x%08X,
+>20 belong x Entry Point: 0x%08X,
+>4 belong x Header CRC: 0x%08X,
+>24 belong x Data CRC: 0x%08X
+
+# JFFS2 file system
+0 leshort 0x1984 Linux old jffs2 filesystem data little endian
+0 leshort 0x1985 Linux jffs2 filesystem data little endian
+
+# Squashfs
+0 string sqsh Squashfs filesystem, big endian,
+>28 beshort x version %d.
+>30 beshort x \b%d,
+>28 beshort <3
+>>8 belong x %d bytes,
+>28 beshort >2
+>>28 beshort <4
+>>>63 bequad x %lld bytes,
+>>28 beshort >3
+>>>40 bequad x %lld bytes,
+#>>67 belong x %d bytes,
+>4 belong x %d inodes,
+>28 beshort <2
+>>32 beshort x blocksize: %d bytes,
+>28 beshort >1
+>>28 beshort <4
+>>>51 belong x blocksize: %d bytes,
+>>28 beshort >3
+>>>12 belong x blocksize: %d bytes,
+>28 beshort <4
+>>39 bedate x created: %s
+>28 beshort >3
+>>8 bedate x created: %s
+0 string hsqs Squashfs filesystem, little endian,
+>28 leshort x version %d.
+>30 leshort x \b%d,
+>28 leshort <3
+>>8 lelong x %d bytes,
+>28 leshort >2
+>>28 leshort <4
+>>>63 lequad x %lld bytes,
+>>28 leshort >3
+>>>40 lequad x %lld bytes,
+#>>63 lelong x %d bytes,
+>4 lelong x %d inodes,
+>28 leshort <2
+>>32 leshort x blocksize: %d bytes,
+>28 leshort >1
+>>28 leshort <4
+>>>51 lelong x blocksize: %d bytes,
+>>28 leshort >3
+>>>12 lelong x blocksize: %d bytes,
+>28 leshort <4
+>>39 ledate x created: %s
+>28 leshort >3
+>>8 ledate x created: %s
+
+0 string td\000 floppy image data (TeleDisk)
+
+# AFS Dump Magic
+# From: Ty Sarna <tsarna at sarna.org>
+0 string \x01\xb3\xa1\x13\x22 AFS Dump
+>&0 belong x (v%d)
+>>&0 byte 0x76
+>>>&0 belong x Vol %d,
+>>>>&0 byte 0x6e
+>>>>>&0 string x %s
+>>>>>>&1 byte 0x74
+>>>>>>>&0 beshort 2
+>>>>>>>>&4 bedate x on: %s
+>>>>>>>>&0 bedate =0 full dump
+>>>>>>>>&0 bedate !0 incremental since: %s
+
+#----------------------------------------------------------
+#delta ISO Daniel Novotny (dnovotny at redhat.com)
+0 string DISO Delta ISO data
+>4 belong x version %d
+
+# VMS backup savesets - gerardo.cacciari at gmail.com
+#
+4 string \x01\x00\x01\x00\x01\x00
+>(0.s+16) string \x01\x01
+>>&(&0.b+8) byte 0x42 OpenVMS backup saveset data
+>>>40 lelong x (block size %d,
+>>>49 string >\0 original name '%s',
+>>>2 short 1024 VAX generated)
+>>>2 short 2048 AXP generated)
+>>>2 short 4096 I64 generated)
+
+# Summary: Oracle Clustered Filesystem
+# Created by: Aaron Botsis <redhat at digitalmafia.org>
+8 string OracleCFS Oracle Clustered Filesystem,
+>4 long x rev %d
+>0 long x \b.%d,
+>560 string x label: %.64s,
+>136 string x mountpoint: %.128s
+
+# Summary: Oracle ASM tagged volume
+# Created by: Aaron Botsis <redhat at digitalmafia.org>
+32 string ORCLDISK Oracle ASM Volume,
+>40 string x Disk Name: %0.12s
+32 string ORCLCLRD Oracle ASM Volume (cleared),
+>40 string x Disk Name: %0.12s
+
+# Oracle Clustered Filesystem - Aaron Botsis <redhat at digitalmafia.org>
+8 string OracleCFS Oracle Clustered Filesystem,
+>4 long x rev %d
+>0 long x \b.%d,
+>560 string x label: %.64s,
+>136 string x mountpoint: %.128s
+
+# Oracle ASM tagged volume - Aaron Botsis <redhat at digitalmafia.org>
+32 string ORCLDISK Oracle ASM Volume,
+>40 string x Disk Name: %0.12s
+32 string ORCLCLRD Oracle ASM Volume (cleared),
+>40 string x Disk Name: %0.12s
+
+# Compaq/HP RILOE floppy image
+# From: Dirk Jagdmann <doj at cubic.org>
+0 string CPQRFBLO Compaq/HP RILOE floppy image
+
+#------------------------------------------------------------------------------
+# Files-11 On-Disk Structure (OpenVMS file system) - gerardo.cacciari at gmail.com
+# These bits come from LBN 1 (home block) of ODS-2 and ODS-5 volumes, which is
+# mapped to VBN 2 of [000000]INDEXF.SYS;1
+#
+1008 string DECFILE11B Files-11 On-Disk Structure
+>525 byte x Level %d
+>525 byte x (ODS-%d OpenVMS file system),
+>984 string x volume label is '%-12.12s'
+
+# From: Thomas Klausner <wiz at NetBSD.org>
+# http://filext.com/file-extension/DAA
+# describes the daa file format. The magic would be:
+0 string DAA\x0\x0\x0\x0\x0 PowerISO Direct-Access-Archive
+
+# From Albert Cahalan <acahalan at gmail.com>
+# really le32 operation,destination,payloadsize (but quite predictable)
+# 01 00 00 00 00 00 00 c0 00 02 00 00
+0 string \1\0\0\0\0\0\0\300\0\2\0\0 Marvell Libertas firmware
+
+# From Eric Sandeen
+# GFS2
+0x10000 belong 0x01161970 Linux
+>0x10018 belong 0x0000051d GFS1 Filesystem
+>>0x10024 belong x (blocksize %d,
+>>0x10060 string >\0 lockproto %s)
+>0x10018 belong 0x00000709 GFS2 Filesystem
+>>0x10024 belong x (blocksize %d,
+>>0x10060 string >\0 lockproto %s)
+
+# BTRFS
+0x10040 string _BHRfS_M BTRFS Filesystem
+>0x1012b string >\0 (label "%s",
+>0x10090 lelong x sectorsize %d,
+>0x10094 lelong x nodesize %d,
+>0x10098 lelong x leafsize %d)
+
+
+# dvdisaster's .ecc
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+0 string *dvdisaster* dvdisaster error correction file
+
+# xfs metadump image
+# mb_magic XFSM at 0; superblock magic XFSB at 1 << mb_blocklog
+# but can we do the << ? For now it's always 512 (0x200) anyway.
+0 string XFSM
+>0x200 string XFSB XFS filesystem metadump image
+
+
+#------------------------------------------------------------------------------
+# $File: flash,v 1.9 2009/11/08 01:30:01 christos Exp $
+# flash: file(1) magic for Macromedia Flash file format
+#
+# See
+#
+# http://www.macromedia.com/software/flash/open/
+#
+0 string FWS Macromedia Flash data,
+>3 byte x version %d
+!:mime application/x-shockwave-flash
+0 string CWS Macromedia Flash data (compressed),
+!:mime application/x-shockwave-flash
+>3 byte x version %d
+# From: Cal Peake <cp at absolutedigital.net>
+0 string FLV Macromedia Flash Video
+!:mime video/x-flv
+
+#
+# Yosu Gomez
+0 string AGD2\xbe\xb8\xbb\xcd\x00 Macromedia Freehand 7 Document
+0 string AGD3\xbe\xb8\xbb\xcc\x00 Macromedia Freehand 8 Document
+# From Dave Wilson
+0 string AGD4\xbe\xb8\xbb\xcb\x00 Macromedia Freehand 9 Document
+
+#------------------------------------------------------------------------------
+# $File: fonts,v 1.21 2009/12/06 23:17:52 rrt Exp $
+# fonts: file(1) magic for font data
+#
+0 search/1 FONT ASCII vfont text
+0 short 0436 Berkeley vfont data
+0 short 017001 byte-swapped Berkeley vfont data
+
+# PostScript fonts (must precede "printer" entries), quinlan at yggdrasil.com
+0 string %!PS-AdobeFont-1. PostScript Type 1 font text
+>20 string >\0 (%s)
+6 string %!PS-AdobeFont-1. PostScript Type 1 font program data
+0 string %!FontType1 PostScript Type 1 font program data
+6 string %!FontType1 PostScript Type 1 font program data
+0 string %!PS-Adobe-3.0\ Resource-Font PostScript Type 1 font text
+
+# X11 font files in SNF (Server Natural Format) format
+0 belong 00000004 X11 SNF font data, MSB first
+0 lelong 00000004 X11 SNF font data, LSB first
+
+# X11 Bitmap Distribution Format, from Daniel Quinlan (quinlan at yggdrasil.com)
+0 search/1 STARTFONT\ X11 BDF font text
+
+# X11 fonts, from Daniel Quinlan (quinlan at yggdrasil.com)
+# PCF must come before SGI additions ("MIPSEL MIPS-II COFF" collides)
+0 string \001fcp X11 Portable Compiled Font data
+>12 byte 0x02 \b, LSB first
+>12 byte 0x0a \b, MSB first
+0 string D1.0\015 X11 Speedo font data
+
+#------------------------------------------------------------------------------
+# FIGlet fonts and controlfiles
+# From figmagic supplied with Figlet version 2.2
+# "David E. O'Brien" <obrien at FreeBSD.ORG>
+0 string flf FIGlet font
+>3 string >2a version %-2.2s
+0 string flc FIGlet controlfile
+>3 string >2a version %-2.2s
+
+# libGrx graphics lib fonts, from Albert Cahalan (acahalan at cs.uml.edu)
+# Used with djgpp (DOS Gnu C++), sometimes Linux or Turbo C++
+0 belong 0x14025919 libGrx font data,
+>8 leshort x %dx
+>10 leshort x \b%d
+>40 string x %s
+# Misc. DOS VGA fonts, from Albert Cahalan (acahalan at cs.uml.edu)
+0 belong 0xff464f4e DOS code page font data collection
+7 belong 0x00454741 DOS code page font data
+7 belong 0x00564944 DOS code page font data (from Linux?)
+4098 string DOSFONT DOSFONT2 encrypted font data
+
+# downloadable fonts for browser (prints type) anthon at mnt.org
+0 string PFR1 PFR1 font
+>102 string >0 \b: %s
+
+# True Type fonts
+0 string \000\001\000\000\000 TrueType font data
+!:mime application/x-font-ttf
+
+0 string \007\001\001\000Copyright\ (c)\ 199 Adobe Multiple Master font
+0 string \012\001\001\000Copyright\ (c)\ 199 Adobe Multiple Master font
+
+0 string ttcf TrueType font collection data
+
+# Opentype font data from Avi Bercovich
+0 string OTTO OpenType font data
+!:mime application/vnd.ms-opentype
+
+# Gürkan Sengün <gurkan at linuks.mine.nu>, www.linuks.mine.nu
+0 string SplineFontDB: Spline Font Database
+!:mime application/vnd.font-fontforge-sfd
+>14 string x version %s
+
+#------------------------------------------------------------------------------
+# $File: fortran,v 1.6 2009/09/19 16:28:09 christos Exp $
+# FORTRAN source
+0 regex/100 \^[Cc][\ \t] FORTRAN program
+!:mime text/x-fortran
+
+#------------------------------------------------------------------------------
+# $File: frame,v 1.12 2009/09/19 16:28:09 christos Exp $
+# frame: file(1) magic for FrameMaker files
+#
+# This stuff came on a FrameMaker demo tape, most of which is
+# copyright, but this file is "published" as witness the following:
+#
+# Note that this is the Framemaker Maker Interchange Format, not the
+# Normal format which would be application/vnd.framemaker.
+#
+0 string \<MakerFile FrameMaker document
+!:mime application/x-mif
+>11 string 5.5 (5.5
+>11 string 5.0 (5.0
+>11 string 4.0 (4.0
+>11 string 3.0 (3.0
+>11 string 2.0 (2.0
+>11 string 1.0 (1.0
+>14 byte x %c)
+0 string \<MIFFile FrameMaker MIF (ASCII) file
+!:mime application/x-mif
+>9 string 4.0 (4.0)
+>9 string 3.0 (3.0)
+>9 string 2.0 (2.0)
+>9 string 1.0 (1.x)
+0 search/1 \<MakerDictionary FrameMaker Dictionary text
+!:mime application/x-mif
+>17 string 3.0 (3.0)
+>17 string 2.0 (2.0)
+>17 string 1.0 (1.x)
+0 string \<MakerScreenFont FrameMaker Font file
+!:mime application/x-mif
+>17 string 1.01 (%s)
+0 string \<MML FrameMaker MML file
+!:mime application/x-mif
+0 string \<BookFile FrameMaker Book file
+!:mime application/x-mif
+>10 string 3.0 (3.0
+>10 string 2.0 (2.0
+>10 string 1.0 (1.0
+>13 byte x %c)
+# XXX - this book entry should be verified, if you find one, uncomment this
+#0 string \<Book\ FrameMaker Book (ASCII) file
+#!:mime application/x-mif
+#>6 string 3.0 (3.0)
+#>6 string 2.0 (2.0)
+#>6 string 1.0 (1.0)
+0 string \<Maker Intermediate Print File FrameMaker IPL file
+!:mime application/x-mif
+
+#------------------------------------------------------------------------------
+# $File: freebsd,v 1.7 2009/09/19 16:28:09 christos Exp $
+# freebsd: file(1) magic for FreeBSD objects
+#
+# All new-style FreeBSD magic numbers are in host byte order (i.e.,
+# little-endian on x86).
+#
+# XXX - this comes from the file "freebsd" in a recent FreeBSD version of
+# "file"; it, and the NetBSD stuff in "netbsd", appear to use different
+# schemes for distinguishing between executable images, shared libraries,
+# and object files.
+#
+# FreeBSD says:
+#
+# Regardless of whether it's pure, demand-paged, or none of the
+# above:
+#
+# if the entry point is < 4096, then it's a shared library if
+# the "has run-time loader information" bit is set, and is
+# position-independent if the "is position-independent" bit
+# is set;
+#
+# if the entry point is >= 4096 (or >4095, same thing), then it's
+# an executable, and is dynamically-linked if the "has run-time
+# loader information" bit is set.
+#
+# On x86, NetBSD says:
+#
+# If it's neither pure nor demand-paged:
+#
+# if it has the "has run-time loader information" bit set, it's
+# a dynamically-linked executable;
+#
+# if it doesn't have that bit set, then:
+#
+# if it has the "is position-independent" bit set, it's
+# position-independent;
+#
+# if the entry point is non-zero, it's an executable, otherwise
+# it's an object file.
+#
+# If it's pure:
+#
+# if it has the "has run-time loader information" bit set, it's
+# a dynamically-linked executable, otherwise it's just an
+# executable.
+#
+# If it's demand-paged:
+#
+# if it has the "has run-time loader information" bit set,
+# then:
+#
+# if the entry point is < 4096, it's a shared library;
+#
+# if the entry point is = 4096 or > 4096 (i.e., >= 4096),
+# it's a dynamically-linked executable;
+#
+# if it doesn't have the "has run-time loader information" bit
+# set, then it's just an executable.
+#
+# (On non-x86, NetBSD does much the same thing, except that it uses
+# 8192 on 68K - except for "68k4k", which is presumably "68K with 4K
+# pages" - SPARC, and MIPS, presumably because Sun-3's and Sun-4's
+# had 8K pages; dunno about MIPS.)
+#
+# I suspect the two will differ only in perverse and uninteresting cases
+# ("shared" libraries that aren't demand-paged and whose pages probably
+# won't actually be shared, executables with entry points <4096).
+#
+# I leave it to those more familiar with FreeBSD and NetBSD to figure out
+# what the right answer is (although using ">4095", FreeBSD-style, is
+# probably better than separately checking for "=4096" and ">4096",
+# NetBSD-style). (The old "netbsd" file analyzed FreeBSD demand paged
+# executables using the NetBSD technique.)
+#
+0 lelong&0377777777 041400407 FreeBSD/i386
+>20 lelong <4096
+>>3 byte&0xC0 &0x80 shared library
+>>3 byte&0xC0 0x40 PIC object
+>>3 byte&0xC0 0x00 object
+>20 lelong >4095
+>>3 byte&0x80 0x80 dynamically linked executable
+>>3 byte&0x80 0x00 executable
+>16 lelong >0 not stripped
+
+0 lelong&0377777777 041400410 FreeBSD/i386 pure
+>20 lelong <4096
+>>3 byte&0xC0 &0x80 shared library
+>>3 byte&0xC0 0x40 PIC object
+>>3 byte&0xC0 0x00 object
+>20 lelong >4095
+>>3 byte&0x80 0x80 dynamically linked executable
+>>3 byte&0x80 0x00 executable
+>16 lelong >0 not stripped
+
+0 lelong&0377777777 041400413 FreeBSD/i386 demand paged
+>20 lelong <4096
+>>3 byte&0xC0 &0x80 shared library
+>>3 byte&0xC0 0x40 PIC object
+>>3 byte&0xC0 0x00 object
+>20 lelong >4095
+>>3 byte&0x80 0x80 dynamically linked executable
+>>3 byte&0x80 0x00 executable
+>16 lelong >0 not stripped
+
+0 lelong&0377777777 041400314 FreeBSD/i386 compact demand paged
+>20 lelong <4096
+>>3 byte&0xC0 &0x80 shared library
+>>3 byte&0xC0 0x40 PIC object
+>>3 byte&0xC0 0x00 object
+>20 lelong >4095
+>>3 byte&0x80 0x80 dynamically linked executable
+>>3 byte&0x80 0x00 executable
+>16 lelong >0 not stripped
+
+# XXX gross hack to identify core files
+# cores start with a struct tss; we take advantage of the following:
+# byte 7: highest byte of the kernel stack pointer, always 0xef
+# 8/9: kernel (ring 0) ss value, always 0x0010
+# 10 - 27: ring 1 and 2 ss/esp, unused, thus always 0
+# 28: low order byte of the current PTD entry, always 0 since the
+# PTD is page-aligned
+#
+7 string \357\020\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 FreeBSD/i386 a.out core file
+>1039 string >\0 from '%s'
+
+# /var/run/ld.so.hints
+# What are you laughing about?
+0 lelong 011421044151 ld.so hints file (Little Endian
+>4 lelong >0 \b, version %d)
+>4 belong <1 \b)
+0 belong 011421044151 ld.so hints file (Big Endian
+>4 belong >0 \b, version %d)
+>4 belong <1 \b)
+
+#
+# Files generated by FreeBSD scrshot(1)/vidcontrol(1) utilities
+#
+0 string SCRSHOT_ scrshot(1) screenshot,
+>8 byte x version %d,
+>9 byte 2 %d bytes in header,
+>>10 byte x %d chars wide by
+>>11 byte x %d chars high
+
+#------------------------------------------------------------------------------
+# $File: fsav,v 1.11 2009/09/19 16:28:09 christos Exp $
+# fsav: file(1) magic for datafellows fsav virus definition files
+# Anthon van der Neut (anthon at mnt.org)
+
+# ftp://ftp.f-prot.com/pub/{macrdef2.zip,nomacro.def}
+0 beshort 0x1575 fsav macro virus signatures
+>8 leshort >0 (%d-
+>11 byte >0 \b%02d-
+>10 byte >0 \b%02d)
+# ftp://ftp.f-prot.com/pub/sign.zip
+#10 ubyte <12
+#>9 ubyte <32
+#>>8 ubyte 0x0a
+#>>>12 ubyte 0x07
+#>>>>11 uleshort >0 fsav DOS/Windows virus signatures (%d-
+#>>>>10 byte 0 \b01-
+#>>>>10 byte 1 \b02-
+#>>>>10 byte 2 \b03-
+#>>>>10 byte 3 \b04-
+#>>>>10 byte 4 \b05-
+#>>>>10 byte 5 \b06-
+#>>>>10 byte 6 \b07-
+#>>>>10 byte 7 \b08-
+#>>>>10 byte 8 \b09-
+#>>>>10 byte 9 \b10-
+#>>>>10 byte 10 \b11-
+#>>>>10 byte 11 \b12-
+#>>>>9 ubyte >0 \b%02d)
+# ftp://ftp.f-prot.com/pub/sign2.zip
+#0 ubyte 0x62
+#>1 ubyte 0xF5
+#>>2 ubyte 0x1
+#>>>3 ubyte 0x1
+#>>>>4 ubyte 0x0e
+#>>>>>13 ubyte >0 fsav virus signatures
+#>>>>>>11 ubyte x size 0x%02x
+#>>>>>>12 ubyte x \b%02x
+#>>>>>>13 ubyte x \b%02x bytes
+
+# Joerg Jenderek: joerg dot jenderek at web dot de
+# http://www.clamav.net/doc/latest/html/node45.html
+# .cvd files start with a 512 bytes colon separated header
+# ClamAV-VDB:buildDate:version:signaturesNumbers:functionalityLevelRequired:MD5:Signature:builder:buildTime
+# + gzipped tarball files
+0 string ClamAV-VDB:
+>11 string >\0 Clam AntiVirus database %-.23s
+>>34 string :
+>>>35 string !: \b, version
+>>>>35 string x \b%-.1s
+>>>>>36 string !:
+>>>>>>36 string x \b%-.1s
+>>>>>>>37 string !:
+>>>>>>>>37 string x \b%-.1s
+>>>>>>>>>38 string !:
+>>>>>>>>>>38 string x \b%-.1s
+>512 string \037\213 \b, gzipped
+>769 string ustar\0 \b, tarred
+
+# Type: Grisoft AVG AntiVirus
+# From: David Newgas <david at newgas.net>
+0 string AVG7_ANTIVIRUS_VAULT_FILE AVG 7 Antivirus vault file data
+
+#------------------------------------------------------------------------------
+# $File: games,v 1.8 2009/09/19 16:28:09 christos Exp $
+# games: file(1) magic for games
+
+# Fabio Bonelli <fabiobonelli at libero.it>
+# Quake II - III data files
+0 string IDP2 Quake II 3D Model file,
+>20 long x %lu skin(s),
+>8 long x (%lu x
+>12 long x %lu),
+>40 long x %lu frame(s),
+>16 long x Frame size %lu bytes,
+>24 long x %lu vertices/frame,
+>28 long x %lu texture coordinates,
+>32 long x %lu triangles/frame
+
+0 string IBSP Quake
+>4 long 0x26 II Map file (BSP)
+>4 long 0x2E III Map file (BSP)
+
+0 string IDS2 Quake II SP2 sprite file
+
+#---------------------------------------------------------------------------
+# Doom and Quake
+# submitted by Nicolas Patrois
+
+0 string \xcb\x1dBoom\xe6\xff\x03\x01 Boom or linuxdoom demo
+# some doom lmp files don't match, I've got one beginning with \x6d\x02\x01\x01
+
+24 string LxD\ 203 Linuxdoom save
+>0 string x , name=%s
+>44 string x , world=%s
+
+# Quake
+
+0 string PACK Quake I or II world or extension
+
+#0 string -1\x0a Quake I demo
+#>30 string x version %.4s
+#>61 string x level %s
+
+#0 string 5\x0a Quake I save
+
+# The levels
+
+# Quake 1
+
+0 string 5\x0aIntroduction Quake I save: start Introduction
+0 string 5\x0athe_Slipgate_Complex Quake I save: e1m1 The slipgate complex
+0 string 5\x0aCastle_of_the_Damned Quake I save: e1m2 Castle of the damned
+0 string 5\x0athe_Necropolis Quake I save: e1m3 The necropolis
+0 string 5\x0athe_Grisly_Grotto Quake I save: e1m4 The grisly grotto
+0 string 5\x0aZiggurat_Vertigo Quake I save: e1m8 Ziggurat vertigo (secret)
+0 string 5\x0aGloom_Keep Quake I save: e1m5 Gloom keep
+0 string 5\x0aThe_Door_To_Chthon Quake I save: e1m6 The door to Chthon
+0 string 5\x0aThe_House_of_Chthon Quake I save: e1m7 The house of Chthon
+0 string 5\x0athe_Installation Quake I save: e2m1 The installation
+0 string 5\x0athe_Ogre_Citadel Quake I save: e2m2 The ogre citadel
+0 string 5\x0athe_Crypt_of_Decay Quake I save: e2m3 The crypt of decay (dopefish lives!)
+0 string 5\x0aUnderearth Quake I save: e2m7 Underearth (secret)
+0 string 5\x0athe_Ebon_Fortress Quake I save: e2m4 The ebon fortress
+0 string 5\x0athe_Wizard's_Manse Quake I save: e2m5 The wizard's manse
+0 string 5\x0athe_Dismal_Oubliette Quake I save: e2m6 The dismal oubliette
+0 string 5\x0aTermination_Central Quake I save: e3m1 Termination central
+0 string 5\x0aVaults_of_Zin Quake I save: e3m2 Vaults of Zin
+0 string 5\x0athe_Tomb_of_Terror Quake I save: e3m3 The tomb of terror
+0 string 5\x0aSatan's_Dark_Delight Quake I save: e3m4 Satan's dark delight
+0 string 5\x0athe_Haunted_Halls Quake I save: e3m7 The haunted halls (secret)
+0 string 5\x0aWind_Tunnels Quake I save: e3m5 Wind tunnels
+0 string 5\x0aChambers_of_Torment Quake I save: e3m6 Chambers of torment
+0 string 5\x0athe_Sewage_System Quake I save: e4m1 The sewage system
+0 string 5\x0aThe_Tower_of_Despair Quake I save: e4m2 The tower of despair
+0 string 5\x0aThe_Elder_God_Shrine Quake I save: e4m3 The elder god shrine
+0 string 5\x0athe_Palace_of_Hate Quake I save: e4m4 The palace of hate
+0 string 5\x0aHell's_Atrium Quake I save: e4m5 Hell's atrium
+0 string 5\x0athe_Nameless_City Quake I save: e4m8 The nameless city (secret)
+0 string 5\x0aThe_Pain_Maze Quake I save: e4m6 The pain maze
+0 string 5\x0aAzure_Agony Quake I save: e4m7 Azure agony
+0 string 5\x0aShub-Niggurath's_Pit Quake I save: end Shub-Niggurath's pit
+
+# Quake DeathMatch levels
+
+0 string 5\x0aPlace_of_Two_Deaths Quake I save: dm1 Place of two deaths
+0 string 5\x0aClaustrophobopolis Quake I save: dm2 Claustrophobopolis
+0 string 5\x0aThe_Abandoned_Base Quake I save: dm3 The abandoned base
+0 string 5\x0aThe_Bad_Place Quake I save: dm4 The bad place
+0 string 5\x0aThe_Cistern Quake I save: dm5 The cistern
+0 string 5\x0aThe_Dark_Zone Quake I save: dm6 The dark zone
+
+# Scourge of Armagon
+
+0 string 5\x0aCommand_HQ Quake I save: start Command HQ
+0 string 5\x0aThe_Pumping_Station Quake I save: hip1m1 The pumping station
+0 string 5\x0aStorage_Facility Quake I save: hip1m2 Storage facility
+0 string 5\x0aMilitary_Complex Quake I save: hip1m5 Military complex (secret)
+0 string 5\x0athe_Lost_Mine Quake I save: hip1m3 The lost mine
+0 string 5\x0aResearch_Facility Quake I save: hip1m4 Research facility
+0 string 5\x0aAncient_Realms Quake I save: hip2m1 Ancient realms
+0 string 5\x0aThe_Gremlin's_Domain Quake I save: hip2m6 The gremlin's domain (secret)
+0 string 5\x0aThe_Black_Cathedral Quake I save: hip2m2 The black cathedral
+0 string 5\x0aThe_Catacombs Quake I save: hip2m3 The catacombs
+0 string 5\x0athe_Crypt__ Quake I save: hip2m4 The crypt
+0 string 5\x0aMortum's_Keep Quake I save: hip2m5 Mortum's keep
+0 string 5\x0aTur_Torment Quake I save: hip3m1 Tur torment
+0 string 5\x0aPandemonium Quake I save: hip3m2 Pandemonium
+0 string 5\x0aLimbo Quake I save: hip3m3 Limbo
+0 string 5\x0athe_Edge_of_Oblivion Quake I save: hipdm1 The edge of oblivion (secret)
+0 string 5\x0aThe_Gauntlet Quake I save: hip3m4 The gauntlet
+0 string 5\x0aArmagon's_Lair Quake I save: hipend Armagon's lair
+
+# Malice
+
+0 string 5\x0aThe_Academy Quake I save: start The academy
+0 string 5\x0aThe_Lab Quake I save: d1 The lab
+0 string 5\x0aArea_33 Quake I save: d1b Area 33
+0 string 5\x0aSECRET_MISSIONS Quake I save: d3b Secret missions
+0 string 5\x0aThe_Hospital Quake I save: d10 The hospital (secret)
+0 string 5\x0aThe_Genetics_Lab Quake I save: d11 The genetics lab (secret)
+0 string 5\x0aBACK_2_MALICE Quake I save: d4b Back to Malice
+0 string 5\x0aArea44 Quake I save: d1c Area 44
+0 string 5\x0aTakahiro_Towers Quake I save: d2 Takahiro towers
+0 string 5\x0aA_Rat's_Life Quake I save: d3 A rat's life
+0 string 5\x0aInto_The_Flood Quake I save: d4 Into the flood
+0 string 5\x0aThe_Flood Quake I save: d5 The flood
+0 string 5\x0aNuclear_Plant Quake I save: d6 Nuclear plant
+0 string 5\x0aThe_Incinerator_Plant Quake I save: d7 The incinerator plant
+0 string 5\x0aThe_Foundry Quake I save: d7b The foundry
+0 string 5\x0aThe_Underwater_Base Quake I save: d8 The underwater base
+0 string 5\x0aTakahiro_Base Quake I save: d9 Takahiro base
+0 string 5\x0aTakahiro_Laboratories Quake I save: d12 Takahiro laboratories
+0 string 5\x0aStayin'_Alive Quake I save: d13 Stayin' alive
+0 string 5\x0aB.O.S.S._HQ Quake I save: d14 B.O.S.S. HQ
+0 string 5\x0aSHOWDOWN! Quake I save: d15 Showdown!
+
+# Malice DeathMatch levels
+
+0 string 5\x0aThe_Seventh_Precinct Quake I save: ddm1 The seventh precinct
+0 string 5\x0aSub_Station Quake I save: ddm2 Sub station
+0 string 5\x0aCrazy_Eights! Quake I save: ddm3 Crazy eights!
+0 string 5\x0aEast_Side_Invertationa Quake I save: ddm4 East side invertationa
+0 string 5\x0aSlaughterhouse Quake I save: ddm5 Slaughterhouse
+0 string 5\x0aDOMINO Quake I save: ddm6 Domino
+0 string 5\x0aSANDRA'S_LADDER Quake I save: ddm7 Sandra's ladder
+
+
+0 string MComprHD MAME CHD compressed hard disk image,
+>12 belong x version %lu
+
+# doom - submitted by Jon Dowland
+
+0 string =IWAD doom main IWAD data
+>4 lelong x containing %d lumps
+0 string =PWAD doom patch PWAD data
+>4 lelong x containing %d lumps
+
+
+# Summary: Warcraft 3 save
+# Extension: .w3g
+# Created by: "Nelson A. de Oliveira" <naoliv at gmail.com>
+0 string Warcraft\ III\ recorded\ game %s
+
+
+# Summary: Warcraft 3 map
+# Extension: .w3m
+# Created by: "Nelson A. de Oliveira" <naoliv at gmail.com>
+0 string HM3W Warcraft III map file
+
+
+# Summary: SGF Smart Game Format
+# Extension: .sgf
+# Reference: http://www.red-bean.com/sgf/
+# Created by: Eduardo Sabbatella <eduardo_sabbatella at yahoo.com.ar>
+# Modified by (1): Abel Cheung (regex, more game format)
+# FIXME: Some games don't have GM (game type)
+0 regex \\(;.*GM\\[[0-9]{1,2}\\] Smart Game Format
+>2 search/0x200 GM[
+>>&0 string 1] (Go)
+>>&0 string 2] (Othello)
+>>&0 string 3] (chess)
+>>&0 string 4] (Gomoku+Renju)
+>>&0 string 5] (Nine Men's Morris)
+>>&0 string 6] (Backgammon)
+>>&0 string 7] (Chinese chess)
+>>&0 string 8] (Shogi)
+>>&0 string 9] (Lines of Action)
+>>&0 string 10] (Ataxx)
+>>&0 string 11] (Hex)
+>>&0 string 12] (Jungle)
+>>&0 string 13] (Neutron)
+>>&0 string 14] (Philosopher's Football)
+>>&0 string 15] (Quadrature)
+>>&0 string 16] (Trax)
+>>&0 string 17] (Tantrix)
+>>&0 string 18] (Amazons)
+>>&0 string 19] (Octi)
+>>&0 string 20] (Gess)
+>>&0 string 21] (Twixt)
+>>&0 string 22] (Zertz)
+>>&0 string 23] (Plateau)
+>>&0 string 24] (Yinsh)
+>>&0 string 25] (Punct)
+>>&0 string 26] (Gobblet)
+>>&0 string 27] (hive)
+>>&0 string 28] (Exxit)
+>>&0 string 29] (Hnefatal)
+>>&0 string 30] (Kuba)
+>>&0 string 31] (Tripples)
+>>&0 string 32] (Chase)
+>>&0 string 33] (Tumbling Down)
+>>&0 string 34] (Sahara)
+>>&0 string 35] (Byte)
+>>&0 string 36] (Focus)
+>>&0 string 37] (Dvonn)
+>>&0 string 38] (Tamsk)
+>>&0 string 39] (Gipf)
+>>&0 string 40] (Kropki)
+
+
+# Summary: Civilization 4 video
+# Extension: .bik
+# Created by: Abel Cheung <abelcheung at gmail.com>
+0 string BIKi Civilization 4 Video
+
+
+##############################################
+# NetImmerse/Gamebryo game engine entries
+
+# Summary: Gamebryo game engine file
+# Extension: .nif, .kf
+# Created by: Abel Cheung <abelcheung at gmail.com>
+0 string Gamebryo\ File\ Format,\ Version\ Gamebryo game engine file
+>&0 regex [0-9a-z.]+ \b, version %s
+
+# Summary: Gamebryo game engine file
+# Extension: .kfm
+# Created by: Abel Cheung <abelcheung at gmail.com>
+0 string ;Gamebryo\ KFM\ File\ Version\ Gamebryo game engine animation File
+>&0 regex [0-9a-z.]+ \b, version %s
+
+# Summary: NetImmerse game engine file
+# Extension: .nif
+# Created by: Abel Cheung <abelcheung at gmail.com>
+0 string NetImmerse\ File\ Format,\ Versio
+>&0 string n\ NetImmerse game engine file
+>>&0 regex [0-9a-z.]+ \b, version %s
+
+
+#------------------------------------------------------------------------------
+# $File: gcc,v 1.4 2009/09/19 16:28:09 christos Exp $
+# gcc: file(1) magic for GCC special files
+#
+0 string gpch GCC precompiled header
+
+# The version field is annoying. It's 3 characters, not zero-terminated.
+>5 byte x (version %c
+>6 byte x \b%c
+>7 byte x \b%c)
+
+# 67 = 'C', 111 = 'o', 43 = '+', 79 = 'O'
+>4 byte 67 for C
+>4 byte 111 for Objective C
+>4 byte 43 for C++
+>4 byte 79 for Objective C++
+
+#------------------------------------------------------------------------------
+# $File: geos,v 1.4 2009/09/19 16:28:09 christos Exp $
+# GEOS files (Vidar Madsen, vidar at gimp.org)
+# semi-commonly used in embedded and handheld systems.
+0 belong 0xc745c153 GEOS
+>40 byte 1 executable
+>40 byte 2 VMFile
+>40 byte 3 binary
+>40 byte 4 directory label
+>40 byte <1 unknown
+>40 byte >4 unknown
+>4 string >\0 \b, name "%s"
+#>44 short x \b, version %d
+#>46 short x \b.%d
+#>48 short x \b, rev %d
+#>50 short x \b.%d
+#>52 short x \b, proto %d
+#>54 short x \br%d
+#>168 string >\0 \b, copyright "%s"
+
+#------------------------------------------------------------------------------
+# $File: gimp,v 1.6 2009/09/19 16:28:09 christos Exp $
+# GIMP Gradient: file(1) magic for the GIMP's gradient data files
+# by Federico Mena <federico at nuclecu.unam.mx>
+
+0 string GIMP\ Gradient GIMP gradient data
+
+#------------------------------------------------------------------------------
+# XCF: file(1) magic for the XCF image format used in the GIMP developed
+# by Spencer Kimball and Peter Mattis
+# ('Bucky' LaDieu, nega at vt.edu)
+
+0 string gimp\ xcf GIMP XCF image data,
+>9 string file version 0,
+>9 string v version
+>>10 string >\0 %s,
+>14 belong x %lu x
+>18 belong x %lu,
+>22 belong 0 RGB Color
+>22 belong 1 Greyscale
+>22 belong 2 Indexed Color
+>22 belong >2 Unknown Image Type.
+
+#------------------------------------------------------------------------------
+# XCF: file(1) magic for the patterns used in the GIMP, developed
+# by Spencer Kimball and Peter Mattis
+# ('Bucky' LaDieu, nega at vt.edu)
+
+20 string GPAT GIMP pattern data,
+>24 string x %s
+
+#------------------------------------------------------------------------------
+# XCF: file(1) magic for the brushes used in the GIMP, developed
+# by Spencer Kimball and Peter Mattis
+# ('Bucky' LaDieu, nega at vt.edu)
+
+20 string GIMP GIMP brush data
+
+# GIMP Curves File
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+0 string #\040GIMP\040Curves\040File GIMP curve file
+
+#------------------------------------------------------------------------------
+# $File: gnome-keyring,v 1.2 2009/09/19 16:28:09 christos Exp $
+# GNOME keyring
+# Contributed by Josh Triplett
+# FIXME: Could be simplified if pstring supported two-byte counts
+0 string GnomeKeyring\n\r\0\n GNOME keyring
+>&0 ubyte 0 \b, major version 0
+>>&0 ubyte 0 \b, minor version 0
+>>>&0 ubyte 0 \b, crypto type 0 (AEL)
+>>>&0 ubyte >0 \b, crypto type %hhu (unknown)
+>>>&1 ubyte 0 \b, hash type 0 (MD5)
+>>>&1 ubyte >0 \b, hash type %hhu (unknown)
+>>>&2 ubelong 0xFFFFFFFF \b, name NULL
+>>>&2 ubelong !0xFFFFFFFF
+>>>>&-4 ubelong >255 \b, name too long for file's pstring type
+>>>>&-4 ubelong <256
+>>>>>&-1 pstring x \b, name "%s"
+>>>>>>&0 ubeqdate x \b, last modified %s
+>>>>>>&8 ubeqdate x \b, created %s
+>>>>>>&16 ubelong &1
+>>>>>>>&0 ubelong x \b, locked if idle for %u seconds
+>>>>>>&16 ubelong ^1 \b, not locked if idle
+>>>>>>&24 ubelong x \b, hash iterations %u
+>>>>>>&28 ubequad x \b, salt %llu
+>>>>>>&52 ubelong x \b, %u item(s)
+
+#------------------------------------------------------------------------------
+# $File: gnu,v 1.11 2009/09/19 16:28:09 christos Exp $
+# gnu: file(1) magic for various GNU tools
+#
+# GNU nlsutils message catalog file format
+#
+0 string \336\22\4\225 GNU message catalog (little endian),
+>4 lelong x revision %d,
+>8 lelong x %d messages
+0 string \225\4\22\336 GNU message catalog (big endian),
+>4 belong x revision %d,
+>8 belong x %d messages
+# message catalogs, from Mitchum DSouza <m.dsouza at mrc-apu.cam.ac.uk>
+0 string *nazgul* Nazgul style compiled message catalog
+>8 lelong >0 \b, version %ld
+
+# GnuPG
+# The format is very similar to pgp
+0 string \001gpg GPG key trust database
+>4 byte x version %d
+# Note: magic.mime had 0x8501 for the next line instead of 0x8502
+0 beshort 0x8502 GPG encrypted data
+!:mime text/PGP # encoding: data
+
+# This magic is not particularly good, as the keyrings don't have true
+# magic. Nevertheless, it covers many keyrings.
+0 beshort 0x9901 GPG key public ring
+!:mime application/x-gnupg-keyring
+
+# Gnumeric spreadsheet
+# This entry is only semi-helpful, as Gnumeric compresses its files, so
+# they will ordinarily be reported as "compressed", but at least -z helps
+39 string =<gmr:Workbook Gnumeric spreadsheet
+
+# From: James Youngman <jay at gnu.org>
+# gnu find magic
+0 string \0LOCATE GNU findutils locate database data
+>7 string >\0 \b, format %s
+>7 string 02 \b (frcode)
+
+# Files produced by GNU gettext
+0 long 0xDE120495 GNU-format message catalog data
+0 long 0x950412DE GNU-format message catalog data
+
+#------------------------------------------------------------------------------
+# $File: gnumeric,v 1.4 2009/09/19 16:28:09 christos Exp $
+# gnumeric: file(1) magic for Gnumeric spreadsheet
+# This entry is only semi-helpful, as Gnumeric compresses its files, so
+# they will ordinarily be reported as "compressed", but at least -z helps
+39 string =<gmr:Workbook Gnumeric spreadsheet
+!:mime application/x-gnumeric
+
+#------------------------------------------------------------------------------
+# $File: grace,v 1.4 2009/09/19 16:28:09 christos Exp $
+# ACE/gr and Grace type files - PLEASE DO NOT REMOVE THIS LINE
+#
+# ACE/gr binary
+0 string \000\000\0001\000\000\0000\000\000\0000\000\000\0002\000\000\0000\000\000\0000\000\000\0003 old ACE/gr binary file
+>39 byte >0 - version %c
+# ACE/gr ascii
+0 string #\ xvgr\ parameter\ file ACE/gr ascii file
+0 string #\ xmgr\ parameter\ file ACE/gr ascii file
+0 string #\ ACE/gr\ parameter\ file ACE/gr ascii file
+# Grace projects
+0 string #\ Grace\ project\ file Grace project file
+>23 string @version\ (version
+>>32 byte >0 %c
+>>33 string >\0 \b.%.2s
+>>35 string >\0 \b.%.2s)
+# ACE/gr fit description files
+0 string #\ ACE/gr\ fit\ description\ ACE/gr fit description file
+# end of ACE/gr and Grace type files - PLEASE DO NOT REMOVE THIS LINE
+
+#------------------------------------------------------------------------------
+# $File: graphviz,v 1.7 2009/09/19 16:28:09 christos Exp $
+# graphviz: file(1) magic for http://www.graphviz.org/
+
+# FIXME: These patterns match too generally. For example, the first
+# line matches a LaTeX file containing the word "graph" (with a {
+# following later) and the second line matches this file.
+#0 regex/100 [\r\n\t\ ]*graph[\r\n\t\ ]+.*\\{ graphviz graph text
+#!:mime text/vnd.graphviz
+#0 regex/100 [\r\n\t\ ]*digraph[\r\n\t\ ]+.*\\{ graphviz digraph text
+#!:mime text/vnd.graphviz
+
+#------------------------------------------------------------------------------
+# $File: gringotts,v 1.5 2009/09/19 16:28:09 christos Exp $
+# gringotts: file(1) magic for Gringotts
+# http://devel.pluto.linux.it/projects/Gringotts/
+# author: Germano Rizzo <mano at pluto.linux.it>
+#GRG3????Y
+0 string GRG Gringotts data file
+#file format 1
+>3 string 1 v.1, MCRYPT S2K, SERPENT crypt, SHA-256 hash, ZLib lvl.9
+#file format 2
+>3 string 2 v.2, MCRYPT S2K,
+>>8 byte&0x70 0x00 RIJNDAEL-128 crypt,
+>>8 byte&0x70 0x10 SERPENT crypt,
+>>8 byte&0x70 0x20 TWOFISH crypt,
+>>8 byte&0x70 0x30 CAST-256 crypt,
+>>8 byte&0x70 0x40 SAFER+ crypt,
+>>8 byte&0x70 0x50 LOKI97 crypt,
+>>8 byte&0x70 0x60 3DES crypt,
+>>8 byte&0x70 0x70 RIJNDAEL-256 crypt,
+>>8 byte&0x08 0x00 SHA1 hash,
+>>8 byte&0x08 0x08 RIPEMD-160 hash,
+>>8 byte&0x04 0x00 ZLib
+>>8 byte&0x04 0x04 BZip2
+>>8 byte&0x03 0x00 lvl.0
+>>8 byte&0x03 0x01 lvl.3
+>>8 byte&0x03 0x02 lvl.6
+>>8 byte&0x03 0x03 lvl.9
+#file format 3
+>3 string 3 v.3, OpenPGP S2K,
+>>8 byte&0x70 0x00 RIJNDAEL-128 crypt,
+>>8 byte&0x70 0x10 SERPENT crypt,
+>>8 byte&0x70 0x20 TWOFISH crypt,
+>>8 byte&0x70 0x30 CAST-256 crypt,
+>>8 byte&0x70 0x40 SAFER+ crypt,
+>>8 byte&0x70 0x50 LOKI97 crypt,
+>>8 byte&0x70 0x60 3DES crypt,
+>>8 byte&0x70 0x70 RIJNDAEL-256 crypt,
+>>8 byte&0x08 0x00 SHA1 hash,
+>>8 byte&0x08 0x08 RIPEMD-160 hash,
+>>8 byte&0x04 0x00 ZLib
+>>8 byte&0x04 0x04 BZip2
+>>8 byte&0x03 0x00 lvl.0
+>>8 byte&0x03 0x01 lvl.3
+>>8 byte&0x03 0x02 lvl.6
+>>8 byte&0x03 0x03 lvl.9
+#file format >3
+>3 string >3 v.%.1s (unknown details)
+
+#------------------------------------------------------------------------------
+# $File: hitachi-sh,v 1.5 2009/09/19 16:28:09 christos Exp $
+# hitachi-sh: file(1) magic for Hitachi Super-H
+#
+# Super-H COFF
+#
+0 beshort 0x0500 Hitachi SH big-endian COFF
+>18 beshort&0x0002 =0x0000 object
+>18 beshort&0x0002 =0x0002 executable
+>18 beshort&0x0008 =0x0008 \b, stripped
+>18 beshort&0x0008 =0x0000 \b, not stripped
+#
+0 leshort 0x0550 Hitachi SH little-endian COFF
+>18 leshort&0x0002 =0x0000 object
+>18 leshort&0x0002 =0x0002 executable
+>18 leshort&0x0008 =0x0008 \b, stripped
+>18 leshort&0x0008 =0x0000 \b, not stripped
+
+
+#------------------------------------------------------------------------------
+# $File: hp,v 1.23 2009/09/19 16:28:09 christos Exp $
+# hp: file(1) magic for Hewlett Packard machines (see also "printer")
+#
+# XXX - somebody should figure out whether any byte order needs to be
+# applied to the "TML" stuff; I'm assuming the Apollo stuff is
+# big-endian as it was mostly 68K-based.
+#
+# I think the 500 series was the old stack-based machines, running a
+# UNIX environment atop the "SUN kernel"; dunno whether it was
+# big-endian or little-endian.
+#
+# Daniel Quinlan (quinlan at yggdrasil.com): hp200 machines are 68010 based;
+# hp300 are 68020+68881 based; hp400 are also 68k. The following basic
+# HP magic is useful for reference, but using "long" magic is a better
+# practice in order to avoid collisions.
+#
+# Guy Harris (guy at netapp.com): some additions to this list came from
+# HP-UX 10.0's "/usr/include/sys/unistd.h" (68030, 68040, PA-RISC 1.1,
+# 1.2, and 2.0). The 1.2 and 2.0 stuff isn't in the HP-UX 10.0
+# "/etc/magic", though, except for the "archive file relocatable library"
+# stuff, and the 68030 and 68040 stuff isn't there at all - are they not
+# used in executables, or have they just not yet updated "/etc/magic"
+# completely?
+#
+# 0 beshort 200 hp200 (68010) BSD binary
+# 0 beshort 300 hp300 (68020+68881) BSD binary
+# 0 beshort 0x20c hp200/300 HP-UX binary
+# 0 beshort 0x20d hp400 (68030) HP-UX binary
+# 0 beshort 0x20e hp400 (68040?) HP-UX binary
+# 0 beshort 0x20b PA-RISC1.0 HP-UX binary
+# 0 beshort 0x210 PA-RISC1.1 HP-UX binary
+# 0 beshort 0x211 PA-RISC1.2 HP-UX binary
+# 0 beshort 0x214 PA-RISC2.0 HP-UX binary
+
+#
+# The "misc" stuff needs a byte order; the archives look suspiciously
+# like the old 177545 archives (0xff65 = 0177545).
+#
+#### Old Apollo stuff
+0 beshort 0627 Apollo m68k COFF executable
+>18 beshort ^040000 not stripped
+>22 beshort >0 - version %ld
+0 beshort 0624 apollo a88k COFF executable
+>18 beshort ^040000 not stripped
+>22 beshort >0 - version %ld
+0 long 01203604016 TML 0123 byte-order format
+0 long 01702407010 TML 1032 byte-order format
+0 long 01003405017 TML 2301 byte-order format
+0 long 01602007412 TML 3210 byte-order format
+#### PA-RISC 1.1
+0 belong 0x02100106 PA-RISC1.1 relocatable object
+0 belong 0x02100107 PA-RISC1.1 executable
+>168 belong &0x00000004 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x02100108 PA-RISC1.1 shared executable
+>168 belong&0x4 0x4 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x0210010b PA-RISC1.1 demand-load executable
+>168 belong&0x4 0x4 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x0210010e PA-RISC1.1 shared library
+>96 belong >0 - not stripped
+
+0 belong 0x0210010d PA-RISC1.1 dynamic load library
+>96 belong >0 - not stripped
+
+#### PA-RISC 2.0
+0 belong 0x02140106 PA-RISC2.0 relocatable object
+
+0 belong 0x02140107 PA-RISC2.0 executable
+>168 belong &0x00000004 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x02140108 PA-RISC2.0 shared executable
+>168 belong &0x00000004 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x0214010b PA-RISC2.0 demand-load executable
+>168 belong &0x00000004 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x0214010e PA-RISC2.0 shared library
+>96 belong >0 - not stripped
+
+0 belong 0x0214010d PA-RISC2.0 dynamic load library
+>96 belong >0 - not stripped
+
+#### 800
+0 belong 0x020b0106 PA-RISC1.0 relocatable object
+
+0 belong 0x020b0107 PA-RISC1.0 executable
+>168 belong&0x4 0x4 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x020b0108 PA-RISC1.0 shared executable
+>168 belong&0x4 0x4 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x020b010b PA-RISC1.0 demand-load executable
+>168 belong&0x4 0x4 dynamically linked
+>(144) belong 0x054ef630 dynamically linked
+>96 belong >0 - not stripped
+
+0 belong 0x020b010e PA-RISC1.0 shared library
+>96 belong >0 - not stripped
+
+0 belong 0x020b010d PA-RISC1.0 dynamic load library
+>96 belong >0 - not stripped
+
+0 belong 0x213c6172 archive file
+>68 belong 0x020b0619 - PA-RISC1.0 relocatable library
+>68 belong 0x02100619 - PA-RISC1.1 relocatable library
+>68 belong 0x02110619 - PA-RISC1.2 relocatable library
+>68 belong 0x02140619 - PA-RISC2.0 relocatable library
+
+#### 500
+0 long 0x02080106 HP s500 relocatable executable
+>16 long >0 - version %ld
+
+0 long 0x02080107 HP s500 executable
+>16 long >0 - version %ld
+
+0 long 0x02080108 HP s500 pure executable
+>16 long >0 - version %ld
+
+#### 200
+0 belong 0x020c0108 HP s200 pure executable
+>4 beshort >0 - version %ld
+>8 belong &0x80000000 save fp regs
+>8 belong &0x40000000 dynamically linked
+>8 belong &0x20000000 debuggable
+>36 belong >0 not stripped
+
+0 belong 0x020c0107 HP s200 executable
+>4 beshort >0 - version %ld
+>8 belong &0x80000000 save fp regs
+>8 belong &0x40000000 dynamically linked
+>8 belong &0x20000000 debuggable
+>36 belong >0 not stripped
+
+0 belong 0x020c010b HP s200 demand-load executable
+>4 beshort >0 - version %ld
+>8 belong &0x80000000 save fp regs
+>8 belong &0x40000000 dynamically linked
+>8 belong &0x20000000 debuggable
+>36 belong >0 not stripped
+
+0 belong 0x020c0106 HP s200 relocatable executable
+>4 beshort >0 - version %ld
+>6 beshort >0 - highwater %d
+>8 belong &0x80000000 save fp regs
+>8 belong &0x20000000 debuggable
+>8 belong &0x10000000 PIC
+
+0 belong 0x020a0108 HP s200 (2.x release) pure executable
+>4 beshort >0 - version %ld
+>36 belong >0 not stripped
+
+0 belong 0x020a0107 HP s200 (2.x release) executable
+>4 beshort >0 - version %ld
+>36 belong >0 not stripped
+
+0 belong 0x020c010e HP s200 shared library
+>4 beshort >0 - version %ld
+>6 beshort >0 - highwater %d
+>36 belong >0 not stripped
+
+0 belong 0x020c010d HP s200 dynamic load library
+>4 beshort >0 - version %ld
+>6 beshort >0 - highwater %d
+>36 belong >0 not stripped
+
+#### MISC
+0 long 0x0000ff65 HP old archive
+0 long 0x020aff65 HP s200 old archive
+0 long 0x020cff65 HP s200 old archive
+0 long 0x0208ff65 HP s500 old archive
+
+0 long 0x015821a6 HP core file
+
+0 long 0x4da7eee8 HP-WINDOWS font
+>8 byte >0 - version %ld
+0 string Bitmapfile HP Bitmapfile
+
+0 string IMGfile CIS compimg HP Bitmapfile
+# XXX - see "lif"
+#0 short 0x8000 lif file
+0 long 0x020c010c compiled Lisp
+
+0 string msgcat01 HP NLS message catalog,
+>8 long >0 %d messages
+
+# Summary: HP-48/49 calculator
+# Created by: phk at data.fls.dk
+# Modified by (1): AMAKAWA Shuhei <sa264 at cam.ac.uk>
+# Modified by (2): Samuel Thibault <samuel.thibault at ens-lyon.org> (HP49 support)
+0 string HPHP HP
+>4 string 48 48 binary
+>4 string 49 49 binary
+>7 byte >64 - Rev %c
+>8 leshort 0x2911 (ADR)
+>8 leshort 0x2933 (REAL)
+>8 leshort 0x2955 (LREAL)
+>8 leshort 0x2977 (COMPLX)
+>8 leshort 0x299d (LCOMPLX)
+>8 leshort 0x29bf (CHAR)
+>8 leshort 0x29e8 (ARRAY)
+>8 leshort 0x2a0a (LNKARRAY)
+>8 leshort 0x2a2c (STRING)
+>8 leshort 0x2a4e (HXS)
+>8 leshort 0x2a74 (LIST)
+>8 leshort 0x2a96 (DIR)
+>8 leshort 0x2ab8 (ALG)
+>8 leshort 0x2ada (UNIT)
+>8 leshort 0x2afc (TAGGED)
+>8 leshort 0x2b1e (GROB)
+>8 leshort 0x2b40 (LIB)
+>8 leshort 0x2b62 (BACKUP)
+>8 leshort 0x2b88 (LIBDATA)
+>8 leshort 0x2d9d (PROG)
+>8 leshort 0x2dcc (CODE)
+>8 leshort 0x2e48 (GNAME)
+>8 leshort 0x2e6d (LNAME)
+>8 leshort 0x2e92 (XLIB)
+
+0 string %%HP: HP text
+>6 string T(0) - T(0)
+>6 string T(1) - T(1)
+>6 string T(2) - T(2)
+>6 string T(3) - T(3)
+>10 string A(D) A(D)
+>10 string A(R) A(R)
+>10 string A(G) A(G)
+>14 string F(.) F(.);
+>14 string F(,) F(,);
+
+
+# Summary: HP-38/39 calculator
+# Created by: Samuel Thibault <samuel.thibault at ens-lyon.org>
+0 string HP3
+>3 string 8 HP 38
+>3 string 9 HP 39
+>4 string Bin binary
+>4 string Asc ASCII
+>7 string A (Directory List)
+>7 string B (Zaplet)
+>7 string C (Note)
+>7 string D (Program)
+>7 string E (Variable)
+>7 string F (List)
+>7 string G (Matrix)
+>7 string H (Library)
+>7 string I (Target List)
+>7 string J (ASCII Vector specification)
+>7 string K (wildcard)
+
+# Summary: HP-38/39 calculator
+# Created by: Samuel Thibault <samuel.thibault at ens-lyon.org>
+0 string HP3
+>3 string 8 HP 38
+>3 string 9 HP 39
+>4 string Bin binary
+>4 string Asc ASCII
+>7 string A (Directory List)
+>7 string B (Zaplet)
+>7 string C (Note)
+>7 string D (Program)
+>7 string E (Variable)
+>7 string F (List)
+>7 string G (Matrix)
+>7 string H (Library)
+>7 string I (Target List)
+>7 string J (ASCII Vector specification)
+>7 string K (wildcard)
+
+# hpBSD magic numbers
+0 beshort 200 hp200 (68010) BSD
+>2 beshort 0407 impure binary
+>2 beshort 0410 read-only binary
+>2 beshort 0413 demand paged binary
+0 beshort 300 hp300 (68020+68881) BSD
+>2 beshort 0407 impure binary
+>2 beshort 0410 read-only binary
+>2 beshort 0413 demand paged binary
+#
+# From David Gero <dgero at nortelnetworks.com>
+# HP-UX 10.20 core file format from /usr/include/sys/core.h
+# Unfortunately, HP-UX uses corehead blocks without specifying the order
+# There are four we care about:
+# CORE_KERNEL, which starts with the string "HP-UX"
+# CORE_EXEC, which contains the name of the command
+# CORE_PROC, which contains the signal number that caused the core dump
+# CORE_FORMAT, which contains the version of the core file format (== 1)
+# The only observed order in real core files is KERNEL, EXEC, FORMAT, PROC
+# but we include all 6 variations of the order of the first 3, and
+# assume that PROC will always be last
+# Order 1: KERNEL, EXEC, FORMAT, PROC
+0x10 string HP-UX
+>0 belong 2
+>>0xC belong 0x3C
+>>>0x4C belong 0x100
+>>>>0x58 belong 0x44
+>>>>>0xA0 belong 1
+>>>>>>0xAC belong 4
+>>>>>>>0xB0 belong 1
+>>>>>>>>0xB4 belong 4 core file
+>>>>>>>>>0x90 string >\0 from '%s'
+>>>>>>>>>0xC4 belong 3 - received SIGQUIT
+>>>>>>>>>0xC4 belong 4 - received SIGILL
+>>>>>>>>>0xC4 belong 5 - received SIGTRAP
+>>>>>>>>>0xC4 belong 6 - received SIGABRT
+>>>>>>>>>0xC4 belong 7 - received SIGEMT
+>>>>>>>>>0xC4 belong 8 - received SIGFPE
+>>>>>>>>>0xC4 belong 10 - received SIGBUS
+>>>>>>>>>0xC4 belong 11 - received SIGSEGV
+>>>>>>>>>0xC4 belong 12 - received SIGSYS
+>>>>>>>>>0xC4 belong 33 - received SIGXCPU
+>>>>>>>>>0xC4 belong 34 - received SIGXFSZ
+# Order 2: KERNEL, FORMAT, EXEC, PROC
+>>>0x4C belong 1
+>>>>0x58 belong 4
+>>>>>0x5C belong 1
+>>>>>>0x60 belong 0x100
+>>>>>>>0x6C belong 0x44
+>>>>>>>>0xB4 belong 4 core file
+>>>>>>>>>0xA4 string >\0 from '%s'
+>>>>>>>>>0xC4 belong 3 - received SIGQUIT
+>>>>>>>>>0xC4 belong 4 - received SIGILL
+>>>>>>>>>0xC4 belong 5 - received SIGTRAP
+>>>>>>>>>0xC4 belong 6 - received SIGABRT
+>>>>>>>>>0xC4 belong 7 - received SIGEMT
+>>>>>>>>>0xC4 belong 8 - received SIGFPE
+>>>>>>>>>0xC4 belong 10 - received SIGBUS
+>>>>>>>>>0xC4 belong 11 - received SIGSEGV
+>>>>>>>>>0xC4 belong 12 - received SIGSYS
+>>>>>>>>>0xC4 belong 33 - received SIGXCPU
+>>>>>>>>>0xC4 belong 34 - received SIGXFSZ
+# Order 3: FORMAT, KERNEL, EXEC, PROC
+0x24 string HP-UX
+>0 belong 1
+>>0xC belong 4
+>>>0x10 belong 1
+>>>>0x14 belong 2
+>>>>>0x20 belong 0x3C
+>>>>>>0x60 belong 0x100
+>>>>>>>0x6C belong 0x44
+>>>>>>>>0xB4 belong 4 core file
+>>>>>>>>>0xA4 string >\0 from '%s'
+>>>>>>>>>0xC4 belong 3 - received SIGQUIT
+>>>>>>>>>0xC4 belong 4 - received SIGILL
+>>>>>>>>>0xC4 belong 5 - received SIGTRAP
+>>>>>>>>>0xC4 belong 6 - received SIGABRT
+>>>>>>>>>0xC4 belong 7 - received SIGEMT
+>>>>>>>>>0xC4 belong 8 - received SIGFPE
+>>>>>>>>>0xC4 belong 10 - received SIGBUS
+>>>>>>>>>0xC4 belong 11 - received SIGSEGV
+>>>>>>>>>0xC4 belong 12 - received SIGSYS
+>>>>>>>>>0xC4 belong 33 - received SIGXCPU
+>>>>>>>>>0xC4 belong 34 - received SIGXFSZ
+# Order 4: EXEC, KERNEL, FORMAT, PROC
+0x64 string HP-UX
+>0 belong 0x100
+>>0xC belong 0x44
+>>>0x54 belong 2
+>>>>0x60 belong 0x3C
+>>>>>0xA0 belong 1
+>>>>>>0xAC belong 4
+>>>>>>>0xB0 belong 1
+>>>>>>>>0xB4 belong 4 core file
+>>>>>>>>>0x44 string >\0 from '%s'
+>>>>>>>>>0xC4 belong 3 - received SIGQUIT
+>>>>>>>>>0xC4 belong 4 - received SIGILL
+>>>>>>>>>0xC4 belong 5 - received SIGTRAP
+>>>>>>>>>0xC4 belong 6 - received SIGABRT
+>>>>>>>>>0xC4 belong 7 - received SIGEMT
+>>>>>>>>>0xC4 belong 8 - received SIGFPE
+>>>>>>>>>0xC4 belong 10 - received SIGBUS
+>>>>>>>>>0xC4 belong 11 - received SIGSEGV
+>>>>>>>>>0xC4 belong 12 - received SIGSYS
+>>>>>>>>>0xC4 belong 33 - received SIGXCPU
+>>>>>>>>>0xC4 belong 34 - received SIGXFSZ
+# Order 5: FORMAT, EXEC, KERNEL, PROC
+0x78 string HP-UX
+>0 belong 1
+>>0xC belong 4
+>>>0x10 belong 1
+>>>>0x14 belong 0x100
+>>>>>0x20 belong 0x44
+>>>>>>0x68 belong 2
+>>>>>>>0x74 belong 0x3C
+>>>>>>>>0xB4 belong 4 core file
+>>>>>>>>>0x58 string >\0 from '%s'
+>>>>>>>>>0xC4 belong 3 - received SIGQUIT
+>>>>>>>>>0xC4 belong 4 - received SIGILL
+>>>>>>>>>0xC4 belong 5 - received SIGTRAP
+>>>>>>>>>0xC4 belong 6 - received SIGABRT
+>>>>>>>>>0xC4 belong 7 - received SIGEMT
+>>>>>>>>>0xC4 belong 8 - received SIGFPE
+>>>>>>>>>0xC4 belong 10 - received SIGBUS
+>>>>>>>>>0xC4 belong 11 - received SIGSEGV
+>>>>>>>>>0xC4 belong 12 - received SIGSYS
+>>>>>>>>>0xC4 belong 33 - received SIGXCPU
+>>>>>>>>>0xC4 belong 34 - received SIGXFSZ
+# Order 6: EXEC, FORMAT, KERNEL, PROC (KERNEL length is 0x3C, so PROC starts at 0x78+0x3C = 0xB4)
+>0 belong 0x100
+>>0xC belong 0x44
+>>>0x54 belong 1
+>>>>0x60 belong 4
+>>>>>0x64 belong 1
+>>>>>>0x68 belong 2
+>>>>>>>0x74 belong 0x3C
+>>>>>>>>0xB4 belong 4 core file
+>>>>>>>>>0x44 string >\0 from '%s'
+>>>>>>>>>0xC4 belong 3 - received SIGQUIT
+>>>>>>>>>0xC4 belong 4 - received SIGILL
+>>>>>>>>>0xC4 belong 5 - received SIGTRAP
+>>>>>>>>>0xC4 belong 6 - received SIGABRT
+>>>>>>>>>0xC4 belong 7 - received SIGEMT
+>>>>>>>>>0xC4 belong 8 - received SIGFPE
+>>>>>>>>>0xC4 belong 10 - received SIGBUS
+>>>>>>>>>0xC4 belong 11 - received SIGSEGV
+>>>>>>>>>0xC4 belong 12 - received SIGSYS
+>>>>>>>>>0xC4 belong 33 - received SIGXCPU
+>>>>>>>>>0xC4 belong 34 - received SIGXFSZ
+
+
+
+#------------------------------------------------------------------------------
+# $File: human68k,v 1.5 2009/09/19 16:28:09 christos Exp $
+# human68k: file(1) magic for Human68k (X680x0 DOS) binary formats
+# Magic too short!
+#0 string HU Human68k
+#>68 string LZX LZX compressed
+#>>72 string >\0 (version %s)
+#>(8.L+74) string LZX LZX compressed
+#>>(8.L+78) string >\0 (version %s)
+#>60 belong >0 binded
+#>(8.L+66) string #HUPAIR hupair
+#>0 string HU X executable
+#>(8.L+74) string #LIBCV1 - linked PD LIBC ver 1
+#>4 belong >0 - base address 0x%x
+#>28 belong >0 not stripped
+#>32 belong >0 with debug information
+#0 beshort 0x601a Human68k Z executable
+#0 beshort 0x6000 Human68k object file
+#0 belong 0xd1000000 Human68k ar binary archive
+#0 belong 0xd1010000 Human68k ar ascii archive
+#0 beshort 0x0068 Human68k lib archive
+#4 string LZX Human68k LZX compressed
+#>8 string >\0 (version %s)
+#>4 string LZX R executable
+#2 string #HUPAIR Human68k hupair R executable
+
+#------------------------------------------------------------------------------
+# $File: ibm370,v 1.8 2009/09/19 16:28:09 christos Exp $
+# ibm370: file(1) magic for IBM 370 and compatibles.
+#
+# "ibm370" said that 0x15d == 0535 was "ibm 370 pure executable".
+# What the heck *is* "USS/370"?
+# AIX 4.1's "/etc/magic" has
+#
+# 0 short 0535 370 sysV executable
+# >12 long >0 not stripped
+# >22 short >0 - version %d
+# >30 long >0 - 5.2 format
+# 0 short 0530 370 sysV pure executable
+# >12 long >0 not stripped
+# >22 short >0 - version %d
+# >30 long >0 - 5.2 format
+#
+# instead of the "USS/370" versions of the same magic numbers.
+#
+0 beshort 0537 370 XA sysV executable
+>12 belong >0 not stripped
+>22 beshort >0 - version %d
+>30 belong >0 - 5.2 format
+0 beshort 0532 370 XA sysV pure executable
+>12 belong >0 not stripped
+>22 beshort >0 - version %d
+>30 belong >0 - 5.2 format
+0 beshort 054001 370 sysV pure executable
+>12 belong >0 not stripped
+0 beshort 055001 370 XA sysV pure executable
+>12 belong >0 not stripped
+0 beshort 056401 370 sysV executable
+>12 belong >0 not stripped
+0 beshort 057401 370 XA sysV executable
+>12 belong >0 not stripped
+0 beshort 0531 SVR2 executable (Amdahl-UTS)
+>12 belong >0 not stripped
+>24 belong >0 - version %ld
+0 beshort 0534 SVR2 pure executable (Amdahl-UTS)
+>12 belong >0 not stripped
+>24 belong >0 - version %ld
+0 beshort 0530 SVR2 pure executable (USS/370)
+>12 belong >0 not stripped
+>24 belong >0 - version %ld
+0 beshort 0535 SVR2 executable (USS/370)
+>12 belong >0 not stripped
+>24 belong >0 - version %ld
+
+#------------------------------------------------------------------------------
+# $File: ibm6000,v 1.9 2009/09/19 16:28:09 christos Exp $
+# ibm6000: file(1) magic for RS/6000 and the RT PC.
+#
+0 beshort 0x01df executable (RISC System/6000 V3.1) or obj module
+>12 belong >0 not stripped
+# Breaks sun4 statically linked execs.
+#0 beshort 0x0103 executable (RT Version 2) or obj module
+#>2 byte 0x50 pure
+#>28 belong >0 not stripped
+#>6 beshort >0 - version %ld
+0 beshort 0x0104 shared library
+0 beshort 0x0105 ctab data
+0 beshort 0xfe04 structured file
+0 string 0xabcdef AIX message catalog
+0 belong 0x000001f9 AIX compiled message catalog
+0 string \<aiaff> archive
+0 string \<bigaf> archive (big format)
+
+
+#------------------------------------------------------------------------------
+# $File: iff,v 1.12 2009/09/19 16:28:09 christos Exp $
+# iff: file(1) magic for Interchange File Format (see also "audio" & "images")
+#
+# Daniel Quinlan (quinlan at yggdrasil.com) -- IFF was designed by Electronic
+# Arts for file interchange. It has also been used by Apple, SGI, and
+# especially Commodore-Amiga.
+#
+# IFF files begin with an 8 byte FORM header, followed by a 4 character
+# FORM type, which is followed by the first chunk in the FORM.
+
+0 string FORM IFF data
+#>4 belong x \b, FORM is %d bytes long
+# audio formats
+>8 string AIFF \b, AIFF audio
+!:mime audio/x-aiff
+>8 string AIFC \b, AIFF-C compressed audio
+!:mime audio/x-aiff
+>8 string 8SVX \b, 8SVX 8-bit sampled sound voice
+!:mime audio/x-aiff
+>8 string 16SV \b, 16SV 16-bit sampled sound voice
+>8 string SAMP \b, SAMP sampled audio
+>8 string MAUD \b, MAUD MacroSystem audio
+>8 string SMUS \b, SMUS simple music
+>8 string CMUS \b, CMUS complex music
+# image formats
+>8 string ILBMBMHD \b, ILBM interleaved image
+>>20 beshort x \b, %d x
+>>22 beshort x %d
+>8 string RGBN \b, RGBN 12-bit RGB image
+>8 string RGB8 \b, RGB8 24-bit RGB image
+>8 string DEEP \b, DEEP TVPaint/XiPaint image
+>8 string DR2D \b, DR2D 2-D object
+>8 string TDDD \b, TDDD 3-D rendering
+>8 string LWOB \b, LWOB 3-D object
+>8 string LWO2 \b, LWO2 3-D object, v2
+>8 string LWLO \b, LWLO 3-D layered object
+>8 string REAL \b, REAL Real3D rendering
+>8 string MC4D \b, MC4D MaxonCinema4D rendering
+>8 string ANIM \b, ANIM animation
+>8 string YAFA \b, YAFA animation
+>8 string SSA\ \b, SSA super smooth animation
+>8 string ACBM \b, ACBM continuous image
+>8 string FAXX \b, FAXX fax image
+# other formats
+>8 string FTXT \b, FTXT formatted text
+>8 string CTLG \b, CTLG message catalog
+>8 string PREF \b, PREF preferences
+>8 string DTYP \b, DTYP datatype description
+>8 string PTCH \b, PTCH binary patch
+>8 string AMFF \b, AMFF AmigaMetaFile format
+>8 string WZRD \b, WZRD StormWIZARD resource
+>8 string DOC\ \b, DOC desktop publishing document
+
+# These go at the end of the iff rules
+#
+# I don't see why these might collide with anything else.
+#
+# Interactive Fiction related formats
+#
+>8 string IFRS \b, Blorb Interactive Fiction
+>>24 string Exec with executable chunk
+>8 string IFZS \b, Z-machine or Glulx saved game file (Quetzal)
+
+#------------------------------------------------------------------------------
+# $File: images,v 1.64 2009/12/06 00:38:50 christos Exp $
+# images: file(1) magic for image formats (see also "iff", and "c-lang" for
+# XPM bitmaps)
+#
+# originally from jef at helios.ee.lbl.gov (Jef Poskanzer),
+# additions by janl at ifi.uio.no as well as others. Jan also suggested
+# merging several one- and two-line files into here.
+#
+# little magic: PCX (first byte is 0x0a)
+
+# Targa - matches `povray', `ppmtotga' and `xv' outputs
+# by Philippe De Muyter <phdm at macqel.be>
+# at 2, byte ImgType must be 1, 2, 3, 9, 10 or 11
+# at 1, byte CoMapType must be 1 if ImgType is 1 or 9, 0 otherwise
+# at 3, leshort Index is 0 for povray, ppmtotga and xv outputs
+# `xv' recognizes only a subset of the following (RGB with pixelsize = 24)
+# `tgatoppm' recognizes a superset (Index may be anything)
+1 belong&0xfff7ffff 0x01010000 Targa image data - Map
+>2 byte&8 8 - RLE
+>12 leshort >0 %hd x
+>14 leshort >0 %hd
+1 belong&0xfff7ffff 0x00020000 Targa image data - RGB
+>2 byte&8 8 - RLE
+>12 leshort >0 %hd x
+>14 leshort >0 %hd
+1 belong&0xfff7ffff 0x00030000 Targa image data - Mono
+>2 byte&8 8 - RLE
+>12 leshort >0 %hd x
+>14 leshort >0 %hd
+
+# PBMPLUS images
+# The next byte following the magic is always whitespace.
+# strength is changed to try these patterns before "x86 boot sector"
+0 search/1 P1
+>3 regex =[0-9]*\ [0-9]* Netpbm PBM image text
+>3 regex =[0-9]+\ \b, size = %sx
+>>3 regex =\ [0-9]+ \b%s
+!:strength + 45
+!:mime image/x-portable-bitmap
+0 search/1 P2
+>3 regex =[0-9]*\ [0-9]* Netpbm PGM image text
+>3 regex =[0-9]+\ \b, size = %sx
+>>3 regex =\ [0-9]+ \b%s
+!:strength + 45
+!:mime image/x-portable-greymap
+0 search/1 P3
+>3 regex =[0-9]*\ [0-9]* Netpbm PPM image text
+>3 regex =[0-9]+\ \b, size = %sx
+>>3 regex =\ [0-9]+ \b%s
+!:strength + 45
+!:mime image/x-portable-pixmap
+0 string P4
+>3 regex =[0-9]*\ [0-9]* Netpbm PBM "rawbits" image data
+>3 regex =[0-9]+\ \b, size = %sx
+>>3 regex =\ [0-9]+ \b%s
+!:strength + 45
+!:mime image/x-portable-bitmap
+0 string P5
+>3 regex =[0-9]*\ [0-9]* Netpbm PGM "rawbits" image data
+>3 regex =[0-9]+\ \b, size = %sx
+>>3 regex =\ [0-9]+ \b%s
+!:strength + 45
+!:mime image/x-portable-greymap
+0 string P6
+>3 regex =[0-9]*\ [0-9]* Netpbm PPM "rawbits" image data
+>3 regex =[0-9]+\ \b, size = %sx
+>>3 regex =\ [0-9]+ \b%s
+!:strength + 45
+!:mime image/x-portable-pixmap
+0 string P7 Netpbm PAM image file
+!:mime image/x-portable-pixmap
+
+# From: bryanh at giraffe-data.com (Bryan Henderson)
+0 string \117\072 Solitaire Image Recorder format
+>4 string \013 MGI Type 11
+>4 string \021 MGI Type 17
+0 string .MDA MicroDesign data
+>21 byte 48 version 2
+>21 byte 51 version 3
+0 string .MDP MicroDesign page data
+>21 byte 48 version 2
+>21 byte 51 version 3
+
+# NIFF (Navy Interchange File Format, a modification of TIFF) images
+# [GRR: this *must* go before TIFF]
+0 string IIN1 NIFF image data
+!:mime image/x-niff
+
+# Tag Image File Format, from Daniel Quinlan (quinlan at yggdrasil.com)
+# The second word of TIFF files is the TIFF version number, 42, which has
+# never changed. The TIFF specification recommends testing for it.
+0 string MM\x00\x2a TIFF image data, big-endian
+!:mime image/tiff
+0 string II\x2a\x00 TIFF image data, little-endian
+!:mime image/tiff
+
+# PNG [Portable Network Graphics, or "PNG's Not GIF"] images
+# (Greg Roelofs, newt at uchicago.edu)
+# (Albert Cahalan, acahalan at cs.uml.edu)
+#
+# 137 P N G \r \n ^Z \n [4-byte length] I H D R [IHDR data] [IHDR crc] ...
+#
+0 string \x89PNG\x0d\x0a\x1a\x0a PNG image data
+!:mime image/png
+>16 belong x \b, %ld x
+>20 belong x %ld,
+>24 byte x %d-bit
+>25 byte 0 grayscale,
+>25 byte 2 \b/color RGB,
+>25 byte 3 colormap,
+>25 byte 4 gray+alpha,
+>25 byte 6 \b/color RGBA,
+#>26 byte 0 deflate/32K,
+>28 byte 0 non-interlaced
+>28 byte 1 interlaced
+
+# possible GIF replacements; none yet released!
+# (Greg Roelofs, newt at uchicago.edu)
+#
+# GRR 950115: this was mine ("Zip GIF"):
+0 string GIF94z ZIF image (GIF+deflate alpha)
+!:mime image/x-unknown
+#
+# GRR 950115: this is Jeremy Wohl's Free Graphics Format (better):
+#
+0 string FGF95a FGF image (GIF+deflate beta)
+!:mime image/x-unknown
+#
+# GRR 950115: this is Thomas Boutell's Portable Bitmap Format proposal
+# (best; not yet implemented):
+#
+0 string PBF PBF image (deflate compression)
+!:mime image/x-unknown
+
+# GIF
+0 string GIF8 GIF image data
+!:mime image/gif
+!:apple 8BIMGIFf
+>4 string 7a \b, version 8%s,
+>4 string 9a \b, version 8%s,
+>6 leshort >0 %hd x
+>8 leshort >0 %hd
+#>10 byte &0x80 color mapped,
+#>10 byte&0x07 =0x00 2 colors
+#>10 byte&0x07 =0x01 4 colors
+#>10 byte&0x07 =0x02 8 colors
+#>10 byte&0x07 =0x03 16 colors
+#>10 byte&0x07 =0x04 32 colors
+#>10 byte&0x07 =0x05 64 colors
+#>10 byte&0x07 =0x06 128 colors
+#>10 byte&0x07 =0x07 256 colors
+
+# ITC (CMU WM) raster files. It is essentially a byte-reversed Sun raster,
+# 1 plane, no encoding.
+0 string \361\0\100\273 CMU window manager raster image data
+>4 lelong >0 %d x
+>8 lelong >0 %d,
+>12 lelong >0 %d-bit
+
+# Magick Image File Format
+0 string id=ImageMagick MIFF image data
+
+# Artisan
+0 long 1123028772 Artisan image data
+>4 long 1 \b, rectangular 24-bit
+>4 long 2 \b, rectangular 8-bit with colormap
+>4 long 3 \b, rectangular 32-bit (24-bit with matte)
+
+# FIG (Facility for Interactive Generation of figures), an object-based format
+0 search/1 #FIG FIG image text
+>5 string x \b, version %.3s
+
+# PHIGS
+0 string ARF_BEGARF PHIGS clear text archive
+0 string @(#)SunPHIGS SunPHIGS
+# version number follows, in the form m.n
+>40 string SunBin binary
+>32 string archive archive
+
+# GKS (Graphics Kernel System)
+0 string GKSM GKS Metafile
+>24 string SunGKS \b, SunGKS
+
+# CGM image files
+0 string BEGMF clear text Computer Graphics Metafile
+
+# MGR bitmaps (Michael Haardt, u31b3hs at pool.informatik.rwth-aachen.de)
+0 string yz MGR bitmap, modern format, 8-bit aligned
+0 string zz MGR bitmap, old format, 1-bit deep, 16-bit aligned
+0 string xz MGR bitmap, old format, 1-bit deep, 32-bit aligned
+0 string yx MGR bitmap, modern format, squeezed
+
+# Fuzzy Bitmap (FBM) images
+0 string %bitmap\0 FBM image data
+>30 long 0x31 \b, mono
+>30 long 0x33 \b, color
+
+# facsimile data
+1 string PC\ Research,\ Inc group 3 fax data
+>29 byte 0 \b, normal resolution (204x98 DPI)
+>29 byte 1 \b, fine resolution (204x196 DPI)
+# From: Herbert Rosmanith <herp at wildsau.idv.uni.linz.at>
+0 string Sfff structured fax file
+
+
+# PC bitmaps (OS/2, Windows BMP files) (Greg Roelofs, newt at uchicago.edu)
+0 string BM
+>14 leshort 12 PC bitmap, OS/2 1.x format
+!:mime image/x-ms-bmp
+>>18 leshort x \b, %d x
+>>20 leshort x %d
+>14 leshort 64 PC bitmap, OS/2 2.x format
+!:mime image/x-ms-bmp
+>>18 leshort x \b, %d x
+>>20 leshort x %d
+>14 leshort 40 PC bitmap, Windows 3.x format
+!:mime image/x-ms-bmp
+>>18 lelong x \b, %d x
+>>22 lelong x %d x
+>>28 leshort x %d
+>14 leshort 128 PC bitmap, Windows NT/2000 format
+!:mime image/x-ms-bmp
+>>18 lelong x \b, %d x
+>>22 lelong x %d x
+>>28 leshort x %d
+# Too simple - MPi
+#0 string IC PC icon data
+#0 string PI PC pointer image data
+#0 string CI PC color icon data
+#0 string CP PC color pointer image data
+# Conflicts with other entries [BABYL]
+#0 string BA PC bitmap array data
+
+# XPM icons (Greg Roelofs, newt at uchicago.edu)
+# note possible collision with C/REXX entry in c-lang; currently commented out
+0 search/1 /*\ XPM\ */ X pixmap image text
+
+# Utah Raster Toolkit RLE images (janl at ifi.uio.no)
+0 leshort 0xcc52 RLE image data,
+>6 leshort x %d x
+>8 leshort x %d
+>2 leshort >0 \b, lower left corner: %d
+>4 leshort >0 \b, lower right corner: %d
+>10 byte&0x1 =0x1 \b, clear first
+>10 byte&0x2 =0x2 \b, no background
+>10 byte&0x4 =0x4 \b, alpha channel
+>10 byte&0x8 =0x8 \b, comment
+>11 byte >0 \b, %d color channels
+>12 byte >0 \b, %d bits per pixel
+>13 byte >0 \b, %d color map channels
+
+# image file format (Robert Potter, potter at cs.rochester.edu)
+0 string Imagefile\ version- iff image data
+# this adds the whole header (inc. version number), informative but longish
+>10 string >\0 %s
+
+# Sun raster images, from Daniel Quinlan (quinlan at yggdrasil.com)
+0 belong 0x59a66a95 Sun raster image data
+>4 belong >0 \b, %d x
+>8 belong >0 %d,
+>12 belong >0 %d-bit,
+#>16 belong >0 %d bytes long,
+>20 belong 0 old format,
+#>20 belong 1 standard,
+>20 belong 2 compressed,
+>20 belong 3 RGB,
+>20 belong 4 TIFF,
+>20 belong 5 IFF,
+>20 belong 0xffff reserved for testing,
+>24 belong 0 no colormap
+>24 belong 1 RGB colormap
+>24 belong 2 raw colormap
+#>28 belong >0 colormap is %d bytes long
+
+# SGI image file format, from Daniel Quinlan (quinlan at yggdrasil.com)
+#
+# See
+# http://reality.sgi.com/grafica/sgiimage.html
+#
+0 beshort 474 SGI image data
+#>2 byte 0 \b, verbatim
+>2 byte 1 \b, RLE
+#>3 byte 1 \b, normal precision
+>3 byte 2 \b, high precision
+>4 beshort x \b, %d-D
+>6 beshort x \b, %d x
+>8 beshort x %d
+>10 beshort x \b, %d channel
+>10 beshort !1 \bs
+>80 string >0 \b, "%s"
+
+0 string IT01 FIT image data
+>4 belong x \b, %d x
+>8 belong x %d x
+>12 belong x %d
+#
+0 string IT02 FIT image data
+>4 belong x \b, %d x
+>8 belong x %d x
+>12 belong x %d
+#
+2048 string PCD_IPI Kodak Photo CD image pack file
+>0xe02 byte&0x03 0x00 , landscape mode
+>0xe02 byte&0x03 0x01 , portrait mode
+>0xe02 byte&0x03 0x02 , landscape mode
+>0xe02 byte&0x03 0x03 , portrait mode
+0 string PCD_OPA Kodak Photo CD overview pack file
+
+# FITS format. Jeff Uphoff <juphoff at tarsier.cv.nrao.edu>
+# FITS is the Flexible Image Transport System, the de facto standard for
+# data and image transfer, storage, etc., for the astronomical community.
+# (FITS floating point formats are big-endian.)
+0 string SIMPLE\ \ = FITS image data
+>109 string 8 \b, 8-bit, character or unsigned binary integer
+>108 string 16 \b, 16-bit, two's complement binary integer
+>107 string \ 32 \b, 32-bit, two's complement binary integer
+>107 string -32 \b, 32-bit, floating point, single precision
+>107 string -64 \b, 64-bit, floating point, double precision
+
+# other images
+0 string This\ is\ a\ BitMap\ file Lisp Machine bit-array-file
+
+# From SunOS 5.5.1 "/etc/magic" - appeared right before Sun raster image
+# stuff.
+#
+0 beshort 0x1010 PEX Binary Archive
+
+# DICOM medical imaging data
+128 string DICM DICOM medical imaging data
+!:mime application/dicom
+
+# XWD - X Window Dump file.
+# As described in /usr/X11R6/include/X11/XWDFile.h
+# used by the xwd program.
+# Bradford Castalia, idaeim, 1/01
+4 belong 7 XWD X Window Dump image data
+>100 string >\0 \b, "%s"
+>16 belong x \b, %dx
+>20 belong x \b%dx
+>12 belong x \b%d
+
+# PDS - Planetary Data System
+# These files use Parameter Value Language in the header section.
+# Unfortunately, there is no certain magic, but the following
+# strings have been found to be most likely.
+0 string NJPL1I00 PDS (JPL) image data
+2 string NJPL1I PDS (JPL) image data
+0 string CCSD3ZF PDS (CCSD) image data
+2 string CCSD3Z PDS (CCSD) image data
+0 string PDS_ PDS image data
+0 string LBLSIZE= PDS (VICAR) image data
+
+# pM8x: ATARI STAD compressed bitmap format
+#
+# from Oskar Schirmer <schirmer at scara.com> Feb 2, 2001
+# p M 8 5/6 xx yy zz data...
+# Atari ST STAD bitmap is always 640x400, bytewise runlength compressed.
+# bytes either run horizontally (pM85) or vertically (pM86). yy is the
+# most frequent byte, xx and zz are runlength escape codes, where xx is
+# used for runs of yy.
+#
+0 string pM85 Atari ST STAD bitmap image data (hor)
+>5 byte 0x00 (white background)
+>5 byte 0xFF (black background)
+0 string pM86 Atari ST STAD bitmap image data (vert)
+>5 byte 0x00 (white background)
+>5 byte 0xFF (black background)
+
+# Gürkan Sengün <gurkan at linuks.mine.nu>, www.linuks.mine.nu
+# http://www.atarimax.com/jindroush.atari.org/afmtatr.html
+0 leshort 0x0296 Atari ATR image
+
+# XXX:
+# This is bad magic 0x5249 == 'RI' conflicts with RIFF and other
+# magic.
+# SGI RICE image file <mpruett at sgi.com>
+#0 beshort 0x5249 RICE image
+#>2 beshort x v%d
+#>4 beshort x (%d x
+#>6 beshort x %d)
+#>8 beshort 0 8 bit
+#>8 beshort 1 10 bit
+#>8 beshort 2 12 bit
+#>8 beshort 3 13 bit
+#>10 beshort 0 4:2:2
+#>10 beshort 1 4:2:2:4
+#>10 beshort 2 4:4:4
+#>10 beshort 3 4:4:4:4
+#>12 beshort 1 RGB
+#>12 beshort 2 CCIR601
+#>12 beshort 3 RP175
+#>12 beshort 4 YUV
+
+#------------------------------------------------------------------------------
+#
+# Marco Schmidt (marcoschmidt at users.sourceforge.net) -- an image file format
+# for the EPOC operating system, which is used with PDAs like those from Psion
+#
+# see http://huizen.dds.nl/~frodol/psiconv/html/Index.html for a description
+# of various EPOC file formats
+
+0 string \x37\x00\x00\x10\x42\x00\x00\x10\x00\x00\x00\x00\x39\x64\x39\x47 EPOC MBM image file
+
+# PCX image files
+# From: Dan Fandrich <dan at coneharvesters.com>
+0 beshort 0x0a00 PCX ver. 2.5 image data
+0 beshort 0x0a02 PCX ver. 2.8 image data, with palette
+0 beshort 0x0a03 PCX ver. 2.8 image data, without palette
+0 beshort 0x0a04 PCX for Windows image data
+0 beshort 0x0a05 PCX ver. 3.0 image data
+>4 leshort x bounding box [%hd,
+>6 leshort x %hd] -
+>8 leshort x [%hd,
+>10 leshort x %hd],
+>65 byte >1 %d planes each of
+>3 byte x %hhd-bit
+>68 byte 0 image,
+>68 byte 1 colour,
+>68 byte 2 grayscale,
+>68 byte >2 image,
+>68 byte <0 image,
+>12 leshort >0 %hd x
+>>14 leshort x %hd dpi,
+>2 byte 0 uncompressed
+>2 byte 1 RLE compressed
+
+# Adobe Photoshop
+# From: Asbjoern Sloth Toennesen <asbjorn at lila.io>
+0 string 8BPS Adobe Photoshop Image
+!:mime image/vnd.adobe.photoshop
+>4 beshort 2 (PSB)
+>18 belong x \b, %d x
+>14 belong x %d,
+>24 beshort 0 bitmap
+>24 beshort 1 grayscale
+>>12 beshort 2 with alpha
+>24 beshort 2 indexed
+>24 beshort 3 RGB
+>>12 beshort 4 \bA
+>24 beshort 4 CMYK
+>>12 beshort 5 \bA
+>24 beshort 7 multichannel
+>24 beshort 8 duotone
+>24 beshort 9 lab
+>12 beshort > 1
+>>12 beshort x \b, %dx
+>12 beshort 1 \b,
+>22 beshort x %d-bit channel
+>12 beshort > 1 \bs
+
+# XV thumbnail indicator (ThMO)
+0 string P7\ 332 XV thumbnail image data
+
+# NITF is defined by United States MIL-STD-2500A
+0 string NITF National Imagery Transmission Format
+>25 string >\0 dated %.14s
+
+# GEM Image: Version 1, Headerlen 8 (Wolfram Kleff)
+0 belong 0x00010008 GEM Image data
+>12 beshort x %d x
+>14 beshort x %d,
+>4 beshort x %d planes,
+>8 beshort x %d x
+>10 beshort x %d pixelsize
+
+# GEM Metafile (Wolfram Kleff)
+0 lelong 0x0018FFFF GEM Metafile data
+>4 leshort x version %d
+
+#
+# SMJPEG. A custom Motion JPEG format used by Loki Entertainment
+# Software. From: Torbjorn Andersson <d91tan at Update.UU.SE>.
+#
+0 string \0\nSMJPEG SMJPEG
+>8 belong x %d.x data
+# According to the specification you could find any number of _TXT
+# headers here, but I can't think of any way of handling that. None of
+# the SMJPEG files I tried it on used this feature. Even if such a
+# file is encountered the output should still be reasonable.
+>16 string _SND \b,
+>>24 beshort >0 %d Hz
+>>26 byte 8 8-bit
+>>26 byte 16 16-bit
+>>28 string NONE uncompressed
+# >>28 string APCM ADPCM compressed
+# _SND layout: rate at 24 (16-bit), bits at 26, channel count at 27, encoding tag at 28
+>>27 byte 1 mono
+>>27 byte 2 stereo
+# Help! Isn't there any way to avoid writing this part twice?
+>>32 string _VID \b,
+# >>>48 string JFIF JPEG
+>>>40 belong >0 %d frames
+>>>44 beshort >0 (%d x
+>>>46 beshort >0 %d)
+>16 string _VID \b,
+# >>32 string JFIF JPEG
+>>24 belong >0 %d frames
+>>28 beshort >0 (%d x
+>>30 beshort >0 %d)
+
+0 string Paint\ Shop\ Pro\ Image\ File Paint Shop Pro Image File
+
+# "thumbnail file" (icon)
+# descended from "xv", but in use by other applications as well (Wolfram Kleff)
+0 string P7\ 332 XV "thumbnail file" (icon) data
+
+# taken from fkiss: (<yav at mte.biglobe.ne.jp> ?)
+0 string KiSS KISS/GS
+>4 byte 16 color
+>>5 byte x %d bit
+>>8 leshort x %d colors
+>>10 leshort x %d groups
+>4 byte 32 cell
+>>5 byte x %d bit
+>>8 leshort x %d x
+>>10 leshort x %d
+>>12 leshort x +%d
+>>14 leshort x +%d
+
+# Webshots (www.webshots.com), by John Harrison
+0 string C\253\221g\230\0\0\0 Webshots Desktop .wbz file
+
+# Hercules DASD image files
+# From Jan Jaeger <jj at septa.nl>
+0 string CKD_P370 Hercules CKD DASD image file
+>8 long x \b, %d heads per cylinder
+>12 long x \b, track size %d bytes
+>16 byte x \b, device type 33%2.2X
+
+0 string CKD_C370 Hercules compressed CKD DASD image file
+>8 long x \b, %d heads per cylinder
+>12 long x \b, track size %d bytes
+>16 byte x \b, device type 33%2.2X
+
+0 string CKD_S370 Hercules CKD DASD shadow file
+>8 long x \b, %d heads per cylinder
+>12 long x \b, track size %d bytes
+>16 byte x \b, device type 33%2.2X
+
+# Squeak images and programs - etoffi at softhome.net
+0 string \146\031\0\0 Squeak image data
+0 search/1 'From\040Squeak Squeak program text
+
+# partimage: file(1) magic for PartImage files (experimental, incomplete)
+# Author: Hans-Joachim Baader <hjb at pro-linux.de>
+0 string PaRtImAgE-VoLuMe PartImage
+>0x0020 string 0.6.1 file version %s
+>>0x0060 lelong >-1 volume %ld
+#>>0x0064 8 byte identifier
+#>>0x007c reserved
+>>0x0200 string >\0 type %s
+>>0x1400 string >\0 device %s,
+>>0x1600 string >\0 original filename %s,
+# Some fields omitted
+>>0x2744 lelong 0 not compressed
+>>0x2744 lelong 1 gzip compressed
+>>0x2744 lelong 2 bzip2 compressed
+>>0x2744 lelong >2 compressed with unknown algorithm
+>0x0020 string >0.6.1 file version %s
+>0x0020 string <0.6.1 file version %s
+
+# DCX is multi-page PCX, using a simple header of up to 1024
+# offsets for the respective PCX components.
+# From: Joerg Wunsch <joerg_wunsch at uriah.heep.sax.de>
+0 lelong 987654321 DCX multi-page PCX image data
+
+# Simon Walton <simonw at matteworld.com>
+# Kodak Cineon format for scanned negatives
+# http://www.kodak.com/US/en/motion/support/dlad/
+0 lelong 0xd75f2a80 Cineon image data
+>200 belong >0 \b, %ld x
+>204 belong >0 %ld
+
+
+# Bio-Rad .PIC is an image format used by microscope control systems
+# and related image processing software used by biologists.
+# From: Vebjorn Ljosa <vebjorn at ljosa.com>
+# BOOL values are two-byte integers; use them to rule out false positives.
+# http://web.archive.org/web/20050317223257/www.cs.ubc.ca/spider/ladic/text/biorad.txt
+# Samples: http://www.loci.wisc.edu/software/sample-data
+14 leshort <2
+>62 leshort <2
+>>54 leshort 12345 Bio-Rad .PIC Image File
+>>>0 leshort >0 %hd x
+>>>2 leshort >0 %hd,
+>>>4 leshort =1 1 image in file
+>>>4 leshort >1 %hd images in file
+
+# From Jan "Yenya" Kasprzak <kas at fi.muni.cz>
+# The description of *.mrw format can be found at
+# http://www.dalibor.cz/minolta/raw_file_format.htm
+0 string \000MRM Minolta Dimage camera raw image data
+
+# Summary: DjVu image / document
+# Extension: .djvu
+# Reference: http://djvu.org/docs/DjVu3Spec.djvu
+# Submitted by: Stephane Loeuillet <stephane.loeuillet at tiscali.fr>
+# Modified by (1): Abel Cheung <abelcheung at gmail.com>
+0 string AT&TFORM
+>12 string DJVM DjVu multiple page document
+!:mime image/vnd.djvu
+>12 string DJVU DjVu image or single page document
+!:mime image/vnd.djvu
+>12 string DJVI DjVu shared document
+!:mime image/vnd.djvu
+>12 string THUM DjVu page thumbnails
+!:mime image/vnd.djvu
+
+
+# From Marc Espie
+0 lelong 20000630 OpenEXR image data
+
+# From: Tom Hilinski <tom.hilinski at comcast.net>
+# http://www.unidata.ucar.edu/packages/netcdf/
+0 string CDF\001 NetCDF Data Format data
+
+#-----------------------------------------------------------------------
+# Hierarchical Data Format, used to facilitate scientific data exchange
+# specifications at http://hdf.ncsa.uiuc.edu/
+0 belong 0x0e031301 Hierarchical Data Format (version 4) data
+!:mime application/x-hdf
+0 string \211HDF\r\n\032 Hierarchical Data Format (version 5) data
+!:mime application/x-hdf
+
+# From: Tobias Burnus <burnus at net-b.de>
+# Xara (for a while: Corel Xara) is a graphic package, see
+# http://www.xara.com/ for Windows and as GPL application for Linux
+0 string XARA\243\243 Xara graphics file
+
+# http://www.cartesianinc.com/Tech/
+0 string CPC\262 Cartesian Perceptual Compression image
+!:mime image/x-cpi
+
+# From Albert Cahalan <acahalan at gmail.com>
+# puredigital used it for the CVS disposable camcorder
+#8 lelong 4 ZBM bitmap image data
+#>4 leshort x %u x
+#>6 leshort x %u
+
+# From Albert Cahalan <acahalan at gmail.com>
+# uncompressed 5:6:5 HighColor image for OLPC XO firmware icons
+0 string C565 OLPC firmware icon image data
+>4 leshort x %u x
+>6 leshort x %u
+
+# Applied Images - Image files from Cytovision
+# Gustavo Junior Alves <gjalves at gjalves.com.br>
+0 string \xce\xda\xde\xfa Cytovision Metaphases file
+0 string \xed\xad\xef\xac Cytovision Karyotype file
+0 string \x0b\x00\x03\x00 Cytovision FISH Probe file
+0 string \xed\xfe\xda\xbe Cytovision FLEX file
+0 string \xed\xab\xed\xfe Cytovision FLEX file
+0 string \xad\xfd\xea\xad Cytovision RATS file
+
+# Wavelet Scalar Quantization format used in gray-scale fingerprint images
+# From Tano M Fotang <mfotang at quanteq.com>
+0 string \xff\xa0\xff\xa8\x00 Wavelet Scalar Quantization image data
+
+# JPEG 2000 Code Stream Bitmap
+# From Petr Splichal <psplicha at redhat.com>
+0 string \xFF\x4F\xFF\x51\x00 JPEG-2000 Code Stream Bitmap data
+
+#------------------------------------------------------------------------------
+# $File: inform,v 1.5 2009/09/19 16:28:09 christos Exp $
+# inform: file(1) magic for Inform interactive fiction language
+
+# URL: http://www.inform-fiction.org/
+# From: Reuben Thomas <rrt at sc3d.org>
+
+0 search/100/cW constant\ story Inform source text
+
+#------------------------------------------------------------------------------
+# $File: intel,v 1.8 2009/09/19 16:28:10 christos Exp $
+# intel: file(1) magic for x86 Unix
+#
+# Various flavors of x86 UNIX executable/object (other than Xenix, which
+# is in "microsoft"). DOS is in "msdos"; the ambitious soul can do
+# Windows as well.
+#
+# Windows NT belongs elsewhere, as you need x86 and MIPS and Alpha and
+# whatever comes next (HP-PA Hummingbird?). OS/2 may also go elsewhere
+# as well, if, as, and when IBM makes it portable.
+#
+# The `versions' should be un-commented if they work for you.
+# (Was the problem just one of endianness?)
+#
+0 leshort 0502 basic-16 executable
+>12 lelong >0 not stripped
+#>22 leshort >0 - version %ld
+0 leshort 0503 basic-16 executable (TV)
+>12 lelong >0 not stripped
+#>22 leshort >0 - version %ld
+0 leshort 0510 x86 executable
+>12 lelong >0 not stripped
+0 leshort 0511 x86 executable (TV)
+>12 lelong >0 not stripped
+0 leshort =0512 iAPX 286 executable small model (COFF)
+>12 lelong >0 not stripped
+#>22 leshort >0 - version %ld
+0 leshort =0522 iAPX 286 executable large model (COFF)
+>12 lelong >0 not stripped
+#>22 leshort >0 - version %ld
+# SGI labeled the next entry as "iAPX 386 executable" --Dan Quinlan
+0 leshort =0514 80386 COFF executable
+>12 lelong >0 not stripped
+>22 leshort >0 - version %ld
+
+# rom: file(1) magic for BIOS ROM Extensions found in intel machines
+# mapped into memory between 0xC0000 and 0xFFFFF
+# From Gürkan Sengün <gurkan at linuks.mine.nu>, www.linuks.mine.nu
+0 beshort 0x55AA BIOS (ia32) ROM Ext.
+>5 string USB USB
+>7 string LDR UNDI image
+>30 string IBM IBM comp. Video
+>26 string Adaptec Adaptec
+>28 string Adaptec Adaptec
+>42 string PROMISE Promise
+>2 byte x (%d*512)
+
+#------------------------------------------------------------------------------
+# $File: interleaf,v 1.10 2009/09/19 16:28:10 christos Exp $
+# interleaf: file(1) magic for InterLeaf TPS:
+#
+0 string =\210OPS Interleaf saved data
+0 string =<!OPS Interleaf document text
+>5 string ,\ Version\ = \b, version
+>>17 string >\0 %.3s
+
+#------------------------------------------------------------------------------
+# $File: island,v 1.5 2009/09/19 16:28:10 christos Exp $
+# island: file(1) magic for IslandWrite/IslandDraw, from SunOS 5.5.1
+# "/etc/magic":
+# From: guy at netapp.com (Guy Harris)
+#
+4 string pgscriptver IslandWrite document
+13 string DrawFile IslandDraw document
+
+
+#------------------------------------------------------------------------------
+# $File: ispell,v 1.8 2009/09/19 16:28:10 christos Exp $
+# ispell: file(1) magic for ispell
+#
+# Ispell 3.0 has a magic of 0x9601 and ispell 3.1 has 0x9602. This magic
+# will match 0x9600 through 0x9603 in *both* little endian and big endian.
+# (No other current magic entries collide.)
+#
+# Updated by Daniel Quinlan (quinlan at yggdrasil.com)
+#
+0 leshort&0xFFFC 0x9600 little endian ispell
+>0 byte 0 hash file (?),
+>0 byte 1 3.0 hash file,
+>0 byte 2 3.1 hash file,
+>0 byte 3 hash file (?),
+>2 leshort 0x00 8-bit, no capitalization, 26 flags
+>2 leshort 0x01 7-bit, no capitalization, 26 flags
+>2 leshort 0x02 8-bit, capitalization, 26 flags
+>2 leshort 0x03 7-bit, capitalization, 26 flags
+>2 leshort 0x04 8-bit, no capitalization, 52 flags
+>2 leshort 0x05 7-bit, no capitalization, 52 flags
+>2 leshort 0x06 8-bit, capitalization, 52 flags
+>2 leshort 0x07 7-bit, capitalization, 52 flags
+>2 leshort 0x08 8-bit, no capitalization, 128 flags
+>2 leshort 0x09 7-bit, no capitalization, 128 flags
+>2 leshort 0x0A 8-bit, capitalization, 128 flags
+>2 leshort 0x0B 7-bit, capitalization, 128 flags
+>2 leshort 0x0C 8-bit, no capitalization, 256 flags
+>2 leshort 0x0D 7-bit, no capitalization, 256 flags
+>2 leshort 0x0E 8-bit, capitalization, 256 flags
+>2 leshort 0x0F 7-bit, capitalization, 256 flags
+>4 leshort >0 and %d string characters
+0 beshort&0xFFFC 0x9600 big endian ispell
+>1 byte 0 hash file (?),
+>1 byte 1 3.0 hash file,
+>1 byte 2 3.1 hash file,
+>1 byte 3 hash file (?),
+>2 beshort 0x00 8-bit, no capitalization, 26 flags
+>2 beshort 0x01 7-bit, no capitalization, 26 flags
+>2 beshort 0x02 8-bit, capitalization, 26 flags
+>2 beshort 0x03 7-bit, capitalization, 26 flags
+>2 beshort 0x04 8-bit, no capitalization, 52 flags
+>2 beshort 0x05 7-bit, no capitalization, 52 flags
+>2 beshort 0x06 8-bit, capitalization, 52 flags
+>2 beshort 0x07 7-bit, capitalization, 52 flags
+>2 beshort 0x08 8-bit, no capitalization, 128 flags
+>2 beshort 0x09 7-bit, no capitalization, 128 flags
+>2 beshort 0x0A 8-bit, capitalization, 128 flags
+>2 beshort 0x0B 7-bit, capitalization, 128 flags
+>2 beshort 0x0C 8-bit, no capitalization, 256 flags
+>2 beshort 0x0D 7-bit, no capitalization, 256 flags
+>2 beshort 0x0E 8-bit, capitalization, 256 flags
+>2 beshort 0x0F 7-bit, capitalization, 256 flags
+>4 beshort >0 and %d string characters
+# ispell 4.0 hash files kromJx <kromJx at crosswinds.net>
+# Ispell 4.0
+0 string ISPL ispell
+>4 long x hash file version %d,
+>8 long x lexletters %d,
+>12 long x lexsize %d,
+>16 long x hashsize %d,
+>20 long x stblsize %d
+
+#------------------------------------------------------------
+# $File: java,v 1.12 2009/09/19 16:28:10 christos Exp $
+# Java ByteCode and Mach-O binaries (e.g., Mac OS X) use the
+# same magic number, 0xcafebabe, so they are both handled
+# in the entry called "cafebabe".
+#------------------------------------------------------------
+# Java serialization
+# From Martin Pool (m.pool at pharos.com.au)
+0 beshort 0xaced Java serialization data
+>2 beshort >0x0004 \b, version %d
+
+0 belong 0xfeedfeed Java KeyStore
+!:mime application/x-java-keystore
+0 belong 0xcececece Java JCE KeyStore
+!:mime application/x-java-jce-keystore
+
+# Dalvik .dex format. http://retrodev.com/android/dexformat.html
+# From <mkf at google.com> "Mike Fleming"
+0 string dex\n
+>0 regex dex\n[0-9][0-9][0-9]\0 Dalvik dex file
+>4 string >000 version %s
+0 string dey\n
+>0 regex dey\n[0-9][0-9][0-9]\0 Dalvik dex file (optimized for host)
+>4 string >000 version %s
+
+
+#------------------------------------------------------------------------------
+# $File: jpeg,v 1.15 2009/09/19 16:28:10 christos Exp $
+# JPEG images
+# SunOS 5.5.1 had
+#
+# 0 string \377\330\377\340 JPEG file
+# 0 string \377\330\377\356 JPG file
+#
+# both of which turn into "JPEG image data" here.
+#
+0 beshort 0xffd8 JPEG image data
+!:mime image/jpeg
+!:apple 8BIMJPEG
+!:strength +1
+>6 string JFIF \b, JFIF standard
+# The following added by Erik Rossen <rossen at freesurf.ch> 1999-09-06
+# in a vain attempt to add image size reporting for JFIF. Note that these
+# tests are not fool-proof since some perfectly valid JPEGs are currently
+# impossible to specify in magic(4) format.
+# First, a little JFIF version info:
+>>11 byte x \b %d.
+>>12 byte x \b%02d
+# Next, the resolution or aspect ratio of the image:
+#>>13 byte 0 \b, aspect ratio
+#>>13 byte 1 \b, resolution (DPI)
+#>>13 byte 2 \b, resolution (DPCM)
+#>>4 beshort x \b, segment length %d
+# Next, show thumbnail info, if it exists:
+>>18 byte !0 \b, thumbnail %dx
+>>>19 byte x \b%d
+
+# EXIF moved down here to avoid reporting a bogus version number,
+# and EXIF version number printing added.
+# - Patrik Rådman <patrik+file-magic at iki.fi>
+>6 string Exif \b, EXIF standard
+# Look for EXIF IFD offset in IFD 0, and then look for EXIF version tag in EXIF IFD.
+# All possible combinations of entries have to be enumerated, since no looping
+# is possible. And both endians are possible...
+# The combinations included below are from real-world JPEGs.
+# Little-endian
+>>12 string II
+# IFD 0 Entry #5:
+>>>70 leshort 0x8769
+# EXIF IFD Entry #1:
+>>>>(78.l+14) leshort 0x9000
+>>>>>(78.l+23) byte x %c
+>>>>>(78.l+24) byte x \b.%c
+>>>>>(78.l+25) byte !0x30 \b%c
+# IFD 0 Entry #9:
+>>>118 leshort 0x8769
+# EXIF IFD Entry #3:
+>>>>(126.l+38) leshort 0x9000
+>>>>>(126.l+47) byte x %c
+>>>>>(126.l+48) byte x \b.%c
+>>>>>(126.l+49) byte !0x30 \b%c
+# IFD 0 Entry #10
+>>>130 leshort 0x8769
+# EXIF IFD Entry #3:
+>>>>(138.l+38) leshort 0x9000
+>>>>>(138.l+47) byte x %c
+>>>>>(138.l+48) byte x \b.%c
+>>>>>(138.l+49) byte !0x30 \b%c
+# EXIF IFD Entry #4:
+>>>>(138.l+50) leshort 0x9000
+>>>>>(138.l+59) byte x %c
+>>>>>(138.l+60) byte x \b.%c
+>>>>>(138.l+61) byte !0x30 \b%c
+# EXIF IFD Entry #5:
+>>>>(138.l+62) leshort 0x9000
+>>>>>(138.l+71) byte x %c
+>>>>>(138.l+72) byte x \b.%c
+>>>>>(138.l+73) byte !0x30 \b%c
+# IFD 0 Entry #11
+>>>142 leshort 0x8769
+# EXIF IFD Entry #3:
+>>>>(150.l+38) leshort 0x9000
+>>>>>(150.l+47) byte x %c
+>>>>>(150.l+48) byte x \b.%c
+>>>>>(150.l+49) byte !0x30 \b%c
+# EXIF IFD Entry #4:
+>>>>(150.l+50) leshort 0x9000
+>>>>>(150.l+59) byte x %c
+>>>>>(150.l+60) byte x \b.%c
+>>>>>(150.l+61) byte !0x30 \b%c
+# EXIF IFD Entry #5:
+>>>>(150.l+62) leshort 0x9000
+>>>>>(150.l+71) byte x %c
+>>>>>(150.l+72) byte x \b.%c
+>>>>>(150.l+73) byte !0x30 \b%c
+# Big-endian
+>>12 string MM
+# IFD 0 Entry #9:
+>>>118 beshort 0x8769
+# EXIF IFD Entry #1:
+>>>>(126.L+14) beshort 0x9000
+>>>>>(126.L+23) byte x %c
+>>>>>(126.L+24) byte x \b.%c
+>>>>>(126.L+25) byte !0x30 \b%c
+# EXIF IFD Entry #3:
+>>>>(126.L+38) beshort 0x9000
+>>>>>(126.L+47) byte x %c
+>>>>>(126.L+48) byte x \b.%c
+>>>>>(126.L+49) byte !0x30 \b%c
+# IFD 0 Entry #10
+>>>130 beshort 0x8769
+# EXIF IFD Entry #3:
+>>>>(138.L+38) beshort 0x9000
+>>>>>(138.L+47) byte x %c
+>>>>>(138.L+48) byte x \b.%c
+>>>>>(138.L+49) byte !0x30 \b%c
+# EXIF IFD Entry #5:
+>>>>(138.L+62) beshort 0x9000
+>>>>>(138.L+71) byte x %c
+>>>>>(138.L+72) byte x \b.%c
+>>>>>(138.L+73) byte !0x30 \b%c
+# IFD 0 Entry #11
+>>>142 beshort 0x8769
+# EXIF IFD Entry #4:
+>>>>(150.L+50) beshort 0x9000
+>>>>>(150.L+59) byte x %c
+>>>>>(150.L+60) byte x \b.%c
+>>>>>(150.L+61) byte !0x30 \b%c
+# Here things get sticky. We can do ONE MORE marker segment with
+# indirect addressing, and that's all. It would be great if we could
+# do pointer arithmetic like in an assembler language. Christos?
+# And if there was some sort of looping construct to do searches, plus a few
+# named accumulators, it would be even more effective...
+# At least we can show a comment if no other segments got inserted before:
+>(4.S+5) byte 0xFE
+>>(4.S+8) string >\0 \b, comment: "%s"
+# FIXME: When we can do non-byte counted strings, we can use that to get
+# the string's count, and fix Debian bug #283760
+#>(4.S+5) byte 0xFE \b, comment
+#>>(4.S+6) beshort x \b length=%d
+#>>(4.S+8) string >\0 \b, "%s"
+# Or, we can show the encoding type (I've included only the three most common)
+# and image dimensions if we are lucky and the SOFn (image segment) is here:
+>(4.S+5) byte 0xC0 \b, baseline
+>>(4.S+6) byte x \b, precision %d
+>>(4.S+7) beshort x \b, %dx
+>>(4.S+9) beshort x \b%d
+>(4.S+5) byte 0xC1 \b, extended sequential
+>>(4.S+6) byte x \b, precision %d
+>>(4.S+7) beshort x \b, %dx
+>>(4.S+9) beshort x \b%d
+>(4.S+5) byte 0xC2 \b, progressive
+>>(4.S+6) byte x \b, precision %d
+>>(4.S+7) beshort x \b, %dx
+>>(4.S+9) beshort x \b%d
+# I've commented-out quantisation table reporting. I doubt anyone cares yet.
+#>(4.S+5) byte 0xDB \b, quantisation table
+#>>(4.S+6) beshort x \b length=%d
+#>14 beshort x \b, %d x
+#>16 beshort x \b %d
+
+# HSI is Handmade Software's proprietary JPEG encoding scheme
+0 string hsi1 JPEG image data, HSI proprietary
+
+# From: David Santinoli <david at santinoli.com>
+0 string \x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A JPEG 2000 image data
+
+# Type: JPEG 2000 codestream
+# From: Mathieu Malaterre <mathieu.malaterre at gmail.com>
+0 belong 0xff4fff51 JPEG 2000 codestream
+45 beshort 0xff52
+
+#------------------------------------------------------------------------------
+# $File: karma,v 1.6 2009/09/19 16:28:10 christos Exp $
+# karma: file(1) magic for Karma data files
+#
+# From <rgooch at atnf.csiro.au>
+
+0 string KarmaRHD Version Karma Data Structure Version
+>16 belong x %lu
+
+#------------------------------------------------------------------------------
+# $File: kde,v 1.4 2009/09/19 16:28:10 christos Exp $
+# kde: file(1) magic for KDE
+
+0 string [KDE\ Desktop\ Entry] KDE desktop entry
+!:mime application/x-kdelnk
+0 string #\ KDE\ Config\ File KDE config file
+!:mime application/x-kdelnk
+0 string #\ xmcd xmcd database file for kscd
+!:mime text/x-xmcd
+
+#------------------------------------------------------------------------------
+# $File: kml,v 1.2 2009/09/19 16:28:10 christos Exp $
+# Type: Google KML, formerly Keyhole Markup Language
+# Future development of this format has been handed
+# over to the Open Geospatial Consortium.
+# http://www.opengeospatial.org/standards/kml/
+# From: Asbjoern Sloth Toennesen <asbjorn at lila.io>
+0 string \<?xml
+>20 search/400 \ xmlns=
+>>&0 regex ['"]http://earth.google.com/kml Google KML document
+!:mime application/vnd.google-earth.kml+xml
+>>>&1 string 2.0' \b, version 2.0
+>>>&1 string 2.1' \b, version 2.1
+>>>&1 string 2.2' \b, version 2.2
+
+#------------------------------------------------------------------------------
+# Type: OpenGIS KML, formerly Keyhole Markup Language
+# This standard is maintained by the
+# Open Geospatial Consortium.
+# http://www.opengeospatial.org/standards/kml/
+# From: Asbjoern Sloth Toennesen <asbjorn at lila.io>
+>>&0 regex ['"]http://www.opengis.net/kml OpenGIS KML document
+!:mime application/vnd.google-earth.kml+xml
+>>>&1 string 2.2 \b, version 2.2
+
+#------------------------------------------------------------------------------
+# Type: Google KML Archive (ZIP based)
+# http://code.google.com/apis/kml/documentation/kml_tut.html
+# From: Asbjoern Sloth Toennesen <asbjorn at lila.io>
+0 string PK\003\004
+>4 byte 0x14
+>>30 string doc.kml Compressed Google KML Document, including resources.
+!:mime application/vnd.google-earth.kmz
+
+#------------------------------------------------------------------------------
+# $File: lecter,v 1.4 2009/09/19 16:28:10 christos Exp $
+# DEC SRC Virtual Paper: Lectern files
+# Karl M. Hegbloom <karlheg at inetarena.com>
+0 string lect DEC SRC Virtual Paper Lectern file
+
+#------------------------------------------------------------------------------
+# $File: lex,v 1.6 2009/09/19 16:28:10 christos Exp $
+# lex: file(1) magic for lex
+#
+# derived empirically, your offsets may vary!
+0 search/100 yyprevious C program text (from lex)
+>3 search/1 >\0 for %s
+# C program text from GNU flex, from Daniel Quinlan <quinlan at yggdrasil.com>
+0 search/100 generated\ by\ flex C program text (from flex)
+# lex description file, from Daniel Quinlan <quinlan at yggdrasil.com>
+0 search/1 %{ lex description text
+
+#------------------------------------------------------------------------------
+# $File: lif,v 1.8 2009/09/19 16:28:10 christos Exp $
+# lif: file(1) magic for lif
+#
+# (Daniel Quinlan <quinlan at yggdrasil.com>)
+#
+0 beshort 0x8000 lif file
+
+#------------------------------------------------------------------------------
+# $File: linux,v 1.33 2009/10/23 16:44:59 christos Exp $
+# linux: file(1) magic for Linux files
+#
+# Values for Linux/i386 binaries, from Daniel Quinlan <quinlan at yggdrasil.com>
+# The following basic Linux magic is useful for reference, but using
+# "long" magic is a better practice in order to avoid collisions.
+#
+# 2 leshort 100 Linux/i386
+# >0 leshort 0407 impure executable (OMAGIC)
+# >0 leshort 0410 pure executable (NMAGIC)
+# >0 leshort 0413 demand-paged executable (ZMAGIC)
+# >0 leshort 0314 demand-paged executable (QMAGIC)
+#
+0 lelong 0x00640107 Linux/i386 impure executable (OMAGIC)
+>16 lelong 0 \b, stripped
+0 lelong 0x00640108 Linux/i386 pure executable (NMAGIC)
+>16 lelong 0 \b, stripped
+0 lelong 0x0064010b Linux/i386 demand-paged executable (ZMAGIC)
+>16 lelong 0 \b, stripped
+0 lelong 0x006400cc Linux/i386 demand-paged executable (QMAGIC)
+>16 lelong 0 \b, stripped
+#
+0 string \007\001\000 Linux/i386 object file
+>20 lelong >0x1020 \b, DLL library
+# Linux-8086 stuff:
+0 string \01\03\020\04 Linux-8086 impure executable
+>28 long !0 not stripped
+0 string \01\03\040\04 Linux-8086 executable
+>28 long !0 not stripped
+#
+0 string \243\206\001\0 Linux-8086 object file
+#
+0 string \01\03\020\20 Minix-386 impure executable
+>28 long !0 not stripped
+0 string \01\03\040\20 Minix-386 executable
+>28 long !0 not stripped
+# core dump file, from Bill Reynolds <bill at goshawk.lanl.gov>
+216 lelong 0421 Linux/i386 core file
+>220 string >\0 of '%s'
+>200 lelong >0 (signal %d)
+#
+# LILO boot/chain loaders, from Daniel Quinlan <quinlan at yggdrasil.com>
+# this can be overridden by the DOS executable (COM) entry
+2 string LILO Linux/i386 LILO boot/chain loader
+#
+# PSF fonts, from H. Peter Anvin <hpa at yggdrasil.com>
+0 leshort 0x0436 Linux/i386 PC Screen Font data,
+>2 byte 0 256 characters, no directory,
+>2 byte 1 512 characters, no directory,
+>2 byte 2 256 characters, Unicode directory,
+>2 byte 3 512 characters, Unicode directory,
+>3 byte >0 8x%d
+# Linux swap file, from Daniel Quinlan <quinlan at yggdrasil.com>
+4086 string SWAP-SPACE Linux/i386 swap file
+# From: Jeff Bailey <jbailey at ubuntu.com>
+# Linux swap file with swsusp1 image, from Jeff Bailey <jbailey at ubuntu.com>
+4076 string SWAPSPACE2S1SUSPEND Linux/i386 swap file (new style) with SWSUSP1 image
+# according to man page of mkswap (8) March 1999
+4086 string SWAPSPACE2 Linux/i386 swap file (new style)
+>0x400 long x %d (4K pages)
+>0x404 long x size %d pages
+>>4086 string SWAPSPACE2
+>>>1052 string >\0 Label %s
+# From Daniel Novotny <dnovotny at redhat.com>
+# swap file for PowerPC
+65526 string SWAPSPACE2 Linux/ppc swap file
+16374 string SWAPSPACE2 Linux/ia64 swap file
+# ECOFF magic for OSF/1 and Linux (only tested under Linux though)
+#
+# from Erik Troan (ewt at redhat.com) examining od dumps, so this
+# could be wrong
+# updated by David Mosberger (davidm at azstarnet.com) based on
+# GNU BFD and MIPS info found below.
+#
+0 leshort 0x0183 ECOFF alpha
+>24 leshort 0407 executable
+>24 leshort 0410 pure
+>24 leshort 0413 demand paged
+>8 long >0 not stripped
+>8 long 0 stripped
+>23 leshort >0 - version %ld.
+#
+# Linux kernel boot images, from Albert Cahalan <acahalan at cs.uml.edu>
+# and others such as Axel Kohlmeyer <akohlmey at rincewind.chemie.uni-ulm.de>
+# and Nicolás Lichtmaier <nick at debian.org>
+# All known start with: b8 c0 07 8e d8 b8 00 90 8e c0 b9 00 01 29 f6 29
+# Linux kernel boot images (i386 arch) (Wolfram Kleff)
+514 string HdrS Linux kernel
+>510 leshort 0xAA55 x86 boot executable
+>>518 leshort >0x1ff
+>>>529 byte 0 zImage,
+>>>529 byte 1 bzImage,
+>>>(526.s+0x200) string >\0 version %s,
+>>498 leshort 1 RO-rootFS,
+>>498 leshort 0 RW-rootFS,
+>>508 leshort >0 root_dev 0x%X,
+>>502 leshort >0 swap_dev 0x%X,
+>>504 leshort >0 RAMdisksize %u KB,
+>>506 leshort 0xFFFF Normal VGA
+>>506 leshort 0xFFFE Extended VGA
+>>506 leshort 0xFFFD Prompt for Videomode
+>>506 leshort >0 Video mode %d
+# This also matches new kernels, which were caught above by "HdrS".
+0 belong 0xb8c0078e Linux kernel
+>0x1e3 string Loading version 1.3.79 or older
+>0x1e9 string Loading from prehistoric times
+
+# System.map files - Nicolás Lichtmaier <nick at debian.org>
+8 search/1 \ A\ _text Linux kernel symbol map text
+
+# LSM entries - Nicolás Lichtmaier <nick at debian.org>
+0 search/1 Begin3 Linux Software Map entry text
+0 search/1 Begin4 Linux Software Map entry text (new format)
+
+# From Matt Zimmerman, enhanced for v3 by Matthew Palmer
+0 belong 0x4f4f4f4d User-mode Linux COW file
+>4 belong <3 \b, version %d
+>>8 string >\0 \b, backing file %s
+>4 belong >2 \b, version %d
+>>32 string >\0 \b, backing file %s
+
+############################################################################
+# Linux kernel versions
+
+0 string \xb8\xc0\x07\x8e\xd8\xb8\x00\x90 Linux
+>497 leshort 0 x86 boot sector
+>>514 belong 0x8e of a kernel from the dawn of time!
+>>514 belong 0x908ed8b4 version 0.99-1.1.42
+>>514 belong 0x908ed8b8 for memtest86
+
+>497 leshort !0 x86 kernel
+>>504 leshort >0 RAMdisksize=%u KB
+>>502 leshort >0 swap=0x%X
+>>508 leshort >0 root=0x%X
+>>>498 leshort 1 \b-ro
+>>>498 leshort 0 \b-rw
+>>506 leshort 0xFFFF vga=normal
+>>506 leshort 0xFFFE vga=extended
+>>506 leshort 0xFFFD vga=ask
+>>506 leshort >0 vga=%d
+>>514 belong 0x908ed881 version 1.1.43-1.1.45
+>>514 belong 0x15b281cd
+>>>0xa8e belong 0x55AA5a5a version 1.1.46-1.2.13,1.3.0
+>>>0xa99 belong 0x55AA5a5a version 1.3.1,2
+>>>0xaa3 belong 0x55AA5a5a version 1.3.3-1.3.30
+>>>0xaa6 belong 0x55AA5a5a version 1.3.31-1.3.41
+>>>0xb2b belong 0x55AA5a5a version 1.3.42-1.3.45
+>>>0xaf7 belong 0x55AA5a5a version 1.3.46-1.3.72
+>>514 string HdrS
+>>>518 leshort >0x1FF
+>>>>529 byte 0 \b, zImage
+>>>>529 byte 1 \b, bzImage
+>>>>(526.s+0x200) string >\0 \b, version %s
+
+# Linux boot sector thefts.
+0 belong 0xb8c0078e Linux
+>0x1e6 belong 0x454c4b53 ELKS Kernel
+>0x1e6 belong !0x454c4b53 style boot sector
+
+############################################################################
+# Linux S390 executable
+8 string \x02\x00\x00\x18\x60\x00\x00\x50\x02\x00\x00\x68\x60\x00\x00\x50\x40\x40\x40\x40\x40\x40\x40\x40 Linux S390
+>0x00010000 search/b/4096 \x00\x0a\x00\x00\x8b\xad\xcc\xcc
+# 64bit
+>>&0 string \xc1\x00\xef\xe3\xf0\x68\x00\x00 Z10 64bit kernel
+>>&0 string \xc1\x00\xef\xc3\x00\x00\x00\x00 Z9-109 64bit kernel
+>>&0 string \xc0\x00\x20\x00\x00\x00\x00\x00 Z990 64bit kernel
+>>&0 string \x00\x00\x00\x00\x00\x00\x00\x00 Z900 64bit kernel
+# 32bit
+>>&0 string \x81\x00\xc8\x80\x00\x00\x00\x00 Z10 32bit kernel
+>>&0 string \x81\x00\xc8\x80\x00\x00\x00\x00 Z9-109 32bit kernel
+>>&0 string \x80\x00\x20\x00\x00\x00\x00\x00 Z990 32bit kernel
+>>&0 string \x80\x00\x00\x00\x00\x00\x00\x00 Z900 32bit kernel
+
+############################################################################
+# Linux 8086 executable
+0 lelong&0xFF0000FF 0xC30000E9 Linux-Dev86 executable, headerless
+>5 string .
+>>4 string >\0 \b, libc version %s
+
+0 lelong&0xFF00FFFF 0x4000301 Linux-8086 executable
+>2 byte&0x01 !0 \b, unmapped zero page
+>2 byte&0x20 0 \b, impure
+>2 byte&0x20 !0
+>>2 byte&0x10 !0 \b, A_EXEC
+>2 byte&0x02 !0 \b, A_PAL
+>2 byte&0x04 !0 \b, A_NSYM
+>2 byte&0x08 !0 \b, A_STAND
+>2 byte&0x40 !0 \b, A_PURE
+>2 byte&0x80 !0 \b, A_TOVLY
+>28 long !0 \b, not stripped
+>37 string .
+>>36 string >\0 \b, libc version %s
+
+# 0 lelong&0xFF00FFFF 0x10000301 ld86 I80386 executable
+# 0 lelong&0xFF00FFFF 0xB000301 ld86 M68K executable
+# 0 lelong&0xFF00FFFF 0xC000301 ld86 NS16K executable
+# 0 lelong&0xFF00FFFF 0x17000301 ld86 SPARC executable
+
+# SYSLINUX boot logo files (from 'ppmtolss16' sources)
+# http://syslinux.zytor.com/
+#
+0 lelong =0x1413f33d SYSLINUX' LSS16 image data
+>4 leshort x \b, width %d
+>6 leshort x \b, height %d
+
+0 string OOOM User-Mode-Linux's Copy-On-Write disk image
+>4 belong x version %d
+
+# SE Linux policy database
+# From: Mike Frysinger <vapier at gentoo.org>
+0 lelong 0xf97cff8c SE Linux policy
+>16 lelong x v%d
+>20 lelong 1 MLS
+>24 lelong x %d symbols
+>28 lelong x %d ocons
+
+# Linux Logical Volume Manager (LVM)
+# Emmanuel VARAGNAT <emmanuel.varagnat at guzu.net>
+#
+# System ID, UUID and volume group name are 128 bytes long
+# but they should never be full and initialized with zeros...
+#
+# LVM1
+#
+0x0 string HM\001 LVM1 (Linux Logical Volume Manager), version 1
+>0x12c string >\0 , System ID: %s
+
+0x0 string HM\002 LVM1 (Linux Logical Volume Manager), version 2
+>0x12c string >\0 , System ID: %s
+
+# LVM2
+#
+# It seems that the label header can be in one of the first four sectors
+# of the disk... (from _find_labeller in lib/label/label.c of LVM2)
+#
+# 0x200 seems to be the common case
+
+0x218 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+# read the offset to add to the start of the header, and the header
+# start in 0x200
+>(0x214.l+0x200) string >\0 , UUID: %s
+
+0x018 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+>(0x014.l) string >\0 , UUID: %s
+
+0x418 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+>(0x414.l+0x400) string >\0 , UUID: %s
+
+0x618 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+>(0x614.l+0x600) string >\0 , UUID: %s
+
+# LVM snapshot
+# from Jason Farrel
+0 string SnAp LVM Snapshot (CopyOnWrite store)
+>4 lelong !0 - valid,
+>4 lelong 0 - invalid,
+>8 lelong x version %d,
+>12 lelong x chunk_size %d
+
+# SE Linux policy database
+0 lelong 0xf97cff8c SE Linux policy
+>16 lelong x v%d
+>20 lelong 1 MLS
+>24 lelong x %d symbols
+>28 lelong x %d ocons
+
+# LUKS: Linux Unified Key Setup, On-Disk Format, http://luks.endorphin.org/spec
+# Anthon van der Neut (anthon at mnt.org)
+0 string LUKS\xba\xbe LUKS encrypted file,
+>6 beshort x ver %d
+>8 string x [%s,
+>40 string x %s,
+>72 string x %s]
+>168 string x UUID: %s
+
+
+# Summary: Xen saved domain file
+# Created by: Radek Vokal <rvokal at redhat.com>
+0 string LinuxGuestRecord Xen saved domain
+>20 search/256 (name
+>>&1 string x (name %s)
+
+
+#------------------------------------------------------------------------------
+# $File: linux,v 1.33 2009/10/23 16:44:59 christos Exp $
+# linux: file(1) magic for Linux files
+#
+# Values for Linux/i386 binaries, from Daniel Quinlan <quinlan at yggdrasil.com>
+# The following basic Linux magic is useful for reference, but using
+# "long" magic is a better practice in order to avoid collisions.
+#
+# 2 leshort 100 Linux/i386
+# >0 leshort 0407 impure executable (OMAGIC)
+# >0 leshort 0410 pure executable (NMAGIC)
+# >0 leshort 0413 demand-paged executable (ZMAGIC)
+# >0 leshort 0314 demand-paged executable (QMAGIC)
+#
+0 lelong 0x00640107 Linux/i386 impure executable (OMAGIC)
+>16 lelong 0 \b, stripped
+0 lelong 0x00640108 Linux/i386 pure executable (NMAGIC)
+>16 lelong 0 \b, stripped
+0 lelong 0x0064010b Linux/i386 demand-paged executable (ZMAGIC)
+>16 lelong 0 \b, stripped
+0 lelong 0x006400cc Linux/i386 demand-paged executable (QMAGIC)
+>16 lelong 0 \b, stripped
+#
+0 string \007\001\000 Linux/i386 object file
+>20 lelong >0x1020 \b, DLL library
+# Linux-8086 stuff:
+0 string \01\03\020\04 Linux-8086 impure executable
+>28 long !0 not stripped
+0 string \01\03\040\04 Linux-8086 executable
+>28 long !0 not stripped
+#
+0 string \243\206\001\0 Linux-8086 object file
+#
+0 string \01\03\020\20 Minix-386 impure executable
+>28 long !0 not stripped
+0 string \01\03\040\20 Minix-386 executable
+>28 long !0 not stripped
+# core dump file, from Bill Reynolds <bill at goshawk.lanl.gov>
+216 lelong 0421 Linux/i386 core file
+>220 string >\0 of '%s'
+>200 lelong >0 (signal %d)
+#
+# LILO boot/chain loaders, from Daniel Quinlan <quinlan at yggdrasil.com>
+# this can be overridden by the DOS executable (COM) entry
+2 string LILO Linux/i386 LILO boot/chain loader
+#
+# PSF fonts, from H. Peter Anvin <hpa at yggdrasil.com>
+0 leshort 0x0436 Linux/i386 PC Screen Font data,
+>2 byte 0 256 characters, no directory,
+>2 byte 1 512 characters, no directory,
+>2 byte 2 256 characters, Unicode directory,
+>2 byte 3 512 characters, Unicode directory,
+>3 byte >0 8x%d
+# Linux swap file, from Daniel Quinlan <quinlan at yggdrasil.com>
+4086 string SWAP-SPACE Linux/i386 swap file
+# From: Jeff Bailey <jbailey at ubuntu.com>
+# Linux swap file with swsusp1 image, from Jeff Bailey <jbailey at ubuntu.com>
+4076 string SWAPSPACE2S1SUSPEND Linux/i386 swap file (new style) with SWSUSP1 image
+# according to man page of mkswap (8) March 1999
+4086 string SWAPSPACE2 Linux/i386 swap file (new style)
+>0x400 long x %d (4K pages)
+>0x404 long x size %d pages
+>>4086 string SWAPSPACE2
+>>>1052 string >\0 Label %s
+# From Daniel Novotny <dnovotny at redhat.com>
+# swap file for PowerPC
+65526 string SWAPSPACE2 Linux/ppc swap file
+# ECOFF magic for OSF/1 and Linux (only tested under Linux though)
+#
+# from Erik Troan (ewt at redhat.com) examining od dumps, so this
+# could be wrong
+# updated by David Mosberger (davidm at azstarnet.com) based on
+# GNU BFD and MIPS info found below.
+#
+0 leshort 0x0183 ECOFF alpha
+>24 leshort 0407 executable
+>24 leshort 0410 pure
+>24 leshort 0413 demand paged
+>8 long >0 not stripped
+>8 long 0 stripped
+>23 leshort >0 - version %ld.
+#
+# Linux kernel boot images, from Albert Cahalan <acahalan at cs.uml.edu>
+# and others such as Axel Kohlmeyer <akohlmey at rincewind.chemie.uni-ulm.de>
+# and Nicolás Lichtmaier <nick at debian.org>
+# All known start with: b8 c0 07 8e d8 b8 00 90 8e c0 b9 00 01 29 f6 29
+# Linux kernel boot images (i386 arch) (Wolfram Kleff)
+514 string HdrS Linux kernel
+>510 leshort 0xAA55 x86 boot executable
+>>518 leshort >0x1ff
+>>>529 byte 0 zImage,
+>>>529 byte 1 bzImage,
+>>>(526.s+0x200) string >\0 version %s,
+>>498 leshort 1 RO-rootFS,
+>>498 leshort 0 RW-rootFS,
+>>508 leshort >0 root_dev 0x%X,
+>>502 leshort >0 swap_dev 0x%X,
+>>504 leshort >0 RAMdisksize %u KB,
+>>506 leshort 0xFFFF Normal VGA
+>>506 leshort 0xFFFE Extended VGA
+>>506 leshort 0xFFFD Prompt for Videomode
+>>506 leshort >0 Video mode %d
+# This also matches new kernels, which were caught above by "HdrS".
+0 belong 0xb8c0078e Linux kernel
+>0x1e3 string Loading version 1.3.79 or older
+>0x1e9 string Loading from prehistoric times
+
+# System.map files - Nicolás Lichtmaier <nick at debian.org>
+8 search/1 \ A\ _text Linux kernel symbol map text
+
+# LSM entries - Nicolás Lichtmaier <nick at debian.org>
+0 search/1 Begin3 Linux Software Map entry text
+0 search/1 Begin4 Linux Software Map entry text (new format)
+
+# From Matt Zimmerman, enhanced for v3 by Matthew Palmer
+0 belong 0x4f4f4f4d User-mode Linux COW file
+>4 belong <3 \b, version %d
+>>8 string >\0 \b, backing file %s
+>4 belong >2 \b, version %d
+>>32 string >\0 \b, backing file %s
+
+############################################################################
+# Linux kernel versions
+
+0 string \xb8\xc0\x07\x8e\xd8\xb8\x00\x90 Linux
+>497 leshort 0 x86 boot sector
+>>514 belong 0x8e of a kernel from the dawn of time!
+>>514 belong 0x908ed8b4 version 0.99-1.1.42
+>>514 belong 0x908ed8b8 for memtest86
+
+>497 leshort !0 x86 kernel
+>>504 leshort >0 RAMdisksize=%u KB
+>>502 leshort >0 swap=0x%X
+>>508 leshort >0 root=0x%X
+>>>498 leshort 1 \b-ro
+>>>498 leshort 0 \b-rw
+>>506 leshort 0xFFFF vga=normal
+>>506 leshort 0xFFFE vga=extended
+>>506 leshort 0xFFFD vga=ask
+>>506 leshort >0 vga=%d
+>>514 belong 0x908ed881 version 1.1.43-1.1.45
+>>514 belong 0x15b281cd
+>>>0xa8e belong 0x55AA5a5a version 1.1.46-1.2.13,1.3.0
+>>>0xa99 belong 0x55AA5a5a version 1.3.1,2
+>>>0xaa3 belong 0x55AA5a5a version 1.3.3-1.3.30
+>>>0xaa6 belong 0x55AA5a5a version 1.3.31-1.3.41
+>>>0xb2b belong 0x55AA5a5a version 1.3.42-1.3.45
+>>>0xaf7 belong 0x55AA5a5a version 1.3.46-1.3.72
+>>514 string HdrS
+>>>518 leshort >0x1FF
+>>>>529 byte 0 \b, zImage
+>>>>529 byte 1 \b, bzImage
+>>>>(526.s+0x200) string >\0 \b, version %s
+
+# Linux boot sector thefts.
+0 belong 0xb8c0078e Linux
+>0x1e6 belong 0x454c4b53 ELKS Kernel
+>0x1e6 belong !0x454c4b53 style boot sector
+
+############################################################################
+# Linux 8086 executable
+0 lelong&0xFF0000FF 0xC30000E9 Linux-Dev86 executable, headerless
+>5 string .
+>>4 string >\0 \b, libc version %s
+
+0 lelong&0xFF00FFFF 0x4000301 Linux-8086 executable
+>2 byte&0x01 !0 \b, unmapped zero page
+>2 byte&0x20 0 \b, impure
+>2 byte&0x20 !0
+>>2 byte&0x10 !0 \b, A_EXEC
+>2 byte&0x02 !0 \b, A_PAL
+>2 byte&0x04 !0 \b, A_NSYM
+>2 byte&0x08 !0 \b, A_STAND
+>2 byte&0x40 !0 \b, A_PURE
+>2 byte&0x80 !0 \b, A_TOVLY
+>28 long !0 \b, not stripped
+>37 string .
+>>36 string >\0 \b, libc version %s
+
+# 0 lelong&0xFF00FFFF 0x10000301 ld86 I80386 executable
+# 0 lelong&0xFF00FFFF 0xB000301 ld86 M68K executable
+# 0 lelong&0xFF00FFFF 0xC000301 ld86 NS16K executable
+# 0 lelong&0xFF00FFFF 0x17000301 ld86 SPARC executable
+
+# SYSLINUX boot logo files (from 'ppmtolss16' sources)
+# http://syslinux.zytor.com/
+#
+0 lelong =0x1413f33d SYSLINUX' LSS16 image data
+>4 leshort x \b, width %d
+>6 leshort x \b, height %d
+
+0 string OOOM User-Mode-Linux's Copy-On-Write disk image
+>4 belong x version %d
+
+# SE Linux policy database
+# From: Mike Frysinger <vapier at gentoo.org>
+0 lelong 0xf97cff8c SE Linux policy
+>16 lelong x v%d
+>20 lelong 1 MLS
+>24 lelong x %d symbols
+>28 lelong x %d ocons
+
+# Linux Logical Volume Manager (LVM)
+# Emmanuel VARAGNAT <emmanuel.varagnat at guzu.net>
+#
+# System ID, UUID and volume group name are 128 bytes long
+# but they should never be full and initialized with zeros...
+#
+# LVM1
+#
+0x0 string HM\001 LVM1 (Linux Logical Volume Manager), version 1
+>0x12c string >\0 , System ID: %s
+
+0x0 string HM\002 LVM1 (Linux Logical Volume Manager), version 2
+>0x12c string >\0 , System ID: %s
+
+# LVM2
+#
+# It seems that the label header can be in any one of the first four sectors
+# of the disk... (from _find_labeller in lib/label/label.c of LVM2)
+#
+# 0x200 seems to be the common case
+
+0x218 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+# read the offset to add to the start of the header; the header
+# starts at 0x200
+>(0x214.l+0x200) string >\0 , UUID: %s
+
+0x018 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+>(0x014.l) string >\0 , UUID: %s
+
+0x418 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+>(0x414.l+0x400) string >\0 , UUID: %s
+
+0x618 string LVM2\ 001 LVM2 (Linux Logical Volume Manager)
+>(0x614.l+0x600) string >\0 , UUID: %s
+
+# LVM snapshot
+# from Jason Farrel
+0 string SnAp LVM Snapshot (CopyOnWrite store)
+>4 lelong !0 - valid,
+>4 lelong 0 - invalid,
+>8 lelong x version %d,
+>12 lelong x chunk_size %d
+
+# SE Linux policy database
+0 lelong 0xf97cff8c SE Linux policy
+>16 lelong x v%d
+>20 lelong 1 MLS
+>24 lelong x %d symbols
+>28 lelong x %d ocons
+
+# LUKS: Linux Unified Key Setup, On-Disk Format, http://luks.endorphin.org/spec
+# Anthon van der Neut (anthon at mnt.org)
+0 string LUKS\xba\xbe LUKS encrypted file,
+>6 beshort x ver %d
+>8 string x [%s,
+>40 string x %s,
+>72 string x %s]
+>168 string x UUID: %s
+
+
+# Summary: Xen saved domain file
+# Created by: Radek Vokal <rvokal at redhat.com>
+0 string LinuxGuestRecord Xen saved domain
+>20 search/256 (name
+>>&1 string x (name %s)
+
+
+#------------------------------------------------------------------------------
+# $File: lisp,v 1.23 2009/09/19 16:28:10 christos Exp $
+# lisp: file(1) magic for lisp programs
+#
+# various lisp types, from Daniel Quinlan (quinlan at yggdrasil.com)
+
+# updated by Joerg Jenderek
+# GRR: This lot is too weak
+#0 string ;;
+# windows INF files often begin with semicolon and use CRLF as line end
+# lisp files are mainly created on unix system with LF as line end
+#>2 search/4096 !\r Lisp/Scheme program text
+#>2 search/4096 \r Windows INF file
+
+0 search/4096 (setq\ Lisp/Scheme program text
+!:mime text/x-lisp
+0 search/4096 (defvar\ Lisp/Scheme program text
+!:mime text/x-lisp
+0 search/4096 (defparam\ Lisp/Scheme program text
+!:mime text/x-lisp
+0 search/4096 (defun\ Lisp/Scheme program text
+!:mime text/x-lisp
+0 search/4096 (autoload\ Lisp/Scheme program text
+!:mime text/x-lisp
+0 search/4096 (custom-set-variables\ Lisp/Scheme program text
+!:mime text/x-lisp
+
+# Emacs 18 - this is always correct, but not very magical.
+0 string \012( Emacs v18 byte-compiled Lisp data
+!:mime application/x-elc
+# Emacs 19+ - ver. recognition added by Ian Springer
+# Also applies to XEmacs 19+ .elc files; could tell them apart with regexs
+# - Chris Chittleborough <cchittleborough at yahoo.com.au>
+0 string ;ELC
+>4 byte >18
+>4 byte <32 Emacs/XEmacs v%d byte-compiled Lisp data
+!:mime application/x-elc
+
+# Files produced by CLISP Common Lisp From: Bruno Haible <haible at ilog.fr>
+0 string (SYSTEM::VERSION\040' CLISP byte-compiled Lisp program (pre 2004-03-27)
+0 string (|SYSTEM|::|VERSION|\040' CLISP byte-compiled Lisp program text
+
+0 long 0x70768BD2 CLISP memory image data
+0 long 0xD28B7670 CLISP memory image data, other endian
+
+#.com and .bin for MIT scheme
+0 string \372\372\372\372 MIT scheme (library?)
+
+# From: David Allouche <david at allouche.net>
+0 search/1 \<TeXmacs| TeXmacs document text
+!:mime text/texmacs
+
+#------------------------------------------------------------------------------
+# $File: llvm,v 1.4 2009/09/19 16:28:10 christos Exp $
+# llvm: file(1) magic for LLVM byte-codes
+# URL: http://llvm.cs.uiuc.edu/docs/BytecodeFormat.html#signature
+# From: Al Stone <ahs3 at fc.hp.com>
+
+0 string llvm LLVM byte-codes, uncompressed
+0 string llvc0 LLVM byte-codes, null compression
+0 string llvc1 LLVM byte-codes, gzip compression
+0 string llvc2 LLVM byte-codes, bzip2 compression
+
+#------------------------------------------------------------------------------
+# $File: lua,v 1.5 2009/09/19 16:28:10 christos Exp $
+# lua: file(1) magic for Lua scripting language
+# URL: http://www.lua.org/
+# From: Reuben Thomas <rrt at sc3d.org>, Seo Sanghyeon <tinuviel at sparcs.kaist.ac.kr>
+
+# Lua scripts
+0 search/1/w #!\ /usr/bin/lua Lua script text executable
+!:mime text/x-lua
+0 search/1/w #!\ /usr/local/bin/lua Lua script text executable
+!:mime text/x-lua
+0 search/1 #!/usr/bin/env\ lua Lua script text executable
+!:mime text/x-lua
+0 search/1 #!\ /usr/bin/env\ lua Lua script text executable
+!:mime text/x-lua
+
+# Lua bytecode
+0 string \033Lua Lua bytecode,
+>4 byte 0x50 version 5.0
+>4 byte 0x51 version 5.1
+
+#------------------------------------------------------------------------------
+# $File: luks,v 1.4 2009/09/19 16:28:10 christos Exp $
+# luks: file(1) magic for Linux Unified Key Setup
+# URL: http://luks.endorphin.org/spec
+# From: Anthon van der Neut <anthon at mnt.org>
+
+0 string LUKS\xba\xbe LUKS encrypted file,
+>6 beshort x ver %d
+>8 string x [%s,
+>40 string x %s,
+>72 string x %s]
+>168 string x UUID: %s
+
+#------------------------------------------------------------
+# $File: mach,v 1.9 2009/09/19 16:28:10 christos Exp $
+# Mach has two magic numbers, 0xcafebabe and 0xfeedface.
+# Unfortunately the first, cafebabe, is shared with
+# Java ByteCode, so they are both handled in the file "cafebabe".
+# The "feedface" ones are handled herein.
+#------------------------------------------------------------
+0 lelong&0xfffffffe 0xfeedface Mach-O
+>0 byte 0xcf 64-bit
+>12 lelong 1 object
+>12 lelong 2 executable
+>12 lelong 3 fixed virtual memory shared library
+>12 lelong 4 core
+>12 lelong 5 preload executable
+>12 lelong 6 dynamically linked shared library
+>12 lelong 7 dynamic linker
+>12 lelong 8 bundle
+>12 lelong 9 dynamically linked shared library stub
+>12 lelong >9
+>>12 lelong x filetype=%ld
+>4 lelong <0
+>>4 lelong x architecture=%ld
+>4 lelong 1 vax
+>4 lelong 2 romp
+>4 lelong 3 architecture=3
+>4 lelong 4 ns32032
+>4 lelong 5 ns32332
+>4 lelong 6 m68k
+>4 lelong 7 i386
+>4 lelong 8 mips
+>4 lelong 9 ns32532
+>4 lelong 10 architecture=10
+>4 lelong 11 hppa
+>4 lelong 12 acorn
+>4 lelong 13 m88k
+>4 lelong 14 sparc
+>4 lelong 15 i860-big
+>4 lelong 16 i860
+>4 lelong 17 rs6000
+>4 lelong 18 ppc
+>4 lelong 16777234 ppc64
+>4 lelong >16777234
+>>4 lelong x architecture=%ld
+#
+0 belong&0xfffffffe 0xfeedface Mach-O
+>3 byte 0xcf 64-bit
+>12 belong 1 object
+>12 belong 2 executable
+>12 belong 3 fixed virtual memory shared library
+>12 belong 4 core
+>12 belong 5 preload executable
+>12 belong 6 dynamically linked shared library
+>12 belong 7 dynamic linker
+>12 belong 8 bundle
+>12 belong 9 dynamically linked shared library stub
+>12 belong >9
+>>12 belong x filetype=%ld
+>4 belong <0
+>>4 belong x architecture=%ld
+>4 belong 1 vax
+>4 belong 2 romp
+>4 belong 3 architecture=3
+>4 belong 4 ns32032
+>4 belong 5 ns32332
+>4 belong 6 for m68k architecture
+# from NeXTstep 3.0 <mach/machine.h>
+# i.e. mc680x0_all, ignore
+# >>8 belong 1 (mc68030)
+>>8 belong 2 (mc68040)
+>>8 belong 3 (mc68030 only)
+>4 belong 7 i386
+>4 belong 8 mips
+>4 belong 9 ns32532
+>4 belong 10 architecture=10
+>4 belong 11 hppa
+>4 belong 12 acorn
+>4 belong 13 m88k
+>4 belong 14 sparc
+>4 belong 15 i860-big
+>4 belong 16 i860
+>4 belong 17 rs6000
+>4 belong 18 ppc
+>4 belong 16777234 ppc64
+>4 belong >16777234
+>>4 belong x architecture=%ld
+
+#------------------------------------------------------------------------------
+# $File: macintosh,v 1.20 2009/09/19 16:28:10 christos Exp $
+# macintosh description
+#
+# BinHex is the Macintosh ASCII-encoded file format (see also "apple")
+# Daniel Quinlan, quinlan at yggdrasil.com
+11 string must\ be\ converted\ with\ BinHex BinHex binary text
+!:mime application/mac-binhex40
+>41 string x \b, version %.3s
+
+# Stuffit archives are the de facto standard of compression for Macintosh
+# files obtained from most archives. (franklsm at tuns.ca)
+0 string SIT! StuffIt Archive (data)
+!:mime application/x-stuffit
+!:apple SIT!SIT!
+>2 string x : %s
+0 string SITD StuffIt Deluxe (data)
+>2 string x : %s
+0 string Seg StuffIt Deluxe Segment (data)
+>2 string x : %s
+
+# Newer StuffIt archives (grant at netbsd.org)
+0 string StuffIt StuffIt Archive
+!:mime application/x-stuffit
+!:apple SIT!SIT!
+#>162 string >0 : %s
+
+# Macintosh Applications and Installation binaries (franklsm at tuns.ca)
+# GRR: Too weak
+#0 string APPL Macintosh Application (data)
+#>2 string x \b: %s
+
+# Macintosh System files (franklsm at tuns.ca)
+# GRR: Too weak
+#0 string zsys Macintosh System File (data)
+#0 string FNDR Macintosh Finder (data)
+#0 string libr Macintosh Library (data)
+#>2 string x : %s
+#0 string shlb Macintosh Shared Library (data)
+#>2 string x : %s
+#0 string cdev Macintosh Control Panel (data)
+#>2 string x : %s
+#0 string INIT Macintosh Extension (data)
+#>2 string x : %s
+#0 string FFIL Macintosh Truetype Font (data)
+#>2 string x : %s
+#0 string LWFN Macintosh Postscript Font (data)
+#>2 string x : %s
+
+# Additional Macintosh Files (franklsm at tuns.ca)
+# GRR: Too weak
+#0 string PACT Macintosh Compact Pro Archive (data)
+#>2 string x : %s
+#0 string ttro Macintosh TeachText File (data)
+#>2 string x : %s
+#0 string TEXT Macintosh TeachText File (data)
+#>2 string x : %s
+#0 string PDF Macintosh PDF File (data)
+#>2 string x : %s
+
+# MacBinary format (Eric Fischer, enf at pobox.com)
+#
+# Unfortunately MacBinary doesn't really have a magic number prior
+# to the MacBinary III format. The checksum is really the way to
+# do it, but the magic file format isn't up to the challenge.
+#
+# 0 byte 0
+# 1 byte # filename length
+# 2 string # filename
+# 65 string # file type
+# 69 string # file creator
+# 73 byte # Finder flags
+# 74 byte 0
+# 75 beshort # vertical posn in window
+# 77 beshort # horiz posn in window
+# 79 beshort # window or folder ID
+# 81 byte # protected?
+# 82 byte 0
+# 83 belong # length of data segment
+# 87 belong # length of resource segment
+# 91 belong # file creation date
+# 95 belong # file modification date
+# 99 beshort # length of comment after resource
+# 101 byte # new Finder flags
+# 102 string mBIN # (only in MacBinary III)
+# 106 byte # char. code of file name
+# 107 byte # still more Finder flags
+# 116 belong # total file length
+# 120 beshort # length of add'l header
+# 122 byte 129 # for MacBinary II
+# 122 byte 130 # for MacBinary III
+# 123 byte 129 # minimum version that can read fmt
+# 124 beshort # checksum
+#
+# This attempts to use the version numbers as a magic number, requiring
+# that the first one be 0x80, 0x81, 0x82, or 0x83, and that the second
+# be 0x81. This works for the files I have, but maybe not for everyone's.
+
+# Unfortunately, this magic is quite weak - MPi
+#122 beshort&0xFCFF 0x8081 Macintosh MacBinary data
+
+# MacBinary I doesn't have the version number field at all, but MacBinary II
+# has been in use since 1987 so I hope there aren't many really old files
+# floating around that this will miss. The original spec calls for using
+# the nulls in 0, 74, and 82 as the magic number.
+#
+# Another possibility, that would also work for MacBinary I, is to use
+# the assumption that 65-72 will all be ASCII (0x20-0x7F), that 73 will
+# have bits 1 (changed), 2 (busy), 3 (bozo), and 6 (invisible) unset,
+# and that 74 will be 0. So something like
+#
+# 71 belong&0x80804EFF 0x00000000 Macintosh MacBinary data
+#
+# >73 byte&0x01 0x01 \b, inited
+# >73 byte&0x02 0x02 \b, changed
+# >73 byte&0x04 0x04 \b, busy
+# >73 byte&0x08 0x08 \b, bozo
+# >73 byte&0x10 0x10 \b, system
+# >73 byte&0x10 0x20 \b, bundle
+# >73 byte&0x10 0x40 \b, invisible
+# >73 byte&0x10 0x80 \b, locked
+
+#>65 string x \b, type "%4.4s"
+
+#>65 string 8BIM (PhotoShop)
+#>65 string ALB3 (PageMaker 3)
+#>65 string ALB4 (PageMaker 4)
+#>65 string ALT3 (PageMaker 3)
+#>65 string APPL (application)
+#>65 string AWWP (AppleWorks word processor)
+#>65 string CIRC (simulated circuit)
+#>65 string DRWG (MacDraw)
+#>65 string EPSF (Encapsulated PostScript)
+#>65 string FFIL (font suitcase)
+#>65 string FKEY (function key)
+#>65 string FNDR (Macintosh Finder)
+#>65 string GIFf (GIF image)
+#>65 string Gzip (GNU gzip)
+#>65 string INIT (system extension)
+#>65 string LIB\ (library)
+#>65 string LWFN (PostScript font)
+#>65 string MSBC (Microsoft BASIC)
+#>65 string PACT (Compact Pro archive)
+#>65 string PDF\ (Portable Document Format)
+#>65 string PICT (picture)
+#>65 string PNTG (MacPaint picture)
+#>65 string PREF (preferences)
+#>65 string PROJ (Think C project)
+#>65 string QPRJ (Think Pascal project)
+#>65 string SCFL (Defender scores)
+#>65 string SCRN (startup screen)
+#>65 string SITD (StuffIt Deluxe)
+#>65 string SPn3 (SuperPaint)
+#>65 string STAK (HyperCard stack)
+#>65 string Seg\ (StuffIt segment)
+#>65 string TARF (Unix tar archive)
+#>65 string TEXT (ASCII)
+#>65 string TIFF (TIFF image)
+#>65 string TOVF (Eudora table of contents)
+#>65 string WDBN (Microsoft Word word processor)
+#>65 string WORD (MacWrite word processor)
+#>65 string XLS\ (Microsoft Excel)
+#>65 string ZIVM (compress (.Z))
+#>65 string ZSYS (Pre-System 7 system file)
+#>65 string acf3 (Aldus FreeHand)
+#>65 string cdev (control panel)
+#>65 string dfil (Desk Acessory suitcase)
+#>65 string libr (library)
+#>65 string nX^d (WriteNow word processor)
+#>65 string nX^w (WriteNow dictionary)
+#>65 string rsrc (resource)
+#>65 string scbk (Scrapbook)
+#>65 string shlb (shared library)
+#>65 string ttro (SimpleText read-only)
+#>65 string zsys (system file)
+
+#>69 string x \b, creator "%4.4s"
+
+# Somewhere, Apple has a repository of registered Creator IDs. These are
+# just the ones that I happened to have files from and was able to identify.
+
+#>69 string 8BIM (Adobe Photoshop)
+#>69 string ALD3 (PageMaker 3)
+#>69 string ALD4 (PageMaker 4)
+#>69 string ALFA (Alpha editor)
+#>69 string APLS (Apple Scanner)
+#>69 string APSC (Apple Scanner)
+#>69 string BRKL (Brickles)
+#>69 string BTFT (BitFont)
+#>69 string CCL2 (Common Lisp 2)
+#>69 string CCL\ (Common Lisp)
+#>69 string CDmo (The Talking Moose)
+#>69 string CPCT (Compact Pro)
+#>69 string CSOm (Eudora)
+#>69 string DMOV (Font/DA Mover)
+#>69 string DSIM (DigSim)
+#>69 string EDIT (Macintosh Edit)
+#>69 string ERIK (Macintosh Finder)
+#>69 string EXTR (self-extracting archive)
+#>69 string Gzip (GNU gzip)
+#>69 string KAHL (Think C)
+#>69 string LWFU (LaserWriter Utility)
+#>69 string LZIV (compress)
+#>69 string MACA (MacWrite)
+#>69 string MACS (Macintosh operating system)
+#>69 string MAcK (MacKnowledge terminal emulator)
+#>69 string MLND (Defender)
+#>69 string MPNT (MacPaint)
+#>69 string MSBB (Microsoft BASIC (binary))
+#>69 string MSWD (Microsoft Word)
+#>69 string NCSA (NCSA Telnet)
+#>69 string PJMM (Think Pascal)
+#>69 string PSAL (Hunt the Wumpus)
+#>69 string PSI2 (Apple File Exchange)
+#>69 string R*ch (BBEdit)
+#>69 string RMKR (Resource Maker)
+#>69 string RSED (Resource Editor)
+#>69 string Rich (BBEdit)
+#>69 string SIT! (StuffIt)
+#>69 string SPNT (SuperPaint)
+#>69 string Unix (NeXT Mac filesystem)
+#>69 string VIM! (Vim editor)
+#>69 string WILD (HyperCard)
+#>69 string XCEL (Microsoft Excel)
+#>69 string aCa2 (Fontographer)
+#>69 string aca3 (Aldus FreeHand)
+#>69 string dosa (Macintosh MS-DOS file system)
+#>69 string movr (Font/DA Mover)
+#>69 string nX^n (WriteNow)
+#>69 string pdos (Apple ProDOS file system)
+#>69 string scbk (Scrapbook)
+#>69 string ttxt (SimpleText)
+#>69 string ufox (Foreign File Access)
+
+# Just in case...
+
+102 string mBIN MacBinary III data with surprising version number
+
+# sas magic from Bruce Foster (bef at nwu.edu)
+#
+#0 string SAS SAS
+#>8 string x %s
+0 string SAS SAS
+>24 string DATA data file
+>24 string CATALOG catalog
+>24 string INDEX data file index
+>24 string VIEW data view
+# sas 7+ magic from Reinhold Koch (reinhold.koch at roche.com)
+#
+0x54 string SAS SAS 7+
+>0x9C string DATA data file
+>0x9C string CATALOG catalog
+>0x9C string INDEX data file index
+>0x9C string VIEW data view
+
+# spss magic for SPSS system and portable files,
+# from Bruce Foster (bef at nwu.edu).
+
+0 long 0xc1e2c3c9 SPSS Portable File
+>40 string x %s
+
+0 string $FL2 SPSS System File
+>24 string x %s
+
+# Macintosh filesystem data
+# From "Tom N Harris" <telliamed at mac.com>
+# Fixed HFS+ and Partition map magic: Ethan Benson <erbenson at alaska.net>
+# The MacOS epoch begins on 1 Jan 1904 instead of 1 Jan 1970, so these
+# entries depend on the data arithmetic added after v.35
+# There's also some Pascal strings in here, ditto...
+
+# The boot block signature, according to IM:Files, is
+# "for HFS volumes, this field always contains the value 0x4C4B."
+# But if this is true for MFS or HFS+ volumes, I don't know.
+# Alternatively, the boot block is supposed to be zeroed if it's
+# unused, so a simple >0 test should suffice.
+
+0x400 beshort 0xD2D7 Macintosh MFS data
+>0 beshort 0x4C4B (bootable)
+>0x40a beshort &0x8000 (locked)
+>0x402 beldate-0x7C25B080 x created: %s,
+>0x406 beldate-0x7C25B080 >0 last backup: %s,
+>0x414 belong x block size: %d,
+>0x412 beshort x number of blocks: %d,
+>0x424 pstring x volume name: %s
+
+# "BD" has many false positives
+#0x400 beshort 0x4244 Macintosh HFS data
+#>0 beshort 0x4C4B (bootable)
+#>0x40a beshort &0x8000 (locked)
+#>0x40a beshort ^0x0100 (mounted)
+#>0x40a beshort &0x0200 (spared blocks)
+#>0x40a beshort &0x0800 (unclean)
+#>0x47C beshort 0x482B (Embedded HFS+ Volume)
+#>0x402 beldate-0x7C25B080 x created: %s,
+#>0x406 beldate-0x7C25B080 x last modified: %s,
+#>0x440 beldate-0x7C25B080 >0 last backup: %s,
+#>0x414 belong x block size: %d,
+#>0x412 beshort x number of blocks: %d,
+#>0x424 pstring x volume name: %s
+
+0x400 beshort 0x482B Macintosh HFS Extended
+>&0 beshort x version %d data
+>0 beshort 0x4C4B (bootable)
+>0x404 belong ^0x00000100 (mounted)
+>&2 belong &0x00000200 (spared blocks)
+>&2 belong &0x00000800 (unclean)
+>&2 belong &0x00008000 (locked)
+>&6 string x last mounted by: '%.4s',
+# really, that should be treated as a belong and we print a string
+# based on the value. TN1150 only mentions '8.10' for "MacOS 8.1"
+>&14 beldate-0x7C25B080 x created: %s,
+# only the creation date is local time, all other timestamps in HFS+ are UTC.
+>&18 bedate-0x7C25B080 x last modified: %s,
+>&22 bedate-0x7C25B080 >0 last backup: %s,
+>&26 bedate-0x7C25B080 >0 last checked: %s,
+>&38 belong x block size: %d,
+>&42 belong x number of blocks: %d,
+>&46 belong x free blocks: %d
+
+# I don't think this is really necessary since it doesn't do much and
+# anything with a valid driver descriptor will also have a valid
+# partition map
+#0 beshort 0x4552 Apple Device Driver data
+#>&24 beshort =1 \b, MacOS
+
+# Is the partition type a cstring or a pstring? Well, IM says "strings
+# shorter than 32 bytes must be terminated with NULL" so I'll treat it as a
+# cstring. Of course, partitions can contain more than four entries, but
+# what're you gonna do?
+# GRR: This magic is too weak, it is just "PM"
+#0x200 beshort 0x504D Apple Partition data
+#>0x2 beshort x (block size: %d):
+#>0x230 string x first type: %s,
+#>0x210 string x name: %s,
+#>0x254 belong x number of blocks: %d,
+#>0x400 beshort 0x504D
+#>>0x430 string x second type: %s,
+#>>0x410 string x name: %s,
+#>>0x454 belong x number of blocks: %d,
+#>>0x600 beshort 0x504D
+#>>>0x630 string x third type: %s,
+#>>>0x610 string x name: %s,
+#>>>0x654 belong x number of blocks: %d,
+#>>0x800 beshort 0x504D
+#>>>0x830 string x fourth type: %s,
+#>>>0x810 string x name: %s,
+#>>>0x854 belong x number of blocks: %d,
+#>>>0xa00 beshort 0x504D
+#>>>>0xa30 string x fifth type: %s,
+#>>>>0xa10 string x name: %s,
+#>>>>0xa54 belong x number of blocks: %d
+#>>>0xc00 beshort 0x504D
+#>>>>0xc30 string x sixth type: %s,
+#>>>>0xc10 string x name: %s,
+#>>>>0xc54 belong x number of blocks: %d
+## AFAIK, only the signature is different
+#0x200 beshort 0x5453 Apple Old Partition data
+#>0x2 beshort x block size: %d,
+#>0x230 string x first type: %s,
+#>0x210 string x name: %s,
+#>0x254 belong x number of blocks: %d,
+#>0x400 beshort 0x504D
+#>>0x430 string x second type: %s,
+#>>0x410 string x name: %s,
+#>>0x454 belong x number of blocks: %d,
+#>>0x800 beshort 0x504D
+#>>>0x830 string x third type: %s,
+#>>>0x810 string x name: %s,
+#>>>0x854 belong x number of blocks: %d,
+#>>>0xa00 beshort 0x504D
+#>>>>0xa30 string x fourth type: %s,
+#>>>>0xa10 string x name: %s,
+#>>>>0xa54 belong x number of blocks: %d
+
+# From: Remi Mommsen <mommsen at slac.stanford.edu>
+0 string BOMStore Mac OS X bill of materials (BOM) file
+
+#------------------------------------------------------------------------------
+# $File: magic,v 1.9 2009/09/19 16:28:10 christos Exp $
+# magic: file(1) magic for magic files
+#
+0 string #\ Magic magic text file for file(1) cmd
+0 lelong 0xF11E041C magic binary file for file(1) cmd
+>4 lelong x (version %d) (little endian)
+0 belong 0xF11E041C magic binary file for file(1) cmd
+>4 belong x (version %d) (big endian)
+
+#------------------------------------------------------------------------------
+# $File: mail.news,v 1.17 2009/09/19 16:28:10 christos Exp $
+# mail.news: file(1) magic for mail and news
+#
+# Unfortunately, saved netnews also has From line added in some news software.
+#0 string From mail text
+# There are tests in ascmagic.c to cope with mail and news.
+0 string Relay-Version: old news text
+!:mime message/rfc822
+0 string #!\ rnews batched news text
+!:mime message/rfc822
+0 string N#!\ rnews mailed, batched news text
+!:mime message/rfc822
+0 string Forward\ to mail forwarding text
+!:mime message/rfc822
+0 string Pipe\ to mail piping text
+!:mime message/rfc822
+0 string Return-Path: smtp mail text
+!:mime message/rfc822
+0 string Path: news text
+!:mime message/news
+0 string Xref: news text
+!:mime message/news
+0 string From: news or mail text
+!:mime message/rfc822
+0 string Article saved news text
+!:mime message/news
+0 string BABYL Emacs RMAIL text
+0 string Received: RFC 822 mail text
+!:mime message/rfc822
+0 string MIME-Version: MIME entity text
+#0 string Content- MIME entity text
+
+# TNEF files...
+0 lelong 0x223E9F78 Transport Neutral Encapsulation Format
+
+# From: Kevin Sullivan <ksulliva at psc.edu>
+0 string *mbx* MBX mail folder
+
+# From: Simon Matter <simon.matter at invoca.ch>
+0 string \241\002\213\015skiplist\ file\0\0\0 Cyrus skiplist DB
+
+# JAM(mbp) Fidonet message area databases
+# JHR file
+0 string JAM\0 JAM message area header file
+>12 leshort >0 (%d messages)
+
+# Squish Fidonet message area databases
+# SQD file (requires at least one message in the area)
+# XXX: Weak magic
+#256 leshort 0xAFAE4453 Squish message area data file
+#>4 leshort >0 (%d messages)
+
+#0 string \<!--\ MHonArc text/html; x-type=mhonarc
+
+#------------------------------------------------------------------------------
+# $File: maple,v 1.6 2009/09/19 16:28:10 christos Exp $
+# maple: file(1) magic for maple files
+# "H. Nanosecond" <aldomel at ix.netcom.com>
+# Maple V release 4, a multi-purpose math program
+#
+
+# maple library .lib
+0 string \000MVR4\nI MapleVr4 library
+
+# .ind
+# no magic for these :-(
+# they are compiled indexes for maple files
+
+# .hdb
+0 string \000\004\000\000 Maple help database
+
+# .mhp
+# this has the form <PACKAGE=name>
+0 string \<PACKAGE= Maple help file
+0 string \<HELP\ NAME= Maple help file
+0 string \n\<HELP\ NAME= Maple help file with extra carriage return at start (yuck)
+#0 string #\ Newton Maple help file, old style
+0 string #\ daub Maple help file, old style
+#0 string #=========== Maple help file, old style
+
+# .mws
+0 string \000\000\001\044\000\221 Maple worksheet
+#this is anomalous
+0 string WriteNow\000\002\000\001\000\000\000\000\100\000\000\000\000\000 Maple worksheet, but weird
+# this has the form {VERSION 2 3 "IBM INTEL NT" "2.3" }\n
+# that is {VERSION major_version minor_version computer_type version_string}
+0 string {VERSION\ Maple worksheet
+>9 string >\0 version %.1s.
+>>>11 string >\0 %.1s
+
+# .mps
+0 string \0\0\001$ Maple something
+# from byte 4 it is either 'nul E' or 'soh R'
+# I think 'nul E' means a file that was saved as a different name
+# a sort of revision marking
+# 'soh R' means new
+>4 string \000\105 An old revision
+>4 string \001\122 The latest save
+
+# .mpl
+# some of these are the same as .mps above
+#0000000 000 000 001 044 000 105 same as .mps
+#0000000 000 000 001 044 001 122 same as .mps
+
+0 string #\n##\ <SHAREFILE= Maple something
+0 string \n#\n##\ <SHAREFILE= Maple something
+0 string ##\ <SHAREFILE= Maple something
+0 string #\r##\ <SHAREFILE= Maple something
+0 string \r#\r##\ <SHAREFILE= Maple something
+0 string #\ \r##\ <DESCRIBE> Maple something anomalous.
+
+#------------------------------------------------------------------------------
+# $File: mathcad,v 1.5 2009/09/19 16:28:10 christos Exp $
+# mathcad: file(1) magic for Mathcad documents
+# URL: http://www.mathsoft.com/
+# From: Josh Triplett <josh at freedesktop.org>
+
+0 string .MCAD\t Mathcad document
+
+#------------------------------------------------------------------------------
+# $File: mathematica,v 1.7 2009/09/19 16:28:10 christos Exp $
+# mathematica: file(1) magic for mathematica files
+# "H. Nanosecond" <aldomel at ix.netcom.com>
+# Mathematica a multi-purpose math program
+# versions 2.2 and 3.0
+
+#mathematica .mb
+0 string \064\024\012\000\035\000\000\000 Mathematica version 2 notebook
+0 string \064\024\011\000\035\000\000\000 Mathematica version 2 notebook
+
+# .ma
+# multiple possibilities:
+
+0 string (*^\n\n::[\011frontEndVersion\ =\ Mathematica notebook
+#>41 string >\0 %s
+
+#0 string (*^\n\n::[\011palette Mathematica notebook version 2.x
+
+#0 string (*^\n\n::[\011Information Mathematica notebook version 2.x
+#>675 string >\0 %s #doesn't work well
+
+# there may be 'cr' instead of 'nl' in some files; does this matter?
+
+# generic:
+0 string (*^\r\r::[\011 Mathematica notebook version 2.x
+0 string (*^\r\n\r\n::[\011 Mathematica notebook version 2.x
+0 string (*^\015 Mathematica notebook version 2.x
+0 string (*^\n\r\n\r::[\011 Mathematica notebook version 2.x
+0 string (*^\r::[\011 Mathematica notebook version 2.x
+0 string (*^\r\n::[\011 Mathematica notebook version 2.x
+0 string (*^\n\n::[\011 Mathematica notebook version 2.x
+0 string (*^\n::[\011 Mathematica notebook version 2.x
+
+
+# Mathematica .mx files
+
+#0 string (*This\ is\ a\ Mathematica\ binary\ dump\ file.\ It\ can\ be\ loaded\ with\ Get.*) Mathematica binary file
+0 string (*This\ is\ a\ Mathematica\ binary\ Mathematica binary file
+#>71 string \000\010\010\010\010\000\000\000\000\000\000\010\100\010\000\000\000
+# >71... is optional
+>88 string >\0 from %s
+
+
+# Mathematica files PBF:
+# 115 115 101 120 102 106 000 001 000 000 000 203 000 001 000
+0 string MMAPBF\000\001\000\000\000\203\000\001\000 Mathematica PBF (fonts I think)
+
+# .ml files These are menu resources I think
+# these start with "[0-9][0-9][0-9]\ A~[0-9][0-9][0-9]\
+# how to put that into a magic rule?
+4 string \ A~ MAthematica .ml file
+
+# .nb files
+#too long 0 string (***********************************************************************\n\n\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ Mathematica-Compatible Notebook Mathematica 3.0 notebook
+0 string (*********************** Mathematica 3.0 notebook
+
+# other (* matches: it is a comment start in these languages
+# GRR: Too weak; also matches other languages e.g. ML
+#0 string (* Mathematica, or Pascal, Modula-2 or 3 code text
+
+#########################
+# MatLab v5
+0 string MATLAB Matlab v5 mat-file
+>126 short 0x494d (big endian)
+>>124 beshort x version 0x%04x
+>126 short 0x4d49 (little endian)
+>>124 leshort x version 0x%04x
+
+
+#------------------------------------------------------------------------------
+# $File: matroska,v 1.5 2009/09/27 19:02:12 christos Exp $
+# matroska: file(1) magic for Matroska files
+#
+# See http://www.matroska.org/
+#
+
+# EBML id:
+0 belong 0x1a45dfa3
+# DocType id:
+>5 beshort 0x4282
+# DocType contents:
+>>8 string matroska Matroska data
+!:mime video/x-matroska
+
+# EBML id:
+0 belong 0x1a45dfa3
+# DocType id:
+>0 search/4096 \x42\x82
+# DocType contents:
+>>&1 string webm WebM
+!:mime video/webm
+
+#------------------------------------------------------------------------------
+# $File: mcrypt,v 1.5 2009/09/19 16:28:10 christos Exp $
+# Mavroyanopoulos Nikos <nmav at hellug.gr>
+# mcrypt: file(1) magic for mcrypt 2.2.x;
+0 string \0m\3 mcrypt 2.5 encrypted data,
+>4 string >\0 algorithm: %s,
+>>&1 leshort >0 keysize: %d bytes,
+>>>&0 string >\0 mode: %s,
+
+0 string \0m\2 mcrypt 2.2 encrypted data,
+>3 byte 0 algorithm: blowfish-448,
+>3 byte 1 algorithm: DES,
+>3 byte 2 algorithm: 3DES,
+>3 byte 3 algorithm: 3-WAY,
+>3 byte 4 algorithm: GOST,
+>3 byte 6 algorithm: SAFER-SK64,
+>3 byte 7 algorithm: SAFER-SK128,
+>3 byte 8 algorithm: CAST-128,
+>3 byte 9 algorithm: xTEA,
+>3 byte 10 algorithm: TWOFISH-128,
+>3 byte 11 algorithm: RC2,
+>3 byte 12 algorithm: TWOFISH-192,
+>3 byte 13 algorithm: TWOFISH-256,
+>3 byte 14 algorithm: blowfish-128,
+>3 byte 15 algorithm: blowfish-192,
+>3 byte 16 algorithm: blowfish-256,
+>3 byte 100 algorithm: RC6,
+>3 byte 101 algorithm: IDEA,
+>4 byte 0 mode: CBC,
+>4 byte 1 mode: ECB,
+>4 byte 2 mode: CFB,
+>4 byte 3 mode: OFB,
+>4 byte 4 mode: nOFB,
+>5 byte 0 keymode: 8bit
+>5 byte 1 keymode: 4bit
+>5 byte 2 keymode: SHA-1 hash
+>5 byte 3 keymode: MD5 hash
+
+#------------------------------------------------------------------------------
+# $File: mercurial,v 1.4 2009/09/19 16:28:10 christos Exp $
+# mercurial: file(1) magic for Mercurial changeset bundles
+# http://www.selenic.com/mercurial/wiki/
+#
+# Jesse Glick (jesse.glick at sun.com)
+#
+
+0 string HG10 Mercurial changeset bundle
+>4 string UN (uncompressed)
+>4 string GZ (gzip compressed)
+>4 string BZ (bzip2 compressed)
+
+#------------------------------------------------------------------------------
+# $File: mime,v 1.5 2009/09/19 16:28:10 christos Exp $
+# mime: file(1) magic for MIME encoded files
+#
+0 string Content-Type:\
+>14 string >\0 %s
+0 string Content-Type:
+>13 string >\0 %s
+
+#------------------------------------------------------------------------------
+# $File: mips,v 1.5 2009/09/19 16:28:10 christos Exp $
+# mips: file(1) magic for Silicon Graphics (MIPS, IRIS, IRIX, etc.)
+# Dec Ultrix (MIPS)
+# all of SGI's *current* machines and OSes run in big-endian mode on the
+# MIPS machines, as far as I know.
+#
+# XXX - what is the blank "-" line?
+#
+# kbd file definitions
+0 string kbd!map kbd map file
+>8 byte >0 Ver %d:
+>10 short >0 with %d table(s)
+0 belong 0407 old SGI 68020 executable
+0 belong 0410 old SGI 68020 pure executable
+0 beshort 0x8765 disk quotas file
+0 beshort 0x0506 IRIS Showcase file
+>2 byte 0x49 -
+>3 byte x - version %ld
+0 beshort 0x0226 IRIS Showcase template
+>2 byte 0x63 -
+>3 byte x - version %ld
+0 belong 0x5343464d IRIS Showcase file
+>4 byte x - version %ld
+0 belong 0x5443464d IRIS Showcase template
+>4 byte x - version %ld
+0 belong 0xdeadbabe IRIX Parallel Arena
+>8 belong >0 - version %ld
+#
+0 beshort 0x0160 MIPSEB ECOFF executable
+>20 beshort 0407 (impure)
+>20 beshort 0410 (swapped)
+>20 beshort 0413 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>22 byte x - version %ld
+>23 byte x .%ld
+#
+0 beshort 0x0162 MIPSEL-BE ECOFF executable
+>20 beshort 0407 (impure)
+>20 beshort 0410 (swapped)
+>20 beshort 0413 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>23 byte x - version %d
+>22 byte x .%ld
+#
+0 beshort 0x6001 MIPSEB-LE ECOFF executable
+>20 beshort 03401 (impure)
+>20 beshort 04001 (swapped)
+>20 beshort 05401 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>23 byte x - version %d
+>22 byte x .%ld
+#
+0 beshort 0x6201 MIPSEL ECOFF executable
+>20 beshort 03401 (impure)
+>20 beshort 04001 (swapped)
+>20 beshort 05401 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>23 byte x - version %ld
+>22 byte x .%ld
+#
+# MIPS 2 additions
+#
+0 beshort 0x0163 MIPSEB MIPS-II ECOFF executable
+>20 beshort 0407 (impure)
+>20 beshort 0410 (swapped)
+>20 beshort 0413 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>22 byte x - version %ld
+>23 byte x .%ld
+#
+0 beshort 0x0166 MIPSEL-BE MIPS-II ECOFF executable
+>20 beshort 0407 (impure)
+>20 beshort 0410 (swapped)
+>20 beshort 0413 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>22 byte x - version %ld
+>23 byte x .%ld
+#
+0 beshort 0x6301 MIPSEB-LE MIPS-II ECOFF executable
+>20 beshort 03401 (impure)
+>20 beshort 04001 (swapped)
+>20 beshort 05401 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>23 byte x - version %ld
+>22 byte x .%ld
+#
+0 beshort 0x6601 MIPSEL MIPS-II ECOFF executable
+>20 beshort 03401 (impure)
+>20 beshort 04001 (swapped)
+>20 beshort 05401 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>23 byte x - version %ld
+>22 byte x .%ld
+#
+# MIPS 3 additions
+#
+0 beshort 0x0140 MIPSEB MIPS-III ECOFF executable
+>20 beshort 0407 (impure)
+>20 beshort 0410 (swapped)
+>20 beshort 0413 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>22 byte x - version %ld
+>23 byte x .%ld
+#
+0 beshort 0x0142 MIPSEL-BE MIPS-III ECOFF executable
+>20 beshort 0407 (impure)
+>20 beshort 0410 (swapped)
+>20 beshort 0413 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>22 byte x - version %ld
+>23 byte x .%ld
+#
+0 beshort 0x4001 MIPSEB-LE MIPS-III ECOFF executable
+>20 beshort 03401 (impure)
+>20 beshort 04001 (swapped)
+>20 beshort 05401 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>23 byte x - version %ld
+>22 byte x .%ld
+#
+0 beshort 0x4201 MIPSEL MIPS-III ECOFF executable
+>20 beshort 03401 (impure)
+>20 beshort 04001 (swapped)
+>20 beshort 05401 (paged)
+>8 belong >0 not stripped
+>8 belong 0 stripped
+>23 byte x - version %ld
+>22 byte x .%ld
+#
+0 beshort 0x180 MIPSEB Ucode
+0 beshort 0x182 MIPSEL-BE Ucode
+# 32bit core file
+0 belong 0xdeadadb0 IRIX core dump
+>4 belong 1 of
+>16 string >\0 '%s'
+# 64bit core file
+0 belong 0xdeadad40 IRIX 64-bit core dump
+>4 belong 1 of
+>16 string >\0 '%s'
+# N32bit core file
+0 belong 0xbabec0bb IRIX N32 core dump
+>4 belong 1 of
+>16 string >\0 '%s'
+# New style crash dump file
+0 string \x43\x72\x73\x68\x44\x75\x6d\x70 IRIX vmcore dump of
+>36 string >\0 '%s'
+# Trusted IRIX info
+0 string SGIAUDIT SGI Audit file
+>8 byte x - version %d
+>9 byte x .%ld
+#
+0 string WNGZWZSC Wingz compiled script
+0 string WNGZWZSS Wingz spreadsheet
+0 string WNGZWZHP Wingz help file
+#
+0 string #Inventor V IRIS Inventor 1.0 file
+0 string #Inventor V2 Open Inventor 2.0 file
+# GLF is OpenGL stream encoding
+0 string glfHeadMagic(); GLF_TEXT
+4 belong 0x7d000000 GLF_BINARY_LSB_FIRST
+4 belong 0x0000007d GLF_BINARY_MSB_FIRST
+# GLS is OpenGL stream encoding; GLS is the successor of GLF
+0 string glsBeginGLS( GLS_TEXT
+4 belong 0x10000000 GLS_BINARY_LSB_FIRST
+4 belong 0x00000010 GLS_BINARY_MSB_FIRST
+
+#------------------------------------------------------------------------------
+# $File: mirage,v 1.7 2009/09/19 16:28:10 christos Exp $
+# mirage: file(1) magic for Mirage executables
+#
+# XXX - byte order?
+#
+0 long 31415 Mirage Assembler m.out executable
+
+#-----------------------------------------------------------------------------
+# $File: misctools,v 1.10 2009/09/19 16:28:10 christos Exp $
+# misctools: file(1) magic for miscellaneous UNIX tools.
+#
+0 search/1 %%!! X-Post-It-Note text
+0 string/c BEGIN:VCALENDAR vCalendar calendar file
+0 string/c BEGIN:VCARD vCard visiting card
+!:mime text/x-vcard
+
+# From: Alex Beregszaszi <alex at fsn.hu>
+4 string gtktalog GNOME Catalogue (gtktalog)
+>13 string >\0 version %s
+
+# Summary: Libtool library file
+# Extension: .la
+# Submitted by: Tomasz Trojanowski <tomek at uninet.com.pl>
+0 search/80 .la\ -\ a\ libtool\ library\ file libtool library file
+
+# Summary: Libtool object file
+# Extension: .lo
+# Submitted by: Abel Cheung <abelcheung at gmail.com>
+0 search/80 .lo\ -\ a\ libtool\ object\ file libtool object file
+
+# From: Daniel Novotny <dnovotny at redhat.com>
+0 string MDMP\x93\xA7 MDMP crash report data
+
+#-----------------------------------------------------------------------------
+# $File: misctools,v 1.10 2009/09/19 16:28:10 christos Exp $
+# misctools: file(1) magic for miscellaneous UNIX tools.
+#
+0 search/1 %%!! X-Post-It-Note text
+0 string/c BEGIN:VCALENDAR vCalendar calendar file
+0 string/c BEGIN:VCARD vCard visiting card
+!:mime text/x-vcard
+
+# From: Alex Beregszaszi <alex at fsn.hu>
+4 string gtktalog GNOME Catalogue (gtktalog)
+>13 string >\0 version %s
+
+# Summary: Libtool library file
+# Extension: .la
+# Submitted by: Tomasz Trojanowski <tomek at uninet.com.pl>
+0 search/80 .la\ -\ a\ libtool\ library\ file libtool library file
+
+# Summary: Libtool object file
+# Extension: .lo
+# Submitted by: Abel Cheung <abelcheung at gmail.com>
+0 search/80 .lo\ -\ a\ libtool\ object\ file libtool object file
+
+#------------------------------------------------------------------------------
+# $File: mkid,v 1.6 2009/09/19 16:28:10 christos Exp $
+# mkid: file(1) magic for mkid(1) databases
+#
+# ID is the binary tags database produced by mkid(1).
+#
+# XXX - byte order?
+#
+0 string \311\304 ID tags data
+>2 short >0 version %d
+
+#------------------------------------------------------------------------------
+# $File: mlssa,v 1.4 2009/09/19 16:28:10 christos Exp $
+# mlssa: file(1) magic for MLSSA datafiles
+#
+0 lelong 0xffffabcd MLSSA datafile,
+>4 leshort x algorithm %d,
+>10 lelong x %d samples
+
+#------------------------------------------------------------------------------
+# $File: mmdf,v 1.6 2009/09/19 16:28:10 christos Exp $
+# mmdf: file(1) magic for MMDF mail files
+#
+0 string \001\001\001\001 MMDF mailbox
+
+#------------------------------------------------------------------------------
+# $File: modem,v 1.4 2009/09/19 16:28:10 christos Exp $
+# modem: file(1) magic for modem programs
+#
+# From: Florian La Roche <florian at knorke.saar.de>
+4 string Research, Digifax-G3-File
+>29 byte 1 , fine resolution
+>29 byte 0 , normal resolution
+
+0 short 0x0100 raw G3 data, byte-padded
+0 short 0x1400 raw G3 data
+#
+# Magic data for vgetty voice formats
+# (Martin Seine & Marc Eberhard)
+
+#
+# raw modem data version 1
+#
+0 string RMD1 raw modem data
+>4 string >\0 (%s /
+>20 short >0 compression type 0x%04x)
+
+#
+# portable voice format 1
+#
+0 string PVF1\n portable voice format
+>5 string >\0 (binary %s)
+
+#
+# portable voice format 2
+#
+0 string PVF2\n portable voice format
+>5 string >\0 (ascii %s)
+
+
+#------------------------------------------------------------------------------
+# $File: motorola,v 1.10 2009/09/19 16:28:11 christos Exp $
+# motorola: file(1) magic for Motorola 68K and 88K binaries
+#
+# 68K
+#
+0 beshort 0520 mc68k COFF
+>18 beshort ^00000020 object
+>18 beshort &00000020 executable
+>12 belong >0 not stripped
+>168 string .lowmem Apple toolbox
+>20 beshort 0407 (impure)
+>20 beshort 0410 (pure)
+>20 beshort 0413 (demand paged)
+>20 beshort 0421 (standalone)
+0 beshort 0521 mc68k executable (shared)
+>12 belong >0 not stripped
+0 beshort 0522 mc68k executable (shared demand paged)
+>12 belong >0 not stripped
+#
+# Motorola/UniSoft 68K Binary Compatibility Standard (BCS)
+#
+0 beshort 0554 68K BCS executable
+#
+# 88K
+#
+# Motorola/88Open BCS
+#
+0 beshort 0555 88K BCS executable
+#
+# Motorola S-Records, from Gerd Truschinski <gt at freebsd.first.gmd.de>
+0 string S0 Motorola S-Record; binary data in text format
+
+# ATARI ST relocatable PRG
+#
+# from Oskar Schirmer <schirmer at scara.com> Feb 3, 2001
+# (according to Roland Waldi, Oct 21, 1987)
+# besides the magic 0x601a, the text segment size is checked to be
+# not larger than 1 MB (which is a lot on ST).
+# The additional 0x601b distinction I took from Doug Lee's magic.
+0 belong&0xFFFFFFF0 0x601A0000 Atari ST M68K contiguous executable
+>2 belong x (txt=%ld,
+>6 belong x dat=%ld,
+>10 belong x bss=%ld,
+>14 belong x sym=%ld)
+0 belong&0xFFFFFFF0 0x601B0000 Atari ST M68K non-contig executable
+>2 belong x (txt=%ld,
+>6 belong x dat=%ld,
+>10 belong x bss=%ld,
+>14 belong x sym=%ld)
+
+# Atari ST/TT... program format (sent by Wolfram Kleff <kleff at cs.uni-bonn.de>)
+0 beshort 0x601A Atari 68xxx executable,
+>2 belong x text len %lu,
+>6 belong x data len %lu,
+>10 belong x BSS len %lu,
+>14 belong x symboltab len %lu,
+>18 belong 0
+>22 belong &0x01 fastload flag,
+>22 belong &0x02 may be loaded to alternate RAM,
+>22 belong &0x04 malloc may be from alternate RAM,
+>22 belong x flags: 0x%lX,
+>26 beshort 0 no relocation tab
+>26 beshort !0 + relocation tab
+>30 string SFX [Self-Extracting LZH SFX archive]
+>38 string SFX [Self-Extracting LZH SFX archive]
+>44 string ZIP! [Self-Extracting ZIP SFX archive]
+
+0 beshort 0x0064 Atari 68xxx CPX file
+>8 beshort x (version %04lx)
+
+#------------------------------------------------------------------------------
+# $File: mozilla,v 1.4 2009/09/19 16:28:11 christos Exp $
+# mozilla: file(1) magic for Mozilla XUL fastload files
+# (XUL.mfasl and XPC.mfasl)
+# URL: http://www.mozilla.org/
+# From: Josh Triplett <josh at freedesktop.org>
+
+0 string XPCOM\nMozFASL\r\n\x1A Mozilla XUL fastload data
+
+#------------------------------------------------------------------------------
+# $File: msdos,v 1.65 2009/09/19 16:28:11 christos Exp $
+# msdos: file(1) magic for MS-DOS files
+#
+
+# .BAT files (Daniel Quinlan, quinlan at yggdrasil.com)
+# updated by Joerg Jenderek at Oct 2008
+0 string @
+>1 string/cW \ echo\ off DOS batch file text
+!:mime text/x-msdos-batch
+>1 string/cW echo\ off DOS batch file text
+!:mime text/x-msdos-batch
+>1 string/cW rem\ DOS batch file text
+!:mime text/x-msdos-batch
+>1 string/cW set\ DOS batch file text
+!:mime text/x-msdos-batch
+
+
+# OS/2 batch files are REXX. the second regex is a bit generic, oh well
+# the matched commands seem to be common in REXX and uncommon elsewhere
+100 regex/c =^[\ \t]{0,10}call[\ \t]{1,10}rxfunc OS/2 REXX batch file text
+100 regex/c =^[\ \t]{0,10}say\ ['"] OS/2 REXX batch file text
+
+0 leshort 0x14c MS Windows COFF Intel 80386 object file
+#>4 ledate x stamp %s
+0 leshort 0x166 MS Windows COFF MIPS R4000 object file
+#>4 ledate x stamp %s
+0 leshort 0x184 MS Windows COFF Alpha object file
+#>4 ledate x stamp %s
+0 leshort 0x268 MS Windows COFF Motorola 68000 object file
+#>4 ledate x stamp %s
+0 leshort 0x1f0 MS Windows COFF PowerPC object file
+#>4 ledate x stamp %s
+0 leshort 0x290 MS Windows COFF PA-RISC object file
+#>4 ledate x stamp %s
+
+# XXX - according to Microsoft's spec, at an offset of 0x3c in a
+# PE-format executable is the offset in the file of the PE header;
+# unfortunately, that's a little-endian offset, and there's no way
+# to specify an indirect offset with a specified byte order.
+# So, for now, we assume the standard MS-DOS stub, which puts the
+# PE header at 0x80 = 128.
+#
+# Required OS version and subsystem version were 4.0 on some NT 3.51
+# executables built with Visual C++ 4.0, so it's not clear that
+# they're interesting. The user version was 0.0, but there's
+# probably some linker directive to set it. The linker version was
+# 3.0, except for one ".exe" which had it as 4.20 (same damn linker!).
+#
+# many of the compressed formats were extracted from IDARC 1.23 source code
+#
+0 string MZ
+!:mime application/x-dosexec
+>0x18 leshort <0x40 MS-DOS executable
+>0 string MZ\0\0\0\0\0\0\0\0\0\0PE\0\0 \b, PE for MS Windows
+>>&18 leshort&0x2000 >0 (DLL)
+>>&88 leshort 0 (unknown subsystem)
+>>&88 leshort 1 (native)
+>>&88 leshort 2 (GUI)
+>>&88 leshort 3 (console)
+>>&88 leshort 7 (POSIX)
+>>&0 leshort 0x0 unknown processor
+>>&0 leshort 0x14c Intel 80386
+>>&0 leshort 0x166 MIPS R4000
+>>&0 leshort 0x184 Alpha
+>>&0 leshort 0x268 Motorola 68000
+>>&0 leshort 0x1f0 PowerPC
+>>&0 leshort 0x290 PA-RISC
+>>&18 leshort&0x0100 >0 32-bit
+>>&18 leshort&0x1000 >0 system file
+>>&0xf4 search/0x140 \x0\x40\x1\x0
+>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
+>30 string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive
+!:mime application/zip
+# Is next line correct? One might expect "Corp." not "Copr." If it is right, add a note to that effect.
+>30 string PKLITE\ Copr. Self-extracting PKZIP archive
+!:mime application/zip
+
+>0x18 leshort >0x3f
+>>(0x3c.l) string PE\0\0 PE
+>>>(0x3c.l+25) byte 1 \b32 executable
+>>>(0x3c.l+25) byte 2 \b32+ executable
+# hooray, there's a DOS extender using the PE format, with a valid PE
+# executable inside (which just prints a message and exits if run in win)
+>>>(0x3c.l+92) leshort <10
+>>>>(8.s*16) string 32STUB for MS-DOS, 32rtm DOS extender
+>>>>(8.s*16) string !32STUB for MS Windows
+>>>>>(0x3c.l+22) leshort&0x2000 >0 (DLL)
+>>>>>(0x3c.l+92) leshort 0 (unknown subsystem)
+>>>>>(0x3c.l+92) leshort 1 (native)
+>>>>>(0x3c.l+92) leshort 2 (GUI)
+>>>>>(0x3c.l+92) leshort 3 (console)
+>>>>>(0x3c.l+92) leshort 7 (POSIX)
+>>>(0x3c.l+92) leshort 10 (EFI application)
+>>>(0x3c.l+92) leshort 11 (EFI boot service driver)
+>>>(0x3c.l+92) leshort 12 (EFI runtime driver)
+>>>(0x3c.l+92) leshort 13 (XBOX)
+>>>(0x3c.l+4) leshort 0x0 unknown processor
+>>>(0x3c.l+4) leshort 0x14c Intel 80386
+>>>(0x3c.l+4) leshort 0x166 MIPS R4000
+>>>(0x3c.l+4) leshort 0x184 Alpha
+>>>(0x3c.l+4) leshort 0x268 Motorola 68000
+>>>(0x3c.l+4) leshort 0x1f0 PowerPC
+>>>(0x3c.l+4) leshort 0x290 PA-RISC
+>>>(0x3c.l+4) leshort 0x200 Intel Itanium
+>>>(0x3c.l+22) leshort&0x0100 >0 32-bit
+>>>(0x3c.l+22) leshort&0x1000 >0 system file
+>>>(0x3c.l+232) lelong >0 Mono/.Net assembly
+
+>>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed
+>>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed
+>>>>(0x3c.l+0xf8) search/0x140 UPX2
+>>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
+>>>>(0x3c.l+0xf8) search/0x140 .idata
+>>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
+>>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive
+>>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .rsrc
+>>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive
+>>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive
+>>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive
+>>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .data
+>>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed
+>>>>>(0x3c.l+0xf7) byte x
+>>>>>>(&0x104.l+(-4)) string =!sfx! \b, ACE self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .reloc
+>>>>>(&0xe.l+(-4)) search/0x180 PK\3\4 \b, ZIP self-extracting archive (WinZip)
+
+>>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip)
+>>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive
+>>>>0x30 string Inno \b, InnoSetup self-extracting archive
+
+>>(0x3c.l) string !PE\0\0 MS-DOS executable
+
+>>(0x3c.l) string NE \b, NE
+>>>(0x3c.l+0x36) byte 0 (unknown OS)
+>>>(0x3c.l+0x36) byte 1 for OS/2 1.x
+>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x
+>>>(0x3c.l+0x36) byte 3 for MS-DOS
+>>>(0x3c.l+0x36) byte >3 (unknown OS)
+>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender
+>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL)
+>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver)
+>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive
+>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)
+
+>>(0x3c.l) string LX\0\0 \b, LX
+>>>(0x3c.l+0x0a) leshort <1 (unknown OS)
+>>>(0x3c.l+0x0a) leshort 1 for OS/2
+>>>(0x3c.l+0x0a) leshort 2 for MS Windows
+>>>(0x3c.l+0x0a) leshort 3 for DOS
+>>>(0x3c.l+0x0a) leshort >3 (unknown OS)
+>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL)
+>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver)
+>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI)
+>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console)
+>>>(0x3c.l+0x08) leshort 1 i80286
+>>>(0x3c.l+0x08) leshort 2 i80386
+>>>(0x3c.l+0x08) leshort 3 i80486
+>>>(8.s*16) string emx \b, emx
+>>>>&1 string x %s
+>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive
+
+# MS Windows system file, supposedly a collection of LE executables
+>>(0x3c.l) string W3 \b, W3 for MS Windows
+
+>>(0x3c.l) string LE\0\0 \b, LE executable
+>>>(0x3c.l+0x0a) leshort 1
+# some DOS extenders use LE files with OS/2 header
+>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
+>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
+>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
+>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender
+>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
+>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
+>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
+# this is a wild guess; hopefully it is a specific signature
+>>>>&0x24 lelong <0x50
+>>>>>(&0x4c.l) string \xfc\xb8WATCOM
+>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed
+# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
+#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2
+# fails with DOS-Extenders.
+>>>(0x3c.l+0x0a) leshort 2 for MS Windows
+>>>(0x3c.l+0x0a) leshort 3 for DOS
+>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD)
+>>>(&0x7c.l+0x26) string UPX \b, UPX compressed
+>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive
+
+# looks like ASCII, probably some embedded copyright message.
+# and definitely not NE/LE/LX/PE
+>>0x3c lelong >0x20000000
+>>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS
+# header data too small for extended executable
+>2 long !0
+>>0x18 leshort <0x40
+>>>(4.s*512) leshort !0x014c
+
+>>>>&(2.s-514) string !LE
+>>>>>&-2 string !BW \b, MZ for MS-DOS
+>>>>&(2.s-514) string LE \b, LE
+>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
+# educated guess since indirection is still not capable enough for complex offset
+# calculations (next embedded executable would be at &(&2*512+&0-2))
+# I suspect there are only LE executables in these multi-exe files
+>>>>&(2.s-514) string BW
+>>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
+>>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS
+
+# This sequence skips to the first COFF segment, usually .text
+>(4.s*512) leshort 0x014c \b, COFF
+>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender
+>>(8.s*16) string emx
+>>>&1 string x for DOS, Win or OS/2, emx %s
+>>&(&0x42.l-3) byte x
+>>>&0x26 string UPX \b, UPX compressed
+# and yet another guess: small .text, and after large .data is unusual, could be 32lite
+>>&0x2c search/0xa0 .text
+>>>&0x0b lelong <0x2000
+>>>>&0 lelong >0x6000 \b, 32lite compressed
+
+>(8.s*16) string $WdX \b, WDos/X DOS extender
+
+# .EXE formats (Greg Roelofs, newt at uchicago.edu)
+#
+>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
+>0xe7 string LH/2\ Self-Extract \b, %s
+>0x1c string diet \b, diet compressed
+>0x1c string LZ09 \b, LZEXE v0.90 compressed
+>0x1c string LZ91 \b, LZEXE v0.91 compressed
+>0x1c string tz \b, TinyProg compressed
+>0x1e string PKLITE \b, %s compressed
+>0x64 string W\ Collis\0\0 \b, Compack compressed
+>0x24 string LHa's\ SFX \b, LHa self-extracting archive
+!:mime application/x-lha
+>0x24 string LHA's\ SFX \b, LHa self-extracting archive
+!:mime application/x-lha
+>0x24 string \ $ARX \b, ARX self-extracting archive
+>0x24 string \ $LHarc \b, LHarc self-extracting archive
+>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive
+>1638 string -lh5- \b, LHa self-extracting archive v2.13S
+>0x17888 string Rar! \b, RAR self-extracting archive
+>0x40 string aPKG \b, aPackage self-extracting archive
+
+>32 string AIN
+>>35 string 2 \b, AIN 2.x compressed
+>>35 string <2 \b, AIN 1.x compressed
+>>35 string >2 \b, AIN 1.x compressed
+>28 string UC2X \b, UCEXE compressed
+>28 string WWP\ \b, WWPACK compressed
+
+# skip to the end of the exe
+>(4.s*512) long x
+>>&(2.s-517) byte x
+>>>&0 string PK\3\4 \b, ZIP self-extracting archive
+>>>&0 string Rar! \b, RAR self-extracting archive
+>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive
+>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive
+>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive
+>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive
+>>>&7 search/400 **ACE** \b, ACE self-extracting archive
+>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive
+
+>0x1c string RJSX \b, ARJ self-extracting archive
+# winarj stores a message in the stub instead of the sig in the MZ header
+>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive
+
+# a few unknown ZIP sfxes, no idea if they are needed or if they are
+# already captured by the generic patterns above
+>122 string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive
+>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP)
+# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
+#
+
+# TELVOX Teleinformatica CODEC self-extractor for OS/2:
+>49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21
+>>49824 leshort =1 \b, 1 file
+>>49824 leshort >1 \b, %u files
+
+# .COM formats (Daniel Quinlan, quinlan at yggdrasil.com)
+# Uncommenting only the first two lines will cover about 2/3 of COM files,
+# but it isn't feasible to match all COM files since there must be at least
+# two dozen different one-byte "magics".
+# test too generic ?
+#0 byte 0xe9 DOS executable (COM)
+#>0x1FE leshort 0xAA55 \b, boot code
+#>6 string SFX\ of\ LHarc (%s)
+#0 belong 0xffffffff DOS executable (device driver)
+#CMD640X2.SYS
+#>10 string >\x23
+#>>10 string !\x2e
+#>>>17 string <\x5B
+#>>>>10 string CMD640X2.SYS \b, name: %.8s
+#UDMA.SYS
+#>10 string <\x41
+#>>12 string >\x40
+#>>>10 string !$
+#>>>>12 string UDMA.SYS \b, name: %.8s
+#CMD640X2.SYS
+#>10 string <\x41
+#>>12 string >\x40
+#>>>10 string !$
+#>>>>12 string CMD640X2.SYS \b, name: %.8s
+#KEYB.SYS
+#>10 string <\x41
+#>>12 string >\x40
+#>>>10 string !$
+#>>>>12 string KEYB.SYS \b, name: %.8s
+#BTCDROM.SYS
+#>22 string >\x40
+#>>22 string <\x5B
+#>>>23 string <\x5B
+#>>>>22 string BTCDROM.SYS \b, name: %.8s
+#ASPICD.SYS
+#>22 string >\x40
+#>>22 string <\x5B
+#>>>23 string <\x5B
+#>>>>22 string ASPICD.SYS \b, name: %.8s
+#ATAPICD.SYS
+#>76 string \0
+#>>77 string >\x40
+#>>>77 string <\x5B
+#>>>>77 string ATAPICD.SYS \b, name: %.8s
+# test too generic ?
+#0 byte 0x8c DOS executable (COM)
+# updated by Joerg Jenderek at Oct 2008
+#0 ulelong 0xffff10eb DR-DOS executable (COM)
+# byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
+#0 ubeshort&0xeb8d >0xeb00
+# DR-DOS STACKER.COM SCREATE.SYS missed
+#>0 byte 0xeb DOS executable (COM)
+#>>0x1FE leshort 0xAA55 \b, boot code
+#>>85 string UPX \b, UPX compressed
+#>>4 string \ $ARX \b, ARX self-extracting archive
+#>>4 string \ $LHarc \b, LHarc self-extracting archive
+#>>0x20e string SFX\ by\ LARC \b, LARC self-extracting archive
+# updated by Joerg Jenderek at Oct 2008
+#0 byte 0xb8 COM executable
+#0 uleshort&0x80ff 0x00b8
+# modified by Joerg Jenderek
+#>1 lelong !0x21cd4cff COM executable for DOS
+# http://syslinux.zytor.com/comboot.php
+# (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
+# start with assembler instructions mov eax,21cd4cffh
+0 uleshort&0xc0ff 0xc0b8
+>1 lelong 0x21cd4cff COM executable (32-bit COMBOOT)
+0 string \x81\xfc
+>4 string \x77\x02\xcd\x20\xb9
+>>36 string UPX! FREE-DOS executable (COM), UPX compressed
+252 string Must\ have\ DOS\ version DR-DOS executable (COM)
+# added by Joerg Jenderek at Oct 2008
+# GRR search is not working
+#34 search/2 UPX! FREE-DOS executable (COM), UPX compressed
+34 string UPX! FREE-DOS executable (COM), UPX compressed
+35 string UPX! FREE-DOS executable (COM), UPX compressed
+# GRR search is not working
+#2 search/28 \xcd\x21 COM executable for MS-DOS
+#WHICHFAT.cOM
+#2 string \xcd\x21 COM executable for DOS
+#DELTREE.cOM DELTREE2.cOM
+#4 string \xcd\x21 COM executable for DOS
+#IFMEMDSK.cOM ASSIGN.cOM COMP.cOM
+#5 string \xcd\x21 COM executable for DOS
+#DELTMP.COm HASFAT32.cOM
+#7 string \xcd\x21
+#>0 byte !0xb8 COM executable for DOS
+#COMP.cOM MORE.COm
+#10 string \xcd\x21
+#>5 string !\xcd\x21 COM executable for DOS
+#comecho.com
+#13 string \xcd\x21 COM executable for DOS
+#HELP.COm EDIT.coM
+#18 string \xcd\x21 COM executable for MS-DOS
+#NWRPLTRM.COm
+#23 string \xcd\x21 COM executable for MS-DOS
+#LOADFIX.cOm LOADFIX.cOm
+#30 string \xcd\x21 COM executable for MS-DOS
+#syslinux.com 3.11
+#70 string \xcd\x21 COM executable for DOS
+# many compressed/converted COMs start with a copy loop instead of a jump
+0x6 search/0xa \xfc\x57\xf3\xa5\xc3 COM executable for MS-DOS
+0x6 search/0xa \xfc\x57\xf3\xa4\xc3 COM executable for DOS
+>0x18 search/0x10 \x50\xa4\xff\xd5\x73 \b, aPack compressed
+0x3c string W\ Collis\0\0 COM executable for MS-DOS, Compack compressed
+# FIXME: missing diet .com compression
+
+# miscellaneous formats
+0 string LZ MS-DOS executable (built-in)
+#0 byte 0xf0 MS-DOS program library data
+#
+
+# AAF files:
+# <stuartc at rd.bbc.co.uk> Stuart Cunningham
+0 string \320\317\021\340\241\261\032\341AAFB\015\000OM\006\016\053\064\001\001\001\377 AAF legacy file using MS Structured Storage
+>30 byte 9 (512B sectors)
+>30 byte 12 (4kB sectors)
+0 string \320\317\021\340\241\261\032\341\001\002\001\015\000\002\000\000\006\016\053\064\003\002\001\001 AAF file using MS Structured Storage
+>30 byte 9 (512B sectors)
+>30 byte 12 (4kB sectors)
+
+# Popular applications
+2080 string Microsoft\ Word\ 6.0\ Document %s
+!:mime application/msword
+2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
+!:mime application/msword
+# Pawel Wiecek <coven at i17linuxb.ists.pwr.wroc.pl> (for polish Word)
+2112 string MSWordDoc Microsoft Word document data
+!:mime application/msword
+#
+0 belong 0x31be0000 Microsoft Word Document
+!:mime application/msword
+#
+0 string PO^Q` Microsoft Word 6.0 Document
+!:mime application/msword
+#
+0 string \376\067\0\043 Microsoft Office Document
+!:mime application/msword
+0 string \333\245-\0\0\0 Microsoft Office Document
+!:mime application/msword
+512 string \354\245\301 Microsoft Word Document
+!:mime application/msword
+#
+2080 string Microsoft\ Excel\ 5.0\ Worksheet %s
+!:mime application/vnd.ms-excel
+
+2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s
+!:mime application/vnd.ms-excel
+#
+# Pawel Wiecek <coven at i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
+2114 string Biff5 Microsoft Excel 5.0 Worksheet
+!:mime application/vnd.ms-excel
+# Italian MS-Excel
+2121 string Biff5 Microsoft Excel 5.0 Worksheet
+!:mime application/vnd.ms-excel
+0 string \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet
+!:mime application/vnd.ms-excel
+#
+0 belong 0x00001a00 Lotus 1-2-3
+!:mime application/x-123
+>4 belong 0x00100400 wk3 document data
+>4 belong 0x02100400 wk4 document data
+>4 belong 0x07800100 fm3 or fmb document data
+>4 belong 0x07800000 fm3 or fmb document data
+#
+0 belong 0x00000200 Lotus 1-2-3
+!:mime application/x-123
+>4 belong 0x06040600 wk1 document data
+>4 belong 0x06800200 fmt document data
+0 string WordPro\0 Lotus WordPro
+!:mime application/vnd.lotus-wordpro
+0 string WordPro\r\373 Lotus WordPro
+!:mime application/vnd.lotus-wordpro
+
+
+# Summary: Script used by InstallShield to uninstall applications
+# Extension: .isu
+# Submitted by: unknown
+# Modified by (1): Abel Cheung <abelcheung at gmail.com> (replace useless entry)
+0 string \x71\xa8\x00\x00\x01\x02
+>12 string Stirling\ Technologies, InstallShield Uninstall Script
+
+# Winamp .avs
+#0 string Nullsoft\ AVS\ Preset\ \060\056\061\032 A plug in for Winamp ms-windows Freeware media player
+0 string Nullsoft\ AVS\ Preset\ Winamp plug in
+
+# Windows Metafile .WMF
+0 string \327\315\306\232 ms-windows metafont .wmf
+0 string \002\000\011\000 ms-windows metafont .wmf
+0 string \001\000\011\000 ms-windows metafont .wmf
+
+#tz3 files whatever that is (MS Works files)
+0 string \003\001\001\004\070\001\000\000 tz3 ms-works file
+0 string \003\002\001\004\070\001\000\000 tz3 ms-works file
+0 string \003\003\001\004\070\001\000\000 tz3 ms-works file
+
+# PGP sig files .sig
+#0 string \211\000\077\003\005\000\063\237\127 065 to \027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\065\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\066\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\067\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\070\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\071\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\225\003\005\000\062\122\207\304\100\345\042 PGP sig
+
+# windows zips files .dmf
+0 string MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 MS Windows special zipped file
+
+
+#ico files
+0 string \102\101\050\000\000\000\056\000\000\000\000\000\000\000 Icon for MS Windows
+
+# Windows icons (Ian Springer <ips at fpk.hp.com>)
+0 string \000\000\001\000 MS Windows icon resource
+!:mime image/x-ico
+>4 byte 1 - 1 icon
+>4 byte >1 - %d icons
+>>6 byte >0 \b, %dx
+>>>7 byte >0 \b%d
+>>8 byte 0 \b, 256-colors
+>>8 byte >0 \b, %d-colors
+
+
+# .chr files
+0 string PK\010\010BGI Borland font
+>4 string >\0 %s
+# then there is a copyright notice
+
+
+# .bgi files
+0 string pk\010\010BGI Borland device
+>4 string >\0 %s
+# then there is a copyright notice
+
+
+# Windows Recycle Bin record file (named INFO2)
+# By Abel Cheung (abelcheung AT gmail dot com)
+# Version 4 always has 280 bytes (0x118) per record, version 5 has 800 bytes
+# Since Vista uses another structure, INFO2 structure probably won't change
+# anymore. Detailed analysis in:
+# http://www.cybersecurityinstitute.biz/downloads/INFO2.pdf
+0 lelong 0x00000004
+>12 lelong 0x00000118 Windows Recycle Bin INFO2 file (Win98 or below)
+
+0 lelong 0x00000005
+>12 lelong 0x00000320 Windows Recycle Bin INFO2 file (Win2k - WinXP)
+
+
+##### put in Either Magic/font or Magic/news
+# Acroread or something files wrongly identified as G3 .pfm
+# these have the form \000 \001 any? \002 \000 \000
+# or \000 \001 any? \022 \000 \000
+#0 string \000\001 pfm?
+#>3 string \022\000\000Copyright\ yes
+#>3 string \002\000\000Copyright\ yes
+#>3 string >\0 oops, not a font file. Cancel that.
+#it clashes with ttf files so put it lower down.
+
+# From Doug Lee via a FreeBSD pr
+9 string GERBILDOC First Choice document
+9 string GERBILDB First Choice database
+9 string GERBILCLIP First Choice database
+0 string GERBIL First Choice device file
+9 string RABBITGRAPH RabbitGraph file
+0 string DCU1 Borland Delphi .DCU file
+0 string =!<spell> MKS Spell hash list (old format)
+0 string =!<spell2> MKS Spell hash list
+# Too simple - MPi
+#0 string AH Halo(TM) bitmapped font file
+0 lelong 0x08086b70 TurboC BGI file
+0 lelong 0x08084b50 TurboC Font file
+
+# WARNING: below line conflicts with Infocom game data Z-machine 3
+0 byte 0x03 DBase 3 data file
+>0x04 lelong 0 (no records)
+>0x04 lelong >0 (%ld records)
+0 byte 0x83 DBase 3 data file with memo(s)
+>0x04 lelong 0 (no records)
+>0x04 lelong >0 (%ld records)
+0 leshort 0x0006 DBase 3 index file
+0 string PMCC Windows 3.x .GRP file
+1 string RDC-meg MegaDots
+>8 byte >0x2F version %c
+>9 byte >0x2F \b.%c file
+0 lelong 0x4C
+>4 lelong 0x00021401 Windows shortcut file
+
+# DOS EPS Binary File Header
+# From: Ed Sznyter <ews at Black.Market.NET>
+0 belong 0xC5D0D3C6 DOS EPS Binary File
+>4 long >0 Postscript starts at byte %d
+>>8 long >0 length %d
+>>>12 long >0 Metafile starts at byte %d
+>>>>16 long >0 length %d
+>>>20 long >0 TIFF starts at byte %d
+>>>>24 long >0 length %d
+
+# TNEF magic From "Joomy" <joomy at se-ed.net>
+# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
+0 leshort 0x223e9f78 TNEF
+!:mime application/vnd.ms-tnef
+
+# HtmlHelp files (.chm)
+0 string ITSF\003\000\000\000\x60\000\000\000\001\000\000\000 MS Windows HtmlHelp Data
+
+# GFA-BASIC (Wolfram Kleff)
+2 string GFA-BASIC3 GFA-BASIC 3 data
+
+#------------------------------------------------------------------------------
+# From Stuart Caie <kyzer at 4u.net> (developer of cabextract)
+# Microsoft Cabinet files
+0 string MSCF\0\0\0\0 Microsoft Cabinet archive data
+!:mime application/vnd.ms-cab-compressed
+>8 lelong x \b, %u bytes
+>28 leshort 1 \b, 1 file
+>28 leshort >1 \b, %u files
+
+# InstallShield Cabinet files
+0 string ISc( InstallShield Cabinet archive data
+>5 byte&0xf0 =0x60 version 6,
+>5 byte&0xf0 !0x60 version 4/5,
+>(12.l+40) lelong x %u files
+
+# Windows CE package files
+0 string MSCE\0\0\0\0 Microsoft WinCE install header
+>20 lelong 0 \b, architecture-independent
+>20 lelong 103 \b, Hitachi SH3
+>20 lelong 104 \b, Hitachi SH4
+>20 lelong 0xA11 \b, StrongARM
+>20 lelong 4000 \b, MIPS R4000
+>20 lelong 10003 \b, Hitachi SH3
+>20 lelong 10004 \b, Hitachi SH3E
+>20 lelong 10005 \b, Hitachi SH4
+>20 lelong 70001 \b, ARM 7TDMI
+>52 leshort 1 \b, 1 file
+>52 leshort >1 \b, %u files
+>56 leshort 1 \b, 1 registry entry
+>56 leshort >1 \b, %u registry entries
+
+
+# Windows Enhanced Metafile (EMF)
+# See msdn.microsoft.com/archive/en-us/dnargdi/html/msdn_enhmeta.asp
+# for further information.
+0 ulelong 1
+>40 string \ EMF Windows Enhanced Metafile (EMF) image data
+>>44 ulelong x version 0x%x
+
+# From: Alex Beregszaszi <alex at fsn.hu>
+0 string COWD VMWare3
+>4 byte 3 disk image
+>>32 lelong x (%d/
+>>36 lelong x \b%d/
+>>40 lelong x \b%d)
+>4 byte 2 undoable disk image
+>>32 string >\0 (%s)
+
+0 string VMDK VMware4 disk image
+0 string KDMV VMware4 disk image
+
+#--------------------------------------------------------------------
+# Qemu Emulator Images
+# Lines written by Friedrich Schwittay (f.schwittay at yousable.de)
+# Made by reading sources and doing trial and error on existing
+# qcow files
+0 string QFI Qemu Image, Format: Qcow
+
+# Uncomment the following line to display Magic (only used for debugging
+# this magic number)
+#>0 string x , Magic: %s
+
+# There are currently 2 Versions: "1" and "2"
+# I do not use Version 2 and therefore branch here
+# but can assure: it works (tested on both versions)
+# Also my Qemu 0.9.0 which uses this Version 2 refuses
+# to start in its bios
+>0x04 belong 2 , Version: 2
+>0x04 belong 1 , Version: 1
+
+# Using the existence of the Backing File Offset to Branch or not
+# to read Backing File Information
+>>0xc belong >0 , Backing File( Offset: %lu
+>>>(0xc.L) string >\0 , Path: %s
+
+# Didn't get the trick here how qemu stores the "Size" at this Position
+# There is actually something stored but nothing makes sense
+# The header in the sources talks about it
+#>>>16 lelong x , Size: %lu
+
+# Modification time of the Backing File
+# Really useful if you want to know if your backing
+# file is still usable together with this image
+>>>20 bedate x , Mtime: %s )
+
+# Don't know how to calculate in Magicfiles
+# Also: this Information is not reliably
+# stored in image-files
+>>24 lelong x , Disk Size could be: %d * 256 bytes
+
+>4 belong 3 (v3)
+# Using the existence of the Backing File Offset to determine whether
+# to read Backing File Information
+>>8 bequad >0 \b, has backing file
+# Note that this isn't a null-terminated string; the length is actually
+# (16.L). Assuming a null-terminated string happens to work usually, but it
+# may spew junk until it reaches a \0 in some cases. Also, since there's no
+# .Q modifier, we just use the bottom four bytes as an offset. Note that if
+# the file is over 4G, and the backing file path is stored after the first 4G,
+# the wrong filename will be printed. (This should be (8.Q), when that syntax
+# is introduced.)
+>>>(12.L) string >\0 (path %s)
+>>24 bequad x \b, %lld bytes
+>>32 belong 1 \b, AES-encrypted
+
+0 string QEVM QEMU's suspend to disk image
+
+# QEMU QED Image
+# http://wiki.qemu.org/Features/QED/Specification
+0 string/b QED\0 QEMU QED Image
+
+# VDI Image
+64 string/b \x7f\x10\xda\xbe VDI Image
+>68 string/b \x01\x00\x01\x00 version 1.1
+>0 string >\0 (%s)
+>368 lequad x \b, %lld bytes
+
+0 string Bochs\ Virtual\ HD\ Image Bochs disk image,
+>32 string x type %s,
+>48 string x subtype %s
+
+0 lelong 0x02468ace Bochs Sparse disk image
+
+# from http://filext.com by Derek M Jones <derek at knosof.co.uk>
+# False positive with PPT (also currently this string is too long)
+#0 string \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer
+0 string \320\317\021\340\241\261\032\341 Microsoft Office Document
+#>48 byte 0x1B Excel Document
+#!:mime application/vnd.ms-excel
+>546 string bjbj Microsoft Word Document
+!:mime application/msword
+>546 string jbjb Microsoft Word Document
+!:mime application/msword
+
+0 string \224\246\056 Microsoft Word Document
+!:mime application/msword
+
+512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document
+!:mime application/msword
+
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+# Magic type for Dell's BIOS .hdr files
+# Dell's .hdr
+0 string $RBU
+>23 string Dell %s system BIOS
+>5 byte 2
+>>48 byte x version %d.
+>>49 byte x \b%d.
+>>50 byte x \b%d
+>5 byte <2
+>>48 string x version %.3s
+
+# Type: Microsoft DirectDraw Surface
+# URL: http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/DDSFileReference/ddsfileformat.asp
+# From: Morten Hustveit <morten at debian.org>
+0 string DDS\040\174\000\000\000 Microsoft DirectDraw Surface (DDS),
+>16 lelong >0 %hd x
+>12 lelong >0 %hd,
+>84 string x %.4s
+
+# Type: Microsoft Document Imaging Format (.mdi)
+# URL: http://en.wikipedia.org/wiki/Microsoft_Document_Imaging_Format
+# From: Daniele Sempione <scrows at oziosi.org>
+0 short 0x5045 Microsoft Document Imaging Format
+
+# MS eBook format (.lit)
+0 string ITOLITLS Microsoft Reader eBook Data
+>8 lelong x \b, version %u
+!:mime application/x-ms-reader
+
+#------------------------------------------------------------------------------
+# $File: msdos,v 1.65 2009/09/19 16:28:11 christos Exp $
+# msdos: file(1) magic for MS-DOS files
+#
+
+# .BAT files (Daniel Quinlan, quinlan at yggdrasil.com)
+# updated by Joerg Jenderek at Oct 2008
+0 string @
+>1 string/cW \ echo\ off DOS batch file text
+!:mime text/x-msdos-batch
+>1 string/cW echo\ off DOS batch file text
+!:mime text/x-msdos-batch
+>1 string/cW rem\ DOS batch file text
+!:mime text/x-msdos-batch
+>1 string/cW set\ DOS batch file text
+!:mime text/x-msdos-batch
+
+
+# OS/2 batch files are REXX. the second regex is a bit generic, oh well
+# the matched commands seem to be common in REXX and uncommon elsewhere
+100 regex/c =^[\ \t]{0,10}call[\ \t]{1,10}rxfunc OS/2 REXX batch file text
+100 regex/c =^[\ \t]{0,10}say\ ['"] OS/2 REXX batch file text
+
+0 leshort 0x14c MS Windows COFF Intel 80386 object file
+#>4 ledate x stamp %s
+0 leshort 0x166 MS Windows COFF MIPS R4000 object file
+#>4 ledate x stamp %s
+0 leshort 0x184 MS Windows COFF Alpha object file
+#>4 ledate x stamp %s
+0 leshort 0x268 MS Windows COFF Motorola 68000 object file
+#>4 ledate x stamp %s
+0 leshort 0x1f0 MS Windows COFF PowerPC object file
+#>4 ledate x stamp %s
+0 leshort 0x290 MS Windows COFF PA-RISC object file
+#>4 ledate x stamp %s
+
+# XXX - according to Microsoft's spec, at an offset of 0x3c in a
+# PE-format executable is the offset in the file of the PE header;
+# unfortunately, that's a little-endian offset, and there's no way
+# to specify an indirect offset with a specified byte order.
+# So, for now, we assume the standard MS-DOS stub, which puts the
+# PE header at 0x80 = 128.
+#
+# Required OS version and subsystem version were 4.0 on some NT 3.51
+# executables built with Visual C++ 4.0, so it's not clear that
+# they're interesting. The user version was 0.0, but there's
+# probably some linker directive to set it. The linker version was
+# 3.0, except for one ".exe" which had it as 4.20 (same damn linker!).
+#
+# many of the compressed formats were extracted from IDARC 1.23 source code
+#
+0 string MZ
+!:mime application/x-dosexec
+>0x18 leshort <0x40 MS-DOS executable
+>0 string MZ\0\0\0\0\0\0\0\0\0\0PE\0\0 \b, PE for MS Windows
+>>&18 leshort&0x2000 >0 (DLL)
+>>&88 leshort 0 (unknown subsystem)
+>>&88 leshort 1 (native)
+>>&88 leshort 2 (GUI)
+>>&88 leshort 3 (console)
+>>&88 leshort 7 (POSIX)
+>>&0 leshort 0x0 unknown processor
+>>&0 leshort 0x14c Intel 80386
+>>&0 leshort 0x166 MIPS R4000
+>>&0 leshort 0x184 Alpha
+>>&0 leshort 0x268 Motorola 68000
+>>&0 leshort 0x1f0 PowerPC
+>>&0 leshort 0x290 PA-RISC
+>>&18 leshort&0x0100 >0 32-bit
+>>&18 leshort&0x1000 >0 system file
+>>&0xf4 search/0x140 \x0\x40\x1\x0
+>>>(&0.l+(4)) string MSCF \b, WinHKI CAB self-extracting archive
+>30 string Copyright\ 1989-1990\ PKWARE\ Inc. Self-extracting PKZIP archive
+!:mime application/zip
+# Is next line correct? One might expect "Corp." not "Copr." If it is right, add a note to that effect.
+>30 string PKLITE\ Copr. Self-extracting PKZIP archive
+!:mime application/zip
+
+>0x18 leshort >0x3f
+>>(0x3c.l) string PE\0\0 PE
+>>>(0x3c.l+25) byte 1 \b32 executable
+>>>(0x3c.l+25) byte 2 \b32+ executable
+# hooray, there's a DOS extender using the PE format, with a valid PE
+# executable inside (which just prints a message and exits if run in win)
+>>>(0x3c.l+92) leshort <10
+>>>>(8.s*16) string 32STUB for MS-DOS, 32rtm DOS extender
+>>>>(8.s*16) string !32STUB for MS Windows
+>>>>>(0x3c.l+22) leshort&0x2000 >0 (DLL)
+>>>>>(0x3c.l+92) leshort 0 (unknown subsystem)
+>>>>>(0x3c.l+92) leshort 1 (native)
+>>>>>(0x3c.l+92) leshort 2 (GUI)
+>>>>>(0x3c.l+92) leshort 3 (console)
+>>>>>(0x3c.l+92) leshort 7 (POSIX)
+>>>(0x3c.l+92) leshort 10 (EFI application)
+>>>(0x3c.l+92) leshort 11 (EFI boot service driver)
+>>>(0x3c.l+92) leshort 12 (EFI runtime driver)
+>>>(0x3c.l+92) leshort 13 (XBOX)
+>>>(0x3c.l+4) leshort 0x0 unknown processor
+>>>(0x3c.l+4) leshort 0x14c Intel 80386
+>>>(0x3c.l+4) leshort 0x166 MIPS R4000
+>>>(0x3c.l+4) leshort 0x184 Alpha
+>>>(0x3c.l+4) leshort 0x268 Motorola 68000
+>>>(0x3c.l+4) leshort 0x1f0 PowerPC
+>>>(0x3c.l+4) leshort 0x290 PA-RISC
+>>>(0x3c.l+4) leshort 0x200 Intel Itanium
+>>>(0x3c.l+22) leshort&0x0100 >0 32-bit
+>>>(0x3c.l+22) leshort&0x1000 >0 system file
+>>>(0x3c.l+232) lelong >0 Mono/.Net assembly
+
+>>>>(0x3c.l+0xf8) string UPX0 \b, UPX compressed
+>>>>(0x3c.l+0xf8) search/0x140 PEC2 \b, PECompact2 compressed
+>>>>(0x3c.l+0xf8) search/0x140 UPX2
+>>>>>(&0x10.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
+>>>>(0x3c.l+0xf8) search/0x140 .idata
+>>>>>(&0xe.l+(-4)) string PK\3\4 \b, ZIP self-extracting archive (Info-Zip)
+>>>>>(&0xe.l+(-4)) string ZZ0 \b, ZZip self-extracting archive
+>>>>>(&0xe.l+(-4)) string ZZ1 \b, ZZip self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .rsrc
+>>>>>(&0x0f.l+(-4)) string a\\\4\5 \b, WinHKI self-extracting archive
+>>>>>(&0x0f.l+(-4)) string Rar! \b, RAR self-extracting archive
+>>>>>(&0x0f.l+(-4)) search/0x3000 MSCF \b, InstallShield self-extracting archive
+>>>>>(&0x0f.l+(-4)) search/32 Nullsoft \b, Nullsoft Installer self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .data
+>>>>>(&0x0f.l) string WEXTRACT \b, MS CAB-Installer self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .petite\0 \b, Petite compressed
+>>>>>(0x3c.l+0xf7) byte x
+>>>>>>(&0x104.l+(-4)) string =!sfx! \b, ACE self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .WISE \b, WISE installer self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .dz\0\0\0 \b, Dzip self-extracting archive
+>>>>(0x3c.l+0xf8) search/0x140 .reloc
+>>>>>(&0xe.l+(-4)) search/0x180 PK\3\4 \b, ZIP self-extracting archive (WinZip)
+
+>>>>&(0x3c.l+0xf8) search/0x100 _winzip_ \b, ZIP self-extracting archive (WinZip)
+>>>>&(0x3c.l+0xf8) search/0x100 SharedD \b, Microsoft Installer self-extracting archive
+>>>>0x30 string Inno \b, InnoSetup self-extracting archive
+
+>>(0x3c.l) string !PE\0\0 MS-DOS executable
+
+>>(0x3c.l) string NE \b, NE
+>>>(0x3c.l+0x36) byte 0 (unknown OS)
+>>>(0x3c.l+0x36) byte 1 for OS/2 1.x
+>>>(0x3c.l+0x36) byte 2 for MS Windows 3.x
+>>>(0x3c.l+0x36) byte 3 for MS-DOS
+>>>(0x3c.l+0x36) byte >3 (unknown OS)
+>>>(0x3c.l+0x36) byte 0x81 for MS-DOS, Phar Lap DOS extender
+>>>(0x3c.l+0x0c) leshort&0x8003 0x8002 (DLL)
+>>>(0x3c.l+0x0c) leshort&0x8003 0x8001 (driver)
+>>>&(&0x24.s-1) string ARJSFX \b, ARJ self-extracting archive
+>>>(0x3c.l+0x70) search/0x80 WinZip(R)\ Self-Extractor \b, ZIP self-extracting archive (WinZip)
+
+>>(0x3c.l) string LX\0\0 \b, LX
+>>>(0x3c.l+0x0a) leshort <1 (unknown OS)
+>>>(0x3c.l+0x0a) leshort 1 for OS/2
+>>>(0x3c.l+0x0a) leshort 2 for MS Windows
+>>>(0x3c.l+0x0a) leshort 3 for DOS
+>>>(0x3c.l+0x0a) leshort >3 (unknown OS)
+>>>(0x3c.l+0x10) lelong&0x28000 =0x8000 (DLL)
+>>>(0x3c.l+0x10) lelong&0x20000 >0 (device driver)
+>>>(0x3c.l+0x10) lelong&0x300 0x300 (GUI)
+>>>(0x3c.l+0x10) lelong&0x28300 <0x300 (console)
+>>>(0x3c.l+0x08) leshort 1 i80286
+>>>(0x3c.l+0x08) leshort 2 i80386
+>>>(0x3c.l+0x08) leshort 3 i80486
+>>>(8.s*16) string emx \b, emx
+>>>>&1 string x %s
+>>>&(&0x54.l-3) string arjsfx \b, ARJ self-extracting archive
+
+# MS Windows system file, supposedly a collection of LE executables
+>>(0x3c.l) string W3 \b, W3 for MS Windows
+
+>>(0x3c.l) string LE\0\0 \b, LE executable
+>>>(0x3c.l+0x0a) leshort 1
+# some DOS extenders use LE files with OS/2 header
+>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
+>>>>0x240 search/0x200 WATCOM\ C/C++ for MS-DOS, DOS4GW DOS extender
+>>>>0x440 search/0x100 CauseWay\ DOS\ Extender for MS-DOS, CauseWay DOS extender
+>>>>0x40 search/0x40 PMODE/W for MS-DOS, PMODE/W DOS extender
+>>>>0x40 search/0x40 STUB/32A for MS-DOS, DOS/32A DOS extender (stub)
+>>>>0x40 search/0x80 STUB/32C for MS-DOS, DOS/32A DOS extender (configurable stub)
+>>>>0x40 search/0x80 DOS/32A for MS-DOS, DOS/32A DOS extender (embedded)
+# this is a wild guess; hopefully it is a specific signature
+>>>>&0x24 lelong <0x50
+>>>>>(&0x4c.l) string \xfc\xb8WATCOM
+>>>>>>&0 search/8 3\xdbf\xb9 \b, 32Lite compressed
+# another wild guess: if real OS/2 LE executables exist, they probably have higher start EIP
+#>>>>(0x3c.l+0x1c) lelong >0x10000 for OS/2
+# fails with DOS-Extenders.
+>>>(0x3c.l+0x0a) leshort 2 for MS Windows
+>>>(0x3c.l+0x0a) leshort 3 for DOS
+>>>(0x3c.l+0x0a) leshort 4 for MS Windows (VxD)
+>>>(&0x7c.l+0x26) string UPX \b, UPX compressed
+>>>&(&0x54.l-3) string UNACE \b, ACE self-extracting archive
+
+# looks like ASCII, probably some embedded copyright message.
+# and definitely not NE/LE/LX/PE
+>>0x3c lelong >0x20000000
+>>>(4.s*512) leshort !0x014c \b, MZ for MS-DOS
+# header data too small for extended executable
+>2 long !0
+>>0x18 leshort <0x40
+>>>(4.s*512) leshort !0x014c
+
+>>>>&(2.s-514) string !LE
+>>>>>&-2 string !BW \b, MZ for MS-DOS
+>>>>&(2.s-514) string LE \b, LE
+>>>>>0x240 search/0x100 DOS/4G for MS-DOS, DOS4GW DOS extender
+# educated guess since indirection is still not capable enough for complex offset
+# calculations (next embedded executable would be at &(&2*512+&0-2))
+# I suspect there are only LE executables in these multi-exe files
+>>>>&(2.s-514) string BW
+>>>>>0x240 search/0x100 DOS/4G ,\b LE for MS-DOS, DOS4GW DOS extender (embedded)
+>>>>>0x240 search/0x100 !DOS/4G ,\b BW collection for MS-DOS
+
+# This sequence skips to the first COFF segment, usually .text
+>(4.s*512) leshort 0x014c \b, COFF
+>>(8.s*16) string go32stub for MS-DOS, DJGPP go32 DOS extender
+>>(8.s*16) string emx
+>>>&1 string x for DOS, Win or OS/2, emx %s
+>>&(&0x42.l-3) byte x
+>>>&0x26 string UPX \b, UPX compressed
+# and yet another guess: small .text, and after large .data is unusual, could be 32lite
+>>&0x2c search/0xa0 .text
+>>>&0x0b lelong <0x2000
+>>>>&0 lelong >0x6000 \b, 32lite compressed
+
+>(8.s*16) string $WdX \b, WDos/X DOS extender
+
+# .EXE formats (Greg Roelofs, newt at uchicago.edu)
+#
+>0x35 string \x8e\xc0\xb9\x08\x00\xf3\xa5\x4a\x75\xeb\x8e\xc3\x8e\xd8\x33\xff\xbe\x30\x00\x05 \b, aPack compressed
+>0xe7 string LH/2\ Self-Extract \b, %s
+>0x1c string diet \b, diet compressed
+>0x1c string LZ09 \b, LZEXE v0.90 compressed
+>0x1c string LZ91 \b, LZEXE v0.91 compressed
+>0x1c string tz \b, TinyProg compressed
+>0x1e string PKLITE \b, %s compressed
+>0x64 string W\ Collis\0\0 \b, Compack compressed
+>0x24 string LHa's\ SFX \b, LHa self-extracting archive
+!:mime application/x-lha
+>0x24 string LHA's\ SFX \b, LHa self-extracting archive
+!:mime application/x-lha
+>0x24 string \ $ARX \b, ARX self-extracting archive
+>0x24 string \ $LHarc \b, LHarc self-extracting archive
+>0x20 string SFX\ by\ LARC \b, LARC self-extracting archive
+>1638 string -lh5- \b, LHa self-extracting archive v2.13S
+>0x17888 string Rar! \b, RAR self-extracting archive
+>0x40 string aPKG \b, aPackage self-extracting archive
+
+>32 string AIN
+>>35 string 2 \b, AIN 2.x compressed
+>>35 string <2 \b, AIN 1.x compressed
+>>35 string >2 \b, AIN 1.x compressed
+>28 string UC2X \b, UCEXE compressed
+>28 string WWP\ \b, WWPACK compressed
+
+# skip to the end of the exe
+>(4.s*512) long x
+>>&(2.s-517) byte x
+>>>&0 string PK\3\4 \b, ZIP self-extracting archive
+>>>&0 string Rar! \b, RAR self-extracting archive
+>>>&0 string =!\x11 \b, AIN 2.x self-extracting archive
+>>>&0 string =!\x12 \b, AIN 2.x self-extracting archive
+>>>&0 string =!\x17 \b, AIN 1.x self-extracting archive
+>>>&0 string =!\x18 \b, AIN 1.x self-extracting archive
+>>>&7 search/400 **ACE** \b, ACE self-extracting archive
+>>>&0 search/0x480 UC2SFX\ Header \b, UC2 self-extracting archive
+
+>0x1c string RJSX \b, ARJ self-extracting archive
+# winarj stores a message in the stub instead of the sig in the MZ header
+>0x20 search/0xe0 aRJsfX \b, ARJ self-extracting archive
+
+# a few unknown ZIP sfxes, no idea if they are needed or if they are
+# already captured by the generic patterns above
+>122 string Windows\ self-extracting\ ZIP \b, ZIP self-extracting archive
+>(8.s*16) search/0x20 PKSFX \b, ZIP self-extracting archive (PKZIP)
+# TODO: how to add this? >FileSize-34 string Windows\ Self-Installing\ Executable \b, ZIP self-extracting archive
+#
+
+# TELVOX Teleinformatica CODEC self-extractor for OS/2:
+>49801 string \x79\xff\x80\xff\x76\xff \b, CODEC archive v3.21
+>>49824 leshort =1 \b, 1 file
+>>49824 leshort >1 \b, %u files
+
+# .COM formats (Daniel Quinlan, quinlan at yggdrasil.com)
+# Uncommenting only the first two lines will cover about 2/3 of COM files,
+# but it isn't feasible to match all COM files since there must be at least
+# two dozen different one-byte "magics".
+# test too generic ?
+0 byte 0xe9 DOS executable (COM)
+>0x1FE leshort 0xAA55 \b, boot code
+>6 string SFX\ of\ LHarc (%s)
+0 belong 0xffffffff DOS executable (device driver)
+#CMD640X2.SYS
+>10 string >\x23
+>>10 string !\x2e
+>>>17 string <\x5B
+>>>>10 string x \b, name: %.8s
+#UDMA.SYS KEYB.SYS CMD640X2.SYS
+>10 string <\x41
+>>12 string >\x40
+>>>10 string !$
+>>>>12 string x \b, name: %.8s
+#BTCDROM.SYS ASPICD.SYS
+>22 string >\x40
+>>22 string <\x5B
+>>>23 string <\x5B
+>>>>22 string x \b, name: %.8s
+#ATAPICD.SYS
+>76 string \0
+>>77 string >\x40
+>>>77 string <\x5B
+>>>>77 string x \b, name: %.8s
+# test too generic ?
+0 byte 0x8c DOS executable (COM)
+# updated by Joerg Jenderek at Oct 2008
+0 ulelong 0xffff10eb DR-DOS executable (COM)
+# byte 0xeb conflicts with "sequent" magic leshort 0xn2eb
+0 ubeshort&0xeb8d >0xeb00
+# DR-DOS STACKER.COM SCREATE.SYS missed
+>0 byte 0xeb DOS executable (COM)
+>>0x1FE leshort 0xAA55 \b, boot code
+>>85 string UPX \b, UPX compressed
+>>4 string \ $ARX \b, ARX self-extracting archive
+>>4 string \ $LHarc \b, LHarc self-extracting archive
+>>0x20e string SFX\ by\ LARC \b, LARC self-extracting archive
+# updated by Joerg Jenderek at Oct 2008
+#0 byte 0xb8 COM executable
+0 uleshort&0x80ff 0x00b8
+# modified by Joerg Jenderek
+>1 lelong !0x21cd4cff COM executable for DOS
+# http://syslinux.zytor.com/comboot.php
+# (32-bit COMBOOT) programs *.C32 contain 32-bit code and run in flat-memory 32-bit protected mode
+# start with assembler instructions mov eax,21cd4cffh
+0 uleshort&0xc0ff 0xc0b8
+>1 lelong 0x21cd4cff COM executable (32-bit COMBOOT)
+0 string \x81\xfc
+>4 string \x77\x02\xcd\x20\xb9
+>>36 string UPX! FREE-DOS executable (COM), UPX compressed
+252 string Must\ have\ DOS\ version DR-DOS executable (COM)
+# added by Joerg Jenderek at Oct 2008
+# GRR search is not working
+#34 search/2 UPX! FREE-DOS executable (COM), UPX compressed
+34 string UPX! FREE-DOS executable (COM), UPX compressed
+35 string UPX! FREE-DOS executable (COM), UPX compressed
+# GRR search is not working
+#2 search/28 \xcd\x21 COM executable for MS-DOS
+#WHICHFAT.cOM
+2 string \xcd\x21 COM executable for DOS
+#DELTREE.cOM DELTREE2.cOM
+4 string \xcd\x21 COM executable for DOS
+#IFMEMDSK.cOM ASSIGN.cOM COMP.cOM
+5 string \xcd\x21 COM executable for DOS
+#DELTMP.COm HASFAT32.cOM
+7 string \xcd\x21
+>0 byte !0xb8 COM executable for DOS
+#COMP.cOM MORE.COm
+10 string \xcd\x21
+>5 string !\xcd\x21 COM executable for DOS
+#comecho.com
+13 string \xcd\x21 COM executable for DOS
+#HELP.COm EDIT.coM
+18 string \xcd\x21 COM executable for MS-DOS
+#NWRPLTRM.COm
+23 string \xcd\x21 COM executable for MS-DOS
+#LOADFIX.cOm LOADFIX.cOm
+30 string \xcd\x21 COM executable for MS-DOS
+#syslinux.com 3.11
+70 string \xcd\x21 COM executable for DOS
+# many compressed/converted COMs start with a copy loop instead of a jump
+0x6 search/0xa \xfc\x57\xf3\xa5\xc3 COM executable for MS-DOS
+0x6 search/0xa \xfc\x57\xf3\xa4\xc3 COM executable for DOS
+>0x18 search/0x10 \x50\xa4\xff\xd5\x73 \b, aPack compressed
+0x3c string W\ Collis\0\0 COM executable for MS-DOS, Compack compressed
+# FIXME: missing diet .com compression
+
+# miscellaneous formats
+0 string LZ MS-DOS executable (built-in)
+#0 byte 0xf0 MS-DOS program library data
+#
+
+# AAF files:
+# <stuartc at rd.bbc.co.uk> Stuart Cunningham
+0 string \320\317\021\340\241\261\032\341AAFB\015\000OM\006\016\053\064\001\001\001\377 AAF legacy file using MS Structured Storage
+>30 byte 9 (512B sectors)
+>30 byte 12 (4kB sectors)
+0 string \320\317\021\340\241\261\032\341\001\002\001\015\000\002\000\000\006\016\053\064\003\002\001\001 AAF file using MS Structured Storage
+>30 byte 9 (512B sectors)
+>30 byte 12 (4kB sectors)
+
+# Popular applications
+2080 string Microsoft\ Word\ 6.0\ Document %s
+!:mime application/msword
+2080 string Documento\ Microsoft\ Word\ 6 Spanish Microsoft Word 6 document data
+!:mime application/msword
+# Pawel Wiecek <coven at i17linuxb.ists.pwr.wroc.pl> (for polish Word)
+2112 string MSWordDoc Microsoft Word document data
+!:mime application/msword
+#
+0 belong 0x31be0000 Microsoft Word Document
+!:mime application/msword
+#
+0 string PO^Q` Microsoft Word 6.0 Document
+!:mime application/msword
+#
+0 string \376\067\0\043 Microsoft Office Document
+!:mime application/msword
+0 string \333\245-\0\0\0 Microsoft Office Document
+!:mime application/msword
+512 string \354\245\301 Microsoft Word Document
+!:mime application/msword
+#
+2080 string Microsoft\ Excel\ 5.0\ Worksheet %s
+!:mime application/vnd.ms-excel
+
+2080 string Foglio\ di\ lavoro\ Microsoft\ Exce %s
+!:mime application/vnd.ms-excel
+#
+# Pawel Wiecek <coven at i17linuxb.ists.pwr.wroc.pl> (for polish Excel)
+2114 string Biff5 Microsoft Excel 5.0 Worksheet
+!:mime application/vnd.ms-excel
+# Italian MS-Excel
+2121 string Biff5 Microsoft Excel 5.0 Worksheet
+!:mime application/vnd.ms-excel
+0 string \x09\x04\x06\x00\x00\x00\x10\x00 Microsoft Excel Worksheet
+!:mime application/vnd.ms-excel
+#
+0 belong 0x00001a00 Lotus 1-2-3
+!:mime application/x-123
+>4 belong 0x00100400 wk3 document data
+>4 belong 0x02100400 wk4 document data
+>4 belong 0x07800100 fm3 or fmb document data
+>4 belong 0x07800000 fm3 or fmb document data
+#
+0 belong 0x00000200 Lotus 1-2-3
+!:mime application/x-123
+>4 belong 0x06040600 wk1 document data
+>4 belong 0x06800200 fmt document data
+0 string WordPro\0 Lotus WordPro
+!:mime application/vnd.lotus-wordpro
+0 string WordPro\r\373 Lotus WordPro
+!:mime application/vnd.lotus-wordpro
+
+
+# Summary: Script used by InstallShield to uninstall applications
+# Extension: .isu
+# Submitted by: unknown
+# Modified by (1): Abel Cheung <abelcheung at gmail.com> (replace useless entry)
+0 string \x71\xa8\x00\x00\x01\x02
+>12 string Stirling\ Technologies, InstallShield Uninstall Script
+
+# Winamp .avs
+#0 string Nullsoft\ AVS\ Preset\ \060\056\061\032 A plug in for Winamp ms-windows Freeware media player
+0 string Nullsoft\ AVS\ Preset\ Winamp plug in
+
+# Windows Metafont .WMF
+0 string \327\315\306\232 ms-windows metafont .wmf
+0 string \002\000\011\000 ms-windows metafont .wmf
+0 string \001\000\011\000 ms-windows metafont .wmf
+
+#tz3 files whatever that is (MS Works files)
+0 string \003\001\001\004\070\001\000\000 tz3 ms-works file
+0 string \003\002\001\004\070\001\000\000 tz3 ms-works file
+0 string \003\003\001\004\070\001\000\000 tz3 ms-works file
+
+# PGP sig files .sig
+#0 string \211\000\077\003\005\000\063\237\127 065 to \027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\065\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\066\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\067\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\070\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\077\003\005\000\063\237\127\071\027\266\151\064\005\045\101\233\021\002 PGP sig
+0 string \211\000\225\003\005\000\062\122\207\304\100\345\042 PGP sig
+
+# windows zips files .dmf
+0 string MDIF\032\000\010\000\000\000\372\046\100\175\001\000\001\036\001\000 MS Windows special zipped file
+
+
+#ico files
+0 string \102\101\050\000\000\000\056\000\000\000\000\000\000\000 Icon for MS Windows
+
+# Windows icons (Ian Springer <ips at fpk.hp.com>)
+0 string \000\000\001\000 MS Windows icon resource
+!:mime image/x-ico
+>4 byte 1 - 1 icon
+>4 byte >1 - %d icons
+>>6 byte >0 \b, %dx
+>>>7 byte >0 \b%d
+>>8 byte 0 \b, 256-colors
+>>8 byte >0 \b, %d-colors
+
+
+# .chr files
+0 string PK\010\010BGI Borland font
+>4 string >\0 %s
+# then there is a copyright notice
+
+
+# .bgi files
+0 string pk\010\010BGI Borland device
+>4 string >\0 %s
+# then there is a copyright notice
+
+
+# Windows Recycle Bin record file (named INFO2)
+# By Abel Cheung (abelcheung AT gmail dot com)
+# Version 4 always has 280 bytes (0x118) per record, version 5 has 800 bytes
+# Since Vista uses another structure, INFO2 structure probably won't change
+# anymore. Detailed analysis in:
+# http://www.cybersecurityinstitute.biz/downloads/INFO2.pdf
+0 lelong 0x00000004
+>12 lelong 0x00000118 Windows Recycle Bin INFO2 file (Win98 or below)
+
+0 lelong 0x00000005
+>12 lelong 0x00000320 Windows Recycle Bin INFO2 file (Win2k - WinXP)
+
+
+##### put in Either Magic/font or Magic/news
+# Acroread or something files wrongly identified as G3 .pfm
+# these have the form \000 \001 any? \002 \000 \000
+# or \000 \001 any? \022 \000 \000
+#0 string \000\001 pfm?
+#>3 string \022\000\000Copyright\ yes
+#>3 string \002\000\000Copyright\ yes
+#>3 string >\0 oops, not a font file. Cancel that.
+#it clashes with ttf files so put it lower down.
+
+# From Doug Lee via a FreeBSD pr
+9 string GERBILDOC First Choice document
+9 string GERBILDB First Choice database
+9 string GERBILCLIP First Choice database
+0 string GERBIL First Choice device file
+9 string RABBITGRAPH RabbitGraph file
+0 string DCU1 Borland Delphi .DCU file
+0 string =!<spell> MKS Spell hash list (old format)
+0 string =!<spell2> MKS Spell hash list
+# Too simple - MPi
+#0 string AH Halo(TM) bitmapped font file
+0 lelong 0x08086b70 TurboC BGI file
+0 lelong 0x08084b50 TurboC Font file
+
+# WARNING: below line conflicts with Infocom game data Z-machine 3
+0 byte 0x03 DBase 3 data file
+>0x04 lelong 0 (no records)
+>0x04 lelong >0 (%ld records)
+0 byte 0x83 DBase 3 data file with memo(s)
+>0x04 lelong 0 (no records)
+>0x04 lelong >0 (%ld records)
+0 leshort 0x0006 DBase 3 index file
+0 string PMCC Windows 3.x .GRP file
+1 string RDC-meg MegaDots
+>8 byte >0x2F version %c
+>9 byte >0x2F \b.%c file
+0 lelong 0x4C
+>4 lelong 0x00021401 Windows shortcut file
+
+# DOS EPS Binary File Header
+# From: Ed Sznyter <ews at Black.Market.NET>
+0 belong 0xC5D0D3C6 DOS EPS Binary File
+>4 long >0 Postscript starts at byte %d
+>>8 long >0 length %d
+>>>12 long >0 Metafile starts at byte %d
+>>>>16 long >0 length %d
+>>>20 long >0 TIFF starts at byte %d
+>>>>24 long >0 length %d
+
+# TNEF magic From "Joomy" <joomy at se-ed.net>
+# Microsoft Outlook's Transport Neutral Encapsulation Format (TNEF)
+0 leshort 0x223e9f78 TNEF
+!:mime application/vnd.ms-tnef
+
+# HtmlHelp files (.chm)
+0 string ITSF\003\000\000\000\x60\000\000\000\001\000\000\000 MS Windows HtmlHelp Data
+
+# GFA-BASIC (Wolfram Kleff)
+2 string GFA-BASIC3 GFA-BASIC 3 data
+
+#------------------------------------------------------------------------------
+# From Stuart Caie <kyzer at 4u.net> (developer of cabextract)
+# Microsoft Cabinet files
+0 string MSCF\0\0\0\0 Microsoft Cabinet archive data
+!:mime application/vnd.ms-cab-compressed
+>8 lelong x \b, %u bytes
+>28 leshort 1 \b, 1 file
+>28 leshort >1 \b, %u files
+
+# InstallShield Cabinet files
+0 string ISc( InstallShield Cabinet archive data
+>5 byte&0xf0 =0x60 version 6,
+>5 byte&0xf0 !0x60 version 4/5,
+>(12.l+40) lelong x %u files
+
+# Windows CE package files
+0 string MSCE\0\0\0\0 Microsoft WinCE install header
+>20 lelong 0 \b, architecture-independent
+>20 lelong 103 \b, Hitachi SH3
+>20 lelong 104 \b, Hitachi SH4
+>20 lelong 0xA11 \b, StrongARM
+>20 lelong 4000 \b, MIPS R4000
+>20 lelong 10003 \b, Hitachi SH3
+>20 lelong 10004 \b, Hitachi SH3E
+>20 lelong 10005 \b, Hitachi SH4
+>20 lelong 70001 \b, ARM 7TDMI
+>52 leshort 1 \b, 1 file
+>52 leshort >1 \b, %u files
+>56 leshort 1 \b, 1 registry entry
+>56 leshort >1 \b, %u registry entries
+
+
+# Windows Enhanced Metafile (EMF)
+# See msdn.microsoft.com/archive/en-us/dnargdi/html/msdn_enhmeta.asp
+# for further information.
+0 ulelong 1
+>40 string \ EMF Windows Enhanced Metafile (EMF) image data
+>>44 ulelong x version 0x%x
+
+# From: Alex Beregszaszi <alex at fsn.hu>
+0 string COWD VMWare3
+>4 byte 3 disk image
+>>32 lelong x (%d/
+>>36 lelong x \b%d/
+>>40 lelong x \b%d)
+>4 byte 2 undoable disk image
+>>32 string >\0 (%s)
+
+0 string VMDK VMware4 disk image
+0 string KDMV VMware4 disk image
+
+#--------------------------------------------------------------------
+# Qemu Emulator Images
+# Lines written by Friedrich Schwittay (f.schwittay at yousable.de)
+# Made by reading sources and doing trial and error on existing
+# qcow files
+0 string QFI Qemu Image, Format: Qcow
+
+# Uncomment the following line to display Magic (only used for debugging
+# this magic number)
+#>0 string x , Magic: %s
+
+# There are currently 2 Versions: "1" and "2"
+# I do not use Version 2 and therefore branch here
+# but can assure: it works (tested on both versions)
+# Also my Qemu 0.9.0 which uses this Version 2 refuses
+# to start in its bios
+>0x04 belong 2 , Version: 2
+>0x04 belong 1 , Version: 1
+
+# Using the existence of the Backing File Offset to Branch or not
+# to read Backing File Information
+>>0xc belong >0 , Backing File( Offset: %lu
+>>>(0xc.L) string >\0 , Path: %s
+
+# Didn't get the trick here how qemu stores the "Size" at this Position
+# There is actually something stored but nothing makes sense
+# The header in the sources talks about it
+#>>>16 lelong x , Size: %lu
+
+# Modification time of the Backing File
+# Really useful if you want to know if your backing
+# file is still usable together with this image
+>>>20 bedate x , Mtime: %s )
+
+# Don't know how to calculate in Magicfiles
+# Also: this Information is not reliably
+# stored in image-files
+>>24 lelong x , Disk Size could be: %d * 256 bytes
+
+0 string QEVM QEMU's suspend to disk image
+
+0 string Bochs\ Virtual\ HD\ Image Bochs disk image,
+>32 string x type %s,
+>48 string x subtype %s
+
+0 lelong 0x02468ace Bochs Sparse disk image
+
+# from http://filext.com by Derek M Jones <derek at knosof.co.uk>
+# False positive with PPT (also currently this string is too long)
+#0 string \xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3E\x00\x03\x00\xFE\xFF\x09\x00\x06 Microsoft Installer
+0 string \320\317\021\340\241\261\032\341 Microsoft Office Document
+#>48 byte 0x1B Excel Document
+#!:mime application/vnd.ms-excel
+>546 string bjbj Microsoft Word Document
+!:mime application/msword
+>546 string jbjb Microsoft Word Document
+!:mime application/msword
+
+0 string \224\246\056 Microsoft Word Document
+!:mime application/msword
+
+512 string R\0o\0o\0t\0\ \0E\0n\0t\0r\0y Microsoft Word Document
+!:mime application/msword
+
+# From: "Nelson A. de Oliveira" <naoliv at gmail.com>
+# Magic type for Dell's BIOS .hdr files
+# Dell's .hdr
+0 string $RBU
+>23 string Dell %s system BIOS
+>48 string x version %.3s
+
+# Type: Microsoft DirectDraw Surface (prints the 32-bit values at offsets 16 and 12, then 4 bytes at offset 84)
+# URL: http://msdn.microsoft.com/library/default.asp?url=/library/en-us/directx9_c/directx/graphics/reference/DDSFileReference/ddsfileformat.asp
+# From: Morten Hustveit <morten at debian.org>
+0 string DDS\040\174\000\000\000 Microsoft DirectDraw Surface (DDS),
+>16 lelong >0 %d x
+>12 lelong >0 %d,
+>84 string x %.4s
+
+# Type: Microsoft Document Imaging Format (.mdi)
+# URL: http://en.wikipedia.org/wiki/Microsoft_Document_Imaging_Format
+# From: Daniele Sempione <scrows at oziosi.org>
+0 short 0x5045 Microsoft Document Imaging Format
+
+# MS eBook format (.lit)
+0 string ITOLITLS Microsoft Reader eBook Data
+>8 lelong x \b, version %u
+!:mime application/x-ms-reader
+
+#------------------------------------------------------------------------------
+# $File: msvc,v 1.5 2009/09/19 16:28:11 christos Exp $
+# msvc: file(1) magic for msvc
+# "H. Nanosecond" <aldomel at ix.netcom.com>
+# Microsoft visual C
+#
+# I have version 1.0
+
+# .aps
+0 string HWB\000\377\001\000\000\000 Microsoft Visual C .APS file
+
+# .ide
+#too long 0 string \102\157\162\154\141\156\144\040\103\053\053\040\120\162\157\152\145\143\164\040\106\151\154\145\012\000\032\000\002\000\262\000\272\276\372\316 MSVC .ide
+0 string \102\157\162\154\141\156\144\040\103\053\053\040\120\162\157 MSVC .ide
+
+# .res
+0 string \000\000\000\000\040\000\000\000\377 MSVC .res
+0 string \377\003\000\377\001\000\020\020\350 MSVC .res
+0 string \377\003\000\377\001\000\060\020\350 MSVC .res
+
+#.lib
+0 string \360\015\000\000 Microsoft Visual C library
+0 string \360\075\000\000 Microsoft Visual C library
+0 string \360\175\000\000 Microsoft Visual C library
+
+#.pch
+0 string DTJPCH0\000\022\103\006\200 Microsoft Visual C .pch
+
+# .pdb
+# too long 0 string Microsoft\ C/C++\ program\ database\
+0 string Microsoft\ C/C++\ MSVC program database
+>18 string program\ database\
+>33 string >\0 ver %s
+
+#.sbr
+0 string \000\002\000\007\000 MSVC .sbr
+>5 string >\0 %s
+
+#.bsc
+0 string \002\000\002\001 MSVC .bsc
+
+#.wsp
+0 string 1.00\ .0000.0000\000\003 MSVC .wsp version 1.0000.0000
+# these seem to start with the version and contain menus
+
+# ------------------------------------------------------------------------
+# $File: mup,v 1.4 2009/09/19 16:28:11 christos Exp $
+# mup: file(1) magic for Mup (Music Publisher) input file.
+#
+# From: Abel Cheung <abel (@) oaka.org>
+#
+# NOTE: This header was mainly proposed on the Arkkra mailing list,
+# and is not mandatory, to stay compatible with old mup input
+# files. Noteedit also uses the mup format, and likewise does not
+# force users to include any header.
+#
+0 search/1 //!Mup Mup music publication program input text
+>6 string -Arkkra (Arkkra)
+>>13 string -
+>>>16 string .
+>>>>14 string x \b, need V%.4s
+>>>15 string .
+>>>>14 string x \b, need V%.3s
+>6 string -
+>>9 string .
+>>>7 string x \b, need V%.4s
+>>8 string .
+>>>7 string x \b, need V%.3s
+
+#-----------------------------------------------------------------------------
+# $File: natinst,v 1.4 2009/09/19 16:28:11 christos Exp $
+# natinst: file(1) magic for National Instruments Code Files
+
+#
+# From <egamez at fcfm.buap.mx> Enrique Gómez-Flores
+# version 1
+# Many formats still missing, we use, for the moment LabVIEW
+# We guess VXI format file. VISA, LabWindowsCVI, BridgeVIEW, etc, are missing
+#
+0 string RSRC National Instruments,
+# Check if it's a LabVIEW File
+>8 string LV LabVIEW File,
+# Check which kind of file it is
+>>10 string SB Code Resource File, data
+>>10 string IN Virtual Instrument Program, data
+>>10 string AR VI Library, data
+# This is for Menu Libraries
+>8 string LMNULBVW Portable File Names, data
+# This is for General Resources
+>8 string rsc Resources File, data
+# This is for VXI Package
+0 string VMAP National Instruments, VXI File, data
+
+#------------------------------------------------------------------------------
+# $File: ncr,v 1.7 2009/09/19 16:28:11 christos Exp $
+# ncr: file(1) magic for NCR Tower objects
+#
+# contributed by
+# Michael R. Wayne *** TMC & Associates *** INTERNET: wayne at ford-vax.arpa
+# uucp: {philabs | pyramid} !fmsrl7!wayne OR wayne at fmsrl7.UUCP
+# The beshort at offset 22 is a 16-bit value, so it is printed with %d.
+0 beshort 000610 Tower/XP rel 2 object
+>12 belong >0 not stripped
+>20 beshort 0407 executable
+>20 beshort 0410 pure executable
+>22 beshort >0 - version %d
+0 beshort 000615 Tower/XP rel 2 object
+>12 belong >0 not stripped
+>20 beshort 0407 executable
+>20 beshort 0410 pure executable
+>22 beshort >0 - version %d
+0 beshort 000620 Tower/XP rel 3 object
+>12 belong >0 not stripped
+>20 beshort 0407 executable
+>20 beshort 0410 pure executable
+>22 beshort >0 - version %d
+0 beshort 000625 Tower/XP rel 3 object
+>12 belong >0 not stripped
+>20 beshort 0407 executable
+>20 beshort 0410 pure executable
+>22 beshort >0 - version %d
+0 beshort 000630 Tower32/600/400 68020 object
+>12 belong >0 not stripped
+>20 beshort 0407 executable
+>20 beshort 0410 pure executable
+>22 beshort >0 - version %d
+0 beshort 000640 Tower32/800 68020
+>18 beshort &020000 w/68881 object
+>18 beshort &040000 compatible object
+>18 beshort &060000 object
+>20 beshort 0407 executable
+>20 beshort 0413 pure executable
+>12 belong >0 not stripped
+>22 beshort >0 - version %d
+0 beshort 000645 Tower32/800 68010
+>18 beshort &040000 compatible object
+>18 beshort &060000 object
+>20 beshort 0407 executable
+>20 beshort 0413 pure executable
+>12 belong >0 not stripped
+>22 beshort >0 - version %d
+
+#------------------------------------------------------------------------------
+# $File: netbsd,v 1.18 2009/09/19 16:28:11 christos Exp $
+# netbsd: file(1) magic for NetBSD objects
+#
+# All new-style magic numbers are in network byte order.
+#
+
+0 lelong 000000407 a.out NetBSD little-endian object file
+>16 lelong >0 not stripped
+0 belong 000000407 a.out NetBSD big-endian object file
+>16 belong >0 not stripped
+
+0 belong&0377777777 041400413 a.out NetBSD/i386 demand paged
+>0 byte &0x80
+>>20 lelong <4096 shared library
+>>20 lelong =4096 dynamically linked executable
+>>20 lelong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 041400410 a.out NetBSD/i386 pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 041400407 a.out NetBSD/i386
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 lelong !0 executable
+>>20 lelong =0 object file
+>16 lelong >0 not stripped
+0 belong&0377777777 041400507 a.out NetBSD/i386 core
+>12 string >\0 from '%s'
+>32 lelong !0 (signal %d)
+
+0 belong&0377777777 041600413 a.out NetBSD/m68k demand paged
+>0 byte &0x80
+>>20 belong <8192 shared library
+>>20 belong =8192 dynamically linked executable
+>>20 belong >8192 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 041600410 a.out NetBSD/m68k pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 041600407 a.out NetBSD/m68k
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 belong !0 executable
+>>20 belong =0 object file
+>16 belong >0 not stripped
+0 belong&0377777777 041600507 a.out NetBSD/m68k core
+>12 string >\0 from '%s'
+>32 belong !0 (signal %d)
+
+0 belong&0377777777 042000413 a.out NetBSD/m68k4k demand paged
+>0 byte &0x80
+>>20 belong <4096 shared library
+>>20 belong =4096 dynamically linked executable
+>>20 belong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 042000410 a.out NetBSD/m68k4k pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 042000407 a.out NetBSD/m68k4k
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 belong !0 executable
+>>20 belong =0 object file
+>16 belong >0 not stripped
+0 belong&0377777777 042000507 a.out NetBSD/m68k4k core
+>12 string >\0 from '%s'
+>32 belong !0 (signal %d)
+
+0 belong&0377777777 042200413 a.out NetBSD/ns32532 demand paged
+>0 byte &0x80
+>>20 lelong <4096 shared library
+>>20 lelong =4096 dynamically linked executable
+>>20 lelong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 042200410 a.out NetBSD/ns32532 pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 042200407 a.out NetBSD/ns32532
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 lelong !0 executable
+>>20 lelong =0 object file
+>16 lelong >0 not stripped
+0 belong&0377777777 042200507 a.out NetBSD/ns32532 core
+>12 string >\0 from '%s'
+>32 lelong !0 (signal %d)
+
+0 belong&0377777777 045200507 a.out NetBSD/powerpc core
+>12 string >\0 from '%s'
+
+0 belong&0377777777 042400413 a.out NetBSD/sparc demand paged
+>0 byte &0x80
+>>20 belong <8192 shared library
+>>20 belong =8192 dynamically linked executable
+>>20 belong >8192 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 042400410 a.out NetBSD/sparc pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 042400407 a.out NetBSD/sparc
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 belong !0 executable
+>>20 belong =0 object file
+>16 belong >0 not stripped
+0 belong&0377777777 042400507 a.out NetBSD/sparc core
+>12 string >\0 from '%s'
+>32 belong !0 (signal %d)
+
+0 belong&0377777777 042600413 a.out NetBSD/pmax demand paged
+>0 byte &0x80
+>>20 lelong <4096 shared library
+>>20 lelong =4096 dynamically linked executable
+>>20 lelong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 042600410 a.out NetBSD/pmax pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 042600407 a.out NetBSD/pmax
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 lelong !0 executable
+>>20 lelong =0 object file
+>16 lelong >0 not stripped
+0 belong&0377777777 042600507 a.out NetBSD/pmax core
+>12 string >\0 from '%s'
+>32 lelong !0 (signal %d)
+
+0 belong&0377777777 043000413 a.out NetBSD/vax 1k demand paged
+>0 byte &0x80
+>>20 lelong <4096 shared library
+>>20 lelong =4096 dynamically linked executable
+>>20 lelong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 043000410 a.out NetBSD/vax 1k pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 043000407 a.out NetBSD/vax 1k
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 lelong !0 executable
+>>20 lelong =0 object file
+>16 lelong >0 not stripped
+0 belong&0377777777 043000507 a.out NetBSD/vax 1k core
+>12 string >\0 from '%s'
+>32 lelong !0 (signal %d)
+
+0 belong&0377777777 045400413 a.out NetBSD/vax 4k demand paged
+>0 byte &0x80
+>>20 lelong <4096 shared library
+>>20 lelong =4096 dynamically linked executable
+>>20 lelong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 045400410 a.out NetBSD/vax 4k pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 045400407 a.out NetBSD/vax 4k
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 lelong !0 executable
+>>20 lelong =0 object file
+>16 lelong >0 not stripped
+0 belong&0377777777 045400507 a.out NetBSD/vax 4k core
+>12 string >\0 from '%s'
+>32 lelong !0 (signal %d)
+
+# NetBSD/alpha does not support (and has never supported) a.out objects,
+# so no rules are provided for them. NetBSD/alpha ELF objects are
+# dealt with in "elf".
+0 lelong 0x00070185 ECOFF NetBSD/alpha binary
+>10 leshort 0x0001 not stripped
+>10 leshort 0x0000 stripped
+0 belong&0377777777 043200507 a.out NetBSD/alpha core
+>12 string >\0 from '%s'
+>32 lelong !0 (signal %d)
+
+0 belong&0377777777 043400413 a.out NetBSD/mips demand paged
+>0 byte &0x80
+>>20 belong <8192 shared library
+>>20 belong =8192 dynamically linked executable
+>>20 belong >8192 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 043400410 a.out NetBSD/mips pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+0 belong&0377777777 043400407 a.out NetBSD/mips
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 belong !0 executable
+>>20 belong =0 object file
+>16 belong >0 not stripped
+0 belong&0377777777 043400507 a.out NetBSD/mips core
+>12 string >\0 from '%s'
+>32 belong !0 (signal %d)
+
+0 belong&0377777777 043600413 a.out NetBSD/arm32 demand paged
+>0 byte &0x80
+>>20 lelong <4096 shared library
+>>20 lelong =4096 dynamically linked executable
+>>20 lelong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 043600410 a.out NetBSD/arm32 pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 lelong >0 not stripped
+0 belong&0377777777 043600407 a.out NetBSD/arm32
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80
+>>0 byte &0x40 position independent
+>>20 lelong !0 executable
+>>20 lelong =0 object file
+>16 lelong >0 not stripped
+# NetBSD/arm26 has always used ELF objects, but it shares a core file
+# format with NetBSD/arm32.
+0 belong&0377777777 043600507 a.out NetBSD/arm core
+>12 string >\0 from '%s'
+>32 lelong !0 (signal %d)
+
+#------------------------------------------------------------------------------
+# $File: netscape,v 1.6 2009/09/19 16:28:11 christos Exp $
+# netscape: file(1) magic for Netscape files
+# "H. Nanosecond" <aldomel at ix.netcom.com>
+# version 3 and 4 I think
+#
+
+# Netscape Address book .nab
+0 string \000\017\102\104\000\000\000\000\000\000\001\000\000\000\000\002\000\000\000\002\000\000\004\000 Netscape Address book
+
+# Netscape Communicator address book
+0 string \000\017\102\111 Netscape Communicator address book
+
+# .snm Caches
+0 string #\ Netscape\ folder\ cache Netscape folder cache
+0 string \000\036\204\220\000 Netscape folder cache
+# .n2p
+# Net 2 Phone
+#0 string 123\130\071\066\061\071\071\071\060\070\061\060\061\063\060
+0 string SX961999 Net2phone
+
+#
+# These are files ending in .art; FIXME: add more rules
+0 string JG\004\016\0\0\0\0 ART
+
+#------------------------------------------------------------------------------
+# $File: netware,v 1.4 2009/09/19 16:28:11 christos Exp $
+# netware: file(1) magic for NetWare Loadable Modules (NLMs)
+# From: Mads Martin Joergensen <mmj at suse.de>
+
+0 string NetWare\ Loadable\ Module NetWare Loadable Module
+
+#------------------------------------------------------------------------------
+# $File: news,v 1.6 2009/09/19 16:28:11 christos Exp $
+# news: file(1) magic for SunOS NeWS fonts (not "news" as in "netnews")
+#
+0 string StartFontMetrics ASCII font metrics
+0 string StartFont ASCII font bits
+0 belong 0x137A2944 NeWS bitmap font
+0 belong 0x137A2947 NeWS font family
+0 belong 0x137A2950 scalable OpenFont binary
+0 belong 0x137A2951 encrypted scalable OpenFont binary
+8 belong 0x137A2B45 X11/NeWS bitmap font
+8 belong 0x137A2B48 X11/NeWS font family
+
+#------------------------------------------------------------------------------
+# $File: nitpicker,v 1.4 2009/09/19 16:28:11 christos Exp $
+# nitpicker: file(1) magic for Flowfiles.
+# From: Christian Jachmann <C.Jachmann at gmx.net> http://www.nitpicker.de
+0 string NPFF NItpicker Flow File
+>4 byte x V%d.
+>5 byte x %d
+>6 bedate x started: %s
+>10 bedate x stopped: %s
+>14 belong x Bytes: %u
+>18 belong x Bytes1: %u
+>22 belong x Flows: %u
+>26 belong x Pkts: %u
+
+#------------------------------------------------------------------------------
+# $File: ocaml,v 1.4 2009/09/19 16:28:11 christos Exp $
+# ocaml: file(1) magic for Objective Caml files.
+0 string Caml1999 Objective caml
+>8 string X exec file
+>8 string I interface file (.cmi)
+>8 string O object file (.cmo)
+>8 string A library file (.cma)
+>8 string Y native object file (.cmx)
+>8 string Z native library file (.cmxa)
+>8 string M abstract syntax tree implementation file
+>8 string N abstract syntax tree interface file
+>9 string >\0 (Version %3.3s).
+
+#------------------------------------------------------------------------------
+# $File: octave,v 1.4 2009/09/19 16:28:11 christos Exp $
+# octave binary data file(1) magic, from Dirk Eddelbuettel <edd at debian.org>
+0 string Octave-1-L Octave binary data (little endian)
+0 string Octave-1-B Octave binary data (big endian)
+
+#------------------------------------------------------------------------------
+# $File: ole2compounddocs,v 1.4 2009/09/19 16:28:11 christos Exp $
+# Microsoft OLE 2 Compound Documents : file(1) magic for Microsoft Structured
+# storage (http://en.wikipedia.org/wiki/Structured_Storage)
+# Additional tests for OLE 2 Compound Documents should be under this recipe.
+
+0 string \320\317\021\340\241\261\032\341 OLE 2 Compound Document
+# - Microstation V8 DGN files (www.bentley.com)
+# Last update on 10/23/2006 by Lester Hightower
+> 0x480 string D\000g\000n\000~\000H : Microstation V8 DGN
+# - Visio documents
+# Last update on 10/23/2006 by Lester Hightower
+> 0x480 string V\000i\000s\000i\000o\000D\000o\000c : Visio Document
+
+#------------------------------------------------------------------------------
+# $File: olf,v 1.4 2009/09/19 16:28:11 christos Exp $
+# olf: file(1) magic for OLF executables
+#
+# We have to check the byte order flag to see what byte order all the
+# other stuff in the header is in.
+#
+# MIPS R3000 may also be for MIPS R2000.
+# What're the correct byte orders for the nCUBE and the Fujitsu VPP500?
+#
+# Created by Erik Theisen <etheisen at openbsd.org>
+# Based on elf from Daniel Quinlan <quinlan at yggdrasil.com>
+0 string \177OLF OLF
+>4 byte 0 invalid class
+>4 byte 1 32-bit
+>4 byte 2 64-bit
+>7 byte 0 invalid os
+>7 byte 1 OpenBSD
+>7 byte 2 NetBSD
+>7 byte 3 FreeBSD
+>7 byte 4 4.4BSD
+>7 byte 5 Linux
+>7 byte 6 SVR4
+>7 byte 7 esix
+>7 byte 8 Solaris
+>7 byte 9 Irix
+>7 byte 10 SCO
+>7 byte 11 Dell
+>7 byte 12 NCR
+>5 byte 0 invalid byte order
+>5 byte 1 LSB
+>>16 leshort 0 no file type,
+>>16 leshort 1 relocatable,
+>>16 leshort 2 executable,
+>>16 leshort 3 shared object,
+# Core handling from Peter Tobias <tobias at server.et-inf.fho-emden.de>
+# corrections by Christian 'Dr. Disk' Hechelmann <drdisk at ds9.au.s.shuttle.de>
+>>16 leshort 4 core file
+>>>(0x38+0xcc) string >\0 of '%s'
+>>>(0x38+0x10) lelong >0 (signal %d),
+>>16 leshort &0xff00 processor-specific,
+>>18 leshort 0 no machine,
+>>18 leshort 1 AT&T WE32100 - invalid byte order,
+>>18 leshort 2 SPARC - invalid byte order,
+>>18 leshort 3 Intel 80386,
+>>18 leshort 4 Motorola 68000 - invalid byte order,
+>>18 leshort 5 Motorola 88000 - invalid byte order,
+>>18 leshort 6 Intel 80486,
+>>18 leshort 7 Intel 80860,
+>>18 leshort 8 MIPS R3000_BE - invalid byte order,
+>>18 leshort 9 Amdahl - invalid byte order,
+>>18 leshort 10 MIPS R3000_LE,
+>>18 leshort 11 RS6000 - invalid byte order,
+>>18 leshort 15 PA-RISC - invalid byte order,
+>>18 leshort 16 nCUBE,
+>>18 leshort 17 VPP500,
+>>18 leshort 18 SPARC32PLUS,
+>>18 leshort 20 PowerPC,
+>>18 leshort 0x9026 Alpha,
+>>20 lelong 0 invalid version
+>>20 lelong 1 version 1
+>>36 lelong 1 MathCoPro/FPU/MAU Required
+>8 string >\0 (%s)
+>5 byte 2 MSB
+>>16 beshort 0 no file type,
+>>16 beshort 1 relocatable,
+>>16 beshort 2 executable,
+>>16 beshort 3 shared object,
+>>16 beshort 4 core file,
+>>>(0x38+0xcc) string >\0 of '%s'
+>>>(0x38+0x10) belong >0 (signal %d),
+>>16 beshort &0xff00 processor-specific,
+>>18 beshort 0 no machine,
+>>18 beshort 1 AT&T WE32100,
+>>18 beshort 2 SPARC,
+>>18 beshort 3 Intel 80386 - invalid byte order,
+>>18 beshort 4 Motorola 68000,
+>>18 beshort 5 Motorola 88000,
+>>18 beshort 6 Intel 80486 - invalid byte order,
+>>18 beshort 7 Intel 80860,
+>>18 beshort 8 MIPS R3000_BE,
+>>18 beshort 9 Amdahl,
+>>18 beshort 10 MIPS R3000_LE - invalid byte order,
+>>18 beshort 11 RS6000,
+>>18 beshort 15 PA-RISC,
+>>18 beshort 16 nCUBE,
+>>18 beshort 17 VPP500,
+>>18 beshort 18 SPARC32PLUS,
+>>18 beshort 20 PowerPC or cisco 4500,
+>>18 beshort 21 cisco 7500,
+>>18 beshort 24 cisco SVIP,
+>>18 beshort 25 cisco 7200,
+>>18 beshort 36 cisco 12000,
+>>18 beshort 0x9026 Alpha,
+>>20 belong 0 invalid version
+>>20 belong 1 version 1
+>>36 belong 1 MathCoPro/FPU/MAU Required
+
+#------------------------------------------------------------------------------
+# $File: os2,v 1.7 2009/09/19 16:28:11 christos Exp $
+# os2: file(1) magic for OS/2 files
+#
+
+# Provided 1998/08/22 by
+# David Mediavilla <davidme.news at REMOVEIFNOTSPAMusa.net>
+1 search/1 InternetShortcut MS Windows 95 Internet shortcut text
+>24 search/1 >\ (URL=<%s>)
+
+# OS/2 URL objects
+# Provided 1998/08/22 by
+# David Mediavilla <davidme.news at REMOVEIFNOTSPAMusa.net>
+#0 string http: OS/2 URL object text
+#>5 string >\ (WWW) <http:%s>
+#0 string mailto: OS/2 URL object text
+#>7 string >\ (email) <%s>
+#0 string news: OS/2 URL object text
+#>5 string >\ (Usenet) <%s>
+#0 string ftp: OS/2 URL object text
+#>4 string >\ (FTP) <ftp:%s>
+#0 string file: OS/2 URL object text
+#>5 string >\ (Local file) <%s>
+
+# >>>>> OS/2 INF/HLP <<<<< (source: Daniel Dissett ddissett at netcom.com)
+# Carl Hauser (chauser.parc at xerox.com) and
+# Marcus Groeber (marcusg at ph-cip.uni-koeln.de)
+# list the following header format in inf02a.doc:
+#
+# int16 ID; // ID magic word (5348h = "HS")
+# int8 unknown1; // unknown purpose, could be third letter of ID
+# int8 flags; // probably a flag word...
+# // bit 0: set if INF style file
+# // bit 4: set if HLP style file
+# // patching this byte allows reading HLP files
+# // using the VIEW command, while help files
+# // seem to work with INF settings here as well.
+# int16 hdrsize; // total size of header
+# int16 unknown2; // unknown purpose
+#
+0 string HSP\x01\x9b\x00 OS/2 INF
+>107 string >0 (%s)
+0 string HSP\x10\x9b\x00 OS/2 HLP
+>107 string >0 (%s)
+
+# OS/2 INI (this is a guess)
+0 string \xff\xff\xff\xff\x14\0\0\0 OS/2 INI
+
+#------------------------------------------------------------------------------
+# $File: os400,v 1.5 2009/09/19 16:28:11 christos Exp $
+# os400: file(1) magic for IBM OS/400 files
+#
+# IBM OS/400 (i5/OS) Save file (SAVF) - gerardo.cacciari at gmail.com
+# In spite of its quite variable format (due to internal memory page
+# length differences between CISC and RISC versions of the OS) the
+# SAVF structure hasn't suitable offsets to identify the catalog
+# header in the first descriptor where there are some useful infos,
+# so we must search in a somewhat large area for a particular string
+# that represents the EBCDIC encoding of 'QSRDSSPC' (save/restore
+# descriptor space) preceded by a two byte constant.
+#
+1090 search/7393 \x19\xDB\xD8\xE2\xD9\xC4\xE2\xE2\xD7\xC3 IBM OS/400 save file data
+>&212 byte 0x01 \b, created with SAVOBJ
+>&212 byte 0x02 \b, created with SAVLIB
+>&212 byte 0x07 \b, created with SAVCFG
+>&212 byte 0x08 \b, created with SAVSECDTA
+>&212 byte 0x0A \b, created with SAVSECDTA
+>&212 byte 0x0B \b, created with SAVDLO
+>&212 byte 0x0D \b, created with SAVLICPGM
+>&212 byte 0x11 \b, created with SAVCHGOBJ
+>&213 byte 0x44 \b, at least V5R4 to open
+>&213 byte 0x43 \b, at least V5R3 to open
+>&213 byte 0x42 \b, at least V5R2 to open
+>&213 byte 0x41 \b, at least V5R1 to open
+>&213 byte 0x40 \b, at least V4R5 to open
+>&213 byte 0x3F \b, at least V4R4 to open
+>&213 byte 0x3E \b, at least V4R3 to open
+>&213 byte 0x3C \b, at least V4R2 to open
+>&213 byte 0x3D \b, at least V4R1M4 to open
+>&213 byte 0x3B \b, at least V4R1 to open
+>&213 byte 0x3A \b, at least V3R7 to open
+>&213 byte 0x35 \b, at least V3R6 to open
+>&213 byte 0x36 \b, at least V3R2 to open
+>&213 byte 0x34 \b, at least V3R1 to open
+>&213 byte 0x31 \b, at least V3R0M5 to open
+>&213 byte 0x30 \b, at least V2R3 to open
+
+#------------------------------------------------------------------------------
+# $File: os9,v 1.6 2009/09/19 16:28:11 christos Exp $
+#
+# Copyright (c) 1996 Ignatios Souvatzis. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by Ignatios Souvatzis for
+# the NetBSD project.
+# 4. The name of the author may not be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+#
+# OS9/6809 module descriptions:
+#
+0 beshort 0x87CD OS9/6809 module:
+#
+>6 byte&0x0f 0x00 non-executable
+>6 byte&0x0f 0x01 machine language
+>6 byte&0x0f 0x02 BASIC I-code
+>6 byte&0x0f 0x03 Pascal P-code
+>6 byte&0x0f 0x04 C I-code
+>6 byte&0x0f 0x05 COBOL I-code
+>6 byte&0x0f 0x06 Fortran I-code
+#
+>6 byte&0xf0 0x10 program executable
+>6 byte&0xf0 0x20 subroutine
+>6 byte&0xf0 0x30 multi-module
+>6 byte&0xf0 0x40 data module
+#
+>6 byte&0xf0 0xC0 system module
+>6 byte&0xf0 0xD0 file manager
+>6 byte&0xf0 0xE0 device driver
+>6 byte&0xf0 0xF0 device descriptor
+#
+# OS9/m68k stuff (to be continued)
+#
+0 beshort 0x4AFC OS9/68K module:
+#
+# attr
+>0x14 byte&0x80 0x80 re-entrant
+>0x14 byte&0x40 0x40 ghost
+>0x14 byte&0x20 0x20 system-state
+#
+# lang:
+#
+>0x13 byte 1 machine language
+>0x13 byte 2 BASIC I-code
+>0x13 byte 3 Pascal P-code
+>0x13 byte 4 C I-code
+>0x13 byte 5 COBOL I-code
+>0x13 byte 6 Fortran I-code
+#
+#
+# type:
+#
+>0x12 byte 1 program executable
+>0x12 byte 2 subroutine
+>0x12 byte 3 multi-module
+>0x12 byte 4 data module
+>0x12 byte 11 trap library
+>0x12 byte 12 system module
+>0x12 byte 13 file manager
+>0x12 byte 14 device driver
+>0x12 byte 15 device descriptor
+
+#------------------------------------------------------------------------------
+# $File: osf1,v 1.7 2009/09/19 16:28:11 christos Exp $
+#
+# Mach magic number info
+#
+0 long 0xefbe OSF/Rose object
+# I386 magic number info
+#
+0 short 0565 i386 COFF object
+
+#------------------------------------------------------------------------------
+# $File: palm,v 1.7 2009/09/19 16:28:11 christos Exp $
+# palm: file(1) magic for PalmOS {.prc,.pdb}: applications, docfiles, and hacks
+#
+# Brian Lalor <blalor at hcirisc.cs.binghamton.edu>
+
+# appl
+60 belong 0x6170706c PalmOS application
+>0 string >\0 "%s"
+# TEXt
+60 belong 0x54455874 AportisDoc file
+>0 string >\0 "%s"
+# HACK
+60 belong 0x4841434b HackMaster hack
+>0 string >\0 "%s"
+
+# Variety of PalmOS document types
+# Michael-John Turner <mj at debian.org>
+# Thanks to Hasan Umit Ezerce <humit at tr-net.net.tr> for his DocType
+60 string BVokBDIC BDicty PalmOS document
+>0 string >\0 "%s"
+60 string DB99DBOS DB PalmOS document
+>0 string >\0 "%s"
+60 string vIMGView FireViewer/ImageViewer PalmOS document
+>0 string >\0 "%s"
+60 string PmDBPmDB HanDBase PalmOS document
+>0 string >\0 "%s"
+60 string InfoINDB InfoView PalmOS document
+>0 string >\0 "%s"
+60 string ToGoToGo iSilo PalmOS document
+>0 string >\0 "%s"
+60 string JfDbJBas JFile PalmOS document
+>0 string >\0 "%s"
+60 string JfDbJFil JFile Pro PalmOS document
+>0 string >\0 "%s"
+60 string DATALSdb List PalmOS document
+>0 string >\0 "%s"
+60 string Mdb1Mdb1 MobileDB PalmOS document
+>0 string >\0 "%s"
+60 string PNRdPPrs PeanutPress PalmOS document
+>0 string >\0 "%s"
+60 string DataPlkr Plucker PalmOS document
+>0 string >\0 "%s"
+60 string DataSprd QuickSheet PalmOS document
+>0 string >\0 "%s"
+60 string SM01SMem SuperMemo PalmOS document
+>0 string >\0 "%s"
+60 string TEXtTlDc TealDoc PalmOS document
+>0 string >\0 "%s"
+60 string InfoTlIf TealInfo PalmOS document
+>0 string >\0 "%s"
+60 string DataTlMl TealMeal PalmOS document
+>0 string >\0 "%s"
+60 string DataTlPt TealPaint PalmOS document
+>0 string >\0 "%s"
+60 string dataTDBP ThinkDB PalmOS document
+>0 string >\0 "%s"
+60 string TdatTide Tides PalmOS document
+>0 string >\0 "%s"
+60 string ToRaTRPW TomeRaider PalmOS document
+>0 string >\0 "%s"
+
+# A GutenPalm zTXT etext for use on Palm Pilots (http://gutenpalm.sf.net)
+# For version 1.xx zTXTs, outputs version and numbers of bookmarks and
+# annotations.
+# For other versions, just outputs version.
+#
+60 string zTXT A GutenPalm zTXT e-book
+>0 string >\0 "%s"
+>(0x4E.L) byte 0
+>>(0x4E.L+1) byte x (v0.%02d)
+>(0x4E.L) byte 1
+>>(0x4E.L+1) byte x (v1.%02d)
+>>>(0x4E.L+10) beshort >0
+>>>>(0x4E.L+10) beshort <2 - 1 bookmark
+>>>>(0x4E.L+10) beshort >1 - %d bookmarks
+>>>(0x4E.L+14) beshort >0
+>>>>(0x4E.L+14) beshort <2 - 1 annotation
+>>>>(0x4E.L+14) beshort >1 - %d annotations
+>(0x4E.L) byte >1 (v%d.
+>>(0x4E.L+1) byte x %02d)
+
+# Palm OS .prc file types
+60 string libr Palm OS dynamic library data
+>0 string >\0 "%s"
+60 string ptch Palm OS operating system patch data
+>0 string >\0 "%s"
+
+# Mobipocket (www.mobipocket.com), donated by Carl Witty
+60 string BOOKMOBI Mobipocket E-book
+>0 string >\0 "%s"
+
+#------------------------------------------------------------------------------
+# $File: parix,v 1.4 2009/09/19 16:28:11 christos Exp $
+#
+# Parix COFF executables
+# From: Ignatios Souvatzis <ignatios at cs.uni-bonn.de>
+#
+0 beshort&0xfff 0xACE PARIX
+>0 byte&0xf0 0x80 T800
+>0 byte&0xf0 0x90 T9000
+>19 byte&0x02 0x02 executable
+>19 byte&0x02 0x00 object
+>19 byte&0x0c 0x00 not stripped
+
+#------------------------------------------------------------------------------
+# $File: pbm,v 1.6 2009/09/19 16:28:11 christos Exp $
+# pbm: file(1) magic for Portable Bitmap files
+#
+# XXX - byte order?
+#
+0 short 0x2a17 "compact bitmap" format (Poskanzer)
+
+#------------------------------------------------------------------------------
+# $File: pdf,v 1.6 2009/09/19 16:28:11 christos Exp $
+# pdf: file(1) magic for Portable Document Format
+#
+
+0 string %PDF- PDF document
+!:mime application/pdf
+>5 byte x \b, version %c
+>7 byte x \b.%c
+
+# From: Nick Schmalenberger <nick at schmalenberger.us>
+# Forms Data Format
+0 string %FDF- FDF document
+>5 byte x \b, version %c
+>7 byte x \b.%c
+
+#------------------------------------------------------------------------------
+# $File: pdp,v 1.8 2009/09/19 16:28:11 christos Exp $
+# pdp: file(1) magic for PDP-11 executable/object and APL workspace
+#
+0 lelong 0101555 PDP-11 single precision APL workspace
+0 lelong 0101554 PDP-11 double precision APL workspace
+#
+# PDP-11 a.out
+#
+0 leshort 0407 PDP-11 executable
+>8 leshort >0 not stripped
+>15 byte >0 - version %ld
+
+0 leshort 0401 PDP-11 UNIX/RT ldp
+0 leshort 0405 PDP-11 old overlay
+
+0 leshort 0410 PDP-11 pure executable
+>8 leshort >0 not stripped
+>15 byte >0 - version %ld
+
+0 leshort 0411 PDP-11 separate I&D executable
+>8 leshort >0 not stripped
+>15 byte >0 - version %ld
+
+0 leshort 0437 PDP-11 kernel overlay
+
+# These last three are derived from 2.11BSD file(1)
+0 leshort 0413 PDP-11 demand-paged pure executable
+>8 leshort >0 not stripped
+
+0 leshort 0430 PDP-11 overlaid pure executable
+>8 leshort >0 not stripped
+
+0 leshort 0431 PDP-11 overlaid separate executable
+>8 leshort >0 not stripped
+
+#------------------------------------------------------------------------------
+# $File: perl,v 1.16 2009/09/19 16:28:11 christos Exp $
+# perl: file(1) magic for Larry Wall's perl language.
+#
+# The `eval' lines recognize an outrageously clever hack.
+# Keith Waclena <keith at cerberus.uchicago.edu>
+# Send additions to <perl5-porters at perl.org>
+0 search/1/w #!\ /bin/perl Perl script text executable
+!:mime text/x-perl
+0 search/1 eval\ "exec\ /bin/perl Perl script text
+!:mime text/x-perl
+0 search/1/w #!\ /usr/bin/perl Perl script text executable
+!:mime text/x-perl
+0 search/1 eval\ "exec\ /usr/bin/perl Perl script text
+!:mime text/x-perl
+0 search/1/w #!\ /usr/local/bin/perl Perl script text executable
+!:mime text/x-perl
+0 search/1 eval\ "exec\ /usr/local/bin/perl Perl script text
+!:mime text/x-perl
+0 search/1 eval\ '(exit\ $?0)'\ &&\ eval\ 'exec Perl script text
+!:mime text/x-perl
+
+
+# by Dmitry V. Levin and Alexey Tourbin
+# check the first line
+0 search/1 package
+>0 regex \^package[\ \t]+[0-9A-Za-z_:]+\ *; Perl5 module source text
+# not 'p', check other lines
+0 search/1 !p
+>0 regex \^package[\ \t]+[0-9A-Za-z_:]+\ *;
+>>0 regex \^1\ *;|\^(use|sub|my)\ .*[(;{=] Perl5 module source text
+
+
+
+# Perl POD documents
+# From: Tom Hukins <tom at eborcom.com>
+0 search/1/W \=pod\n Perl POD document text
+0 search/1/W \n\=pod\n Perl POD document text
+0 search/1/W \=head1\ Perl POD document text
+0 search/1/W \n\=head1\ Perl POD document text
+0 search/1/W \=head2\ Perl POD document text
+0 search/1/W \n\=head2\ Perl POD document text
+
+# Perl Storable data files.
+0 string perl-store perl Storable (v0.6) data
+>4 byte >0 (net-order %d)
+>>4 byte &01 (network-ordered)
+>>4 byte =3 (major 1)
+>>4 byte =2 (major 1)
+
+0 string pst0 perl Storable (v0.7) data
+>4 byte >0
+>>4 byte &01 (network-ordered)
+>>4 byte =5 (major 2)
+>>4 byte =4 (major 2)
+>>5 byte >0 (minor %d)
+
+#------------------------------------------------------------------------------
+# $File: pgp,v 1.9 2009/09/19 16:28:11 christos Exp $
+# pgp: file(1) magic for Pretty Good Privacy
+# see http://lists.gnupg.org/pipermail/gnupg-devel/1999-September/016052.html
+#
+0 beshort 0x9900 PGP key public ring
+!:mime application/x-pgp-keyring
+0 beshort 0x9501 PGP key security ring
+!:mime application/x-pgp-keyring
+0 beshort 0x9500 PGP key security ring
+!:mime application/x-pgp-keyring
+0 beshort 0xa600 PGP encrypted data
+#!:mime application/pgp-encrypted
+#0 string -----BEGIN\040PGP text/PGP armored data
+#!:mime text/PGP # encoding: armored data
+#>15 string PUBLIC\040KEY\040BLOCK- public key block
+#>15 string MESSAGE- message
+#>15 string SIGNED\040MESSAGE- signed message
+#>15 string PGP\040SIGNATURE- signature
+
+2 string ---BEGIN\ PGP\ PUBLIC\ KEY\ BLOCK- PGP public key block
+!:mime application/pgp-keys
+0 string -----BEGIN\040PGP\40MESSAGE- PGP message
+!:mime application/pgp
+0 string -----BEGIN\040PGP\40SIGNATURE- PGP signature
+!:mime application/pgp-signature
+
+#------------------------------------------------------------------------------
+# $File: pkgadd,v 1.6 2009/09/19 16:28:11 christos Exp $
+# pkgadd: file(1) magic for SysV R4 PKG Datastreams
+#
+0 string #\ PaCkAgE\ DaTaStReAm pkg Datastream (SVR4)
+!:mime application/x-svr4-package
+
+#------------------------------------------------------------------------------
+# $File: plan9,v 1.5 2009/09/19 16:28:11 christos Exp $
+# plan9: file(1) magic for AT&T Bell Labs' Plan 9 executables
+# From: "Stefan A. Haubenthal" <polluks at web.de>
+#
+0 belong 0x00000107 Plan 9 executable, Motorola 68k
+0 belong 0x000001EB Plan 9 executable, Intel 386
+0 belong 0x00000247 Plan 9 executable, Intel 960
+0 belong 0x000002AB Plan 9 executable, SPARC
+0 belong 0x00000407 Plan 9 executable, MIPS R3000
+0 belong 0x0000048B Plan 9 executable, AT&T DSP 3210
+0 belong 0x00000517 Plan 9 executable, MIPS R4000 BE
+0 belong 0x000005AB Plan 9 executable, AMD 29000
+0 belong 0x00000647 Plan 9 executable, ARM 7-something
+0 belong 0x000006EB Plan 9 executable, PowerPC
+0 belong 0x00000797 Plan 9 executable, MIPS R4000 LE
+0 belong 0x0000084B Plan 9 executable, DEC Alpha
+
+#------------------------------------------------------------------------------
+# $File: plus5,v 1.6 2009/09/19 16:28:11 christos Exp $
+# plus5: file(1) magic for Plus Five's UNIX MUMPS
+#
+# XXX - byte order? Paging Hokey....
+#
+0 short 0x259 mumps avl global
+>2 byte >0 (V%d)
+>6 byte >0 with %d byte name
+>7 byte >0 and %d byte data cells
+0 short 0x25a mumps blt global
+>2 byte >0 (V%d)
+>8 short >0 - %d byte blocks
+>15 byte 0x00 - P/D format
+>15 byte 0x01 - P/K/D format
+>15 byte 0x02 - K/D format
+>15 byte >0x02 - Bad Flags
+
+#------------------------------------------------------------------------------
+# $File: printer,v 1.22 2009/09/19 16:28:11 christos Exp $
+# printer: file(1) magic for printer-formatted files
+#
+
+# PostScript, updated by Daniel Quinlan (quinlan at yggdrasil.com)
+0 string %! PostScript document text
+!:mime application/postscript
+!:apple ASPSTEXT
+>2 string PS-Adobe- conforming
+>>11 string >\0 DSC level %.3s
+>>>15 string EPS \b, type %s
+>>>15 string Query \b, type %s
+>>>15 string ExitServer \b, type %s
+>>>15 search/1000 %%LanguageLevel:\
+>>>>&0 string >\0 \b, Level %s
+# Some PCs have the annoying habit of adding a ^D as a document separator
+0 string \004%! PostScript document text
+!:mime application/postscript
+!:apple ASPSTEXT
+>3 string PS-Adobe- conforming
+>>12 string >\0 DSC level %.3s
+>>>16 string EPS \b, type %s
+>>>16 string Query \b, type %s
+>>>16 string ExitServer \b, type %s
+>>>16 search/1000 %%LanguageLevel:\
+>>>>&0 string >\0 \b, Level %s
+0 string \033%-12345X%!PS PostScript document
+
+# DOS EPS Binary File Header
+# From: Ed Sznyter <ews at Black.Market.NET>
+0 belong 0xC5D0D3C6 DOS EPS Binary File
+>4 long >0 Postscript starts at byte %d
+>>8 long >0 length %d
+>>>12 long >0 Metafile starts at byte %d
+>>>>16 long >0 length %d
+>>>20 long >0 TIFF starts at byte %d
+>>>>24 long >0 length %d
+
+# Summary: Adobe's PostScript Printer Description File
+# Extension: .ppd
+# Reference: http://partners.adobe.com/public/developer/en/ps/5003.PPD_Spec_v4.3.pdf, Section 3.8
+# Submitted by: Yves Arrouye <arrouye at marin.fdn.fr>
+#
+0 string *PPD-Adobe:\x20 PPD file
+>&0 string x \b, version %s
+
+# HP Printer Job Language
+0 string \033%-12345X@PJL HP Printer Job Language data
+# HP Printer Job Language
+# The header found on Win95 HP plot files is the "Silliest Thing possible"
+# (TM)
+# Every driver puts the language at some random position, with random case
+# (LANGUAGE and Language)
+# For example the LaserJet 5L driver puts the "PJL ENTER LANGUAGE" in line 10
+# From: Uwe Bonnes <bon at elektron.ikp.physik.th-darmstadt.de>
+#
+0 string \033%-12345X@PJL HP Printer Job Language data
+>&0 string >\0 %s
+>>&0 string >\0 %s
+>>>&0 string >\0 %s
+>>>>&0 string >\0 %s
+#>15 string \ ENTER\ LANGUAGE\ =
+#>31 string PostScript PostScript
+
+# HP Printer Control Language, Daniel Quinlan (quinlan at yggdrasil.com)
+0 string \033E\033 HP PCL printer data
+>3 string \&l0A - default page size
+>3 string \&l1A - US executive page size
+>3 string \&l2A - US letter page size
+>3 string \&l3A - US legal page size
+>3 string \&l26A - A4 page size
+>3 string \&l80A - Monarch envelope size
+>3 string \&l81A - No. 10 envelope size
+>3 string \&l90A - Intl. DL envelope size
+>3 string \&l91A - Intl. C5 envelope size
+>3 string \&l100A - Intl. B5 envelope size
+>3 string \&l-81A - No. 10 envelope size (landscape)
+>3 string \&l-90A - Intl. DL envelope size (landscape)
+
+# IMAGEN printer-ready files:
+0 string @document( Imagen printer
+# this only works if "language xxx" is first item in Imagen header.
+>10 string language\ impress (imPRESS data)
+>10 string language\ daisy (daisywheel text)
+>10 string language\ diablo (daisywheel text)
+>10 string language\ printer (line printer emulation)
+>10 string language\ tektronix (Tektronix 4014 emulation)
+# Add any other languages that your Imagen uses - remember
+# to keep the word `text' if the file is human-readable.
+# [GRR 950115: missing "postscript" or "ultrascript" (whatever it was called)]
+#
+# Now magic for IMAGEN font files...
+0 string Rast RST-format raster font data
+>45 string >0 face %s
+# From Jukka Ukkonen
+0 string \033[K\002\0\0\017\033(a\001\0\001\033(g Canon Bubble Jet BJC formatted data
+
+# From <mike at flyn.org>
+# These are the /etc/magic entries to decode data sent to an Epson printer.
+0 string \x1B\x40\x1B\x28\x52\x08\x00\x00REMOTE1P Epson Stylus Color 460 data
+
+
+#------------------------------------------------------------------------------
+# zenographics: file(1) magic for Zenographics ZjStream printer data
+# Rick Richardson rickr at mn.rr.com
+0 string JZJZ
+>0x12 string ZZ Zenographics ZjStream printer data (big-endian)
+0 string ZJZJ
+>0x12 string ZZ Zenographics ZjStream printer data (little-endian)
+
+
+#------------------------------------------------------------------------------
+# Oak Technologies printer stream
+# Rick Richardson <rickr at mn.rr.com>
+0 string OAK
+>0x07 byte 0
+>0x0b byte 0 Oak Technologies printer stream
+
+# This would otherwise be recognized as PostScript - nick at debian.org
+0 string %!VMF SunClock's Vector Map Format data
+
+#------------------------------------------------------------------------------
+# HP LaserJet 1000 series downloadable firmware file
+0 string \xbe\xefABCDEFGH HP LaserJet 1000 series downloadable firmware
+
+# From: Paolo <oopla at users.sf.net>
+# Epson ESC/Page, ESC/PageColor
+0 string \x1b\x01@EJL Epson ESC/Page language printer data
+
+#------------------------------------------------------------------------------
+# $File: project,v 1.4 2009/09/19 16:28:11 christos Exp $
+# project: file(1) magic for Project management
+#
+# Magic strings for ftnchek project files. Alexander Mai
+0 string FTNCHEK_\ P project file for ftnchek
+>10 string 1 version 2.7
+>10 string 2 version 2.8 to 2.10
+>10 string 3 version 2.11 or later
+
+#------------------------------------------------------------------------------
+# $File: psdbms,v 1.6 2009/09/19 16:28:11 christos Exp $
+# psdbms: file(1) magic for psdatabase
+#
+0 belong&0xff00ffff 0x56000000 ps database
+>1 string >\0 version %s
+>4 string >\0 from kernel %s
+
+#------------------------------------------------------------------------------
+# $File: pulsar,v 1.5 2009/09/19 16:28:12 christos Exp $
+# pulsar: file(1) magic for Pulsar POP3 daemon binary files
+#
+# http://pulsar.sourceforge.net
+# mailto:rok.papez at lugos.si
+#
+
+0 belong 0x1ee7f11e Pulsar POP3 daemon mailbox cache file.
+>4 ubelong x Version: %d.
+>8 ubelong x \b%d
+
+
+#------------------------------------------------------------------------------
+# $File: pyramid,v 1.7 2009/09/19 16:28:12 christos Exp $
+# pyramid: file(1) magic for Pyramids
+#
+# XXX - byte order?
+#
+0 long 0x50900107 Pyramid 90x family executable
+0 long 0x50900108 Pyramid 90x family pure executable
+>16 long >0 not stripped
+0 long 0x5090010b Pyramid 90x family demand paged pure executable
+>16 long >0 not stripped
+
+#------------------------------------------------------------------------------
+# $File: python,v 1.12 2009/10/27 14:49:57 christos Exp $
+# python: file(1) magic for python
+#
+# From: David Necas <yeti at physics.muni.cz>
+# often the module starts with a multiline string
+0 string/t """ a python script text executable
+# MAGIC as specified in Python/import.c (1.5 to 2.6a1 and 3.1a0, assuming
+# that Py_UnicodeFlag is off for Python 2)
+# 20121 ( YEAR - 1995 ) + MONTH + DAY (little endian followed by "\r\n")
+0 belong 0x994e0d0a python 1.5/1.6 byte-compiled
+0 belong 0x87c60d0a python 2.0 byte-compiled
+0 belong 0x2aeb0d0a python 2.1 byte-compiled
+0 belong 0x2ded0d0a python 2.2 byte-compiled
+0 belong 0x3bf20d0a python 2.3 byte-compiled
+0 belong 0x6df20d0a python 2.4 byte-compiled
+0 belong 0xb3f20d0a python 2.5 byte-compiled
+0 belong 0xd1f20d0a python 2.6 byte-compiled
+0 belong 0x3b0c0d0a python 3.0 byte-compiled
+0 belong 0x4f0c0d0a python 3.1 byte-compiled
+
+0 search/1/w #!\ /usr/bin/python Python script text executable
+!:mime text/x-python
+0 search/1/w #!\ /usr/local/bin/python Python script text executable
+!:mime text/x-python
+0 search/1 #!/usr/bin/env\ python Python script text executable
+!:mime text/x-python
+0 search/1 #!\ /usr/bin/env\ python Python script text executable
+!:mime text/x-python
+
+# from module.submodule import func1, func2
+0 regex/ \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable
+!:strength - 20
+!:mime text/x-python
+
+# def __init__ (self, ...):
+0 search/4096 def\ __init__
+>&0 search/64 self Python script text executable
+!:mime text/x-python
+
+# comments
+0 search/4096 '''
+>&0 regex .*'''$ Python script text executable
+!:mime text/x-python
+
+0 search/4096 """
+>&0 regex .*"""$ Python script text executable
+!:mime text/x-python
+
+# try:
+# except: or finally:
+# block
+0 search/4096 try:
+>&0 regex \^\\s*except.*: Python script text executable
+!:mime text/x-python
+0 search/4096 try:
+>&0 search/4096 finally: Python script text executable
+!:mime text/x-python
+
+# def name(args, args):
+0 regex \^(\ |\\t)*def\ +[a-zA-Z]+
+>&0 regex \ *\\(([a-zA-Z]|,|\ )*\\):$ Python script text executable
+!:strength - 20
+!:mime text/x-python
+
+#------------------------------------------------------------------------------
+# $File: python,v 1.12 2009/10/27 14:49:57 christos Exp $
+# python: file(1) magic for python
+#
+# From: David Necas <yeti at physics.muni.cz>
+# often the module starts with a multiline string
+0 string/t """ a python script text executable
+# MAGIC as specified in Python/import.c (1.5 to 2.6a1 and 3.1a0, assuming
+# that Py_UnicodeFlag is off for Python 2)
+# 20121 ( YEAR - 1995 ) + MONTH + DAY (little endian followed by "\r\n")
+0 belong 0x994e0d0a python 1.5/1.6 byte-compiled
+0 belong 0x87c60d0a python 2.0 byte-compiled
+0 belong 0x2aeb0d0a python 2.1 byte-compiled
+0 belong 0x2ded0d0a python 2.2 byte-compiled
+0 belong 0x3bf20d0a python 2.3 byte-compiled
+0 belong 0x6df20d0a python 2.4 byte-compiled
+0 belong 0xb3f20d0a python 2.5 byte-compiled
+0 belong 0xd1f20d0a python 2.6 byte-compiled
+0 belong 0x3b0c0d0a python 3.0 byte-compiled
+0 belong 0x4f0c0d0a python 3.1 byte-compiled
+
+0 search/1/w #!\ /usr/bin/python Python script text executable
+!:mime text/x-python
+0 search/1/w #!\ /usr/local/bin/python Python script text executable
+!:mime text/x-python
+0 search/1 #!/usr/bin/env\ python Python script text executable
+!:mime text/x-python
+0 search/1 #!\ /usr/bin/env\ python Python script text executable
+!:mime text/x-python
+
+# from module.submodule import func1, func2
+0 regex/ \^from\\s+(\\w|\\.)+\\s+import.*$ Python script text executable
+!:strength - 20
+!:mime text/x-python
+
+# def __init__ (self, ...):
+0 search/4096 def\ __init__
+>&0 search/64 self Python script text executable
+!:mime text/x-python
+
+# comments
+0 search/4096 '''
+>&0 regex .*'''$ Python script text executable
+!:mime text/x-python
+
+0 search/4096 """
+>&0 regex .*"""$ Python script text executable
+!:mime text/x-python
+
+# try:
+# except: or finally:
+# block
+0 search/4096 try:
+>&0 regex \^\\s*except.*: Python script text executable
+!:mime text/x-python
+0 search/4096 try:
+>&0 search/4096 finally: Python script text executable
+!:mime text/x-python
+
+#------------------------------------------------------------------------------
+# $File: revision,v 1.6 2009/09/19 16:28:12 christos Exp $
+# file(1) magic for revision control files
+# From Hendrik Scholz <hendrik at scholz.net>
+0 string /1\ :pserver: cvs password text file
+
+# Conary changesets
+# From: Jonathan Smith <smithj at rpath.com>
+0 belong 0xea3f81bb Conary changeset data
+
+# Type: Git bundles (git-bundle)
+# From: Josh Triplett <josh at freedesktop.org>
+0 string #\ v2\ git\ bundle\n Git bundle
+
+# Type: Mercurial bundles
+# From: Seo Sanghyeon <tinuviel at sparcs.kaist.ac.kr>
+0 string HG10 Mercurial bundle,
+>4 string UN uncompressed
+>4 string BZ bzip2 compressed
+
+#------------------------------------------------------------------------------
+# $File: riff,v 1.18 2009/09/19 16:28:12 christos Exp $
+# riff: file(1) magic for RIFF format
+# See
+#
+# http://www.seanet.com/users/matts/riffmci/riffmci.htm
+#
+# AVI section extended by Patrik Rådman <patrik+file-magic at iki.fi>
+#
+0 string RIFF RIFF (little-endian) data
+# RIFF Palette format
+>8 string PAL \b, palette
+>>16 leshort x \b, version %d
+>>18 leshort x \b, %d entries
+# RIFF Device Independent Bitmap format
+>8 string RDIB \b, device-independent bitmap
+>>16 string BM
+>>>30 leshort 12 \b, OS/2 1.x format
+>>>>34 leshort x \b, %d x
+>>>>36 leshort x %d
+>>>30 leshort 64 \b, OS/2 2.x format
+>>>>34 leshort x \b, %d x
+>>>>36 leshort x %d
+>>>30 leshort 40 \b, Windows 3.x format
+>>>>34 lelong x \b, %d x
+>>>>38 lelong x %d x
+>>>>44 leshort x %d
+# RIFF MIDI format
+>8 string RMID \b, MIDI
+# RIFF Multimedia Movie File format
+>8 string RMMP \b, multimedia movie
+# RIFF wrapper for MP3
+>8 string RMP3 \b, MPEG Layer 3 audio
+# Microsoft WAVE format (*.wav)
+>8 string WAVE \b, WAVE audio
+!:mime audio/x-wav
+>>20 leshort 1 \b, Microsoft PCM
+>>>34 leshort >0 \b, %d bit
+>>20 leshort 2 \b, Microsoft ADPCM
+>>20 leshort 6 \b, ITU G.711 A-law
+>>20 leshort 7 \b, ITU G.711 mu-law
+>>20 leshort 17 \b, IMA ADPCM
+>>20 leshort 20 \b, ITU G.723 ADPCM (Yamaha)
+>>20 leshort 49 \b, GSM 6.10
+>>20 leshort 64 \b, ITU G.721 ADPCM
+>>20 leshort 80 \b, MPEG
+>>20 leshort 85 \b, MPEG Layer 3
+>>22 leshort =1 \b, mono
+>>22 leshort =2 \b, stereo
+>>22 leshort >2 \b, %d channels
+>>24 lelong >0 %d Hz
+# Corel Draw Picture
+>8 string CDRA \b, Corel Draw Picture
+!:mime image/x-coreldraw
+# AVI == Audio Video Interleave
+>8 string AVI\040 \b, AVI
+!:mime video/x-msvideo
+>>12 string LIST
+>>>20 string hdrlavih
+>>>>&36 lelong x \b, %lu x
+>>>>&40 lelong x %lu,
+>>>>&4 lelong >1000000 <1 fps,
+>>>>&4 lelong 1000000 1.00 fps,
+>>>>&4 lelong 500000 2.00 fps,
+>>>>&4 lelong 333333 3.00 fps,
+>>>>&4 lelong 250000 4.00 fps,
+>>>>&4 lelong 200000 5.00 fps,
+>>>>&4 lelong 166667 6.00 fps,
+>>>>&4 lelong 142857 7.00 fps,
+>>>>&4 lelong 125000 8.00 fps,
+>>>>&4 lelong 111111 9.00 fps,
+>>>>&4 lelong 100000 10.00 fps,
+# ]9.9,10.1[
+>>>>&4 lelong <101010
+>>>>>&-4 lelong >99010
+>>>>>>&-4 lelong !100000 ~10 fps,
+>>>>&4 lelong 83333 12.00 fps,
+# ]11.9,12.1[
+>>>>&4 lelong <84034
+>>>>>&-4 lelong >82645
+>>>>>>&-4 lelong !83333 ~12 fps,
+>>>>&4 lelong 66667 15.00 fps,
+# ]14.9,15.1[
+>>>>&4 lelong <67114
+>>>>>&-4 lelong >66225
+>>>>>>&-4 lelong !66667 ~15 fps,
+>>>>&4 lelong 50000 20.00 fps,
+>>>>&4 lelong 41708 23.98 fps,
+>>>>&4 lelong 41667 24.00 fps,
+# ]23.9,24.1[
+>>>>&4 lelong <41841
+>>>>>&-4 lelong >41494
+>>>>>>&-4 lelong !41708
+>>>>>>>&-4 lelong !41667 ~24 fps,
+>>>>&4 lelong 40000 25.00 fps,
+# ]24.9,25.1[
+>>>>&4 lelong <40161
+>>>>>&-4 lelong >39841
+>>>>>>&-4 lelong !40000 ~25 fps,
+>>>>&4 lelong 33367 29.97 fps,
+>>>>&4 lelong 33333 30.00 fps,
+# ]29.9,30.1[
+>>>>&4 lelong <33445
+>>>>>&-4 lelong >33223
+>>>>>>&-4 lelong !33367
+>>>>>>>&-4 lelong !33333 ~30 fps,
+>>>>&4 lelong <32224 >30 fps,
+##>>>>&4 lelong x (%lu)
+##>>>>&20 lelong x %lu frames,
+# Note: The tests below assume that the AVI has 1 or 2 streams,
+# "vids" optionally followed by "auds".
+# (Should cover 99.9% of all AVIs.)
+# assuming avih length = 56
+>>>88 string LIST
+>>>>96 string strlstrh
+>>>>>108 string vids video:
+>>>>>>&0 lelong 0 uncompressed
+# skip past vids strh
+>>>>>>(104.l+108) string strf
+>>>>>>>(104.l+132) lelong 1 RLE 8bpp
+>>>>>>>(104.l+132) string/c cvid Cinepak
+>>>>>>>(104.l+132) string/c i263 Intel I.263
+>>>>>>>(104.l+132) string/c iv32 Indeo 3.2
+>>>>>>>(104.l+132) string/c iv41 Indeo 4.1
+>>>>>>>(104.l+132) string/c iv50 Indeo 5.0
+>>>>>>>(104.l+132) string/c mp42 Microsoft MPEG-4 v2
+>>>>>>>(104.l+132) string/c mp43 Microsoft MPEG-4 v3
+>>>>>>>(104.l+132) string/c fmp4 FFMpeg MPEG-4
+>>>>>>>(104.l+132) string/c mjpg Motion JPEG
+>>>>>>>(104.l+132) string/c div3 DivX 3
+>>>>>>>>112 string/c div3 Low-Motion
+>>>>>>>>112 string/c div4 Fast-Motion
+>>>>>>>(104.l+132) string/c divx DivX 4
+>>>>>>>(104.l+132) string/c dx50 DivX 5
+>>>>>>>(104.l+132) string/c xvid XviD
+>>>>>>>(104.l+132) string/c h264 H.264
+>>>>>>>(104.l+132) string/c wmv3 Windows Media Video 9
+>>>>>>>(104.l+132) string/c h264 X.264 or H.264
+>>>>>>>(104.l+132) lelong 0
+##>>>>>>>(104.l+132) string x (%.4s)
+# skip past first (video) LIST
+>>>>(92.l+96) string LIST
+>>>>>(92.l+104) string strlstrh
+>>>>>>(92.l+116) string auds \b, audio:
+# auds strh length = 56:
+>>>>>>>(92.l+172) string strf
+>>>>>>>>(92.l+180) leshort 0x0001 uncompressed PCM
+>>>>>>>>(92.l+180) leshort 0x0002 ADPCM
+>>>>>>>>(92.l+180) leshort 0x0006 aLaw
+>>>>>>>>(92.l+180) leshort 0x0007 uLaw
+>>>>>>>>(92.l+180) leshort 0x0050 MPEG-1 Layer 1 or 2
+>>>>>>>>(92.l+180) leshort 0x0055 MPEG-1 Layer 3
+>>>>>>>>(92.l+180) leshort 0x2000 Dolby AC3
+>>>>>>>>(92.l+180) leshort 0x0161 DivX
+##>>>>>>>>(92.l+180) leshort x (0x%.4x)
+>>>>>>>>(92.l+182) leshort 1 (mono,
+>>>>>>>>(92.l+182) leshort 2 (stereo,
+>>>>>>>>(92.l+182) leshort >2 (%d channels,
+>>>>>>>>(92.l+184) lelong x %d Hz)
+# auds strh length = 64:
+>>>>>>>(92.l+180) string strf
+>>>>>>>>(92.l+188) leshort 0x0001 uncompressed PCM
+>>>>>>>>(92.l+188) leshort 0x0002 ADPCM
+>>>>>>>>(92.l+188) leshort 0x0055 MPEG-1 Layer 3
+>>>>>>>>(92.l+188) leshort 0x2000 Dolby AC3
+>>>>>>>>(92.l+188) leshort 0x0161 DivX
+##>>>>>>>>(92.l+188) leshort x (0x%.4x)
+>>>>>>>>(92.l+190) leshort 1 (mono,
+>>>>>>>>(92.l+190) leshort 2 (stereo,
+>>>>>>>>(92.l+190) leshort >2 (%d channels,
+>>>>>>>>(92.l+192) lelong x %d Hz)
+# Animated Cursor format
+>8 string ACON \b, animated cursor
+# SoundFont 2 <mpruett at sgi.com>
+>8 string sfbk SoundFont/Bank
+# MPEG-1 wrapped in a RIFF, apparently
+>8 string CDXA \b, wrapped MPEG-1 (CDXA)
+>8 string 4XMV \b, 4X Movie file
+
+#
+# XXX - some of the below may only appear in little-endian form.
+#
+# Also "MV93" appears to be for one form of Macromedia Director
+# files, and "GDMF" appears to be another multimedia format.
+#
+0 string RIFX RIFF (big-endian) data
+# RIFF Palette format
+>8 string PAL \b, palette
+>>16 beshort x \b, version %d
+>>18 beshort x \b, %d entries
+# RIFF Device Independent Bitmap format
+>8 string RDIB \b, device-independent bitmap
+>>16 string BM
+>>>30 beshort 12 \b, OS/2 1.x format
+>>>>34 beshort x \b, %d x
+>>>>36 beshort x %d
+>>>30 beshort 64 \b, OS/2 2.x format
+>>>>34 beshort x \b, %d x
+>>>>36 beshort x %d
+>>>30 beshort 40 \b, Windows 3.x format
+>>>>34 belong x \b, %d x
+>>>>38 belong x %d x
+>>>>44 beshort x %d
+# RIFF MIDI format
+>8 string RMID \b, MIDI
+# RIFF Multimedia Movie File format
+>8 string RMMP \b, multimedia movie
+# Microsoft WAVE format (*.wav)
+>8 string WAVE \b, WAVE audio
+>>20 leshort 1 \b, Microsoft PCM
+>>>34 leshort >0 \b, %d bit
+>>22 beshort =1 \b, mono
+>>22 beshort =2 \b, stereo
+>>22 beshort >2 \b, %d channels
+>>24 belong >0 %d Hz
+# Corel Draw Picture
+>8 string CDRA \b, Corel Draw Picture
+# AVI == Audio Video Interleave
+>8 string AVI\040 \b, AVI
+# Animated Cursor format
+>8 string ACON \b, animated cursor
+# Notation Interchange File Format (big-endian only)
+>8 string NIFF \b, Notation Interchange File Format
+# SoundFont 2 <mpruett at sgi.com>
+>8 string sfbk SoundFont/Bank
+
+#------------------------------------------------------------------------------
+# $File: rpm,v 1.9 2009/11/06 13:53:52 christos Exp $
+#
+# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt at redhat.com)
+#
+0 belong 0xedabeedb RPM
+!:mime application/x-rpm
+>4 byte x v%d
+>5 byte x \b.%d
+>6 beshort 1 src
+>6 beshort 0 bin
+>>8 beshort 1 i386/x86_64
+>>8 beshort 2 Alpha/Sparc64
+>>8 beshort 3 Sparc
+>>8 beshort 4 MIPS
+>>8 beshort 5 PowerPC
+>>8 beshort 6 68000
+>>8 beshort 7 SGI
+>>8 beshort 8 RS6000
+>>8 beshort 9 IA64
+>>8 beshort 10 Sparc64
+>>8 beshort 11 MIPSel
+>>8 beshort 12 ARM
+>>8 beshort 13 MiNT
+>>8 beshort 14 S/390
+>>8 beshort 15 S/390x
+>>8 beshort 16 PowerPC64
+>>8 beshort 17 SuperH
+>>8 beshort 18 Xtensa
+>>8 beshort 255 noarch
+>>10 string x %s
+
+#delta RPM Daniel Novotny (dnovotny at redhat.com)
+0 string drpm Delta RPM
+!:mime application/x-rpm
+>12 string x %s
+
+>>>8 beshort 11 MIPSel
+>>>8 beshort 12 ARM
+>>>8 beshort 13 MiNT
+>>>8 beshort 14 S/390
+>>>8 beshort 15 S/390x
+>>>8 beshort 16 PowerPC64
+>>>8 beshort 17 SuperH
+>>>8 beshort 18 Xtensa
+>>10 string x %s
+
+#------------------------------------------------------------------------------
+# $File: rpm,v 1.9 2009/11/06 13:53:52 christos Exp $
+#
+# RPM: file(1) magic for Red Hat Packages Erik Troan (ewt at redhat.com)
+#
+0 belong 0xedabeedb RPM
+!:mime application/x-rpm
+>4 byte x v%d
+>5 byte x \b.%d
+>6 beshort 1 src
+>6 beshort 0 bin
+>>8 beshort 1 i386/x86_64
+>>8 beshort 2 Alpha/Sparc64
+>>8 beshort 3 Sparc
+>>8 beshort 4 MIPS
+>>8 beshort 5 PowerPC
+>>8 beshort 6 68000
+>>8 beshort 7 SGI
+>>8 beshort 8 RS6000
+>>8 beshort 9 IA64
+>>8 beshort 10 Sparc64
+>>8 beshort 11 MIPSel
+>>8 beshort 12 ARM
+>>8 beshort 13 MiNT
+>>8 beshort 14 S/390
+>>8 beshort 15 S/390x
+>>8 beshort 16 PowerPC64
+>>8 beshort 17 SuperH
+>>8 beshort 18 Xtensa
+>>8 beshort 255 noarch
+
+#delta RPM Daniel Novotny (dnovotny at redhat.com)
+0 string drpm Delta RPM
+!:mime application/x-rpm
+>12 string x %s
+
+>>>8 beshort 11 MIPSel
+>>>8 beshort 12 ARM
+>>>8 beshort 13 MiNT
+>>>8 beshort 14 S/390
+>>>8 beshort 15 S/390x
+>>>8 beshort 16 PowerPC64
+>>>8 beshort 17 SuperH
+>>>8 beshort 18 Xtensa
+>>10 string x %s
+
+#------------------------------------------------------------------------------
+# $File: rtf,v 1.7 2009/09/19 16:28:12 christos Exp $
+# rtf: file(1) magic for Rich Text Format (RTF)
+#
+# Duncan P. Simpson, D.P.Simpson at dcs.warwick.ac.uk
+#
+0 string {\\rtf Rich Text Format data,
+!:mime text/rtf
+>5 string 1 version 1,
+>>6 string \\ansi ANSI
+>>6 string \\mac Apple Macintosh
+>>6 string \\pc IBM PC, code page 437
+>>6 string \\pca IBM PS/2, code page 850
+>>6 default x unknown character set
+>5 default x unknown version
+
+#------------------------------------------------------------------------------
+# $File: ruby,v 1.3 2009/09/19 16:28:12 christos Exp $
+# ruby: file(1) magic for Ruby scripting language
+# URL: http://www.ruby-lang.org/
+# From: Reuben Thomas <rrt at sc3d.org>
+
+# Ruby scripts
+0 search/1/w #!\ /usr/bin/ruby Ruby script text executable
+!:mime text/x-ruby
+0 search/1/w #!\ /usr/local/bin/ruby Ruby script text executable
+!:mime text/x-ruby
+0 search/1 #!/usr/bin/env\ ruby Ruby script text executable
+!:mime text/x-ruby
+0 search/1 #!\ /usr/bin/env\ ruby Ruby script text executable
+!:mime text/x-ruby
+
+# What looks like ruby, but does not have a shebang
+# (modules and such)
+# From: Lubomir Rintel <lkundrak at v3.sk>
+0 regex \^[\ \t]*require[\ \t]'[A-Za-z_\/]+'
+>0 regex include\ [A-Z]|def\ [a-z]|\ do$
+>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby script text
+!:mime text/x-ruby
+0 regex \^[\ \t]*(class|module)[\ \t][A-Z]
+>0 regex (modul|includ)e\ [A-Z]|def\ [a-z]
+>>0 regex \^[\ \t]*end([\ \t]*[;#].*)?$ Ruby module source text
+!:mime text/x-ruby
+
+#------------------------------------------------------------------------------
+# $File: sc,v 1.6 2009/09/19 16:28:12 christos Exp $
+# sc: file(1) magic for "sc" spreadsheet
+#
+38 string Spreadsheet sc spreadsheet file
+!:mime application/x-sc
+
+#------------------------------------------------------------------------------
+# $File: sccs,v 1.6 2009/09/19 16:28:12 christos Exp $
+# sccs: file(1) magic for SCCS archives
+#
+# SCCS archive structure:
+# \001h01207
+# \001s 00276/00000/00000
+# \001d D 1.1 87/09/23 08:09:20 ian 1 0
+# \001c date and time created 87/09/23 08:09:20 by ian
+# \001e
+# \001u
+# \001U
+# ... etc.
+# Now '\001h' happens to be the same as the 3B20's a.out magic number (0550).
+# *Sigh*. And these both came from various parts of the USG.
+# Maybe we should just switch everybody from SCCS to RCS!
+# Further, you can't just say '\001h0', because the five-digit number
+# is a checksum that could (presumably) have any leading digit,
+# and we don't have regular expression matching yet.
+# Hence the following official kludge:
+8 string \001s\ SCCS archive data
+
+#------------------------------------------------------------------------------
+# $File: scientific,v 1.6 2009/09/19 16:28:12 christos Exp $
+# scientific: file(1) magic for scientific formats
+#
+# From: Joe Krahn <krahn at niehs.nih.gov>
+
+########################################################
+# CCP4 data and plot files:
+0 string MTZ\040 MTZ reflection file
+
+92 string PLOT%%84 Plot84 plotting file
+>52 byte 1 , Little-endian
+>55 byte 1 , Big-endian
+
+########################################################
+# Electron density MAP/MASK formats
+
+0 string EZD_MAP NEWEZD Electron Density Map
+109 string MAP\040( Old EZD Electron Density Map
+
+0 string/c :-)\040Origin BRIX Electron Density Map
+>170 string >0 , Sigma:%.12s
+#>4 string >0 %.178s
+#>4 addr x %.178s
+
+7 string 18\040!NTITLE XPLOR ASCII Electron Density Map
+9 string \040!NTITLE\012\040REMARK CNS ASCII electron density map
+
+208 string MAP\040 CCP4 Electron Density Map
+# Assumes same stamp for float and double (normal case)
+>212 byte 17 \b, Big-endian
+>212 byte 34 \b, VAX format
+>212 byte 68 \b, Little-endian
+>212 byte 85 \b, Convex native
+
+############################################################
+# X-Ray Area Detector images
+0 string R-AXIS4\ \ \ R-Axis Area Detector Image:
+>796 lelong <20 Little-endian, IP #%d,
+>>768 lelong >0 Size=%dx
+>>772 lelong >0 \b%d
+>796 belong <20 Big-endian, IP #%d,
+>>768 belong >0 Size=%dx
+>>772 belong >0 \b%d
+
+0 string RAXIS\ \ \ \ \ R-Axis Area Detector Image, Win32:
+>796 lelong <20 Little-endian, IP #%d,
+>>768 lelong >0 Size=%dx
+>>772 lelong >0 \b%d
+>796 belong <20 Big-endian, IP #%d,
+>>768 belong >0 Size=%dx
+>>772 belong >0 \b%d
+
+
+1028 string MMX\000\000\000\000\000\000\000\000\000\000\000\000\000 MAR Area Detector Image,
+>1072 ulong >1 Compressed(%d),
+>1100 ulong >1 %d headers,
+>1104 ulong >0 %d x
+>1108 ulong >0 %d,
+>1120 ulong >0 %d bits/pixel
+
+# Type: GEDCOM genealogical (family history) data
+# From: Giuseppe Bilotta
+0 search/1/c 0\ HEAD GEDCOM genealogy text
+>&0 search 1\ GEDC
+>>&0 search 2\ VERS version
+>>>&1 search/1 >\0 %s
+# From: Phil Endecott <phil05 at chezphil.org>
+0 string \000\060\000\040\000\110\000\105\000\101\000\104 GEDCOM data
+0 string \060\000\040\000\110\000\105\000\101\000\104\000 GEDCOM data
+0 string \376\377\000\060\000\040\000\110\000\105\000\101\000\104 GEDCOM data
+0 string \377\376\060\000\040\000\110\000\105\000\101\000\104\000 GEDCOM data
+
+#------------------------------------------------------------------------------
+# $File: securitycerts,v 1.4 2009/09/19 16:28:12 christos Exp $
+0 search/1 -----BEGIN\ CERTIFICATE------ RFC1421 Security Certificate text
+0 search/1 -----BEGIN\ NEW\ CERTIFICATE RFC1421 Security Certificate Signing Request text
+0 belong 0xedfeedfe Sun 'jks' Java Keystore File data
+
+0 string \0volume_key volume_key escrow packet
+
+#------------------------------------------------------------------------------
+# $File: sendmail,v 1.7 2009/09/19 16:28:12 christos Exp $
+# sendmail: file(1) magic for sendmail config files
+#
+# XXX - byte order?
+#
+0 byte 046 Sendmail frozen configuration
+>16 string >\0 - version %s
+0 short 0x271c Sendmail frozen configuration
+>16 string >\0 - version %s
+
+#------------------------------------------------------------------------------
+# sendmail: file(1) magic for sendmail m4(1) files
+#
+# From Hendrik Scholz <hendrik at scholz.net>
+# i.e. files in /usr/share/sendmail/cf/
+#
+0 string divert(-1)\n sendmail m4 text file
+
+
+#------------------------------------------------------------------------------
+# $File: sequent,v 1.8 2009/09/19 16:28:12 christos Exp $
+# sequent: file(1) magic for Sequent machines
+#
+# Sequent information updated by Don Dwiggins <atsun!dwiggins>.
+# For Sequent's multiprocessor systems (incomplete).
+0 lelong 0x00ea BALANCE NS32000 .o
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+0 lelong 0x10ea BALANCE NS32000 executable (0 @ 0)
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+0 lelong 0x20ea BALANCE NS32000 executable (invalid @ 0)
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+0 lelong 0x30ea BALANCE NS32000 standalone executable
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+#
+# Symmetry information added by Jason Merrill <jason at jarthur.claremont.edu>.
+# Symmetry magic nums will not be reached if DOS COM comes before them;
+# byte 0xeb is matched before these get a chance.
+0 leshort 0x12eb SYMMETRY i386 .o
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+0 leshort 0x22eb SYMMETRY i386 executable (0 @ 0)
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+0 leshort 0x32eb SYMMETRY i386 executable (invalid @ 0)
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+0 leshort 0x42eb SYMMETRY i386 standalone executable
+>16 lelong >0 not stripped
+>124 lelong >0 version %ld
+
+#------------------------------------------------------------------------------
+# $File: sgi,v 1.17 2009/09/19 16:28:12 christos Exp $
+# sgi: file(1) magic for Silicon Graphics applications
+
+#
+#
+# Performance Co-Pilot file types
+0 string PmNs PCP compiled namespace (V.0)
+0 string PmN PCP compiled namespace
+>3 string >\0 (V.%1.1s)
+#3 lelong 0x84500526 PCP archive
+3 belong 0x84500526 PCP archive
+>7 byte x (V.%d)
+#>20 lelong -2 temporal index
+#>20 lelong -1 metadata
+#>20 lelong 0 log volume #0
+#>20 lelong >0 log volume #%ld
+>20 belong -2 temporal index
+>20 belong -1 metadata
+>20 belong 0 log volume #0
+>20 belong >0 log volume #%ld
+>24 string >\0 host: %s
+0 string PCPFolio PCP
+>9 string Version: Archive Folio
+>18 string >\0 (V.%s)
+0 string #pmchart PCP pmchart view
+>9 string Version
+>17 string >\0 (V%-3.3s)
+0 string #kmchart PCP kmchart view
+>9 string Version
+>17 string >\0 (V.%s)
+0 string pmview PCP pmview config
+>7 string Version
+>15 string >\0 (V%-3.3s)
+0 string #pmlogger PCP pmlogger config
+>10 string Version
+>18 string >\0 (V%1.1s)
+0 string #pmdahotproc PCP pmdahotproc config
+>13 string Version
+>21 string >\0 (V%-3.3s)
+0 string PcPh PCP Help
+>4 string 1 Index
+>4 string 2 Text
+>5 string >\0 (V.%1.1s)
+0 string #pmieconf-rules PCP pmieconf rules
+>16 string >\0 (V.%1.1s)
+3 string pmieconf-pmie PCP pmie config
+>17 string >\0 (V.%1.1s)
+
+# SpeedShop data files
+0 lelong 0x13130303 SpeedShop data file
+
+# mdbm files
+0 lelong 0x01023962 mdbm file, version 0 (obsolete)
+0 string mdbm mdbm file,
+>5 byte x version %d,
+>6 byte x 2^%d pages,
+>7 byte x pagesize 2^%d,
+>17 byte x hash %d,
+>11 byte x dataformat %d
+
+# Alias Maya files
+0 string //Maya ASCII Alias Maya Ascii File,
+>13 string >\0 version %s
+8 string MAYAFOR4 Alias Maya Binary File,
+>32 string >\0 version %s scene
+8 string MayaFOR4 Alias Maya Binary File,
+>32 string >\0 version %s scene
+8 string CIMG Alias Maya Image File
+8 string DEEP Alias Maya Image File
+#------------------------------------------------------------------------------
+# $File: sgml,v 1.24 2009/09/19 17:31:35 christos Exp $
+# Type: SVG Vectorial Graphics
+# From: Noel Torres <tecnico at ejerciciosresueltos.com>
+0 string \<?xml\ version="
+>15 string >\0
+>>19 search/4096 \<svg SVG Scalable Vector Graphics image
+!:mime image/svg+xml
+>>19 search/4096 \<gnc-v2 GnuCash file
+!:mime application/x-gnucash
+
+# Sitemap file
+0 string \<?xml\ version="
+>15 string >\0
+>>19 search/4096 \<urlset XML Sitemap document text
+!:mime application/xml-sitemap
+
+# xhtml
+0 string \<?xml\ version="
+>15 string >\0
+>>19 search/4096/cWbt \<!doctype\ html xHTML document text
+!:mime text/html
+0 string \<?xml\ version='
+>15 string >\0
+>>19 search/4096/cWbt \<!doctype\ html xHTML document text
+!:mime text/html
+0 string \<?xml\ version="
+>15 string >\0
+>>19 search/4096/cWbt \<html broken xHTML document text
+!:mime text/html
+
+#------------------------------------------------------------------------------
+# sgml: file(1) magic for Standard Generalized Markup Language
+# HyperText Markup Language (HTML) is an SGML document type,
+# from Daniel Quinlan (quinlan at yggdrasil.com)
+# adapted to string extensions by Anthon van der Neut <anthon at mnt.org>
+0 search/1/cb \<!doctype\ html HTML document text
+!:mime text/html
+0 search/1/cb \<head HTML document text
+!:mime text/html
+0 search/1/cb \<title HTML document text
+!:mime text/html
+0 search/1/cb \<html HTML document text
+!:mime text/html
+
+# Extensible markup language (XML), a subset of SGML
+# from Marc Prud'hommeaux (marc at apocalypse.org)
+0 search/1/cwbt \<?xml XML document text
+!:mime application/xml
+0 string \<?xml\ version\ " XML
+!:mime application/xml
+0 string \<?xml\ version=" XML
+!:mime application/xml
+>15 search/1 >\0 %.3s document text
+>>23 search/1 \<xsl:stylesheet (XSL stylesheet)
+>>24 search/1 \<xsl:stylesheet (XSL stylesheet)
+0 string \<?xml\ version=' XML
+!:mime application/xml
+>15 search/1 >\0 %.3s document text
+>>23 search/1 \<xsl:stylesheet (XSL stylesheet)
+>>24 search/1 \<xsl:stylesheet (XSL stylesheet)
+0 search/1/wbt \<?xml XML document text
+!:mime application/xml
+!:strength - 10
+0 search/1/wbt \<?XML broken XML document text
+!:mime application/xml
+!:strength - 10
+
+
+# SGML, mostly from rph at sq
+0 search/1/cb \<!doctype exported SGML document text
+0 search/1/cb \<!subdoc exported SGML subdocument text
+0 search/1/cb \<!-- exported SGML document text
+
+# Web browser cookie files
+# (Mozilla, Galeon, Netscape 4, Konqueror..)
+# Ulf Harnhammar <ulfh at update.uu.se>
+0 search/1 #\ HTTP\ Cookie\ File Web browser cookie text
+0 search/1 #\ Netscape\ HTTP\ Cookie\ File Netscape cookie text
+0 search/1 #\ KDE\ Cookie\ File Konqueror cookie text
+
+#------------------------------------------------------------------------
+# $File: sharc,v 1.6 2009/09/19 16:28:12 christos Exp $
+# file(1) magic for sharc files
+#
+# SHARC DSP, MIDI SysEx and RiscOS filetype definitions added by
+# FutureGroove Music (dsp at futuregroove.de)
+
+#------------------------------------------------------------------------
+#0 string Draw RiscOS Drawfile
+#0 string PACK RiscOS PackdDir archive
+
+#------------------------------------------------------------------------
+# SHARC DSP stuff (based on the FGM SHARC DSP SDK)
+
+#0 string =! Assembler source
+#0 string Analog ADi asm listing file
+0 string .SYSTEM SHARC architecture file
+0 string .system SHARC architecture file
+
+0 leshort 0x521C SHARC COFF binary
+>2 leshort >1 , %hd sections
+>>12 lelong >0 , not stripped
+
+#------------------------------------------------------------------------------
+# $File: sinclair,v 1.5 2009/09/19 16:28:12 christos Exp $
+# sinclair: file(1) magic for Sinclair QL
+
+# additions to /etc/magic by Thomas M. Ott (ThMO)
+
+# Sinclair QL floppy disk formats (ThMO)
+0 string =QL5 QL disk dump data,
+>3 string =A 720 KB,
+>3 string =B 1.44 MB,
+>3 string =C 3.2 MB,
+>4 string >\0 label:%.10s
+
+# Sinclair QL OS dump (ThMO)
+# (NOTE: if `file' were able to use indirect references in an endian format
+# differing from the natural host format, this could be written more
+# reliably and faster...)
+#
+# we *can't* lookup QL OS code dumps, because `file' is UNABLE to read more
+# than the first 8K of a file... #-(
+#
+#0 belong =0x30000
+#>49124 belong <47104
+#>>49128 belong <47104
+#>>>49132 belong <47104
+#>>>>49136 belong <47104 QL OS dump data,
+#>>>>>49148 string >\0 type %.3s,
+#>>>>>49142 string >\0 version %.4s
+
+# Sinclair QL firmware executables (ThMO)
+0 string NqNqNq`\004 QL firmware executable (BCPL)
+
+# Sinclair QL libraries (was ThMO)
+0 beshort 0xFB01 QDOS object
+>2 pstring x '%s'
+
+# Sinclair QL executables (was ThMO)
+4 belong 0x4AFB QDOS executable
+>9 pstring x '%s'
+
+# Sinclair QL ROM (ThMO)
+0 belong =0x4AFB0001 QL plugin-ROM data,
+>9 pstring =\0 un-named
+>9 pstring >\0 named: %s
+
+#------------------------------------------------------------------------------
+# $File: sketch,v 1.4 2009/09/19 16:28:12 christos Exp $
+# Sketch Drawings: http://sketch.sourceforge.net/
+# From: Edwin Mons <e at ik.nu>
+0 search/1 ##Sketch Sketch document text
+
+#-----------------------------------------------
+# $File: smalltalk,v 1.5 2009/09/19 16:28:12 christos Exp $
+# GNU Smalltalk image, starting at version 1.6.2
+# From: catull_us at yahoo.com
+#
+0 string GSTIm\0\0 GNU SmallTalk
+# little-endian
+>7 byte&1 =0 LE image version
+>>10 byte x %d.
+>>9 byte x \b%d.
+>>8 byte x \b%d
+#>>12 lelong x , data: %ld
+#>>16 lelong x , table: %ld
+#>>20 lelong x , memory: %ld
+# big-endian
+>7 byte&1 =1 BE image version
+>>8 byte x %d.
+>>9 byte x \b%d.
+>>10 byte x \b%d
+#>>12 belong x , data: %ld
+#>>16 belong x , table: %ld
+#>>20 belong x , memory: %ld
+
+
+
+#------------------------------------------------------------------------------
+# $File: sniffer,v 1.14 2009/09/19 16:28:12 christos Exp $
+# sniffer: file(1) magic for packet capture files
+#
+# From: guy at alum.mit.edu (Guy Harris)
+#
+
+#
+# Microsoft Network Monitor 1.x capture files.
+#
+0 string RTSS NetMon capture file
+>5 byte x - version %d
+>4 byte x \b.%d
+>6 leshort 0 (Unknown)
+>6 leshort 1 (Ethernet)
+>6 leshort 2 (Token Ring)
+>6 leshort 3 (FDDI)
+>6 leshort 4 (ATM)
+
+#
+# Microsoft Network Monitor 2.x capture files.
+#
+0 string GMBU NetMon capture file
+>5 byte x - version %d
+>4 byte x \b.%d
+>6 leshort 0 (Unknown)
+>6 leshort 1 (Ethernet)
+>6 leshort 2 (Token Ring)
+>6 leshort 3 (FDDI)
+>6 leshort 4 (ATM)
+
+#
+# Network General Sniffer capture files.
+# Sorry, make that "Network Associates Sniffer capture files."
+# Sorry, make that "Network General old DOS Sniffer capture files."
+#
+0 string TRSNIFF\ data\ \ \ \ \032 Sniffer capture file
+>33 byte 2 (compressed)
+>23 leshort x - version %d
+>25 leshort x \b.%d
+>32 byte 0 (Token Ring)
+>32 byte 1 (Ethernet)
+>32 byte 2 (ARCNET)
+>32 byte 3 (StarLAN)
+>32 byte 4 (PC Network broadband)
+>32 byte 5 (LocalTalk)
+>32 byte 6 (Znet)
+>32 byte 7 (Internetwork Analyzer)
+>32 byte 9 (FDDI)
+>32 byte 10 (ATM)
+
+#
+# Cinco Networks NetXRay capture files.
+# Sorry, make that "Network General Sniffer Basic capture files."
+# Sorry, make that "Network Associates Sniffer Basic capture files."
+# Sorry, make that "Network Associates Sniffer Basic, and Windows
+# Sniffer Pro, capture files."
+# Sorry, make that "Network General Sniffer capture files."
+#
+0 string XCP\0 NetXRay capture file
+>4 string >\0 - version %s
+>44 leshort 0 (Ethernet)
+>44 leshort 1 (Token Ring)
+>44 leshort 2 (FDDI)
+>44 leshort 3 (WAN)
+>44 leshort 8 (ATM)
+>44 leshort 9 (802.11)
+
+#
+# "libpcap" capture files.
+# (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
+# the main program that uses that format, but there are other programs
+# that use "libpcap", or that use the same capture file format.)
+#
+0 ubelong 0xa1b2c3d4 tcpdump capture file (big-endian)
+>4 beshort x - version %d
+>6 beshort x \b.%d
+>20 belong 0 (No link-layer encapsulation
+>20 belong 1 (Ethernet
+>20 belong 2 (3Mb Ethernet
+>20 belong 3 (AX.25
+>20 belong 4 (ProNET
+>20 belong 5 (CHAOS
+>20 belong 6 (Token Ring
+>20 belong 7 (BSD ARCNET
+>20 belong 8 (SLIP
+>20 belong 9 (PPP
+>20 belong 10 (FDDI
+>20 belong 11 (RFC 1483 ATM
+>20 belong 12 (raw IP
+>20 belong 13 (BSD/OS SLIP
+>20 belong 14 (BSD/OS PPP
+>20 belong 19 (Linux ATM Classical IP
+>20 belong 50 (PPP or Cisco HDLC
+>20 belong 51 (PPP-over-Ethernet
+>20 belong 99 (Symantec Enterprise Firewall
+>20 belong 100 (RFC 1483 ATM
+>20 belong 101 (raw IP
+>20 belong 102 (BSD/OS SLIP
+>20 belong 103 (BSD/OS PPP
+>20 belong 104 (BSD/OS Cisco HDLC
+>20 belong 105 (802.11
+>20 belong 106 (Linux Classical IP over ATM
+>20 belong 107 (Frame Relay
+>20 belong 108 (OpenBSD loopback
+>20 belong 109 (OpenBSD IPsec encrypted
+>20 belong 112 (Cisco HDLC
+>20 belong 113 (Linux "cooked"
+>20 belong 114 (LocalTalk
+>20 belong 117 (OpenBSD PFLOG
+>20 belong 119 (802.11 with Prism header
+>20 belong 122 (RFC 2625 IP over Fibre Channel
+>20 belong 123 (SunATM
+>20 belong 127 (802.11 with radiotap header
+>20 belong 129 (Linux ARCNET
+>20 belong 138 (Apple IP over IEEE 1394
+>20 belong 140 (MTP2
+>20 belong 141 (MTP3
+>20 belong 143 (DOCSIS
+>20 belong 144 (IrDA
+>20 belong 147 (Private use 0
+>20 belong 148 (Private use 1
+>20 belong 149 (Private use 2
+>20 belong 150 (Private use 3
+>20 belong 151 (Private use 4
+>20 belong 152 (Private use 5
+>20 belong 153 (Private use 6
+>20 belong 154 (Private use 7
+>20 belong 155 (Private use 8
+>20 belong 156 (Private use 9
+>20 belong 157 (Private use 10
+>20 belong 158 (Private use 11
+>20 belong 159 (Private use 12
+>20 belong 160 (Private use 13
+>20 belong 161 (Private use 14
+>20 belong 162 (Private use 15
+>20 belong 163 (802.11 with AVS header
+>16 belong x \b, capture length %d)
+0 ulelong 0xa1b2c3d4 tcpdump capture file (little-endian)
+>4 leshort x - version %d
+>6 leshort x \b.%d
+>20 lelong 0 (No link-layer encapsulation
+>20 lelong 1 (Ethernet
+>20 lelong 2 (3Mb Ethernet
+>20 lelong 3 (AX.25
+>20 lelong 4 (ProNET
+>20 lelong 5 (CHAOS
+>20 lelong 6 (Token Ring
+>20 lelong 7 (ARCNET
+>20 lelong 8 (SLIP
+>20 lelong 9 (PPP
+>20 lelong 10 (FDDI
+>20 lelong 11 (RFC 1483 ATM
+>20 lelong 12 (raw IP
+>20 lelong 13 (BSD/OS SLIP
+>20 lelong 14 (BSD/OS PPP
+>20 lelong 19 (Linux ATM Classical IP
+>20 lelong 50 (PPP or Cisco HDLC
+>20 lelong 51 (PPP-over-Ethernet
+>20 lelong 99 (Symantec Enterprise Firewall
+>20 lelong 100 (RFC 1483 ATM
+>20 lelong 101 (raw IP
+>20 lelong 102 (BSD/OS SLIP
+>20 lelong 103 (BSD/OS PPP
+>20 lelong 104 (BSD/OS Cisco HDLC
+>20 lelong 105 (802.11
+>20 lelong 106 (Linux Classical IP over ATM
+>20 lelong 107 (Frame Relay
+>20 lelong 108 (OpenBSD loopback
+>20 lelong 109 (OpenBSD IPsec encrypted
+>20 lelong 112 (Cisco HDLC
+>20 lelong 113 (Linux "cooked"
+>20 lelong 114 (LocalTalk
+>20 lelong 117 (OpenBSD PFLOG
+>20 lelong 119 (802.11 with Prism header
+>20 lelong 122 (RFC 2625 IP over Fibre Channel
+>20 lelong 123 (SunATM
+>20 lelong 127 (802.11 with radiotap header
+>20 lelong 129 (Linux ARCNET
+>20 lelong 138 (Apple IP over IEEE 1394
+>20 lelong 140 (MTP2
+>20 lelong 141 (MTP3
+>20 lelong 143 (DOCSIS
+>20 lelong 144 (IrDA
+>20 lelong 147 (Private use 0
+>20 lelong 148 (Private use 1
+>20 lelong 149 (Private use 2
+>20 lelong 150 (Private use 3
+>20 lelong 151 (Private use 4
+>20 lelong 152 (Private use 5
+>20 lelong 153 (Private use 6
+>20 lelong 154 (Private use 7
+>20 lelong 155 (Private use 8
+>20 lelong 156 (Private use 9
+>20 lelong 157 (Private use 10
+>20 lelong 158 (Private use 11
+>20 lelong 159 (Private use 12
+>20 lelong 160 (Private use 13
+>20 lelong 161 (Private use 14
+>20 lelong 162 (Private use 15
+>20 lelong 163 (802.11 with AVS header
+>16 lelong x \b, capture length %d)
+
+#
+# "libpcap"-with-Alexey-Kuznetsov's-patches capture files.
+# (We call them "tcpdump capture file(s)" for now, as "tcpdump" is
+# the main program that uses that format, but there are other programs
+# that use "libpcap", or that use the same capture file format.)
+#
+0 ubelong 0xa1b2cd34 extended tcpdump capture file (big-endian)
+>4 beshort x - version %d
+>6 beshort x \b.%d
+>20 belong 0 (No link-layer encapsulation
+>20 belong 1 (Ethernet
+>20 belong 2 (3Mb Ethernet
+>20 belong 3 (AX.25
+>20 belong 4 (ProNET
+>20 belong 5 (CHAOS
+>20 belong 6 (Token Ring
+>20 belong 7 (ARCNET
+>20 belong 8 (SLIP
+>20 belong 9 (PPP
+>20 belong 10 (FDDI
+>20 belong 11 (RFC 1483 ATM
+>20 belong 12 (raw IP
+>20 belong 13 (BSD/OS SLIP
+>20 belong 14 (BSD/OS PPP
+>16 belong x \b, capture length %d)
+0 ulelong 0xa1b2cd34 extended tcpdump capture file (little-endian)
+>4 leshort x - version %d
+>6 leshort x \b.%d
+>20 lelong 0 (No link-layer encapsulation
+>20 lelong 1 (Ethernet
+>20 lelong 2 (3Mb Ethernet
+>20 lelong 3 (AX.25
+>20 lelong 4 (ProNET
+>20 lelong 5 (CHAOS
+>20 lelong 6 (Token Ring
+>20 lelong 7 (ARCNET
+>20 lelong 8 (SLIP
+>20 lelong 9 (PPP
+>20 lelong 10 (FDDI
+>20 lelong 11 (RFC 1483 ATM
+>20 lelong 12 (raw IP
+>20 lelong 13 (BSD/OS SLIP
+>20 lelong 14 (BSD/OS PPP
+>16 lelong x \b, capture length %d)
+
+#
+# AIX "iptrace" capture files.
+#
+0 string iptrace\ 1.0 "iptrace" capture file
+0 string iptrace\ 2.0 "iptrace" capture file
+
+#
+# Novell LANalyzer capture files.
+#
+0 leshort 0x1001 LANalyzer capture file
+0 leshort 0x1007 LANalyzer capture file
+
+#
+# HP-UX "nettl" capture files.
+#
+0 string \x54\x52\x00\x64\x00 "nettl" capture file
+
+#
+# RADCOM WAN/LAN Analyzer capture files.
+#
+0 string \x42\xd2\x00\x34\x12\x66\x22\x88 RADCOM WAN/LAN Analyzer capture file
+
+#
+# NetStumbler log files. Not really packets, per se, but about as
+# close as you can get. These are log files from NetStumbler, a
+# Windows program, that scans for 802.11b networks.
+#
+0 string NetS NetStumbler log file
+>8 lelong x \b, %d stations found
+
+#
+# EtherPeek/AiroPeek "version 9" capture files.
+#
+0 string \177ver EtherPeek/AiroPeek capture file
+
+#
+# Visual Networks traffic capture files.
+#
+0 string \x05VNF Visual Networks traffic capture file
+
+#
+# Network Instruments Observer capture files.
+#
+0 string ObserverPktBuffe Network Instruments Observer capture file
+
+#
+# Files from Accellent Group's 5View products.
+#
+0 string \xaa\xaa\xaa\xaa 5View capture file
+
+#------------------------------------------------------------------------------
+# $File: softquad,v 1.13 2009/09/19 16:28:12 christos Exp $
+# softquad: file(1) magic for SoftQuad Publishing Software
+#
+# Author/Editor and RulesBuilder
+#
+# XXX - byte order?
+#
+0 string \<!SQ\ DTD> Compiled SGML rules file
+>9 string >\0 Type %s
+0 string \<!SQ\ A/E> A/E SGML Document binary
+>9 string >\0 Type %s
+0 string \<!SQ\ STS> A/E SGML binary styles file
+>9 string >\0 Type %s
+0 short 0xc0de Compiled PSI (v1) data
+0 short 0xc0da Compiled PSI (v2) data
+>3 string >\0 (%s)
+# Binary sqtroff font/desc files...
+0 short 0125252 SoftQuad DESC or font file binary
+>2 short >0 - version %d
+# Bitmaps...
+0 search/1 SQ\ BITMAP1 SoftQuad Raster Format text
+#0 string SQ\ BITMAP2 SoftQuad Raster Format data
+# sqtroff intermediate language (replacement for ditroff int. lang.)
+0 string X\ SoftQuad troff Context intermediate
+>2 string 495 for AT&T 495 laser printer
+>2 string hp for Hewlett-Packard LaserJet
+>2 string impr for IMAGEN imPRESS
+>2 string ps for PostScript
+
+# From: Michael Piefel <piefel at debian.org>
+# sqtroff intermediate language (replacement for ditroff int. lang.)
+0 string X\ 495 SoftQuad troff Context intermediate for AT&T 495 laser printer
+0 string X\ hp SoftQuad troff Context intermediate for HP LaserJet
+0 string X\ impr SoftQuad troff Context intermediate for IMAGEN imPRESS
+0 string X\ ps SoftQuad troff Context intermediate for PostScript
+
+#------------------------------------------------------------------------------
+# $File: spec,v 1.4 2009/09/19 16:28:12 christos Exp $
+# spec: file(1) magic for SPEC raw results (*.raw, *.rsf)
+#
+# Cloyce D. Spradling <cloyce at headgear.org>
+
+0 string spec SPEC
+>4 string .cpu CPU
+>>8 string <: \b%.4s
+>>12 string . raw result text
+
+17 string version=SPECjbb SPECjbb
+>32 string <: \b%.4s
+>>37 string <: v%.4s raw result text
+
+0 string BEGIN\040SPECWEB SPECweb
+>13 string <: \b%.2s
+>>15 string _SSL \b_SSL
+>>>20 string <: v%.4s raw result text
+>>16 string <: v%.4s raw result text
+
+#------------------------------------------------------------------------------
+# $File: spectrum,v 1.6 2009/09/19 16:28:12 christos Exp $
+# spectrum: file(1) magic for Spectrum emulator files.
+#
+# John Elliott <jce at seasip.demon.co.uk>
+
+#
+# Spectrum +3DOS header
+#
+0 string PLUS3DOS\032 Spectrum +3 data
+>15 byte 0 - BASIC program
+>15 byte 1 - number array
+>15 byte 2 - character array
+>15 byte 3 - memory block
+>>16 belong 0x001B0040 (screen)
+>15 byte 4 - Tasword document
+>15 string TAPEFILE - ZXT tapefile
+#
+# Tape file. This assumes the .TAP starts with a Spectrum-format header,
+# which nearly all will.
+#
+0 string \023\000\000 Spectrum .TAP data
+>4 string x "%-10.10s"
+>3 byte 0 - BASIC program
+>3 byte 1 - number array
+>3 byte 2 - character array
+>3 byte 3 - memory block
+>>14 belong 0x001B0040 (screen)
+
+# The following three blocks are from pak21-spectrum at srcf.ucam.org
+# TZX tape images
+0 string ZXTape!\x1a Spectrum .TZX data
+>8 byte x version %d
+>9 byte x \b.%d
+
+# RZX input recording files
+0 string RZX! Spectrum .RZX data
+>4 byte x version %d
+>5 byte x \b.%d
+
+# Floppy disk images
+0 string MV\ -\ CPCEMU\ Disk-Fil Amstrad/Spectrum .DSK data
+0 string MV\ -\ CPC\ format\ Dis Amstrad/Spectrum DU54 .DSK data
+0 string EXTENDED\ CPC\ DSK\ Fil Amstrad/Spectrum Extended .DSK data
+0 string SINCLAIR Spectrum .SCL Betadisk image
+
+# Hard disk images
+0 string RS-IDE\x1a Spectrum .HDF hard disk image
+>7 byte x \b, version 0x%02x
+
+#------------------------------------------------------------------------------
+# $File: sql,v 1.6 2009/09/19 16:28:12 christos Exp $
+# sql: file(1) magic for SQL files
+#
+# From: "Marty Leisner" <mleisner at eng.mc.xerox.com>
+# Recognize some MySQL files.
+#
+0 beshort 0xfe01 MySQL table definition file
+>2 byte x Version %d
+0 belong&0xffffff00 0xfefe0300 MySQL MISAM index file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0700 MySQL MISAM compressed data file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0500 MySQL ISAM index file
+>3 byte x Version %d
+0 belong&0xffffff00 0xfefe0600 MySQL ISAM compressed data file
+>3 byte x Version %d
+0 string \376bin MySQL replication log
+
+#------------------------------------------------------------------------------
+# iRiver H Series database file
+# From Ken Guest <ken at linux.ie>
+# As observed from iRivNavi.iDB and unencoded firmware
+#
+0 string iRivDB iRiver Database file
+>11 string >\0 Version %s
+>39 string iHP-100 [H Series]
+
+#------------------------------------------------------------------------------
+# SQLite database files
+# Ken Guest <ken at linux.ie>, Ty Sarna, Zack Weinberg
+#
+# Version 1 used GDBM internally; its files cannot be distinguished
+# from other GDBM files.
+#
+# Version 2 used this format:
+0 string **\ This\ file\ contains\ an\ SQLite SQLite 2.x database
+
+# Version 3 of SQLite allows applications to embed their own "user version"
+# number in the database. Detect this and distinguish those files.
+
+0 string SQLite\ format\ 3
+>60 string _MTN Monotone source repository
+>60 belong !0 SQLite 3.x database, user version %u
+>60 belong 0 SQLite 3.x database
+
+#------------------------------------------------------------------------------
+# $File: sun,v 1.20 2009/09/19 16:28:12 christos Exp $
+# sun: file(1) magic for Sun machines
+#
+# Values for big-endian Sun (MC680x0, SPARC) binaries on pre-5.x
+# releases. (5.x uses ELF.)
+#
+0 belong&077777777 0600413 sparc demand paged
+>0 byte &0x80
+>>20 belong <4096 shared library
+>>20 belong =4096 dynamically linked executable
+>>20 belong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0600410 sparc pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0600407 sparc
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0400413 mc68020 demand paged
+>0 byte &0x80
+>>20 belong <4096 shared library
+>>20 belong =4096 dynamically linked executable
+>>20 belong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0400410 mc68020 pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0400407 mc68020
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0200413 mc68010 demand paged
+>0 byte &0x80
+>>20 belong <4096 shared library
+>>20 belong =4096 dynamically linked executable
+>>20 belong >4096 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0200410 mc68010 pure
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+0 belong&077777777 0200407 mc68010
+>0 byte &0x80 dynamically linked executable
+>0 byte ^0x80 executable
+>16 belong >0 not stripped
+
+# reworked these to avoid anything beginning with zero becoming "old sun-2"
+0 belong 0407 old sun-2 executable
+>16 belong >0 not stripped
+0 belong 0410 old sun-2 pure executable
+>16 belong >0 not stripped
+0 belong 0413 old sun-2 demand paged executable
+>16 belong >0 not stripped
+
+#
+# Core files. "SPARC 4.x BCP" means "core file from a SunOS 4.x SPARC
+# binary executed in compatibility mode under SunOS 5.x".
+#
+0 belong 0x080456 SunOS core file
+>4 belong 432 (SPARC)
+>>132 string >\0 from '%s'
+>>116 belong =3 (quit)
+>>116 belong =4 (illegal instruction)
+>>116 belong =5 (trace trap)
+>>116 belong =6 (abort)
+>>116 belong =7 (emulator trap)
+>>116 belong =8 (arithmetic exception)
+>>116 belong =9 (kill)
+>>116 belong =10 (bus error)
+>>116 belong =11 (segmentation violation)
+>>116 belong =12 (bad argument to system call)
+>>116 belong =29 (resource lost)
+>>120 belong x (T=%dK,
+>>124 belong x D=%dK,
+>>128 belong x S=%dK)
+>4 belong 826 (68K)
+>>128 string >\0 from '%s'
+>4 belong 456 (SPARC 4.x BCP)
+>>152 string >\0 from '%s'
+# Sun SunPC
+0 long 0xfa33c08e SunPC 4.0 Hard Disk
+0 string #SUNPC_CONFIG SunPC 4.0 Properties Values
+# Sun snoop (see RFC 1761, which describes the capture file format).
+#
+0 string snoop Snoop capture file
+>8 belong >0 - version %ld
+>12 belong 0 (IEEE 802.3)
+>12 belong 1 (IEEE 802.4)
+>12 belong 2 (IEEE 802.5)
+>12 belong 3 (IEEE 802.6)
+>12 belong 4 (Ethernet)
+>12 belong 5 (HDLC)
+>12 belong 6 (Character synchronous)
+>12 belong 7 (IBM channel-to-channel adapter)
+>12 belong 8 (FDDI)
+>12 belong 9 (Unknown)
+
+# Microsoft ICM color profile
+36 string acspMSFT Microsoft ICM Color Profile
+# Sun KCMS
+36 string acsp Kodak Color Management System, ICC Profile
+
+#---------------------------------------------------------------------------
+# The following entries have been tested by Duncan Laurie <duncan at sun.com> (a
+# lead Sun/Cobalt developer) who agrees that they are good and worthy of
+# inclusion.
+
+# Boot ROM images for Sun/Cobalt Linux server appliances
+0 string Cobalt\ Networks\ Inc.\nFirmware\ v Paged COBALT boot rom
+>38 string x V%.4s
+
+# New format for Sun/Cobalt boot ROMs is annoying, it stores the version code
+# at the very end where file(1) can't get it.
+0 string CRfs COBALT boot rom data (Flat boot rom or file system)
+
+
+
+#------------------------------------------------------------------------
+# $File: sysex,v 1.6 2009/09/19 16:28:12 christos Exp $
+# sysex: file(1) magic for MIDI sysex files
+#
+#
+0 byte 0xF0 SysEx File -
+
+# North American Group
+>1 byte 0x01 Sequential
+>1 byte 0x02 IDP
+>1 byte 0x03 OctavePlateau
+>1 byte 0x04 Moog
+>1 byte 0x05 Passport
+>1 byte 0x06 Lexicon
+>1 byte 0x07 Kurzweil/Future Retro
+>>3 byte 0x77 777
+>>4 byte 0x00 Bank
+>>4 byte 0x01 Song
+>>5 byte 0x0f 16
+>>5 byte 0x0e 15
+>>5 byte 0x0d 14
+>>5 byte 0x0c 13
+>>5 byte 0x0b 12
+>>5 byte 0x0a 11
+>>5 byte 0x09 10
+>>5 byte 0x08 9
+>>5 byte 0x07 8
+>>5 byte 0x06 7
+>>5 byte 0x05 6
+>>5 byte 0x04 5
+>>5 byte 0x03 4
+>>5 byte 0x02 3
+>>5 byte 0x01 2
+>>5 byte 0x00 1
+>>5 byte 0x10 (ALL)
+>>2 byte x \b, Channel %d
+>1 byte 0x08 Fender
+>1 byte 0x09 Gulbransen
+>1 byte 0x0a AKG
+>1 byte 0x0b Voyce
+>1 byte 0x0c Waveframe
+>1 byte 0x0d ADA
+>1 byte 0x0e Garfield
+>1 byte 0x0f Ensoniq
+>1 byte 0x10 Oberheim
+>>2 byte 0x06 Matrix 6 series
+>>3 byte 0x0A Dump (All)
+>>3 byte 0x01 Dump (Bank)
+>>4 belong 0x0002040E Matrix 1000
+>>>11 byte <2 User bank %d
+>>>11 byte >1 Preset bank %d
+>1 byte 0x11 Apple
+>1 byte 0x12 GreyMatter
+>1 byte 0x14 PalmTree
+>1 byte 0x15 JLCooper
+>1 byte 0x16 Lowrey
+>1 byte 0x17 AdamsSmith
+>1 byte 0x18 E-mu
+>1 byte 0x19 Harmony
+>1 byte 0x1a ART
+>1 byte 0x1b Baldwin
+>1 byte 0x1c Eventide
+>1 byte 0x1d Inventronics
+>1 byte 0x1f Clarity
+
+# European Group
+>1 byte 0x21 SIEL
+>1 byte 0x22 Synthaxe
+>1 byte 0x24 Hohner
+>1 byte 0x25 Twister
+>1 byte 0x26 Solton
+>1 byte 0x27 Jellinghaus
+>1 byte 0x28 Southworth
+>1 byte 0x29 PPG
+>1 byte 0x2a JEN
+>1 byte 0x2b SSL
+>1 byte 0x2c AudioVertrieb
+
+>1 byte 0x2f ELKA
+>>3 byte 0x09 EK-44
+
+>1 byte 0x30 Dynacord
+>1 byte 0x31 Jomox
+>1 byte 0x33 Clavia
+>1 byte 0x39 Soundcraft
+# Some Waldorf info from http://Stromeko.Synth.net/Downloads#WaldorfDocs
+>1 byte 0x3e Waldorf
+>>2 byte 0x00 microWave
+>>2 byte 0x0E microwave2 / XT
+>>2 byte 0x0F Q / Q+
+>>3 byte =0 (default id)
+>>3 byte >0 (
+>>>3 byte <0x7F \bdevice %d)
+>>>3 byte =0x7F \bbroadcast id)
+>>3 byte 0x7f Microwave I
+>>>4 byte 0x00 SNDR (Sound Request)
+>>>4 byte 0x10 SNDD (Sound Dump)
+>>>4 byte 0x20 SNDP (Sound Parameter Change)
+>>>4 byte 0x30 SNDQ (Sound Parameter Inquiry)
+>>>4 byte 0x70 BOOT (Sound Reserved)
+>>>4 byte 0x01 MULR (Multi Request)
+>>>4 byte 0x11 MULD (Multi Dump)
+>>>4 byte 0x21 MULP (Multi Parameter Change)
+>>>4 byte 0x31 MULQ (Multi Parameter Inquiry)
+>>>4 byte 0x71 OS (Multi Reserved)
+>>>4 byte 0x02 DRMR (Drum Map Request)
+>>>4 byte 0x12 DRMD (Drum Map Dump)
+>>>4 byte 0x22 DRMP (Drum Map Parameter Change)
+>>>4 byte 0x32 DRMQ (Drum Map Parameter Inquiry)
+>>>4 byte 0x72 BIN (Drum Map Reserved)
+>>>4 byte 0x03 PATR (Sequencer Pattern Request)
+>>>4 byte 0x13 PATD (Sequencer Pattern Dump)
+>>>4 byte 0x23 PATP (Sequencer Pattern Parameter Change)
+>>>4 byte 0x33 PATQ (Sequencer Pattern Parameter Inquiry)
+>>>4 byte 0x73 AFM (Sequencer Pattern Reserved)
+>>>4 byte 0x04 GLBR (Global Parameter Request)
+>>>4 byte 0x14 GLBD (Global Parameter Dump)
+>>>4 byte 0x24 GLBP (Global Parameter Parameter Change)
+>>>4 byte 0x34 GLBQ (Global Parameter Parameter Inquiry)
+>>>4 byte 0x07 MODR (Mode Parameter Request)
+>>>4 byte 0x17 MODD (Mode Parameter Dump)
+>>>4 byte 0x27 MODP (Mode Parameter Parameter Change)
+>>>4 byte 0x37 MODQ (Mode Parameter Parameter Inquiry)
+>>2 byte 0x10 microQ
+>>>4 byte 0x00 SNDR (Sound Request)
+>>>4 byte 0x10 SNDD (Sound Dump)
+>>>4 byte 0x20 SNDP (Sound Parameter Change)
+>>>4 byte 0x30 SNDQ (Sound Parameter Inquiry)
+>>>4 byte 0x70 (Sound Reserved)
+>>>4 byte 0x01 MULR (Multi Request)
+>>>4 byte 0x11 MULD (Multi Dump)
+>>>4 byte 0x21 MULP (Multi Parameter Change)
+>>>4 byte 0x31 MULQ (Multi Parameter Inquiry)
+>>>4 byte 0x71 OS (Multi Reserved)
+>>>4 byte 0x02 DRMR (Drum Map Request)
+>>>4 byte 0x12 DRMD (Drum Map Dump)
+>>>4 byte 0x22 DRMP (Drum Map Parameter Change)
+>>>4 byte 0x32 DRMQ (Drum Map Parameter Inquiry)
+>>>4 byte 0x72 BIN (Drum Map Reserved)
+>>>4 byte 0x04 GLBR (Global Parameter Request)
+>>>4 byte 0x14 GLBD (Global Parameter Dump)
+>>>4 byte 0x24 GLBP (Global Parameter Parameter Change)
+>>>4 byte 0x34 GLBQ (Global Parameter Parameter Inquiry)
+>>2 byte 0x11 rackAttack
+>>>4 byte 0x00 SNDR (Sound Parameter Request)
+>>>4 byte 0x10 SNDD (Sound Parameter Dump)
+>>>4 byte 0x20 SNDP (Sound Parameter Parameter Change)
+>>>4 byte 0x30 SNDQ (Sound Parameter Parameter Inquiry)
+>>>4 byte 0x01 PRGR (Program Parameter Request)
+>>>4 byte 0x11 PRGD (Program Parameter Dump)
+>>>4 byte 0x21 PRGP (Program Parameter Parameter Change)
+>>>4 byte 0x31 PRGQ (Program Parameter Parameter Inquiry)
+>>>4 byte 0x71 OS (Program Parameter Reserved)
+>>>4 byte 0x03 PATR (Pattern Parameter Request)
+>>>4 byte 0x13 PATD (Pattern Parameter Dump)
+>>>4 byte 0x23 PATP (Pattern Parameter Parameter Change)
+>>>4 byte 0x33 PATQ (Pattern Parameter Parameter Inquiry)
+>>>4 byte 0x04 GLBR (Global Parameter Request)
+>>>4 byte 0x14 GLBD (Global Parameter Dump)
+>>>4 byte 0x24 GLBP (Global Parameter Parameter Change)
+>>>4 byte 0x34 GLBQ (Global Parameter Parameter Inquiry)
+>>>4 byte 0x05 EFXR (FX Parameter Request)
+>>>4 byte 0x15 EFXD (FX Parameter Dump)
+>>>4 byte 0x25 EFXP (FX Parameter Parameter Change)
+>>>4 byte 0x35 EFXQ (FX Parameter Parameter Inquiry)
+>>>4 byte 0x07 MODR (Mode Command Request)
+>>>4 byte 0x17 MODD (Mode Command Dump)
+>>>4 byte 0x27 MODP (Mode Command Parameter Change)
+>>>4 byte 0x37 MODQ (Mode Command Parameter Inquiry)
+>>2 byte 0x03 Wave
+>>>4 byte 0x00 SBPR (Soundprogram)
+>>>4 byte 0x01 SAPR (Performance)
+>>>4 byte 0x02 SWAVE (Wave)
+>>>4 byte 0x03 SWTBL (Wave control table)
+>>>4 byte 0x04 SVT (Velocity Curve)
+>>>4 byte 0x05 STT (Tuning Table)
+>>>4 byte 0x06 SGLB (Global Parameters)
+>>>4 byte 0x07 SARRMAP (Performance Program Change Map)
+>>>4 byte 0x08 SBPRMAP (Sound Program Change Map)
+>>>4 byte 0x09 SBPRPAR (Sound Parameter)
+>>>4 byte 0x0A SARRPAR (Performance Parameter)
+>>>4 byte 0x0B SINSPAR (Instrument/External Parameter)
+>>>4 byte 0x0F SBULK (Bulk Switch on/off)
+
+# Japanese Group
+>1 byte 0x40 Kawai
+>>3 byte 0x20 K1
+>>3 byte 0x22 K4
+
+>1 byte 0x41 Roland
+>>3 byte 0x14 D-50
+>>3 byte 0x2b U-220
+>>3 byte 0x02 TR-707
+
+>1 byte 0x42 Korg
+>>3 byte 0x19 M1
+
+>1 byte 0x43 Yamaha
+>1 byte 0x44 Casio
+>1 byte 0x46 Kamiya
+>1 byte 0x47 Akai
+>1 byte 0x48 Victor
+>1 byte 0x49 Mesosha
+>1 byte 0x4b Fujitsu
+>1 byte 0x4c Sony
+>1 byte 0x4e Teac
+>1 byte 0x50 Matsushita
+>1 byte 0x51 Fostex
+>1 byte 0x52 Zoom
+>1 byte 0x54 Matsushita
+>1 byte 0x57 Acoustic tech. lab.
+
+# Extended (three-byte) manufacturer IDs: the ID byte at offset 1 is 0x00 and
+# two further ID bytes follow it. Each rule reads a big-endian long at
+# offset 1 and masks it with 0xffffff00 so that only bytes 1-3 are compared.
+>1 belong&0xffffff00 0x00007400 Ta Horng
+>1 belong&0xffffff00 0x00007500 e-Tek
+>1 belong&0xffffff00 0x00007600 E-Voice
+>1 belong&0xffffff00 0x00007700 Midisoft
+>1 belong&0xffffff00 0x00007800 Q-Sound
+>1 belong&0xffffff00 0x00007900 Westrex
+>1 belong&0xffffff00 0x00007a00 Nvidia*
+>1 belong&0xffffff00 0x00007b00 ESS
+>1 belong&0xffffff00 0x00007c00 Mediatrix
+>1 belong&0xffffff00 0x00007d00 Brooktree
+>1 belong&0xffffff00 0x00007e00 Otari
+>1 belong&0xffffff00 0x00007f00 Key Electronics
+>1 belong&0xffffff00 0x00010000 Shure
+>1 belong&0xffffff00 0x00010100 AuraSound
+>1 belong&0xffffff00 0x00010200 Crystal
+>1 belong&0xffffff00 0x00010300 Rockwell
+>1 belong&0xffffff00 0x00010400 Silicon Graphics
+>1 belong&0xffffff00 0x00010500 Midiman
+>1 belong&0xffffff00 0x00010600 PreSonus
+>1 belong&0xffffff00 0x00010800 Topaz
+>1 belong&0xffffff00 0x00010900 Cast Lightning
+>1 belong&0xffffff00 0x00010a00 Microsoft
+>1 belong&0xffffff00 0x00010b00 Sonic Foundry
+>1 belong&0xffffff00 0x00010c00 Line 6
+>1 belong&0xffffff00 0x00010d00 Beatnik Inc.
+>1 belong&0xffffff00 0x00010e00 Van Koerving
+>1 belong&0xffffff00 0x00010f00 Altech Systems
+>1 belong&0xffffff00 0x00011000 S & S Research
+>1 belong&0xffffff00 0x00011100 VLSI Technology
+>1 belong&0xffffff00 0x00011200 Chromatic
+>1 belong&0xffffff00 0x00011300 Sapphire
+>1 belong&0xffffff00 0x00011400 IDRC
+>1 belong&0xffffff00 0x00011500 Justonic Tuning
+>1 belong&0xffffff00 0x00011600 TorComp
+>1 belong&0xffffff00 0x00011700 Newtek Inc.
+>1 belong&0xffffff00 0x00011800 Sound Sculpture
+>1 belong&0xffffff00 0x00011900 Walker Technical
+>1 belong&0xffffff00 0x00011a00 Digital Harmony
+>1 belong&0xffffff00 0x00011b00 InVision
+>1 belong&0xffffff00 0x00011c00 T-Square
+>1 belong&0xffffff00 0x00011d00 Nemesys
+>1 belong&0xffffff00 0x00011e00 DBX
+>1 belong&0xffffff00 0x00011f00 Syndyne
+>1 belong&0xffffff00 0x00012000 Bitheadz
+>1 belong&0xffffff00 0x00012100 Cakewalk
+>1 belong&0xffffff00 0x00012200 Staccato
+>1 belong&0xffffff00 0x00012300 National Semicon.
+>1 belong&0xffffff00 0x00012400 Boom Theory
+>1 belong&0xffffff00 0x00012500 Virtual DSP Corp
+>1 belong&0xffffff00 0x00012600 Antares
+>1 belong&0xffffff00 0x00012700 Angel Software
+>1 belong&0xffffff00 0x00012800 St Louis Music
+>1 belong&0xffffff00 0x00012900 Lyrrus dba G-VOX
+>1 belong&0xffffff00 0x00012a00 Ashley Audio
+>1 belong&0xffffff00 0x00012b00 Vari-Lite
+>1 belong&0xffffff00 0x00012c00 Summit Audio
+>1 belong&0xffffff00 0x00012d00 Aureal Semicon.
+>1 belong&0xffffff00 0x00012e00 SeaSound
+>1 belong&0xffffff00 0x00012f00 U.S. Robotics
+>1 belong&0xffffff00 0x00013000 Aurisis
+>1 belong&0xffffff00 0x00013100 Nearfield Multimedia
+>1 belong&0xffffff00 0x00013200 FM7 Inc.
+>1 belong&0xffffff00 0x00013300 Swivel Systems
+>1 belong&0xffffff00 0x00013400 Hyperactive
+>1 belong&0xffffff00 0x00013500 MidiLite
+>1 belong&0xffffff00 0x00013600 Radical
+>1 belong&0xffffff00 0x00013700 Roger Linn
+>1 belong&0xffffff00 0x00013800 Helicon
+>1 belong&0xffffff00 0x00013900 Event
+>1 belong&0xffffff00 0x00013a00 Sonic Network
+>1 belong&0xffffff00 0x00013b00 Realtime Music
+>1 belong&0xffffff00 0x00013c00 Apogee Digital
+
+>1 belong&0xffffff00 0x00202b00 Medeli Electronics
+>1 belong&0xffffff00 0x00202c00 Charlie Lab
+>1 belong&0xffffff00 0x00202d00 Blue Chip Music
+>1 belong&0xffffff00 0x00202e00 BEE OH Corp
+>1 belong&0xffffff00 0x00202f00 LG Semicon America
+>1 belong&0xffffff00 0x00203000 TESI
+>1 belong&0xffffff00 0x00203100 EMAGIC
+>1 belong&0xffffff00 0x00203200 Behringer
+>1 belong&0xffffff00 0x00203300 Access Music
+>1 belong&0xffffff00 0x00203400 Synoptic
+>1 belong&0xffffff00 0x00203500 Hanmesoft Corp
+>1 belong&0xffffff00 0x00203600 Terratec
+>1 belong&0xffffff00 0x00203700 Proel SpA
+>1 belong&0xffffff00 0x00203800 IBK MIDI
+>1 belong&0xffffff00 0x00203900 IRCAM
+>1 belong&0xffffff00 0x00203a00 Propellerhead Software
+>1 belong&0xffffff00 0x00203b00 Red Sound Systems
+>1 belong&0xffffff00 0x00203c00 Electron ESI AB
+>1 belong&0xffffff00 0x00203d00 Sintefex Audio
+>1 belong&0xffffff00 0x00203e00 Music and More
+>1 belong&0xffffff00 0x00203f00 Amsaro
+>1 belong&0xffffff00 0x00204000 CDS Advanced Technology
+>1 belong&0xffffff00 0x00204100 Touched by Sound
+>1 belong&0xffffff00 0x00204200 DSP Arts
+>1 belong&0xffffff00 0x00204300 Phil Rees Music
+>1 belong&0xffffff00 0x00204400 Stamer Musikanlagen GmbH
+>1 belong&0xffffff00 0x00204500 Soundart
+>1 belong&0xffffff00 0x00204600 C-Mexx Software
+>1 belong&0xffffff00 0x00204700 Klavis Tech.
+>1 belong&0xffffff00 0x00204800 Noteheads AB
+
+0 string T707 Roland TR-707 Data
+
+#------------------------------------------------------------------------------
+# $File: teapot,v 1.4 2009/09/19 16:28:12 christos Exp $
+# teapot: file(1) magic for "teapot" spreadsheet
+#
+0 string #!teapot\012xdr teapot work sheet (XDR format)
+
+#------------------------------------------------------------------------------
+# $File: terminfo,v 1.6 2009/09/19 16:28:12 christos Exp $
+# terminfo: file(1) magic for terminfo
+#
+# XXX - byte order for screen images?
+#
+0 string \032\001 Compiled terminfo entry
+0 short 0433 Curses screen image
+0 short 0434 Curses screen image
+
+#------------------------------------------------------------------------------
+# $File: tex,v 1.16 2009/09/19 16:28:12 christos Exp $
+# tex: file(1) magic for TeX files
+#
+# XXX - needs byte-endian stuff (big-endian and little-endian DVI?)
+#
+# From <conklin at talisman.kaleida.com>
+
+# Although we may know the offset of certain text fields in TeX DVI
+# and font files, we can't use them reliably because they are not
+# zero terminated. [but we do anyway, christos]
+0 string \367\002 TeX DVI file
+!:mime application/x-dvi
+>16 string >\0 (%s)
+0 string \367\203 TeX generic font data
+0 string \367\131 TeX packed font data
+>3 string >\0 (%s)
+0 string \367\312 TeX virtual font data
+0 search/1 This\ is\ TeX, TeX transcript text
+0 search/1 This\ is\ METAFONT, METAFONT transcript text
+
+# There is no way to detect TeX Font Metric (*.tfm) files without
+# breaking them apart and reading the data. The following patterns
+# match most *.tfm files generated by METAFONT or afm2tfm.
+2 string \000\021 TeX font metric data
+!:mime application/x-tex-tfm
+>33 string >\0 (%s)
+2 string \000\022 TeX font metric data
+!:mime application/x-tex-tfm
+>33 string >\0 (%s)
+
+# Texinfo and GNU Info, from Daniel Quinlan (quinlan at yggdrasil.com)
+0 search/1 \\input\ texinfo Texinfo source text
+!:mime text/x-texinfo
+0 search/1 This\ is\ Info\ file GNU Info text
+!:mime text/x-info
+
+# TeX documents, from Daniel Quinlan (quinlan at yggdrasil.com)
+0 search/4096 \\input TeX document text
+!:mime text/x-tex
+!:strength - 4
+0 search/4096 \\section LaTeX document text
+!:mime text/x-tex
+!:strength - 1
+0 search/4096 \\setlength LaTeX document text
+!:mime text/x-tex
+!:strength - 4
+0 search/4096 \\documentstyle LaTeX document text
+!:mime text/x-tex
+!:strength - 1
+0 search/4096 \\chapter LaTeX document text
+!:mime text/x-tex
+!:strength - 1
+0 search/4096 \\documentclass LaTeX 2e document text
+!:mime text/x-tex
+!:strength - 4
+0 search/4096 \\relax LaTeX auxiliary file
+!:mime text/x-tex
+!:strength - 4
+0 search/4096 \\contentsline LaTeX table of contents
+!:mime text/x-tex
+!:strength - 4
+0 search/4096 %\ -*-latex-*- LaTeX document text
+!:mime text/x-tex
+
+# TeX document, from Hendrik Scholz <hendrik at scholz.net>
+0 search/1 \\ifx TeX document text
+
+# Index and glossary files
+0 search/4096 \\indexentry LaTeX raw index file
+!:strength - 15
+0 search/4096 \\begin{theindex} LaTeX sorted index
+!:strength - 15
+0 search/4096 \\glossaryentry LaTeX raw glossary
+!:strength - 15
+0 search/4096 \\begin{theglossary} LaTeX sorted glossary
+!:strength - 15
+0 search/4096 This\ is\ makeindex Makeindex log file
+!:strength - 15
+
+# End of TeX
+
+#------------------------------------------------------------------------------
+# file(1) magic for BibTeX text files
+# From Hendrik Scholz <hendrik at scholz.net>
+
+0 search/1/c @article{ BibTeX text file
+0 search/1/c @book{ BibTeX text file
+0 search/1/c @inbook{ BibTeX text file
+0 search/1/c @incollection{ BibTeX text file
+0 search/1/c @inproceedings{ BibTeX text file
+0 search/1/c @manual{ BibTeX text file
+0 search/1/c @misc{ BibTeX text file
+0 search/1/c @preamble{ BibTeX text file
+0 search/1/c @phdthesis{ BibTeX text file
+0 search/1/c @techreport{ BibTeX text file
+0 search/1/c @unpublished{ BibTeX text file
+
+# BibTeX file carrying the full "%%%  BibTeX-file{" header block; the
+# description is spelled "BibTeX" like every other BibTeX rule here.
+73 search/1 %%%\ \ BibTeX-file{ BibTeX text file (with full header)
+
+73 search/1 %%%\ \ @BibTeX-style-file{ BibTeX style text file (with full header)
+
+0 search/1 %\ BibTeX\ standard\ bibliography\ BibTeX standard bibliography style text file
+
+0 search/1 %\ BibTeX\ ` BibTeX custom bibliography style text file
+
+0 search/1 @c\ @mapfile{ TeX font aliases text file
+
+#------------------------------------------------------------------------------
+# $File: tgif,v 1.5 2009/09/19 16:28:12 christos Exp $
+# file(1) magic for tgif(1) files
+# From Hendrik Scholz <hendrik at scholz.net>
+
+0 string %TGIF\ x Tgif file version %s
+
+
+#------------------------------------------------------------------------------
+# $File: ti-8x,v 1.6 2009/09/19 16:28:12 christos Exp $
+# ti-8x: file(1) magic for the TI-8x and TI-9x Graphing Calculators.
+#
+# From: Ryan McGuire (rmcguire at freenet.columbus.oh.us).
+#
+# Update: Romain Lievin (roms at lpg.ticalc.org).
+#
+# NOTE: This list is not complete.
+# Files for the TI-80 and TI-81 are pretty rare. I'm not going to put the
+# program/group magic numbers in here because I cannot find any.
+0 string **TI80** TI-80 Graphing Calculator File.
+0 string **TI81** TI-81 Graphing Calculator File.
+#
+# Magic Numbers for the TI-73
+#
+0 string **TI73** TI-73 Graphing Calculator
+>0x00003B byte 0x00 (real number)
+>0x00003B byte 0x01 (list)
+>0x00003B byte 0x02 (matrix)
+>0x00003B byte 0x03 (equation)
+>0x00003B byte 0x04 (string)
+>0x00003B byte 0x05 (program)
+>0x00003B byte 0x06 (assembly program)
+>0x00003B byte 0x07 (picture)
+>0x00003B byte 0x08 (gdb)
+>0x00003B byte 0x0C (complex number)
+>0x00003B byte 0x0F (window settings)
+>0x00003B byte 0x10 (zoom)
+>0x00003B byte 0x11 (table setup)
+>0x00003B byte 0x13 (backup)
+
+# Magic Numbers for the TI-82
+#
+0 string **TI82** TI-82 Graphing Calculator
+>0x00003B byte 0x00 (real)
+>0x00003B byte 0x01 (list)
+>0x00003B byte 0x02 (matrix)
+>0x00003B byte 0x03 (Y-variable)
+>0x00003B byte 0x05 (program)
+>0x00003B byte 0x06 (protected prgm)
+>0x00003B byte 0x07 (picture)
+>0x00003B byte 0x08 (gdb)
+>0x00003B byte 0x0B (window settings)
+>0x00003B byte 0x0C (window settings)
+>0x00003B byte 0x0D (table setup)
+>0x00003B byte 0x0E (screenshot)
+>0x00003B byte 0x0F (backup)
+#
+# Magic Numbers for the TI-83
+#
+0 string **TI83** TI-83 Graphing Calculator
+>0x00003B byte 0x00 (real)
+>0x00003B byte 0x01 (list)
+>0x00003B byte 0x02 (matrix)
+>0x00003B byte 0x03 (Y-variable)
+>0x00003B byte 0x04 (string)
+>0x00003B byte 0x05 (program)
+>0x00003B byte 0x06 (protected prgm)
+>0x00003B byte 0x07 (picture)
+>0x00003B byte 0x08 (gdb)
+>0x00003B byte 0x0B (window settings)
+>0x00003B byte 0x0C (window settings)
+>0x00003B byte 0x0D (table setup)
+>0x00003B byte 0x0E (screenshot)
+>0x00003B byte 0x13 (backup)
+#
+# Magic Numbers for the TI-83+
+#
+0 string **TI83F* TI-83+ Graphing Calculator
+>0x00003B byte 0x00 (real number)
+>0x00003B byte 0x01 (list)
+>0x00003B byte 0x02 (matrix)
+>0x00003B byte 0x03 (equation)
+>0x00003B byte 0x04 (string)
+>0x00003B byte 0x05 (program)
+>0x00003B byte 0x06 (assembly program)
+>0x00003B byte 0x07 (picture)
+>0x00003B byte 0x08 (gdb)
+>0x00003B byte 0x0C (complex number)
+>0x00003B byte 0x0F (window settings)
+>0x00003B byte 0x10 (zoom)
+>0x00003B byte 0x11 (table setup)
+>0x00003B byte 0x13 (backup)
+>0x00003B byte 0x15 (application variable)
+>0x00003B byte 0x17 (group of variable)
+
+#
+# Magic Numbers for the TI-85
+#
+0 string **TI85** TI-85 Graphing Calculator
+>0x00003B byte 0x00 (real number)
+>0x00003B byte 0x01 (complex number)
+>0x00003B byte 0x02 (real vector)
+>0x00003B byte 0x03 (complex vector)
+>0x00003B byte 0x04 (real list)
+>0x00003B byte 0x05 (complex list)
+>0x00003B byte 0x06 (real matrix)
+>0x00003B byte 0x07 (complex matrix)
+>0x00003B byte 0x08 (real constant)
+>0x00003B byte 0x09 (complex constant)
+>0x00003B byte 0x0A (equation)
+>0x00003B byte 0x0C (string)
+>0x00003B byte 0x0D (function GDB)
+>0x00003B byte 0x0E (polar GDB)
+>0x00003B byte 0x0F (parametric GDB)
+>0x00003B byte 0x10 (diffeq GDB)
+>0x00003B byte 0x11 (picture)
+>0x00003B byte 0x12 (program)
+>0x00003B byte 0x13 (range)
+>0x00003B byte 0x17 (window settings)
+>0x00003B byte 0x18 (window settings)
+>0x00003B byte 0x19 (window settings)
+>0x00003B byte 0x1A (window settings)
+>0x00003B byte 0x1B (zoom)
+>0x00003B byte 0x1D (backup)
+>0x00003B byte 0x1E (unknown)
+>0x00003B byte 0x2A (equation)
+>0x000032 string ZS4 - ZShell Version 4 File.
+>0x000032 string ZS3 - ZShell Version 3 File.
+#
+# Magic Numbers for the TI-86
+#
+0 string **TI86** TI-86 Graphing Calculator
+>0x00003B byte 0x00 (real number)
+>0x00003B byte 0x01 (complex number)
+>0x00003B byte 0x02 (real vector)
+>0x00003B byte 0x03 (complex vector)
+>0x00003B byte 0x04 (real list)
+>0x00003B byte 0x05 (complex list)
+>0x00003B byte 0x06 (real matrix)
+>0x00003B byte 0x07 (complex matrix)
+>0x00003B byte 0x08 (real constant)
+>0x00003B byte 0x09 (complex constant)
+>0x00003B byte 0x0A (equation)
+>0x00003B byte 0x0C (string)
+>0x00003B byte 0x0D (function GDB)
+>0x00003B byte 0x0E (polar GDB)
+>0x00003B byte 0x0F (parametric GDB)
+>0x00003B byte 0x10 (diffeq GDB)
+>0x00003B byte 0x11 (picture)
+>0x00003B byte 0x12 (program)
+>0x00003B byte 0x13 (range)
+>0x00003B byte 0x17 (window settings)
+>0x00003B byte 0x18 (window settings)
+>0x00003B byte 0x19 (window settings)
+>0x00003B byte 0x1A (window settings)
+>0x00003B byte 0x1B (zoom)
+>0x00003B byte 0x1D (backup)
+>0x00003B byte 0x1E (unknown)
+>0x00003B byte 0x2A (equation)
+#
+# Magic Numbers for the TI-89
+#
+0 string **TI89** TI-89 Graphing Calculator
+>0x000048 byte 0x00 (expression)
+>0x000048 byte 0x04 (list)
+>0x000048 byte 0x06 (matrix)
+>0x000048 byte 0x0A (data)
+>0x000048 byte 0x0B (text)
+>0x000048 byte 0x0C (string)
+>0x000048 byte 0x0D (graphic data base)
+>0x000048 byte 0x0E (figure)
+>0x000048 byte 0x10 (picture)
+>0x000048 byte 0x12 (program)
+>0x000048 byte 0x13 (function)
+>0x000048 byte 0x14 (macro)
+>0x000048 byte 0x1C (zipped)
+>0x000048 byte 0x21 (assembler)
+#
+# Magic Numbers for the TI-92
+#
+0 string **TI92** TI-92 Graphing Calculator
+>0x000048 byte 0x00 (expression)
+>0x000048 byte 0x04 (list)
+>0x000048 byte 0x06 (matrix)
+>0x000048 byte 0x0A (data)
+>0x000048 byte 0x0B (text)
+>0x000048 byte 0x0C (string)
+>0x000048 byte 0x0D (graphic data base)
+>0x000048 byte 0x0E (figure)
+>0x000048 byte 0x10 (picture)
+>0x000048 byte 0x12 (program)
+>0x000048 byte 0x13 (function)
+>0x000048 byte 0x14 (macro)
+>0x000048 byte 0x1D (backup)
+#
+# Magic Numbers for the TI-92+/V200
+#
+0 string **TI92P* TI-92+/V200 Graphing Calculator
+>0x000048 byte 0x00 (expression)
+>0x000048 byte 0x04 (list)
+>0x000048 byte 0x06 (matrix)
+>0x000048 byte 0x0A (data)
+>0x000048 byte 0x0B (text)
+>0x000048 byte 0x0C (string)
+>0x000048 byte 0x0D (graphic data base)
+>0x000048 byte 0x0E (figure)
+>0x000048 byte 0x10 (picture)
+>0x000048 byte 0x12 (program)
+>0x000048 byte 0x13 (function)
+>0x000048 byte 0x14 (macro)
+>0x000048 byte 0x1C (zipped)
+>0x000048 byte 0x21 (assembler)
+#
+# Magic Numbers for the TI-73/83+/89/92+/V200 FLASH upgrades
+#
+0x0000016 string Advanced TI-XX Graphing Calculator (FLASH)
+# **TIFL** FLASH container: prints the revision (offsets 8-9), a date shown
+# as hex digits (offsets 12-15), the name string at offset 17, the device and
+# type codes at offsets 48/49 and the little-endian size at offset 74.
+0 string **TIFL** TI-XX Graphing Calculator (FLASH)
+>8 byte >0 - Revision %d
+>>9 byte x \b.%d,
+>12 byte >0 Revision date %02x
+>>13 byte x \b/%02x
+>>14 beshort x \b/%04x,
+# Print the name whenever it is non-empty (first byte greater than NUL);
+# this is the same `>\0` test used by the other string rules in this file.
+>17 string >\0 name: '%s',
+>48 byte 0x74 device: TI-73,
+>48 byte 0x73 device: TI-83+,
+>48 byte 0x98 device: TI-89,
+>48 byte 0x88 device: TI-92+,
+>49 byte 0x23 type: OS upgrade,
+>49 byte 0x24 type: application,
+>49 byte 0x25 type: certificate,
+>49 byte 0x3e type: license,
+>74 lelong >0 size: %ld bytes
+
+# VTi & TiEmu skins (TI Graphing Calculators).
+# From: Romain Lievin (roms at lpg.ticalc.org).
+# Magic Numbers for the VTi skins
+0 string VTI Virtual TI skin
+>3 string v - Version
+>>4 byte >0 \b %c
+>>6 byte x \b.%c
+# Magic Numbers for the TiEmu skins
+0 string TiEmu TiEmu skin
+>6 string v - Version
+>>7 byte >0 \b %c
+>>9 byte x \b.%c
+>>10 byte x \b%c
+
+#------------------------------------------------------------------------------
+# $File: timezone,v 1.11 2009/09/19 16:28:12 christos Exp $
+# timezone: file(1) magic for timezone data
+#
+# from Daniel Quinlan (quinlan at yggdrasil.com)
+# this should work on Linux, SunOS, and maybe others
+# Added new official magic number for recent versions of the Olson code
+# TZif header layout (see tzfile(5)): 4-byte magic "TZif", 1-byte version,
+# 15 reserved bytes, then six big-endian 32-bit counts:
+#   offset 20  UT/GMT indicator count
+#   offset 24  standard/wall indicator count
+#   offset 28  leap second count
+#   offset 32  transition time count
+#   offset 36  local time type count
+#   offset 40  abbreviation character count
+0 string TZif timezone data
+>4 byte 0 \b, old version
+>4 byte >0 \b, version %c
+>20 belong 0 \b, no gmt time flags
+>20 belong 1 \b, 1 gmt time flag
+>20 belong >1 \b, %d gmt time flags
+>24 belong 0 \b, no std time flags
+>24 belong 1 \b, 1 std time flag
+>24 belong >1 \b, %d std time flags
+>28 belong 0 \b, no leap seconds
+>28 belong 1 \b, 1 leap second
+>28 belong >1 \b, %d leap seconds
+>32 belong 0 \b, no transition times
+>32 belong 1 \b, 1 transition time
+>32 belong >1 \b, %d transition times
+>36 belong 0 \b, no local time types
+>36 belong 1 \b, 1 local time type
+>36 belong >1 \b, %d local time types
+>40 belong 0 \b, no abbreviation chars
+>40 belong 1 \b, 1 abbreviation char
+>40 belong >1 \b, %d abbreviation chars
+0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0 old timezone data
+0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\2\0 old timezone data
+0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\3\0 old timezone data
+0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\4\0 old timezone data
+0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\5\0 old timezone data
+0 string \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\6\0 old timezone data
+
+#------------------------------------------------------------------------------
+# $File: troff,v 1.10 2009/09/19 16:28:12 christos Exp $
+# troff: file(1) magic for *roff
+#
+# updated by Daniel Quinlan (quinlan at yggdrasil.com)
+
+# troff input
+0 search/1 .\\" troff or preprocessor input text
+!:mime text/troff
+0 search/1 '\\" troff or preprocessor input text
+!:mime text/troff
+0 search/1 '.\\" troff or preprocessor input text
+!:mime text/troff
+0 search/1 \\" troff or preprocessor input text
+!:mime text/troff
+0 search/1 ''' troff or preprocessor input text
+!:mime text/troff
+0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9][\ \t] troff or preprocessor input text
+!:mime text/troff
+0 regex/20 \^\\.[A-Za-z0-9][A-Za-z0-9]$ troff or preprocessor input text
+!:mime text/troff
+
+# ditroff intermediate output text
+0 search/1 x\ T ditroff output text
+>4 search/1 cat for the C/A/T phototypesetter
+>4 search/1 ps for PostScript
+>4 search/1 dvi for DVI
+>4 search/1 ascii for ASCII
+>4 search/1 lj4 for LaserJet 4
+>4 search/1 latin1 for ISO 8859-1 (Latin 1)
+>4 search/1 X75 for xditview at 75dpi
+>>7 search/1 -12 (12pt)
+>4 search/1 X100 for xditview at 100dpi
+>>8 search/1 -12 (12pt)
+
+# output data formats
+0 string \100\357 very old (C/A/T) troff output data
+
+#------------------------------------------------------------------------------
+# $File: tuxedo,v 1.4 2009/09/19 16:28:13 christos Exp $
+# tuxedo: file(1) magic for BEA TUXEDO data files
+#
+# from Ian Springer <ispringer at hotmail.com>
+#
+0 string \0\0\1\236\0\0\0\0\0\0\0\0\0\0\0\0 BEA TUXEDO DES mask data
+
+#------------------------------------------------------------------------------
+# $File: typeset,v 1.8 2009/09/19 16:28:13 christos Exp $
+# typeset: file(1) magic for other typesetting
+#
+0 string Interpress/Xerox Xerox InterPress data
+>16 string / (version
+>>17 string >\0 %s)
+
+#------------------------------------------------------------------------------
+# $File: unicode,v 1.5 2009/09/19 16:28:13 christos Exp $
+# Unicode: BOM prefixed text files - Adrian Havill <havill at turbolinux.co.jp>
+# GRR: These types should be recognised in file_ascmagic so these
+# encodings can be treated by text patterns.
+# Missing types are already dealt with internally.
+#
+0 string +/v8 Unicode text, UTF-7
+0 string +/v9 Unicode text, UTF-7
+0 string +/v+ Unicode text, UTF-7
+0 string +/v/ Unicode text, UTF-7
+0 string \335\163\146\163 Unicode text, UTF-8-EBCDIC
+0 string \376\377\000\000 Unicode text, UTF-32, big-endian
+0 string \377\376\000\000 Unicode text, UTF-32, little-endian
+0 string \016\376\377 Unicode text, SCSU (Standard Compression Scheme for Unicode)
+
+#------------------------------------------------------------------------------
+# $File: unknown,v 1.7 2009/09/19 16:28:13 christos Exp $
+# unknown: file(1) magic for unknown machines
+#
+# XXX - this probably should be pruned, as it'll match PDP-11 and
+# VAX image formats.
+#
+# 0x107 is 0407; 0x108 is 0410; both are PDP-11 (executable and pure,
+# respectively).
+#
+# 0x109 is 0411; that's PDP-11 split I&D, but the PDP-11 version doesn't
+# have the "version %ld", which may be a bogus COFFism (I don't think
+# there ever was COFF for the PDP-11).
+#
+# 0x10B is 0413; that's VAX demand-paged, but this is a short, not a
+# long, as it would be on a VAX.
+#
+# 0x10C is 0414 and 0x10E is 0416; those *are* unknown.
+#
+0 short 0x107 unknown machine executable
+>8 short >0 not stripped
+>15 byte >0 - version %ld
+0 short 0x108 unknown pure executable
+>8 short >0 not stripped
+>15 byte >0 - version %ld
+0 short 0x109 PDP-11 separate I&D
+>8 short >0 not stripped
+>15 byte >0 - version %ld
+0 short 0x10b unknown pure executable
+>8 short >0 not stripped
+>15 byte >0 - version %ld
+0 long 0x10c unknown demand paged pure executable
+>16 long >0 not stripped
+0 long 0x10e unknown readable demand paged pure executable
+
+#------------------------------------------------------------------------------
+# $File: uuencode,v 1.7 2009/09/19 16:28:13 christos Exp $
+# uuencode: file(1) magic for ASCII-encoded files
+#
+
+# GRR: the first line of xxencoded files is identical to that in uuencoded
+# files, but the first character in most subsequent lines is 'h' instead of
+# 'M'. (xxencoding uses lowercase letters in place of most of uuencode's
+# punctuation and survives BITNET gateways better.) If regular expressions
+# were supported, this entry could possibly be split into two with
+# "begin\040\.\*\012M" or "begin\040\.\*\012h" (where \. and \* are REs).
+0 search/1 begin\ uuencoded or xxencoded text
+
+# btoa(1) is an alternative to uuencode that requires less space.
+0 search/1 xbtoa\ Begin btoa'd text
+
+# ship(1) is another, much cooler alternative to uuencode.
+# Greg Roelofs, newt at uchicago.edu
+0 search/1 $\012ship ship'd binary text
+
+# bencode(8) is used to encode compressed news batches (Bnews/Cnews only?)
+# Greg Roelofs, newt at uchicago.edu
+0 search/1 Decode\ the\ following\ with\ bdeco bencoded News text
+
+# BinHex is the Macintosh ASCII-encoded file format (see also "apple")
+# Daniel Quinlan, quinlan at yggdrasil.com
+11 search/1 must\ be\ converted\ with\ BinHex BinHex binary text
+>41 search/1 x \b, version %.3s
+
+# GRR: handle BASE64
+
+#------------------------------------------------------------------------------
+# $File: varied.out,v 1.21 2009/09/19 16:28:13 christos Exp $
+# varied.out: file(1) magic for various USG systems
+#
+# Herewith many of the object file formats used by USG systems.
+# Most have been moved to files for a particular processor,
+# and deleted if they duplicate other entries.
+#
+0 short 0610 Perkin-Elmer executable
+# AMD 29K
+0 beshort 0572 amd 29k coff noprebar executable
+0 beshort 01572 amd 29k coff prebar executable
+0 beshort 0160007 amd 29k coff archive
+# Cray
+6 beshort 0407 unicos (cray) executable
+# Ultrix 4.3
+596 string \130\337\377\377 Ultrix core file
+>600 string >\0 from '%s'
+# BeOS and MacOS PEF executables
+# From: hplus at zilker.net (Jon Watte)
+0 string Joy!peffpwpc header for PowerPC PEF executable
+#
+# ava assembler/linker Uros Platise <uros.platise at ijs.si>
+0 string avaobj AVR assembler object code
+>7 string >\0 version '%s'
+# gnu gmon magic From: Eugen Dedu <dedu at ese-metz.fr>
+0 string gmon GNU prof performance data
+>4 long x - version %ld
+# From: Dave Pearson <davep at davep.org>
+# Harbour <URL:http://www.harbour-project.org/> HRB files.
+0 string \xc0HRB Harbour HRB file
+>4 short x version %d
+
+# From: Alex Beregszaszi <alex at fsn.hu>
+# 0 string exec BugOS executable
+# 0 string pack BugOS archive
+
+# From: Jason Spence <jspence at lightconsulting.com>
+# Generated by the "examples" in STM's ST40 devkit, and derived code.
+0 lelong 0x13a9f17e ST40 component image format
+>4 string >\0 \b, name '%s'
+
+
+#------------------------------------------------------------------------------
+# $File: varied.script,v 1.6 2009/09/19 16:28:13 christos Exp $
+# varied.script: file(1) magic for various interpreter scripts
+
+0 string #!\ / a
+>3 string >\0 %s script text executable
+0 string #!\t/ a
+>3 string >\0 %s script text executable
+0 string #!/ a
+>2 string >\0 %s script text executable
+0 string #!\ script text executable
+>3 string >\0 for %s
+
+# From: arno <arenevier at fdn.fr>
+# mozilla xpconnect typelib
+# see http://www.mozilla.org/scriptable/typelib_file.html
+0 string XPCOM\nTypeLib\r\n\032 XPConnect Typelib
+>0x10 byte x version %d
+>>0x11 byte x \b.%d
+
+#------------------------------------------------------------------------------
+# $File: vax,v 1.7 2009/09/19 16:28:13 christos Exp $
+# vax: file(1) magic for VAX executable/object and APL workspace
+#
+0 lelong 0101557 VAX single precision APL workspace
+0 lelong 0101556 VAX double precision APL workspace
+
+#
+# VAX a.out (32V, BSD)
+#
+0 lelong 0407 VAX executable
+>16 lelong >0 not stripped
+
+0 lelong 0410 VAX pure executable
+>16 lelong >0 not stripped
+
+0 lelong 0413 VAX demand paged pure executable
+>16 lelong >0 not stripped
+
+0 lelong 0420 VAX demand paged (first page unmapped) pure executable
+>16 lelong >0 not stripped
+
+#
+# VAX COFF
+#
+# The `versions' should be un-commented if they work for you.
+# (Was the problem just one of endianness?)
+#
+0 leshort 0570 VAX COFF executable
+>12 lelong >0 not stripped
+>22 leshort >0 - version %ld
+0 leshort 0575 VAX COFF pure executable
+>12 lelong >0 not stripped
+>22 leshort >0 - version %ld
+
+#------------------------------------------------------------------------------
+# $File: vicar,v 1.4 2009/09/19 16:28:13 christos Exp $
+# vicar: file(1) magic for VICAR files.
+#
+# From: Ossama Othman <othman at astrosun.tn.cornell.edu>
+# VICAR is JPL's in-house spacecraft image processing program
+# VICAR image
+0 string LBLSIZE= VICAR image data
+>32 string BYTE \b, 8 bits = VAX byte
+>32 string HALF \b, 16 bits = VAX word = Fortran INTEGER*2
+>32 string FULL \b, 32 bits = VAX longword = Fortran INTEGER*4
+>32 string REAL \b, 32 bits = VAX longword = Fortran REAL*4
+>32 string DOUB \b, 64 bits = VAX quadword = Fortran REAL*8
+>32 string COMPLEX \b, 64 bits = VAX quadword = Fortran COMPLEX*8
+# VICAR label file
+43 string SFDU_LABEL VICAR label file
+
+#------------------------------------------------------------------------------
+# $File: virtual,v 1.1 2009/12/25 16:04:30 christos Exp $
+# From: James Nobis <quel at quelrod.net>
+# Microsoft hard disk images for:
+# Virtual Server
+# Virtual PC
+# http://technet.microsoft.com/en-us/virtualserver/bb676673.aspx
+# .vhd
+0 string conectix Microsoft Disk Image, Virtual Server or Virtual PC
+
+# Sun xVM VirtualBox Disk Image
+# string <<< Sun xVM VirtualBox Disk Image >>>
+# .vdi
+0 string \<\<\<\ Sun\ xVM\ VirtualBox\ Disk Sun xVM VirtualBox Disk Image
+
+
+
+#------------------------------------------------------------------------------
+# $File: virtutech,v 1.4 2009/09/19 16:28:13 christos Exp $
+# Virtutech Compressed Random Access File Format
+#
+# From <gustav at virtutech.com>
+0 string \211\277\036\203 Virtutech CRAFF
+>4 belong x v%d
+>20 belong 0 uncompressed
+>20 belong 1 bzipp2ed
+>20 belong 2 gzipped
+>24 belong 0 not clean
+
+#------------------------------------------------------------------------------
+# $File: visx,v 1.5 2009/09/19 16:28:13 christos Exp $
+# visx: file(1) magic for Visx format files
+#
+0 short 0x5555 VISX image file
+>2 byte 0 (zero)
+>2 byte 1 (unsigned char)
+>2 byte 2 (short integer)
+>2 byte 3 (float 32)
+>2 byte 4 (float 64)
+>2 byte 5 (signed char)
+>2 byte 6 (bit-plane)
+>2 byte 7 (classes)
+>2 byte 8 (statistics)
+>2 byte 10 (ascii text)
+>2 byte 15 (image segments)
+>2 byte 100 (image set)
+>2 byte 101 (unsigned char vector)
+>2 byte 102 (short integer vector)
+>2 byte 103 (float 32 vector)
+>2 byte 104 (float 64 vector)
+>2 byte 105 (signed char vector)
+>2 byte 106 (bit plane vector)
+>2 byte 121 (feature vector)
+>2 byte 122 (feature vector library)
+>2 byte 124 (chain code)
+>2 byte 126 (bit vector)
+>2 byte 130 (graph)
+>2 byte 131 (adjacency graph)
+>2 byte 132 (adjacency graph library)
+>2 string .VISIX (ascii text)
+
+#------------------------------------------------------------------------------
+# $File: vms,v 1.6 2009/09/19 16:28:13 christos Exp $
+# vms: file(1) magic for VMS executables (experimental)
+#
+# VMS .exe formats, both VAX and AXP (Greg Roelofs, newt at uchicago.edu)
+
+# GRR 950122: I'm just guessing on these, based on inspection of the headers
+# of three executables each for Alpha and VAX architectures. The VAX files
+# all had headers similar to this:
+#
+# 00000 b0 00 30 00 44 00 60 00 00 00 00 00 30 32 30 35 ..0.D.`.....0205
+# 00010 01 01 00 00 ff ff ff ff ff ff ff ff 00 00 00 00 ................
+#
+0 string \xb0\0\x30\0 VMS VAX executable
+>44032 string PK\003\004 \b, Info-ZIP SFX archive v5.12 w/decryption
+#
+# The AXP files all looked like this, except that the byte at offset 0x22
+# was 06 in some of them and 07 in others:
+#
+# 00000 03 00 00 00 00 00 00 00 ec 02 00 00 10 01 00 00 ................
+# 00010 68 00 00 00 98 00 00 00 b8 00 00 00 00 00 00 00 h...............
+# 00020 00 00 07 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
+# 00030 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................
+# 00040 00 00 00 00 ff ff ff ff ff ff ff ff 02 00 00 00 ................
+#
+0 belong 0x03000000 VMS Alpha executable
+>75264 string PK\003\004 \b, Info-ZIP SFX archive v5.12 w/decryption
+
+#------------------------------------------------------------------------------
+# $File: vmware,v 1.7 2009/09/19 16:28:13 christos Exp $
+# VMware specific files (deduced from version 1.1 and log file entries)
+# Anthon van der Neut (anthon at mnt.org)
+0 belong 0x4d52564e VMware nvram
+
+#------------------------------------------------------------------------------
+# $File: vorbis,v 1.16 2009/09/19 16:28:13 christos Exp $
+# vorbis: file(1) magic for Ogg/Vorbis files
+#
+# From Felix von Leitner <leitner at fefe.de>
+# Extended by Beni Cherniavsky <cben at crosswinds.net>
+# Further extended by Greg Wooledge <greg at wooledge.org>
+#
+# Most (everything but the number of channels and bitrate) is commented
+# out with `##' as it's not interesting to the average user. The most
+# probable things advanced users would want to uncomment are probably
+# the number of comments and the encoder version.
+#
+# FIXME: The first match has been made a search, so that it can skip
+# over prepended ID3 tags. This will work for MIME type detection, but
+# won't work for detecting other properties of the file (they all need
+# to be made relative to the search). In any case, if the file has ID3
+# tags, the ID3 information will be printed, not the Ogg information,
+# so until that's fixed, this doesn't matter.
+# FIXME[2]: Disable the above for now, since search assumes text mode.
+#
+# --- Ogg Framing ---
+#0 search/1000 OggS Ogg data
+0 string OggS Ogg data
+!:mime application/ogg
+>4 byte !0 UNKNOWN REVISION %u
+##>4 byte 0 revision 0
+>4 byte 0
+##>>14 lelong x (Serial %lX)
+# non-Vorbis content: FLAC (Free Lossless Audio Codec, http://flac.sourceforge.net)
+>>28 string \x7fFLAC \b, FLAC audio
+# non-Vorbis content: Theora
+>>28 string \x80theora \b, Theora video
+# non-Vorbis content: Kate
+>>28 string \x80kate\0\0\0\0 \b, Kate
+>>>37 ubyte x v%u
+>>>38 ubyte x \b.%u,
+>>>40 byte 0 utf8 encoding,
+>>>40 byte !0 unknown character encoding,
+>>>60 string >\0 language %s,
+>>>60 string \0 no language set,
+>>>76 string >\0 category %s
+>>>76 string \0 no category set
+# non-Vorbis content: Skeleton
+>>28 string fishead\0 \b, Skeleton
+>>>36 short x v%u
+>>>40 short x \b.%u
+# non-Vorbis content: Speex
+>>28 string Speex\ \ \ \b, Speex audio
+# non-Vorbis content: OGM
+>>28 string \x01video\0\0\0 \b, OGM video
+>>>37 string/c div3 (DivX 3)
+>>>37 string/c divx (DivX 4)
+>>>37 string/c dx50 (DivX 5)
+>>>37 string/c xvid (XviD)
+# --- First vorbis packet - general header ---
+>>28 string \x01vorbis \b, Vorbis audio,
+>>>35 lelong !0 UNKNOWN VERSION %lu,
+##>>>35 lelong 0 version 0,
+>>>35 lelong 0
+>>>>39 ubyte 1 mono,
+>>>>39 ubyte 2 stereo,
+>>>>39 ubyte >2 %u channels,
+>>>>40 lelong x %lu Hz
+# Minimal, nominal and maximal bitrates specified when encoding
+>>>>48 string <\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff \b,
+# The above tests if at least one of these is specified:
+>>>>>52 lelong !-1
+# Vorbis RC2 has a bug which puts -1000 in the min/max bitrate fields
+# instead of -1.
+# Vorbis 1.0 uses 0 instead of -1.
+>>>>>>52 lelong !0
+>>>>>>>52 lelong !-1000
+>>>>>>>>52 lelong x <%lu
+>>>>>48 lelong !-1
+>>>>>>48 lelong x ~%lu
+>>>>>44 lelong !-1
+>>>>>>44 lelong !-1000
+>>>>>>>44 lelong !0
+>>>>>>>>44 lelong x >%lu
+>>>>>48 string <\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff bps
+# -- Second vorbis header packet - the comments
+# A kludge to read the vendor string. It's a counted string, not a
+# zero-terminated one, so file(1) can't read it in a generic way.
+# libVorbis is the only one existing currently, so I detect specifically
+# it. The interesting value is the cvs date (8 digits decimal).
+# Post-RC1 Ogg files have the second header packet (and thus the version)
+# in a different place, so we must use an indirect offset.
+>>>(84.b+85) string \x03vorbis
+>>>>(84.b+96) string/c Xiphophorus\ libVorbis\ I \b, created by: Xiphophorus libVorbis I
+>>>>>(84.b+120) string >00000000
+# Map to beta version numbers:
+>>>>>>(84.b+120) string <20000508 (<beta1, prepublic)
+>>>>>>(84.b+120) string 20000508 (1.0 beta 1 or beta 2)
+>>>>>>(84.b+120) string >20000508
+>>>>>>>(84.b+120) string <20001031 (beta2-3)
+>>>>>>(84.b+120) string 20001031 (1.0 beta 3)
+>>>>>>(84.b+120) string >20001031
+>>>>>>>(84.b+120) string <20010225 (beta3-4)
+>>>>>>(84.b+120) string 20010225 (1.0 beta 4)
+>>>>>>(84.b+120) string >20010225
+>>>>>>>(84.b+120) string <20010615 (beta4-RC1)
+>>>>>>(84.b+120) string 20010615 (1.0 RC1)
+>>>>>>(84.b+120) string 20010813 (1.0 RC2)
+>>>>>>(84.b+120) string 20010816 (RC2 - Garf tuned v1)
+>>>>>>(84.b+120) string 20011014 (RC2 - Garf tuned v2)
+>>>>>>(84.b+120) string 20011217 (1.0 RC3)
+>>>>>>(84.b+120) string 20011231 (1.0 RC3)
+# Some pre-1.0 CVS snapshots still had "Xiphophorus"...
+>>>>>>(84.b+120) string >20011231 (pre-1.0 CVS)
+# For the 1.0 release, Xiphophorus is replaced by Xiph.Org
+>>>>(84.b+96) string/c Xiph.Org\ libVorbis\ I \b, created by: Xiph.Org libVorbis I
+>>>>>(84.b+117) string >00000000
+>>>>>>(84.b+117) string <20020717 (pre-1.0 CVS)
+>>>>>>(84.b+117) string 20020717 (1.0)
+>>>>>>(84.b+117) string 20030909 (1.0.1)
+>>>>>>(84.b+117) string 20040629 (1.1.0 RC1)
+
+#------------------------------------------------------------------------------
+# $File: vxl,v 1.4 2009/09/19 16:28:13 christos Exp $
+# VXL: file(1) magic for VXL binary IO data files
+#
+# from Ian Scott <scottim at sf.net>
+#
+# VXL is a collection of C++ libraries for Computer Vision.
+# See the vsl chapter in the VXL Book for more info
+# http://www.isbe.man.ac.uk/public_vxl_doc/books/vxl/book.html
+# http://vxl.sf.net
+
+2 lelong 0x472b2c4e VXL data file,
+>0 leshort >0 schema version no %d
+
+#------------------------------------------------------------------------------
+# $File: warc,v 1.2 2009/09/19 16:28:13 christos Exp $
+# warc: file(1) magic for WARC files
+
+0 string WARC/ WARC Archive
+>5 string x version %.4s
+
+#------------------------------------------------------------------------------
+# weak: file(1) magic for very weak magic entries, disabled by default
+#
+# These entries are so weak that they might interfere with identification of
+# other formats. Examples include:
+# - Only identify for 1 or 2 bytes
+# - Match against very wide range of values
+# - Match against generic word in some spoken languages (e.g. English)
+
+# Summary: Computer Graphics Metafile
+# Extension: .cgm
+#0 beshort&0xffe0 0x0020 binary Computer Graphics Metafile
+#0 beshort 0x3020 character Computer Graphics Metafile
+
+#0 string =!! Bennet Yee's "face" format
+
+#------------------------------------------------------------------------------
+# $File: windows,v 1.4 2009/09/19 16:28:13 christos Exp $
+# windows: file(1) magic for Microsoft Windows
+#
+# This file is mainly reserved for files where programs
+# using them are run almost always on MS Windows 3.x or
+# above, or files only used exclusively in Windows OS,
+# where there is no better category to allocate for.
+# For example, even though WinZIP runs almost only on Windows,
+# it is better to treat its files as "archive" instead.
+# For formats usable in DOS, such as generic executable
+# formats, please specify under "msdos" file.
+#
+
+
+# Summary: Outlook Express DBX file
+# Extension: .dbx
+# Created by: Christophe Monniez
+0 string \xCF\xAD\x12\xFE MS Outlook Express DBX file
+>4 byte =0xC5 \b, message database
+>4 byte =0xC6 \b, folder database
+>4 byte =0xC7 \b, account information
+>4 byte =0x30 \b, offline database
+
+
+# Summary: Windows crash dump
+# Extension: .dmp
+# Created by: Andreas Schuster (http://computer.forensikblog.de/)
+# Reference (1): http://computer.forensikblog.de/en/2008/02/64bit_magic.html
+# Modified by (1): Abel Cheung (Avoid match with first 4 bytes only)
+0 string PAGE
+>4 string DUMP MS Windows 32bit crash dump
+>>0x05c byte 0 \b, no PAE
+>>0x05c byte 1 \b, PAE
+>>0xf88 lelong 1 \b, full dump
+>>0xf88 lelong 2 \b, kernel dump
+>>0xf88 lelong 3 \b, small dump
+>>0x068 lelong x \b, %ld pages
+>4 string DU64 MS Windows 64bit crash dump
+>>0xf98 lelong 1 \b, full dump
+>>0xf98 lelong 2 \b, kernel dump
+>>0xf98 lelong 3 \b, small dump
+>>0x090 lequad x \b, %lld pages
+
+
+# Summary: Vista Event Log
+# Extension: .evtx
+# Created by: Andreas Schuster (http://computer.forensikblog.de/)
+# Reference (1): http://computer.forensikblog.de/en/2007/05/some_magic.html
+0 string ElfFile\0 MS Windows Vista Event Log
+>0x2a leshort x \b, %d chunks
+>>0x10 lelong x \b (no. %d in use)
+>0x18 lelong >1 \b, next record no. %d
+>0x18 lelong =1 \b, empty
+>0x78 lelong &1 \b, DIRTY
+>0x78 lelong &2 \b, FULL
+
+
+# Summary: Windows 3.1 group files
+# Extension: .grp
+# Created by: unknown
+0 string \120\115\103\103 MS Windows 3.1 group files
+
+
+# Summary: Old format help files
+# Extension: .hlp
+# Created by: Dirk Jagdmann <doj at cubic.org>
+0 lelong 0x00035f3f MS Windows 3.x help file
+
+
+# Summary: Hyper terminal
+# Extension: .ht
+# Created by: unknown
+0 string HyperTerminal\
+>15 string 1.0\ --\ HyperTerminal\ data\ file MS Windows HyperTerminal profile
+
+
+# Summary: Windows shortcut
+# Extension: .lnk
+# Created by: unknown
+0 string \114\0\0\0\001\024\002\0\0\0\0\0\300\0\0\0\0\0\0\106 MS Windows shortcut
+
+
+# Summary: Outlook Personal Folders
+# Created by: unknown
+0 lelong 0x4E444221 Microsoft Outlook email folder
+>10 leshort 0x0e (<=2002)
+>10 leshort 0x17 (>=2003)
+
+
+# Summary: Windows help cache
+# Created by: unknown
+0 string \164\146\115\122\012\000\000\000\001\000\000\000 MS Windows help cache
+
+
+# Summary: IE cache file
+# Created by: Christophe Monniez
+0 string Client\ UrlCache\ MMF Internet Explorer cache file
+>20 string >\0 version %s
+
+
+# Summary: Registry files
+# Created by: unknown
+# Modified by (1): Joerg Jenderek
+0 string regf MS Windows registry file, NT/2000 or above
+0 string CREG MS Windows 95/98/ME registry file
+0 string SHCC3 MS Windows 3.1 registry file
+
+
+# Summary: Windows Registry text
+# Extension: .reg
+# Submitted by: Abel Cheung <abelcheung at gmail.com>
+0 string REGEDIT4\r\n\r\n Windows Registry text (Win95 or above)
+0 string Windows\ Registry\ Editor\
+>&0 string Version\ 5.00\r\n\r\n Windows Registry text (Win2K or above)
+
+
+# From: Pal Tamas <folti at balabit.hu>
+# Autorun File
+0 string/c [autorun]\r\n Microsoft Windows Autorun file.
+!:mime application/x-setupscript.
+
+#------------------------------------------------------------------------------
+# $File: wireless,v 1.2 2009/09/19 16:28:13 christos Exp $
+# wireless-regdb: file(1) magic for CRDA wireless-regdb file format
+#
+0 string RGDB CRDA wireless regulatory database file
+>4 belong 19 (Version 1)
+
+#------------------------------------------------------------------------------
+# $File: wordprocessors,v 1.14 2009/09/19 16:28:13 christos Exp $
+# wordprocessors: file(1) magic for word processors.
+#
+####### PWP file format used on Smith Corona Personal Word Processors:
+2 string \040\040\040\040\040\040\040\040\040\040\040ML4D\040'92 Smith Corona PWP
+>24 byte 2 \b, single spaced
+>24 byte 3 \b, 1.5 spaced
+>24 byte 4 \b, double spaced
+>25 byte 0x42 \b, letter
+>25 byte 0x54 \b, legal
+>26 byte 0x46 \b, A4
+
+#WordPerfect type files Version 1.6 - PLEASE DO NOT REMOVE THIS LINE
+0 string \377WPC\020\000\000\000\022\012\001\001\000\000\000\000 (WP) loadable file
+>15 byte 0 Optimized for Intel
+>15 byte 1 Optimized for Non-Intel
+1 string WPC (Corel/WP)
+>8 short 257 WordPerfect macro
+>8 short 258 WordPerfect help file
+>8 short 259 WordPerfect keyboard file
+>8 short 266 WordPerfect document
+>8 short 267 WordPerfect dictionary
+>8 short 268 WordPerfect thesaurus
+>8 short 269 WordPerfect block
+>8 short 270 WordPerfect rectangular block
+>8 short 271 WordPerfect column block
+>8 short 272 WordPerfect printer data
+>8 short 275 WordPerfect printer data
+>8 short 276 WordPerfect driver resource data
+>8 short 279 WordPerfect hyphenation code
+>8 short 280 WordPerfect hyphenation data
+>8 short 281 WordPerfect macro resource data
+>8 short 283 WordPerfect hyphenation lex
+>8 short 285 WordPerfect wordlist
+>8 short 286 WordPerfect equation resource data
+>8 short 289 WordPerfect spell rules
+>8 short 290 WordPerfect dictionary rules
+>8 short 295 WordPerfect spell rules (Microlytics)
+>8 short 299 WordPerfect settings file
+>8 short 301 WordPerfect 4.2 document
+>8 short 325 WordPerfect dialog file
+>8 short 332 WordPerfect button bar
+>8 short 513 Shell macro
+>8 short 522 Shell definition
+>8 short 769 Notebook macro
+>8 short 770 Notebook help file
+>8 short 771 Notebook keyboard file
+>8 short 778 Notebook definition
+>8 short 1026 Calculator help file
+>8 short 1538 Calendar help file
+>8 short 1546 Calendar data file
+>8 short 1793 Editor macro
+>8 short 1794 Editor help file
+>8 short 1795 Editor keyboard file
+>8 short 1817 Editor macro resource file
+>8 short 2049 Macro editor macro
+>8 short 2050 Macro editor help file
+>8 short 2051 Macro editor keyboard file
+>8 short 2305 PlanPerfect macro
+>8 short 2306 PlanPerfect help file
+>8 short 2307 PlanPerfect keyboard file
+>8 short 2314 PlanPerfect worksheet
+>8 short 2319 PlanPerfect printer definition
+>8 short 2322 PlanPerfect graphic definition
+>8 short 2323 PlanPerfect data
+>8 short 2324 PlanPerfect temporary printer
+>8 short 2329 PlanPerfect macro resource data
+>8 byte 11 Mail
+>8 short 2818 help file
+>8 short 2821 distribution list
+>8 short 2826 out box
+>8 short 2827 in box
+>8 short 2836 users archived mailbox
+>8 short 2837 archived message database
+>8 short 2838 archived attachments
+>8 short 3083 Printer temporary file
+>8 short 3330 Scheduler help file
+>8 short 3338 Scheduler in file
+>8 short 3339 Scheduler out file
+>8 short 3594 GroupWise settings file
+>8 short 3601 GroupWise directory services
+>8 short 3627 GroupWise settings file
+>8 short 4362 Terminal resource data
+>8 short 4363 Terminal resource data
+>8 short 4395 Terminal resource data
+>8 short 4619 GUI loadable text
+>8 short 4620 graphics resource data
+>8 short 4621 printer settings file
+>8 short 4622 port definition file
+>8 short 4623 print queue parameters
+>8 short 4624 compressed file
+>8 short 5130 Network service msg file
+>8 short 5131 Network service msg file
+>8 short 5132 Async gateway login msg
+>8 short 5134 GroupWise message file
+>8 short 7956 GroupWise admin domain database
+>8 short 7957 GroupWise admin host database
+>8 short 7959 GroupWise admin remote host database
+>8 short 7960 GroupWise admin ADS deferment data file
+>8 short 8458 IntelliTAG (SGML) compiled DTD
+>8 long 18219264 WordPerfect graphic image (1.0)
+>8 long 18219520 WordPerfect graphic image (2.0)
+#end of WordPerfect type files Version 1.6 - PLEASE DO NOT REMOVE THIS LINE
+
+# Hangul (Korean) Word Processor File
+0 string HWP\ Document\ File Hangul (Korean) Word Processor File 3.0
+# From: Won-Kyu Park <wkpark at kldp.org>
+512 string R\0o\0o\0t\0 Hangul (Korean) Word Processor File 2000
+!:mime application/x-hwp
+
+# CosmicBook, from Benoît Rouits
+0 string CSBK Ted Neslson's CosmicBook hypertext file
+
+2 string EYWR AmigaWriter file
+
+# chi: file(1) magic for ChiWriter files
+0 string \\1cw\ ChiWriter file
+>5 string >\0 version %s
+0 string \\1cw ChiWriter file
+
+# Quark Express from http://www.garykessler.net/library/file_sigs.html
+2 string IIXPR3 Intel Quark Express Document (English)
+2 string IIXPRa Intel Quark Express Document (Korean)
+2 string MMXPR3 Motorola Quark Express Document (English)
+!:mime application/x-quark-xpress-3
+2 string MMXPRa Motorola Quark Express Document (Korean)
+
+# adobe indesign (document, whatever...) from querkan
+0 belong 0x0606edf5 Adobe InDesign
+>16 string DOCUMENT Document
+
+#------------------------------------------------------------------------------
+# ichitaro456: file(1) magic for Just System Word Processor Ichitaro
+#
+# Contributor kenzo-:
+# Reverse-engineered JS Ichitaro magic numbers
+#
+
+0 string DOC
+>43 byte 0x14 Just System Word Processor Ichitaro v4
+!:mime application/x-ichitaro4
+>144 string JDASH application/x-ichitaro4
+
+0 string DOC
+>43 byte 0x15 Just System Word Processor Ichitaro v5
+!:mime application/x-ichitaro5
+
+0 string DOC
+>43 byte 0x16 Just System Word Processor Ichitaro v6
+!:mime application/x-ichitaro6
+
+#------------------------------------------------------------------------------
+# $File: xdelta,v 1.4 2009/09/19 16:28:13 christos Exp $
+# file(1) magic(5) data for xdelta Josh MacDonald <jmacd at CS.Berkeley.EDU>
+#
+0 string %XDELTA% XDelta binary patch file 0.14
+0 string %XDZ000% XDelta binary patch file 0.18
+0 string %XDZ001% XDelta binary patch file 0.20
+0 string %XDZ002% XDelta binary patch file 1.0
+0 string %XDZ003% XDelta binary patch file 1.0.4
+0 string %XDZ004% XDelta binary patch file 1.1
+
+#------------------------------------------------------------------------------
+# $File: xenix,v 1.9 2009/09/19 16:28:13 christos Exp $
+# xenix: file(1) magic for Microsoft Xenix
+#
+# "Middle model" stuff, and "Xenix 8086 relocatable or 80286 small
+# model" lifted from "magic.xenix", with comment "derived empirically;
+# treat as folklore until proven"
+#
+# "small model", "large model", "huge model" stuff lifted from XXX
+#
+# XXX - "x.out" collides with PDP-11 archives
+#
+0 string core core file (Xenix)
+0 byte 0x80 8086 relocatable (Microsoft)
+0 leshort 0xff65 x.out
+>2 string __.SYMDEF randomized
+>0 byte x archive
+0 leshort 0x206 Microsoft a.out
+>8 leshort 1 Middle model
+>0x1e leshort &0x10 overlay
+>0x1e leshort &0x2 separate
+>0x1e leshort &0x4 pure
+>0x1e leshort &0x800 segmented
+>0x1e leshort &0x400 standalone
+>0x1e leshort &0x8 fixed-stack
+>0x1c byte &0x80 byte-swapped
+>0x1c byte &0x40 word-swapped
+>0x10 lelong >0 not-stripped
+>0x1e leshort ^0xc000 pre-SysV
+>0x1e leshort &0x4000 V2.3
+>0x1e leshort &0x8000 V3.0
+>0x1c byte &0x4 86
+>0x1c byte &0xb 186
+>0x1c byte &0x9 286
+>0x1c byte &0xa 386
+>0x1f byte <0x040 small model
+>0x1f byte =0x048 large model
+>0x1f byte =0x049 huge model
+>0x1e leshort &0x1 executable
+>0x1e leshort ^0x1 object file
+>0x1e leshort &0x40 Large Text
+>0x1e leshort &0x20 Large Data
+>0x1e leshort &0x120 Huge Objects Enabled
+>0x10 lelong >0 not stripped
+
+0 leshort 0x140 old Microsoft 8086 x.out
+>0x3 byte &0x4 separate
+>0x3 byte &0x2 pure
+>0 byte &0x1 executable
+>0 byte ^0x1 relocatable
+>0x14 lelong >0 not stripped
+
+0 lelong 0x206 b.out
+>0x1e leshort &0x10 overlay
+>0x1e leshort &0x2 separate
+>0x1e leshort &0x4 pure
+>0x1e leshort &0x800 segmented
+>0x1e leshort &0x400 standalone
+>0x1e leshort &0x1 executable
+>0x1e leshort ^0x1 object file
+>0x1e leshort &0x4000 V2.3
+>0x1e leshort &0x8000 V3.0
+>0x1c byte &0x4 86
+>0x1c byte &0xb 186
+>0x1c byte &0x9 286
+>0x1c byte &0x29 286
+>0x1c byte &0xa 386
+>0x1e leshort &0x4 Large Text
+>0x1e leshort &0x2 Large Data
+>0x1e leshort &0x102 Huge Objects Enabled
+
+0 leshort 0x580 XENIX 8086 relocatable or 80286 small model
+
+#------------------------------------------------------------------------------
+# $File: xilinx,v 1.4 2009/09/19 16:28:13 christos Exp $
+# This is Aaron's attempt at a MAGIC file for Xilinx .bit files.
+# Xilinx-Magic at RevRagnarok.com
+# Got the info from FPGA-FAQ 0026
+#
+# First there is the sync header and its length
+0 beshort 0x0009
+>2 belong =0x0ff00ff0
+>>&0 belong =0x0ff00ff0
+>>>&0 beshort =0x0000
+>>>>&0 pstring a Xilinx BIT data
+# Next is a Pascal-style string with the NCD name. We want to capture that.
+>>>>0x0F pstring x - from %s
+# It is followed by a NUL
+>>>>>&1 byte 0x00
+# And then 'b'
+>>>>>&2 string b
+# With the part number:
+#>>>>>&5 string 4v (Virtex4)
+#>>>>>&5 string 2v (Virtex II
+#>>>>>>&0 string !p \b)
+#>>>>>>&0 string p Pro)
+>>>>>&4 pstring x - for %s
+# And then NUL / 'c' / Build Data / NUL / 'd' / Date / NUL / 'e' / Data Length
+>>>>>>&1 byte 0x00
+>>>>>>&2 string c
+>>>>>>&4 pstring x - built %s
+>>>>>>>&1 byte 0x00
+>>>>>>>&2 string d
+>>>>>>>&4 pstring x \b(%s)
+>>>>>>>>&1 byte 0x00
+>>>>>>>>&2 string e
+>>>>>>>>&4 belong x - data length 0x%lx
+
+#------------------------------------------------------------------------------
+# $File: xo65,v 1.4 2009/09/19 16:28:13 christos Exp $
+# xo65 object files
+# From: "Ullrich von Bassewitz" <uz at cc65.org>
+#
+0 string \x55\x7A\x6E\x61 xo65 object,
+>4 leshort x version %d,
+>6 leshort&0x0001 =0x0001 with debug info
+>6 leshort&0x0001 =0x0000 no debug info
+
+# xo65 library files
+0 string \x6E\x61\x55\x7A xo65 library,
+>4 leshort x version %d
+
+# o65 object files
+0 string \x01\x00\x6F\x36\x35 o65
+>6 leshort&0x1000 =0x0000 executable,
+>6 leshort&0x1000 =0x1000 object,
+>5 byte x version %d,
+>6 leshort&0x8000 =0x8000 65816,
+>6 leshort&0x8000 =0x0000 6502,
+>6 leshort&0x2000 =0x2000 32 bit,
+>6 leshort&0x2000 =0x0000 16 bit,
+>6 leshort&0x4000 =0x4000 page reloc,
+>6 leshort&0x4000 =0x0000 byte reloc,
+>6 leshort&0x0003 =0x0000 alignment 1
+>6 leshort&0x0003 =0x0001 alignment 2
+>6 leshort&0x0003 =0x0002 alignment 4
+>6 leshort&0x0003 =0x0003 alignment 256
+
+#------------------------------------------------------------------------------
+# $File: xwindows,v 1.6 2009/09/19 16:28:13 christos Exp $
+# xwindows: file(1) magic for various X/Window system file formats.
+
+# Compiled X Keymap
+# XKM (compiled X keymap) files (including version and byte ordering)
+1 string mkx Compiled XKB Keymap: lsb,
+>0 byte >0 version %d
+>0 byte =0 obsolete
+0 string xkm Compiled XKB Keymap: msb,
+>3 byte >0 version %d
+>0 byte =0 obsolete
+
+# xfsdump archive
+0 string xFSdump0 xfsdump archive
+>8 belong x (version %d)
+
+# Jaleo XFS files
+0 long 395726 Jaleo XFS file
+>4 long x - version %ld
+>8 long x - [%ld -
+>20 long x %ldx
+>24 long x %ldx
+>28 long 1008 YUV422]
+>28 long 1000 RGB24]
+
+# Xcursor data
+# X11 mouse cursor format defined in libXcursor, see
+# http://www.x.org/archive/X11R6.8.1/doc/Xcursor.3.html
+# http://cgit.freedesktop.org/xorg/lib/libXcursor/tree/include/X11/Xcursor/Xcursor.h
+0 string Xcur Xcursor data
+!:mime image/x-xcursor
+>10 leshort x version %hd
+>>8 leshort x \b.%hd
+
+#------------------------------------------------------------------------------
+# $File: zilog,v 1.7 2009/09/19 16:28:13 christos Exp $
+# zilog: file(1) magic for Zilog Z8000.
+#
+# Was it big-endian or little-endian? My Product Specification doesn't
+# say.
+#
+0 long 0xe807 object file (z8000 a.out)
+0 long 0xe808 pure object file (z8000 a.out)
+0 long 0xe809 separate object file (z8000 a.out)
+0 long 0xe805 overlay object file (z8000 a.out)
+
+#------------------------------------------------------------------------------
+# $File: zyxel,v 1.6 2009/09/19 16:28:13 christos Exp $
+# zyxel: file(1) magic for ZyXEL modems
+#
+# From <rob at pe1chl.ampr.org>
+# These are the /etc/magic entries to decode datafiles as used for the
+# ZyXEL U-1496E DATA/FAX/VOICE modems. (This header conforms to a
+# ZyXEL-defined standard)
+
+0 string ZyXEL\002 ZyXEL voice data
+>10 byte 0 - CELP encoding
+>10 byte&0x0B 1 - ADPCM2 encoding
+>10 byte&0x0B 2 - ADPCM3 encoding
+>10 byte&0x0B 3 - ADPCM4 encoding
+>10 byte&0x0B 8 - New ADPCM3 encoding
+>10 byte&0x04 4 with resync
diff --git a/tools/dbgap-mount/Makefile b/tools/dbgap-mount/Makefile
index 1e97665..dfa25d0 100644
--- a/tools/dbgap-mount/Makefile
+++ b/tools/dbgap-mount/Makefile
@@ -45,7 +45,8 @@ endif
INT_TOOLS = \
EXT_TOOLS = \
- demo
+ demo \
+ dbgap-mount-tool
ALL_TOOLS = \
$(INT_TOOLS) \
@@ -105,17 +106,37 @@ tag: \
# demo
# test program for new XFS toy
#
-ZDB_NFUS_SRC = \
+ZDB_DEMO_SRC = \
demo
-ZDB_NFUS_OBJ = \
- $(addsuffix .$(OBJX),$(ZDB_NFUS_SRC))
+ZDB_DEMO_OBJ = \
+ $(addsuffix .$(OBJX),$(ZDB_DEMO_SRC))
-ZDB_NFUS_LIB = -lkapp -lncbi-vdb -lxfs $(DOCAN_LIBS)
+ZDB_DEMO_LIB = -lkapp -lncbi-vdb -lxfs $(DOCAN_LIBS)
-$(BINDIR)/demo: $(ZDB_NFUS_OBJ)
- $(LD) --exe --vers $(SRCDIR) -o $@ $^ $(ZDB_NFUS_LIB)
+$(BINDIR)/demo: $(ZDB_DEMO_OBJ)
+ $(LD) --exe --vers $(SRCDIR) -o $@ $^ $(ZDB_DEMO_LIB)
demo_tag:
- @ $(TOP)/build/tag-module.sh $(MODULE) demo $(ZDB_NFUS_OBJ)
+ @ $(TOP)/build/tag-module.sh $(MODULE) demo $(ZDB_DEMO_OBJ)
+
+#-------------------------------------------------------------------------------
+# dbgap-mount-tool
+# built from wrap + dbgap-mount-tool; links xfs, kapp, ncbi-vdb, kfg
+#
+ZDB_TOOL_SRC = \
+ wrap \
+ dbgap-mount-tool
+
+ZDB_TOOL_OBJ = \
+ $(addsuffix .$(OBJX),$(ZDB_TOOL_SRC))
+
+# ZDB_TOOL_LIB = -lkapp -lncbi-vdb -lxfs $(DOCAN_LIBS)
+ZDB_TOOL_LIB = -sxfs -skapp -sncbi-vdb -skfg $(DOCAN_LIBS)
+
+$(BINDIR)/dbgap-mount-tool: $(ZDB_TOOL_OBJ)
+ $(LD) --exe --vers $(SRCDIR) -o $@ $^ $(ZDB_TOOL_LIB)
+
+dbgap-mount-tool_tag:
+ @ $(TOP)/build/tag-module.sh $(MODULE) dbgap-mount-tool $(ZDB_TOOL_OBJ)
diff --git a/tools/dbgap-mount/dbgap-mount-tool.c b/tools/dbgap-mount/dbgap-mount-tool.c
new file mode 100644
index 0000000..4303799
--- /dev/null
+++ b/tools/dbgap-mount/dbgap-mount-tool.c
@@ -0,0 +1,565 @@
+/*==============================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "dbgap-mount-tool.vers.h" /* MOUNT_TOOL_VERS */
+
+#include <kapp/main.h> /* KMain */
+#include <kapp/args.h> /* KMain */
+
+#include <klib/text.h>
+#include <klib/printf.h>
+#include <klib/out.h> /* OUTMSG */
+#include <klib/refcount.h>
+#include <klib/rc.h>
+#include <klib/log.h>
+
+#include <kfs/directory.h>
+#include <kfs/file.h>
+
+#include <xfs/model.h>
+#include <xfs/node.h>
+#include <xfs/tree.h>
+#include <xfs/xfs.h>
+
+#include "dbgap-mount-tool.h"
+
+#include <sysalloc.h>
+#include <stdio.h>
+#include <string.h>
+
+/******************************************************************************/
+
+/*)))
+ \\\ Celebrity is here ...
+ (((*/
+
+XFS_EXTERN rc_t CC XFS_InitAll_MHR ( const char * ConfigFile );
+XFS_EXTERN rc_t CC XFS_DisposeAll_MHR ();
+
+/* MakeModel ()
+ *  Builds an XFS model with a single root node named "gap-project",
+ *  sets its access mode and project id properties, and hands the model
+ *  back through Model.
+ *
+ *  Model     [OUT] - receives the new model; left NULL on failure
+ *  ProjectId [IN]  - value stored under XFS_MODEL_PROJECTID
+ *  ReadOnly  [IN]  - true selects XFS_MODEL_MODE_RO, false XFS_MODEL_MODE_RW
+ *
+ *  Returns 0 on success. On any failure the partially built model is
+ *  released here, because the caller never receives a pointer to it.
+ */
+static
+rc_t CC
+MakeModel (
+    struct XFSModel ** Model,
+    const char * ProjectId,
+    bool ReadOnly
+)
+{
+    rc_t RCt;
+    struct XFSModel * Mod;
+    struct XFSModelNode * ModNod;
+
+    RCt = 0;
+    Mod = NULL;
+    ModNod = NULL;
+
+    XFS_CSAN ( Model )
+    XFS_CAN ( Model )
+    XFS_CAN ( ProjectId )
+
+    RCt = XFSModelFromScratch ( & Mod, NULL );
+
+    if ( RCt == 0 ) {
+        RCt = XFSModelAddRootNode ( Mod, "gap-project" );
+    }
+
+    if ( RCt == 0 ) {
+        ModNod = ( struct XFSModelNode * ) XFSModelRootNode ( Mod );
+        if ( ModNod == NULL ) {
+            RCt = XFS_RC ( rcInvalid );
+        }
+    }
+
+    if ( RCt == 0 ) {
+        RCt = XFSModelNodeSetProperty (
+                                ModNod,
+                                XFS_MODEL_MODE,
+                                ( ReadOnly
+                                    ? XFS_MODEL_MODE_RO
+                                    : XFS_MODEL_MODE_RW
+                                )
+                                );
+    }
+
+    if ( RCt == 0 ) {
+        RCt = XFSModelNodeSetProperty (
+                                ModNod,
+                                XFS_MODEL_PROJECTID,
+                                ProjectId
+                                );
+    }
+
+    if ( RCt == 0 ) {
+        * Model = Mod;
+    }
+    else {
+        /* Nobody else holds Mod, so drop it here or it is leaked
+         */
+        if ( Mod != NULL ) {
+            XFSModelRelease ( Mod );
+        }
+    }
+
+    return RCt;
+}
+
+
+/* DoFukan ()
+ *  Performs the actual mount: initializes XFS, builds the model, tree
+ *  and control objects, configures the control (mount point, label,
+ *  optional log file, optional daemon mode) and runs XFSStart followed
+ *  by XFSStop. Everything created here is released before returning.
+ *
+ *  ProjectId  [IN] - project id, also used in the label "dbGaP(<id>)"
+ *  MountPoint [IN] - passed to XFSControlSetMountPoint
+ *  LogFile    [IN] - optional, passed to XFSControlSetLogFile when set
+ *  ProgName   [IN] - currently unused
+ *  Daemonize  [IN] - when true XFSControlDaemonize is called
+ *  ReadOnly   [IN] - forwarded to MakeModel
+ *
+ *  Returns 0 on success, otherwise the rc_t of the first failing step.
+ */
+static
+rc_t CC
+DoFukan (
+    const char * ProjectId,
+    const char * MountPoint,
+    const char * LogFile,
+    const char * ProgName,
+    bool Daemonize,
+    bool ReadOnly
+)
+{
+    rc_t RCt;
+    struct XFSModel * TheModel;
+    struct XFSTree * TheTree;
+    struct XFSControl * TheControl;
+    char Lable [ 256 ];
+    size_t NumWr;
+
+    RCt = 0;
+    TheModel = NULL;
+    TheTree = NULL;
+    TheControl = NULL;
+    * Lable = 0;
+    NumWr = 0;
+
+    XFS_CAN ( ProjectId )
+    XFS_CAN ( MountPoint )
+
+
+        /*  Some messages good to say
+         */
+    LogMsg ( klogInfo, "Start" );
+    pLogMsg ( klogInfo, "ProjectID: $(project)", "project=%s", ProjectId );
+    pLogMsg ( klogInfo, "MountPoint: $(point)", "point=%s", MountPoint );
+    if ( LogFile != NULL ) {
+        pLogMsg ( klogInfo, "LogFile: $(file)", "file=%s", LogFile );
+    }
+    pLogMsg ( klogInfo, "ReadOnly: $(ro)", "ro=%s", ( ReadOnly ? "true" : "false" ) );
+    pLogMsg ( klogInfo, "Daemonize: $(pokemon)", "pokemon=%s", ( Daemonize ? "true" : "false" ) );
+
+        /*  Initializing all depots and heavy gunz
+         */
+    RCt = XFS_InitAll_MHR ( NULL );
+    pLogMsg ( klogDebug, "[XFS_InitAll_MHR][$(rc)]", "rc=%d", RCt );
+    if ( RCt == 0 ) {
+
+        RCt = MakeModel ( & TheModel, ProjectId, ReadOnly );
+        pLogMsg ( klogDebug, "[XFSModelMake][$(rc)]", "rc=%d", RCt );
+        if ( RCt == 0 ) {
+
+            RCt = XFSTreeMake ( TheModel, & TheTree );
+            pLogMsg ( klogDebug, "[XFSTreeMake][$(rc)]", "rc=%d", RCt );
+            if ( RCt == 0 ) {
+
+                RCt = XFSControlMake ( TheTree, & TheControl );
+                pLogMsg ( klogDebug, "[XFSControlMake][$(rc)]", "rc=%d", RCt );
+                if ( RCt == 0 ) {
+
+                    XFSControlSetMountPoint ( TheControl, MountPoint );
+
+                    RCt = string_printf (
+                                        Lable,
+                                        sizeof ( Lable ) - 1,
+                                        & NumWr,
+                                        "dbGaP(%s)",
+                                        ProjectId
+                                        );
+                    if ( RCt != 0 ) {
+                            /*  Previously this result was overwritten
+                             *  unchecked and a broken label was used
+                             */
+                        LogErr ( klogErr, RCt, "ERROR: Can not compose mount label" );
+                    }
+                    else {
+                        XFSControlSetLabel ( TheControl, Lable );
+                        if ( LogFile != NULL ) {
+                            XFSControlSetLogFile ( TheControl, LogFile );
+                        }
+                        if ( Daemonize ) {
+                            XFSControlDaemonize ( TheControl );
+                        }
+
+                        LogMsg ( klogDebug, "[XFSStart]" );
+                        RCt = XFSStart ( TheControl );
+                        pLogMsg ( klogDebug, "[XFSStart][$(rc)]", "rc=%d", RCt );
+                        if ( RCt == 0 ) {
+                            LogMsg ( klogDebug, "[XFSStop]" );
+                            RCt = XFSStop ( TheControl );
+                            pLogMsg ( klogDebug, "[XFSStop][$(rc)]", "rc=%d", RCt );
+                        }
+                        else {
+                            LogErr ( klogFatal, RCt, "CRITICAL ERROR: Can not start MOUNTER" );
+                        }
+                    }
+                }
+
+                    /*  As before, the control is disposed whenever the
+                     *  tree was made, even if XFSControlMake failed
+                     */
+                XFSControlDispose ( TheControl );
+
+                XFSTreeRelease ( TheTree );
+            }
+
+            XFSModelRelease ( TheModel );
+        }
+
+        XFS_DisposeAll_MHR ();
+
+    }
+
+        /*  Another message good to say
+         */
+    LogMsg ( klogDebug, "[Exiting]" );
+
+    return RCt;
+}   /* DoFukan () */
+
+/* CheckParameters ()
+ *  Reads the positional parameters: an optional access mode
+ *  ( PARAM_RO or PARAM_RW ), then the project id, then the mount point.
+ *  Exactly two or three parameters are accepted.
+ *
+ *  All three outputs are reset first ( NULL / false ) and are filled
+ *  only as far as parsing succeeds. Returns 0 or the first error.
+ */
+static
+rc_t CC
+CheckParameters (
+    struct Args * TheArgs,
+    const char ** ProjectId,
+    const char ** MountPoint,
+    bool * ReadOnly
+)
+{
+    rc_t RCt;
+    uint32_t Qty;
+    uint32_t Pos;
+    const char * Value;
+
+    RCt = 0;
+    Qty = 0;
+    Pos = 0;
+    Value = NULL;
+
+    XFS_CSAN ( ProjectId )
+    XFS_CSAN ( MountPoint )
+    XFS_CSA ( ReadOnly, false )
+    XFS_CAN ( TheArgs )
+    XFS_CAN ( ProjectId )
+    XFS_CAN ( MountPoint )
+    XFS_CAN ( ReadOnly )
+
+    RCt = ArgsParamCount ( TheArgs, & Qty );
+    if ( RCt != 0 ) {
+        return RCt;
+    }
+
+    if ( Qty != 2 && Qty != 3 ) {
+        return RC ( rcApp, rcArgv, rcAccessing, rcSelf, rcInsufficient );
+    }
+
+        /*  With three parameters the first one is the access mode
+         */
+    if ( Qty == 3 ) {
+        RCt = ArgsParamValue ( TheArgs, Pos, ( const void ** ) & Value );
+        if ( RCt != 0 ) {
+            return RCt;
+        }
+
+        if ( strcmp ( PARAM_RO, Value ) == 0 ) {
+            * ReadOnly = true;
+        }
+        else if ( strcmp ( PARAM_RW, Value ) == 0 ) {
+            * ReadOnly = false;
+        }
+        else {
+            return RC ( rcApp, rcArgv, rcAccessing, rcSelf, rcAmbiguous );
+        }
+
+        Pos ++;
+    }
+
+        /*  Next parameter is the project id
+         */
+    RCt = ArgsParamValue ( TheArgs, Pos, ( const void ** ) & Value );
+    if ( RCt != 0 ) {
+        return RCt;
+    }
+    * ProjectId = Value;
+    Pos ++;
+
+        /*  The last parameter is the mount point
+         */
+    RCt = ArgsParamValue ( TheArgs, Pos, ( const void ** ) & Value );
+    if ( RCt == 0 ) {
+        * MountPoint = Value;
+    }
+
+    return RCt;
+}   /* CheckParameters () */
+
+/* CheckArgs ()
+ *  Collects the program name and the optional settings: log file
+ *  ( OPT_LOGFILE ) and daemon mode ( OPT_DAEMONIZE ).
+ *
+ *  LogFile   [OUT] - option value when given exactly once, else NULL
+ *  ProgName  [OUT] - program name as reported by ArgsProgram
+ *  Daemonize [OUT] - true when OPT_DAEMONIZE was given exactly once
+ *
+ *  Returns 0 or the rc_t of the first failing call. Earlier versions
+ *  overwrote the rc_t on every step, so only the last call's result
+ *  was ever reported; steps are now chained.
+ */
+static
+rc_t CC
+CheckArgs (
+    struct Args * TheArgs,
+    const char ** LogFile,
+    const char ** ProgName,
+    bool * Daemonize
+)
+{
+    rc_t RCt;
+    const char * OptValue;
+    const char * FullPath;
+    uint32_t OptCount;
+
+    RCt = 0;
+    OptValue = NULL;
+    FullPath = NULL;
+    OptCount = 0;
+
+    XFS_CSAN ( LogFile )
+    XFS_CSAN ( ProgName )
+    XFS_CSA ( Daemonize, false )
+    XFS_CAN ( TheArgs )
+    XFS_CAN ( LogFile )
+    XFS_CAN ( ProgName )
+    XFS_CAN ( Daemonize )
+
+        /*  Program name first, so the caller can use it in a usage
+         *  message even when option parsing fails afterwards
+         */
+    RCt = ArgsProgram ( TheArgs, & FullPath, ProgName );
+
+    if ( RCt == 0 ) {
+        RCt = ArgsOptionCount ( TheArgs, OPT_LOGFILE, & OptCount );
+        if ( RCt == 0 && OptCount == 1 ) {
+            RCt = ArgsOptionValue (
+                                TheArgs,
+                                OPT_LOGFILE,
+                                0,
+                                ( const void ** ) & OptValue
+                                );
+            if ( RCt == 0 ) {
+                * LogFile = OptValue;
+            }
+        }
+    }
+
+    if ( RCt == 0 ) {
+        OptCount = 0;
+        RCt = ArgsOptionCount ( TheArgs, OPT_DAEMONIZE, & OptCount );
+        if ( RCt == 0 ) {
+            * Daemonize = OptCount == 1;
+        }
+    }
+
+    return RCt;
+}   /* CheckArgs () */
+
+/* RunApp ()
+ *  Runner handed to WrapIt by KMain: parses positional parameters and
+ *  options, then either performs the mount through DoFukan or, when
+ *  parsing failed, prints the usage summary and returns the error.
+ */
+static
+rc_t CC
+RunApp ( struct Args * TheArgs )
+{
+    rc_t RCt = 0;
+    const char * ProjectId = NULL;
+    const char * MountPoint = NULL;
+    const char * LogFile = NULL;
+    const char * ProgName = NULL;
+    bool ReadOnly = false;
+    bool Daemonize = false;
+
+    XFS_CAN ( TheArgs )
+
+        /*  Positional parameters first, options second
+         */
+    RCt = CheckParameters (
+                        TheArgs,
+                        & ProjectId,
+                        & MountPoint,
+                        & ReadOnly
+                        );
+    if ( RCt == 0 ) {
+        RCt = CheckArgs ( TheArgs, & LogFile, & ProgName, & Daemonize );
+    }
+
+    if ( RCt == 0 ) {
+        return DoFukan (
+                        ProjectId,
+                        MountPoint,
+                        LogFile,
+                        ProgName,
+                        Daemonize,
+                        ReadOnly
+                        );
+    }
+
+        /*  ProgName is still NULL if parsing failed before it was read
+         */
+    UsageSummary ( ProgName != NULL ? ProgName : UsageDefaultName );
+
+    return RCt;
+}   /* RunApp () */
+
+/* DoUnmount ()
+ *  Logs the request and asks XFS to unmount and destroy MountPoint.
+ *  Returns 0 whenever MountPoint is not NULL.
+ *  NOTE(review): the outcome of XFSUnmountAndDestroy is not inspected
+ *  here - confirm whether it reports errors worth propagating.
+ */
+static
+rc_t CC
+DoUnmount ( const char * MountPoint )
+{
+    XFS_CAN ( MountPoint )
+
+    pLogMsg ( klogDebug, "[DoUnmount] [$(point)]", "point=%s", MountPoint );
+
+    XFSUnmountAndDestroy ( MountPoint );
+
+    return 0;
+}   /* DoUnmount () */
+
+/*)))
+ \\\ KApp and Options ...
+ (((*/
+
+/* Version reported for this tool, taken from dbgap-mount-tool.vers.h */
+ver_t CC KAppVersion(void) { return MOUNT_TOOL_VERS; }
+
+/* Command line options understood by the tool; passed to
+ * ArgsMakeAndHandle in KMain.
+ * NOTE(review): the trailing columns are presumably max-count,
+ * needs-value and required, as declared by OptDef in kapp/args.h -
+ * confirm against that header.
+ */
+struct OptDef ToolOpts [] = {
+ { OPT_DAEMONIZE, ALS_DAEMONIZE, NULL, UsgDaemonize, 1, false, false },
+ { OPT_LOGFILE, ALS_LOGFILE, NULL, UsgLogFile, 1, true, false },
+ { OPT_UNMOUNT, ALS_UNMOUNT, NULL, UsgUnmount, 1, true, false }
+}; /* OptDef */
+
+/* Name used in usage output when the real program name is unknown */
+const char UsageDefaultName[] = "dbgap-mount-tool";
+
+/* UsageSummary ()
+ *  Prints the two supported invocation forms ( mount and unmount )
+ *  through KOutMsg and returns its result. ProgName is substituted
+ *  into both forms; PARAM_RO / PARAM_RW name the optional access mode.
+ */
+rc_t CC
+UsageSummary ( const char * ProgName )
+{
+ return KOutMsg (
+ "\n"
+ "Usage:\n"
+ " %s [options]"
+ " [%s|%s]"
+ " <project-id>"
+ " <mount-point>"
+ "\n"
+ "Or:\n"
+ " %s [options] [unmount-options]"
+ "\n"
+ "\n",
+ ProgName,
+ PARAM_RO,
+ PARAM_RW,
+ ProgName
+ );
+} /* UsageSummary () */
+
+/* Usage ()
+ *  Prints full help: the usage summary, the tool options, the unmount
+ *  options, the standard options and the version line.
+ *  When the program name can not be obtained ( TheArgs is NULL or
+ *  ArgsProgram fails ) UsageDefaultName is printed instead and the
+ *  corresponding error is returned after the help text.
+ */
+rc_t CC
+Usage ( const struct Args * TheArgs )
+{
+    rc_t RCt = 0;
+    const char * ProgName = NULL;
+    const char * FullPath = NULL;
+
+    if ( TheArgs != NULL ) {
+        RCt = ArgsProgram ( TheArgs, & FullPath, & ProgName );
+    }
+    else {
+        RCt = RC ( rcApp, rcArgv, rcAccessing, rcSelf, rcNull );
+    }
+
+    if ( RCt != 0 ) {
+        FullPath = UsageDefaultName;
+        ProgName = UsageDefaultName;
+    }
+
+    UsageSummary ( ProgName );
+
+        /*  Mount options
+         */
+    KOutMsg ( "Options:\n" );
+    HelpOptionLine ( ALS_DAEMONIZE, OPT_DAEMONIZE, PRM_DAEMONIZE, UsgDaemonize );
+    HelpOptionLine ( ALS_LOGFILE, OPT_LOGFILE, PRM_LOGFILE, UsgLogFile );
+    KOutMsg ( "\n" );
+
+        /*  Unmount options
+         */
+    KOutMsg ( "Unmount Options:\n" );
+    HelpOptionLine ( ALS_UNMOUNT, OPT_UNMOUNT, PRM_UNMOUNT, UsgUnmount );
+    KOutMsg ( "\n" );
+
+        /*  Options every tool has, and the version
+         */
+    KOutMsg ( "Standard Options:\n" );
+    HelpOptionsStandard ();
+    HelpVersion ( FullPath, KAppVersion () );
+
+    return RCt;
+}   /* Usage () */
+
+/* KMain ()
+ *  Tool entry point. Parses the command line against ToolOpts, then:
+ *    - when OPT_UNMOUNT was given, unmounts that mount point;
+ *    - otherwise runs the mount through WrapIt ( TheArgs, RunApp ).
+ *
+ *  The presence of OPT_UNMOUNT is now decided by ArgsOptionCount.
+ *  Before, any failure of ArgsOptionValue was read as "not an unmount
+ *  request" and silently fell through to mounting, and MountPoint was
+ *  left uninitialized.
+ */
+rc_t CC
+KMain ( int ArgC, char * ArgV [] )
+{
+    rc_t RCt;
+    struct Args * TheArgs;
+    const char * MountPoint;
+    uint32_t OptCount;
+
+    RCt = 0;
+    TheArgs = NULL;
+    MountPoint = NULL;
+    OptCount = 0;
+
+    RCt = ArgsMakeAndHandle (
+                            & TheArgs,
+                            ArgC,
+                            ArgV,
+                            1,
+                            ToolOpts,
+                            sizeof ( ToolOpts ) / sizeof ( OptDef )
+                            );
+    if ( RCt == 0 ) {
+            /*  First we do check if that is unmount command
+             */
+        RCt = ArgsOptionCount ( TheArgs, OPT_UNMOUNT, & OptCount );
+        if ( RCt == 0 ) {
+            if ( OptCount == 0 ) {
+                RCt = WrapIt ( TheArgs, RunApp );
+            }
+            else {
+                RCt = ArgsOptionValue (
+                                    TheArgs,
+                                    OPT_UNMOUNT,
+                                    0,
+                                    ( const void ** ) & MountPoint
+                                    );
+                if ( RCt == 0 ) {
+                    RCt = DoUnmount ( MountPoint );
+                }
+            }
+        }
+
+        ArgsWhack ( TheArgs );
+    }
+
+    return RCt;
+}
diff --git a/tools/bam-loader/loader-imp.h b/tools/dbgap-mount/dbgap-mount-tool.h
similarity index 53%
copy from tools/bam-loader/loader-imp.h
copy to tools/dbgap-mount/dbgap-mount-tool.h
index 6f524a7..c0ef3c6 100644
--- a/tools/bam-loader/loader-imp.h
+++ b/tools/dbgap-mount/dbgap-mount-tool.h
@@ -24,6 +24,49 @@
*
*/
-rc_t run(char const argv0[],
- unsigned countAligned, char const *bamFile[],
- unsigned countUnaligned, char const *unaligned[]);
+#ifndef _mount_tool_h_
+#define _mount_tool_h_
+
+#include <xfs/xfs-defs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*_*/
+
+/*)))
+ ||| Log init/dispose
+(((*/
+
+XFS_EXTERN rc_t CC WrapIt (
+ struct Args * TheArgs,
+ rc_t ( CC * runner ) ( struct Args * TheArgs )
+ );
+
+/*)))
+ \\\ Arguments ... miscellaneous
+ (((*/
+#define OPT_DAEMONIZE "daemonize"
+#define ALS_DAEMONIZE "d"
+#define PRM_DAEMONIZE NULL
+static const char * UsgDaemonize [] = { "Run tool as a daemon", NULL };
+
+#define OPT_LOGFILE "log-file"
+#define ALS_LOGFILE "l"
+#define PRM_LOGFILE "log-file-path"
+static const char * UsgLogFile [] = { "Log file", NULL };
+
+#define OPT_UNMOUNT "unmount"
+#define ALS_UNMOUNT "u"
+#define PRM_UNMOUNT "mount-point"
+static const char * UsgUnmount [] = { "Unmount", NULL };
+
+#define PARAM_RO "ro"
+#define PARAM_RW "rw"
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _mount_tool_h_ */
diff --git a/tools/ref-variation/ref-variation.vers b/tools/dbgap-mount/dbgap-mount-tool.vers
similarity index 100%
copy from tools/ref-variation/ref-variation.vers
copy to tools/dbgap-mount/dbgap-mount-tool.vers
diff --git a/tools/dbgap-mount/dbgap-mount-tool.vers.h b/tools/dbgap-mount/dbgap-mount-tool.vers.h
new file mode 100644
index 0000000..917ffa4
--- /dev/null
+++ b/tools/dbgap-mount/dbgap-mount-tool.vers.h
@@ -0,0 +1 @@
+#define MOUNT_TOOL_VERS 0x00000001
diff --git a/tools/dbgap-mount/demo.c b/tools/dbgap-mount/demo.c
index 4d331c4..6833000 100644
--- a/tools/dbgap-mount/demo.c
+++ b/tools/dbgap-mount/demo.c
@@ -174,6 +174,7 @@ rc_t run (
XFSControlSetMountPoint ( TheControl, MountPoint );
XFSControlSetLabel ( TheControl, "Olaffsen" );
+ XFSControlSetLogFile ( TheControl, NULL );
if ( ! Daemonize ) {
XFSControlSetArg ( TheControl, "-f", "-f" );
@@ -219,24 +220,29 @@ int ProjectIdInt = 0;
char MountPoint[333];
bool ReadOnly = true;
bool Daemonize = false;
+bool LogToFile = false;
+char LogFile [ 777 ];
#define RO_TAG "ro"
#define RW_TAG "rw"
#define DM_TAG "-d"
+#define LF_TAG "-l"
+
static
void
RightUsage()
{
printf("\ndbGaP mount tool demo program. Will mount and show content of cart files\n");
- printf("\nUsage: %s [%s|%s] [%s] project_id mount_point\n\n\
+ printf("\nUsage: %s [%s|%s] [%s log_file] [%s] project_id mount_point\n\n\
Where:\n\
project_id - usually integer greater that zero and less than twelve\n\
%s - mount in read only mode\n\
%s - mount in read-write mode\n\
%s - run mounter as daemon\n\
mount_point - point to mount\n\
-\n\n", ProgramName, RO_TAG, RW_TAG, DM_TAG, RO_TAG, RW_TAG, DM_TAG );
+ log_file - file to log logs\n\
+\n\n", ProgramName, RO_TAG, RW_TAG, LF_TAG, DM_TAG, RO_TAG, RW_TAG, DM_TAG );
} /* RightUsage() */
static
@@ -346,6 +352,10 @@ rc_t CC Usage ( const Args * args ) { return 0; }
rc_t CC KMain(int argc, char *argv[]) {
+ // KLogLevelSet ( klogInfo );
+ KLogLevelSet ( klogDebug );
+ // XFSLogInit ( "log.log" );
+
if ( ! ParseArgs ( argc, argv ) ) {
RightUsage();
return 1;
diff --git a/test/samline/refbases.h b/tools/dbgap-mount/unix/wrap.c
similarity index 77%
copy from test/samline/refbases.h
copy to tools/dbgap-mount/unix/wrap.c
index 94136fb..f89944f 100644
--- a/test/samline/refbases.h
+++ b/tools/dbgap-mount/unix/wrap.c
@@ -1,4 +1,4 @@
-/*===========================================================================
+/*==============================================================================
*
* PUBLIC DOMAIN NOTICE
* National Center for Biotechnology Information
@@ -23,19 +23,22 @@
* ===========================================================================
*
*/
+#include <kapp/args.h> /* KMain */
+#include <xfs/xfs-defs.h>
-#ifndef _h_refbases_
-#define _h_refbases_
+#include <sysalloc.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/* WrapIt () - unix flavor
+ *  Needs no preparation on this platform: after rejecting NULL
+ *  arguments ( XFS_CAN ) it calls runner with TheArgs and returns its
+ *  result. KMain in dbgap-mount-tool.c passes RunApp as runner, which
+ *  in turn ends up in DoFukan ().
+ */
+LIB_EXPORT
+rc_t CC
+WrapIt (
+ struct Args * TheArgs,
+ rc_t ( CC * runner ) ( struct Args * TheArgs )
+)
+{
+ XFS_CAN ( TheArgs )
+ XFS_CAN ( runner )
+ return runner ( TheArgs );
+} /* WrapIt () */
diff --git a/tools/dbgap-mount/win/wrap.c b/tools/dbgap-mount/win/wrap.c
new file mode 100644
index 0000000..1554901
--- /dev/null
+++ b/tools/dbgap-mount/win/wrap.c
@@ -0,0 +1,344 @@
+/*==============================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+#include <kapp/args.h> /* KMain */
+
+#include <klib/text.h>
+#include <klib/out.h> /* OUTMSG */
+#include <klib/rc.h>
+#include <klib/log.h>
+
+#include <kfs/directory.h>
+#include <kfs/file.h>
+
+#include <xfs/xfs-defs.h>
+#include <xfs/xlog.h>
+
+#include "../dbgap-mount-tool.h"
+
+#include <sysalloc.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* There is some inconvenience. Before KMain is called, all native
+ * WCHAR paths are converted to linux-style char paths, so we have to
+ * go through all parameters and regenerate the command line,
+ * converting back into a native path everything that looks like a
+ * path, i.e. everything that starts with '/' or './'
+ * ( see _CheckConvert () ).
+ */
+
+/* _CharToWChar ()
+ *  Converts the multibyte string Str into the wide character buffer
+ *  Buf using mbstowcs.
+ *
+ *  Buf   [OUT] - destination buffer
+ *  BufSz [IN]  - size of Buf in BYTES ( callers in this file pass
+ *                sizeof ( buffer ) )
+ *  Str   [IN]  - NUL terminated source string
+ *
+ *  Returns 0 on success, rcInvalid when Str is not a valid multibyte
+ *  sequence and rcInsufficient when the result, including its
+ *  terminator, does not fit. Previously BufSz was ignored, so a long
+ *  argument overran Buf and the result was never terminated.
+ */
+static
+rc_t CC
+_CharToWChar ( WCHAR * Buf, size_t BufSz, const char * Str )
+{
+    size_t Cap;
+    size_t Cnt;
+
+    XFS_CAN ( Str )
+    XFS_CAN ( Buf )
+
+    Cap = BufSz / sizeof ( WCHAR );
+    if ( Cap == 0 ) {
+        return XFS_RC ( rcInsufficient );
+    }
+
+        /*  mbstowcs writes the terminator only if it converted fewer
+         *  than Cap characters, so Cnt == Cap means "did not fit"
+         */
+    Cnt = mbstowcs ( Buf, Str, Cap );
+    if ( Cnt == ( size_t ) - 1 ) {
+        * Buf = 0;
+        return XFS_RC ( rcInvalid );
+    }
+
+    if ( Cap <= Cnt ) {
+        * Buf = 0;
+        return XFS_RC ( rcInsufficient );
+    }
+
+    return 0;
+}   /* _CharToWChar () */
+
+/* Forward declaration of a path conversion routine defined outside of
+ * this file; it is used by _CheckConvert () to turn an inner
+ * ( '/'-style ) path into a native WCHAR path.
+ * NOTE(review): presumably InnerPath is a printf-like format consumed
+ * together with the variadic arguments, and the size is in the same
+ * units the callers here pass ( sizeof of the buffer ) - confirm
+ * against the XFS library.
+ */
+LIB_EXPORT
+rc_t CC
+XFSPathInnerToNative (
+ WCHAR * NativePathBuffer,
+ size_t NativePathBufferSize,
+ const char * InnerPath,
+ ...
+ );
+
+/* _CheckConvert ()
+ *  Converts one command line token into a wide string in Buf.
+ *  A token starting with "/" or "./" is treated as a path and goes
+ *  through XFSPathInnerToNative; anything else is converted as is by
+ *  _CharToWChar. Returns the result of the conversion used.
+ */
+static
+rc_t CC
+_CheckConvert ( const char * Str, WCHAR * Buf, size_t BufSz )
+{
+    bool LooksLikePath;
+
+    XFS_CAN ( Str )
+    XFS_CAN ( Buf )
+
+    LooksLikePath = Str [ 0 ] == '/'
+                    || ( Str [ 0 ] == '.' && Str [ 1 ] == '/' );
+
+    if ( ! LooksLikePath ) {
+            /*  Just converting to WCHAR
+             */
+        return _CharToWChar ( Buf, BufSz, Str );
+    }
+
+        /*  That is candidate for a path
+         */
+    return XFSPathInnerToNative ( Buf, BufSz, Str );
+}   /* _CheckConvert () */
+
+/* _IsDaemonizeToken ()
+ *  Tells whether Param is the daemonize switch in either spelling:
+ *  "--" followed by OPT_DAEMONIZE or "-" followed by ALS_DAEMONIZE.
+ *  NULL and tokens not starting with '-' yield false.
+ */
+static
+bool CC
+_IsDaemonizeToken ( const char * Param )
+{
+    if ( Param == NULL || Param [ 0 ] != '-' ) {
+        return false;
+    }
+
+        /*  Long form
+         */
+    if ( Param [ 1 ] == '-' ) {
+        return strcmp ( Param + 2, OPT_DAEMONIZE ) == 0;
+    }
+
+        /*  Short form
+         */
+    return strcmp ( Param + 1, ALS_DAEMONIZE ) == 0;
+}   /* _IsDaemonizeToken () */
+
+/* _MakeCommandString ()
+ *  Rebuilds the command line for the detached child process: all argv
+ *  tokens except the daemonize switch, converted by _CheckConvert and
+ *  joined with single spaces. The converted argv [ 0 ] is returned
+ *  separately as the program path.
+ *
+ *  Str  [OUT] - malloc'ed command line, caller frees
+ *  Prog [OUT] - malloc'ed program path, caller frees
+ *
+ *  Returns 0 on success; on failure both outputs are NULL.
+ *
+ *  Fixes over the previous version: the collected error is returned
+ *  ( it used to return 0 unconditionally ), appending is bounds
+ *  checked against the fixed buffer, Prog is validated, and nothing
+ *  is duplicated after a failure.
+ *  NOTE(review): tokens containing spaces are still not quoted.
+ */
+static
+rc_t CC
+_MakeCommandString (
+    struct Args * TheArgs,
+    WCHAR ** Str,
+    WCHAR ** Prog
+)
+{
+    rc_t RCt;
+    uint32_t Cnt;
+    uint32_t Idx;
+    const char * Val;
+    size_t Cap;
+    WCHAR BF [ 4096 ];
+    WCHAR BF1 [ 1096 ];
+
+    RCt = 0;
+    Cnt = 0;
+    Idx = 0;
+    Val = NULL;
+    Cap = sizeof ( BF ) / sizeof ( * BF );
+    * BF = 0;
+    * BF1 = 0;
+
+    XFS_CSAN ( Str )
+    XFS_CSAN ( Prog )
+    XFS_CAN ( TheArgs )
+    XFS_CAN ( Str )
+    XFS_CAN ( Prog )
+
+    RCt = ArgsArgvCount ( TheArgs, & Cnt );
+    if ( RCt == 0 ) {
+        for ( Idx = 0; Idx < Cnt; Idx ++ ) {
+            ZeroMemory ( BF1, sizeof ( BF1 ) );
+
+            RCt = ArgsArgvValue ( TheArgs, Idx, & Val );
+            if ( RCt != 0 ) {
+                break;
+            }
+
+                /*  The child must not daemonize again
+                 */
+            if ( _IsDaemonizeToken ( Val ) ) {
+                continue;
+            }
+
+            RCt = _CheckConvert ( Val, BF1, sizeof ( BF1 ) );
+            if ( RCt != 0 ) {
+                break;
+            }
+
+                /*  Room for separator, token and terminator
+                 */
+            if ( Cap < wcslen ( BF ) + wcslen ( BF1 ) + 2 ) {
+                RCt = XFS_RC ( rcInsufficient );
+                break;
+            }
+
+            if ( 0 < Idx ) {
+                wcscat ( BF, L" " );
+            }
+            wcscat ( BF, BF1 );
+        }
+    }
+
+        /*  Program path is argv [ 0 ], converted once more
+         */
+    if ( RCt == 0 ) {
+        ZeroMemory ( BF1, sizeof ( BF1 ) );
+        RCt = ArgsArgvValue ( TheArgs, 0, & Val );
+        if ( RCt == 0 ) {
+            RCt = _CheckConvert ( Val, BF1, sizeof ( BF1 ) );
+        }
+    }
+
+    if ( RCt == 0 ) {
+        * Str = _wcsdup ( BF );
+        * Prog = _wcsdup ( BF1 );
+
+        if ( * Str == NULL || * Prog == NULL ) {
+            RCt = XFS_RC ( rcExhausted );
+
+            free ( * Str );
+            * Str = NULL;
+
+            free ( * Prog );
+            * Prog = NULL;
+        }
+    }
+
+    return RCt;
+}   /* _MakeCommandString () */
+
+/* _CreateDetached ()
+ *  Starts AppName with command line Cmd as a detached process
+ *  ( DETACHED_PROCESS, no handle inheritance, inherited environment
+ *  and current directory ) and reports the outcome on stdout.
+ *
+ *  Returns 0 when the process was created, rcInvalid otherwise.
+ *
+ *  Fixes over the previous version: STARTUPINFO.cb is filled in, as
+ *  CreateProcess expects, and the process / thread handles returned
+ *  in PROCESS_INFORMATION are closed instead of being leaked. Closing
+ *  them does not terminate the child.
+ */
+static
+rc_t CC
+_CreateDetached ( LPCTSTR AppName, LPTSTR Cmd )
+{
+    rc_t RCt;
+    BOOL Ret;
+    STARTUPINFO StartInfo;
+    PROCESS_INFORMATION Process;
+    int Err;
+
+    RCt = 0;
+    Ret = FALSE;
+    ZeroMemory ( & StartInfo, sizeof( StartInfo ) );
+    ZeroMemory ( & Process, sizeof( Process ) );
+    StartInfo . cb = sizeof ( StartInfo );
+    Err = 0;
+
+    Ret = CreateProcessW (
+                    AppName,            // application name
+                    Cmd,                // command line
+                    NULL,               // process attributes
+                    NULL,               // thread attributes
+                    FALSE,              // no file handler inheritance
+                    DETACHED_PROCESS,   // Creation Flags
+                    NULL,               // Inherit environment
+                    NULL,               // Inherit CWD
+                    & StartInfo,        // Startup Info
+                    & Process           // Process Information
+                    );
+    if ( Ret == FALSE ) {
+        Err = GetLastError ();
+        wprintf ( L"CRITICAL ERROR: Can not run in background [%s] Error[%d]\n", AppName, Err );
+        RCt = XFS_RC ( rcInvalid );
+    }
+    else {
+        wprintf ( L"RUN DETACHED\n" );
+
+            /*  We never wait for the child, so release our handles
+             */
+        CloseHandle ( Process . hThread );
+        CloseHandle ( Process . hProcess );
+    }
+
+    return RCt;
+}   /* _CreateDetached () */
+
+/* RunDaemon ()
+ *  Windows replacement for daemonizing: rebuilds the command line
+ *  without the daemonize switch and starts it as a detached process.
+ *
+ *  Returns 0 on success or the error of the failing step. The previous
+ *  version returned 0 unconditionally, so a failure to spawn the
+ *  background process was reported to the caller as success.
+ */
+static
+rc_t CC
+RunDaemon ( struct Args * TheArgs )
+{
+    rc_t RCt;
+    WCHAR * CmdLine;
+    WCHAR * ProgName;
+
+    RCt = 0;
+    CmdLine = NULL;
+    ProgName = NULL;
+
+    RCt = _MakeCommandString ( TheArgs, & CmdLine, & ProgName );
+    if ( RCt == 0 ) {
+            /*  Do not trust a zero rc alone: never hand NULL strings
+             *  to CreateProcess
+             */
+        if ( CmdLine == NULL || ProgName == NULL ) {
+            RCt = XFS_RC ( rcInvalid );
+        }
+        else {
+            RCt = _CreateDetached ( ProgName, CmdLine );
+        }
+    }
+
+        /*  free ( NULL ) is harmless
+         */
+    free ( CmdLine );
+    free ( ProgName );
+
+    return RCt;
+}   /* RunDaemon () */
+
+/* _SetLog ()
+ *  Initializes XFS logging. The log file path is taken from
+ *  OPT_LOGFILE when that option was given exactly once; otherwise
+ *  XFSLogInit is called with NULL. A path, when present, is echoed
+ *  to stdout first.
+ *  Returns the first option lookup error, or the XFSLogInit result.
+ */
+static
+rc_t CC
+_SetLog ( struct Args * TheArgs )
+{
+    rc_t RCt = 0;
+    const char * Path = NULL;
+    uint32_t Qty = 0;
+
+    XFS_CAN ( TheArgs )
+
+    RCt = ArgsOptionCount ( TheArgs, OPT_LOGFILE, & Qty );
+    if ( RCt == 0 && Qty == 1 ) {
+        RCt = ArgsOptionValue (
+                            TheArgs,
+                            OPT_LOGFILE,
+                            0,
+                            ( const void ** ) & Path
+                            );
+    }
+
+    if ( RCt != 0 ) {
+        return RCt;
+    }
+
+    if ( Path != NULL ) {
+        printf ( "Log File [%s]\n", Path );
+    }
+
+    return XFSLogInit ( Path );
+}   /* _SetLog () */
+
+/* WrapIt () - windows flavor
+ *  When OPT_DAEMONIZE was given exactly once, the tool is restarted
+ *  as a detached process through RunDaemon and runner is not called
+ *  here. Otherwise logging is set up by _SetLog and, if that worked,
+ *  runner is invoked with TheArgs.
+ */
+rc_t CC
+WrapIt (
+    struct Args * TheArgs,
+    rc_t ( CC * runner ) ( struct Args * TheArgs )
+)
+{
+    rc_t RCt = 0;
+    uint32_t Qty = 0;
+
+    XFS_CAN ( TheArgs );
+    XFS_CAN ( runner );
+
+    RCt = ArgsOptionCount ( TheArgs, OPT_DAEMONIZE, & Qty );
+    if ( RCt != 0 ) {
+        return RCt;
+    }
+
+        /*  Here we are daemonizing
+         */
+    if ( Qty == 1 ) {
+        return RunDaemon ( TheArgs );
+    }
+
+        /*  Setting log file
+         */
+    RCt = _SetLog ( TheArgs );
+    if ( RCt != 0 ) {
+        LogErr ( klogErr, RCt, "CRITICAL ERROR: Can not initialize log file" );
+        return RCt;
+    }
+
+    return runner ( TheArgs );
+}   /* WrapIt () */
diff --git a/tools/fastdump/.gitignore b/tools/fastdump/.gitignore
new file mode 100644
index 0000000..2d51571
--- /dev/null
+++ b/tools/fastdump/.gitignore
@@ -0,0 +1,3 @@
+lookup_bin.dat
+special.txt
+vdb_dump_special.txt
diff --git a/test/samline/Makefile b/tools/fastdump/Makefile
similarity index 90%
copy from test/samline/Makefile
copy to tools/fastdump/Makefile
index e3a76ef..a40fa08 100644
--- a/test/samline/Makefile
+++ b/tools/fastdump/Makefile
@@ -26,14 +26,14 @@
default: std
TOP ?= $(abspath ../..)
-MODULE = test/samline
+MODULE = tools/fastdump
include $(TOP)/build/Makefile.env
INT_TOOLS = \
EXT_TOOLS = \
- samline
+ fastdump
ALL_TOOLS = \
$(INT_TOOLS) \
@@ -81,12 +81,22 @@ clean: stdclean
.PHONY: clean
#-------------------------------------------------------------------------------
-# TOOL_SRC
+# fastdump
#
TOOL_SRC = \
- refbases \
- cigar \
- alig-gen
+ helper \
+ index \
+ lookup_writer \
+ lookup_reader \
+ file_printer \
+ merge_sorter \
+ sorter \
+ cmn_iter \
+ raw_read_iter \
+ special_iter \
+ fastq_iter \
+ join \
+ fastdump
TOOL_OBJ = \
$(addsuffix .$(OBJX),$(TOOL_SRC))
@@ -96,5 +106,6 @@ TOOL_LIB = \
-sncbi-vdb \
-lm
-$(BINDIR)/samline: $(TOOL_OBJ)
+$(BINDIR)/fastdump: $(TOOL_OBJ)
$(LD) --exe --vers $(SRCDIR) -o $@ $^ $(TOOL_LIB)
+
diff --git a/tools/fastdump/cmn_iter.c b/tools/fastdump/cmn_iter.c
new file mode 100644
index 0000000..48ffed2
--- /dev/null
+++ b/tools/fastdump/cmn_iter.c
@@ -0,0 +1,287 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "cmn_iter.h"
+#include "helper.h"
+
+#include <klib/progressbar.h>
+#include <klib/out.h>
+#include <sra/sraschema.h>
+
+#include <vdb/manager.h>
+#include <vdb/schema.h>
+#include <vdb/table.h>
+#include <vdb/cursor.h>
+#include <vdb/database.h>
+
+#include <os-native.h>
+#include <sysalloc.h>
+
+/* State of one table iteration; created by make_cmn_iter and torn
+ * down by destroy_cmn_iter, which releases every non-NULL member.
+ */
+typedef struct cmn_iter
+{
+ const VDBManager * mgr; /* from VDBManagerMakeRead */
+ VSchema * schema; /* from VDBManagerMakeSRASchema */
+ const VDatabase * db; /* database opened for params->acc */
+ const VTable * tbl; /* table opened by name inside db */
+ const VCursor * cursor; /* cached read cursor on tbl */
+ const char * row_range; /* copied pointer from params; parsed in cmn_iter_range */
+ struct num_gen * ranges; /* made in cmn_iter_range */
+ const struct num_gen_iter * row_iter; /* made in cmn_iter_range, drives cmn_iter_next */
+ struct progressbar * progressbar; /* NULL unless params->show_progress */
+ uint64_t count; /* from params; overwritten by VCursorIdRange */
+ int64_t first, row_id; /* first: see count; row_id: current row, set by cmn_iter_next */
+} cmn_iter;
+
+
+/* Releases everything the iterator holds ( progress bar, row
+ * iterator, ranges, cursor, table, database, schema, manager ) and
+ * then the iterator itself. A NULL iter is ignored. Destroying a
+ * progress bar also emits a newline through KOutMsg.
+ */
+void destroy_cmn_iter( struct cmn_iter * iter )
+{
+    if ( iter == NULL )
+        return;
+
+    if ( iter->progressbar != NULL )
+    {
+        destroy_progressbar( iter->progressbar );
+        KOutMsg( "\n" );
+    }
+
+    if ( iter->row_iter != NULL )
+        num_gen_iterator_destroy( iter->row_iter );
+
+    if ( iter->ranges != NULL )
+        num_gen_destroy( iter->ranges );
+
+    if ( iter->cursor != NULL )
+        VCursorRelease( iter->cursor );
+
+    if ( iter->tbl != NULL )
+        VTableRelease( iter->tbl );
+
+    if ( iter->db != NULL )
+        VDatabaseRelease( iter->db );
+
+    if ( iter->schema != NULL )
+        VSchemaRelease( iter->schema );
+
+    if ( iter->mgr != NULL )
+        VDBManagerRelease( iter->mgr );
+
+    free( ( void * ) iter );
+}
+
+
+/* Creates an iterator over table tblname of the database named by
+ * params->acc: opens manager, SRA schema, database, table and a
+ * cached cursor, optionally makes a progress bar, and copies the row
+ * selection ( row_range, first, count ) from params.
+ *
+ * params  [IN]  - see cmn_params
+ * tblname [IN]  - name of the table inside the database
+ * iter    [OUT] - receives the iterator; NULL on failure
+ *
+ * Returns 0 on success. On failure everything opened so far is
+ * released through destroy_cmn_iter.
+ *
+ * Fixes over the previous version: NULL params / iter are rejected,
+ * *iter is never left stale on failure, the table-open error message
+ * names the call that actually failed, and the size passed for "%d"
+ * is converted to int.
+ */
+rc_t make_cmn_iter( cmn_params * params, const char * tblname, struct cmn_iter ** iter )
+{
+    rc_t rc = 0;
+    cmn_iter * i;
+
+    if ( params == NULL || iter == NULL )
+        return RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcNull );
+    *iter = NULL;
+
+    i = calloc( 1, sizeof * i );
+    if ( i == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        ErrMsg( "make_cmn_iter.calloc( %d ) -> %R", ( int )( sizeof * i ), rc );
+        return rc;
+    }
+
+    rc = VDBManagerMakeRead( &i->mgr, params->dir );
+    if ( rc != 0 )
+        ErrMsg( "make_cmn_iter.VDBManagerMakeRead() -> %R\n", rc );
+
+    if ( rc == 0 )
+    {
+        rc = VDBManagerMakeSRASchema( i->mgr, &i->schema );
+        if ( rc != 0 )
+            ErrMsg( "make_cmn_iter.VDBManagerMakeSRASchema() -> %R\n", rc );
+    }
+
+    if ( rc == 0 )
+    {
+        rc = VDBManagerOpenDBRead( i->mgr, &i->db, i->schema, "%s", params->acc );
+        if ( rc != 0 )
+            ErrMsg( "make_cmn_iter.VDBManagerOpenDBRead( '%s' ) -> %R\n", params->acc, rc );
+    }
+
+    if ( rc == 0 )
+    {
+        rc = VDatabaseOpenTableRead( i->db, &i->tbl, "%s", tblname );
+        if ( rc != 0 )
+            ErrMsg( "make_cmn_iter.VDatabaseOpenTableRead( '%s', '%s' ) -> %R\n", params->acc, tblname, rc );
+    }
+
+    if ( rc == 0 )
+    {
+        rc = VTableCreateCachedCursorRead( i->tbl, &i->cursor, params->cursor_cache );
+        if ( rc != 0 )
+            ErrMsg( "make_cmn_iter.VTableCreateCachedCursorRead() -> %R\n", rc );
+    }
+
+    if ( rc == 0 )
+    {
+        /* as before, the outcome of make_progressbar is not checked:
+           cmn_iter_next skips the update when the pointer stays NULL */
+        if ( params->show_progress )
+            make_progressbar( &i->progressbar, 2 );
+        i->row_range = params->row_range;
+        i->first = params->first;
+        i->count = params->count;
+
+        *iter = i;
+    }
+    else
+        destroy_cmn_iter( i );
+
+    return rc;
+}
+
+
+/* Adds column `name` to the iterator's cursor by forwarding to
+ * add_column(); the column index is returned through id.
+ * iter is dereferenced without a NULL check.
+ */
+rc_t cmn_iter_add_column( struct cmn_iter * iter, const char * name, uint32_t * id )
+{
+ return add_column( iter->cursor, name, id );
+}
+
+
+/* Returns the row id stored by the most recent cmn_iter_next() call.
+ * iter is dereferenced without a NULL check.
+ */
+int64_t cmn_iter_row_id( const struct cmn_iter * iter )
+{
+ return iter->row_id;
+}
+
+
+/* Returns the number of rows the row iterator will visit, as reported
+ * by num_gen_iterator_count. On failure the error is logged and 0 is
+ * returned. The log message now names this function; it used to
+ * claim the failure happened in make_cmn_iter.
+ */
+uint64_t cmn_iter_row_count( struct cmn_iter * iter )
+{
+    uint64_t res = 0;
+    rc_t rc = num_gen_iterator_count( iter->row_iter, &res );
+    if ( rc != 0 )
+        ErrMsg( "cmn_iter_row_count.num_gen_iterator_count() -> %R\n", rc );
+    return res;
+}
+
+
+/* Advances to the next selected row: stores its id in iter->row_id
+ * and returns true, or returns false when num_gen_iterator_next has
+ * nothing more ( its rc_t, if any, is passed back through rc ).
+ * When a progress bar exists it is updated from count and row_id.
+ */
+bool cmn_iter_next( struct cmn_iter * iter, rc_t * rc )
+{
+    bool has_row = num_gen_iterator_next( iter->row_iter, &iter->row_id, rc );
+
+    if ( !has_row || iter->progressbar == NULL )
+        return has_row;
+
+    {
+        uint64_t done = calc_percent( iter->count, iter->row_id, 2 );
+        update_progressbar( iter->progressbar, done );
+    }
+    return has_row;
+}
+
+
+/* Opens the cursor and prepares the row iterator.
+ *
+ * The requested rows come either from the textual row_range or, when
+ * that is NULL and count > 0, from the first / count pair. After that
+ * first / count are overwritten with the id range VCursorIdRange
+ * reports for col_id, and make_row_iter combines both into row_iter.
+ *
+ * Returns 0 on success or the rc_t of the first failing step ( each
+ * failure is logged through ErrMsg ).
+ *
+ * Fix: the num_gen_add error message was passed one argument too
+ * many ( row_range ), so "%R" received a string pointer instead of
+ * the rc_t.
+ */
+rc_t cmn_iter_range( struct cmn_iter * iter, uint32_t col_id )
+{
+    rc_t rc = VCursorOpen( iter->cursor );
+    if ( rc != 0 )
+    {
+        ErrMsg( "cmn_iter_range.VCursorOpen() -> %R", rc );
+    }
+    else
+    {
+        rc = num_gen_make_sorted( &iter->ranges, true );
+        if ( rc != 0 )
+        {
+            ErrMsg( "cmn_iter_range.num_gen_make_sorted() -> %R\n", rc );
+        }
+        else if ( iter->row_range != NULL )
+        {
+            rc = num_gen_parse( iter->ranges, iter->row_range );
+            if ( rc != 0 )
+                ErrMsg( "cmn_iter_range.num_gen_parse( %s ) -> %R\n", iter->row_range, rc );
+        }
+        else if ( iter->count > 0 )
+        {
+            rc = num_gen_add( iter->ranges, iter->first, iter->count );
+            if ( rc != 0 )
+                ErrMsg( "cmn_iter_range.num_gen_add( %ld.%lu ) -> %R\n",
+                        iter->first, iter->count, rc );
+        }
+    }
+
+    if ( rc == 0 )
+    {
+        rc = VCursorIdRange( iter->cursor, col_id, &iter->first, &iter->count );
+        if ( rc != 0 )
+        {
+            ErrMsg( "cmn_iter_range.VCursorIdRange() -> %R", rc );
+        }
+        else
+        {
+            rc = make_row_iter( iter->ranges, iter->first, iter->count, &iter->row_iter );
+            if ( rc != 0 )
+                ErrMsg( "cmn_iter_range.make_row_iter( %s ) -> %R\n", iter->row_range, rc );
+        }
+    }
+    return rc;
+}
+
+
+/* Reads the first 64-bit element of column col_id in the current row
+ * into *value. A cell that is not 64 bits per element, has a bit
+ * offset, or is empty is logged and rejected with rcInvalid.
+ */
+rc_t cmn_read_uint64( struct cmn_iter * iter, uint32_t col_id, uint64_t *value )
+{
+    uint32_t bits, bit_off, n_elem;
+    const uint64_t * cell;
+    rc_t rc = VCursorCellDataDirect( iter->cursor, iter->row_id, col_id, &bits,
+                                     (const void **)&cell, &bit_off, &n_elem );
+    if ( rc != 0 )
+    {
+        ErrMsg( "VCursorCellDataDirect( #%ld ) -> %R\n", iter->row_id, rc );
+        return rc;
+    }
+
+    if ( bits == 64 && bit_off == 0 && n_elem >= 1 )
+    {
+        *value = *cell;
+        return rc;
+    }
+
+    ErrMsg( "row#%ld : bits=%d, boff=%d, len=%d\n", iter->row_id, bits, bit_off, n_elem );
+    return RC( rcApp, rcNoTarg, rcAccessing, rcRow, rcInvalid );
+}
+
+
+/* Copies up to num_values 64-bit elements of column col_id in the
+ * current row into value. When the cell holds fewer elements only
+ * those are copied and the rest of value is left untouched. A cell
+ * that is not 64 bits per element, has a bit offset, or is empty is
+ * logged and rejected with rcInvalid.
+ */
+rc_t cmn_read_uint64_array( struct cmn_iter * iter, uint32_t col_id, uint64_t *value, uint32_t num_values )
+{
+    uint32_t bits, bit_off, n_elem;
+    const uint64_t * cell;
+    rc_t rc = VCursorCellDataDirect( iter->cursor, iter->row_id, col_id, &bits,
+                                     (const void **)&cell, &bit_off, &n_elem );
+    if ( rc != 0 )
+    {
+        ErrMsg( "VCursorCellDataDirect( #%ld ) -> %R\n", iter->row_id, rc );
+        return rc;
+    }
+
+    if ( bits != 64 || bit_off != 0 || n_elem < 1 )
+    {
+        ErrMsg( "row#%ld : bits=%d, boff=%d, len=%d\n", iter->row_id, bits, bit_off, n_elem );
+        return RC( rcApp, rcNoTarg, rcAccessing, rcRow, rcInvalid );
+    }
+
+    /* never copy more than the caller's array can take */
+    if ( n_elem > num_values )
+        n_elem = num_values;
+    memmove( (void *)value, (void *)cell, n_elem * 8 );
+    return rc;
+}
+
+
+/* Reads the first 32-bit element of column col_id in the current row
+ * into *value. A cell that is not 32 bits per element, has a bit
+ * offset, or is empty is logged and rejected with rcInvalid.
+ */
+rc_t cmn_read_uint32( struct cmn_iter * iter, uint32_t col_id, uint32_t *value )
+{
+    uint32_t bits, bit_off, n_elem;
+    const uint32_t * cell;
+    rc_t rc = VCursorCellDataDirect( iter->cursor, iter->row_id, col_id, &bits,
+                                     (const void **)&cell, &bit_off, &n_elem );
+    if ( rc != 0 )
+    {
+        ErrMsg( "VCursorCellDataDirect( #%ld ) -> %R\n", iter->row_id, rc );
+        return rc;
+    }
+
+    if ( bits == 32 && bit_off == 0 && n_elem >= 1 )
+    {
+        *value = *cell;
+        return rc;
+    }
+
+    ErrMsg( "row#%ld : bits=%d, boff=%d, len=%d\n", iter->row_id, bits, bit_off, n_elem );
+    return RC( rcApp, rcNoTarg, rcAccessing, rcRow, rcInvalid );
+}
+
+/* Points value at the 8-bit cell of column col_id in the current row:
+ * addr and len are filled by VCursorCellDataDirect and size is set
+ * equal to len. No data is copied. A cell that is not 8 bits per
+ * element or has a bit offset is logged and rejected with rcInvalid.
+ */
+rc_t cmn_read_String( struct cmn_iter * iter, uint32_t col_id, String *value )
+{
+    uint32_t bits, bit_off;
+    rc_t rc = VCursorCellDataDirect( iter->cursor, iter->row_id, col_id, &bits,
+                                     (const void **)&value->addr, &bit_off, &value->len );
+    if ( rc != 0 )
+    {
+        ErrMsg( "VCursorCellDataDirect( #%ld ) -> %R\n", iter->row_id, rc );
+        return rc;
+    }
+
+    if ( bits == 8 && bit_off == 0 )
+    {
+        value->size = value->len;
+        return rc;
+    }
+
+    ErrMsg( "row#%ld : bits=%d, boff=%d, len=%d\n", iter->row_id, bits, bit_off, value->len );
+    return RC( rcApp, rcNoTarg, rcAccessing, rcRow, rcInvalid );
+}
diff --git a/test/samline/cigar.h b/tools/fastdump/cmn_iter.h
similarity index 51%
copy from test/samline/cigar.h
copy to tools/fastdump/cmn_iter.h
index d0ec04a..f15e0e0 100644
--- a/test/samline/cigar.h
+++ b/tools/fastdump/cmn_iter.h
@@ -24,33 +24,54 @@
*
*/
-#ifndef _h_cigar_
-#define _h_cigar_
+#ifndef _h_cmn_iter_
+#define _h_cmn_iter_
#ifdef __cplusplus
extern "C" {
#endif
-struct cigar_t;
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
+
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+struct cmn_iter;
-struct cigar_t * make_cigar_t( const char * cigar_str );
-void free_cigar_t( struct cigar_t * c );
+/* cmn_params
+ *  Parameter bundle handed to make_cmn_iter() and to the iterators built on it.
+ */
+typedef struct cmn_params
+{
+ KDirectory * dir; /* directory object used for file-system access */
+ const char * acc; /* accession / path of the object to open */
+ const char * row_range; /* textual row-range, NULL if none was given */
+ int64_t first; /* NOTE(review): presumably the first row-id to visit - confirm in cmn_iter.c */
+ uint64_t count; /* NOTE(review): presumably the number of rows to visit - confirm in cmn_iter.c */
+ size_t cursor_cache; /* size of the cursor-cache */
+ bool show_progress; /* true if progress should be shown */
+} cmn_params;
-int cigar_t_reflen( const struct cigar_t * c );
-int cigar_t_readlen( const struct cigar_t * c );
-int cigar_t_inslen( const struct cigar_t * c );
+void destroy_cmn_iter( struct cmn_iter * iter );
-size_t cigar_t_string( char * buffer, size_t buf_len, const struct cigar_t * c );
+rc_t make_cmn_iter( cmn_params * params, const char * tblname, struct cmn_iter ** iter );
-struct cigar_t * merge_cigar_t( const struct cigar_t * c );
+rc_t cmn_iter_add_column( struct cmn_iter * iter, const char * name, uint32_t * id );
+rc_t cmn_iter_range( struct cmn_iter * iter, uint32_t col_id );
-size_t md_tag( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * read, const char * reference );
+bool cmn_iter_next( struct cmn_iter * iter, rc_t * rc );
+int64_t cmn_iter_row_id( const struct cmn_iter * iter );
-void debug_cigar_t( const struct cigar_t * c );
+uint64_t cmn_iter_row_count( struct cmn_iter * iter );
-size_t cigar_t_2_read( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * ref_bases, const char * ins_bases );
+rc_t cmn_read_uint64( struct cmn_iter * iter, uint32_t col_id, uint64_t *value );
+rc_t cmn_read_uint64_array( struct cmn_iter * iter, uint32_t col_id, uint64_t *value, uint32_t num_values );
+rc_t cmn_read_uint32( struct cmn_iter * iter, uint32_t col_id, uint32_t *value );
+rc_t cmn_read_String( struct cmn_iter * iter, uint32_t col_id, String *value );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/fastdump.c b/tools/fastdump/fastdump.c
new file mode 100644
index 0000000..cc4ef0b
--- /dev/null
+++ b/tools/fastdump/fastdump.c
@@ -0,0 +1,432 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "fastdump.vers.h"
+
+#include "cmn_iter.h"
+#include "file_printer.h"
+#include "raw_read_iter.h"
+#include "special_iter.h"
+#include "fastq_iter.h"
+#include "lookup_writer.h"
+#include "lookup_reader.h"
+#include "join.h"
+#include "sorter.h"
+#include "helper.h"
+
+#include <kapp/main.h>
+#include <kapp/args.h>
+#include <klib/out.h>
+#include <klib/vector.h>
+#include <kfs/directory.h>
+#include <kproc/thread.h>
+
+#include <os-native.h>
+#include <sysalloc.h>
+
+/* --------------------------------------------------------------------------------------------
+    command-line options of the tool: for each option a help-text, the long name and
+    the one-letter alias
+
+    the defaults quoted in the help-texts have to match the defaults applied in KMain()
+-------------------------------------------------------------------------------------------- */
+static const char * lookup_usage[] = { "lookup file", NULL };
+#define OPTION_LOOKUP "lookup"
+#define ALIAS_LOOKUP "l"
+
+static const char * range_usage[] = { "row-range", NULL };
+#define OPTION_RANGE "range"
+#define ALIAS_RANGE "R"
+
+static const char * format_usage[] = { "format (special, fastq, lookup, default=special)", NULL };
+#define OPTION_FORMAT "format"
+#define ALIAS_FORMAT "f"
+
+static const char * output_usage[] = { "output-file", NULL };
+#define OPTION_OUTPUT "out"
+#define ALIAS_OUTPUT "o"
+
+static const char * progress_usage[] = { "show progress", NULL };
+#define OPTION_PROGRESS "progress"
+#define ALIAS_PROGRESS "p"
+
+static const char * bufsize_usage[] = { "size of file-buffer ( default=1MB )", NULL };
+#define OPTION_BUFSIZE "bufsize"
+#define ALIAS_BUFSIZE "b"
+
+/* KMain() falls back to 5 * 1024 * 1024 for this option */
+static const char * curcache_usage[] = { "size of cursor-cache ( default=5MB )", NULL };
+#define OPTION_CURCACHE "curcache"
+#define ALIAS_CURCACHE "c"
+
+/* KMain() falls back to 1024L * 1024 * 100 for this option */
+static const char * mem_usage[] = { "memory limit for sorting ( default=100MB )", NULL };
+#define OPTION_MEM "mem"
+#define ALIAS_MEM "m"
+
+static const char * temp_usage[] = { "where to put temp. files ( default=curr. dir )", NULL };
+#define OPTION_TEMP "temp"
+#define ALIAS_TEMP "t"
+
+static const char * threads_usage[] = { "how many threads ( default=1 )", NULL };
+#define OPTION_THREADS "threads"
+#define ALIAS_THREADS "e"
+
+static const char * index_usage[] = { "name of index-file", NULL };
+#define OPTION_INDEX "index"
+#define ALIAS_INDEX "i"
+
+/* the option-table handed to ArgsMakeAndHandle()
+   OPTION_RANGE has to stay the first entry: Usage() starts printing at index 1
+   to keep the row-range option unadvertised
+   NOTE(review): the columns appear to be name, aliases, help-generator, help-text,
+   max. count, needs-value, required - confirm against OptDef in kapp/args.h */
+OptDef ToolOptions[] =
+{
+    { OPTION_RANGE, ALIAS_RANGE, NULL, range_usage, 1, true, false },
+    { OPTION_LOOKUP, ALIAS_LOOKUP, NULL, lookup_usage, 1, true, false },
+    { OPTION_FORMAT, ALIAS_FORMAT, NULL, format_usage, 1, true, false },
+    { OPTION_OUTPUT, ALIAS_OUTPUT, NULL, output_usage, 1, true, false },
+    { OPTION_BUFSIZE, ALIAS_BUFSIZE, NULL, bufsize_usage, 1, true, false },
+    { OPTION_CURCACHE, ALIAS_CURCACHE, NULL, curcache_usage, 1, true, false },
+    { OPTION_MEM, ALIAS_MEM, NULL, mem_usage, 1, true, false },
+    { OPTION_TEMP, ALIAS_TEMP, NULL, temp_usage, 1, true, false },
+    { OPTION_THREADS, ALIAS_THREADS, NULL, threads_usage, 1, true, false },
+    { OPTION_INDEX, ALIAS_INDEX, NULL, index_usage, 1, true, false },
+    { OPTION_PROGRESS, ALIAS_PROGRESS, NULL, progress_usage, 1, false, false }
+};
+
+const char UsageDefaultName[] = "fastdump";
+
+/* UsageSummary
+ *  Print the short "Usage:" synopsis for the program name `progname`.
+ *  Returns the rc of KOutMsg().
+ */
+rc_t CC UsageSummary( const char * progname )
+{
+    static const char synopsis[] = "\nUsage:\n %s <path> [options]\n\n";
+    return KOutMsg( synopsis, progname );
+}
+
+
+/* Usage
+ *  Print the full help: synopsis, one line per tool option, the standard
+ *  options and the version.
+ *
+ *  The program name is taken from `args`; if `args` is NULL or the name
+ *  cannot be obtained, UsageDefaultName is printed instead and the
+ *  corresponding rc is returned after the help has been printed.
+ */
+rc_t CC Usage ( const Args * args )
+{
+    const uint32_t num_opts = ( sizeof ToolOptions ) / ( sizeof ToolOptions[ 0 ] );
+    const char * progname = UsageDefaultName;
+    const char * fullpath = UsageDefaultName;
+    uint32_t i;
+    rc_t rc;
+
+    if ( args != NULL )
+        rc = ArgsProgram( args, &fullpath, &progname );
+    else
+        rc = RC( rcApp, rcArgv, rcAccessing, rcSelf, rcNull );
+
+    if ( rc != 0 )
+    {
+        fullpath = UsageDefaultName;
+        progname = fullpath;
+    }
+
+    UsageSummary( progname );
+
+    KOutMsg( "Options:\n" );
+    /* index 0 is the row-range option, which is deliberately not advertised */
+    i = 1;
+    while ( i < num_opts )
+    {
+        const OptDef * opt = &ToolOptions[ i ];
+        HelpOptionLine( opt->aliases, opt->name, NULL, opt->help );
+        ++i;
+    }
+
+    HelpOptionsStandard();
+    HelpVersion( fullpath, KAppVersion() );
+    return rc;
+}
+
+
+/* KAppVersion
+ *  Report the version of this tool as the 4-part version code 0xMMmmrrrr:
+ *    MM   = major release
+ *    mm   = minor release
+ *    rrrr = bug-fix release
+ *  The value is the FASTDUMP_VERS constant.
+ */
+ver_t CC KAppVersion( void )
+{
+    return FASTDUMP_VERS;
+}
+
+
+/* -------------------------------------------------------------------------------------------- */
+
+/* fd_ctx
+ *  All settings of one run of the tool, filled from the command line in KMain().
+ */
+typedef struct fd_ctx
+{
+ cmn_params cmn; /* parameters shared with the table-iterators */
+ const char * lookup_filename; /* lookup-file, option or default derived from the accession */
+ const char * output_filename; /* output-file, option or default derived from the accession */
+ const char * index_filename; /* index-file, option or default derived from the accession */
+ const char * temp_path; /* where to put temp. files, NULL if not given */
+ size_t buf_size, mem_limit; /* size of the file-buffer, memory limit for sorting */
+ uint64_t num_threads; /* more than 1 selects the multi-threaded lookup production */
+} fd_ctx;
+
+
+/* init_sorter_params
+ *  Fill every field of `sp` from the tool context.
+ *  The source iterator, the prefix, the progress-object and the number of
+ *  threads get neutral values ( NULL / 0 ); the caller sets the ones it needs.
+ */
+static void init_sorter_params( const fd_ctx * fd_ctx, sorter_params * sp )
+{
+    /* taken from the common iterator parameters */
+    sp->dir = fd_ctx->cmn.dir;
+    sp->acc = fd_ctx->cmn.acc;
+    sp->cursor_cache = fd_ctx->cmn.cursor_cache;
+    sp->show_progress = fd_ctx->cmn.show_progress;
+
+    /* file names and locations */
+    sp->output_filename = fd_ctx->output_filename;
+    sp->index_filename = fd_ctx->index_filename;
+    sp->temp_path = fd_ctx->temp_path;
+
+    /* sizes */
+    sp->mem_limit = fd_ctx->mem_limit;
+    sp->buf_size = fd_ctx->buf_size;
+
+    /* to be set by the caller if needed */
+    sp->src = NULL; /* sorter takes ownership! */
+    sp->prefix = 0;
+    sp->sort_progress = NULL;
+    sp->num_threads = 0;
+}
+
+/* --------------------------------------------------------------------------------------------
+    produce the lookup-table by iterating over the PRIMARY_ALIGNMENT - table:
+   --------------------------------------------------------------------------------------------
+    reading SEQ_SPOT_ID, SEQ_READ_ID and RAW_READ
+    SEQ_SPOT_ID and SEQ_READ_ID are merged into a 64-bit-key
+    RAW_READ is read as 4na-unpacked ( Schema does not provide 4na-packed for this column )
+    these key-pairs are temporarily stored in a KVector until a limit is reached
+    after that limit is reached they are written sorted into the file-system as sub-files
+    this repeats until the requested row-range is exhausted ( row_range ... NULL -> all rows )
+    These sub-files are then merge-sorted into the final output-file.
+    This output-file is a binary data-file:
+    content: [KEY][RAW_READ]
+    KEY... 64-bit value as SEQ_SPOT_ID shifted left by 1 bit, zero-bit contains SEQ_READ_ID
+    RAW_READ... 16-bit binary-chunk-length, followed by n bytes of packed 4na
+
+    this variant creates one raw-read-iterator and runs a single sorter on it
+-------------------------------------------------------------------------------------------- */
+static rc_t single_threaded_make_lookup( fd_ctx * fd_ctx )
+{
+    sorter_params sp;
+    struct raw_read_iter * reader;
+    rc_t rc = make_raw_read_iter( &fd_ctx->cmn, &reader );
+    if ( rc != 0 )
+        return rc;
+
+    init_sorter_params( fd_ctx, &sp );
+    sp.src = reader; /* sorter takes ownership! */
+    return run_sorter( &sp );
+}
+
+
+/* multi_threaded_make_lookup
+ *  Produce the lookup-table with a pool of sorters: the sorter parameters are
+ *  taken from the context, the number of threads is passed on and
+ *  run_sorter_pool() does the work. Returns the rc of run_sorter_pool().
+ */
+static rc_t multi_threaded_make_lookup( fd_ctx * fd_ctx )
+{
+    sorter_params pool_params;
+    rc_t rc;
+
+    init_sorter_params( fd_ctx, &pool_params );
+    pool_params.num_threads = fd_ctx->num_threads;
+    rc = run_sorter_pool( &pool_params );
+    return rc;
+}
+
+
+/* --------------------------------------------------------------------------------------------
+    produce the lookup-table by iterating over the PRIMARY_ALIGNMENT - table:
+   --------------------------------------------------------------------------------------------
+    the result is a binary data-file:
+    content: [KEY][RAW_READ]
+    KEY... 64-bit value as SEQ_SPOT_ID shifted left by 1 bit, zero-bit contains SEQ_READ_ID
+    RAW_READ... 16-bit binary-chunk-length, followed by n bytes of packed 4na
+
+    more than one requested thread selects the multi-threaded producer,
+    everything else the single-threaded one
+-------------------------------------------------------------------------------------------- */
+static rc_t fastdump_make_lookup( fd_ctx * fd_ctx )
+{
+    if ( fd_ctx->num_threads > 1 )
+        return multi_threaded_make_lookup( fd_ctx );
+    return single_threaded_make_lookup( fd_ctx );
+}
+
+
+/* --------------------------------------------------------------------------------------------
+    produce special-output ( SPOT_ID,READ,SPOT_GROUP ) or fastq-output
+    by iterating over the SEQUENCE - table:
+   --------------------------------------------------------------------------------------------
+    if the lookup-file does not exist yet, it is produced first; for that step the
+    lookup-filename temporarily takes the place of the output-filename
+    after that the join is executed with the parameters taken from the context
+-------------------------------------------------------------------------------------------- */
+
+static rc_t perform_join( fd_ctx * fd_ctx, format_t fmt )
+{
+    join_params jp;
+    rc_t rc = 0;
+
+    if ( !file_exists( fd_ctx->cmn.dir, "%s", fd_ctx->lookup_filename ) )
+    {
+        /* redirect the output of the lookup production into the lookup-file */
+        const char * saved_output = fd_ctx->output_filename;
+        fd_ctx->output_filename = fd_ctx->lookup_filename;
+        rc = fastdump_make_lookup( fd_ctx );
+        fd_ctx->output_filename = saved_output;
+    }
+
+    if ( rc != 0 )
+        return rc;
+
+    /* files and locations */
+    jp.dir = fd_ctx->cmn.dir;
+    jp.accession = fd_ctx->cmn.acc;
+    jp.lookup_filename = fd_ctx->lookup_filename;
+    jp.index_filename = fd_ctx->index_filename;
+    jp.output_filename = fd_ctx->output_filename;
+    jp.temp_path = fd_ctx->temp_path;
+
+    /* sizes and behavior */
+    jp.buf_size = fd_ctx->buf_size;
+    jp.cur_cache = fd_ctx->cmn.cursor_cache;
+    jp.show_progress = fd_ctx->cmn.show_progress;
+    jp.num_threads = fd_ctx->num_threads;
+    jp.fmt = fmt;
+
+    /* not supplied by this caller */
+    jp.join_progress = NULL;
+    jp.first = 0;
+    jp.count = 0;
+
+    return execute_join( &jp );
+}
+
+
+/*
+static rc_t fastdump_test( fd_ctx * fd_ctx )
+{
+ rc_t rc = 0;
+ struct index_reader * index = NULL;
+
+ if ( !file_exists( fd_ctx->cmn.dir, "%s", fd_ctx->lookup_filename ) )
+ {
+ const char * temp = fd_ctx->output_filename;
+ fd_ctx->output_filename = fd_ctx->lookup_filename;
+ rc = fastdump_make_lookup( fd_ctx );
+ fd_ctx->output_filename = temp;
+ }
+
+ if ( fd_ctx->index_filename != NULL )
+ {
+ if ( file_exists( fd_ctx->cmn.dir, "%s", fd_ctx->index_filename ) )
+ rc = make_index_reader( fd_ctx->cmn.dir, &index, fd_ctx->buf_size, "%s", fd_ctx->index_filename );
+ }
+ if ( rc == 0 )
+ {
+ struct lookup_reader * lookup;
+ rc = make_lookup_reader( fd_ctx->cmn.dir, index, &lookup, fd_ctx->buf_size,
+ "%s", fd_ctx->lookup_filename );
+ if ( rc == 0 )
+ {
+ uint64_t max_key = 0;
+ rc = get_max_key( index, &max_key );
+ if ( rc == 0 )
+ {
+ uint64_t key_to_find = 7549714;
+ uint64_t key_found = 0;
+ KOutMsg( "max-key = %ld\n", max_key );
+ rc_t rc1 = seek_lookup_reader( lookup, key_to_find, &key_found, true );
+ if ( rc1 == 0 )
+ KOutMsg( "key '%ld' found\n", key_to_find );
+ else
+ KOutMsg( "key '%ld' not found, nearest: %ld\n", key_to_find, key_found );
+ }
+ release_lookup_reader( lookup );
+ }
+ release_index_reader( index );
+ }
+ return rc;
+}
+*/
+
+/* -------------------------------------------------------------------------------------------- */
+
+/* KMain
+ *  Entry point of the tool:
+ *    - parses the command line against ToolOptions
+ *    - takes the first parameter as accession
+ *    - collects all options into a fd_ctx, deriving defaults for the
+ *      lookup-, index- and output-file from the accession
+ *    - runs the requested format ( special / fastq -> join, lookup -> lookup production )
+ *    - removes the lookup- and index-file if they were created under a default name
+ *
+ *  Returns 0 on success, otherwise the rc of the first step that failed.
+ */
+rc_t CC KMain ( int argc, char *argv [] )
+{
+    Args * args;
+    uint32_t num_options = sizeof ToolOptions / sizeof ToolOptions [ 0 ];
+    rc_t rc = ArgsMakeAndHandle ( &args, argc, argv, 1, ToolOptions, num_options );
+    if ( rc != 0 )
+        ErrMsg( "ArgsMakeAndHandle() -> %R", rc );
+    else
+    {
+        fd_ctx ctx;
+        rc = ArgsParamValue( args, 0, (const void **)&ctx.cmn.acc );
+        if ( rc != 0 )
+            ErrMsg( "ArgsParamValue() -> %R", rc );
+        else
+        {
+            const char * format = get_str_option( args, OPTION_FORMAT, NULL );
+            format_t fmt = get_format_t( format );
+            char dflt_lookup[ 4096 ];
+            char dflt_index[ 4096 ];
+            char dflt_output[ 4096 ];
+
+            dflt_lookup[ 0 ] = 0;
+            dflt_index[ 0 ] = 0;
+            dflt_output[ 0 ] = 0;
+
+            /* do not hand uninitialized members to the iterators */
+            ctx.cmn.dir = NULL;
+            ctx.cmn.first = 0;
+            ctx.cmn.count = 0;
+
+            ctx.cmn.row_range = get_str_option( args, OPTION_RANGE, NULL );
+            ctx.cmn.cursor_cache = get_size_t_option( args, OPTION_CURCACHE, 5 * 1024 * 1024 );
+            ctx.cmn.show_progress = get_bool_option( args, OPTION_PROGRESS );
+
+            ctx.temp_path = get_str_option( args, OPTION_TEMP, NULL );
+            ctx.output_filename = get_str_option( args, OPTION_OUTPUT, NULL );
+            ctx.lookup_filename = get_str_option( args, OPTION_LOOKUP, NULL );
+            ctx.index_filename = get_str_option( args, OPTION_INDEX, NULL );
+            ctx.buf_size = get_size_t_option( args, OPTION_BUFSIZE, 1024 * 1024 );
+            ctx.mem_limit = get_size_t_option( args, OPTION_MEM, 1024L * 1024 * 100 );
+            ctx.num_threads = get_uint64_t_option( args, OPTION_THREADS, 1 );
+
+            /* derive the file names that have not been given, stop at the first failure
+               so that no NULL file name reaches the worker functions */
+            if ( ctx.lookup_filename == NULL )
+            {
+                rc = make_prefixed( dflt_lookup, sizeof dflt_lookup, ctx.temp_path,
+                                    ctx.cmn.acc, ".lookup" );
+                if ( rc == 0 )
+                    ctx.lookup_filename = dflt_lookup;
+            }
+
+            if ( rc == 0 && ctx.index_filename == NULL )
+            {
+                rc = make_prefixed( dflt_index, sizeof dflt_index, ctx.temp_path,
+                                    ctx.cmn.acc, ".lookup.idx" );
+                if ( rc == 0 )
+                    ctx.index_filename = dflt_index;
+            }
+
+            if ( rc == 0 && ctx.output_filename == NULL )
+            {
+                rc = make_prefixed( dflt_output, sizeof dflt_output, NULL,
+                                    ctx.cmn.acc, ".txt" );
+                if ( rc == 0 )
+                    ctx.output_filename = dflt_output;
+            }
+
+            if ( rc != 0 )
+                ErrMsg( "make_prefixed() -> %R", rc );
+            else
+            {
+                rc = KDirectoryNativeDir( &ctx.cmn.dir );
+                if ( rc != 0 )
+                    ErrMsg( "KDirectoryNativeDir() -> %R", rc );
+                else
+                {
+                    switch( fmt )
+                    {
+                        case ft_special :
+                        case ft_fastq : rc = perform_join( &ctx, fmt ); break;
+                        case ft_lookup : rc = fastdump_make_lookup( &ctx ); break;
+                        case ft_test : /* rc = fastdump_test( &ctx ); */ break;
+                        default : break;
+                    }
+
+                    /* files created under a default name are temporary: remove them */
+                    if ( dflt_lookup[ 0 ] != 0 )
+                        KDirectoryRemove( ctx.cmn.dir, true, "%s", dflt_lookup );
+
+                    if ( dflt_index[ 0 ] != 0 )
+                        KDirectoryRemove( ctx.cmn.dir, true, "%s", dflt_index );
+
+                    KDirectoryRelease( ctx.cmn.dir );
+                }
+            }
+        }
+        /* the option strings point into args, release it only after all work is done */
+        ArgsWhack( args );
+    }
+    return rc;
+}
diff --git a/tools/kget/kget.vers b/tools/fastdump/fastdump.vers
similarity index 100%
copy from tools/kget/kget.vers
copy to tools/fastdump/fastdump.vers
diff --git a/tools/fastdump/fastq_iter.c b/tools/fastdump/fastq_iter.c
new file mode 100644
index 0000000..b5ae751
--- /dev/null
+++ b/tools/fastdump/fastq_iter.c
@@ -0,0 +1,97 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "fastq_iter.h"
+#include "helper.h"
+
+#include <os-native.h>
+#include <sysalloc.h>
+
+/* fastq_iter
+ *  Iterator over the SEQUENCE table, see make_fastq_iter().
+ */
+typedef struct fastq_iter
+{
+ struct cmn_iter * cmn; /* the underlying common iterator, owned by this object */
+ uint32_t prim_alig_id, cmp_read_id, quality_id; /* column-ids of PRIMARY_ALIGNMENT_ID, CMP_READ and QUALITY */
+} fastq_iter;
+
+
+/* destroy_fastq_iter
+ *  Destroy the underlying common iterator and free the object itself.
+ *  A NULL iterator is ignored.
+ */
+void destroy_fastq_iter( struct fastq_iter * iter )
+{
+    if ( iter == NULL )
+        return;
+
+    destroy_cmn_iter( iter->cmn );
+    free( ( void * ) iter );
+}
+
+/* make_fastq_iter
+ *  Create an iterator over the SEQUENCE table with the columns
+ *  PRIMARY_ALIGNMENT_ID, CMP_READ and QUALITY ( as phred_33 text ).
+ *  The row-range of the iterator is set up via the PRIMARY_ALIGNMENT_ID column.
+ *
+ *  On success the new iterator is stored in `*iter` and 0 is returned.
+ *  On failure everything allocated so far is destroyed, `*iter` is not
+ *  touched and the rc of the failing step is returned.
+ */
+rc_t make_fastq_iter( cmn_params * params, struct fastq_iter ** iter )
+{
+    rc_t rc = 0;
+    fastq_iter * self = calloc( 1, sizeof * self );
+    if ( self == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        ErrMsg( "make_fastq_iter.calloc( %d ) -> %R", ( sizeof * self ), rc );
+        return rc;
+    }
+
+    /* each step is only attempted if all previous steps succeeded */
+    rc = make_cmn_iter( params, "SEQUENCE", &self->cmn );
+    if ( rc == 0 )
+        rc = cmn_iter_add_column( self->cmn, "PRIMARY_ALIGNMENT_ID", &self->prim_alig_id );
+    if ( rc == 0 )
+        rc = cmn_iter_add_column( self->cmn, "CMP_READ", &self->cmp_read_id );
+    if ( rc == 0 )
+        rc = cmn_iter_add_column( self->cmn, "(INSDC:quality:text:phred_33)QUALITY", &self->quality_id );
+    if ( rc == 0 )
+        rc = cmn_iter_range( self->cmn, self->prim_alig_id );
+
+    if ( rc == 0 )
+        *iter = self;
+    else
+        destroy_fastq_iter( self );
+
+    return rc;
+}
+
+/* get_from_fastq_iter
+ *  Advance the iterator and, if there is a row, fill `rec` with the row-id,
+ *  up to 2 values of PRIMARY_ALIGNMENT_ID, CMP_READ and QUALITY.
+ *
+ *  Returns what cmn_iter_next() returned; `*rc` receives the rc of the
+ *  iteration step or of the first read that failed. The strings in `rec`
+ *  are filled by cmn_read_String().
+ */
+bool get_from_fastq_iter( struct fastq_iter * iter, fastq_rec * rec, rc_t * rc )
+{
+    struct cmn_iter * src = iter->cmn;
+
+    if ( !cmn_iter_next( src, rc ) )
+        return false;
+
+    rec->row_id = cmn_iter_row_id( src );
+
+    *rc = cmn_read_uint64_array( src, iter->prim_alig_id, rec->prim_alig_id, 2 );
+    if ( *rc != 0 )
+        return true;
+
+    *rc = cmn_read_String( src, iter->cmp_read_id, &rec->cmp_read );
+    if ( *rc != 0 )
+        return true;
+
+    *rc = cmn_read_String( src, iter->quality_id, &rec->quality );
+    return true;
+}
+
+/* get_row_count_of_fastq_iter
+ *  Report the row-count of the underlying common iterator.
+ */
+uint64_t get_row_count_of_fastq_iter( struct fastq_iter * iter )
+{
+    struct cmn_iter * src = iter->cmn;
+    return cmn_iter_row_count( src );
+}
diff --git a/test/samline/refbases.h b/tools/fastdump/fastq_iter.h
similarity index 67%
copy from test/samline/refbases.h
copy to tools/fastdump/fastq_iter.h
index 94136fb..3660e3d 100644
--- a/test/samline/refbases.h
+++ b/tools/fastdump/fastq_iter.h
@@ -24,15 +24,42 @@
*
*/
-#ifndef _h_refbases_
-#define _h_refbases_
+#ifndef _h_fastq_iter_
+#define _h_fastq_iter_
#ifdef __cplusplus
extern "C" {
#endif
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
+
+#ifndef _h_cmn_iter_
+#include "cmn_iter.h"
+#endif
+
+struct fastq_iter;
+
+/* fastq_rec
+ *  One row of the SEQUENCE table as delivered by get_from_fastq_iter().
+ */
+typedef struct fastq_rec
+{
+ int64_t row_id; /* row-id of the delivered row */
+ uint64_t prim_alig_id[ 2 ]; /* up to 2 values of the PRIMARY_ALIGNMENT_ID column */
+ String cmp_read; /* content of the CMP_READ column */
+ String quality; /* content of the QUALITY column ( read as phred_33 text ) */
+} fastq_rec;
+
+void destroy_fastq_iter( struct fastq_iter * iter );
+
+rc_t make_fastq_iter( cmn_params * params, struct fastq_iter ** iter );
+
+bool get_from_fastq_iter( struct fastq_iter * iter, fastq_rec * rec, rc_t * rc );
+
+uint64_t get_row_count_of_fastq_iter( struct fastq_iter * iter );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/file_printer.c b/tools/fastdump/file_printer.c
new file mode 100644
index 0000000..1e79216
--- /dev/null
+++ b/tools/fastdump/file_printer.c
@@ -0,0 +1,130 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "file_printer.h"
+#include "helper.h"
+
+#include <kfs/file.h>
+#include <kfs/buffile.h>
+
+
+/* file_printer
+ *  Formats text into a buffer and appends it to a file, see file_print().
+ */
+typedef struct file_printer
+{
+ struct KFile * f; /* the file written to, released in destroy_file_printer() */
+ SBuffer print_buffer; /* receives the formatted text of each file_print() call */
+ uint64_t file_pos; /* position of the next write, advanced by the bytes written */
+} file_printer;
+
+
+/* destroy_file_printer
+ *  Release the file ( if there is one ), release the print-buffer and free
+ *  the object itself. A NULL printer is ignored.
+ */
+void destroy_file_printer( struct file_printer * printer )
+{
+    if ( printer == NULL )
+        return;
+
+    if ( printer->f != NULL )
+        KFileRelease( printer->f );
+
+    release_SBuffer( &printer->print_buffer );
+    free( ( void * ) printer );
+}
+
+
+/* make_file_printer
+ *  Create the file named by `fmt` and the following arguments in `dir` and
+ *  wrap it into a file_printer.
+ *
+ *  dir               ... directory to create the file in
+ *  printer           ... receives the new printer on success, untouched on failure
+ *  file_buffer_size  ... if > 0 the file is wrapped into a buffered file of that size
+ *  print_buffer_size ... size of the buffer each file_print() call formats into
+ *
+ *  Returns 0 on success, otherwise the rc of the step that failed; on failure
+ *  everything acquired so far ( file, printer object ) is released again.
+ */
+rc_t make_file_printer( KDirectory *dir, struct file_printer ** printer,
+        size_t file_buffer_size, size_t print_buffer_size, const char * fmt, ... )
+{
+    rc_t rc;
+    struct KFile * f;
+
+    va_list args;
+    va_start ( args, fmt );
+
+    rc = KDirectoryVCreateFile( dir, &f, false, 0664, kcmInit, fmt, args );
+    if ( rc != 0 )
+        ErrMsg( "KDirectoryVCreateFile() -> %R", rc );
+    else
+    {
+        struct KFile * temp_file = f;
+        if ( file_buffer_size > 0 )
+        {
+            /* the raw file is released here in any case, on failure nothing else is held */
+            rc = KBufFileMakeWrite( &temp_file, f, false, file_buffer_size );
+            KFileRelease( f );
+            if ( rc != 0 )
+                ErrMsg( "KBufFileMakeWrite() -> %R", rc );
+        }
+        if ( rc == 0 )
+        {
+            file_printer * p = calloc( 1, sizeof * p );
+            if ( p == NULL )
+            {
+                KFileRelease( temp_file );
+                rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+                ErrMsg( "calloc( %d ) -> %R", ( sizeof * p ), rc );
+            }
+            else
+            {
+                rc = make_SBuffer( &p->print_buffer, print_buffer_size );
+                if ( rc != 0 )
+                {
+                    /* without a print-buffer the printer is useless: give back file and object */
+                    KFileRelease( temp_file );
+                    free( ( void * ) p );
+                }
+                else
+                {
+                    p->f = temp_file;
+                    *printer = p;
+                }
+            }
+        }
+    }
+
+    va_end ( args );
+    return rc;
+}
+
+
+/* file_print
+ *  Format `fmt` and the following arguments into the print-buffer of the
+ *  printer and write the result at the current file position.
+ *
+ *  The file position is only advanced if everything has been written.
+ *  Returns 0 on success, the rc of formatting or writing, or an rcInvalid rc
+ *  if fewer bytes than requested have been written.
+ */
+rc_t file_print( struct file_printer * printer, const char * fmt, ... )
+{
+    rc_t rc;
+    size_t requested, written;
+    const char * text;
+    va_list ap;
+
+    va_start ( ap, fmt );
+    rc = print_to_SBufferV( &printer->print_buffer, fmt, ap );
+    va_end ( ap );
+    if ( rc != 0 )
+        return rc;
+
+    requested = printer->print_buffer.S.size;
+    text = printer->print_buffer.S.addr;
+    rc = KFileWriteAll( printer->f, printer->file_pos, text, requested, &written );
+    if ( rc != 0 )
+    {
+        ErrMsg( "KFileWriteAll( at %lu ) -> %R", printer->file_pos, rc );
+        return rc;
+    }
+
+    if ( written == requested )
+    {
+        printer->file_pos += written;
+        return rc;
+    }
+
+    rc = RC( rcVDB, rcNoTarg, rcWriting, rcFormat, rcInvalid );
+    ErrMsg( "KFileWriteAll( at %lu ) ( %d vs %d ) -> %R", printer->file_pos, requested, written, rc );
+    return rc;
+}
diff --git a/test/samline/refbases.h b/tools/fastdump/file_printer.h
similarity index 73%
copy from test/samline/refbases.h
copy to tools/fastdump/file_printer.h
index 94136fb..83b8fd3 100644
--- a/test/samline/refbases.h
+++ b/tools/fastdump/file_printer.h
@@ -24,15 +24,30 @@
*
*/
-#ifndef _h_refbases_
-#define _h_refbases_
+#ifndef _h_file_printer_
+#define _h_file_printer_
#ifdef __cplusplus
extern "C" {
#endif
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+struct file_printer;
+
+void destroy_file_printer( struct file_printer * printer );
+
+rc_t make_file_printer( KDirectory *dir, struct file_printer ** printer,
+ size_t file_buffer_size, size_t print_buffer_size, const char * fmt, ... );
+
+rc_t file_print( struct file_printer * printer, const char * fmt, ... );
+
#ifdef __cplusplus
}
diff --git a/tools/fastdump/helper.c b/tools/fastdump/helper.c
new file mode 100644
index 0000000..dee80d8
--- /dev/null
+++ b/tools/fastdump/helper.c
@@ -0,0 +1,685 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "helper.h"
+#include <klib/log.h>
+#include <klib/printf.h>
+#include <klib/progressbar.h>
+#include <klib/time.h>
+#include <klib/out.h>
+#include <kfs/defs.h>
+#include <kfs/file.h>
+#include <kfs/buffile.h>
+#include <kproc/thread.h>
+
+/* ErrMsg
+ *  Format `fmt` and the following arguments into a local 4096-byte buffer
+ *  and log the result with level klogErr.
+ *
+ *  Returns the rc of the formatting if that failed ( nothing is logged in
+ *  that case ), otherwise the rc of the logging call.
+ */
+rc_t ErrMsg( const char * fmt, ... )
+{
+    char text[ 4096 ];
+    size_t text_len;
+    rc_t rc;
+    va_list ap;
+
+    va_start( ap, fmt );
+    rc = string_vprintf( text, sizeof text, &text_len, fmt, ap );
+    va_end( ap );
+
+    if ( rc != 0 )
+        return rc;
+    return pLogMsg( klogErr, "$(E)", "E=%s", text );
+}
+
+rc_t CC ArgsOptionCount( const struct Args * self, const char * option_name, uint32_t * count );
+rc_t CC ArgsOptionValue( const struct Args * self, const char * option_name, uint32_t iteration, const void ** value );
+
+/* get_str_option
+ *  Return the first value of the option `name`, or `dflt` if the option has
+ *  not been given or cannot be retrieved.
+ */
+const char * get_str_option( const struct Args *args, const char *name, const char * dflt )
+{
+    const char * value = dflt;
+    uint32_t occurrences;
+
+    if ( ArgsOptionCount( args, name, &occurrences ) != 0 )
+        return dflt;
+    if ( occurrences == 0 )
+        return dflt;
+    if ( ArgsOptionValue( args, name, 0, (const void**)&value ) != 0 )
+        return dflt;
+    return value;
+}
+
+/* get_bool_option
+ *  Return true if the option `name` has been given at least once and its
+ *  count could be retrieved, false otherwise.
+ */
+bool get_bool_option( const struct Args *args, const char *name )
+{
+    uint32_t occurrences;
+
+    if ( ArgsOptionCount( args, name, &occurrences ) != 0 )
+        return false;
+    return ( occurrences > 0 );
+}
+
+
+/* get_uint64_t_option
+ *  Return the value of the option `name` as unsigned number.
+ *
+ *  The value is parsed with strtol() and base 0, so decimal, octal ( leading 0 )
+ *  and hexadecimal ( leading 0x ) notations are accepted.
+ *
+ *  `dflt` is returned if the option has not been given, if its value is empty,
+ *  if it does not start with a number, or if the number is negative ( a negative
+ *  number would otherwise wrap around into a huge unsigned value ).
+ */
+uint64_t get_uint64_t_option( const struct Args * args, const char *name, uint64_t dflt )
+{
+    const char * s = get_str_option( args, name, NULL );
+    if ( s != NULL && s[ 0 ] != 0 )
+    {
+        char * endptr = NULL;
+        long parsed = strtol( s, &endptr, 0 );
+        /* endptr == s ... not a single character has been consumed */
+        if ( endptr != s && parsed >= 0 )
+            return ( uint64_t )parsed;
+    }
+    return dflt;
+}
+
+/* get_size_t_option
+ *  Return the value of the option `name` as size.
+ *
+ *  The value is a number, optionally followed by a unit given as its last character:
+ *      k or K ... multiply by 1024
+ *      m or M ... multiply by 1024 * 1024
+ *      g or G ... multiply by 1024 * 1024 * 1024
+ *  The number is parsed with strtol() and base 0. strtol() stops at the first
+ *  character that is not part of the number, so the unit does not have to be
+ *  cut off before parsing.
+ *
+ *  `dflt` is returned if the option has not been given, if its value is empty,
+ *  if it does not start with a number, or if the number is negative ( a negative
+ *  number would otherwise wrap around into a huge size ).
+ */
+size_t get_size_t_option( const struct Args * args, const char *name, size_t dflt )
+{
+    size_t res = dflt;
+    const char * s = get_str_option( args, name, NULL );
+    if ( s != NULL )
+    {
+        size_t l = string_size( s );
+        if ( l > 0 )
+        {
+            size_t multipl = 1;
+            char * endptr = NULL;
+            long parsed;
+
+            switch( s[ l - 1 ] )
+            {
+                case 'k' :
+                case 'K' : multipl = 1024; break;
+                case 'm' :
+                case 'M' : multipl = ( size_t )1024 * 1024; break;
+                case 'g' :
+                case 'G' : multipl = ( size_t )1024 * 1024 * 1024; break;
+                default : break;
+            }
+
+            parsed = strtol( s, &endptr, 0 );
+            /* endptr == s ... not a single character has been consumed */
+            if ( endptr != s && parsed >= 0 )
+                res = ( size_t )parsed * multipl;
+        }
+    }
+    return res;
+}
+
+
+/* make_SBuffer
+ *  Allocate `len` bytes for the buffer and mark it as empty.
+ *
+ *  On success the capacity ( buffer_size ) is `len` and 0 is returned.
+ *  On failure the buffer is left in a defined empty state ( no memory,
+ *  capacity 0 ) and an rcExhausted rc is returned.
+ */
+rc_t make_SBuffer( SBuffer * buffer, size_t len )
+{
+    rc_t rc = 0;
+    String * S = &buffer->S;
+
+    S->size = 0;
+    S->len = 0;
+    S->addr = malloc( len );
+    if ( S->addr == NULL )
+    {
+        /* a failed buffer must not pretend to have capacity */
+        buffer->buffer_size = 0;
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        ErrMsg( "malloc( %d ) -> %R", ( len ), rc );
+    }
+    else
+        buffer->buffer_size = len;
+    return rc;
+}
+
+
+/* release_SBuffer
+ *  Free the memory of the buffer and reset it to the empty state, so that
+ *  releasing the same buffer a second time is harmless.
+ *  A NULL buffer is ignored; the SBuffer object itself is not freed.
+ */
+void release_SBuffer( SBuffer * buffer )
+{
+    if ( buffer != NULL )
+    {
+        String * S = &buffer->S;
+        free( ( void * ) S->addr ); /* free( NULL ) is a no-op */
+        S->addr = NULL;
+        S->size = 0;
+        S->len = 0;
+        buffer->buffer_size = 0;
+    }
+}
+
+
+/* print_to_SBufferV
+ *  Format `fmt` and `args` into the memory of the buffer, limited by its
+ *  capacity. Size and length of the buffer's string are set to the number
+ *  of bytes reported by string_vprintf(), also if the formatting failed.
+ *  Returns the rc of string_vprintf().
+ */
+rc_t print_to_SBufferV( SBuffer * buffer, const char * fmt, va_list args )
+{
+    size_t written = 0;
+    rc_t rc = string_vprintf( ( char * )buffer->S.addr, buffer->buffer_size, &written, fmt, args );
+
+    if ( rc != 0 )
+        ErrMsg( "string_vprintf() -> %R", rc );
+
+    buffer->S.size = written;
+    buffer->S.len = buffer->S.size;
+    return rc;
+}
+
+
+/* print_to_SBuffer
+ *  Variadic front-end of print_to_SBufferV(): format `fmt` and the following
+ *  arguments into the buffer. Returns the rc of print_to_SBufferV().
+ */
+rc_t print_to_SBuffer( SBuffer * buffer, const char * fmt, ... )
+{
+    va_list ap;
+    rc_t status;
+
+    va_start( ap, fmt );
+    status = print_to_SBufferV( buffer, fmt, ap );
+    va_end( ap );
+    return status;
+}
+
+
+/* add_column
+ *  Add the column `name` to the cursor, its id is stored in `*id`.
+ *  A failure is reported via ErrMsg(); the rc of VCursorAddColumn() is returned.
+ */
+rc_t add_column( const VCursor * cursor, const char * name, uint32_t * id )
+{
+    rc_t rc = VCursorAddColumn( cursor, id, name );
+    if ( rc == 0 )
+        return rc;
+
+    ErrMsg( "VCursorAddColumn( '%s' ) -> %R", name, rc );
+    return rc;
+}
+
+
+/* make_row_iter
+ *  Prepare `ranges` and create an iterator for it.
+ *
+ *  If `ranges` is empty, the range `first` / `count` is added to it,
+ *  otherwise `ranges` is trimmed to `first` / `count`.
+ *  The iterator is only created if that preparation succeeded; a failed
+ *  preparation is returned to the caller instead of being overwritten by
+ *  the rc of the iterator creation.
+ *
+ *  Returns 0 on success, otherwise the rc of the step that failed.
+ */
+rc_t make_row_iter( struct num_gen * ranges, int64_t first, uint64_t count,
+        const struct num_gen_iter ** iter )
+{
+    rc_t rc;
+    if ( num_gen_empty( ranges ) )
+    {
+        rc = num_gen_add( ranges, first, count );
+        if ( rc != 0 )
+            ErrMsg( "num_gen_add( %li, %ld ) -> %R", first, count, rc );
+    }
+    else
+    {
+        rc = num_gen_trim( ranges, first, count );
+        if ( rc != 0 )
+            ErrMsg( "num_gen_trim( %li, %ld ) -> %R", first, count, rc );
+    }
+
+    if ( rc == 0 )
+    {
+        rc = num_gen_iterator_make( ranges, iter );
+        if ( rc != 0 )
+            ErrMsg( "num_gen_iterator_make() -> %R", rc );
+    }
+    return rc;
+}
+
+
+/* split_string
+ *  Split `in` at the first occurrence of the character `ch`:
+ *  `p0` receives the part before it, `p1` the part after it. Both parts
+ *  point into the memory of `in`, nothing is copied.
+ *
+ *  Returns 0 on success, an rcInvalid rc if `ch` does not occur in `in`
+ *  ( `p0` and `p1` are not touched in that case ).
+ */
+rc_t split_string( String * in, String * p0, String * p1, uint32_t ch )
+{
+    char * separator = string_chr( in->addr, in->size, ch );
+    if ( separator == NULL )
+        return RC( rcVDB, rcNoTarg, rcConstructing, rcTransfer, rcInvalid );
+
+    /* the order of these assignments is kept as is, the parts may alias `in` */
+    p0->addr = in->addr;
+    p0->len = p0->size = ( separator - p0->addr );
+    p1->addr = separator + 1;
+    p1->len = p1->size = in->len - ( p0->len + 1 );
+    return 0;
+}
+
+
+/* -----------------------------------------------------------------------
+ * get_format_t
+ *  Translates the name of a format into its enum-value, the comparison
+ *  is done with StringCaseCompare():
+ *      "fastq" -> ft_fastq, "lookup" -> ft_lookup, "test" -> ft_test
+ *  NULL, an empty string or any other name results in ft_special.
+ * ----------------------------------------------------------------------- */
+format_t get_format_t( const char * format )
+{
+    static const struct { const char * name; format_t value; } known[] =
+    {
+        { "fastq",  ft_fastq },
+        { "lookup", ft_lookup },
+        { "test",   ft_test }
+    };
+
+    if ( format != NULL && format[ 0 ] != 0 )
+    {
+        uint32_t idx;
+        String requested;
+        StringInitCString( &requested, format );
+        for ( idx = 0; idx < ( sizeof known / sizeof known[ 0 ] ); ++idx )
+        {
+            String candidate;
+            StringInitCString( &candidate, known[ idx ].name );
+            if ( 0 == StringCaseCompare ( &requested, &candidate ) )
+                return known[ idx ].value;
+        }
+    }
+    return ft_special;
+}
+
+
+/* -----------------------------------------------------------------------
+ * make_key
+ *  Combines spot-id and read-id into one 64-bit key:
+ *      the spot-id is shifted left by one bit,
+ *      bit 0 is set if ( and only if ) the read-id is 2
+ * ----------------------------------------------------------------------- */
+uint64_t make_key( int64_t seq_spot_id, uint32_t seq_read_id )
+{
+    const uint64_t read_bit = ( seq_read_id == 2 ) ? 1 : 0;
+    uint64_t spot_bits = seq_spot_id;
+    return ( spot_bits << 1 ) | read_bit;
+}
+
+
+/* -----------------------------------------------------------------------
+ * pack_4na
+ *  Packs the bases of 'unpacked' into the memory of 'packed':
+ *      byte 0,1 ... number of bases ( 16 bit, high byte first; only the
+ *                   lower 16 bits of unpacked->len are stored )
+ *      byte 2.. ... 2 bases per byte: the lower 4 bits of each source-byte,
+ *                   even index -> high nibble, odd index -> low nibble
+ *  packed->S.len / S.size receive the number of bytes that have been
+ *  written ( header included ).
+ *  Nothing is written beyond packed->buffer_size: if the buffer cannot
+ *  even hold the header the result is empty, otherwise the bases that do
+ *  not fit are dropped and the reported length never exceeds the buffer.
+ * ----------------------------------------------------------------------- */
+void pack_4na( const String * unpacked, SBuffer * packed )
+{
+    uint32_t i;
+    const uint8_t * src = ( const uint8_t * )unpacked->addr;
+    uint8_t * dst = ( uint8_t * )packed->S.addr;
+    uint16_t dna_len = ( unpacked->len & 0xFFFF );
+    uint32_t len = 0;
+
+    if ( dst == NULL || packed->buffer_size < 2 )
+    {
+        /* no room for the 2 header-bytes */
+        packed->S.size = packed->S.len = 0;
+        return;
+    }
+
+    dst[ len++ ] = ( dna_len >> 8 );
+    dst[ len++ ] = ( dna_len & 0xFF );
+    for ( i = 0; i < unpacked->len && len < packed->buffer_size; ++i )
+    {
+        uint8_t base = ( src[ i ] & 0x0F );
+        if ( 0 == ( i & 0x01 ) )
+            dst[ len ] = ( base << 4 );
+        else
+            dst[ len++ ] |= base;
+    }
+    /* an odd number of processed bases leaves a half-filled byte at dst[ len ],
+       it was written above ( len < buffer_size ), so counting it is safe */
+    if ( i & 0x01 )
+        len++;
+    packed->S.size = packed->S.len = len;
+}
+
+
+/* translation of a 4-bit base-code into ASCII, every code that is not
+   exactly one of A(1), C(2), G(4), T(8) is translated into 'N' */
+static const char x4na_to_ASCII[ 16 ] =
+{
+    /* 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F */
+       'N', 'A', 'C', 'N', 'G', 'N', 'N', 'N', 'T', 'N', 'N', 'N', 'N', 'N', 'N', 'N'
+};
+
+
+/* -----------------------------------------------------------------------
+ * unpack_4na
+ *  Reverses pack_4na(): reads the 16-bit base-count from the first 2 bytes
+ *  of 'packed' ( high byte first ) and translates each following byte into
+ *  2 ASCII-characters ( high nibble first ) in the memory of 'unpacked'.
+ *
+ *  - the bytes are read as unsigned values, a signed char would corrupt
+ *    every length with a byte >= 0x80 by sign-extension
+ *  - nothing is written beyond unpacked->buffer_size
+ *  - unpacked->S.len / S.size never exceed the number of characters that
+ *    have actually been produced
+ *  - the result is 0-terminated if the terminator fits into the buffer
+ *  - a 'packed' of less than 2 bytes results in an empty string
+ * ----------------------------------------------------------------------- */
+void unpack_4na( const String * packed, SBuffer * unpacked )
+{
+    uint32_t i;
+    const uint8_t * src = ( const uint8_t * )packed->addr;
+    char * dst = ( char * )unpacked->S.addr;
+    uint32_t dst_idx = 0;
+    uint32_t dna_len = 0;
+
+    if ( packed->len >= 2 )
+    {
+        dna_len = src[ 0 ];
+        dna_len <<= 8;
+        dna_len |= src[ 1 ];
+        for ( i = 2; i < packed->len; ++i )
+        {
+            uint8_t packed_byte = src[ i ];
+            if ( dst_idx < unpacked->buffer_size )
+                dst[ dst_idx++ ] = x4na_to_ASCII[ ( packed_byte >> 4 ) & 0x0F ];
+            if ( dst_idx < unpacked->buffer_size )
+                dst[ dst_idx++ ] = x4na_to_ASCII[ packed_byte & 0x0F ];
+        }
+    }
+
+    /* do not claim more bases than have been unpacked */
+    if ( dna_len > dst_idx )
+        dna_len = dst_idx;
+    unpacked->S.len = unpacked->S.size = dna_len;
+    if ( dna_len < unpacked->buffer_size )
+        dst[ dna_len ] = 0;
+}
+
+
+/* -----------------------------------------------------------------------
+ * calc_percent
+ *  Returns 'value' in relation to 'max' as a scaled integer:
+ *      digits == 1 ... value * 1000 / max     ( 1/10 percent )
+ *      digits == 2 ... value * 10000 / max    ( 1/100 percent )
+ *      otherwise   ... value * 100 / max      ( percent )
+ *  If max is 0 the division is skipped and the scaled value is returned.
+ * ----------------------------------------------------------------------- */
+uint64_t calc_percent( uint64_t max, uint64_t value, uint16_t digits )
+{
+    uint64_t factor = 100;
+    uint64_t scaled;
+
+    if ( digits == 1 )
+        factor = 1000;
+    else if ( digits == 2 )
+        factor = 10000;
+
+    scaled = value * factor;
+    return ( max > 0 ) ? ( scaled / max ) : scaled;
+}
+
+
+/* -----------------------------------------------------------------------
+ * file_exists
+ * Builds a path from 'fmt' and the following arguments and asks
+ * KDirectoryVPathType() for its type.
+ * Returns true only if the reported type is exactly kptFile.
+ * ----------------------------------------------------------------------- */
+bool file_exists( const KDirectory * dir, const char * fmt, ... )
+{
+ uint32_t pt;
+ va_list list;
+
+ va_start( list, fmt );
+ pt = KDirectoryVPathType( dir, fmt, list );
+ va_end( list );
+
+ return ( pt == kptFile ) ;
+}
+
+
+/* -----------------------------------------------------------------------
+ * join_and_release_threads
+ *  Waits for every thread stored in the vector ( KThreadWait ) and then
+ *  releases it ( KThreadRelease ). Empty slots ( NULL ) are skipped.
+ *  The vector itself is not changed.
+ *
+ *  The valid indexes of a Vector are start .. start + length - 1, the
+ *  loop therefore ends at start + length and not at length.
+ * ----------------------------------------------------------------------- */
+void join_and_release_threads( Vector * threads )
+{
+    uint32_t i = VectorStart( threads );
+    const uint32_t end = i + VectorLength( threads );
+    for ( ; i < end; ++i )
+    {
+        KThread * thread = VectorGet( threads, i );
+        if ( thread != NULL )
+        {
+            KThreadWait( thread, NULL );
+            KThreadRelease( thread );
+        }
+    }
+}
+
+
+/* progress-state handed from concat_files() to copy_file() */
+typedef struct cf_progress
+{
+ struct progressbar * progressbar; /* NULL ... copy_file() does no progress-reporting */
+ uint64_t total_size; /* sum of the sizes of the input-files ( see total_filesize() ) */
+ uint64_t current_size; /* number of bytes copied so far */
+ uint32_t current_percent; /* the last value handed to update_progressbar() */
+} cf_progress;
+
+rc_t CC Quitting();
+
+/* -----------------------------------------------------------------------
+ * copy_file
+ * Appends the whole content of 'src' to 'dst', using a temporary buffer
+ * of 'buf_size' bytes.
+ *
+ * dst_pos ... in/out: the position in 'dst' to write at, advanced by
+ * the number of bytes written
+ * cfp ....... optional progress-state; progress is reported only if
+ * cfp and cfp->progressbar are not NULL
+ *
+ * The loop ends at the end of 'src' ( KFileRead delivers 0 bytes ),
+ * if KFileWrite transfers 0 bytes, if Quitting() returns a non-zero rc
+ * or on the first read/write error.
+ * ----------------------------------------------------------------------- */
+static rc_t copy_file( KFile * dst, const KFile * src, uint64_t * dst_pos,
+ size_t buf_size, cf_progress * cfp )
+{
+ rc_t rc = 0;
+ char * buffer = malloc( buf_size );
+ if ( buffer == NULL )
+ {
+ rc = RC( rcExe, rcFile, rcPacking, rcMemory, rcExhausted );
+ ErrMsg( "copy_file.malloc( %d ) -> %R", buf_size, rc );
+ }
+ else
+ {
+ uint64_t src_pos = 0;
+ /* starts with 1 only to enter the loop, 0 terminates it */
+ size_t num_trans = 1;
+ while ( rc == 0 && num_trans > 0 )
+ {
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ size_t num_read;
+ rc = KFileRead( src, src_pos, buffer, buf_size, &num_read );
+ if ( rc != 0 )
+ ErrMsg( "copy_file.KFileRead( at %lu ) -> %R", src_pos, rc );
+ else if ( num_read > 0 )
+ {
+ rc = KFileWrite( dst, *dst_pos, buffer, num_read, &num_trans );
+ if ( rc != 0 )
+ ErrMsg( "copy_file.KFileWrite( at %lu ) -> %R", *dst_pos, rc );
+ else
+ {
+ /* both positions advance by what has been WRITTEN, after a
+ short write the remainder is read again in the next round */
+ *dst_pos += num_trans;
+ src_pos += num_trans;
+ if ( cfp != NULL && cfp->progressbar != NULL )
+ {
+ uint32_t percent;
+
+ cfp->current_size += num_trans;
+ /* digits = 2 ... the result is scaled by 10000 */
+ /* NOTE(review): with total_size == 0 calc_percent() does not divide,
+ percent is then current_size * 10000 and the loop below
+ performs a very large number of updates - confirm that
+ total_size cannot be 0 here */
+ percent = calc_percent( cfp->total_size, cfp->current_size, 2 );
+ if ( percent > cfp->current_percent )
+ {
+ uint32_t i;
+ /* every step between the old and the new value is reported */
+ for ( i = cfp->current_percent + 1; i <= percent; ++i )
+ update_progressbar( cfp->progressbar, i );
+ cfp->current_percent = percent;
+ }
+ }
+ }
+ }
+ else
+ num_trans = 0;
+ }
+ }
+ free( buffer );
+ }
+ return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * total_filesize
+ *  Sums up the sizes of all files of the list in '*total' ( which is set
+ *  to 0 first ).
+ *  A file whose size cannot be determined is not an error, it just does
+ *  not contribute to the sum. The function fails only if the list itself
+ *  cannot be queried.
+ * ----------------------------------------------------------------------- */
+static rc_t total_filesize( const KDirectory * dir, const VNamelist * files, uint64_t *total )
+{
+    uint32_t n_files;
+    uint32_t i;
+    rc_t rc = VNameListCount( files, &n_files );
+
+    *total = 0;
+    if ( rc != 0 )
+    {
+        ErrMsg( "VNameListCount() -> %R", rc );
+        return rc;
+    }
+
+    for ( i = 0; i < n_files; ++i )
+    {
+        const char * name;
+        uint64_t file_size;
+
+        rc = VNameListGet( files, i, &name );
+        if ( rc != 0 )
+        {
+            ErrMsg( "VNameListGet( #%d) -> %R", i, rc );
+            break;
+        }
+        if ( KDirectoryFileSize( dir, &file_size, "%s", name ) == 0 )
+            *total += file_size;
+    }
+    return rc;
+}
+
+/* -----------------------------------------------------------------------
+ * concat_files
+ *  Creates the file 'output' ( kcmInit ) and appends the content of all
+ *  files of the list to it, in the order of the list.
+ *
+ *  buf_size ........ size of the buffers for the buffered files and for
+ *                    the copy-operation
+ *  show_progress ... if true, a progressbar is shown, based on the sum of
+ *                    the sizes of the input-files
+ *
+ *  An input-file that cannot be opened is silently skipped ( as before ).
+ *
+ *  Each file-reference is released exactly once: the unbuffered file right
+ *  after the attempt to wrap it, the buffered file only if wrapping was
+ *  successful. The progressbar is destroyed on every path on which it has
+ *  been created.
+ * ----------------------------------------------------------------------- */
+rc_t concat_files( KDirectory * dir, const VNamelist * files, size_t buf_size,
+                   const char * output, bool show_progress )
+{
+    struct KFile * raw_dst;
+    rc_t rc = KDirectoryCreateFile( dir, &raw_dst, false, 0664, kcmInit, "%s", output );
+    if ( rc != 0 )
+        ErrMsg( "KDirectoryCreateFile( '%s' ) -> %R", output, rc );
+    else
+    {
+        struct KFile * dst;
+        rc = KBufFileMakeWrite( &dst, raw_dst, false, buf_size );
+        /* our reference to the unbuffered file is dropped here in any case,
+           from now on only the buffered file ( if we got one ) is used */
+        KFileRelease( raw_dst );
+        if ( rc != 0 )
+            ErrMsg( "KBufFileMakeWrite() -> %R", rc );
+        else
+        {
+            cf_progress cfp;
+            uint32_t count;
+
+            cfp.progressbar = NULL;
+            cfp.total_size = 0;
+            cfp.current_size = 0;
+            cfp.current_percent = 0;
+            if ( show_progress )
+            {
+                rc = make_progressbar( &cfp.progressbar, 2 );
+                if ( rc != 0 )
+                    cfp.progressbar = NULL;     /* nothing to destroy later */
+                else
+                    rc = total_filesize( dir, files, &cfp.total_size );
+            }
+
+            if ( rc == 0 )
+            {
+                rc = VNameListCount( files, &count );
+                if ( rc != 0 )
+                    ErrMsg( "VNameListCount() -> %R", rc );
+                else
+                {
+                    uint32_t idx;
+                    uint64_t dst_pos = 0;
+                    for ( idx = 0; rc == 0 && idx < count; ++idx )
+                    {
+                        const char * filename;
+                        rc = VNameListGet( files, idx, &filename );
+                        if ( rc != 0 )
+                            ErrMsg( "VNameListGet( #%d) -> %R", idx, rc );
+                        else
+                        {
+                            const struct KFile * raw_src;
+                            /* rc1 is intentionally not propagated: a file that
+                               cannot be opened is skipped */
+                            rc_t rc1 = KDirectoryOpenFileRead( dir, &raw_src, "%s", filename );
+                            if ( rc1 == 0 )
+                            {
+                                const struct KFile * src;
+                                rc = KBufFileMakeRead( &src, raw_src, buf_size );
+                                KFileRelease( raw_src );
+                                if ( rc != 0 )
+                                    ErrMsg( "KBufFileMakeRead() -> %R", rc );
+                                else
+                                {
+                                    rc = copy_file( dst, src, &dst_pos, buf_size, &cfp );
+                                    KFileRelease( src );
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            if ( cfp.progressbar != NULL )
+            {
+                destroy_progressbar( cfp.progressbar );
+                KOutMsg( "\n" );
+            }
+            KFileRelease( dst );
+        }
+    }
+    return rc;
+}
+
+/* -----------------------------------------------------------------------
+ * delete_files
+ *  Removes all files of the list via KDirectoryRemove() ( with force ).
+ *  Stops at the first failure, which is reported via ErrMsg() and
+ *  returned; the remaining files are not touched in that case.
+ * ----------------------------------------------------------------------- */
+rc_t delete_files( KDirectory * dir, const VNamelist * files )
+{
+    uint32_t n_files;
+    uint32_t i;
+    rc_t rc = VNameListCount( files, &n_files );
+
+    if ( rc != 0 )
+    {
+        ErrMsg( "VNameListCount() -> %R", rc );
+        return rc;
+    }
+
+    for ( i = 0; i < n_files; ++i )
+    {
+        const char * name;
+
+        rc = VNameListGet( files, i, &name );
+        if ( rc != 0 )
+        {
+            ErrMsg( "VNameListGet( #%d) -> %R", i, rc );
+            break;
+        }
+
+        rc = KDirectoryRemove( dir, true, "%s", name );
+        if ( rc != 0 )
+        {
+            ErrMsg( "KDirectoryRemove( '%s' ) -> %R", name, rc );
+            break;
+        }
+    }
+    return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * progress_thread_func
+ *  Thread-function started by start_multi_progress().
+ *  'data' is the multi_progress struct. Every 100 ms the row-counter is
+ *  translated ( calc_percent with 2 digits ) and the progressbar is
+ *  advanced step by step. After the done-flag has been seen the bar is
+ *  brought up to date one last time, then it is destroyed.
+ *
+ *  If the progressbar cannot be created the thread ends immediately with
+ *  that rc, instead of using an uninitialized progressbar.
+ * ----------------------------------------------------------------------- */
+static rc_t CC progress_thread_func( const KThread *self, void *data )
+{
+    multi_progress * sp = data;
+    struct progressbar * progressbar;
+    uint32_t curr = 0;
+    bool done = false;
+    rc_t rc = make_progressbar( &progressbar, 2 );
+    if ( rc != 0 )
+    {
+        ErrMsg( "make_progressbar() -> %R", rc );
+        return rc;
+    }
+
+    update_progressbar( progressbar, curr );
+    while ( !done )
+    {
+        uint32_t percent;
+
+        /* the flag is sampled BEFORE the counter: the pass in which 'done'
+           is seen is the final update of the bar */
+        done = ( atomic_read( &sp->progress_done ) != 0 );
+        percent = calc_percent( sp->row_count, atomic_read( &sp->progress_rows ), 2 );
+        if ( percent > curr )
+        {
+            uint32_t i;
+            for ( i = curr + 1; i <= percent; ++i )
+                update_progressbar( progressbar, i );
+            curr = percent;
+        }
+        if ( !done )
+            KSleepMs( 100 );
+    }
+
+    destroy_progressbar( progressbar );
+    KOutMsg( "\n" );
+    return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * init_progress_data
+ * Resets the done-flag and the row-counter to 0 and stores the total
+ * number of rows ( the reference for the percentage ).
+ * ----------------------------------------------------------------------- */
+void init_progress_data( multi_progress * progress_data, uint64_t row_count )
+{
+ atomic_set( &progress_data->progress_done, 0 );
+ atomic_set( &progress_data->progress_rows, 0 );
+ progress_data->row_count = row_count;
+}
+
+/* -----------------------------------------------------------------------
+ * start_multi_progress
+ * Creates a thread ( KThreadMake ) that runs progress_thread_func() with
+ * 'progress_data' as its data. The thread is handed back via 't'.
+ * A failure is reported via ErrMsg() and returned.
+ * ----------------------------------------------------------------------- */
+rc_t start_multi_progress( KThread **t, multi_progress * progress_data )
+{
+ rc_t rc = KThreadMake( t, progress_thread_func, progress_data );
+ if ( rc != 0 )
+ ErrMsg( "KThreadMake( progress_thread ) -> %R", rc );
+ return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * join_multi_progress
+ * Sets the done-flag ( which ends the loop in progress_thread_func ),
+ * waits for the thread and releases it.
+ * Does nothing if 't' is NULL.
+ * ----------------------------------------------------------------------- */
+void join_multi_progress( KThread *t, multi_progress * progress_data )
+{
+ if ( t != NULL )
+ {
+ atomic_set( &progress_data->progress_done, 1 );
+ KThreadWait( t, NULL );
+ KThreadRelease( t );
+ }
+}
+
+
+/* -----------------------------------------------------------------------
+ * make_prefixed
+ *  Writes [ prefix [ '/' ] ] path [ postfix ] into 'buffer':
+ *      prefix .... optional ( NULL ); a '/' is inserted between prefix
+ *                  and path unless the prefix already ends with one
+ *      postfix ... optional ( NULL )
+ *
+ *  An empty ( but not NULL ) prefix is an error. It is reported once -
+ *  not a second time as a failure of string_printf().
+ *  A failure of string_printf() ( for instance: buffer too small ) is
+ *  reported via ErrMsg() and returned.
+ * ----------------------------------------------------------------------- */
+rc_t make_prefixed( char * buffer, size_t bufsize, const char * prefix,
+                    const char * path, const char * postfix )
+{
+    rc_t rc;
+    size_t num_writ;
+    const char * tail = ( postfix != NULL ) ? postfix : "";
+
+    if ( prefix == NULL )
+        rc = string_printf( buffer, bufsize, &num_writ, "%s%s", path, tail );
+    else
+    {
+        uint32_t l = string_measure( prefix, NULL );
+        if ( l == 0 )
+        {
+            rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+            ErrMsg( "make_prefixed.string_measure() = 0 -> %R", rc );
+            /* already reported, do not blame string_printf() below */
+            return rc;
+        }
+        else
+        {
+            const char * separator = ( prefix[ l - 1 ] == '/' ) ? "" : "/";
+            rc = string_printf( buffer, bufsize, &num_writ, "%s%s%s%s",
+                                prefix, separator, path, tail );
+        }
+    }
+
+    if ( rc != 0 )
+        ErrMsg( "make_prefixed.string_printf() -> %R", rc );
+    return rc;
+}
diff --git a/tools/fastdump/helper.h b/tools/fastdump/helper.h
new file mode 100644
index 0000000..7d23b01
--- /dev/null
+++ b/tools/fastdump/helper.h
@@ -0,0 +1,141 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#ifndef _h_helper_
+#define _h_helper_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
+
+#ifndef _h_klib_num_gen_
+#include <klib/num-gen.h>
+#endif
+
+#ifndef _h_vdb_cursor_
+#include <vdb/cursor.h>
+#endif
+
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+#ifndef _h_klib_vector_
+#include <klib/vector.h>
+#endif
+
+#ifndef _h_klib_namelist_
+#include <klib/namelist.h>
+#endif
+
+/*
+ this is in interfaces/cc/XXX/YYY/atomic.h
+ XXX ... the compiler ( cc, gcc, icc, vc++ )
+ YYY ... the architecture ( fat86, i386, noarch, ppc32, x86_64 )
+ */
+#ifndef _h_atomic_
+#include <atomic.h>
+#endif
+
+#ifndef _h_kproc_thread_
+#include <kproc/thread.h>
+#endif
+
+/* a String together with the capacity of the memory it points to */
+typedef struct SBuffer
+{
+ String S; /* addr ... the memory, len/size ... the part of it that is in use */
+ size_t buffer_size; /* the limit used by the functions that write into S.addr */
+} SBuffer;
+
+
+/* the formats get_format_t() can deliver, ft_special is its fallback for NULL/empty/unknown names */
+typedef enum format_t { ft_special, ft_fastq, ft_lookup, ft_test } format_t;
+
+rc_t ErrMsg( const char * fmt, ... );
+
+rc_t make_SBuffer( SBuffer * buffer, size_t len );
+void release_SBuffer( SBuffer * buffer );
+rc_t print_to_SBufferV( SBuffer * buffer, const char * fmt, va_list args );
+rc_t print_to_SBuffer( SBuffer * buffer, const char * fmt, ... );
+
+rc_t add_column( const VCursor * cursor, const char * name, uint32_t * id );
+
+rc_t make_row_iter( struct num_gen * ranges, int64_t first, uint64_t count,
+ const struct num_gen_iter ** iter );
+
+rc_t split_string( String * in, String * p0, String * p1, uint32_t ch );
+
+format_t get_format_t( const char * format );
+
+struct Args;
+const char * get_str_option( const struct Args *args, const char *name, const char * dflt );
+bool get_bool_option( const struct Args *args, const char *name );
+size_t get_size_t_option( const struct Args * args, const char *name, size_t dflt );
+uint64_t get_uint64_t_option( const struct Args * args, const char *name, uint64_t dflt );
+
+uint64_t make_key( int64_t seq_spot_id, uint32_t seq_read_id );
+
+void pack_4na( const String * unpacked, SBuffer * packed );
+void unpack_4na( const String * packed, SBuffer * unpacked );
+
+uint64_t calc_percent( uint64_t max, uint64_t value, uint16_t digits );
+
+bool file_exists( const KDirectory * dir, const char * fmt, ... );
+
+void join_and_release_threads( Vector * threads );
+
+rc_t concat_files( KDirectory * dir, const VNamelist * files, size_t buf_size,
+ const char * output, bool show_progress );
+
+rc_t delete_files( KDirectory * dir, const VNamelist * files );
+
+
+/* the data shared with the progress-thread ( see start_multi_progress() ) */
+typedef struct multi_progress
+{
+ atomic_t progress_done; /* set to 1 by join_multi_progress() to end the progress-thread */
+ atomic_t progress_rows; /* rows processed so far, read by the progress-thread;
+ presumably incremented by the worker-threads - TODO confirm */
+ uint64_t row_count; /* total number of rows, the reference for the percentage */
+} multi_progress;
+
+void init_progress_data( multi_progress * progress_data, uint64_t row_count );
+rc_t start_multi_progress( KThread **t, multi_progress * progress_data );
+void join_multi_progress( KThread *t, multi_progress * progress_data );
+
+rc_t make_prefixed( char * buffer, size_t bufsize, const char * prefix,
+ const char * path, const char * postfix );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/fastdump/index.c b/tools/fastdump/index.c
new file mode 100644
index 0000000..4036233
--- /dev/null
+++ b/tools/fastdump/index.c
@@ -0,0 +1,335 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+#include "index.h"
+#include "helper.h"
+
+#include <kfs/file.h>
+#include <kfs/buffile.h>
+
+/* the state of an index-file that is written */
+typedef struct index_writer
+{
+ struct KFile * f; /* the ( buffered ) index-file */
+ /* frequency ... a key is only recorded if it is more than this value above last_key
+ pos ......... the file-position of the next write
+ last_key .... the key recorded last by write_key() */
+ uint64_t frequency, pos, last_key;
+} index_writer;
+
+
+/* -----------------------------------------------------------------------
+ * release_index_writer
+ *  Releases the file of the writer ( if there is one ) and frees the
+ *  writer itself. NULL is ignored.
+ * ----------------------------------------------------------------------- */
+void release_index_writer( struct index_writer * writer )
+{
+    if ( writer == NULL )
+        return;
+
+    if ( writer->f != NULL )
+        KFileRelease( writer->f );
+    free( writer );
+}
+
+
+/* -----------------------------------------------------------------------
+ * write_value
+ *  Writes one 64-bit value ( in the byte-order of the machine ) at the
+ *  current position of the writer and advances the position.
+ *  A short write is treated as an error, the position is not advanced.
+ *
+ *  The messages name the function that is really called ( KFileWrite )
+ *  and distinguish a failed write from an incomplete one.
+ * ----------------------------------------------------------------------- */
+static rc_t write_value( index_writer * writer, uint64_t value )
+{
+    size_t num_writ;
+    rc_t rc = KFileWrite( writer->f, writer->pos, &value, sizeof value, &num_writ );
+    if ( rc != 0 )
+        ErrMsg( "write_value.KFileWrite() -> %R", rc );
+    else if ( num_writ != sizeof value )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcWriting, rcFormat, rcInvalid );
+        ErrMsg( "write_value.KFileWrite() incomplete -> %R", rc );
+    }
+    else
+        writer->pos += num_writ;
+    return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * write_key_and_offset
+ *  Writes one index-entry: the key, followed by the offset.
+ *  The offset is not written if writing the key failed.
+ * ----------------------------------------------------------------------- */
+static rc_t write_key_and_offset( index_writer * writer, uint64_t key, uint64_t offset )
+{
+    rc_t rc = write_value( writer, key );
+    return ( rc != 0 ) ? rc : write_value( writer, offset );
+}
+
+
+/* -----------------------------------------------------------------------
+ * write_key
+ *  Records the pair key/offset in the index - but only if the key is more
+ *  than 'frequency' above the key that was recorded last. All other keys
+ *  are skipped without an error.
+ *  A NULL writer is an error; the message names this function.
+ * ----------------------------------------------------------------------- */
+rc_t write_key( struct index_writer * writer, uint64_t key, uint64_t offset )
+{
+    rc_t rc = 0;
+    if ( writer == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcReading, rcParam, rcInvalid );
+        ErrMsg( "write_key() -> %R", rc );
+    }
+    else
+    {
+        if ( key > ( writer->last_key + writer->frequency ) )
+        {
+            rc = write_key_and_offset( writer, key, offset );
+            if ( rc == 0 )
+                writer->last_key = key;
+        }
+    }
+    return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * make_index_writer
+ * Creates the index-file ( its path is built from 'fmt' and the following
+ * arguments ), wraps it into a buffered file of 'buf_size' bytes and
+ * writes the start of the index:
+ * the frequency, followed by the entry key = 1 / offset = 0
+ *
+ * writer ...... receives the new writer, only if everything succeeded;
+ * it is not touched on failure
+ * frequency ... see write_key()
+ *
+ * The writer has to be released with release_index_writer().
+ * ----------------------------------------------------------------------- */
+rc_t make_index_writer( KDirectory * dir, struct index_writer ** writer,
+ size_t buf_size, uint64_t frequency, const char * fmt, ... )
+{
+ rc_t rc;
+ struct KFile * f;
+
+ va_list args;
+ va_start ( args, fmt );
+
+ rc = KDirectoryVCreateFile( dir, &f, false, 0664, kcmInit, fmt, args );
+ if ( rc != 0 )
+ ErrMsg( "KDirectoryVCreateFile() -> %R", rc );
+ else
+ {
+ struct KFile * temp_file;
+ rc = KBufFileMakeWrite( &temp_file, f, false, buf_size );
+ /* the unbuffered file is released in any case, only temp_file is used from here on */
+ KFileRelease( f );
+ if ( rc != 0 )
+ ErrMsg( "KBufFileMakeWrite() -> %R", rc );
+ else
+ {
+ /* calloc: pos and last_key start with 0 */
+ index_writer * w = calloc( 1, sizeof * w );
+ if ( w == NULL )
+ {
+ KFileRelease( temp_file );
+ rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+ ErrMsg( "calloc( %d ) -> %R", ( sizeof * w ), rc );
+ }
+ else
+ {
+ w->f = temp_file;
+ w->frequency = frequency;
+ rc = write_value( w, frequency );
+ if ( rc == 0 )
+ rc = write_key_and_offset( w, 1, 0 );
+
+ if ( rc == 0 )
+ *writer = w;
+ else
+ release_index_writer( w ); /* releases temp_file too */
+ }
+ }
+ }
+ va_end ( args );
+
+ return rc;
+}
+
+
+/* ----------------------------------------------------------------------- */
+
+/* the state of an index-file that is read */
+typedef struct index_reader
+{
+ const struct KFile * f; /* the ( buffered ) index-file */
+ /* frequency ... the first value of the file ( see make_index_reader() )
+ file_size ... the size of the file as reported by KFileSize()
+ max_key ..... the result of get_max_key(), 0 if not ( yet ) known */
+ uint64_t frequency, file_size, max_key;
+} index_reader;
+
+
+/* -----------------------------------------------------------------------
+ * release_index_reader
+ *  Releases the file of the reader ( if there is one ) and frees the
+ *  reader itself. NULL is ignored.
+ * ----------------------------------------------------------------------- */
+void release_index_reader( struct index_reader * reader )
+{
+    if ( reader == NULL )
+        return;
+
+    if ( reader->f != NULL )
+        KFileRelease( reader->f );
+    free( reader );
+}
+
+
+/* -----------------------------------------------------------------------
+ * read_value
+ *  Reads one 64-bit value from position 'pos' of the index-file.
+ *  A read-error is reported via ErrMsg(); a short read results in an rc
+ *  without a message.
+ * ----------------------------------------------------------------------- */
+static rc_t read_value( struct index_reader * reader, uint64_t pos, uint64_t * value )
+{
+    size_t got;
+    rc_t rc = KFileRead( reader->f, pos, ( void *)value, sizeof *value, &got );
+
+    if ( rc != 0 )
+    {
+        ErrMsg( "read_value.KFileRead( at %ld ) -> %R", pos, rc );
+        return rc;
+    }
+    if ( got != sizeof *value )
+        return RC( rcVDB, rcNoTarg, rcReading, rcFormat, rcInvalid );
+    return 0;
+}
+
+
+/* -----------------------------------------------------------------------
+ * make_index_reader
+ * Opens the index-file ( its path is built from 'fmt' and the following
+ * arguments ), wraps it into a buffered file of 'buf_size' bytes, reads
+ * the frequency from position 0 and queries the size of the file.
+ *
+ * reader ... receives the new reader, only if all of that succeeded;
+ * it is not touched on failure
+ *
+ * The reader has to be released with release_index_reader().
+ * ----------------------------------------------------------------------- */
+rc_t make_index_reader( KDirectory * dir, struct index_reader ** reader,
+ size_t buf_size, const char * fmt, ... )
+{
+ rc_t rc;
+ const struct KFile * f;
+
+ va_list args;
+ va_start ( args, fmt );
+
+ rc = KDirectoryVOpenFileRead( dir, &f, fmt, args );
+ if ( rc != 0 )
+ ErrMsg( "KDirectoryVOpenFileRead() -> %R", rc );
+ else
+ {
+ const struct KFile * temp_file;
+ rc = KBufFileMakeRead( &temp_file, f, buf_size );
+ /* the unbuffered file is released in any case, only temp_file is used from here on */
+ KFileRelease( f );
+ if ( rc != 0 )
+ {
+ ErrMsg( "KBufFileMakeRead() -> %R", rc );
+ }
+ else
+ {
+ /* calloc: max_key starts with 0 = not known */
+ index_reader * r = calloc( 1, sizeof * r );
+ if ( r == NULL )
+ {
+ KFileRelease( temp_file );
+ rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+ ErrMsg( "calloc( %d ) -> %R", ( sizeof * r ), rc );
+ }
+ else
+ {
+ r->f = temp_file;
+ rc = read_value( r, 0, &r->frequency );
+ if ( rc == 0 )
+ rc = KFileSize( temp_file, &r->file_size );
+
+ if ( rc == 0 )
+ {
+ /* r->max_key is still 0 here, get_max_key() therefore reads the key from the file
+ and stores it directly in r->max_key
+ NOTE(review): the rc of get_max_key() is ignored, on failure max_key stays 0 */
+ get_max_key( r, &r->max_key );
+ *reader = r;
+ }
+ else
+ release_index_reader( r ); /* releases temp_file too */
+ }
+ }
+ }
+ va_end ( args );
+ return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * key_to_pos_guess
+ *  Estimates the file-position of the index-entry for 'key':
+ *      size of the header ( the frequency ) +
+ *      ( key / frequency ) * size of an entry ( key + offset )
+ *  get_nearest_offset() uses this as the start of its search.
+ *
+ *  A frequency of 0 ( possible if the value read from the file is 0 )
+ *  must not lead to a division by zero: the position of the first entry
+ *  is returned in that case.
+ * ----------------------------------------------------------------------- */
+static uint64_t key_to_pos_guess( const struct index_reader * reader, uint64_t key )
+{
+    const uint64_t header_size = ( sizeof reader->frequency );
+    const uint64_t entry_size = ( 2 * ( sizeof reader->frequency ) );
+    uint64_t chunk_id = 0;
+
+    if ( reader->frequency > 0 )
+        chunk_id = ( key / reader->frequency );
+    return ( header_size + ( chunk_id * entry_size ) );
+}
+
+
+/* -----------------------------------------------------------------------
+ * read_3
+ *  Reads 'to_read' bytes from position 'pos' of the index-file into
+ *  'data' ( the callers in this file read up to 3 entries = 6 values ).
+ *  A read-error is reported via ErrMsg(); a short read results in an rc
+ *  without a message.
+ * ----------------------------------------------------------------------- */
+static rc_t read_3( const struct index_reader * reader, uint64_t pos, uint64_t * data, size_t to_read )
+{
+    size_t got;
+    rc_t rc = KFileRead( reader->f, pos, ( void *)data, to_read, &got );
+
+    if ( rc != 0 )
+    {
+        ErrMsg( "read_3.KFileRead( at %ld ) -> %R", pos, rc );
+        return rc;
+    }
+    if ( got != to_read )
+        return RC( rcVDB, rcNoTarg, rcReading, rcFormat, rcInvalid );
+    return 0;
+}
+
+
+/* -----------------------------------------------------------------------
+ * get_nearest_offset
+ *  Looks for the index-entry with the biggest key that is not bigger than
+ *  'key_to_find' and returns its key and offset.
+ *
+ *  The search starts at the position guessed by key_to_pos_guess() and
+ *  inspects a window of 3 consecutive entries:
+ *      data[ 0 ] ... key0     data[ 1 ] ... offset0
+ *      data[ 2 ] ... key1     data[ 3 ] ... offset1
+ *      data[ 4 ] ... key2     data[ 5 ] ... offset2
+ *  The window is moved by one entry, backwards if key_to_find < key0,
+ *  forwards otherwise. If the window cannot be moved backwards any more
+ *  the first entry is returned.
+ *
+ *  The search always terminates:
+ *      - key_to_find == key2 moves the window forwards ( the former test
+ *        for '>' matched no branch and repeated the same read forever )
+ *      - a change of the direction can only happen if the keys are not
+ *        sorted, the search is given up in that case
+ *
+ *  Returns a silent rc ( rcNotFound ) if no entry was found, this includes
+ *  a guessed position behind the end of the file and a window that does
+ *  not fit into the file any more.
+ * ----------------------------------------------------------------------- */
+rc_t get_nearest_offset( const struct index_reader * reader, uint64_t key_to_find,
+                         uint64_t * key_found, uint64_t * offset )
+{
+    rc_t rc = 0;
+    if ( reader == NULL || key_found == NULL || offset == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcReading, rcParam, rcInvalid );
+        ErrMsg( "get_nearest_offset() -> %R", rc );
+    }
+    else
+    {
+        const uint64_t entry_size = ( 2 * ( sizeof reader->frequency ) );
+        uint64_t data[ 6 ];
+        uint64_t pos = key_to_pos_guess( reader, key_to_find );
+        bool found = false;
+        bool give_up = false;
+        bool moved_back = false;
+        bool moved_forward = false;
+
+        while ( rc == 0 && !found && !give_up && pos < reader->file_size )
+        {
+            rc = read_3( reader, pos, data, sizeof data );
+            if ( rc == 0 )
+            {
+                if ( key_to_find >= data[ 0 ] && key_to_find < data[ 2 ] )
+                {
+                    /* key_to_find is between key0 and key1 */
+                    found = true;
+                    *key_found = data[ 0 ];
+                    *offset = data[ 1 ];
+                }
+                else if ( key_to_find >= data[ 2 ] && key_to_find < data[ 4 ] )
+                {
+                    /* key_to_find is between key1 and key2 */
+                    found = true;
+                    *key_found = data[ 2 ];
+                    *offset = data[ 3 ];
+                }
+                else if ( key_to_find < data[ 0 ] )
+                {
+                    /* key_to_find is smaller than our guess */
+                    if ( moved_forward )
+                        give_up = true;     /* keys are not sorted */
+                    else if ( pos > sizeof reader->frequency )
+                    {
+                        pos -= entry_size;
+                        moved_back = true;
+                    }
+                    else
+                    {
+                        /* we are at the first entry already */
+                        found = true;
+                        *key_found = data[ 0 ];
+                        *offset = data[ 1 ];
+                    }
+                }
+                else
+                {
+                    /* key_to_find is key2 or bigger than our guess */
+                    if ( moved_back )
+                        give_up = true;     /* keys are not sorted */
+                    else
+                    {
+                        pos += entry_size;
+                        moved_forward = true;
+                    }
+                }
+            }
+        }
+        if ( !found )
+            rc = SILENT_RC( rcVDB, rcNoTarg, rcReading, rcId, rcNotFound );
+    }
+    return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * get_max_key
+ *  Returns the key of the last entry of the index.
+ *
+ *  If the reader already knows the value ( reader->max_key > 0 ) it is
+ *  returned without touching the file. Otherwise the last entry
+ *  ( key + offset ) is read from the end of the file.
+ *
+ *  Only the last entry is read - not the last 3 - so that an index with
+ *  just 1 or 2 entries does not result in a position before the start of
+ *  the file. A file that is too small for the header and one entry
+ *  results in an rc.
+ * ----------------------------------------------------------------------- */
+rc_t get_max_key( const struct index_reader * reader, uint64_t * max_key )
+{
+    rc_t rc = 0;
+    if ( reader == NULL || max_key == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcReading, rcParam, rcInvalid );
+        ErrMsg( "get_max_key() -> %R", rc );
+    }
+    else if ( reader->max_key > 0 )
+    {
+        *max_key = reader->max_key;
+    }
+    else
+    {
+        /* entry[ 0 ] ... key     entry[ 1 ] ... offset */
+        uint64_t entry[ 2 ];
+        if ( reader->file_size < ( ( sizeof reader->frequency ) + ( sizeof entry ) ) )
+            rc = RC( rcVDB, rcNoTarg, rcReading, rcFormat, rcInvalid );
+        else
+        {
+            uint64_t pos = reader->file_size - ( sizeof entry );
+            rc = read_3( reader, pos, entry, sizeof entry );
+            if ( rc == 0 )
+                *max_key = entry[ 0 ];
+        }
+    }
+    return rc;
+}
\ No newline at end of file
diff --git a/test/samline/cigar.h b/tools/fastdump/index.h
similarity index 58%
copy from test/samline/cigar.h
copy to tools/fastdump/index.h
index d0ec04a..0a164fa 100644
--- a/test/samline/cigar.h
+++ b/tools/fastdump/index.h
@@ -24,33 +24,41 @@
*
*/
-#ifndef _h_cigar_
-#define _h_cigar_
+#ifndef _h_index_
+#define _h_index_
#ifdef __cplusplus
extern "C" {
#endif
-struct cigar_t;
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
-struct cigar_t * make_cigar_t( const char * cigar_str );
-void free_cigar_t( struct cigar_t * c );
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
-int cigar_t_reflen( const struct cigar_t * c );
-int cigar_t_readlen( const struct cigar_t * c );
-int cigar_t_inslen( const struct cigar_t * c );
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
-size_t cigar_t_string( char * buffer, size_t buf_len, const struct cigar_t * c );
+struct index_writer;
-struct cigar_t * merge_cigar_t( const struct cigar_t * c );
+void release_index_writer( struct index_writer * writer );
+rc_t make_index_writer( KDirectory * dir, struct index_writer ** writer,
+ size_t buf_size, uint64_t frequency, const char * fmt, ... );
+rc_t write_key( struct index_writer * writer, uint64_t key, uint64_t offset );
-size_t md_tag( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * read, const char * reference );
+struct index_reader;
-void debug_cigar_t( const struct cigar_t * c );
+void release_index_reader( struct index_reader * reader );
+rc_t make_index_reader( KDirectory * dir, struct index_reader ** reader,
+ size_t buf_size, const char * fmt, ... );
+rc_t get_nearest_offset( const struct index_reader * reader, uint64_t key_to_find,
+ uint64_t * key_found, uint64_t * offset );
-size_t cigar_t_2_read( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * ref_bases, const char * ins_bases );
+rc_t get_max_key( const struct index_reader * reader, uint64_t * max_key );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/join.c b/tools/fastdump/join.c
new file mode 100644
index 0000000..ca6c0a6
--- /dev/null
+++ b/tools/fastdump/join.c
@@ -0,0 +1,543 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "join.h"
+#include "lookup_reader.h"
+#include "index.h"
+#include "file_printer.h"
+#include "special_iter.h"
+#include "fastq_iter.h"
+#include "helper.h"
+
+#include <klib/out.h>
+#include <klib/printf.h>
+#include <kproc/thread.h>
+
+/* the resources needed to join the rows of one iterator with the lookup-file */
+typedef struct join
+{
+ struct lookup_reader * lookup; /* source of the bases of aligned reads ( lookup_bases() ) */
+ struct file_printer * printer; /* destination of the output, NULL ... KOutMsg() is used instead */
+ SBuffer B1, B2; /* receive the looked-up bases of read 1 / read 2 */
+} join;
+
+
+/* -----------------------------------------------------------------------
+ * release_join_ctx
+ *  Releases everything init_join() has created: the lookup-reader, the
+ *  file-printer and both buffers. The join-struct itself is not freed.
+ *  NULL is ignored.
+ * ----------------------------------------------------------------------- */
+static void release_join_ctx( join * j )
+{
+    if ( j == NULL )
+        return;
+
+    release_lookup_reader( j->lookup );
+    destroy_file_printer( j->printer );
+    release_SBuffer( &j->B1 );
+    release_SBuffer( &j->B2 );
+}
+
+
+/* -----------------------------------------------------------------------
+ * init_join
+ * Fills the join-struct: creates the lookup-reader, the file-printer
+ * ( only if jp->output_filename is not NULL ) and the 2 buffers of
+ * 4096 bytes each. If jp->first > 1 the lookup-reader is positioned at
+ * the key of that row.
+ * On failure everything created so far is released again.
+ * ----------------------------------------------------------------------- */
+static rc_t init_join( const join_params * jp, struct join *j, struct index_reader * index )
+{
+ rc_t rc;
+ /* start with a defined state, so that release_join_ctx() can be called at any point */
+ j->lookup = NULL;
+ j->printer = NULL;
+ j->B1.S.addr = NULL;
+ j->B2.S.addr = NULL;
+ rc = make_lookup_reader( jp->dir, index, &j->lookup, jp->buf_size, "%s", jp->lookup_filename );
+ if ( rc == 0 && jp->output_filename != NULL )
+ rc = make_file_printer( jp->dir, &j->printer, jp->buf_size, 4096 * 4, "%s", jp->output_filename );
+ if ( rc == 0 )
+ rc = make_SBuffer( &j->B1, 4096 );
+ if ( rc == 0 )
+ rc = make_SBuffer( &j->B2, 4096 );
+
+ /* a rc of seek_lookup_reader with the state rcTooBig is tolerated, every other rc is taken over:
+ if the row-id to seek to is in the range of the fully unaligned data, the seek will fail,
+ because there are no alignments ( = lookup-records ) in this area ! */
+ if ( rc == 0 && jp->first > 1 )
+ {
+ /* the key of read 1 of the row, the same value as make_key( jp->first, 1 ) */
+ uint64_t key_to_find = jp->first << 1;
+ uint64_t key_found = 0;
+ rc_t rc1 = seek_lookup_reader( j->lookup, key_to_find, &key_found, true );
+ if ( GetRCState( rc1 ) != rcTooBig )
+ rc = rc1;
+ }
+ if ( rc != 0 )
+ release_join_ctx( j );
+ return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * copy_join_params
+ * Copies the members listed below from 'src' to 'dst'. Pointers are
+ * copied as they are, both structs refer to the same objects afterwards.
+ * ----------------------------------------------------------------------- */
+static void copy_join_params( join_params * dst, const join_params * src )
+{
+ dst->dir = src->dir;
+ dst->accession = src->accession;
+ dst->lookup_filename = src->lookup_filename;
+ dst->index_filename = src->index_filename;
+ dst->output_filename = src->output_filename;
+ dst->temp_path = src->temp_path;
+ dst->join_progress = src->join_progress;
+ dst->buf_size = src->buf_size;
+ dst->cur_cache = src->cur_cache;
+ dst->num_threads = src->num_threads;
+ dst->first = src->first;
+ dst->count = src->count;
+ dst->show_progress = src->show_progress;
+ dst->fmt = src->fmt;
+}
+
+/* -----------------------------------------------------------------------
+ * init_cmn_params
+ * Fills the parameters common to the iterators ( cmn_params ) from the
+ * join-parameters. The row-range is always set to NULL, the rows are
+ * described by first/count only.
+ * ----------------------------------------------------------------------- */
+static void init_cmn_params( const join_params * jp, cmn_params * cmn )
+{
+ cmn->dir = jp->dir;
+ cmn->acc = jp->accession;
+ cmn->row_range = NULL;
+ cmn->first = jp->first;
+ cmn->count = jp->count;
+ cmn->cursor_cache = jp->cur_cache;
+ cmn->show_progress = jp->show_progress;
+}
+
+/* -----------------------------------------------------------------------
+ * print_special
+ *  Prints one record in the 'special' format: row-id, bases, spot-group,
+ *  separated by tabs - into the file-printer of 'j', or via KOutMsg() if
+ *  there is no printer.
+ *
+ *  A read with a primary-alignment-id of 0 is unaligned, its bases are in
+ *  rec->cmp_read. The bases of an aligned read are looked up ( read 1 into
+ *  j->B1, read 2 into j->B2 ):
+ *      both unaligned ........ cmp_read                ( no lookup )
+ *      only read 2 aligned ... cmp_read + B2
+ *      only read 1 aligned ... B1 + cmp_read
+ *      both aligned .......... B1 + B2
+ *  Nothing is printed if a lookup fails, its rc is returned.
+ * ----------------------------------------------------------------------- */
+static rc_t print_special( special_rec * rec, join * j )
+{
+    const bool aligned_1 = ( rec->prim_alig_id[ 0 ] != 0 );
+    const bool aligned_2 = ( rec->prim_alig_id[ 1 ] != 0 );
+    int64_t row_id = rec->row_id;
+    rc_t rc = 0;
+
+    if ( aligned_1 )
+        rc = lookup_bases( j->lookup, row_id, 1, &j->B1 );
+    if ( rc == 0 && aligned_2 )
+        rc = lookup_bases( j->lookup, row_id, 2, &j->B2 );
+    if ( rc != 0 )
+        return rc;
+
+    if ( !aligned_1 && !aligned_2 )
+    {
+        /* both unaligned, print what is in row->cmp_read ( !!! no lookup !!! )*/
+        if ( j->printer != NULL )
+            rc = file_print( j->printer, "%ld\t%S\t%S\n", row_id, &rec->cmp_read, &rec->spot_group );
+        else
+            rc = KOutMsg( "%ld\t%S\t%S\n", row_id, &rec->cmp_read, &rec->spot_group );
+    }
+    else
+    {
+        /* at least one half comes from the lookup */
+        const String * half_1 = aligned_1 ? &j->B1.S : &rec->cmp_read;
+        const String * half_2 = aligned_2 ? &j->B2.S : &rec->cmp_read;
+        if ( j->printer != NULL )
+            rc = file_print( j->printer, "%ld\t%S%S\t%S\n", row_id, half_1, half_2, &rec->spot_group );
+        else
+            rc = KOutMsg( "%ld\t%S%S\t%S\n", row_id, half_1, half_2, &rec->spot_group );
+    }
+    return rc;
+}
+
+
+/* -----------------------------------------------------------------------
+ * print_fastq
+ *  Prints one record as FASTQ ( 4 lines ) - into the file-printer of 'j',
+ *  or via KOutMsg() if there is no printer:
+ *      @<acc>.<row> <row> length=<number of bases>
+ *      <bases>
+ *      +<acc>.<row> <row> length=<length of the quality>
+ *      <quality>
+ *
+ *  A read with a primary-alignment-id of 0 is unaligned, its bases are in
+ *  rec->cmp_read. The bases of an aligned read are looked up ( read 1 into
+ *  j->B1, read 2 into j->B2 ):
+ *      both unaligned ........ cmp_read                ( no lookup )
+ *      only read 2 aligned ... cmp_read + B2
+ *      only read 1 aligned ... B1 + cmp_read
+ *      both aligned .......... B1 + B2
+ *  Nothing is printed if a lookup fails, its rc is returned.
+ * ----------------------------------------------------------------------- */
+static rc_t print_fastq( fastq_rec * rec, join * j, const char * acc )
+{
+    const bool aligned_1 = ( rec->prim_alig_id[ 0 ] != 0 );
+    const bool aligned_2 = ( rec->prim_alig_id[ 1 ] != 0 );
+    int64_t row_id = rec->row_id;
+    rc_t rc = 0;
+
+    if ( aligned_1 )
+        rc = lookup_bases( j->lookup, row_id, 1, &j->B1 );
+    if ( rc == 0 && aligned_2 )
+        rc = lookup_bases( j->lookup, row_id, 2, &j->B2 );
+    if ( rc != 0 )
+        return rc;
+
+    if ( !aligned_1 && !aligned_2 )
+    {
+        /* both unaligned, print what is in row->cmp_read (no lookup)*/
+        const char * fmt = "@%s.%ld %ld length=%d\n%S\n+%s.%ld %ld length=%d\n%S\n";
+        if ( j->printer != NULL )
+            rc = file_print( j->printer, fmt,
+                             acc, row_id, row_id, rec->cmp_read.len, &rec->cmp_read,
+                             acc, row_id, row_id, rec->quality.len, &rec->quality );
+        else
+            rc = KOutMsg( fmt,
+                          acc, row_id, row_id, rec->cmp_read.len, &rec->cmp_read,
+                          acc, row_id, row_id, rec->quality.len, &rec->quality );
+    }
+    else
+    {
+        /* at least one half comes from the lookup */
+        const char * fmt = "@%s.%ld %ld length=%d\n%S%S\n+%s.%ld %ld length=%d\n%S\n";
+        const String * half_1 = aligned_1 ? &j->B1.S : &rec->cmp_read;
+        const String * half_2 = aligned_2 ? &j->B2.S : &rec->cmp_read;
+        if ( j->printer != NULL )
+            rc = file_print( j->printer, fmt,
+                             acc, row_id, row_id, half_1->len + half_2->len, half_1, half_2,
+                             acc, row_id, row_id, rec->quality.len, &rec->quality );
+        else
+            rc = KOutMsg( fmt,
+                          acc, row_id, row_id, half_1->len + half_2->len, half_1, half_2,
+                          acc, row_id, row_id, rec->quality.len, &rec->quality );
+    }
+    return rc;
+}
+
+
+/* extract_row_count_cmn
+ *  Opens a temporary iterator matching jp->fmt, stores its row count in
+ *  *row_count and destroys the iterator again.
+ *  For any other format *row_count is left untouched and 0 is returned.
+ */
+static rc_t extract_row_count_cmn( const join_params * jp, uint64_t * row_count )
+{
+    rc_t rc = 0;
+    cmn_params cmn;
+
+    init_cmn_params( jp, &cmn );
+
+    if ( jp->fmt == ft_special )
+    {
+        struct special_iter * special;
+        rc = make_special_iter( &cmn, &special );
+        if ( rc != 0 )
+            return rc;
+        *row_count = get_row_count_of_special_iter( special );
+        destroy_special_iter( special );
+    }
+    else if ( jp->fmt == ft_fastq )
+    {
+        struct fastq_iter * fastq;
+        rc = make_fastq_iter( &cmn, &fastq );
+        if ( rc != 0 )
+            return rc;
+        *row_count = get_row_count_of_fastq_iter( fastq );
+        destroy_fastq_iter( fastq );
+    }
+    return rc;
+}
+
+/* forward declaration: the join loops below call this once per row and
+   stop printing as soon as it returns a non-zero rc */
+rc_t CC Quitting();
+
+/* perform_special_join
+ *  Iterates over the rows selected by jp and prints each of them in the
+ *  "special" format via print_special().
+ *
+ *  jp     join parameters ( row selection, output, optional progress counter )
+ *  index  optional index reader handed to init_join(), may be NULL
+ *
+ *  Returns the first non-zero rc of setup, Quitting(), the iterator or the printer.
+ */
+static rc_t perform_special_join( const join_params * jp, struct index_reader * index )
+{
+    rc_t rc;
+    struct special_iter * iter;
+    cmn_params cmn;
+
+    init_cmn_params( jp, &cmn );
+    rc = make_special_iter( &cmn, &iter );
+    if ( rc == 0 )
+    {
+        join j;
+        rc = init_join( jp, &j, index );
+        if ( rc == 0 )
+        {
+            special_rec rec;
+            /* rc is tested BEFORE the next row is fetched: the iterator receives
+               &rc and could otherwise overwrite an error left behind by
+               Quitting() or print_special() in the previous round */
+            while ( rc == 0 && get_from_special_iter( iter, &rec, &rc ) && rc == 0 )
+            {
+                rc = Quitting();
+                if ( rc == 0 )
+                {
+                    rc = print_special( &rec, &j );
+                    /* the row counts as processed even if printing failed */
+                    if ( jp->join_progress != NULL )
+                        atomic_inc( jp->join_progress );
+                }
+            }
+            release_join_ctx( &j );
+        }
+        destroy_special_iter( iter );
+    }
+    return rc;
+}
+
+
+/* perform_fastq_join
+ *  Iterates over the rows selected by jp and prints each of them as a FASTQ
+ *  record via print_fastq(), using jp->accession as the record name.
+ *
+ *  jp     join parameters ( row selection, output, optional progress counter )
+ *  index  optional index reader handed to init_join(), may be NULL
+ *
+ *  Returns the first non-zero rc of setup, Quitting(), the iterator or the printer.
+ */
+static rc_t perform_fastq_join( const join_params * jp, struct index_reader * index )
+{
+    rc_t rc;
+    struct fastq_iter * iter;
+    cmn_params cmn;
+
+    init_cmn_params( jp, &cmn );
+    rc = make_fastq_iter( &cmn, &iter );
+    if ( rc == 0 )
+    {
+        join j;
+        rc = init_join( jp, &j, index );
+        if ( rc == 0 )
+        {
+            fastq_rec rec;
+            /* rc is tested BEFORE the next row is fetched: the iterator receives
+               &rc and could otherwise overwrite an error left behind by
+               Quitting() or print_fastq() in the previous round */
+            while ( rc == 0 && get_from_fastq_iter( iter, &rec, &rc ) && rc == 0 )
+            {
+                rc = Quitting();
+                if ( rc == 0 )
+                {
+                    rc = print_fastq( &rec, &j, jp->accession );
+                    /* the row counts as processed even if printing failed */
+                    if ( jp->join_progress != NULL )
+                        atomic_inc( jp->join_progress );
+                }
+            }
+            release_join_ctx( &j );
+        }
+        destroy_fastq_iter( iter );
+    }
+    return rc;
+}
+
+
+
+/* ------------------------------------------------------------------------------------------ */
+
+/* per-thread work description; allocated by execute_join() for each worker
+   and freed by cmn_thread_func() when the worker is done */
+typedef struct join_thread_data
+{
+ const join_params * jp; /* parameters of the whole join, copied and adjusted by the worker */
+ int64_t first; /* first row of this worker's slice */
+ uint64_t count; /* number of rows in this worker's slice */
+ uint32_t idx; /* worker number, used as suffix of the part-file name */
+} join_thread_data;
+
+
+/* make_part_filename
+ *  Builds the name of the part file with number 'id' into buffer:
+ *      "<temp_path>/<output_filename>.<id>"  if jp->temp_path is set
+ *      "<output_filename>.<id>"              otherwise
+ *  A '/' is only inserted if temp_path does not already end with one.
+ *
+ *  Returns rcParam/rcInvalid for an empty temp_path, otherwise the rc of
+ *  string_printf(). Each failure is reported exactly once.
+ */
+static rc_t make_part_filename( const join_params * jp, char * buffer, size_t bufsize, uint32_t id )
+{
+    rc_t rc;
+    size_t num_writ;
+
+    if ( jp->temp_path == NULL )
+        rc = string_printf( buffer, bufsize, &num_writ, "%s.%d", jp->output_filename, id );
+    else
+    {
+        uint32_t l = string_measure( jp->temp_path, NULL );
+        if ( l == 0 )
+        {
+            rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+            ErrMsg( "make_part_filename.string_measure() = 0 -> %R", rc );
+            /* already reported: do not fall through to the string_printf() message */
+            return rc;
+        }
+
+        if ( jp->temp_path[ l - 1 ] == '/' )
+            rc = string_printf( buffer, bufsize, &num_writ, "%s%s.%d",
+                                jp->temp_path, jp->output_filename, id );
+        else
+            rc = string_printf( buffer, bufsize, &num_writ, "%s/%s.%d",
+                                jp->temp_path, jp->output_filename, id );
+    }
+
+    if ( rc != 0 )
+        ErrMsg( "make_part_filename.string_printf() -> %R", rc );
+    return rc;
+}
+
+
+/* concat_part_files
+ *  Collects the names of the part files 0 .. count-1, concatenates them into
+ *  jp->output_filename and, only if that succeeded, deletes the part files.
+ */
+static rc_t concat_part_files( const join_params * jp, uint32_t count )
+{
+    struct VNamelist * parts;
+    uint32_t part_nr = 0;
+    rc_t rc = VNamelistMake( &parts, count );
+    if ( rc != 0 )
+        return rc;
+
+    /* stop collecting names at the first failure */
+    while ( rc == 0 && part_nr < count )
+    {
+        char name[ 4096 ];
+        rc = make_part_filename( jp, name, sizeof name, part_nr );
+        if ( rc == 0 )
+            rc = VNamelistAppend( parts, name );
+        ++part_nr;
+    }
+
+    if ( rc == 0 )
+        rc = concat_files( jp->dir, parts, jp->buf_size, jp->output_filename, jp->show_progress );
+    if ( rc == 0 )
+        rc = delete_files( jp->dir, parts );
+
+    VNamelistRelease( parts );
+    return rc;
+}
+
+/* ------------------------------------------------------------------------------------------ */
+
+/* cmn_thread_func
+ *  Body of one join worker thread. 'data' is a join_thread_data describing the
+ *  slice of rows to handle; it is freed here before returning.
+ *  The worker opens the index file if one is configured and exists, then runs
+ *  the join for its slice on a private copy of the parameters whose output is
+ *  redirected to this worker's part file.
+ */
+static rc_t CC cmn_thread_func( const KThread *self, void *data )
+{
+    join_thread_data * jtd = data;
+    const join_params * jp = jtd->jp;
+    struct index_reader * index = NULL;
+    char part_file[ 4096 ];
+    rc_t rc = 0;
+
+    /* a configured but missing index file is not an error: run without index */
+    if ( jp->index_filename != NULL && file_exists( jp->dir, "%s", jp->index_filename ) )
+        rc = make_index_reader( jp->dir, &index, jp->buf_size, "%s", jp->index_filename );
+
+    if ( rc == 0 )
+        rc = make_part_filename( jp, part_file, sizeof part_file, jtd->idx );
+
+    if ( rc == 0 )
+    {
+        join_params cjp;
+
+        /* private copy: own row range, own output file, no nested threads/progress */
+        copy_join_params( &cjp, jp );
+        cjp.num_threads = 0;
+        cjp.first = jtd->first;
+        cjp.count = jtd->count;
+        cjp.output_filename = part_file;
+        cjp.show_progress = false;
+
+        if ( jp->fmt == ft_special )
+            rc = perform_special_join( &cjp, index );
+        else if ( jp->fmt == ft_fastq )
+            rc = perform_fastq_join( &cjp, index );
+    }
+
+    release_index_reader( index );
+    free( jtd );
+    return rc;
+}
+
+
+/* execute_join
+ *  Runs the join described by jp.
+ *
+ *  With jp->num_threads < 2 the join runs on the calling thread.
+ *  Otherwise the rows are split into num_threads slices of
+ *  ( row_count / num_threads ) + 1 rows, each slice is written by its own
+ *  thread into a part file, and the part files are concatenated into
+ *  jp->output_filename once all threads have been joined.
+ *
+ *  Returns 0 on success. If setting up the workers fails, the workers that
+ *  were already started are still joined, the part files are NOT concatenated
+ *  and the setup error is returned.
+ */
+rc_t execute_join( const join_params * jp )
+{
+    rc_t rc = 0;
+    if ( jp->num_threads < 2 )
+    {
+        /* on the main thread */
+        switch( jp->fmt )
+        {
+            case ft_special : rc = perform_special_join( jp, NULL ); break;
+            case ft_fastq   : rc = perform_fastq_join( jp, NULL ); break;
+            default : break;
+        }
+    }
+    else
+    {
+        uint64_t row_count = 0;
+        rc = extract_row_count_cmn( jp, &row_count );
+        if ( rc == 0 && row_count > 0 )
+        {
+            Vector threads;
+            int64_t first = 1;
+            uint64_t i, per_thread = ( row_count / jp->num_threads ) + 1;
+            KThread * progress_thread = NULL;
+            multi_progress progress;
+
+            init_progress_data( &progress, row_count );
+            VectorInit( &threads, 0, jp->num_threads );
+
+            if ( jp->show_progress )
+            {
+                /* NOTE(review): this writes through the caller's const pointer so that
+                   the workers ( which copy *jp ) see the shared progress counter */
+                join_params * nc_jp = ( join_params * )jp;
+                nc_jp->join_progress = &progress.progress_rows;
+                rc = start_multi_progress( &progress_thread, &progress );
+            }
+
+            for ( i = 0; rc == 0 && i < jp->num_threads; ++i )
+            {
+                join_thread_data * jtd = calloc( 1, sizeof * jtd );
+                if ( jtd == NULL )
+                {
+                    /* a missing worker would leave a hole in the output: fail instead */
+                    rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+                    ErrMsg( "calloc( join_thread_data #%lu ) -> %R", i, rc );
+                }
+                else
+                {
+                    KThread * thread;
+
+                    jtd->jp = jp;
+                    jtd->first = first;
+                    jtd->count = per_thread;
+                    jtd->idx = i;
+
+                    rc = KThreadMake( &thread, cmn_thread_func, jtd );
+                    if ( rc != 0 )
+                    {
+                        ErrMsg( "KThreadMake( fastq/special #%lu ) -> %R", i, rc );
+                        /* the thread function never ran, so nobody else frees jtd */
+                        free( jtd );
+                    }
+                    else
+                    {
+                        rc = VectorAppend( &threads, NULL, thread );
+                        if ( rc != 0 )
+                            ErrMsg( "VectorAppend( sort-thread #%lu ) -> %R", i, rc );
+                    }
+                    first += per_thread;
+                }
+            }
+
+            /* always wait for whatever was started, even after a setup failure */
+            join_and_release_threads( &threads );
+            join_multi_progress( progress_thread, &progress );
+
+            /* keep the setup error; concatenating an incomplete set of parts is pointless */
+            if ( rc == 0 )
+                rc = concat_part_files( jp, jp->num_threads );
+        }
+    }
+    return rc;
+}
diff --git a/test/samline/refbases.h b/tools/fastdump/join.h
similarity index 64%
copy from test/samline/refbases.h
copy to tools/fastdump/join.h
index 94136fb..83cd4a0 100644
--- a/test/samline/refbases.h
+++ b/tools/fastdump/join.h
@@ -24,15 +24,50 @@
*
*/
-#ifndef _h_refbases_
-#define _h_refbases_
+#ifndef _h_join_
+#define _h_join_
#ifdef __cplusplus
extern "C" {
#endif
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
+
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+#ifndef _h_atomic_
+#include <atomic.h>
+#endif
+
+#ifndef _h_helper_
+#include "helper.h"
+#endif
+
+/* parameters of one join run, see execute_join() */
+typedef struct join_params
+{
+ KDirectory * dir; /* directory used for all file operations of the join */
+ const char * accession; /* name printed in front of the row-id in FASTQ headers */
+ const char * lookup_filename; /* presumably the lookup file with the aligned bases - TODO confirm */
+ const char * index_filename; /* optional index file; only opened by the worker threads if it exists */
+ const char * output_filename; /* output file; worker threads write "<output_filename>.<n>" parts */
+ const char * temp_path; /* optional directory for the part files, NULL: none */
+ atomic_t * join_progress; /* optional counter, incremented once per processed row */
+ size_t buf_size, cur_cache, num_threads; /* num_threads < 2: run on the calling thread */
+ int64_t first; /* first row of the slice to process, set per worker thread */
+ uint64_t count; /* number of rows in the slice to process, set per worker thread */
+ bool show_progress; /* show progress while joining and concatenating */
+ format_t fmt; /* output format ( ft_special or ft_fastq ) */
+} join_params;
+
+rc_t execute_join( const join_params * jp );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/line_iter.c b/tools/fastdump/line_iter.c
new file mode 100644
index 0000000..0ccd234
--- /dev/null
+++ b/tools/fastdump/line_iter.c
@@ -0,0 +1,199 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "line_iter.h"
+#include "helper.h"
+
+#include <kfs/file.h>
+
+/* bits of line_iter.state */
+#define ITER_DONE 0x01 /* no further line available, the iterator is exhausted */
+#define ITER_EOF 0x02 /* the last read was short or failed, nothing more is read from the file */
+
+/* iterator that hands out the lines of a file one by one */
+typedef struct line_iter
+{
+ const struct KFile * f; /* file to read from, released in release_line_iter() */
+ String buffer, content, line; /* buffer: allocated block, content: unconsumed part of it, line: current line */
+ uint64_t pos_in_file; /* file position of the next read */
+ uint32_t state; /* combination of ITER_DONE / ITER_EOF */
+} line_iter;
+
+
+/* release_line_iter
+ *  Releases the file, frees the read buffer and the iterator itself.
+ *  Passing NULL is allowed and does nothing.
+ */
+void release_line_iter( struct line_iter * iter )
+{
+    if ( iter == NULL )
+        return;
+
+    if ( iter->f != NULL )
+        KFileRelease( iter->f );
+    if ( iter->buffer.addr != NULL )
+        free( ( void * ) iter->buffer.addr );
+    free( iter );
+}
+
+
+/* read_line_iter
+ *  Moves the unconsumed content to the start of the buffer and fills the rest
+ *  of the buffer from the file.
+ *
+ *  ITER_EOF is set when
+ *    - the read fails ( the rc itself is not reported to the caller ),
+ *    - fewer bytes than requested were delivered,
+ *    - the buffer is already full, i.e. it holds a line longer than the buffer
+ *      and no progress is possible. The oversized content is then handed out
+ *      as the final line and the rest of the file is not read.
+ *
+ *  The same byte count is used for the read request and for the short-read
+ *  test, and the full-buffer case is handled explicitly.
+ */
+static void read_line_iter( struct line_iter * iter )
+{
+    char * base = ( char * )iter->buffer.addr;
+    size_t room;
+    size_t num_read = 0;
+    rc_t rc;
+
+    if ( iter->content.size > 0 )
+        memmove( base, iter->content.addr, iter->content.size );
+    iter->content.addr = iter->buffer.addr;
+
+    room = iter->buffer.size - iter->content.size;
+    if ( room == 0 )
+    {
+        iter->state |= ITER_EOF;
+        return;
+    }
+
+    rc = KFileRead ( iter->f, iter->pos_in_file, base + iter->content.size, room, &num_read );
+    if ( rc != 0 )
+    {
+        iter->state |= ITER_EOF;
+        return;
+    }
+
+    iter->pos_in_file += num_read;
+    iter->content.size += num_read;
+    if ( num_read < room )
+        iter->state |= ITER_EOF;
+}
+
+
+/* slice_iter_content
+ *  Makes the first 'by' bytes of the content the current line and removes
+ *  them, together with the one separator byte that follows ( if present ),
+ *  from the content. Always returns true.
+ *
+ *  When the line reaches to the very end of the content ( last line without
+ *  a newline ) there is no separator to skip; content.addr is then advanced
+ *  only to the end of the content instead of one byte beyond it.
+ */
+static bool slice_iter_content( struct line_iter * iter, size_t by )
+{
+    size_t consumed;
+
+    iter->line.addr = iter->content.addr;
+    iter->line.len = by;
+    iter->line.size = by;
+
+    if ( by < iter->content.size )
+        consumed = by + 1;              /* the line plus its separator */
+    else
+        consumed = iter->content.size;  /* the line is all that is left */
+
+    iter->content.addr += consumed;
+    iter->content.size -= consumed;
+    return true;
+}
+
+
+/* advance_line_iter
+ *  Moves the iterator to the next line.
+ *  Returns true if a line is available via get_line_iter(), false once the
+ *  iterator is exhausted ( ITER_DONE is set in that case ).
+ */
+bool advance_line_iter( struct line_iter * iter )
+{
+    if ( ( iter->state & ITER_DONE ) != 0 )
+        return false;
+
+    /* keep refilling the buffer until a complete line or the end is reached */
+    for ( ; ; )
+    {
+        char * newline;
+
+        if ( iter->content.size == 0 )
+            read_line_iter( iter );
+
+        if ( iter->content.size == 0 && ( iter->state & ITER_EOF ) )
+        {
+            iter->state |= ITER_DONE;
+            return false;
+        }
+
+        newline = string_chr( iter->content.addr, iter->content.size, '\n' );
+        if ( newline != NULL )
+            return slice_iter_content( iter, newline - iter->content.addr );
+
+        /* no newline: at the end of the file the remainder is the last line */
+        if ( iter->state & ITER_EOF )
+            return slice_iter_content( iter, iter->content.size );
+
+        read_line_iter( iter );
+    }
+}
+
+
+/* get_line_iter
+ *  Returns the current line, or NULL if iter is NULL or exhausted.
+ *  The returned String lives inside the iterator.
+ */
+String * get_line_iter( struct line_iter * iter )
+{
+    if ( iter == NULL )
+        return NULL;
+    if ( ( iter->state & ITER_DONE ) != 0 )
+        return NULL;
+    return &iter->line;
+}
+
+
+/* is_line_iter_done
+ *  True if iter is NULL or has no further line to offer.
+ */
+bool is_line_iter_done( const struct line_iter * iter )
+{
+    return ( iter == NULL ) || ( ( iter->state & ITER_DONE ) != 0 );
+}
+
+
+/* make_line_iter
+ *  Opens 'filename' in 'dir' for reading and creates a line iterator with a
+ *  read buffer of 'buffer_size' bytes that is already positioned on the first
+ *  line.
+ *
+ *  *iter is only written on success; the caller releases it with
+ *  release_line_iter().
+ *
+ *  Returns the rc of KDirectoryOpenFileRead(), rcMemory/rcExhausted if an
+ *  allocation fails, or rcRange/rcInvalid if no first line could be obtained
+ *  ( e.g. the file is empty ).
+ */
+rc_t make_line_iter( const KDirectory *dir, line_iter ** iter,
+                     const char * filename, size_t buffer_size )
+{
+    const struct KFile * f;
+    line_iter * l;
+    rc_t rc = KDirectoryOpenFileRead( dir, &f, "%s", filename );
+    if ( rc != 0 )
+    {
+        ErrMsg( "KDirectoryOpenFileRead( '%s' ) -> %R", filename, rc );
+        return rc;
+    }
+
+    l = calloc( 1, sizeof * l );
+    if ( l == NULL )
+    {
+        KFileRelease( f );
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        /* %zu: the argument is a size_t */
+        ErrMsg( "calloc( %zu ) -> %R", ( sizeof * l ), rc );
+        return rc;
+    }
+
+    /* from here on the iterator owns the file */
+    l->f = f;
+    l->buffer.addr = malloc( buffer_size );
+    if ( l->buffer.addr == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        ErrMsg( "malloc( %zu ) -> %R", ( buffer_size ), rc );
+        release_line_iter( l );
+        return rc;
+    }
+
+    l->buffer.size = buffer_size;
+    l->buffer.len = buffer_size;
+    l->content.addr = l->buffer.addr;
+
+    read_line_iter( l );
+    if ( advance_line_iter( l ) )
+        *iter = l;
+    else
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcRange, rcInvalid );
+        release_line_iter( l );
+    }
+    return rc;
+}
diff --git a/test/samline/refbases.h b/tools/fastdump/line_iter.h
similarity index 69%
copy from test/samline/refbases.h
copy to tools/fastdump/line_iter.h
index 94136fb..b453c4d 100644
--- a/test/samline/refbases.h
+++ b/tools/fastdump/line_iter.h
@@ -24,15 +24,34 @@
*
*/
-#ifndef _h_refbases_
-#define _h_refbases_
+#ifndef _h_line_iter_
+#define _h_line_iter_
#ifdef __cplusplus
extern "C" {
#endif
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
+
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+struct line_iter;
+
+void release_line_iter( struct line_iter * iter );
+bool advance_line_iter( struct line_iter * iter );
+String * get_line_iter( struct line_iter * iter );
+bool is_line_iter_done( const struct line_iter * iter );
+
+rc_t make_line_iter( const KDirectory *dir, struct line_iter ** iter,
+ const char * filename, size_t buffer_size );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/lookup_reader.c b/tools/fastdump/lookup_reader.c
new file mode 100644
index 0000000..ae30c51
--- /dev/null
+++ b/tools/fastdump/lookup_reader.c
@@ -0,0 +1,331 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "lookup_reader.h"
+#include "helper.h"
+
+#include <klib/printf.h>
+#include <kfs/file.h>
+#include <kfs/buffile.h>
+
+#include <string.h>
+
+/* reader for a lookup file: a sequence of records made of
+   an 8 byte key, a 2 byte base count and the packed bases */
+typedef struct lookup_reader
+{
+ const struct KFile * f; /* buffered file, released in release_lookup_reader() */
+ const struct index_reader * index; /* optional index used by seek_lookup_reader(), not released here */
+ SBuffer buf; /* scratch buffer for the packed bases of one record */
+ uint64_t pos; /* file position of the next record to read */
+} lookup_reader;
+
+
+/* release_lookup_reader
+ *  Releases the file and the scratch buffer and frees the reader.
+ *  Passing NULL is allowed and does nothing.
+ */
+void release_lookup_reader( struct lookup_reader * reader )
+{
+    if ( reader == NULL )
+        return;
+
+    if ( reader->f != NULL )
+        KFileRelease( reader->f );
+    release_SBuffer( &reader->buf );
+    free( reader );
+}
+
+
+/* make_lookup_reader
+ *  Opens the lookup file named by the printf-style 'fmt' + arguments in 'dir',
+ *  wraps it into a read buffer of 'buf_size' bytes and creates a reader on it.
+ *
+ *  index   optional index reader, stored in the reader ( not owned by it )
+ *  reader  receives the new reader on success only; release it with
+ *          release_lookup_reader()
+ *
+ *  Returns the rc of the failing call, or rcMemory/rcExhausted.
+ */
+rc_t make_lookup_reader( const KDirectory *dir, const struct index_reader * index,
+                         struct lookup_reader ** reader, size_t buf_size, const char * fmt, ... )
+{
+    rc_t rc;
+    const struct KFile * f = NULL;
+
+    va_list args;
+    va_list args_for_msg;
+    va_start ( args, fmt );
+    /* a va_list must not be traversed twice: the error path below needs its own copy */
+    va_copy ( args_for_msg, args );
+
+    rc = KDirectoryVOpenFileRead( dir, &f, fmt, args );
+    if ( rc != 0 )
+    {
+        char tmp[ 4096 ];
+        size_t num_writ;
+        rc_t rc1 = string_vprintf( tmp, sizeof tmp, &num_writ, fmt, args_for_msg );
+        if ( rc1 != 0 )
+            ErrMsg( "make_lookup_reader.KDirectoryVOpenFileRead( '?' ) -> %R", rc );
+        else
+            ErrMsg( "make_lookup_reader.KDirectoryVOpenFileRead( '%s' ) -> %R", tmp, rc );
+    }
+    else
+    {
+        const struct KFile * temp_file = NULL;
+        rc = KBufFileMakeRead( &temp_file, f, buf_size );
+        /* the buffered file holds its own reference to f ( if it was created ) */
+        KFileRelease( f );
+        if ( rc != 0 )
+        {
+            ErrMsg( "make_lookup_reader.KBufFileMakeRead() -> %R", rc );
+        }
+        else
+        {
+            lookup_reader * r = calloc( 1, sizeof * r );
+            if ( r == NULL )
+            {
+                KFileRelease( temp_file );
+                rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+                /* %zu: the argument is a size_t */
+                ErrMsg( "make_lookup_reader.calloc( %zu ) -> %R", ( sizeof * r ), rc );
+            }
+            else
+            {
+                r->f = temp_file;
+                r->index = index;
+                rc = make_SBuffer( &r->buf, 4096 );
+                if ( rc == 0 )
+                    *reader = r;
+                else
+                    release_lookup_reader( r );
+            }
+        }
+    }
+    va_end ( args_for_msg );
+    va_end ( args );
+    return rc;
+}
+
+
+/* read_key_and_len
+ *  Reads the 10 byte record header at file position 'pos':
+ *      bytes 0..7 : key, copied as stored
+ *      bytes 8..9 : number of bases, high byte first
+ *  and computes the total length of the record in the file.
+ *
+ *  key  receives the key
+ *  len  receives header size ( 10 ) + number of bytes of packed bases
+ *       ( two bases per byte, rounded up )
+ *
+ *  Returns the rc of KFileRead(), or a silent rcFormat/rcInvalid if fewer than
+ *  10 bytes could be read ( e.g. at the end of the file ).
+ */
+static rc_t read_key_and_len( struct lookup_reader * reader, uint64_t pos, uint64_t *key, size_t *len )
+{
+    size_t num_read;
+    char buffer[ 10 ];
+    rc_t rc = KFileRead( reader->f, pos, buffer, sizeof buffer, &num_read );
+    if ( rc != 0 )
+        ErrMsg( "read_key_and_len.KFileRead( at %ld, to_read %zu ) -> %R", pos, sizeof buffer, rc );
+    else if ( num_read != sizeof buffer )
+        rc = SILENT_RC( rcVDB, rcNoTarg, rcReading, rcFormat, rcInvalid );
+    else
+    {
+        uint16_t dna_len;
+        size_t packed_len;
+        memmove( key, buffer, sizeof *key );
+        /* go through uint8_t: a plain char may be signed, and a byte >= 0x80
+           would then sign-extend and set all the upper bits of the length */
+        dna_len = ( uint8_t )buffer[ 8 ];
+        dna_len <<= 8;
+        dna_len |= ( uint8_t )buffer[ 9 ];
+        packed_len = ( ( size_t )dna_len + 1 ) >> 1;
+        *len = ( ( sizeof *key ) + ( sizeof dna_len ) + packed_len );
+    }
+    return rc;
+}
+
+
+/* keys_equal
+ *  True if key2 is key1, or if key1 is even and key2 is the odd key
+ *  directly above it.
+ */
+static bool keys_equal( uint64_t key1, uint64_t key2 )
+{
+    if ( key1 == key2 )
+        return true;
+    return ( ( key1 & 0x01 ) == 0 ) && ( key2 == key1 + 1 );
+}
+
+/* loop_until_key_found
+ *  Walks the records of the lookup file, starting at file position *offset,
+ *  until a record matching key_to_find ( see keys_equal() ) is found.
+ *
+ *  key_found  receives the key of the last record header that was read
+ *  offset     in: where to start; out: position of the matching record
+ *             ( only written when a match was found )
+ *
+ *  Returns 0 on a match, a silent rcId/rcNotFound as soon as a larger key is
+ *  met, or the rc of the failing header read. The header that was read is
+ *  only evaluated if the read succeeded.
+ */
+static rc_t loop_until_key_found( struct lookup_reader * reader, uint64_t key_to_find,
+                                  uint64_t *key_found , uint64_t *offset )
+{
+    rc_t rc = 0;
+    bool done = false;
+    uint64_t curr = *offset;
+    while ( !done && rc == 0 )
+    {
+        size_t found_len;
+        rc = read_key_and_len( reader, curr, key_found, &found_len );
+        if ( rc == 0 )
+        {
+            if ( keys_equal( key_to_find, *key_found ) )
+            {
+                done = true;
+                *offset = curr;
+            }
+            else if ( key_to_find > *key_found )
+                curr += found_len;  /* skip this record */
+            else
+            {
+                /* already past the place where the key would have to be */
+                done = true;
+                rc = SILENT_RC( rcVDB, rcNoTarg, rcReading, rcId, rcNotFound );
+            }
+        }
+    }
+    return rc;
+}
+
+
+/* seek_lookup_reader
+ *  Positions the reader on the record for key_to_find.
+ *
+ *  With an index: the index supplies the nearest known key and its offset. If
+ *  that key already matches, or if 'exactly' is false, the reader is placed
+ *  on that offset. Otherwise the records are scanned from there on.
+ *  Without an index: the records are scanned from the start of the file.
+ *
+ *  key_found  receives the key the search ended on
+ *
+ *  Returns rcParam/rcInvalid for NULL arguments, rcId/rcTooBig if the key is
+ *  beyond the largest key of the index, rcId/rcNotFound if a scan ends on a
+ *  different key, or the rc of the failing index / scan call.
+ */
+rc_t seek_lookup_reader( struct lookup_reader * reader, uint64_t key_to_find, uint64_t * key_found, bool exactly )
+{
+    uint64_t offset = 0;
+    rc_t rc;
+
+    if ( reader == NULL || key_found == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcReading, rcParam, rcInvalid );
+        ErrMsg( "seek_lookup_reader() -> %R", rc );
+        return rc;
+    }
+
+    if ( reader->index != NULL )
+    {
+        /* we have a index! find set pos to the found offset */
+        uint64_t max_key;
+        rc = get_max_key( reader->index, &max_key );
+        if ( rc != 0 )
+            return rc;
+        if ( key_to_find > max_key )
+            return RC( rcVDB, rcNoTarg, rcReading, rcId, rcTooBig );
+
+        rc = get_nearest_offset( reader->index, key_to_find, key_found, &offset ); /* in index.c */
+        if ( rc != 0 )
+            return rc;
+
+        if ( keys_equal( key_to_find, *key_found ) || !exactly )
+        {
+            reader->pos = offset;
+            return rc;
+        }
+        /* not an exact hit: continue with a scan from the offset the index gave us */
+    }
+
+    rc = loop_until_key_found( reader, key_to_find, key_found, &offset );
+    if ( rc != 0 )
+        return rc;
+
+    if ( keys_equal( key_to_find, *key_found ) )
+        reader->pos = offset;
+    else
+    {
+        rc = RC( rcVDB, rcNoTarg, rcReading, rcId, rcNotFound );
+        ErrMsg( "seek_lookup_reader( key: %ld ) -> %R", key_to_find, rc );
+    }
+    return rc;
+}
+
+
+/* get_packed_and_key_from_lookup_reader
+ *  Reads the record at the reader's current position and advances the reader.
+ *
+ *  key           receives the 8 byte key of the record
+ *  packed_bases  receives the 2 length bytes followed by the packed bases
+ *                ( two bases per byte ); S.len / S.size are set accordingly
+ *
+ *  Returns rcParam/rcInvalid for NULL arguments, the rc of KFileRead(), or
+ *  rcFormat/rcInvalid if the record is incomplete ( silent for the header,
+ *  e.g. at the end of the file ).
+ *
+ *  NOTE(review): packed data that does not fit into packed_bases is cut off at
+ *  buffer_size - 2 and the position advances only by what was read, while the
+ *  length bytes still announce the full record - confirm that callers never
+ *  hit this case.
+ */
+rc_t get_packed_and_key_from_lookup_reader( struct lookup_reader * reader,
+                                            uint64_t * key, SBuffer * packed_bases )
+{
+    rc_t rc;
+    if ( reader == NULL || key == NULL || packed_bases == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcReading, rcParam, rcInvalid );
+        ErrMsg( "get_packed_and_key_from_lookup_reader() -> %R", rc );
+    }
+    else
+    {
+        size_t num_read;
+        char buffer1[ 10 ];
+        rc = KFileRead( reader->f, reader->pos, buffer1, sizeof buffer1, &num_read );
+        if ( rc != 0 )
+            ErrMsg( "KFileRead( at %ld, to_read %zu ) -> %R", reader->pos, sizeof buffer1, rc );
+        else if ( num_read != sizeof buffer1 )
+            rc = SILENT_RC( rcVDB, rcNoTarg, rcReading, rcFormat, rcInvalid );
+        else
+        {
+            uint16_t dna_len;
+            size_t to_read;
+            char * dst = ( char * )packed_bases->S.addr;
+
+            memmove( key, buffer1, sizeof *key );
+
+            /* go through uint8_t: a plain char may be signed, and a byte >= 0x80
+               would then sign-extend and set all the upper bits of the length */
+            dna_len = ( uint8_t )buffer1[ 8 ];
+            dna_len <<= 8;
+            dna_len |= ( uint8_t )buffer1[ 9 ];
+
+            /* the length bytes are passed on unchanged in front of the packed bases */
+            dst[ 0 ] = buffer1[ 8 ];
+            dst[ 1 ] = buffer1[ 9 ];
+            dst += 2;
+
+            to_read = ( ( size_t )dna_len + 1 ) >> 1;
+            if ( to_read > ( packed_bases->buffer_size - 2 ) )
+                to_read = ( packed_bases->buffer_size - 2 );
+
+            rc = KFileRead( reader->f, reader->pos + 10, dst, to_read, &num_read );
+            if ( rc != 0 )
+                ErrMsg( "KFileRead( at %ld, to_read %zu ) -> %R", reader->pos + 10, to_read, rc );
+            else if ( num_read != to_read )
+            {
+                rc = RC( rcVDB, rcNoTarg, rcReading, rcFormat, rcInvalid );
+                ErrMsg( "KFileRead( %ld ) %zu vs %zu -> %R", reader->pos + 10, num_read, to_read, rc );
+            }
+            else
+            {
+                packed_bases->S.len = packed_bases->S.size = num_read + 2;
+                reader->pos += ( num_read + 10 );
+            }
+        }
+    }
+    return rc;
+}
+
+/* get_packed_from_lookup_reader
+ *  Reads the next record and splits its key:
+ *      seq_spot_id = key >> 1
+ *      seq_read_id = 2 if the lowest bit of the key is set, else 1
+ *  The output ids are only written if the read succeeded.
+ */
+rc_t get_packed_from_lookup_reader( struct lookup_reader * reader,
+                                    int64_t * seq_spot_id, uint32_t * seq_read_id, SBuffer * packed_bases )
+{
+    uint64_t key;
+    rc_t rc = get_packed_and_key_from_lookup_reader( reader, &key, packed_bases );
+    if ( rc != 0 )
+        return rc;
+
+    *seq_spot_id = key >> 1;
+    if ( ( key & 1 ) != 0 )
+        *seq_read_id = 2;
+    else
+        *seq_read_id = 1;
+    return rc;
+}
+
+
+/* get_bases_from_lookup_reader
+ *  Reads the next record into the reader's scratch buffer and, if that
+ *  succeeded, unpacks it into 'bases' with unpack_4na().
+ */
+rc_t get_bases_from_lookup_reader( struct lookup_reader * reader,
+                                   int64_t * seq_spot_id, uint32_t * seq_read_id, SBuffer * bases )
+{
+    rc_t rc = get_packed_from_lookup_reader( reader, seq_spot_id, seq_read_id, &reader->buf );
+    if ( rc != 0 )
+        return rc;
+
+    unpack_4na( &reader->buf.S, bases );
+    return rc;
+}
+
+
+/* lookup_bases
+ *  Reads the next record of the lookup file into B and verifies that it
+ *  belongs to the expected row and read.
+ *
+ *  Returns the rc of the read, or rcTransfer/rcInvalid if the record found
+ *  carries a different row_id / read_id.
+ */
+rc_t lookup_bases( struct lookup_reader * lookup, int64_t row_id, uint32_t read_id, SBuffer * B )
+{
+    int64_t found_seq_spot_id;
+    uint32_t found_seq_read_id;
+    rc_t rc = get_bases_from_lookup_reader( lookup, &found_seq_spot_id, &found_seq_read_id, B );
+    if ( rc == 0 )
+    {
+        if ( found_seq_spot_id != row_id || found_seq_read_id != read_id )
+        {
+            rc = RC( rcVDB, rcNoTarg, rcConstructing, rcTransfer, rcInvalid );
+            /* the ids are signed 64 bit ( %ld ), the read-ids unsigned 32 bit ( %u ) */
+            ErrMsg( "id-mismatch for seq_id = %ld vs. %ld / read_id = %u vs %u",
+                    found_seq_spot_id, row_id, found_seq_read_id, read_id );
+        }
+    }
+    return rc;
+}
diff --git a/test/samline/cigar.h b/tools/fastdump/lookup_reader.h
similarity index 52%
copy from test/samline/cigar.h
copy to tools/fastdump/lookup_reader.h
index d0ec04a..ecf5ce9 100644
--- a/test/samline/cigar.h
+++ b/tools/fastdump/lookup_reader.h
@@ -24,33 +24,49 @@
*
*/
-#ifndef _h_cigar_
-#define _h_cigar_
+#ifndef _h_lookup_reader_
+#define _h_lookup_reader_
#ifdef __cplusplus
extern "C" {
#endif
-struct cigar_t;
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_helper_
+#include "helper.h"
+#endif
+
+
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+#ifndef _h_index_
+#include "index.h"
+#endif
+
+struct lookup_reader;
-struct cigar_t * make_cigar_t( const char * cigar_str );
-void free_cigar_t( struct cigar_t * c );
+void release_lookup_reader( struct lookup_reader * reader );
-int cigar_t_reflen( const struct cigar_t * c );
-int cigar_t_readlen( const struct cigar_t * c );
-int cigar_t_inslen( const struct cigar_t * c );
+rc_t make_lookup_reader( const KDirectory *dir, const struct index_reader * index,
+ struct lookup_reader ** reader, size_t buf_size, const char * fmt, ... );
-size_t cigar_t_string( char * buffer, size_t buf_len, const struct cigar_t * c );
+rc_t seek_lookup_reader( struct lookup_reader * reader, uint64_t key, uint64_t * key_found, bool exactly );
-struct cigar_t * merge_cigar_t( const struct cigar_t * c );
+rc_t get_packed_and_key_from_lookup_reader( struct lookup_reader * reader,
+ uint64_t * key, SBuffer * packed_bases );
-size_t md_tag( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * read, const char * reference );
+rc_t get_packed_from_lookup_reader( struct lookup_reader * reader,
+ int64_t * seq_spot_id, uint32_t * seq_read_id, SBuffer * packed_bases );
-void debug_cigar_t( const struct cigar_t * c );
+rc_t get_bases_from_lookup_reader( struct lookup_reader * reader,
+ int64_t * seq_spot_id, uint32_t * seq_read_id, SBuffer * bases );
-size_t cigar_t_2_read( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * ref_bases, const char * ins_bases );
+rc_t lookup_bases( struct lookup_reader * lookup, int64_t row_id, uint32_t read_id, SBuffer * B );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/lookup_writer.c b/tools/fastdump/lookup_writer.c
new file mode 100644
index 0000000..badea64
--- /dev/null
+++ b/tools/fastdump/lookup_writer.c
@@ -0,0 +1,138 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "lookup_writer.h"
+#include "helper.h"
+
+#include <kfs/file.h>
+#include <kfs/buffile.h>
+
+/* writer for a lookup file: appends records made of a key and packed bases */
+typedef struct lookup_writer
+{
+ struct KFile * f; /* buffered file, released in release_lookup_writer() */
+ struct index_writer * idx; /* optional index writer that receives key + record position, not released here */
+ SBuffer buf; /* scratch buffer used to pack unpacked bases */
+ uint64_t pos; /* file position of the next write */
+} lookup_writer;
+
+
+/* release_lookup_writer
+ *  Releases the file and the scratch buffer and frees the writer.
+ *  Passing NULL is allowed and does nothing.
+ */
+void release_lookup_writer( struct lookup_writer * writer )
+{
+    if ( writer == NULL )
+        return;
+
+    if ( writer->f != NULL )
+        KFileRelease( writer->f );
+    release_SBuffer( &writer->buf );
+    free( writer );
+}
+
+
+/* make_lookup_writer
+ *  Creates the file named by the printf-style 'fmt' + arguments in 'dir'
+ *  ( mode 0664, kcmInit ), wraps it into a write buffer of 'buf_size' bytes
+ *  and creates a writer on it.
+ *
+ *  idx     optional index writer, stored in the writer ( not owned by it )
+ *  writer  receives the new writer on success only; release it with
+ *          release_lookup_writer()
+ *
+ *  Returns the rc of the failing call, or rcMemory/rcExhausted.
+ */
+rc_t make_lookup_writer( KDirectory *dir, struct index_writer * idx,
+                         struct lookup_writer ** writer, size_t buf_size,
+                         const char * fmt, ... )
+{
+    rc_t rc;
+    struct KFile * f;
+
+    va_list args;
+    va_start ( args, fmt );
+
+    rc = KDirectoryVCreateFile( dir, &f, false, 0664, kcmInit, fmt, args );
+    if ( rc != 0 )
+        ErrMsg( "KDirectoryVCreateFile() -> %R", rc );
+    else
+    {
+        struct KFile * temp_file;
+        rc = KBufFileMakeWrite( &temp_file, f, false, buf_size );
+        /* the buffered file holds its own reference to f ( if it was created ) */
+        KFileRelease( f );
+        if ( rc != 0 )
+            ErrMsg( "KBufFileMakeWrite() -> %R", rc );
+        else
+        {
+            lookup_writer * w = calloc( 1, sizeof * w );
+            if ( w == NULL )
+            {
+                KFileRelease( temp_file );
+                rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+                /* %zu: the argument is a size_t */
+                ErrMsg( "calloc( %zu ) -> %R", ( sizeof * w ), rc );
+            }
+            else
+            {
+                w->f = temp_file;
+                w->idx = idx;
+                rc = make_SBuffer( &w->buf, 4096 );
+                if ( rc == 0 )
+                    *writer = w;
+                else
+                    release_lookup_writer( w );
+            }
+        }
+    }
+    va_end ( args );
+    return rc;
+}
+
+
+/* write_unpacked_to_lookup_writer
+ *  Packs the bases into the writer's scratch buffer with pack_4na() and
+ *  writes them under the key built by make_key() from spot-id and read-id.
+ */
+rc_t write_unpacked_to_lookup_writer( struct lookup_writer * writer,
+                                      int64_t seq_spot_id, uint32_t seq_read_id, const String * bases_as_unpacked_4na )
+{
+    uint64_t key;
+
+    pack_4na( bases_as_unpacked_4na, &writer->buf );
+    key = make_key( seq_spot_id, seq_read_id );
+    return write_packed_to_lookup_writer( writer, key, &writer->buf.S );
+}
+
+/* write_packed_to_lookup_writer
+ *  Appends one record ( the 8 byte key followed by the already packed bases )
+ *  at the writer's current position and advances the position.
+ *  If the writer has an index writer, the key and the start position of the
+ *  record are passed to write_key().
+ *
+ *  Returns the rc of the failing write, rcFormat/rcInvalid if a write was
+ *  incomplete, or the rc of write_key().
+ */
+rc_t write_packed_to_lookup_writer( struct lookup_writer * writer,
+                                    uint64_t key, const String * bases_as_packed_4na )
+{
+    size_t num_writ;
+    uint64_t start_pos;
+    rc_t rc = KFileWrite( writer->f, writer->pos, &key, sizeof key, &num_writ );
+    if ( rc != 0 )
+    {
+        ErrMsg( "KFileWriteAll( key ) -> %R", rc );
+        return rc;
+    }
+    if ( num_writ != sizeof key )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcWriting, rcFormat, rcInvalid );
+        ErrMsg( "KFileWriteAll( key ) -> %R", rc );
+        return rc;
+    }
+
+    /* remember where the record starts, the index wants that position */
+    start_pos = writer->pos;
+    writer->pos += num_writ;
+
+    rc = KFileWrite( writer->f, writer->pos, bases_as_packed_4na->addr, bases_as_packed_4na->size, &num_writ );
+    if ( rc != 0 )
+    {
+        ErrMsg( "KFileWriteAll( bases ) -> %R", rc );
+        return rc;
+    }
+    if ( num_writ != bases_as_packed_4na->size )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcWriting, rcFormat, rcInvalid );
+        ErrMsg( "KFileWriteAll( bases ) -> %R", rc );
+        return rc;
+    }
+
+    if ( writer->idx != NULL )
+        rc = write_key( writer->idx, key, start_pos );
+    writer->pos += num_writ;
+    return rc;
+}
diff --git a/test/samline/cigar.h b/tools/fastdump/lookup_writer.h
similarity index 62%
copy from test/samline/cigar.h
copy to tools/fastdump/lookup_writer.h
index d0ec04a..9e83051 100644
--- a/test/samline/cigar.h
+++ b/tools/fastdump/lookup_writer.h
@@ -24,33 +24,41 @@
*
*/
-#ifndef _h_cigar_
-#define _h_cigar_
+#ifndef _h_lookup_writer_
+#define _h_lookup_writer_
#ifdef __cplusplus
extern "C" {
#endif
-struct cigar_t;
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
-struct cigar_t * make_cigar_t( const char * cigar_str );
-void free_cigar_t( struct cigar_t * c );
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
-int cigar_t_reflen( const struct cigar_t * c );
-int cigar_t_readlen( const struct cigar_t * c );
-int cigar_t_inslen( const struct cigar_t * c );
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+#ifndef _h_index_
+#include "index.h"
+#endif
-size_t cigar_t_string( char * buffer, size_t buf_len, const struct cigar_t * c );
+struct lookup_writer;
-struct cigar_t * merge_cigar_t( const struct cigar_t * c );
+void release_lookup_writer( struct lookup_writer * writer );
-size_t md_tag( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * read, const char * reference );
+rc_t make_lookup_writer( KDirectory *dir, struct index_writer * idx, struct lookup_writer ** writer,
+ size_t buf_size, const char * fmt, ... );
-void debug_cigar_t( const struct cigar_t * c );
+rc_t write_unpacked_to_lookup_writer( struct lookup_writer * writer,
+ int64_t seq_spot_id, uint32_t seq_read_id, const String * bases_as_unpacked_4na );
-size_t cigar_t_2_read( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * ref_bases, const char * ins_bases );
+rc_t write_packed_to_lookup_writer( struct lookup_writer * writer,
+ uint64_t key, const String * bases_as_packed_4na );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/merge_sorter.c b/tools/fastdump/merge_sorter.c
new file mode 100644
index 0000000..2080e8a
--- /dev/null
+++ b/tools/fastdump/merge_sorter.c
@@ -0,0 +1,173 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+#include "merge_sorter.h"
+#include "lookup_reader.h"
+#include "lookup_writer.h"
+#include "index.h"
+#include "helper.h"
+
+typedef struct merge_src /* one input of the merge: a reader plus the record most recently read from it */
+{
+ struct lookup_reader * reader; /* made in add_merge_sorter_src(), released in release_merge_sorter() */
+ uint64_t key; /* key of the current record; run_merge_sorter() writes the smallest key first */
+ SBuffer packed_bases; /* packed bases belonging to 'key' */
+ rc_t rc; /* result of the most recent read; a source with rc != 0 is skipped by get_min_merge_src() */
+} merge_src;
+
+
+typedef struct merge_sorter /* state of one merge: params->count sources merged into one lookup-writer */
+{
+ const merge_sorter_params * params; /* pointer handed to make_merge_sorter(), stored as is ( not copied ) */
+ merge_src * src_list; /* array of params->count entries, calloc'ed in make_merge_sorter() */
+ struct lookup_writer * dst; /* receives the merged records */
+ struct index_writer * idx; /* only made if params->index_filename != NULL, otherwise NULL */
+} merge_sorter;
+
+
+/* releases the writer, the index-writer, every source ( reader + buffer ), the source-list
+   and finally the merge_sorter itself; a NULL pointer is ignored */
+void release_merge_sorter( struct merge_sorter *ms )
+{
+    if ( ms == NULL )
+        return;
+
+    release_lookup_writer( ms->dst );
+    release_index_writer( ms->idx );
+
+    if ( ms->src_list != NULL )
+    {
+        uint32_t n = 0;
+        while ( n < ms->params->count )
+        {
+            release_lookup_reader( ms->src_list[ n ].reader );
+            release_SBuffer( &( ms->src_list[ n ].packed_bases ) );
+            n++;
+        }
+        free( ( void * ) ms->src_list );
+    }
+
+    free( ( void * ) ms );
+}
+
+
+/* creates a merge_sorter: an optional index-writer ( if params->index_filename != NULL ),
+   the lookup-writer for params->output_filename and an empty list of params->count sources.
+   'params' is stored as a pointer, it has to stay valid until release_merge_sorter().
+   returns 0 and the new object in *ms on success; on failure everything made so far
+   is released and *ms is NULL */
+rc_t make_merge_sorter( struct merge_sorter ** ms, const merge_sorter_params * params )
+{
+    rc_t rc = 0;
+    merge_sorter * m = calloc( 1, sizeof * m );
+
+    /* there are callers which release *ms even if this function failed:
+       never hand back an uninitialized pointer */
+    *ms = NULL;
+
+    if ( m == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        ErrMsg( "calloc( %d ) -> %R", ( sizeof * m ), rc );
+    }
+    else
+    {
+        if ( params->index_filename != NULL )
+            rc = make_index_writer( params->dir, &m->idx, params->buf_size,
+                                    20000, "%s", params->index_filename );
+
+        if ( rc == 0 )
+        {
+            /* m->idx is NULL here if no index was requested */
+            rc = make_lookup_writer( params->dir, m->idx, &m->dst, params->buf_size,
+                                     "%s", params->output_filename );
+            if ( rc == 0 )
+            {
+                m->src_list = calloc( params->count, sizeof * m->src_list );
+                if ( m->src_list == NULL )
+                {
+                    rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+                    ErrMsg( "calloc( %d ) -> %R", ( ( sizeof * m->src_list ) * params->count ), rc );
+                }
+                else
+                {
+                    m->params = params;
+                    *ms = m;
+                }
+            }
+        }
+    }
+    if ( rc != 0 )
+        release_merge_sorter( m );
+    return rc;
+}
+
+
+/* opens 'filename' as source number 'id' ( 0 ... params->count - 1 ) and reads its first record.
+   the result of that first read is kept in the source ( src->rc ), it is not returned:
+   only a invalid id, a failed make_lookup_reader() or make_SBuffer() make this function fail */
+rc_t add_merge_sorter_src( struct merge_sorter *ms, const char * filename, uint32_t id )
+{
+    rc_t rc;
+    merge_src * slot;
+
+    if ( id >= ms->params->count )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+        ErrMsg( "add_merge_sorter_src() ... invalid id of %d", id );
+        return rc;
+    }
+
+    slot = &ms->src_list[ id ];
+    rc = make_lookup_reader( ms->params->dir, NULL, &slot->reader, ms->params->buf_size, "%s", filename );
+    if ( rc != 0 )
+        return rc;
+
+    rc = make_SBuffer( &slot->packed_bases, 4096 );
+    if ( rc == 0 )
+        slot->rc = get_packed_and_key_from_lookup_reader( slot->reader, &slot->key, &slot->packed_bases );
+    return rc;
+}
+
+
+/* scans 'count' sources and returns the one with the smallest key among those whose
+   last read succeeded ( rc == 0 ); on equal keys the first one found is kept.
+   returns NULL if no source has rc == 0 */
+static merge_src * get_min_merge_src( merge_src * src, uint32_t count )
+{
+    merge_src * best = NULL;
+    uint32_t n;
+    for ( n = 0; n < count; n++ )
+    {
+        merge_src * cand = src + n;
+        if ( cand->rc != 0 )
+            continue;
+        if ( best == NULL || cand->key < best->key )
+            best = cand;
+    }
+    return best;
+}
+
+rc_t CC Quitting();
+
+/* the merge itself: as long as there is a source with a valid record, write the record
+   with the smallest key to the destination and read the next record from that source.
+   stops with the first error of Quitting() or of the writer */
+rc_t run_merge_sorter( struct merge_sorter *ms )
+{
+    rc_t rc = 0;
+    merge_src * cur;
+
+    for ( cur = get_min_merge_src( ms->src_list, ms->params->count );
+          cur != NULL;
+          cur = get_min_merge_src( ms->src_list, ms->params->count ) )
+    {
+        rc = Quitting();
+        if ( rc != 0 )
+            break;
+
+        rc = write_packed_to_lookup_writer( ms->dst, cur->key, &cur->packed_bases.S );
+        if ( rc != 0 )
+            break;
+
+        /* a failed read only takes this source out of the merge, it does not stop the merge */
+        cur->rc = get_packed_and_key_from_lookup_reader( cur->reader, &cur->key, &cur->packed_bases );
+    }
+    return rc;
+}
diff --git a/test/samline/refbases.h b/tools/fastdump/merge_sorter.h
similarity index 64%
copy from test/samline/refbases.h
copy to tools/fastdump/merge_sorter.h
index 94136fb..fe3352c 100644
--- a/test/samline/refbases.h
+++ b/tools/fastdump/merge_sorter.h
@@ -1,41 +1,66 @@
-/*===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-*/
-
-#ifndef _h_refbases_
-#define _h_refbases_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#ifndef _h_merge_sorter_
+#define _h_merge_sorter_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
+
+struct merge_sorter;
+
+typedef struct merge_sorter_params /* input of make_merge_sorter() */
+{
+ KDirectory *dir; /* directory handed to the readers and writers */
+ const char * output_filename; /* the merged records are written to this file */
+ const char * index_filename; /* optional: if not NULL an index-writer is made for this file */
+ uint32_t count; /* number of sources; valid ids for add_merge_sorter_src() are 0 ... count - 1 */
+ size_t buf_size; /* buffer-size handed to the readers and writers */
+} merge_sorter_params;
+
+
+rc_t make_merge_sorter( struct merge_sorter ** ms, const merge_sorter_params * params );
+
+rc_t add_merge_sorter_src( struct merge_sorter *ms, const char * filename, uint32_t id );
+
+void release_merge_sorter( struct merge_sorter *ms );
+
+rc_t run_merge_sorter( struct merge_sorter *ms );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/fastdump/raw_read_iter.c b/tools/fastdump/raw_read_iter.c
new file mode 100644
index 0000000..ca5ab5e
--- /dev/null
+++ b/tools/fastdump/raw_read_iter.c
@@ -0,0 +1,100 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "raw_read_iter.h"
+#include "cmn_iter.h"
+#include "helper.h"
+
+#include <os-native.h>
+#include <sysalloc.h>
+
+typedef struct raw_read_iter /* iterator made on "PRIMARY_ALIGNMENT" ( see make_raw_read_iter ) */
+{
+ struct cmn_iter * cmn; /* the common iterator all calls are forwarded to */
+ uint32_t seq_spot_id, seq_read_id, raw_read_id; /* ids handed out by cmn_iter_add_column() for the 3 columns */
+} raw_read_iter;
+
+
+/* destroys the common iterator and frees the raw_read_iter; NULL is ignored */
+void destroy_raw_read_iter( struct raw_read_iter * iter )
+{
+    if ( iter == NULL )
+        return;
+
+    destroy_cmn_iter( iter->cmn );
+    free( ( void * ) iter );
+}
+
+
+/* makes an iterator on "PRIMARY_ALIGNMENT" with the columns SEQ_SPOT_ID, SEQ_READ_ID and
+   RAW_READ ( as INSDC:4na:bin ); the range is set via the SEQ_SPOT_ID column.
+   *iter is only written on success; on failure the partially made iterator is destroyed */
+rc_t make_raw_read_iter( cmn_params * params, struct raw_read_iter ** iter )
+{
+    rc_t rc;
+    raw_read_iter * self = calloc( 1, sizeof * self );
+    if ( self == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        ErrMsg( "make_raw_read_iter.calloc( %d ) -> %R", ( sizeof * self ), rc );
+        return rc;
+    }
+
+    rc = make_cmn_iter( params, "PRIMARY_ALIGNMENT", &self->cmn );
+    if ( rc == 0 )
+        rc = cmn_iter_add_column( self->cmn, "SEQ_SPOT_ID", &self->seq_spot_id );
+    if ( rc == 0 )
+        rc = cmn_iter_add_column( self->cmn, "SEQ_READ_ID", &self->seq_read_id );
+    if ( rc == 0 )
+        rc = cmn_iter_add_column( self->cmn, "(INSDC:4na:bin)RAW_READ", &self->raw_read_id );
+    if ( rc == 0 )
+        rc = cmn_iter_range( self->cmn, self->seq_spot_id );
+
+    if ( rc == 0 )
+        *iter = self;
+    else
+        destroy_raw_read_iter( self );
+    return rc;
+}
+
+
+/* advances to the next row and reads its 3 columns into 'rec'.
+   returns false if cmn_iter_next() returned false ( 'rec' is untouched then ).
+   returns true if there was a row; *rc tells whether reading the columns succeeded,
+   so the caller has to check *rc even if true is returned */
+bool get_from_raw_read_iter( struct raw_read_iter * iter, raw_read_rec * rec, rc_t * rc )
+{
+    if ( !cmn_iter_next( iter->cmn, rc ) )
+        return false;
+
+    *rc = cmn_read_uint64( iter->cmn, iter->seq_spot_id, &rec->seq_spot_id );
+    if ( *rc == 0 )
+        *rc = cmn_read_uint32( iter->cmn, iter->seq_read_id, &rec->seq_read_id );
+    if ( *rc == 0 )
+        *rc = cmn_read_String( iter->cmn, iter->raw_read_id, &rec->raw_read );
+    return true;
+}
+
+
+/* forwards to cmn_iter_row_count() of the common iterator */
+uint64_t get_row_count_of_raw_read( struct raw_read_iter * iter )
+{
+    uint64_t rows = cmn_iter_row_count( iter->cmn );
+    return rows;
+}
diff --git a/test/samline/refbases.h b/tools/fastdump/raw_read_iter.h
similarity index 67%
copy from test/samline/refbases.h
copy to tools/fastdump/raw_read_iter.h
index 94136fb..11d2229 100644
--- a/test/samline/refbases.h
+++ b/tools/fastdump/raw_read_iter.h
@@ -24,15 +24,41 @@
*
*/
-#ifndef _h_refbases_
-#define _h_refbases_
+#ifndef _h_raw_read_iter_
+#define _h_raw_read_iter_
#ifdef __cplusplus
extern "C" {
#endif
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
+
+#ifndef _h_cmn_iter_
+#include "cmn_iter.h"
+#endif
+
+struct raw_read_iter;
+
+typedef struct raw_read_rec /* one row as delivered by get_from_raw_read_iter() */
+{
+ uint64_t seq_spot_id; /* value of the SEQ_SPOT_ID column */
+ uint32_t seq_read_id; /* value of the SEQ_READ_ID column */
+ String raw_read; /* value of the RAW_READ column, requested as INSDC:4na:bin */
+} raw_read_rec;
+
+void destroy_raw_read_iter( struct raw_read_iter * iter );
+
+rc_t make_raw_read_iter( cmn_params * params, struct raw_read_iter ** iter );
+
+bool get_from_raw_read_iter( struct raw_read_iter * iter, raw_read_rec * rec, rc_t * rc );
+
+uint64_t get_row_count_of_raw_read( struct raw_read_iter * iter );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/readme.txt b/tools/fastdump/readme.txt
new file mode 100644
index 0000000..20e270c
--- /dev/null
+++ b/tools/fastdump/readme.txt
@@ -0,0 +1,92 @@
+Here is an example of how to use the new fastdump-tool:
+
+The usage is in 2 stages:
+( stage 1 ) create a lookup-file from the accession
+ SRRXXXXXX ---> SRRXXXXXX.lookup
+
+( stage 2 ) create the final output from the accession using the lookupfile
+ SRRXXXXXX + SRRXXXXXX.lookup ---> SRRXXXXXX.txt
+
+
+example: accession = SRR833540
+
+================================================================================
+ stage 1:
+================================================================================
+
+(version a)
+create the lookup-file in the current directory, with memory limit of 4 GB
+
+fastdump SRR833540 -f lookup -o SRR833540.lookup -m 4G -p
+
+This will create a number of temporary files in your current directory.
+Make sure you have enough space for that. The lookup-file for this accession
+(SRR833540.lookup) will be about 121 GB in size. You will need double that space
+because of the temporary files. The tool will delete them after it created the
+lookup-file. How much memory should you give to the tool? Look at your
+available memory with 'free -h'. Give it about half as much as your free memory.
+You can give it more, but that will result in memory being swapped and that will
+result in a big slowdown. One of our machines took about 500 minutes for this,
+without swapping. The '-p' switch turns a percent-bar on.
+
+
+
+(version b)
+create the lookup-file in the current directory, with memory limit of 4 GB on 6 threads
+
+fastdump SRR833540 -f lookup -o SRR833540.lookup -m 4G -e 6 -p
+
+This will create the same output, but much faster. But now you are using 4 GB on
+each of the 6 threads. You will need more than 4 GB * 6 = 24 GB, you will need
+about 35 GB of memory because other parts of the tool need memory too. If you do not
+have that much memory, reduce the amount of memory per thread or the number of threads
+until it fits your machine. You can check how much is actually used with
+'top -u your_username'. If you specify 6 threads, you should see about 600% of
+CPU utilization. If you see less than that you are limited by the speed of the
+filesystem access. Make sure that SRR833540 is local on your filesystem and all
+the references it uses are locally accessible too. This took us about 110 minutes.
+
+
+How do you know the accession is local?
+
+'vdb-dump SRR833540 --info'
+
+If the path points to your local filesystem ( '/home/user/ncbi/...' etc. )
+ ---> you are good to go.
+
+If the path points to a remote url ( 'http://sra-download.ncbi...' etc. )
+ ---> download the accession first, with the prefetch-tool.
+
+How do you know that you have all references locally?
+
+'sra-pileup SRR833540 --function ref'
+
+This will list all references used by the accession.
+If the location points to your local filesystem, you are good to go. If the location
+points to a remote url, download the references with the prefetch-tool.
+
+If after prefetch the accession is still not found locally, you have a configuration issue.
+
+If you have an SSD available, that helps too!
+
+================================================================================
+ stage 2:
+================================================================================
+
+(version a)
+create the final output as a file in the current directory, with percent-bar
+
+fastdump SRR833540 -l SRR833540.lookup -o SRR833540.txt -p
+
+
+(version b)
+create the output on stdout ( to be piped into other tools )
+
+fastdump SRR833540 -l SRR833540.lookup
+
+The output will be in this text-format:
+ID<tab>READ<tab>SPOTGROUP
+
+If you want FASTQ instead, add the option '-f fastq'.
+
+
diff --git a/tools/fastdump/sorter.c b/tools/fastdump/sorter.c
new file mode 100644
index 0000000..21d9325
--- /dev/null
+++ b/tools/fastdump/sorter.c
@@ -0,0 +1,538 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "sorter.h"
+#include "lookup_writer.h"
+#include "lookup_reader.h"
+#include "merge_sorter.h"
+#include "helper.h"
+
+#include <klib/vector.h>
+#include <klib/printf.h>
+#include <klib/progressbar.h>
+#include <kproc/thread.h>
+
+/*
+ this is in interfaces/cc/XXX/YYY/atomic.h
+ XXX ... the compiler ( cc, gcc, icc, vc++ )
+ YYY ... the architecture ( fat86, i386, noarch, ppc32, x86_64 )
+ */
+#include <atomic.h>
+
+
+typedef struct sorter /* state of one run_sorter() call */
+{
+ sorter_params params; /* filled member by member in init_sorter() */
+ KVector * store; /* key ---> pointer to a String with the packed bases */
+ SBuffer buf; /* pack_4na() packs the bases of the current row into this buffer */
+ uint64_t bytes_in_store; /* sum of the item-sizes put into the store, compared against params.mem_limit */
+ uint32_t sub_file_id; /* id of the next sub-file, which is also the number of sub-files written so far */
+} sorter;
+
+
+/* releases what the sorter holds: the pack-buffer, the source-iterator and the store;
+   the sorter itself is not freed; NULL is ignored */
+static void release_sorter( struct sorter * sorter )
+{
+    if ( sorter == NULL )
+        return;
+
+    release_SBuffer( &sorter->buf );
+
+    if ( sorter->params.src != NULL )
+        destroy_raw_read_iter( sorter->params.src );
+
+    if ( sorter->store != NULL )
+        KVectorRelease( sorter->store );
+}
+
+/* prepares a sorter: makes the store and the pack-buffer and copies the members of
+   'params' the sorter works with ( index_filename is set to NULL, the other members
+   of sorter->params are not written ).
+   on failure nothing is left allocated and the caller must not call release_sorter() */
+static rc_t init_sorter( struct sorter * sorter, const sorter_params * params )
+{
+    rc_t rc = KVectorMake( &sorter->store );
+    if ( rc != 0 )
+    {
+        ErrMsg( "KVectorMake() -> %R", rc );
+        return rc;
+    }
+
+    rc = make_SBuffer( &sorter->buf, 4096 );
+    if ( rc != 0 )
+    {
+        /* run_sorter() skips release_sorter() if init_sorter() failed,
+           so the store has to be given back right here */
+        KVectorRelease( sorter->store );
+        sorter->store = NULL;
+        return rc;
+    }
+
+    sorter->params.dir = params->dir;
+    sorter->params.output_filename = params->output_filename;
+    sorter->params.index_filename = NULL;
+    sorter->params.temp_path = params->temp_path;
+    sorter->params.src = params->src;
+    sorter->params.buf_size = params->buf_size;
+    sorter->params.mem_limit = params->mem_limit;
+    sorter->params.prefix = params->prefix;
+    sorter->bytes_in_store = 0;
+    sorter->sub_file_id = 0;
+    return rc;
+}
+
+
+/* prints the name of sub-file 'id' into 'buffer': [temp_path/]sub_<prefix>_<id>.dat
+   a '/' is only inserted if temp_path does not already end with one;
+   an empty temp_path is an error */
+static rc_t make_subfilename( const sorter_params * params, uint32_t id, char * buffer, size_t buflen )
+{
+    rc_t rc;
+    size_t written;
+    const char * dir = params->temp_path;
+
+    if ( dir == NULL )
+        rc = string_printf( buffer, buflen, &written, "sub_%d_%d.dat",
+                            params->prefix, id );
+    else
+    {
+        uint32_t len = string_measure( dir, NULL );
+        if ( len == 0 )
+        {
+            rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+            ErrMsg( "make_subfilename.string_measure() = 0 -> %R", rc );
+        }
+        else if ( dir[ len - 1 ] == '/' )
+            rc = string_printf( buffer, buflen, &written, "%ssub_%d_%d.dat",
+                                dir, params->prefix, id );
+        else
+            rc = string_printf( buffer, buflen, &written, "%s/sub_%d_%d.dat",
+                                dir, params->prefix, id );
+    }
+
+    if ( rc != 0 )
+        ErrMsg( "make_subfilename.string_printf() -> %R", rc );
+    return rc;
+}
+
+
+/* prints the name of the file the sorter writes its result to into 'buffer':
+   prefix >  0 ... [temp_path/]tmp_<prefix>.dat ( a temporary file of the sorter-pool )
+   prefix == 0 ... params->output_filename
+   a '/' is only inserted if temp_path does not already end with one;
+   an empty temp_path is an error */
+static rc_t make_dst_filename( const sorter_params * params, char * buffer, size_t buflen )
+{
+    rc_t rc;
+    size_t num_writ;
+    if ( params->prefix > 0 )
+    {
+        if ( params->temp_path != NULL )
+        {
+            uint32_t l = string_measure( params->temp_path, NULL );
+            if ( l == 0 )
+            {
+                rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+                /* report the function the error really happened in */
+                ErrMsg( "make_dst_filename.string_measure() = 0 -> %R", rc );
+            }
+            else
+            {
+                if ( params->temp_path[ l-1 ] == '/' )
+                    rc = string_printf( buffer, buflen, &num_writ, "%stmp_%d.dat",
+                                        params->temp_path, params->prefix );
+                else
+                    rc = string_printf( buffer, buflen, &num_writ, "%s/tmp_%d.dat",
+                                        params->temp_path, params->prefix );
+            }
+        }
+        else
+            rc = string_printf( buffer, buflen, &num_writ, "tmp_%d.dat", params->prefix );
+    }
+    else
+        rc = string_printf( buffer, buflen, &num_writ, "%s", params->output_filename );
+
+    if ( rc != 0 )
+        ErrMsg( "make_dst_filename.string_printf() -> %R", rc );
+    return rc;
+}
+
+
+/* callback for KVectorVisitPtr(): writes one stored entry ( key + packed bases ) to the
+   lookup-writer passed as user_data, then frees the stored String whether or not
+   the write succeeded */
+static rc_t CC on_store_entry( uint64_t key, const void *value, void *user_data )
+{
+    const String * packed = ( const String * )value;
+    rc_t rc = write_packed_to_lookup_writer( ( struct lookup_writer * )user_data, key, packed );
+    StringWhack( packed );
+    return rc;
+}
+
+
+/* writes the content of the store into a file and replaces the store by a new, empty one.
+   with a memory-limit the file is the next sub-file ( sub_file_id gets incremented ),
+   without a limit it is the destination-file itself.
+   nothing happens if the store is empty */
+static rc_t save_store( struct sorter * sorter )
+{
+    rc_t rc;
+    char filename[ 4096 ];
+    struct lookup_writer * dst;
+
+    if ( sorter->bytes_in_store == 0 )
+        return 0;
+
+    if ( sorter->params.mem_limit > 0 )
+    {
+        rc = make_subfilename( &sorter->params, sorter->sub_file_id, filename, sizeof filename );
+        if ( rc == 0 )
+            sorter->sub_file_id++;
+    }
+    else
+        rc = make_dst_filename( &sorter->params, filename, sizeof filename );
+    if ( rc != 0 )
+        return rc;
+
+    rc = make_lookup_writer( sorter->params.dir, NULL, &dst, sorter->params.buf_size, "%s", filename );
+    if ( rc != 0 )
+        return rc;
+
+    /* on_store_entry() writes each entry and frees its String */
+    rc = KVectorVisitPtr( sorter->store, false, on_store_entry, dst );
+    release_lookup_writer( dst );
+    if ( rc != 0 )
+        return rc;
+
+    sorter->bytes_in_store = 0;
+    rc = KVectorRelease( sorter->store );
+    if ( rc != 0 )
+    {
+        ErrMsg( "KVectorRelease() -> %R", rc );
+        return rc;
+    }
+
+    sorter->store = NULL;
+    rc = KVectorMake( &sorter->store );
+    if ( rc != 0 )
+        ErrMsg( "KVectorMake() -> %R", rc );
+    return rc;
+}
+
+
+/* packs the bases of one row, puts a copy of the packed bases into the store under the key
+   made from seq_spot_id and seq_read_id and accounts for the memory used.
+   if a memory-limit is set and reached, the store is saved into a sub-file */
+static rc_t write_to_sorter( struct sorter * sorter, int64_t seq_spot_id, uint32_t seq_read_id,
+                             const String * unpacked_bases )
+{
+    /* we write it to the store...*/
+    rc_t rc;
+    const String * to_store;
+    pack_4na( unpacked_bases, &sorter->buf );
+    rc = StringCopy( &to_store, &sorter->buf.S );
+    if ( rc != 0 )
+        ErrMsg( "StringCopy() -> %R", rc );
+    else
+    {
+        uint64_t key = make_key( seq_spot_id, seq_read_id );
+        rc = KVectorSetPtr( sorter->store, key, (const void *)to_store );
+        if ( rc != 0 )
+        {
+            ErrMsg( "KVectorSetPtr() -> %R", rc );
+            /* NOTE(review): assumes the store does not keep the pointer if KVectorSetPtr() fails,
+               in which case nobody else would free the copy */
+            StringWhack( to_store );
+        }
+        else
+        {
+            size_t item_size = ( sizeof key ) + ( sizeof *to_store ) + to_store->size;
+            sorter->bytes_in_store += item_size;
+        }
+    }
+
+    if ( rc == 0 &&
+         sorter->params.mem_limit > 0 &&
+         sorter->bytes_in_store >= sorter->params.mem_limit )
+        rc = save_store( sorter );
+    return rc;
+}
+
+
+static rc_t delete_sub_files( const sorter_params * params, uint32_t count )
+{
+ rc_t rc = 0;
+ char buffer[ 4096 ];
+ uint32_t i;
+ for ( i = 0; rc == 0 && i < count; ++ i )
+ {
+ rc = make_subfilename( params, i, buffer, sizeof buffer );
+ if ( rc == 0 )
+ rc = KDirectoryRemove( params->dir, true, "%s", buffer );
+ if ( rc != 0 )
+ ErrMsg( "KDirectoryRemove( 'sub_%d.dat' ) -> %R", i, rc );
+ }
+ return rc;
+}
+
+
+/* merges the sub-files 0 ... count - 1 of one sorter into its destination-file
+   ( see make_dst_filename ) and removes the sub-files if the merge succeeded.
+   nothing happens if count == 0 */
+static rc_t final_merge_sort( const sorter_params * params, uint32_t count )
+{
+    rc_t rc = 0;
+    if ( count > 0 )
+    {
+        char buffer[ 4096 ];
+        rc = make_dst_filename( params, buffer, sizeof buffer );
+        if ( rc == 0 )
+        {
+            merge_sorter_params msp;
+            /* has to start as NULL: it is released below even if make_merge_sorter() fails */
+            struct merge_sorter * ms = NULL;
+            uint32_t i;
+
+            msp.dir = params->dir;
+            msp.output_filename = buffer;
+            msp.index_filename = params->index_filename;
+            msp.count = count;
+            msp.buf_size = params->buf_size;
+
+            rc = make_merge_sorter( &ms, &msp );
+            for ( i = 0; rc == 0 && i < count; ++i )
+            {
+                char buffer2[ 4096 ];
+                rc = make_subfilename( params, i, buffer2, sizeof buffer2 );
+                if ( rc == 0 )
+                    rc = add_merge_sorter_src( ms, buffer2, i );
+            }
+            if ( rc == 0 )
+                rc = run_merge_sorter( ms );
+
+            /* release_merge_sorter() ignores NULL */
+            release_merge_sorter( ms );
+        }
+
+        if ( rc == 0 )
+            rc = delete_sub_files( params, count );
+    }
+    return rc;
+}
+
+rc_t CC Quitting();
+
+/* reads all rows from params->src and writes them sorted by key:
+   every row goes into the store ( which is saved into sub-files if a memory-limit is set ),
+   the rest of the store is saved at the end and, with a memory-limit, the sub-files are merged.
+   params->src is destroyed by this function if the initialization succeeded.
+   params->sort_progress ( optional ) is incremented for every row written */
+rc_t run_sorter( const sorter_params * params )
+{
+    sorter sorter;
+    rc_t rc = init_sorter( &sorter, params );
+    if ( rc == 0 )
+    {
+        raw_read_rec rec;
+        while ( rc == 0 && get_from_raw_read_iter( sorter.params.src, &rec, &rc ) )
+        {
+            /* get_from_raw_read_iter() returns true even if reading the columns failed:
+               do not overwrite that error with the result of Quitting() */
+            if ( rc == 0 )
+                rc = Quitting();
+            if ( rc == 0 )
+            {
+                rc = write_to_sorter( &sorter, rec.seq_spot_id, rec.seq_read_id, &rec.raw_read );
+                if ( rc == 0 && params->sort_progress != NULL )
+                    atomic_inc( params->sort_progress );
+            }
+        }
+
+        if ( rc == 0 )
+            rc = save_store( &sorter );
+
+        if ( rc == 0 && sorter.params.mem_limit > 0 )
+            rc = final_merge_sort( params, sorter.sub_file_id );
+
+        release_sorter( &sorter );
+    }
+    return rc;
+}
+
+/* -------------------------------------------------------------------------------------------- */
+
+/* makes a temporary raw_read_iter ( no row-range, no progress ) just to ask it for its
+   row-count; returns 0 if the iterator cannot be made */
+static uint64_t find_out_row_count( const sorter_params * params )
+{
+    uint64_t rows = 0;
+    struct raw_read_iter * probe;
+    cmn_params cp;
+
+    cp.dir = params->dir;
+    cp.acc = params->acc;
+    cp.row_range = NULL;
+    cp.first = 0;
+    cp.count = 0;
+    cp.cursor_cache = params->cursor_cache;
+    cp.show_progress = false;
+
+    if ( make_raw_read_iter( &cp, &probe ) == 0 )
+    {
+        rows = get_row_count_of_raw_read( probe );
+        destroy_raw_read_iter( probe );
+    }
+    return rows;
+}
+
+
+/* prints the name of the temporary file with the given id into 'buffer':
+   [temp_path/]tmp_<id>.dat
+   a '/' is only inserted if temp_path does not already end with one;
+   an empty temp_path is an error */
+static rc_t make_pool_src_filename( const sorter_params * params, uint32_t id,
+                                    char * buffer, size_t buflen )
+{
+    rc_t rc;
+    size_t num_writ;
+    if ( params->temp_path != NULL )
+    {
+        uint32_t l = string_measure( params->temp_path, NULL );
+        if ( l == 0 )
+        {
+            rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+            /* report the function the error really happened in */
+            ErrMsg( "make_pool_src_filename.string_measure() = 0 -> %R", rc );
+        }
+        else
+        {
+            if ( params->temp_path[ l - 1 ] == '/' )
+                rc = string_printf( buffer, buflen, &num_writ, "%stmp_%d.dat",
+                                    params->temp_path, id );
+            else
+                rc = string_printf( buffer, buflen, &num_writ, "%s/tmp_%d.dat",
+                                    params->temp_path, id );
+        }
+    }
+    else
+        rc = string_printf( buffer, buflen, &num_writ, "tmp_%d.dat", id );
+
+    if ( rc != 0 )
+        ErrMsg( "make_pool_src_filename.string_printf() -> %R", rc );
+    return rc;
+}
+
+
+static rc_t delete_tmp_files( const sorter_params * params, uint32_t count )
+{
+ rc_t rc = 0;
+ char buffer[ 4096 ];
+ uint32_t i;
+ for ( i = 0; rc == 0 && i < count; ++ i )
+ {
+ make_pool_src_filename( params, i + 1, buffer, sizeof buffer );
+ if ( rc == 0 )
+ rc = KDirectoryRemove( params->dir, true, "%s", buffer );
+ if ( rc != 0 )
+ ErrMsg( "KDirectoryRemove( 'tmp_%d.dat' ) -> %R", rc );
+ }
+ return rc;
+}
+
+
+/* merges the temporary files with the ids 1 ... params->num_threads into
+   params->output_filename ( plus the optional index ) and removes the temporary files
+   if the merge succeeded */
+static rc_t merge_pool_files( const sorter_params * params )
+{
+    rc_t rc;
+    uint32_t n;
+    struct merge_sorter * merger;
+    merge_sorter_params mp;
+
+    mp.dir = params->dir;
+    mp.output_filename = params->output_filename;
+    mp.index_filename = params->index_filename;
+    mp.count = params->num_threads;
+    mp.buf_size = params->buf_size;
+
+    rc = make_merge_sorter( &merger, &mp );
+    if ( rc != 0 )
+        return rc;
+
+    for ( n = 0; rc == 0 && n < params->num_threads; ++n )
+    {
+        char filename[ 4096 ];
+        /* source n reads the temporary file with id n + 1 */
+        rc = make_pool_src_filename( params, n + 1, filename, sizeof filename );
+        if ( rc == 0 )
+            rc = add_merge_sorter_src( merger, filename, n );
+    }
+    if ( rc == 0 )
+        rc = run_merge_sorter( merger );
+
+    release_merge_sorter( merger );
+
+    if ( rc == 0 )
+        rc = delete_tmp_files( params, params->num_threads );
+    return rc;
+}
+
+/* fills the parameters for one sorter of the pool: directory, index-filename, temp-path
+   and the sizes are taken from 'params', the prefix is the one given,
+   output_filename and src are set to NULL; the other members of 'dst' are not written */
+static void init_sorter_params( sorter_params * dst, const sorter_params * params, uint32_t prefix )
+{
+    /* taken over from the parameters of the pool */
+    dst->dir = params->dir;
+    dst->index_filename = params->index_filename;
+    dst->temp_path = params->temp_path;
+    dst->mem_limit = params->mem_limit;
+    dst->buf_size = params->buf_size;
+
+    /* specific for this sorter */
+    dst->prefix = prefix;
+    dst->output_filename = NULL;
+    dst->src = NULL;
+}
+
+/* fills the iterator-parameters for the first slice of rows: the slice starts at row 1
+   and has ( row_count / num_threads ) + 1 rows; no row-range, no progress */
+static void init_cmn_params( cmn_params * dst, const sorter_params * params, uint64_t row_count )
+{
+    /* taken over from the parameters of the pool */
+    dst->dir = params->dir;
+    dst->acc = params->acc;
+    dst->cursor_cache = params->cursor_cache;
+
+    /* the slice */
+    dst->first = 1;
+    dst->count = ( row_count / params->num_threads ) + 1;
+
+    dst->row_range = NULL;
+    dst->show_progress = false;
+}
+
+
+/* thread-function of the pool: 'data' is a heap-allocated sorter_params, the thread
+   runs a sorter on it ( without index ) and frees it afterwards */
+static rc_t CC sort_thread_func( const KThread *self, void *data )
+{
+    rc_t rc;
+    sorter_params * sp = ( sorter_params * )data;
+
+    sp->index_filename = NULL;
+    rc = run_sorter( sp );
+
+    free( data );
+    return rc;
+}
+
+
+/* multi-threaded sorting: the rows are cut into slices of ( row_count / num_threads ) + 1 rows,
+   for each slice a thread runs a sorter which writes the temporary file tmp_<n>.dat
+   ( n = 1, 2, ... ). after all threads are joined the temporary files are merged into
+   params->output_filename. the merge is skipped if starting the threads failed.
+   each thread owns ( and frees ) its sorter_params and its iterator */
+rc_t run_sorter_pool( const sorter_params * params )
+{
+    rc_t rc = 0;
+    uint64_t row_count = find_out_row_count( params );
+    if ( row_count == 0 )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+        ErrMsg( "multi_threaded_make_lookup: row_count == 0!" );
+    }
+    else
+    {
+        cmn_params cp;
+        Vector threads;
+        KThread * progress_thread = NULL;
+        uint32_t prefix = 1;
+        multi_progress progress;
+
+        init_progress_data( &progress, row_count );
+        VectorInit( &threads, 0, params->num_threads );
+        init_cmn_params( &cp, params, row_count );
+
+        if ( params->show_progress )
+            rc = start_multi_progress( &progress_thread, &progress );
+
+        while ( rc == 0 && cp.first < row_count )
+        {
+            sorter_params * sp = calloc( 1, sizeof *sp );
+            if ( sp == NULL )
+            {
+                /* without an error-code this loop would never terminate */
+                rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+                ErrMsg( "run_sorter_pool.calloc( %d ) -> %R", ( sizeof *sp ), rc );
+            }
+            else
+            {
+                /* becomes true as soon as a thread exists that will free sp and sp->src */
+                bool owned_by_thread = false;
+
+                init_sorter_params( sp, params, prefix++ );
+                rc = make_raw_read_iter( &cp, &sp->src );
+
+                if ( rc == 0 )
+                {
+                    KThread * thread;
+
+                    if ( params->show_progress )
+                        sp->sort_progress = &progress.progress_rows;
+                    rc = KThreadMake( &thread, sort_thread_func, sp );
+                    if ( rc != 0 )
+                        ErrMsg( "KThreadMake( sort-thread #%d ) -> %R", prefix - 1, rc );
+                    else
+                    {
+                        owned_by_thread = true;
+                        rc = VectorAppend( &threads, NULL, thread );
+                        if ( rc != 0 )
+                            ErrMsg( "VectorAppend( sort-thread #%d ) -> %R", prefix - 1, rc );
+                    }
+                }
+
+                if ( !owned_by_thread )
+                {
+                    /* no thread was started: nobody else gives these back
+                       ( sp->src is still NULL if the iterator could not be made ) */
+                    destroy_raw_read_iter( sp->src );
+                    free( sp );
+                }
+                cp.first += cp.count;
+            }
+        }
+
+        join_and_release_threads( &threads );
+        /* all sorter-threads are done now, tell the progress-thread to terminate! */
+        join_multi_progress( progress_thread, &progress );
+
+        /* do not hide an earlier error behind the result of the merge */
+        if ( rc == 0 )
+            rc = merge_pool_files( params );
+    }
+    return rc;
+}
diff --git a/test/samline/cigar.h b/tools/fastdump/sorter.h
similarity index 63%
copy from test/samline/cigar.h
copy to tools/fastdump/sorter.h
index d0ec04a..1c0aa26 100644
--- a/test/samline/cigar.h
+++ b/tools/fastdump/sorter.h
@@ -24,33 +24,49 @@
*
*/
-#ifndef _h_cigar_
-#define _h_cigar_
+#ifndef _h_sorter_
+#define _h_sorter_
#ifdef __cplusplus
extern "C" {
#endif
-struct cigar_t;
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
-struct cigar_t * make_cigar_t( const char * cigar_str );
-void free_cigar_t( struct cigar_t * c );
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
-int cigar_t_reflen( const struct cigar_t * c );
-int cigar_t_readlen( const struct cigar_t * c );
-int cigar_t_inslen( const struct cigar_t * c );
+#ifndef _h_atomic_
+#include <atomic.h>
+#endif
-size_t cigar_t_string( char * buffer, size_t buf_len, const struct cigar_t * c );
+#ifndef _h_kfs_directory_
+#include <kfs/directory.h>
+#endif
-struct cigar_t * merge_cigar_t( const struct cigar_t * c );
+#ifndef _h_raw_read_iter_
+#include "raw_read_iter.h"
+#endif
-size_t md_tag( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * read, const char * reference );
-void debug_cigar_t( const struct cigar_t * c );
+typedef struct sorter_params /* input of run_sorter() and run_sorter_pool() */
+{
+ KDirectory * dir; /* directory used for all files created, opened and removed */
+ const char * acc; /* handed to the iterator as cmn_params.acc */
+ const char * output_filename; /* the final, sorted output */
+ const char * index_filename; /* optional, handed to the merge-sorter as its index_filename */
+ const char * temp_path; /* optional: where the sub_*.dat / tmp_*.dat files are put; NULL ... no path in front of the name */
+ struct raw_read_iter * src; /* run_sorter() reads its rows from here and destroys it when done */
+ size_t buf_size, mem_limit, prefix, num_threads, cursor_cache; /* mem_limit > 0 ... the store is saved into sub-files; prefix > 0 ... result goes to tmp_<prefix>.dat */
+ atomic_t * sort_progress; /* optional: incremented by run_sorter() for every row written */
+ bool show_progress; /* run_sorter_pool() starts a progress-thread if set */
+} sorter_params;
-size_t cigar_t_2_read( char * buffer, size_t buf_len,
- const struct cigar_t * c, const char * ref_bases, const char * ins_bases );
+rc_t run_sorter( const sorter_params * params );
+rc_t run_sorter_pool( const sorter_params * params );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/special_iter.c b/tools/fastdump/special_iter.c
new file mode 100644
index 0000000..f68753d
--- /dev/null
+++ b/tools/fastdump/special_iter.c
@@ -0,0 +1,98 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "special_iter.h"
+#include "helper.h"
+
+#include <os-native.h>
+#include <sysalloc.h>
+
+typedef struct special_iter /* iterator state; special_iter.h only forward-declares it */
+{
+ struct cmn_iter * cmn; /* common iterator, made on the SEQUENCE table by make_special_iter() */
+ uint32_t prim_alig_id, cmp_read_id, spot_group_id; /* column ids handed back by cmn_iter_add_column() */
+} special_iter;
+
+/* releases the common iterator and then the special_iter itself; a NULL iter is ignored */
+void destroy_special_iter( struct special_iter * iter )
+{
+ if ( iter != NULL )
+ {
+ destroy_cmn_iter( iter->cmn ); /* NOTE(review): cmn is still NULL if make_cmn_iter() failed - confirm destroy_cmn_iter() accepts NULL */
+ free( ( void * ) iter );
+ }
+}
+/* makes an iterator on the SEQUENCE table with PRIMARY_ALIGNMENT_ID, CMP_READ and SPOT_GROUP; returns 0 and writes *iter on success, otherwise *iter is left untouched */
+rc_t make_special_iter( cmn_params * params, struct special_iter ** iter )
+{
+ rc_t rc = 0;
+ special_iter * i = calloc( 1, sizeof * i ); /* zero-filled, so i->cmn starts out as NULL */
+ if ( i == NULL )
+ {
+ rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+ ErrMsg( "make_special_iter.calloc( %zu ) -> %R", ( sizeof * i ), rc ); /* sizeof yields a size_t, which needs %zu and not %d */
+ }
+ else
+ {
+ rc = make_cmn_iter( params, "SEQUENCE", &i->cmn );
+ if ( rc == 0 )
+ rc = cmn_iter_add_column( i->cmn, "PRIMARY_ALIGNMENT_ID", &i->prim_alig_id );
+ if ( rc == 0 )
+ rc = cmn_iter_add_column( i->cmn, "CMP_READ", &i->cmp_read_id );
+ if ( rc == 0 )
+ rc = cmn_iter_add_column( i->cmn, "SPOT_GROUP", &i->spot_group_id );
+ if ( rc == 0 )
+ rc = cmn_iter_range( i->cmn, i->prim_alig_id );
+
+ if ( rc != 0 )
+ destroy_special_iter( i ); /* any failed step: free i together with its common iterator */
+ else
+ *iter = i;
+ }
+ return rc;
+}
+
+/* steps to the next row and fills rec from it; returns the result of cmn_iter_next(), errors of the column reads are handed back in *rc */
+bool get_from_special_iter( struct special_iter * iter, special_rec * rec, rc_t * rc )
+{
+ bool res = cmn_iter_next( iter->cmn, rc );
+ if ( res )
+ {
+ rec->row_id = cmn_iter_row_id( iter->cmn );
+ *rc = cmn_read_uint64_array( iter->cmn, iter->prim_alig_id, rec->prim_alig_id, 2 ); /* 2 matches the array length in special_rec */
+ if ( *rc == 0 )
+ *rc = cmn_read_String( iter->cmn, iter->cmp_read_id, &rec->cmp_read );
+ if ( *rc == 0 )
+ *rc = cmn_read_String( iter->cmn, iter->spot_group_id, &rec->spot_group );
+ }
+ return res; /* stays true even if a column read failed, so the caller has to look at *rc too */
+
+}
+/* returns what cmn_iter_row_count() reports for the underlying common iterator; iter must not be NULL */
+uint64_t get_row_count_of_special_iter( struct special_iter * iter )
+{
+ return cmn_iter_row_count( iter->cmn );
+}
diff --git a/test/samline/refbases.h b/tools/fastdump/special_iter.h
similarity index 66%
copy from test/samline/refbases.h
copy to tools/fastdump/special_iter.h
index 94136fb..e072da6 100644
--- a/test/samline/refbases.h
+++ b/tools/fastdump/special_iter.h
@@ -24,15 +24,42 @@
*
*/
-#ifndef _h_refbases_
-#define _h_refbases_
+#ifndef _h_special_iter_
+#define _h_special_iter_
#ifdef __cplusplus
extern "C" {
#endif
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+#ifndef _h_klib_rc_
+#include <klib/rc.h>
+#endif
+
+#ifndef _h_klib_text_
+#include <klib/text.h>
+#endif
+
+#ifndef _h_cmn_iter_
+#include "cmn_iter.h"
+#endif
+
+struct special_iter;
+
+typedef struct special_rec /* one row, as filled in by get_from_special_iter() */
+{
+ int64_t row_id; /* value of cmn_iter_row_id() for this row */
+ uint64_t prim_alig_id[ 2 ]; /* the 2 values read from column PRIMARY_ALIGNMENT_ID */
+ String cmp_read; /* read from column CMP_READ */
+ String spot_group; /* read from column SPOT_GROUP */
+} special_rec;
+
+void destroy_special_iter( struct special_iter * iter );
+
+rc_t make_special_iter( cmn_params * params, struct special_iter ** iter );
+
+bool get_from_special_iter( struct special_iter * iter, special_rec * rec, rc_t * rc );
+
+uint64_t get_row_count_of_special_iter( struct special_iter * iter );
#ifdef __cplusplus
}
diff --git a/tools/fastdump/todo.txt b/tools/fastdump/todo.txt
new file mode 100644
index 0000000..258aa29
--- /dev/null
+++ b/tools/fastdump/todo.txt
@@ -0,0 +1,3 @@
+* progress-bar in merge ( if asked for )
+* check memory if no memory-limit provided
+* check for space on scratch or current-directory
\ No newline at end of file
diff --git a/tools/fastdump/verify.sh b/tools/fastdump/verify.sh
new file mode 100755
index 0000000..cd340bc
--- /dev/null
+++ b/tools/fastdump/verify.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+#prints the command line given in $1 and then runs it through eval
+execute()
+{
+ echo "------------------------------------------------------"
+ echo "$1" #quoted: print the command exactly as it was passed in
+ eval "$1" #quoted: no word-splitting or globbing before eval parses the command
+ echo "."
+}
+
+ACC="SRR341578" #accession handed to both fastdump and vdb-dump
+SCRATCH="-t /panfs/traces01/compress/qa/raetzw/fastdump/" #fastdump option with a site-specific path ( presumably the scratch location - adjust before running elsewhere )
+THREADS="-e 4" #fastdump option ( presumably the number of threads )
+#compares 'fastdump -f special' with a tab-formatted vdb-dump of SPOT_ID,READ,SPOT_GROUP
+check_special()
+{
+ FASTDUMP_OUT="$ACC.fastdump.special.txt"
+ VDB_DUMP_OUT="$ACC.vdb_dump.special.txt"
+
+ #remove output
+ CMD="rm -rf $FASTDUMP_OUT $VDB_DUMP_OUT"
+ execute "$CMD"
+
+ #produce the output using the lookup-file
+ CMD="time fastdump $ACC $SCRATCH -f special -o $FASTDUMP_OUT $THREADS -p"
+ execute "$CMD"
+
+ #produce the same output using vdb-dump with internal schema-joins
+ CMD="time vdb-dump $ACC -C SPOT_ID,READ,SPOT_GROUP -f tab > $VDB_DUMP_OUT"
+ execute "$CMD"
+
+ #verify the output of fastdump against the output of vdb-dump
+ CMD="time diff $FASTDUMP_OUT $VDB_DUMP_OUT"
+ execute "$CMD"
+}
+#compares 'fastdump -f fastq' with 'vdb-dump -f fastq'
+check_fastq()
+{
+ FASTDUMP_OUT="$ACC.fastdump.fastq.txt"
+ VDB_DUMP_OUT="$ACC.vdb_dump.fastq.txt"
+
+ #remove output
+ CMD="rm -rf $FASTDUMP_OUT $VDB_DUMP_OUT"
+ execute "$CMD"
+
+ #produce the output using the lookup-file
+ CMD="time fastdump $ACC $SCRATCH -f fastq -o $FASTDUMP_OUT $THREADS -p"
+ execute "$CMD"
+
+ #produce the same output using vdb-dump with internal schema-joins
+ CMD="time vdb-dump $ACC -f fastq > $VDB_DUMP_OUT"
+ execute "$CMD"
+
+ #verify the output of fastdump against the output of vdb-dump
+ CMD="time diff $FASTDUMP_OUT $VDB_DUMP_OUT"
+ execute "$CMD"
+}
+#run both comparisons, one after the other
+check_special
+check_fastq
diff --git a/tools/fastq-dump/fastq-dump.vers b/tools/fastq-dump/fastq-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/fastq-dump/fastq-dump.vers
+++ b/tools/fastq-dump/fastq-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/fastq-loader/Makefile b/tools/fastq-loader/Makefile
index e2552f7..d75a770 100644
--- a/tools/fastq-loader/Makefile
+++ b/tools/fastq-loader/Makefile
@@ -102,7 +102,7 @@ FASTQ_SRC = \
fastq-grammar \
fastq-lex
-# flex/bison should only be invoked manually in an environment ensures the correct versions:
+# flex/bison should only be invoked manually in an environment that ensures the correct versions:
# bison 2.5, flex 2.5.35
bison:
bison --warnings=error -o $(SRCDIR)/fastq-grammar.c --defines=$(SRCDIR)/fastq-tokens.h -v --no-lines $(SRCDIR)/fastq-grammar.y
diff --git a/tools/fastq-loader/fastq-grammar.c b/tools/fastq-loader/fastq-grammar.c
index 4a1fd9f..1c9313f 100644
--- a/tools/fastq-loader/fastq-grammar.c
+++ b/tools/fastq-loader/fastq-grammar.c
@@ -132,16 +132,17 @@
enum yytokentype {
fqENDOFTEXT = 0,
fqRUNDOTSPOT = 258,
- fqNUMBER = 259,
- fqALPHANUM = 260,
- fqWS = 261,
- fqENDLINE = 262,
- fqBASESEQ = 263,
- fqCOLORSEQ = 264,
- fqTOKEN = 265,
- fqASCQUAL = 266,
- fqCOORDS = 267,
- fqUNRECOGNIZED = 268
+ fqSPOTGROUP = 259,
+ fqNUMBER = 260,
+ fqALPHANUM = 261,
+ fqWS = 262,
+ fqENDLINE = 263,
+ fqBASESEQ = 264,
+ fqCOLORSEQ = 265,
+ fqTOKEN = 266,
+ fqASCQUAL = 267,
+ fqCOORDS = 268,
+ fqUNRECOGNIZED = 269
};
#endif
@@ -376,20 +377,20 @@ union yyalloc
/* YYFINAL -- State number of the termination state. */
#define YYFINAL 19
/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 151
+#define YYLAST 150
/* YYNTOKENS -- Number of terminals. */
#define YYNTOKENS 24
/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 52
+#define YYNNTS 49
/* YYNRULES -- Number of rules. */
-#define YYNRULES 105
+#define YYNRULES 101
/* YYNRULES -- Number of states. */
-#define YYNSTATES 150
+#define YYNSTATES 146
/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
#define YYUNDEFTOK 2
-#define YYMAXUTOK 268
+#define YYMAXUTOK 269
#define YYTRANSLATE(YYX) \
((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
@@ -400,13 +401,13 @@ static const yytype_uint8 yytranslate[] =
0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 21, 2, 2, 2, 2,
- 2, 2, 2, 23, 2, 20, 19, 22, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 14, 2,
- 2, 17, 16, 2, 15, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 23, 2, 21, 20, 22, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 15, 2,
+ 2, 18, 17, 2, 16, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 18, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 19, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -423,7 +424,7 @@ static const yytype_uint8 yytranslate[] =
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
- 5, 6, 7, 8, 9, 10, 11, 12, 13
+ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
};
#if YYDEBUG
@@ -435,66 +436,65 @@ static const yytype_uint16 yyprhs[] =
27, 29, 32, 34, 38, 43, 47, 50, 51, 55,
56, 61, 62, 66, 68, 70, 71, 75, 76, 81,
82, 86, 87, 92, 94, 96, 98, 101, 111, 116,
- 120, 121, 126, 127, 132, 134, 137, 141, 143, 145,
- 148, 149, 153, 156, 160, 164, 167, 170, 171, 177,
- 178, 184, 185, 191, 192, 199, 203, 205, 207, 210,
- 213, 216, 219, 222, 225, 226, 230, 231, 235, 237,
- 238, 242, 243, 248, 250, 251, 252, 253, 254, 255,
- 267, 268, 272, 273, 275, 277, 278, 282, 286, 288,
- 292, 297, 299, 302, 305, 309
+ 120, 121, 126, 127, 132, 135, 137, 140, 144, 146,
+ 148, 151, 152, 156, 159, 163, 167, 170, 173, 174,
+ 180, 181, 187, 188, 194, 195, 202, 206, 208, 210,
+ 213, 216, 219, 222, 225, 228, 229, 233, 234, 239,
+ 241, 242, 243, 244, 245, 246, 258, 259, 263, 264,
+ 266, 268, 269, 273, 277, 279, 283, 288, 290, 293,
+ 296, 300
};
/* YYRHS -- A `-1'-separated list of the rules' RHS. */
static const yytype_int8 yyrhs[] =
{
- 25, 0, -1, 31, 72, -1, 31, -1, -1, -1,
- -1, 55, 12, 26, 14, 27, 43, 14, 28, 74,
- -1, 5, 1, 30, -1, 29, -1, 0, -1, 30,
- 29, -1, 7, -1, 32, 30, 36, -1, 32, 30,
+ 25, 0, -1, 31, 69, -1, 31, -1, -1, -1,
+ -1, 55, 13, 26, 15, 27, 43, 15, 28, 71,
+ -1, 6, 1, 30, -1, 29, -1, 0, -1, 30,
+ 29, -1, 8, -1, 32, 30, 36, -1, 32, 30,
1, 30, -1, 1, 30, 36, -1, 1, 30, -1,
- -1, 15, 33, 44, -1, -1, 15, 6, 34, 44,
- -1, -1, 16, 35, 44, -1, 37, -1, 40, -1,
- -1, 8, 38, 30, -1, -1, 37, 8, 39, 30,
- -1, -1, 9, 41, 30, -1, -1, 40, 9, 42,
- 30, -1, 8, -1, 9, -1, 47, -1, 47, 59,
- -1, 47, 59, 6, 4, 14, 5, 14, 4, 68,
- -1, 47, 59, 6, 5, -1, 47, 59, 6, -1,
- -1, 47, 6, 45, 62, -1, -1, 47, 6, 46,
- 5, -1, 71, -1, 55, 59, -1, 55, 59, 6,
- -1, 55, -1, 50, -1, 50, 56, -1, -1, 55,
- 48, 56, -1, 49, 50, -1, 49, 50, 56, -1,
- 49, 5, 17, -1, 55, 6, -1, 55, 12, -1,
- -1, 55, 12, 18, 51, 62, -1, -1, 55, 12,
- 14, 52, 55, -1, -1, 55, 12, 19, 53, 55,
- -1, -1, 55, 12, 14, 19, 54, 55, -1, 55,
- 12, 14, -1, 5, -1, 4, -1, 55, 18, -1,
- 55, 20, -1, 55, 19, -1, 55, 14, -1, 55,
- 5, -1, 55, 4, -1, -1, 21, 57, 4, -1,
- -1, 21, 58, 5, -1, 21, -1, -1, 22, 60,
- 4, -1, -1, 59, 22, 61, 55, -1, 4, -1,
- -1, -1, -1, -1, -1, 4, 63, 14, 64, 5,
- 65, 14, 66, 4, 67, 68, -1, -1, 14, 69,
- 70, -1, -1, 8, -1, 4, -1, -1, 3, 19,
- 4, -1, 3, 22, 4, -1, 3, -1, 73, 30,
- 74, -1, 73, 30, 1, 30, -1, 23, -1, 73,
- 10, -1, 75, 30, -1, 74, 75, 30, -1, 11,
- -1
+ -1, 16, 33, 44, -1, -1, 16, 7, 34, 44,
+ -1, -1, 17, 35, 44, -1, 37, -1, 40, -1,
+ -1, 9, 38, 30, -1, -1, 37, 9, 39, 30,
+ -1, -1, 10, 41, 30, -1, -1, 40, 10, 42,
+ 30, -1, 9, -1, 10, -1, 47, -1, 47, 56,
+ -1, 47, 56, 7, 5, 15, 6, 15, 5, 65,
+ -1, 47, 56, 7, 6, -1, 47, 56, 7, -1,
+ -1, 47, 7, 45, 59, -1, -1, 47, 7, 46,
+ 6, -1, 68, 7, -1, 68, -1, 55, 56, -1,
+ 55, 56, 7, -1, 55, -1, 50, -1, 50, 4,
+ -1, -1, 55, 48, 4, -1, 49, 50, -1, 49,
+ 50, 4, -1, 49, 6, 18, -1, 55, 7, -1,
+ 55, 13, -1, -1, 55, 13, 19, 51, 59, -1,
+ -1, 55, 13, 15, 52, 55, -1, -1, 55, 13,
+ 20, 53, 55, -1, -1, 55, 13, 15, 20, 54,
+ 55, -1, 55, 13, 15, -1, 6, -1, 5, -1,
+ 55, 19, -1, 55, 21, -1, 55, 20, -1, 55,
+ 15, -1, 55, 6, -1, 55, 5, -1, -1, 22,
+ 57, 5, -1, -1, 56, 22, 58, 55, -1, 5,
+ -1, -1, -1, -1, -1, -1, 5, 60, 15, 61,
+ 6, 62, 15, 63, 5, 64, 65, -1, -1, 15,
+ 66, 67, -1, -1, 9, -1, 5, -1, -1, 3,
+ 20, 5, -1, 3, 22, 5, -1, 3, -1, 70,
+ 30, 71, -1, 70, 30, 1, 30, -1, 23, -1,
+ 70, 11, -1, 72, 30, -1, 71, 72, 30, -1,
+ 12, -1
};
/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
static const yytype_uint16 yyrline[] =
{
- 0, 81, 81, 83, 88, 89, 91, 87, 94, 96,
- 100, 101, 105, 109, 110, 111, 112, 116, 116, 117,
- 117, 118, 118, 122, 123, 127, 127, 129, 129, 134,
- 134, 136, 136, 141, 142, 147, 148, 150, 152, 153,
- 155, 155, 156, 156, 157, 158, 159, 160, 164, 165,
- 166, 166, 167, 168, 169, 173, 183, 185, 184, 191,
- 191, 192, 192, 193, 193, 194, 198, 199, 200, 201,
- 202, 203, 204, 205, 209, 209, 211, 211, 213, 218,
- 217, 230, 229, 241, 242, 243, 244, 245, 246, 242,
- 251, 251, 252, 256, 257, 258, 262, 263, 264, 270,
- 271, 275, 276, 280, 281, 284
+ 0, 82, 82, 84, 89, 90, 92, 88, 95, 97,
+ 101, 102, 106, 110, 111, 112, 113, 117, 117, 118,
+ 118, 119, 119, 123, 124, 128, 128, 130, 130, 135,
+ 135, 137, 137, 142, 143, 148, 149, 151, 153, 154,
+ 156, 156, 157, 157, 158, 159, 160, 161, 162, 166,
+ 167, 168, 168, 169, 170, 171, 175, 185, 187, 186,
+ 193, 193, 194, 194, 195, 195, 196, 200, 201, 202,
+ 203, 204, 205, 206, 207, 212, 211, 224, 223, 235,
+ 236, 237, 238, 239, 240, 236, 245, 245, 246, 250,
+ 251, 252, 256, 257, 258, 264, 265, 269, 270, 274,
+ 275, 278
};
#endif
@@ -503,17 +503,17 @@ static const yytype_uint16 yyrline[] =
First, the terminals, then, starting at YYNTOKENS, nonterminals. */
static const char *const yytname[] =
{
- "fqENDOFTEXT", "error", "$undefined", "fqRUNDOTSPOT", "fqNUMBER",
- "fqALPHANUM", "fqWS", "fqENDLINE", "fqBASESEQ", "fqCOLORSEQ", "fqTOKEN",
- "fqASCQUAL", "fqCOORDS", "fqUNRECOGNIZED", "':'", "'@'", "'>'", "'='",
- "'_'", "'.'", "'-'", "'#'", "'/'", "'+'", "$accept", "sequence", "$@1",
- "$@2", "$@3", "endfile", "endline", "readLines", "header", "$@4", "$@5",
- "$@6", "read", "baseRead", "$@7", "$@8", "csRead", "$@9", "$@10",
+ "fqENDOFTEXT", "error", "$undefined", "fqRUNDOTSPOT", "fqSPOTGROUP",
+ "fqNUMBER", "fqALPHANUM", "fqWS", "fqENDLINE", "fqBASESEQ", "fqCOLORSEQ",
+ "fqTOKEN", "fqASCQUAL", "fqCOORDS", "fqUNRECOGNIZED", "':'", "'@'",
+ "'>'", "'='", "'_'", "'.'", "'-'", "'/'", "'+'", "$accept", "sequence",
+ "$@1", "$@2", "$@3", "endfile", "endline", "readLines", "header", "$@4",
+ "$@5", "$@6", "read", "baseRead", "$@7", "$@8", "csRead", "$@9", "$@10",
"inlineRead", "tagLine", "$@11", "$@12", "nameSpotGroup", "$@13",
"nameWS", "nameWithCoords", "$@14", "$@15", "$@16", "$@17", "name",
- "spotGroup", "$@18", "$@19", "readNumber", "$@20", "$@21", "casava1_8",
- "$@22", "$@23", "$@24", "$@25", "$@26", "indexSequence", "$@27", "index",
- "runSpotRead", "qualityLines", "qualityHeader", "quality", "qualityLine", 0
+ "readNumber", "$@18", "$@19", "casava1_8", "$@20", "$@21", "$@22",
+ "$@23", "$@24", "indexSequence", "$@25", "index", "runSpotRead",
+ "qualityLines", "qualityHeader", "quality", "qualityLine", 0
};
#endif
@@ -523,8 +523,8 @@ static const char *const yytname[] =
static const yytype_uint16 yytoknum[] =
{
0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
- 265, 266, 267, 268, 58, 64, 62, 61, 95, 46,
- 45, 35, 47, 43
+ 265, 266, 267, 268, 269, 58, 64, 62, 61, 95,
+ 46, 45, 47, 43
};
# endif
@@ -535,13 +535,13 @@ static const yytype_uint8 yyr1[] =
29, 29, 30, 31, 31, 31, 31, 33, 32, 34,
32, 35, 32, 36, 36, 38, 37, 39, 37, 41,
40, 42, 40, 43, 43, 44, 44, 44, 44, 44,
- 45, 44, 46, 44, 44, 44, 44, 44, 47, 47,
- 48, 47, 47, 47, 47, 49, 50, 51, 50, 52,
- 50, 53, 50, 54, 50, 50, 55, 55, 55, 55,
- 55, 55, 55, 55, 57, 56, 58, 56, 56, 60,
- 59, 61, 59, 62, 63, 64, 65, 66, 67, 62,
- 69, 68, 68, 70, 70, 70, 71, 71, 71, 72,
- 72, 73, 73, 74, 74, 75
+ 45, 44, 46, 44, 44, 44, 44, 44, 44, 47,
+ 47, 48, 47, 47, 47, 47, 49, 50, 51, 50,
+ 52, 50, 53, 50, 54, 50, 50, 55, 55, 55,
+ 55, 55, 55, 55, 55, 57, 56, 58, 56, 59,
+ 60, 61, 62, 63, 64, 59, 66, 65, 65, 67,
+ 67, 67, 68, 68, 68, 69, 69, 70, 70, 71,
+ 71, 72
};
/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
@@ -551,13 +551,13 @@ static const yytype_uint8 yyr2[] =
1, 2, 1, 3, 4, 3, 2, 0, 3, 0,
4, 0, 3, 1, 1, 0, 3, 0, 4, 0,
3, 0, 4, 1, 1, 1, 2, 9, 4, 3,
- 0, 4, 0, 4, 1, 2, 3, 1, 1, 2,
- 0, 3, 2, 3, 3, 2, 2, 0, 5, 0,
- 5, 0, 5, 0, 6, 3, 1, 1, 2, 2,
- 2, 2, 2, 2, 0, 3, 0, 3, 1, 0,
- 3, 0, 4, 1, 0, 0, 0, 0, 0, 11,
- 0, 3, 0, 1, 1, 0, 3, 3, 1, 3,
- 4, 1, 2, 2, 3, 1
+ 0, 4, 0, 4, 2, 1, 2, 3, 1, 1,
+ 2, 0, 3, 2, 3, 3, 2, 2, 0, 5,
+ 0, 5, 0, 5, 0, 6, 3, 1, 1, 2,
+ 2, 2, 2, 2, 2, 0, 3, 0, 4, 1,
+ 0, 0, 0, 0, 0, 11, 0, 3, 0, 1,
+ 1, 0, 3, 3, 1, 3, 4, 1, 2, 2,
+ 3, 1
};
/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
@@ -565,89 +565,87 @@ static const yytype_uint8 yyr2[] =
means the default is an error. */
static const yytype_uint8 yydefact[] =
{
- 0, 10, 0, 67, 0, 12, 17, 21, 0, 9,
+ 0, 10, 0, 68, 0, 12, 17, 21, 0, 9,
0, 3, 0, 0, 16, 0, 19, 0, 0, 1,
- 11, 101, 2, 0, 0, 73, 72, 4, 71, 68,
- 70, 69, 25, 29, 15, 23, 24, 8, 0, 98,
- 66, 18, 35, 0, 48, 47, 44, 22, 102, 0,
+ 11, 97, 2, 0, 0, 74, 73, 4, 72, 69,
+ 71, 70, 25, 29, 15, 23, 24, 8, 0, 94,
+ 67, 18, 35, 0, 49, 48, 45, 22, 98, 0,
0, 13, 0, 0, 0, 27, 31, 20, 0, 0,
- 40, 79, 36, 66, 52, 0, 78, 49, 55, 56,
- 0, 45, 0, 105, 99, 0, 14, 5, 26, 30,
- 0, 0, 96, 97, 0, 0, 0, 39, 81, 54,
- 53, 0, 0, 65, 57, 61, 51, 46, 100, 0,
- 103, 0, 28, 32, 83, 41, 43, 80, 0, 38,
- 0, 75, 77, 63, 0, 0, 0, 104, 33, 34,
- 0, 0, 0, 82, 0, 60, 58, 62, 6, 85,
- 0, 64, 0, 0, 0, 7, 86, 92, 0, 90,
- 37, 87, 95, 0, 94, 93, 91, 88, 92, 89
+ 40, 75, 36, 67, 53, 0, 50, 56, 57, 0,
+ 46, 44, 0, 101, 95, 0, 14, 5, 26, 30,
+ 0, 0, 92, 93, 0, 0, 0, 39, 77, 55,
+ 54, 66, 58, 62, 52, 47, 96, 0, 99, 0,
+ 28, 32, 79, 41, 43, 76, 0, 38, 0, 64,
+ 0, 0, 0, 100, 33, 34, 0, 0, 0, 78,
+ 0, 61, 59, 63, 6, 81, 0, 65, 0, 0,
+ 0, 7, 82, 88, 0, 86, 37, 83, 91, 0,
+ 90, 89, 87, 84, 88, 85
};
/* YYDEFGOTO[NTERM-NUM]. */
static const yytype_int16 yydefgoto[] =
{
- -1, 8, 52, 101, 132, 9, 10, 11, 12, 17,
- 38, 18, 34, 35, 53, 80, 36, 54, 81, 120,
- 41, 84, 85, 42, 70, 43, 44, 115, 114, 116,
- 124, 45, 67, 91, 92, 62, 86, 110, 105, 121,
- 133, 138, 143, 148, 140, 142, 146, 46, 22, 23,
- 74, 75
+ -1, 8, 52, 99, 128, 9, 10, 11, 12, 17,
+ 38, 18, 34, 35, 53, 80, 36, 54, 81, 116,
+ 41, 84, 85, 42, 69, 43, 44, 111, 110, 112,
+ 120, 45, 62, 86, 108, 103, 117, 129, 134, 139,
+ 144, 136, 138, 142, 46, 22, 23, 74, 75
};
/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
STATE-NUM. */
#define YYPACT_NINF -73
-static const yytype_int16 yypact[] =
+static const yytype_int8 yypact[] =
{
- 40, -73, 1, -73, 12, -73, 36, -73, 18, -73,
- 39, 25, 1, 46, -2, 1, -73, 104, 104, -73,
- -73, -73, -73, 80, 83, -73, -73, -73, -73, -73,
- -73, -73, -73, -73, -73, 49, 50, -73, 104, 84,
- -73, -73, 3, 92, 51, 15, -73, -73, -73, 4,
- 1, -73, 56, 1, 1, -73, -73, -73, 74, 98,
- 108, -73, 6, 71, 51, 63, 100, -73, -73, 75,
- 51, 32, 1, -73, 110, 1, -73, -73, -73, -73,
- 1, 1, -73, -73, 111, 117, 119, 107, -73, -73,
- -73, 121, 122, 57, -73, -73, -73, -73, -73, 1,
- -73, 109, -73, -73, 112, -73, -73, -73, 114, -73,
- 115, -73, -73, -73, 115, 111, 115, -73, -73, -73,
- 116, 118, 124, 81, 115, 81, -73, 81, -73, -73,
- 120, 81, 110, 126, 129, 110, -73, 123, 125, -73,
- -73, -73, 65, 131, -73, -73, -73, -73, 123, -73
+ 50, -73, 8, -73, 4, -73, 1, -73, 21, -73,
+ 7, 11, 8, 57, 59, 8, -73, 82, 82, -73,
+ -73, -73, -73, 28, 32, -73, -73, -73, -73, -73,
+ -73, -73, -73, -73, -73, 77, 44, -73, 82, 53,
+ -73, -73, 5, 76, 88, 25, 93, -73, -73, 36,
+ 8, -73, 90, 8, 8, -73, -73, -73, 101, 102,
+ 103, -73, 6, 95, 110, 78, -73, -73, 45, 114,
+ 13, -73, 8, -73, 107, 8, -73, -73, -73, -73,
+ 8, 8, -73, -73, 116, 117, 119, 84, -73, -73,
+ -73, -2, -73, -73, -73, -73, -73, 8, -73, 85,
+ -73, -73, 111, -73, -73, -73, 112, -73, 98, -73,
+ 98, 116, 98, -73, -73, -73, 113, 115, 123, 96,
+ 98, 96, -73, 96, -73, -73, 118, 96, 107, 125,
+ 120, 107, -73, 121, 122, -73, -73, -73, 52, 127,
+ -73, -73, -73, -73, 121, -73
};
/* YYPGOTO[NTERM-NUM]. */
-static const yytype_int16 yypgoto[] =
+static const yytype_int8 yypgoto[] =
{
- -73, -73, -73, -73, -73, 128, -1, -73, -73, -73,
- -73, -73, 127, -73, -73, -73, -73, -73, -73, -73,
- -15, -73, -73, -73, -73, -73, 93, -73, -73, -73,
- -73, 0, -60, -73, -73, 95, -73, -73, 26, -73,
- -73, -73, -73, -73, -6, -73, -73, -73, -73, -73,
- 11, -72
+ -73, -73, -73, -73, -73, 124, -1, -73, -73, -73,
+ -73, -73, 126, -73, -73, -73, -73, -73, -73, -73,
+ -12, -73, -73, -73, -73, -73, 79, -73, -73, -73,
+ -73, 0, 94, -73, -73, 24, -73, -73, -73, -73,
+ -73, -6, -73, -73, -73, -73, -73, 12, -72
};
/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
positive, shift that token. If negative, reduce the rule which
number is the opposite. If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -85
+#define YYTABLE_NINF -81
static const yytype_int16 yytable[] =
{
- 13, 14, 99, 47, 90, 72, 32, 33, 5, 60,
- 96, 24, 87, 15, 37, 73, -66, -66, 19, 25,
- 26, 68, 49, 57, -66, 61, -66, 69, 88, 28,
- -66, -66, -66, 29, 30, 31, -50, 61, 97, 1,
- 1, 2, 16, 65, 3, 4, 5, 5, 21, 76,
- 25, 26, 78, 79, 88, 6, 7, 55, 27, 56,
- 28, -59, -59, 99, 29, 30, 31, 25, 26, 144,
- 77, 98, 66, 145, 100, 69, 113, 28, 82, 102,
- 103, 29, 30, 31, 50, 25, 26, 5, 89, 93,
- 48, 32, 33, 94, 95, 28, 3, 63, 117, 29,
- 30, 31, 83, 58, -74, -76, 59, 39, 3, 40,
- 123, 108, 109, -42, 125, 104, 127, 118, 119, 3,
- 40, 73, 106, 107, 131, 111, -84, 112, 122, 130,
- 128, 136, 129, 137, 134, 147, 64, 139, 20, 141,
- 71, 126, 149, 135, 0, 0, 0, 0, 0, 0,
- 0, 51
+ 13, 14, 97, -60, -60, 15, 47, 1, 16, -67,
+ -67, 24, 60, 87, 37, 5, 5, -67, 109, -67,
+ 95, 19, 49, -67, -67, -67, 57, 61, 88, -51,
+ 25, 26, 67, 50, 21, 88, 5, 72, 68, 48,
+ 28, 32, 33, 65, 29, 30, 31, 61, 73, 76,
+ 1, 2, 78, 79, 56, 3, 4, 140, 5, 97,
+ 91, 141, 25, 26, 92, 93, 6, 7, 32, 33,
+ 27, 96, 28, 58, 98, 59, 29, 30, 31, 100,
+ 101, 3, 63, 25, 26, 39, 55, 3, 40, 106,
+ 107, 68, 66, 28, 114, 115, 113, 29, 30, 31,
+ 71, 25, 26, 3, 40, 77, 82, 83, 119, -42,
+ 121, 28, 123, 89, 90, 29, 30, 31, 94, 73,
+ 127, 102, 64, 104, 105, 133, -80, 118, 124, 126,
+ 125, 132, 143, 130, 20, 122, 135, 137, 145, 70,
+ 131, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 51
};
#define yypact_value_is_default(yystate) \
@@ -658,43 +656,43 @@ static const yytype_int16 yytable[] =
static const yytype_int16 yycheck[] =
{
- 0, 2, 74, 18, 64, 1, 8, 9, 7, 6,
- 70, 12, 6, 1, 15, 11, 4, 5, 0, 4,
- 5, 6, 23, 38, 12, 22, 14, 12, 22, 14,
- 18, 19, 20, 18, 19, 20, 21, 22, 6, 0,
- 0, 1, 6, 43, 4, 5, 7, 7, 23, 50,
- 4, 5, 53, 54, 22, 15, 16, 8, 12, 9,
- 14, 4, 5, 135, 18, 19, 20, 4, 5, 4,
- 14, 72, 21, 8, 75, 12, 19, 14, 4, 80,
- 81, 18, 19, 20, 1, 4, 5, 7, 17, 14,
- 10, 8, 9, 18, 19, 14, 4, 5, 99, 18,
- 19, 20, 4, 19, 4, 5, 22, 3, 4, 5,
- 110, 4, 5, 5, 114, 4, 116, 8, 9, 4,
- 5, 11, 5, 4, 124, 4, 14, 5, 14, 5,
- 14, 5, 14, 4, 14, 4, 43, 14, 10, 14,
- 45, 115, 148, 132, -1, -1, -1, -1, -1, -1,
- -1, 24
+ 0, 2, 74, 5, 6, 1, 18, 0, 7, 5,
+ 6, 12, 7, 7, 15, 8, 8, 13, 20, 15,
+ 7, 0, 23, 19, 20, 21, 38, 22, 22, 4,
+ 5, 6, 7, 1, 23, 22, 8, 1, 13, 11,
+ 15, 9, 10, 43, 19, 20, 21, 22, 12, 50,
+ 0, 1, 53, 54, 10, 5, 6, 5, 8, 131,
+ 15, 9, 5, 6, 19, 20, 16, 17, 9, 10,
+ 13, 72, 15, 20, 75, 22, 19, 20, 21, 80,
+ 81, 5, 6, 5, 6, 3, 9, 5, 6, 5,
+ 6, 13, 4, 15, 9, 10, 97, 19, 20, 21,
+ 7, 5, 6, 5, 6, 15, 5, 5, 108, 6,
+ 110, 15, 112, 18, 4, 19, 20, 21, 4, 12,
+ 120, 5, 43, 6, 5, 5, 15, 15, 15, 6,
+ 15, 6, 5, 15, 10, 111, 15, 15, 144, 45,
+ 128, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 24
};
/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
symbol of state STATE-NUM. */
static const yytype_uint8 yystos[] =
{
- 0, 0, 1, 4, 5, 7, 15, 16, 25, 29,
- 30, 31, 32, 55, 30, 1, 6, 33, 35, 0,
- 29, 23, 72, 73, 30, 4, 5, 12, 14, 18,
- 19, 20, 8, 9, 36, 37, 40, 30, 34, 3,
- 5, 44, 47, 49, 50, 55, 71, 44, 10, 30,
- 1, 36, 26, 38, 41, 8, 9, 44, 19, 22,
- 6, 22, 59, 5, 50, 55, 21, 56, 6, 12,
- 48, 59, 1, 11, 74, 75, 30, 14, 30, 30,
- 39, 42, 4, 4, 45, 46, 60, 6, 22, 17,
- 56, 57, 58, 14, 18, 19, 56, 6, 30, 75,
- 30, 27, 30, 30, 4, 62, 5, 4, 4, 5,
- 61, 4, 5, 19, 52, 51, 53, 30, 8, 9,
- 43, 63, 14, 55, 54, 55, 62, 55, 14, 14,
- 5, 55, 28, 64, 14, 74, 5, 4, 65, 14,
- 68, 14, 69, 66, 4, 8, 70, 4, 67, 68
+ 0, 0, 1, 5, 6, 8, 16, 17, 25, 29,
+ 30, 31, 32, 55, 30, 1, 7, 33, 35, 0,
+ 29, 23, 69, 70, 30, 5, 6, 13, 15, 19,
+ 20, 21, 9, 10, 36, 37, 40, 30, 34, 3,
+ 6, 44, 47, 49, 50, 55, 68, 44, 11, 30,
+ 1, 36, 26, 38, 41, 9, 10, 44, 20, 22,
+ 7, 22, 56, 6, 50, 55, 4, 7, 13, 48,
+ 56, 7, 1, 12, 71, 72, 30, 15, 30, 30,
+ 39, 42, 5, 5, 45, 46, 57, 7, 22, 18,
+ 4, 15, 19, 20, 4, 7, 30, 72, 30, 27,
+ 30, 30, 5, 59, 6, 5, 5, 6, 58, 20,
+ 52, 51, 53, 30, 9, 10, 43, 60, 15, 55,
+ 54, 55, 59, 55, 15, 15, 6, 55, 28, 61,
+ 15, 71, 6, 5, 62, 15, 65, 15, 66, 63,
+ 5, 9, 67, 5, 64, 65
};
#define yyerrok (yyerrstatus = 0)
@@ -1663,23 +1661,43 @@ yyreduce:
{ FASTQScan_skip_to_eol(pb); }
break;
- case 46:
+ case 45:
+
+ { FASTQScan_skip_to_eol(pb); }
+ break;
+
+ case 47:
{ FASTQScan_skip_to_eol(pb); }
break;
case 50:
+ { SetSpotGroup(pb, &(yyvsp[(2) - (2)])); }
+ break;
+
+ case 51:
+
{ StopSpotName(pb); }
break;
+ case 52:
+
+ { SetSpotGroup(pb, &(yyvsp[(3) - (3)])); }
+ break;
+
case 54:
- { RevertSpotName(pb); FASTQScan_skip_to_eol(pb); }
+ { SetSpotGroup(pb, &(yyvsp[(3) - (3)])); }
break;
case 55:
+ { RevertSpotName(pb); FASTQScan_skip_to_eol(pb); }
+ break;
+
+ case 56:
+
{ /* 'name' without coordinates attached will be ignored if followed by a name with coordinates (see the previous production).
however, if not followed, this will be the spot name, so we need to save the 'name's coordinates in case
we need to revert to them later (see call to RevertSpotName() above) */
@@ -1689,12 +1707,12 @@ yyreduce:
}
break;
- case 56:
+ case 57:
{ GrowSpotName(pb, &(yyvsp[(2) - (2)])); StopSpotName(pb); }
break;
- case 57:
+ case 58:
{ /* another variation by Illumina, this time "_" is used as " /" */
GrowSpotName(pb, &(yyvsp[(2) - (3)]));
@@ -1703,29 +1721,24 @@ yyreduce:
}
break;
- case 59:
+ case 60:
{ GrowSpotName(pb, &(yyvsp[(2) - (3)])); GrowSpotName(pb, &(yyvsp[(3) - (3)]));}
break;
- case 61:
+ case 62:
{ GrowSpotName(pb, &(yyvsp[(2) - (3)])); GrowSpotName(pb, &(yyvsp[(3) - (3)]));}
break;
- case 63:
+ case 64:
{ GrowSpotName(pb, &(yyvsp[(2) - (4)])); GrowSpotName(pb, &(yyvsp[(3) - (4)])); GrowSpotName(pb, &(yyvsp[(4) - (4)]));}
break;
- case 65:
-
- { GrowSpotName(pb, &(yyvsp[(2) - (3)])); GrowSpotName(pb, &(yyvsp[(3) - (3)])); StopSpotName(pb); }
- break;
-
case 66:
- { GrowSpotName(pb, &(yyvsp[(1) - (1)])); }
+ { GrowSpotName(pb, &(yyvsp[(2) - (3)])); GrowSpotName(pb, &(yyvsp[(3) - (3)])); StopSpotName(pb); }
break;
case 67:
@@ -1735,7 +1748,7 @@ yyreduce:
case 68:
- { GrowSpotName(pb, &(yyvsp[(2) - (2)])); }
+ { GrowSpotName(pb, &(yyvsp[(1) - (1)])); }
break;
case 69:
@@ -1765,38 +1778,18 @@ yyreduce:
case 74:
- { StopSpotName(pb); }
+ { GrowSpotName(pb, &(yyvsp[(2) - (2)])); }
break;
case 75:
- { SetSpotGroup(pb, &(yyvsp[(3) - (3)])); }
- break;
-
- case 76:
-
- { StopSpotName(pb); }
- break;
-
- case 77:
-
- { SetSpotGroup(pb, &(yyvsp[(3) - (3)])); }
- break;
-
- case 78:
-
- { StopSpotName(pb); }
- break;
-
- case 79:
-
{ /* in PACBIO fastq, the first '/' and the following digits are treated as a continuation of the spot name, not a read number */
if (IS_PACBIO(pb)) pb->spotNameDone = false;
GrowSpotName(pb, &(yyvsp[(1) - (1)]));
}
break;
- case 80:
+ case 76:
{
if (!IS_PACBIO(pb)) SetReadNumber(pb, &(yyvsp[(3) - (3)]));
@@ -1805,7 +1798,7 @@ yyreduce:
}
break;
- case 81:
+ case 77:
{
if (IS_PACBIO(pb)) pb->spotNameDone = false;
@@ -1813,74 +1806,74 @@ yyreduce:
}
break;
- case 82:
+ case 78:
{
if (IS_PACBIO(pb)) StopSpotName(pb);
}
break;
- case 83:
+ case 79:
{ SetReadNumber(pb, &(yyvsp[(1) - (1)])); GrowSpotName(pb, &(yyvsp[(1) - (1)])); StopSpotName(pb); }
break;
- case 84:
+ case 80:
{ SetReadNumber(pb, &(yyvsp[(1) - (1)])); GrowSpotName(pb, &(yyvsp[(1) - (1)])); StopSpotName(pb); }
break;
- case 85:
+ case 81:
{ GrowSpotName(pb, &(yyvsp[(3) - (3)])); }
break;
- case 86:
+ case 82:
{ GrowSpotName(pb, &(yyvsp[(5) - (5)])); if ((yyvsp[(5) - (5)]).tokenLength == 1 && TokenTextPtr(pb, &(yyvsp[(5) - (5)]))[0] == 'Y') pb->record->seq.lowQuality = true; }
break;
- case 87:
+ case 83:
{ GrowSpotName(pb, &(yyvsp[(7) - (7)])); }
break;
- case 88:
+ case 84:
{ GrowSpotName(pb, &(yyvsp[(9) - (9)])); }
break;
- case 90:
+ case 86:
{ GrowSpotName(pb, &(yyvsp[(1) - (1)])); FASTQScan_inline_sequence(pb); }
break;
- case 93:
+ case 89:
{ SetSpotGroup(pb, &(yyvsp[(1) - (1)])); GrowSpotName(pb, &(yyvsp[(1) - (1)])); }
break;
- case 94:
+ case 90:
{ SetSpotGroup(pb, &(yyvsp[(1) - (1)])); GrowSpotName(pb, &(yyvsp[(1) - (1)])); }
break;
- case 96:
+ case 92:
{ GrowSpotName(pb, &(yyvsp[(1) - (3)])); StopSpotName(pb); SetReadNumber(pb, &(yyvsp[(3) - (3)])); }
break;
- case 97:
+ case 93:
{ GrowSpotName(pb, &(yyvsp[(1) - (3)])); StopSpotName(pb); SetReadNumber(pb, &(yyvsp[(3) - (3)])); }
break;
- case 98:
+ case 94:
{ GrowSpotName(pb, &(yyvsp[(1) - (1)])); StopSpotName(pb); }
break;
- case 105:
+ case 101:
{ AddQuality(pb, & (yyvsp[(1) - (1)])); }
break;
@@ -2293,11 +2286,18 @@ void StopSpotName(FASTQParseBlock* pb)
void SetSpotGroup(FASTQParseBlock* pb, const FASTQToken* token)
{
if ( ! pb->ignoreSpotGroups )
- {
- if (token->tokenLength != 1 || TokenTextPtr(pb, token)[0] != '0') /* ignore spot group 0 */
+ {
+ unsigned int nameStart = 0;
+ /* skip possible '#' at the start of spot group name */
+ if ( TokenTextPtr ( pb, token )[0] == '#' )
+ {
+ nameStart = 1;
+ }
+
+ if ( token->tokenLength != 1+nameStart || TokenTextPtr(pb, token)[nameStart] != '0' ) /* ignore spot group 0 */
{
- pb->spotGroupOffset = token->tokenStart;
- pb->spotGroupLength = token->tokenLength;
+ pb->spotGroupOffset = token->tokenStart + nameStart;
+ pb->spotGroupLength = token->tokenLength - nameStart;
}
}
}
diff --git a/tools/fastq-loader/fastq-grammar.y b/tools/fastq-loader/fastq-grammar.y
index 509e187..1434c08 100644
--- a/tools/fastq-loader/fastq-grammar.y
+++ b/tools/fastq-loader/fastq-grammar.y
@@ -59,9 +59,10 @@
%parse-param {FASTQParseBlock* pb }
%lex-param {FASTQParseBlock* pb }
%error-verbose
-%name-prefix="FASTQ_"
+%name-prefix "FASTQ_"
%token fqRUNDOTSPOT
+%token fqSPOTGROUP
%token fqNUMBER
%token fqALPHANUM
%token fqWS
@@ -154,7 +155,8 @@ tagLine
| nameSpotGroup fqWS { GrowSpotName(pb, &$1); StopSpotName(pb); } casava1_8 { FASTQScan_skip_to_eol(pb); }
| nameSpotGroup fqWS { GrowSpotName(pb, &$1); StopSpotName(pb); } fqALPHANUM { FASTQScan_skip_to_eol(pb); } /* no recognizable read number */
- | runSpotRead { FASTQScan_skip_to_eol(pb); }
+ | runSpotRead fqWS { FASTQScan_skip_to_eol(pb); }
+ | runSpotRead { FASTQScan_skip_to_eol(pb); }
| name readNumber
| name readNumber fqWS { FASTQScan_skip_to_eol(pb); }
| name
@@ -162,10 +164,10 @@ tagLine
nameSpotGroup
: nameWithCoords
- | nameWithCoords spotGroup
- | name { StopSpotName(pb); } spotGroup
- | nameWS nameWithCoords /* nameWS ignored */
- | nameWS nameWithCoords spotGroup /* nameWS ignored */
+ | nameWithCoords fqSPOTGROUP { SetSpotGroup(pb, &$2); }
+ | name { StopSpotName(pb); } fqSPOTGROUP { SetSpotGroup(pb, &$3); }
+ | nameWS nameWithCoords /* nameWS ignored */
+ | nameWS nameWithCoords fqSPOTGROUP { SetSpotGroup(pb, &$3); } /* nameWS ignored */
| nameWS fqALPHANUM '=' { RevertSpotName(pb); FASTQScan_skip_to_eol(pb); }
;
@@ -205,14 +207,6 @@ name
| name fqNUMBER { GrowSpotName(pb, &$2); }
;
-spotGroup
- : '#' { StopSpotName(pb); }
- fqNUMBER { SetSpotGroup(pb, &$3); }
- | '#' { StopSpotName(pb); }
- fqALPHANUM { SetSpotGroup(pb, &$3); }
- | '#' { StopSpotName(pb); }
- ;
-
readNumber
: '/'
{ /* in PACBIO fastq, the first '/' and the following digits are treated as a continuation of the spot name, not a read number */
@@ -461,11 +455,18 @@ void StopSpotName(FASTQParseBlock* pb)
void SetSpotGroup(FASTQParseBlock* pb, const FASTQToken* token)
{
if ( ! pb->ignoreSpotGroups )
- {
- if (token->tokenLength != 1 || TokenTextPtr(pb, token)[0] != '0') /* ignore spot group 0 */
+ {
+ unsigned int nameStart = 0;
+ /* skip possible '#' at the start of spot group name */
+ if ( TokenTextPtr ( pb, token )[0] == '#' )
+ {
+ nameStart = 1;
+ }
+
+ if ( token->tokenLength != 1+nameStart || TokenTextPtr(pb, token)[nameStart] != '0' ) /* ignore spot group 0 */
{
- pb->spotGroupOffset = token->tokenStart;
- pb->spotGroupLength = token->tokenLength;
+ pb->spotGroupOffset = token->tokenStart + nameStart;
+ pb->spotGroupLength = token->tokenLength - nameStart;
}
}
}
diff --git a/tools/fastq-loader/fastq-lex.c b/tools/fastq-loader/fastq-lex.c
index d3dabdc..54cc5d6 100644
--- a/tools/fastq-loader/fastq-lex.c
+++ b/tools/fastq-loader/fastq-lex.c
@@ -446,8 +446,8 @@ static void yy_fatal_error (yyconst char msg[] ,yyscan_t yyscanner );
yyg->yy_c_buf_p = yy_cp;
/* %% [4.0] data tables for the DFA and the user's section 1 definitions go here */
-#define YY_NUM_RULES 33
-#define YY_END_OF_BUFFER 34
+#define YY_NUM_RULES 34
+#define YY_END_OF_BUFFER 35
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
@@ -455,50 +455,51 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_acclist[207] =
+static yyconst flex_int16_t yy_acclist[211] =
{ 0,
- 31, 31, 34, 11, 33, 8, 11, 33, 32, 33,
- 11, 32, 33, 7, 11, 33, 6, 7, 11, 33,
- 11, 33, 7, 11, 33, 3, 11, 33, 11, 33,
- 16393, 2, 11, 33, 1, 11, 33, 7, 11, 33,
- 16393, 7, 11, 33,16393, 8, 11, 33, 12, 33,
- 11, 12, 33, 33, 33, 16, 33, 33,16399, 33,
- 16397,16399, 33,16397,16399, 33, 22, 33, 22, 33,
- 26, 33, 23, 26, 33, 25, 33, 25, 26, 33,
- 26, 33,16408, 23, 26, 33,16408, 20, 33, 20,
- 33, 17, 33, 19, 33, 17, 33, 29, 33, 23,
-
- 29, 33, 28, 33, 28, 29, 33, 29, 33,16411,
- 23, 29, 33,16411, 31, 33, 30, 33, 30, 31,
- 33, 8, 32, 7, 6, 7, 7, 8201, 8201,16393,
- 16393,16394, 7,16394, 7,16393, 8, 12, 12, 16,
- 8207, 16, 8207,16399, 8205, 8207, 16, 8205, 8207,16397,
- 16399,16397,16398,16399,16398,16399, 21, 21, 22, 23,
- 25, 8216, 8216,16408, 23,16408, 20, 17, 19, 17,
- 18, 18, 28, 8219, 8219,16411, 23,16411, 31, 30,
- 30, 31, 30, 7, 8201, 8202, 8201, 8202,16394, 8202,
- 8202, 8205, 8206, 8207, 16, 8205, 8206, 8207, 8206, 8207,
-
- 16, 8206, 8207, 7, 5, 4
+ 32, 32, 35, 12, 34, 9, 12, 34, 33, 34,
+ 12, 33, 34, 6, 12, 34, 8, 12, 34, 7,
+ 8, 12, 34, 12, 34, 8, 12, 34, 3, 12,
+ 34, 12, 34,16394, 2, 12, 34, 1, 12, 34,
+ 8, 12, 34,16394, 8, 12, 34,16394, 9, 12,
+ 34, 13, 34, 12, 13, 34, 34, 34, 17, 34,
+ 34,16400, 34,16398,16400, 34,16398,16400, 34, 23,
+ 34, 23, 34, 27, 34, 24, 27, 34, 26, 34,
+ 26, 27, 34, 27, 34,16409, 24, 27, 34,16409,
+ 21, 34, 21, 34, 18, 34, 20, 34, 18, 34,
+
+ 30, 34, 24, 30, 34, 29, 34, 29, 30, 34,
+ 30, 34,16412, 24, 30, 34,16412, 32, 34, 31,
+ 34, 31, 32, 34, 9, 33, 6, 8, 7, 8,
+ 8, 8202, 8202,16394,16394,16395, 8,16395, 8,16394,
+ 9, 13, 13, 17, 8208, 17, 8208,16400, 8206, 8208,
+ 17, 8206, 8208,16398,16400,16398,16399,16400,16399,16400,
+ 22, 22, 23, 24, 26, 8217, 8217,16409, 24,16409,
+ 21, 18, 20, 18, 19, 19, 29, 8220, 8220,16412,
+ 24,16412, 32, 31, 31, 32, 31, 8, 8202, 8203,
+ 8202, 8203,16395, 8203, 8203, 8206, 8207, 8208, 17, 8206,
+
+ 8207, 8208, 8207, 8208, 17, 8207, 8208, 8, 5, 4
} ;
-static yyconst flex_int16_t yy_accept[141] =
+static yyconst flex_int16_t yy_accept[143] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 3, 4, 6, 9, 11, 14,
- 17, 21, 23, 26, 29, 32, 35, 38, 42, 46,
- 49, 51, 54, 55, 56, 58, 60, 63, 66, 67,
- 69, 71, 73, 76, 78, 81, 84, 88, 90, 92,
- 94, 96, 98, 100, 103, 105, 108, 111, 115, 117,
- 119, 122, 123, 124, 125, 127, 127, 128, 129, 130,
- 131, 133, 135, 137, 138, 139, 140, 140, 141, 143,
- 144, 145, 148, 150, 152, 155, 157, 157, 158, 160,
-
- 161, 162, 163, 164, 165, 167, 168, 169, 170, 172,
- 173, 174, 175, 176, 177, 179, 180, 181, 183, 184,
- 184, 185, 187, 189, 190, 191, 192, 196, 199, 202,
- 204, 204, 205, 205, 205, 205, 206, 206, 207, 207
+ 17, 20, 24, 26, 29, 32, 35, 38, 41, 45,
+ 49, 52, 54, 57, 58, 59, 61, 63, 66, 69,
+ 70, 72, 74, 76, 79, 81, 84, 87, 91, 93,
+ 95, 97, 99, 101, 103, 106, 108, 111, 114, 118,
+ 120, 122, 125, 126, 127, 128, 129, 131, 131, 132,
+ 133, 134, 135, 137, 139, 141, 142, 143, 144, 144,
+ 145, 147, 148, 149, 152, 154, 156, 159, 161, 161,
+
+ 162, 164, 165, 166, 167, 168, 169, 171, 172, 173,
+ 174, 176, 177, 178, 179, 180, 181, 183, 184, 185,
+ 187, 188, 188, 189, 191, 193, 194, 195, 196, 200,
+ 203, 206, 208, 208, 209, 209, 209, 209, 210, 210,
+ 211, 211
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -506,17 +507,17 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 4, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 6, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 7, 5, 8, 9, 5, 10, 10, 10,
- 10, 11, 11, 11, 11, 11, 11, 12, 5, 5,
- 5, 13, 5, 14, 15, 8, 15, 16, 16, 8,
- 15, 8, 8, 8, 8, 8, 8, 17, 8, 8,
- 8, 18, 16, 15, 8, 8, 8, 8, 8, 8,
- 5, 5, 5, 5, 5, 5, 15, 8, 15, 8,
-
- 8, 8, 15, 8, 8, 8, 8, 8, 8, 17,
- 8, 8, 8, 8, 8, 15, 8, 8, 8, 8,
- 8, 8, 5, 5, 5, 5, 5, 1, 1, 1,
+ 5, 6, 5, 5, 7, 5, 5, 5, 5, 5,
+ 5, 5, 8, 5, 9, 10, 5, 11, 11, 11,
+ 11, 12, 12, 12, 12, 12, 12, 13, 5, 5,
+ 5, 14, 5, 15, 16, 9, 16, 17, 17, 9,
+ 16, 9, 9, 9, 9, 9, 9, 18, 9, 9,
+ 9, 19, 17, 16, 9, 9, 9, 9, 9, 9,
+ 5, 5, 5, 5, 20, 5, 16, 9, 16, 9,
+
+ 9, 9, 16, 9, 9, 9, 9, 9, 9, 18,
+ 9, 9, 9, 9, 9, 16, 9, 9, 9, 9,
+ 9, 9, 5, 5, 5, 5, 5, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -533,152 +534,158 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[19] =
+static yyconst flex_int32_t yy_meta[21] =
{ 0,
- 1, 1, 2, 2, 3, 3, 3, 4, 5, 6,
- 6, 7, 3, 3, 8, 4, 8, 4
+ 1, 1, 2, 2, 3, 3, 3, 3, 4, 5,
+ 6, 6, 7, 3, 3, 8, 4, 8, 4, 9
} ;
-static yyconst flex_int16_t yy_base[159] =
+static yyconst flex_int16_t yy_base[162] =
{ 0,
- 0, 12, 28, 26, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 43, 19, 33, 59, 65, 69, 0,
- 86, 92, 35, 45, 312, 335, 18, 335, 308, 0,
- 40, 43, 259, 335, 52, 335, 335, 96, 65, 113,
- 335, 273, 335, 271, 335, 119, 123, 67, 270, 335,
- 267, 335, 79, 335, 265, 79, 139, 335, 164, 0,
- 91, 99, 335, 127, 335, 162, 127, 144, 131, 335,
- 133, 147, 335, 0, 0, 147, 126, 335, 136, 148,
- 151, 166, 0, 0, 335, 125, 115, 335, 335, 107,
- 176, 335, 100, 0, 178, 186, 94, 335, 335, 156,
-
- 335, 335, 88, 153, 0, 335, 0, 153, 0, 162,
- 335, 335, 67, 170, 0, 180, 335, 182, 335, 181,
- 183, 335, 61, 194, 335, 56, 335, 54, 335, 39,
- 28, 24, 189, 191, 16, 195, 197, 199, 335, 210,
- 218, 226, 234, 242, 250, 258, 263, 270, 278, 286,
- 294, 301, 305, 312, 320, 323, 325, 327
+ 0, 13, 30, 25, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 43, 21, 34, 60, 66, 70, 0,
+ 88, 94, 36, 45, 292, 356, 20, 356, 288, 0,
+ 0, 39, 43, 270, 356, 53, 356, 356, 98, 65,
+ 116, 356, 285, 356, 282, 356, 122, 126, 67, 280,
+ 356, 276, 356, 77, 356, 274, 81, 143, 356, 205,
+ 0, 92, 95, 356, 129, 356, 196, 108, 148, 129,
+ 356, 134, 137, 356, 0, 0, 0, 177, 158, 356,
+ 166, 137, 152, 168, 0, 0, 356, 145, 131, 356,
+ 356, 118, 179, 356, 110, 0, 181, 190, 96, 356,
+
+ 356, 151, 356, 356, 90, 155, 0, 356, 0, 149,
+ 0, 154, 356, 356, 84, 163, 0, 170, 356, 172,
+ 356, 175, 177, 356, 68, 199, 356, 62, 356, 57,
+ 356, 55, 39, 32, 184, 186, 17, 193, 195, 200,
+ 356, 212, 221, 230, 239, 248, 257, 266, 272, 278,
+ 285, 293, 302, 311, 319, 324, 331, 340, 344, 346,
+ 348
} ;
-static yyconst flex_int16_t yy_def[159] =
+static yyconst flex_int16_t yy_def[162] =
{ 0,
- 139, 1, 1, 3, 140, 140, 140, 140, 140, 140,
- 140, 140, 140, 141, 142, 142, 143, 144, 140, 19,
- 145, 145, 146, 146, 139, 139, 139, 139, 139, 147,
- 147, 139, 147, 139, 148, 139, 139, 139, 38, 139,
- 139, 139, 139, 149, 139, 150, 150, 47, 151, 139,
- 151, 139, 139, 139, 139, 152, 152, 139, 139, 153,
- 139, 153, 139, 139, 139, 139, 154, 154, 155, 139,
- 155, 139, 139, 147, 31, 156, 147, 139, 139, 148,
- 139, 147, 39, 40, 139, 139, 149, 139, 139, 149,
- 150, 139, 149, 47, 47, 91, 151, 139, 139, 139,
-
- 139, 139, 139, 152, 57, 139, 153, 139, 62, 139,
- 139, 139, 139, 154, 68, 155, 139, 155, 139, 139,
- 147, 139, 139, 139, 139, 139, 139, 149, 139, 149,
- 157, 121, 139, 139, 158, 139, 139, 139, 0, 139,
- 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
- 139, 139, 139, 139, 139, 139, 139, 139
+ 141, 1, 1, 3, 142, 142, 142, 142, 142, 142,
+ 142, 142, 142, 143, 144, 144, 145, 146, 142, 19,
+ 147, 147, 148, 148, 141, 141, 141, 141, 141, 149,
+ 150, 150, 141, 150, 141, 151, 141, 141, 141, 39,
+ 141, 141, 141, 141, 152, 141, 153, 153, 48, 154,
+ 141, 154, 141, 141, 141, 141, 155, 155, 141, 141,
+ 156, 141, 156, 141, 141, 141, 141, 157, 157, 158,
+ 141, 158, 141, 141, 149, 150, 32, 159, 150, 141,
+ 141, 151, 141, 150, 40, 41, 141, 141, 152, 141,
+ 141, 152, 153, 141, 152, 48, 48, 93, 154, 141,
+
+ 141, 141, 141, 141, 141, 155, 58, 141, 156, 141,
+ 63, 141, 141, 141, 141, 157, 69, 158, 141, 158,
+ 141, 141, 150, 141, 141, 141, 141, 141, 141, 152,
+ 141, 152, 160, 123, 141, 141, 161, 141, 141, 141,
+ 0, 141, 141, 141, 141, 141, 141, 141, 141, 141,
+ 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
+ 141
} ;
-static yyconst flex_int16_t yy_nxt[354] =
+static yyconst flex_int16_t yy_nxt[377] =
{ 0,
- 26, 27, 28, 29, 26, 27, 26, 30, 26, 31,
- 31, 32, 26, 26, 30, 33, 30, 30, 34, 72,
- 35, 50, 51, 72, 36, 37, 38, 137, 39, 40,
- 41, 42, 134, 40, 35, 50, 51, 70, 71, 133,
- 38, 129, 39, 44, 44, 45, 44, 70, 71, 75,
- 75, 47, 76, 76, 78, 79, 127, 48, 125, 47,
- 53, 54, 55, 122, 53, 52, 53, 54, 55, 112,
- 57, 58, 59, 80, 74, 95, 96, 60, 61, 61,
- 100, 102, 103, 62, 100, 60, 63, 64, 65, 66,
- 102, 68, 63, 64, 65, 66, 98, 68, 78, 79,
-
- 108, 108, 92, 74, 81, 82, 74, 109, 110, 89,
- 83, 74, 83, 74, 84, 85, 86, 88, 84, 87,
- 87, 89, 90, 87, 87, 92, 93, 85, 100, 112,
- 113, 94, 100, 117, 118, 119, 118, 94, 78, 94,
- 100, 102, 103, 121, 105, 100, 112, 113, 72, 115,
- 78, 79, 72, 122, 123, 102, 103, 100, 120, 81,
- 124, 100, 108, 108, 111, 80, 106, 80, 125, 126,
- 110, 110, 112, 113, 124, 82, 87, 87, 89, 90,
- 127, 128, 117, 118, 119, 118, 95, 96, 129, 130,
- 131, 131, 132, 132, 96, 96, 125, 126, 135, 135,
-
- 136, 136, 124, 124, 136, 136, 138, 138, 138, 138,
- 43, 43, 43, 43, 43, 43, 43, 43, 46, 46,
- 46, 46, 46, 46, 46, 46, 49, 49, 49, 49,
- 49, 49, 49, 49, 52, 52, 52, 52, 52, 52,
- 52, 52, 56, 56, 56, 56, 56, 56, 56, 56,
- 67, 67, 67, 67, 67, 67, 67, 67, 69, 69,
- 69, 69, 69, 69, 69, 69, 74, 101, 74, 99,
- 74, 80, 98, 88, 80, 85, 77, 80, 87, 87,
- 87, 87, 87, 87, 87, 87, 91, 91, 91, 91,
- 91, 91, 91, 91, 97, 97, 97, 97, 97, 97,
-
- 97, 97, 104, 104, 104, 104, 104, 104, 104, 107,
- 73, 139, 107, 114, 114, 114, 114, 114, 114, 114,
- 116, 116, 116, 116, 116, 116, 116, 116, 76, 76,
- 131, 131, 135, 135, 25, 139, 139, 139, 139, 139,
- 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
- 139, 139, 139
+ 26, 27, 28, 29, 26, 27, 30, 26, 31, 26,
+ 32, 32, 33, 26, 26, 31, 34, 31, 31, 26,
+ 35, 73, 36, 51, 52, 73, 37, 38, 39, 139,
+ 40, 41, 42, 43, 36, 41, 51, 52, 71, 72,
+ 39, 136, 40, 45, 45, 46, 45, 71, 72, 77,
+ 77, 135, 48, 78, 78, 80, 81, 131, 49, 129,
+ 48, 54, 55, 56, 127, 54, 53, 54, 55, 56,
+ 124, 58, 59, 60, 82, 76, 97, 98, 102, 61,
+ 62, 62, 102, 104, 105, 63, 114, 61, 64, 65,
+ 66, 67, 104, 69, 64, 65, 66, 67, 100, 69,
+
+ 80, 81, 110, 110, 111, 112, 76, 83, 84, 76,
+ 114, 115, 94, 85, 76, 85, 76, 86, 87, 88,
+ 91, 86, 89, 89, 91, 92, 89, 89, 94, 95,
+ 102, 119, 120, 90, 102, 96, 121, 120, 73, 80,
+ 81, 96, 73, 96, 102, 104, 105, 87, 107, 102,
+ 114, 115, 102, 117, 124, 125, 102, 104, 105, 110,
+ 110, 83, 126, 112, 112, 114, 115, 82, 80, 82,
+ 127, 128, 119, 120, 121, 120, 123, 126, 84, 89,
+ 89, 91, 92, 129, 130, 133, 133, 134, 134, 122,
+ 97, 98, 131, 132, 137, 137, 138, 138, 113, 98,
+
+ 98, 127, 128, 138, 138, 140, 140, 108, 126, 126,
+ 140, 140, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 47, 47, 47, 47, 47, 47, 47, 47, 47,
+ 50, 50, 50, 50, 50, 50, 50, 50, 50, 53,
+ 53, 53, 53, 53, 53, 53, 53, 53, 57, 57,
+ 57, 57, 57, 57, 57, 57, 57, 68, 68, 68,
+ 68, 68, 68, 68, 68, 68, 70, 70, 70, 70,
+ 70, 70, 70, 70, 70, 75, 103, 75, 101, 75,
+ 75, 76, 100, 76, 90, 76, 82, 87, 79, 82,
+ 74, 141, 82, 89, 89, 89, 89, 89, 89, 89,
+
+ 89, 89, 93, 93, 93, 93, 93, 93, 93, 93,
+ 93, 99, 99, 99, 99, 99, 99, 99, 99, 99,
+ 106, 106, 106, 106, 106, 106, 106, 106, 109, 141,
+ 141, 109, 116, 116, 116, 116, 116, 116, 116, 116,
+ 118, 118, 118, 118, 118, 118, 118, 118, 118, 78,
+ 78, 133, 133, 137, 137, 25, 141, 141, 141, 141,
+ 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
+ 141, 141, 141, 141, 141, 141
} ;
-static yyconst flex_int16_t yy_chk[354] =
+static yyconst flex_int16_t yy_chk[377] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 27,
- 2, 15, 15, 27, 2, 2, 2, 135, 2, 3,
- 3, 3, 132, 3, 4, 16, 16, 23, 23, 131,
- 4, 130, 4, 14, 14, 14, 14, 24, 24, 31,
- 31, 14, 32, 32, 35, 35, 128, 14, 126, 14,
- 17, 17, 17, 123, 17, 18, 18, 18, 18, 113,
- 18, 19, 19, 39, 39, 48, 48, 19, 19, 19,
- 53, 56, 56, 19, 53, 19, 21, 21, 21, 21,
- 103, 21, 22, 22, 22, 22, 97, 22, 38, 38,
-
- 61, 61, 93, 38, 38, 38, 38, 62, 62, 90,
- 38, 38, 38, 38, 40, 40, 40, 87, 40, 46,
- 46, 46, 46, 47, 47, 47, 47, 86, 64, 67,
- 67, 47, 64, 69, 69, 71, 71, 47, 79, 47,
- 57, 57, 57, 77, 57, 68, 68, 68, 72, 68,
- 80, 80, 72, 81, 81, 104, 104, 100, 76, 81,
- 81, 100, 108, 108, 66, 81, 59, 81, 82, 82,
- 110, 110, 114, 114, 82, 82, 91, 91, 91, 91,
- 95, 95, 116, 116, 118, 118, 95, 95, 96, 96,
- 120, 120, 121, 121, 96, 96, 124, 124, 133, 133,
-
- 134, 134, 124, 124, 136, 136, 137, 137, 138, 138,
- 140, 140, 140, 140, 140, 140, 140, 140, 141, 141,
- 141, 141, 141, 141, 141, 141, 142, 142, 142, 142,
- 142, 142, 142, 142, 143, 143, 143, 143, 143, 143,
- 143, 143, 144, 144, 144, 144, 144, 144, 144, 144,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 27, 2, 15, 15, 27, 2, 2, 2, 137,
+ 2, 3, 3, 3, 4, 3, 16, 16, 23, 23,
+ 4, 134, 4, 14, 14, 14, 14, 24, 24, 32,
+ 32, 133, 14, 33, 33, 36, 36, 132, 14, 130,
+ 14, 17, 17, 17, 128, 17, 18, 18, 18, 18,
+ 125, 18, 19, 19, 40, 40, 49, 49, 54, 19,
+ 19, 19, 54, 57, 57, 19, 115, 19, 21, 21,
+ 21, 21, 105, 21, 22, 22, 22, 22, 99, 22,
+
+ 39, 39, 62, 62, 63, 63, 39, 39, 39, 39,
+ 68, 68, 95, 39, 39, 39, 39, 41, 41, 41,
+ 92, 41, 47, 47, 47, 47, 48, 48, 48, 48,
+ 65, 70, 70, 89, 65, 48, 72, 72, 73, 82,
+ 82, 48, 73, 48, 58, 58, 58, 88, 58, 69,
+ 69, 69, 102, 69, 83, 83, 102, 106, 106, 110,
+ 110, 83, 83, 112, 112, 116, 116, 83, 81, 83,
+ 84, 84, 118, 118, 120, 120, 79, 84, 84, 93,
+ 93, 93, 93, 97, 97, 122, 122, 123, 123, 78,
+ 97, 97, 98, 98, 135, 135, 136, 136, 67, 98,
+
+ 98, 126, 126, 138, 138, 139, 139, 60, 126, 126,
+ 140, 140, 142, 142, 142, 142, 142, 142, 142, 142,
+ 142, 143, 143, 143, 143, 143, 143, 143, 143, 143,
+ 144, 144, 144, 144, 144, 144, 144, 144, 144, 145,
145, 145, 145, 145, 145, 145, 145, 145, 146, 146,
- 146, 146, 146, 146, 146, 146, 147, 55, 147, 51,
- 147, 148, 49, 44, 148, 42, 33, 148, 149, 149,
- 149, 149, 149, 149, 149, 149, 150, 150, 150, 150,
- 150, 150, 150, 150, 151, 151, 151, 151, 151, 151,
-
- 151, 151, 152, 152, 152, 152, 152, 152, 152, 153,
- 29, 25, 153, 154, 154, 154, 154, 154, 154, 154,
- 155, 155, 155, 155, 155, 155, 155, 155, 156, 156,
- 157, 157, 158, 158, 139, 139, 139, 139, 139, 139,
- 139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
- 139, 139, 139
+ 146, 146, 146, 146, 146, 146, 146, 147, 147, 147,
+ 147, 147, 147, 147, 147, 147, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 149, 56, 149, 52, 149,
+ 149, 150, 50, 150, 45, 150, 151, 43, 34, 151,
+ 29, 25, 151, 152, 152, 152, 152, 152, 152, 152,
+
+ 152, 152, 153, 153, 153, 153, 153, 153, 153, 153,
+ 153, 154, 154, 154, 154, 154, 154, 154, 154, 154,
+ 155, 155, 155, 155, 155, 155, 155, 155, 156, 0,
+ 0, 156, 157, 157, 157, 157, 157, 157, 157, 157,
+ 158, 158, 158, 158, 158, 158, 158, 158, 158, 159,
+ 159, 160, 160, 161, 161, 141, 141, 141, 141, 141,
+ 141, 141, 141, 141, 141, 141, 141, 141, 141, 141,
+ 141, 141, 141, 141, 141, 141
} ;
/* Table of booleans, true if rule could match eol. */
-static yyconst flex_int32_t yy_rule_can_match_eol[34] =
+static yyconst flex_int32_t yy_rule_can_match_eol[35] =
{ 0,
-0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0,
- 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, };
+0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0,
+ 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, };
-static yyconst flex_int16_t yy_rule_linenum[33] =
+static yyconst flex_int16_t yy_rule_linenum[34] =
{ 0,
90, 91, 93, 97, 98, 99, 100, 101, 102, 103,
- 104, 107, 111, 112, 113, 114, 119, 120, 121, 122,
- 127, 128, 133, 138, 139, 146, 151, 152, 153, 158,
- 159, 162
+ 104, 105, 108, 112, 113, 114, 115, 120, 121, 122,
+ 123, 128, 129, 134, 139, 140, 147, 152, 153, 154,
+ 159, 160, 163
} ;
#define YY_TRAILING_MASK 0x2000
@@ -1161,14 +1168,14 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 140 )
+ if ( yy_current_state >= 142 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
*yyg->yy_state_ptr++ = yy_current_state;
++yy_cp;
}
- while ( yy_current_state != 139 );
+ while ( yy_current_state != 141 );
yy_find_action:
/* %% [10.0] code to find the action number goes here */
@@ -1233,13 +1240,13 @@ do_action: /* This label is used only to access EOF actions. */
{
if ( yy_act == 0 )
fprintf( stderr, "--scanner backing up\n" );
- else if ( yy_act < 33 )
+ else if ( yy_act < 34 )
fprintf( stderr, "--accepting rule at line %ld (\"%s\")\n",
(long)yy_rule_linenum[yy_act], yytext );
- else if ( yy_act == 33 )
+ else if ( yy_act == 34 )
fprintf( stderr, "--accepting default rule (\"%s\")\n",
yytext );
- else if ( yy_act == 34 )
+ else if ( yy_act == 35 )
fprintf( stderr, "--(end of buffer or a NUL)\n" );
else
fprintf( stderr, "--EOF (start condition %d)\n", YY_START );
@@ -1272,54 +1279,58 @@ YY_RULE_SETUP
YY_BREAK
case 6:
YY_RULE_SETUP
-{ return fqNUMBER; }
+{ return fqSPOTGROUP; }
YY_BREAK
case 7:
YY_RULE_SETUP
-{ return fqALPHANUM; }
+{ return fqNUMBER; }
YY_BREAK
case 8:
YY_RULE_SETUP
-{ return fqWS; }
+{ return fqALPHANUM; }
YY_BREAK
case 9:
-/* rule 9 can match eol */
YY_RULE_SETUP
-{ return fqBASESEQ; }
+{ return fqWS; }
YY_BREAK
case 10:
/* rule 10 can match eol */
YY_RULE_SETUP
-{ return fqCOLORSEQ; }
+{ return fqBASESEQ; }
YY_BREAK
case 11:
+/* rule 11 can match eol */
YY_RULE_SETUP
-{ return yytext[0]; }
+{ return fqCOLORSEQ; }
YY_BREAK
-
case 12:
-/* rule 12 can match eol */
YY_RULE_SETUP
-{ BEGIN IN_SEQUENCE; ENDLINE; }
+{ return yytext[0]; }
YY_BREAK
-
case 13:
/* rule 13 can match eol */
YY_RULE_SETUP
-{ BEGIN 0; return fqBASESEQ; }
+{ BEGIN IN_SEQUENCE; ENDLINE; }
YY_BREAK
+
+
case 14:
/* rule 14 can match eol */
YY_RULE_SETUP
-{ BEGIN 0; return fqCOLORSEQ; }
+{ BEGIN 0; return fqBASESEQ; }
YY_BREAK
case 15:
/* rule 15 can match eol */
YY_RULE_SETUP
-{ BEGIN 0; return fqASCQUAL; }
+{ BEGIN 0; return fqCOLORSEQ; }
YY_BREAK
case 16:
+/* rule 16 can match eol */
+YY_RULE_SETUP
+{ BEGIN 0; return fqASCQUAL; }
+ YY_BREAK
+case 17:
*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */
yyg->yy_c_buf_p = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up yytext again */
@@ -1329,55 +1340,55 @@ YY_RULE_SETUP
-case 17:
+case 18:
YY_RULE_SETUP
{ BEGIN 0; return fqBASESEQ; }
YY_BREAK
-case 18:
+case 19:
YY_RULE_SETUP
{ BEGIN 0; return fqCOLORSEQ; }
YY_BREAK
-case 19:
+case 20:
YY_RULE_SETUP
{ BEGIN 0; return fqNUMBER; }
YY_BREAK
-case 20:
-/* rule 20 can match eol */
+case 21:
+/* rule 21 can match eol */
YY_RULE_SETUP
{ BEGIN 0; ENDLINE; }
YY_BREAK
-case 21:
+case 22:
*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */
yyg->yy_c_buf_p = yy_cp -= 1;
YY_DO_BEFORE_ACTION; /* set up yytext again */
YY_RULE_SETUP
{ return fqTOKEN; /* do not bother to parse the inside of the line */ }
YY_BREAK
-case 22:
-/* rule 22 can match eol */
+case 23:
+/* rule 23 can match eol */
YY_RULE_SETUP
{ BEGIN IN_QUALITY; ENDLINE }
YY_BREAK
-case 23:
+case 24:
YY_RULE_SETUP
{ }
YY_BREAK
-case 24:
-/* rule 24 can match eol */
+case 25:
+/* rule 25 can match eol */
YY_RULE_SETUP
{ return fqASCQUAL; }
YY_BREAK
-case 25:
-/* rule 25 can match eol */
+case 26:
+/* rule 26 can match eol */
YY_RULE_SETUP
{ /* if read was split across several lines, expect the same number of lines in quality */
if (yyextra->expectedQualityLines <= 1)
@@ -1387,46 +1398,46 @@ YY_RULE_SETUP
ENDLINE;
}
YY_BREAK
-case 26:
+case 27:
YY_RULE_SETUP
{ return yytext[0]; }
YY_BREAK
-case 27:
-/* rule 27 can match eol */
+case 28:
+/* rule 28 can match eol */
YY_RULE_SETUP
{ return fqASCQUAL; }
YY_BREAK
-case 28:
-/* rule 28 can match eol */
+case 29:
+/* rule 29 can match eol */
YY_RULE_SETUP
{ BEGIN 0; ENDLINE; }
YY_BREAK
-case 29:
+case 30:
YY_RULE_SETUP
{ return yytext[0]; }
YY_BREAK
-case 30:
-/* rule 30 can match eol */
+case 31:
+/* rule 31 can match eol */
YY_RULE_SETUP
{ BEGIN 0; ENDLINE; }
YY_BREAK
-case 31:
+case 32:
YY_RULE_SETUP
{ BEGIN 0; return fqENDOFTEXT; }
YY_BREAK
-case 32:
-/* rule 32 can match eol */
+case 33:
+/* rule 33 can match eol */
YY_RULE_SETUP
{ ENDLINE; }
YY_BREAK
-case 33:
+case 34:
YY_RULE_SETUP
ECHO;
YY_BREAK
@@ -1725,7 +1736,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 140 )
+ if ( yy_current_state >= 142 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1754,11 +1765,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 140 )
+ if ( yy_current_state >= 142 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 139);
+ yy_is_jam = (yy_current_state == 141);
if ( ! yy_is_jam )
*yyg->yy_state_ptr++ = yy_current_state;
diff --git a/tools/fastq-loader/fastq-lex.l b/tools/fastq-loader/fastq-lex.l
index e96a37d..fee3135 100644
--- a/tools/fastq-loader/fastq-lex.l
+++ b/tools/fastq-loader/fastq-lex.l
@@ -96,6 +96,7 @@ ascqual [\x0E-\x1F \x21-\x7F]+
{
:{digits}:{digits}:{digits}:{digits} { return fqCOORDS; }
[SDE]RR{digits}\.{digits} { return fqRUNDOTSPOT; }
+ #[A-Za-z0-9\-_]* { return fqSPOTGROUP; }
{digits} { return fqNUMBER; }
{alphanum} { return fqALPHANUM; }
{ws} { return fqWS; }
diff --git a/tools/fastq-loader/fastq-loader.c b/tools/fastq-loader/fastq-loader.c
index 41f5670..351f217 100644
--- a/tools/fastq-loader/fastq-loader.c
+++ b/tools/fastq-loader/fastq-loader.c
@@ -281,10 +281,9 @@ static rc_t PathWithBasePath(char rslt[], size_t sz, char const path[], char con
if (string_printf(rslt, sz, NULL, "%s/%s", base, path) == 0)
return 0;
}
- else if (plen < sz) {
- strcpy(rslt, path);
- return 0;
- }
+ else if ( string_copy ( rslt, sz, path, plen ) < sz )
+ return 0;
+
{
rc_t const rc = RC(rcApp, rcArgv, rcAccessing, rcBuffer, rcInsufficient);
(void)LOGERR(klogErr, rc, "The path to the file is too long");
diff --git a/tools/fastq-loader/fastq-tokens.h b/tools/fastq-loader/fastq-tokens.h
index a78874f..33745fe 100644
--- a/tools/fastq-loader/fastq-tokens.h
+++ b/tools/fastq-loader/fastq-tokens.h
@@ -39,16 +39,17 @@
enum yytokentype {
fqENDOFTEXT = 0,
fqRUNDOTSPOT = 258,
- fqNUMBER = 259,
- fqALPHANUM = 260,
- fqWS = 261,
- fqENDLINE = 262,
- fqBASESEQ = 263,
- fqCOLORSEQ = 264,
- fqTOKEN = 265,
- fqASCQUAL = 266,
- fqCOORDS = 267,
- fqUNRECOGNIZED = 268
+ fqSPOTGROUP = 259,
+ fqNUMBER = 260,
+ fqALPHANUM = 261,
+ fqWS = 262,
+ fqENDLINE = 263,
+ fqBASESEQ = 264,
+ fqCOLORSEQ = 265,
+ fqTOKEN = 266,
+ fqASCQUAL = 267,
+ fqCOORDS = 268,
+ fqUNRECOGNIZED = 269
};
#endif
diff --git a/tools/fastq-loader/latf-load.vers b/tools/fastq-loader/latf-load.vers
index 35d16fb..097a15a 100644
--- a/tools/fastq-loader/latf-load.vers
+++ b/tools/fastq-loader/latf-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/fastq-loader/latf-load.vers.h b/tools/fastq-loader/latf-load.vers.h
index f9532ef..d8b420a 100644
--- a/tools/fastq-loader/latf-load.vers.h
+++ b/tools/fastq-loader/latf-load.vers.h
@@ -1 +1 @@
-#define LATF_LOAD_VERS 0x02050007
+#define LATF_LOAD_VERS 0x02060002
diff --git a/tools/fuse/remote-cache.c b/tools/fuse/remote-cache.c
index a7692a9..4301a9f 100644
--- a/tools/fuse/remote-cache.c
+++ b/tools/fuse/remote-cache.c
@@ -111,15 +111,6 @@ _ReadSchema ( const char * Path, char * Buffer, size_t BufferSize )
return _ReadSomething ( Path, Buffer, BufferSize, VPathReadScheme );
} /* _ReadSchema () */
-#ifdef IDDQD
-static
-rc_t CC
-_ReadHost ( const char * Path, char * Buffer, size_t BufferSize )
-{
- return _ReadSomething ( Path, Buffer, BufferSize, VPathReadHost );
-} /* _ReadHost () */
-#endif /* IDDQD */
-
bool CC
_MatchSchemas ( const char * Schema1, const char * Schema2 )
{
@@ -220,8 +211,15 @@ struct RCacheEntry {
char * Name;
char * Url;
+ char * Path;
- const KFile * File;
+ int read_qty;
+ bool is_local;
+ bool is_complete;
+
+ uint64_t actual_size;
+
+ const struct KFile * File;
};
/*))
@@ -409,54 +407,6 @@ _CheckRemoveOldCacheDirectory ( const char * CacheRoot )
return RCt;
} /* _CheckRemoveOldCacheDirectory () */
-#ifdef LOPPPATA
-######
-rc_t CC
-_EGetCachePathToFile (
- const char * CacheRoot,
- const char * PathToFile,
- char * Buffer,
- size_t BufferSize
-)
-{
- size_t NumWrit;
-
- if ( Buffer == NULL
- || BufferSize == 0
- || PathToFile == NULL
- || CacheRoot == NULL
- )
- {
- return RC ( rcExe, rcString, rcCopying, rcParam, rcNull );
- }
-
- return string_printf (
- Buffer,
- BufferSize,
- & NumWrit,
- "%s/%s/%s",
- CacheRoot,
- CacheDirName,
- PathToFile
- );
-} /* _EGetCachePathToFile () */
-
-rc_t CC
-_GetCachePathToFile (
- const char * PathToFile,
- char * Buffer,
- size_t BufferSize
-)
-{
- return _EGetCachePathToFile (
- RemoteCachePath (),
- PathToFile,
- Buffer,
- BufferSize
- );
-} /* _GetCachePathToFile () */
-#endif /* LOPPPATA */
-
/*
* Lyrics: This method will set buffer size for HTTP transport
* and return previous value.
@@ -716,27 +666,82 @@ RemoteCacheDispose ()
} /* RemoteCacheDispose () */
/*))
- // Generates effective name for a file
+ // Generates effective name and path for file; on success both are heap strings owned by the caller
((*/
rc_t CC
-_RCacheEntryGenerateName ( char * Buffer, size_t Size )
+_RCacheEntryGenerateNameAndPath ( char ** Name, char ** Path )
{
+ rc_t RCt;
+ char Buffer [ 4096 ];
+ char Buffer2 [ 4096 ];
size_t NumWritten;
+ char * TheName;
+ char * ThePath;
+ RCt = 0;
+ * Buffer = 0;
+ * Buffer2 = 0;
NumWritten = 0;
+ TheName = NULL;
+ ThePath = NULL;
+
+ if ( Name != NULL ) { * Name = NULL; }
+ if ( Path != NULL ) { * Path = NULL; }
- if ( Buffer == NULL || Size <= 2 ) {
+ if ( Name == NULL || Path == NULL ) {
return RC ( rcExe, rcFile, rcInitializing, rcParam, rcNull );
}
- return string_printf (
+ RCt = string_printf (
Buffer,
- Size,
+ sizeof ( Buffer ),
& NumWritten,
"etwas.%d",
_CacheEntryNo + 1
);
-} /* _RCacheEntryGenerateName () */
+ if ( RCt == 0 ) {
+ TheName = string_dup_measure ( Buffer, NULL );
+ if ( TheName == NULL ) {
+ RCt = RC ( rcExe, rcFile, rcInitializing, rcMemory, rcExhausted );
+ }
+ else {
+ RCt = _GetCachePath ( Buffer2, sizeof ( Buffer2 ) );
+ if ( RCt == 0 ) {
+ RCt = string_printf (
+ Buffer,
+ sizeof ( Buffer ),
+ & NumWritten,
+ "%s/%s",
+ Buffer2,
+ TheName
+ );
+ if ( RCt == 0 ) {
+ ThePath = string_dup_measure ( Buffer, NULL );
+ if ( ThePath == NULL ) { /* path copy failed: report it so the cleanup below frees TheName */
+ RCt = RC ( rcExe, rcFile, rcInitializing, rcMemory, rcExhausted );
+ }
+ else {
+ * Name = TheName;
+ * Path = ThePath;
+ }
+ }
+ }
+ }
+ }
+
+ if ( RCt != 0 ) {
+ * Name = NULL;
+ if ( TheName != NULL ) {
+ free ( TheName );
+ }
+ * Path = NULL;
+ if ( ThePath != NULL ) {
+ free ( ThePath );
+ }
+ }
+
+ return RCt;
+} /* _RCacheEntryGenerateNameAndPath () */
/*))
// This method will destroy CacheEntry and free all resources
@@ -744,6 +749,9 @@ _RCacheEntryGenerateName ( char * Buffer, size_t Size )
rc_t CC
_RCacheEntryDestroy ( struct RCacheEntry * self )
{
+/*
+KOutMsg ( " [GGU] [EntryDestroy]\n" );
+*/
if ( self != NULL ) {
/*
RmOutMsg ( "++++++DL DESTROY [0x%p] entry\n", self );
@@ -754,6 +762,9 @@ _RCacheEntryDestroy ( struct RCacheEntry * self )
/*) File
(*/
if ( self -> File != NULL ) {
+/*
+KOutMsg ( "[GGU] [DestroyEnty] [%s]\n", self -> Name );
+*/
ReleaseComplain ( KFileRelease, self -> File );
self -> File = 0;
}
@@ -763,6 +774,12 @@ _RCacheEntryDestroy ( struct RCacheEntry * self )
free ( self -> Url );
self -> Url = NULL;
}
+ /*) Path
+ (*/
+ if ( self -> Path != NULL ) {
+ free ( self -> Path );
+ self -> Path = NULL;
+ }
/*) Name
(*/
if ( self -> Name != NULL ) {
@@ -777,7 +794,12 @@ _RCacheEntryDestroy ( struct RCacheEntry * self )
}
/*) refcount
(*/
- KRefcountWhack ( & ( self -> refcount ), _CacheEntryClassName );
+ KRefcountWhack ( & ( self -> refcount ), _CacheEntryClassName );
+
+ self -> read_qty = 0;
+ self -> is_local = false;
+ self -> is_complete = false;
+ self -> actual_size = 0;
free ( self );
}
@@ -796,22 +818,16 @@ _RCacheEntryMake (
{
rc_t RCt;
struct RCacheEntry * Entry;
- char Buffer [ 4096 ];
RCt = 0;
+ Entry = NULL;
- if ( Url == NULL || RetEntry == NULL ) {
- return RC ( rcExe, rcFile, rcInitializing, rcParam, rcNull );
+ if ( RetEntry != NULL ) {
+ * RetEntry = NULL;
}
- * RetEntry = NULL;
- if ( ! RemoteCacheIsDisklessMode () ) {
- /*) It is better do it here, before any allocation
- (*/
- RCt = _RCacheEntryGenerateName ( Buffer, sizeof ( Buffer ) );
- if ( RCt != 0 ) {
- return RCt;
- }
+ if ( Url == NULL || RetEntry == NULL ) {
+ return RC ( rcExe, rcFile, rcInitializing, rcParam, rcNull );
}
Entry = ( struct RCacheEntry * ) calloc (
@@ -819,47 +835,51 @@ _RCacheEntryMake (
sizeof ( struct RCacheEntry )
);
if ( Entry == NULL ) {
- return RC ( rcExe, rcFile, rcInitializing, rcMemory, rcExhausted );
+ RCt = RC ( rcExe, rcFile, rcInitializing, rcMemory, rcExhausted );
}
+ else {
+ Entry -> read_qty = 0;
+ Entry -> is_local = false;
+ Entry -> is_complete = false;
+ Entry -> actual_size = 0;
- /*) refcount
- (*/
- KRefcountInit (
- & ( Entry -> refcount ),
- 0,
- _CacheEntryClassName,
- "_RCacheEntryMake()",
- Buffer
- );
- /*) mutabor
- (*/
- RCt = KLockMake ( & ( Entry -> mutabor ) );
-
- if ( RCt == 0 ) {
if ( ! RemoteCacheIsDisklessMode () ) {
- /*) Name
+ /*) Generate Name and Path first: Name labels the refcount initialised below
(*/
- Entry -> Name = string_dup_measure ( Buffer, NULL );
- if ( Entry -> Name == NULL ) {
- RCt = RC ( rcExe, rcFile, rcInitializing, rcMemory, rcExhausted );
- }
+ RCt = _RCacheEntryGenerateNameAndPath (
+ & ( Entry -> Name ),
+ & ( Entry -> Path )
+ );
}
if ( RCt == 0 ) {
- /*) Url
+ /*) refcount
(*/
- Entry -> Url = string_dup_measure ( Url, NULL );
- if ( Entry -> Url == NULL ) {
- RCt = RC ( rcExe, rcFile, rcInitializing, rcMemory, rcExhausted );
- }
+ KRefcountInit (
+ & ( Entry -> refcount ),
+ 0,
+ _CacheEntryClassName,
+ "_RCacheEntryMake()",
+ Entry -> Name
+ );
+ /*) mutabor
+ (*/
+ RCt = KLockMake ( & ( Entry -> mutabor ) );
+
if ( RCt == 0 ) {
- /*) File, nothing about file, it will be opened by
- / on demand
- (*/
- /*) Increasing count and assigning value
+ /*) Url
(*/
- _CacheEntryNo ++;
- * RetEntry = Entry;
+ Entry -> Url = string_dup_measure ( Url, NULL );
+ if ( Entry -> Url == NULL ) {
+ RCt = RC ( rcExe, rcFile, rcInitializing, rcMemory, rcExhausted );
+ }
+ if ( RCt == 0 ) {
+ /*) File will be opened on demand
+ / Increasing count and assigning value
+ (*/
+ _CacheEntryNo ++;
+ * RetEntry = Entry;
+ }
}
}
}
@@ -1049,6 +1069,9 @@ _RCacheEntryReleaseWithoutLock ( struct RCacheEntry * self )
if ( self -> File != NULL ) {
/*
+KOutMsg ( "[GGU] [ReleaseEntry] [%s]\n", self -> Name );
+*/
+/*
RmOutMsg ( "|||<-- Releasing [%s][%s]\n", self -> Name, self -> Url );
*/
ReleaseComplain ( KFileRelease, self -> File );
@@ -1059,12 +1082,29 @@ RmOutMsg ( "|||<-- Releasing [%s][%s]\n", self -> Name, self -> Url );
} /* _RCacheEntryReleaseWithoutLock () */
rc_t CC
+_RCacheEntryReleaseWithLock ( struct RCacheEntry * self )
+{
+ rc_t RCt = 0;
+
+ RCt = KLockAcquire ( self -> mutabor );
+ if ( RCt == 0 ) {
+ RCt = _RCacheEntryReleaseWithoutLock ( self );
+ KLockUnlock ( self -> mutabor );
+ }
+
+ return RCt;
+} /* _RCacheEntryReleaseWithLock () */
+
+rc_t CC
RCacheEntryRelease ( struct RCacheEntry * self )
{
rc_t RCt;
RCt = 0;
+/*
+KOutMsg ( " [GGU] [Closing Entry]\n" );
+*/
if ( self != NULL ) {
RCt = KLockAcquire ( self -> mutabor );
@@ -1074,7 +1114,8 @@ RCacheEntryRelease ( struct RCacheEntry * self )
_CacheEntryClassName
) ) {
case krefWhack:
- _RCacheEntryReleaseWithoutLock ( self );
+// _RCacheEntryReleaseWithoutLock ( self );
+ KLockUnlock ( self -> mutabor );
if ( RemoteCacheIsDisklessMode () ) {
/*
RmOutMsg ( "++++++DL RELEASE [0x%p] entry\n", self );
@@ -1085,10 +1126,10 @@ RCacheEntryRelease ( struct RCacheEntry * self )
case krefNegative:
RCt = RC ( rcExe, rcFile, rcReleasing, rcRange, rcExcessive );
default:
+ KLockUnlock ( self -> mutabor );
break;
}
- KLockUnlock ( self -> mutabor );
}
}
@@ -1096,10 +1137,7 @@ RCacheEntryRelease ( struct RCacheEntry * self )
} /* RCacheEntryRelease () */
rc_t CC
-_RCacheEntryOpenFileReadRemote (
- struct RCacheEntry * self,
- const char * Path
-)
+_RCacheEntryOpenFileReadRemote ( struct RCacheEntry * self )
{
rc_t RCt;
struct KDirectory * Directory;
@@ -1113,13 +1151,17 @@ _RCacheEntryOpenFileReadRemote (
return RC ( rcExe, rcFile, rcOpening, rcParam, rcNull );
}
- if ( ( ! RemoteCacheIsDisklessMode () ) && Path == NULL ) {
+/*
+KOutMsg ( "[GGU] [OpenReadRemote] [%s]\n", self -> Name );
+*/
+
+ if ( ( ! RemoteCacheIsDisklessMode () ) && self -> Path == NULL ) {
return RC ( rcExe, rcFile, rcOpening, rcParam, rcNull );
}
/*
RmOutMsg ( "|||<-- Opening [R] [%s][%s]\n", self -> Name, self -> Url );
-RmOutMsg ( " |<-- Cache Entry [%s]\n", Path );
+RmOutMsg ( " |<-- Cache Entry [%s]\n", self -> Path );
*/
RCt = KNSManagerMakeHttpFile (
@@ -1141,7 +1183,7 @@ RmOutMsg ( " |<-- Cache Entry [%s]\n", Path );
& TeeFile,
HttpFile,
_HttpBlockSize, /* blocksize */
- Path
+ self -> Path
);
if ( RCt == 0 ) {
self -> File = ( KFile * ) TeeFile;
@@ -1153,14 +1195,17 @@ RmOutMsg ( " |<-- Cache Entry [%s]\n", Path );
}
}
+ if ( RCt == 0 ) {
+ if ( self -> actual_size == 0 ) {
+ RCt = KFileSize ( self -> File, & ( self -> actual_size ) );
+ }
+ }
+
return RCt;
} /* _RCacneEntryOpenFileReadRemote () */
rc_t CC
-_RCacheEntryOpenFileReadLocal (
- struct RCacheEntry * self,
- const char * Path
-)
+_RCacheEntryOpenFileReadLocal ( struct RCacheEntry * self )
{
rc_t RCt;
const struct KFile * File;
@@ -1170,18 +1215,25 @@ _RCacheEntryOpenFileReadLocal (
File = NULL;
Directory = NULL;
- if ( self == NULL || Path == NULL ) {
+ if ( self == NULL ) {
+ return RC ( rcExe, rcFile, rcOpening, rcParam, rcNull );
+ }
+
+ if ( self -> Path == NULL ) {
return RC ( rcExe, rcFile, rcOpening, rcParam, rcNull );
}
/*
+KOutMsg ( "[GGU] [OpenReadLocal] [%s]\n", self -> Name );
+*/
+/*
RmOutMsg ( "|||<-- Opening [L] [%s][%s]\n", self -> Name, self -> Url );
-RmOutMsg ( " |<-- Cache Entry [%s]\n", Path );
+RmOutMsg ( " |<-- Cache Entry [%s]\n", self -> Path );
*/
RCt = KDirectoryNativeDir ( & Directory );
if ( RCt == 0 ) {
- RCt = KDirectoryOpenFileRead ( Directory, & File, Path );
+ RCt = KDirectoryOpenFileRead ( Directory, & File, self -> Path );
if ( RCt == 0 ) {
self -> File = File;
}
@@ -1189,86 +1241,257 @@ RmOutMsg ( " |<-- Cache Entry [%s]\n", Path );
ReleaseComplain ( KDirectoryRelease, Directory );
}
+ if ( RCt == 0 ) {
+ if ( self -> actual_size == 0 ) {
+ RCt = KFileSize ( self -> File, & ( self -> actual_size ) );
+ }
+ }
+
return RCt;
} /* _RCacneEntryOpenFileReadLocal () */
-/*((
- \\ The only way to check that cache file completed, is to check if
- \\ it exists
- ((*/
-bool CC
-_RCacheCheckCompleted ( const char * Path )
+rc_t CC
+_RCacheEntryGetAndCheckFile (
+ struct RCacheEntry * self,
+ const struct KFile ** File,
+ bool * Synchronized
+)
{
- uint32_t PathType;
- KDirectory * Directory;
+ rc_t RCt;
+ struct KDirectory * NatDir;
+ bool OpenLocal;
+ bool OpenRemote;
+ bool CloseFile;
+ bool IsComplete;
- PathType = kptNotFound;
- Directory = NULL;
+ RCt = 0;
+ NatDir = NULL;
+ OpenLocal = false;
+ OpenRemote = false;
+ CloseFile = false;
+ IsComplete = false;
- if ( KDirectoryNativeDir ( & Directory ) == 0 ) {
- PathType = KDirectoryPathType ( Directory, Path );
- ReleaseComplain ( KDirectoryRelease, Directory );
+ if ( Synchronized != NULL ) {
+ * Synchronized = true;
}
- return PathType == kptFile;
-} /* _RCacheCheckCompleted () */
+ if ( File != NULL ) {
+ * File = NULL;
+ }
+
+ if ( self == NULL ) {
+ return RC ( rcExe, rcFile, rcReading, rcParam, rcNull );
+ }
+
+ if ( File == NULL ) {
+ return RC ( rcExe, rcFile, rcReading, rcParam, rcNull );
+ }
+
+ if ( Synchronized == NULL ) {
+ return RC ( rcExe, rcFile, rcReading, rcParam, rcNull );
+ }
+
+ /* How it works:
+ *
+ * if the file exists, it is complete and local, just open it;
+ * any read operation should be unsynchronized.
+ *
+ * if the file does not exist, but is opened, and it is the 100th read,
+ * we should call IsCacheTeeComplete () and if it is complete
+ * we should close the file and open it as regular. Any read
+ * operation will be unsynchronized.
+ *
+ * in all other situations, it is not complete, open tee;
+ * all read operations are synchronized.
+ *
+ * if it is diskless mode - all operations are synchronized
+ */
+
+ /*) Diskless mode.
+ (*/
+ if ( RemoteCacheIsDisklessMode () ) {
+ if ( self -> File == NULL ) {
+ RCt = _RCacheEntryOpenFileReadRemote ( self );
+ }
+
+ if ( RCt == 0 ) {
+ * File = self -> File;
+ * Synchronized = true;
+ }
+
+ return RCt;
+ }
+
+ /*) Normal mode
+ (*/
+ if ( self -> File == NULL ) {
+ /* Checking if it is known that file complete */
+ if ( self -> is_complete ) {
+ OpenLocal = true;
+ }
+ else {
+ /* Checking if file exist */
+ RCt = KDirectoryNativeDir ( & NatDir );
+ if ( RCt == 0 ) {
+ if ( KDirectoryPathType ( NatDir, self -> Path ) == kptFile ) {
+ OpenLocal = true;
+ }
+ else {
+ OpenRemote = true;
+ }
+
+ ReleaseComplain ( KDirectoryRelease, NatDir );
+ }
+ }
+ }
+ else {
+ /* Checking if it is known that file complete */
+ if ( self -> is_complete ) {
+ if ( ! self -> is_local ) {
+ CloseFile = true;
+ OpenLocal = true;
+ }
+ }
+ else {
+ /* checking completeness is quite a heavy operation */
+ RCt = IsCacheTeeComplete ( self -> File, & IsComplete );
+ if ( RCt == 0 ) {
+ if ( IsComplete ) {
+ CloseFile = true;
+ OpenLocal = true;
+ }
+ }
+ }
+ }
+ /*) Stupid checks
+ (*/
+ if ( OpenLocal && OpenRemote ) {
+ RCt = RC ( rcExe, rcFile, rcReading, rcParam, rcInvalid );
+ }
+
+ if ( CloseFile && self -> File == NULL ) {
+ RCt = RC ( rcExe, rcFile, rcReading, rcParam, rcInvalid );
+ }
+
+ /*) Here we are trying to animate that object
+ (*/
+ if ( RCt == 0 ) {
+ if ( CloseFile ) {
+ self -> read_qty = 0;
+
+ RCt = _RCacheEntryReleaseWithoutLock ( self );
+/*
+RmOutMsg ( "|||<-- Close file [%s][%s] [A=%d]\n", self -> Name, self -> Path, RCt );
+*/
+ }
+
+ if ( OpenLocal ) {
+ self -> is_complete = true;
+ self -> is_local = true;
+ self -> read_qty = 0;
+
+ RCt = _RCacheEntryOpenFileReadLocal ( self );
+/*
+RmOutMsg ( "|||<-- Open LOCAL file [%s][%s] [A=%d]\n", self -> Name, self -> Path, RCt );
+*/
+ }
+
+ if ( OpenRemote ) {
+ self -> is_complete = false;
+ self -> is_local = false;
+ self -> read_qty = 1;
+
+ RCt = _RCacheEntryOpenFileReadRemote ( self );
+/*
+RmOutMsg ( "|||<-- Open REMOTE file [%s][%s] [A=%d]\n", self -> Name, self -> Url, RCt );
+*/
+ }
+
+ if ( RCt == 0 ) {
+ * File = self -> File;
+ * Synchronized = ! self -> is_local;
+ }
+ }
+
+ return RCt;
+} /* _RCacheEntryGetAndCheckFile () */
rc_t CC
-_RCacheEntryOpenFileRead ( struct RCacheEntry * self)
+_RCacheEntryDoRead (
+ struct RCacheEntry * self,
+ char * Buffer,
+ size_t SizeToRead,
+ uint64_t Offset,
+ size_t * NumReaded
+)
{
rc_t RCt;
- char Buffer [ 4096 ], ThePath [ 4096 ];
- size_t TheSize;
+ const struct KFile * File;
+ bool Synchronized;
RCt = 0;
- TheSize = 0;
- * Buffer = 0;
- * ThePath = 0;
+ Synchronized = true;
- if ( self == NULL ) {
- return RC ( rcExe, rcFile, rcOpening, rcParam, rcNull );
+ if ( NumReaded != NULL ) {
+ * NumReaded = 0;
}
- if ( self -> File != NULL ) {
- return 0;
+ if ( self == NULL ) {
+ return RC ( rcExe, rcFile, rcReading, rcParam, rcNull );
}
- /*) Do that before messing with disk
- (*/
- if ( RemoteCacheIsDisklessMode () ) {
- RCt = _RCacheEntryOpenFileReadRemote ( self, NULL );
+ if ( NumReaded == NULL ) {
+ return RC ( rcExe, rcFile, rcReading, rcParam, rcInvalid );
}
- else {
- /*) First we should to make path to real cache file
- (*/
- RCt = _GetCachePath ( Buffer, sizeof ( Buffer ) );
+ if ( SizeToRead == 0 ) {
+ return RC ( rcExe, rcFile, rcReading, rcParam, rcInvalid );
+ }
+
+ /*) Here we are locking
+ (*/
+ RCt = KLockAcquire ( self -> mutabor );
+ if ( RCt == 0 ) {
+ RCt = _RCacheEntryGetAndCheckFile (
+ self,
+ & File,
+ & Synchronized
+ );
if ( RCt == 0 ) {
- RCt = string_printf (
- ThePath,
- sizeof ( ThePath ),
- & TheSize,
- "%s/%s",
- Buffer,
- self -> Name
- );
- }
+ if ( ! Synchronized ) {
+ /*) do not need synchronisation to read local file
+ (*/
+ KLockUnlock ( self -> mutabor );
+ }
- if ( RCt != 0 ) {
- return RCt;
+ RCt = KFileRead (
+ self -> File,
+ Offset,
+ Buffer,
+ SizeToRead,
+ NumReaded
+ );
+/*
+RmOutMsg ( "|||<-- Reading [%s][%s] [O=%d][S=%d][R=%d][A=%d]\n", self -> Name, self -> Url, Offset, SizeToRead, * NumReaded, RCt );
+*/
}
- if ( _RCacheCheckCompleted ( ThePath ) ) {
- RCt = _RCacheEntryOpenFileReadLocal ( self, ThePath );
- }
- else {
- RCt = _RCacheEntryOpenFileReadRemote ( self, ThePath );
+ if ( Synchronized ) {
+ KLockUnlock ( self -> mutabor );
}
+ }
+
+ if ( RCt != 0 ) {
+ * NumReaded = 0;
+/*
+RmOutMsg ( "|||<- Failed to read file [%s][%s] at attempt [%d]\n", self -> Name, self -> Url, llp + 1 );
+*/
+ _RCacheEntryReleaseWithLock ( self );
}
return RCt;
-} /* _RCacheEntryOpenFileRead () */
+} /* _RCacheEntryDoRead () */
rc_t CC
RCacheEntryRead (
@@ -1276,7 +1499,8 @@ RCacheEntryRead (
char * Buffer,
size_t SizeToRead,
uint64_t Offset,
- size_t * NumReaded
+ size_t * NumReaded,
+ uint64_t * ActualSize
)
{
rc_t RCt;
@@ -1290,58 +1514,37 @@ RCacheEntryRead (
return RC ( rcExe, rcFile, rcReading, rcParam, rcNull );
}
- /*) Here we are locking
- (*/
- RCt = KLockAcquire ( self -> mutabor );
-
- if ( RCt == 0 ) {
-
- for ( llp = 0; llp < NumAttempts; llp ++ ) {
- /*) There could be non zero value from previous pass
- (*/
- if ( RCt != 0 ) {
-PLOGMSG ( klogErr, ( klogErr, "|||<- Trying to read file $(n)$(u) at attempt $(l)", PLOG_3(PLOG_S(n),PLOG_S(u),PLOG_I64(l)), self -> Name, self -> Url, llp + 1 ) );
- RCt = 0;
- }
-
- /*) If error happen on previous pass file is released
- (*/
- if ( self -> File == NULL ) {
- /*) We are opening file for read here
- (*/
- RCt = _RCacheEntryOpenFileRead ( self );
-/*
-RmOutMsg ( "|||<-- Opening file [%s][%s] [A=%d]\n", self -> Name, self -> Url, RCt );
-*/
- }
+ for ( llp = 0; llp < NumAttempts; llp ++ ) {
+ /*) There could be non zero value from previous pass
+ (*/
+ if ( RCt != 0 ) {
+PLOGMSG ( klogErr, ( klogErr, "|||<- Trying to read file $(n) [$(u)] at attempt $(l)", PLOG_3(PLOG_S(n),PLOG_S(u),PLOG_I64(l)), self -> Name, self -> Url, llp + 1 ) );
+ RCt = 0;
+ }
- if ( RCt == 0 ) {
- RCt = KFileRead (
- self -> File,
- Offset,
+ RCt = _RCacheEntryDoRead (
+ self,
Buffer,
SizeToRead,
+ Offset,
NumReaded
);
/*
RmOutMsg ( "|||<-- Reading [%s][%s] [O=%d][S=%d][R=%d][A=%d]\n", self -> Name, self -> Url, Offset, SizeToRead, * NumReaded, RCt );
*/
- if ( RCt == 0 ) {
- break;
- }
- }
-/*
-RmOutMsg ( "|||<- Failed to read file [%s][%s] at attempt [%d]\n", self -> Name, self -> Url, llp + 1 );
-*/
- _RCacheEntryReleaseWithoutLock ( self );
-
+ if ( RCt == 0 ) {
+ break;
}
+ }
- if ( RCt != 0 ) {
+ if ( RCt != 0 ) {
PLOGMSG ( klogErr, ( klogErr, "|||<- Failed to read file $(n)$(u) after $(l) attempts", PLOG_3(PLOG_S(n),PLOG_S(u),PLOG_I64(l)), self -> Name, self -> Url, llp + 1 ) );
- }
+ }
- KLockUnlock ( self -> mutabor );
+ if ( RCt == 0 ) {
+ if ( ActualSize != NULL ) {
+ * ActualSize = self -> actual_size;
+ }
}
return RCt;
diff --git a/tools/fuse/remote-cache.h b/tools/fuse/remote-cache.h
index 66fbe30..1635904 100644
--- a/tools/fuse/remote-cache.h
+++ b/tools/fuse/remote-cache.h
@@ -95,7 +95,8 @@ rc_t CC RCacheEntryRead (
char * Buffer,
size_t BufferSize,
uint64_t Offset,
- size_t * NumRead
+ size_t * NumRead,
+ uint64_t * ActualSize
);
/*))
// This method will set block size for HTTP transport
@@ -133,7 +134,6 @@ rc_t CC RemoteCacheFindOrCreateEntry (
struct RCacheEntry ** Entry
);
-
/*))
// Found that interesting
((*/
diff --git a/tools/fuse/remote-file.c b/tools/fuse/remote-file.c
index 3a50776..f6b7b83 100644
--- a/tools/fuse/remote-file.c
+++ b/tools/fuse/remote-file.c
@@ -43,7 +43,7 @@ typedef struct RemoteFileNode RemoteFileNode;
#include <stdlib.h>
#include <time.h>
-rc_t RemoteFileAccessor_Make(const SAccessor** accessor, const char* name, struct RCacheEntry * rentry, uint64_t size);
+rc_t RemoteFileAccessor_Make(const SAccessor** accessor, const char* name, struct RCacheEntry * rentry, const RemoteFileNode* node);
struct RemoteFileNode {
FSNode node;
@@ -92,7 +92,7 @@ rc_t RemoteFileNode_Open(
accessor,
cself->node.name,
ke,
- cself->file_sz
+ cself
) ) != 0 ) {
ReleaseComplain(RCacheEntryRelease, ke);
}
@@ -235,7 +235,7 @@ rc_t RemoteFileNode_Make(const KXMLNode* xml_node, FSNode** cself, char* errmsg,
typedef struct RemoteFileAccessor_struct {
struct RCacheEntry* rentry;
- uint64_t size;
+ struct RemoteFileNode * remote_node;
} RemoteFileAccessor;
static
@@ -244,7 +244,7 @@ rc_t RemoteFileAccessor_Read(const SAccessor* cself, char* buf, size_t size, off
rc_t rc = 0;
RemoteFileAccessor* self = (RemoteFileAccessor*)cself;
size_t actual = 0;
- uint64_t actual_file_sz = self -> size;
+ uint64_t actual_file_sz = self -> remote_node -> file_sz;
/* Here we are truncating size if it is needed */
if ( actual_file_sz < offset + size ) {
@@ -256,12 +256,20 @@ rc_t RemoteFileAccessor_Read(const SAccessor* cself, char* buf, size_t size, off
&buf[*num_read],
size - * num_read,
offset + * num_read,
- &actual
+ &actual,
+ &actual_file_sz
);
- if( rc == 0 && actual == 0 ) {
+ if( rc != 0 ) {
+ break;
+ }
+ if( actual == 0 ) {
/* EOF */
break;
}
+
+ if ( actual_file_sz != self -> remote_node -> file_sz ) {
+ self -> remote_node -> file_sz = actual_file_sz;
+ }
*num_read += actual;
} while(rc == 0 && *num_read < size);
DEBUG_MSG(10, ("From %lu read %lu bytes\n", offset, *num_read));
@@ -285,14 +293,14 @@ RemoteFileAccessor_Make (
const SAccessor** accessor,
const char* name,
struct RCacheEntry * rentry,
- uint64_t size
+ const RemoteFileNode* node
)
{
rc_t rc = 0;
if( (rc = SAccessor_Make(accessor, sizeof(RemoteFileAccessor), name, RemoteFileAccessor_Read, RemoteFileAccessor_Release)) == 0 ) {
((RemoteFileAccessor*)(*accessor))->rentry = rentry;
- ((RemoteFileAccessor*)(*accessor))->size = size;
+ ((RemoteFileAccessor*)(*accessor))->remote_node = ( struct RemoteFileNode * ) node;
RCacheEntryAddRef ( rentry );
}
diff --git a/tools/fuse/remote-fuser.vers b/tools/fuse/remote-fuser.vers
index 35d16fb..097a15a 100644
--- a/tools/fuse/remote-fuser.vers
+++ b/tools/fuse/remote-fuser.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/fuse/sra-fuser-test.vers b/tools/fuse/sra-fuser-test.vers
index 35d16fb..097a15a 100644
--- a/tools/fuse/sra-fuser-test.vers
+++ b/tools/fuse/sra-fuser-test.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/fuse/sra-fuser-test.vers.h b/tools/fuse/sra-fuser-test.vers.h
index 3b1a842..02a6a64 100644
--- a/tools/fuse/sra-fuser-test.vers.h
+++ b/tools/fuse/sra-fuser-test.vers.h
@@ -1 +1 @@
-#define SRA_FUSER_TEST_VERS 0x02050007
+#define SRA_FUSER_TEST_VERS 0x02060002
diff --git a/tools/fuse/sra-fuser.vers b/tools/fuse/sra-fuser.vers
index 35d16fb..097a15a 100644
--- a/tools/fuse/sra-fuser.vers
+++ b/tools/fuse/sra-fuser.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/fuse/sra-makeidx.vers b/tools/fuse/sra-makeidx.vers
index 35d16fb..097a15a 100644
--- a/tools/fuse/sra-makeidx.vers
+++ b/tools/fuse/sra-makeidx.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/general-loader/database-loader.cpp b/tools/general-loader/database-loader.cpp
index a1ce2ed..3353ed8 100644
--- a/tools/general-loader/database-loader.cpp
+++ b/tools/general-loader/database-loader.cpp
@@ -862,8 +862,7 @@ GeneralLoader :: DatabaseLoader :: ErrorMessage ( const string & p_text )
rc_t
GeneralLoader :: DatabaseLoader :: LogMessage ( const string & p_text )
{
-#pragma message ( "need to pass the app-name from front end" )
- pLogMsg ( klogInfo, "general-loader: log \"$(t)\"", "t=%s", p_text . c_str () );
+ pLogMsg ( klogInfo, "general-loader: log from $(s): \"$(t)\"", "s=%s,t=%s", m_softwareName . c_str (), p_text . c_str () );
return 0;
}
diff --git a/tools/general-loader/general-loader.vers b/tools/general-loader/general-loader.vers
index 35d16fb..097a15a 100644
--- a/tools/general-loader/general-loader.vers
+++ b/tools/general-loader/general-loader.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/general-loader/general-loader.vers.h b/tools/general-loader/general-loader.vers.h
index 9b7ad62..259ee83 100644
--- a/tools/general-loader/general-loader.vers.h
+++ b/tools/general-loader/general-loader.vers.h
@@ -1 +1 @@
-#define GENERAL_LOADER_VERS 0x02050007
+#define GENERAL_LOADER_VERS 0x02060002
diff --git a/tools/general-loader/libgeneral-writer.vers b/tools/general-loader/libgeneral-writer.vers
index 35d16fb..097a15a 100644
--- a/tools/general-loader/libgeneral-writer.vers
+++ b/tools/general-loader/libgeneral-writer.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/kar/kar.c b/tools/kar/kar.c
index 6ab8231..2d9272c 100644
--- a/tools/kar/kar.c
+++ b/tools/kar/kar.c
@@ -37,6 +37,7 @@
#include <kfs/tar.h>
#include <kfs/toc.h>
#include <kfs/sra.h>
+#include <kfs/md5.h>
#include <klib/log.h>
#include <klib/out.h>
#include <klib/status.h>
@@ -69,6 +70,7 @@ KDirectory * kdir;
#define OPTION_LONGLIST "long-list"
#define OPTION_DIRECTORY "directory"
#define OPTION_ALIGN "align"
+#define OPTION_MD5 "md5"
#define ALIAS_CREATE "c"
#define ALIAS_TEST "t"
@@ -105,6 +107,8 @@ static const char * align_usage[] =
static const char * longlist_usage[] =
{ "more information will be given on each file",
"in test/list mode.", NULL };
+static const char * md5_usage[] = { "create md5sum-compatible checksum file", NULL };
+
OptDef Options[] =
{
@@ -115,7 +119,8 @@ OptDef Options[] =
{ OPTION_FORCE, ALIAS_FORCE, NULL, force_usage, 0, false, false },
{ OPTION_LONGLIST, ALIAS_LONGLIST, NULL, longlist_usage, 0, false, false },
{ OPTION_DIRECTORY, ALIAS_DIRECTORY, NULL, directory_usage, 1, true, false },
- { OPTION_ALIGN, ALIAS_ALIGN, NULL, align_usage, 1, true, false }
+ { OPTION_ALIGN, ALIAS_ALIGN, NULL, align_usage, 1, true, false },
+ { OPTION_MD5, NULL, NULL, md5_usage, 0, false, false }
};
const char UsageDefaultName[] = "kar";
@@ -181,6 +186,7 @@ rc_t CC Usage (const Args * args)
HelpOptionLine (ALIAS_FORCE, OPTION_FORCE, NULL, force_usage);
HelpOptionLine (ALIAS_ALIGN, OPTION_ALIGN, "alignment", align_usage);
HelpOptionLine (ALIAS_LONGLIST, OPTION_LONGLIST, NULL, longlist_usage);
+ HelpOptionLine (NULL, OPTION_MD5, NULL, md5_usage);
HelpOptionsStandard ();
@@ -228,6 +234,8 @@ static
bool long_list;
static
bool force;
+static
+bool md5sum;
static
KSRAFileAlignment alignment;
@@ -541,10 +549,61 @@ rc_t open_out_file (const char * path, KFile ** fout)
}
if (rc == 0)
{
- rc = KDirectoryVCreateFile (kdir, fout, false, 0664,
- mode, path, NULL);
+ rc = KDirectoryCreateFile (kdir, fout, false, 0664,
+ mode, "%s", path);
if (rc)
PLOGERR (klogFatal, (klogFatal, rc, "unable to create archive [$(A)]", PLOG_S(A), path));
+ else if ( md5sum )
+ {
+ KFile *md5_f;
+
+ /* create the *.md5 file to hold md5sum-compatible checksum */
+ rc = KDirectoryCreateFile ( kdir, &md5_f, false, 0664, mode, "%s.md5", path );
+ if ( rc )
+ PLOGERR (klogFatal, (klogFatal, rc, "unable to create md5 file [$(A).md5]", PLOG_S(A), path));
+ else
+ {
+ KMD5SumFmt *fmt;
+
+ /* create md5 formatter to write to md5_f */
+ rc = KMD5SumFmtMakeUpdate ( &fmt, md5_f );
+ if ( rc )
+ LOGERR (klogErr, rc, "failed to make KMD5SumFmt");
+ else
+ {
+ KMD5File *kmd5_f;
+
+ size_t size = string_size ( path );
+ const char *fname = string_rchr ( path, size, '/' );
+ if ( fname ++ == NULL )
+ fname = path;
+
+ /* KMD5SumFmtMakeUpdate() took over ownership of "md5_f" */
+ md5_f = NULL;
+
+ /* create a file that knows how to calculate md5 as data
+ are written-through to archive, and then write digest
+ result to fmt, using "fname" as description. */
+ rc = KMD5FileMakeWrite ( &kmd5_f, * fout, fmt, fname );
+ KMD5SumFmtRelease ( fmt );
+ if ( rc )
+ LOGERR (klogErr, rc, "failed to make KMD5File");
+ else
+ {
+ /* success */
+ *fout = KMD5FileToKFile ( kmd5_f );
+ return 0;
+ }
+ }
+
+ /* error cleanup */
+ KFileRelease ( md5_f );
+ }
+
+ /* error cleanup */
+ KFileRelease ( * fout );
+ * fout = NULL;
+ }
}
return rc;
}
@@ -1405,6 +1464,11 @@ rc_t CC KMain ( int argc, char *argv [] )
break;
force = (pcount != 0);
+ rc = ArgsOptionCount (args, OPTION_MD5, &pcount);
+ if (rc)
+ break;
+ md5sum = (pcount != 0);
+
mode = OM_NONE;
rc = ArgsOptionCount (args, OPTION_DIRECTORY, &pcount);
diff --git a/tools/kar/kar.vers b/tools/kar/kar.vers
index 35d16fb..097a15a 100644
--- a/tools/kar/kar.vers
+++ b/tools/kar/kar.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/kar/kar.vers.h b/tools/kar/kar.vers.h
index e25cd88..72ddb2d 100644
--- a/tools/kar/kar.vers.h
+++ b/tools/kar/kar.vers.h
@@ -1 +1 @@
-#define KAR_VERS 0x02050007
+#define KAR_VERS 0x02060002
diff --git a/tools/kar/sra-kar.vers b/tools/kar/sra-kar.vers
index 35d16fb..097a15a 100644
--- a/tools/kar/sra-kar.vers
+++ b/tools/kar/sra-kar.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/kar/sra-kar.vers.h b/tools/kar/sra-kar.vers.h
index e1d73c2..77c127a 100644
--- a/tools/kar/sra-kar.vers.h
+++ b/tools/kar/sra-kar.vers.h
@@ -1 +1 @@
-#define SRA_KAR_VERS 0x02050007
+#define SRA_KAR_VERS 0x02060002
diff --git a/tools/keyring-srv/keyring-srv.vers b/tools/keyring-srv/keyring-srv.vers
index 35d16fb..097a15a 100644
--- a/tools/keyring-srv/keyring-srv.vers
+++ b/tools/keyring-srv/keyring-srv.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/kget/examples.sh b/tools/kget/examples.sh
new file mode 100755
index 0000000..67cf37b
--- /dev/null
+++ b/tools/kget/examples.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+execute()
+{
+ echo "------------------------------------------------------"
+ echo $1
+ eval $1
+ echo "."
+}
+
+ACC="NC_011748.1"
+URL=`srapath $ACC`
+CACHEFILE="cache.dat"
+PROXY="localhost:8080"
+
+echo "example number 1: show the size of the remote file ( HEAD request )"
+execute "time kget $URL --show-size"
+
+echo "example number 2: download the remote file, no buffering, no cachefile"
+echo " in 32k blocks, using HTTPFile, filename extracted from URL"
+execute "time kget $URL"
+
+echo "example number 3: download the remote file, wget-style: without partial requests"
+echo " filename extracted from URL"
+execute "time kget $URL --full"
+
+echo "example number 4: download the remote file, no buffering, no cachefile"
+echo " in 32k blocks, using HTTPFile, filename different from URL"
+execute "time kget $URL some_reference.dat"
+
+echo "example number 5: download the remote file, no buffering, no cachefile"
+echo " in 32k blocks, using ReliableHTTPFile"
+execute "time kget $URL some_reference.dat --reliable"
+
+echo "example number 6: download the remote file, no buffering, no cachefile"
+echo " in 128k blocks, using HTTPFile"
+#blocksize of 128 kilobyte can be expressed as '128k' or '131072' or '0x020000'
+execute "time kget $URL --block-size 128k"
+
+echo "example number 7: download the remote file, no cachefile"
+echo " in 32k blocks, wraped in a buffer-file with 128k blocks"
+execute "time kget $URL --buffer 128k"
+
+echo "example number 8: download the remote file, using a cache-file "
+echo " ( dflt cache-blocksize of 128 k ) in 32k blocks, "
+execute "rm -f $CACHEFILE"
+execute "time kget $URL --cache $CACHEFILE"
+
+echo "example number 9: download the remote file, using a cache-file"
+echo " ( cache-blocksize of 256 k ) in 32k blocks, "
+execute "rm -f $CACHEFILE"
+execute "time kget $URL --cache $CACHEFILE --cache-block 256k"
+
+echo "example number 10: download the remote file, no buffering, no cachefile"
+echo " in 32k blocks, but requests are made in random order"
+execute "time kget $URL --random"
+
+#enable this example only after updating the PROXY-variable
+#and actually having a running proxy there!
+#echo "example number X: download the remote file, using a proxy"
+#echo " in 32k blocks, "
+#execute "time kget $URL --proxy $PROXY"
diff --git a/tools/kget/kget.c b/tools/kget/kget.c
index 8d5bdb3..87423fc 100644
--- a/tools/kget/kget.c
+++ b/tools/kget/kget.c
@@ -44,6 +44,7 @@
#include <kns/manager.h>
#include <kns/kns-mgr-priv.h>
#include <kns/http.h>
+#include <kns/stream.h>
#include <kproc/timeout.h>
@@ -73,81 +74,99 @@ rc_t CC UsageSummary ( const char * progname )
#define OPTION_VERB "verb"
#define ALIAS_VERB "b"
-static const char * verb_usage[] = { "execute verbose", NULL };
+static const char * verb_usage[] = { "execute verbose", NULL };
#define OPTION_BLOCK "block-size"
#define ALIAS_BLOCK "s"
-static const char * block_usage[] = { "how many bytes per block", NULL };
+static const char * block_usage[] = { "how many bytes per block", NULL };
#define OPTION_SHOW "show-size"
#define ALIAS_SHOW "w"
-static const char * show_usage[] = { "query size of remote file first", NULL };
+static const char * show_usage[] = { "query size of remote file first", NULL };
#define OPTION_CACHE "cache"
#define ALIAS_CACHE "c"
-static const char * cache_usage[] = { "wrap the remote-file into a KCacheTeeFile", NULL };
+static const char * cache_usage[] = { "wrap the remote-file into a KCacheTeeFile", NULL };
+
+#define OPTION_CACHE_BLK "cache-block"
+static const char * cache_blk_usage[] = { "blocksize inside a KCacheTeeFile", NULL };
+
+#define OPTION_PROXY "proxy"
+static const char * proxy_usage[] = { "use a proxy to download remote file", NULL };
#define OPTION_RAND "random"
#define ALIAS_RAND "r"
-static const char * random_usage[] = { "request blocks in random order", NULL };
+static const char * random_usage[] = { "request blocks in random order", NULL };
#define OPTION_REP "repeat"
#define ALIAS_REP "e"
-static const char * repeat_usage[] = { "request blocks with repeats if in random order", NULL };
+static const char * repeat_usage[] = { "request blocks with repeats if in random order", NULL };
#define OPTION_CREPORT "report"
#define ALIAS_CREPORT "p"
-static const char * creport_usage[] = { "report cache usage", NULL };
+static const char * creport_usage[] = { "report cache usage", NULL };
#define OPTION_COMPLETE "complete"
-static const char * complete_usage[] = { "check if 1st parameter is a complete", NULL };
+static const char * complete_usage[] = { "check if 1st parameter is a complete", NULL };
#define OPTION_TRUNC "truncate"
-static const char * truncate_usage[] = { "truncate the file ( 1st parameter ) / remove trailing cache-bitmap", NULL };
+static const char * truncate_usage[] = { "truncate the file ( 1st parameter ) / remove trailing cache-bitmap", NULL };
#define OPTION_START "start"
-static const char * start_usage[] = { "offset where to read from", NULL };
+static const char * start_usage[] = { "offset where to read from", NULL };
#define OPTION_COUNT "count"
-static const char * count_usage[] = { "number of bytes to read", NULL };
+static const char * count_usage[] = { "number of bytes to read", NULL };
#define OPTION_PROGRESS "progress"
-static const char * progress_usage[] = { "show progress", NULL };
+static const char * progress_usage[] = { "show progress", NULL };
#define OPTION_RELIABLE "reliable"
-static const char * reliable_usage[] = { "use reliable version of http-file", NULL };
+static const char * reliable_usage[] = { "use reliable version of http-file", NULL };
#define OPTION_BUFFER "buffer"
#define ALIAS_BUFFER "u"
-static const char * buffer_usage[] = { "wrap remote file into KBufFile with this buffer-size", NULL };
+static const char * buffer_usage[] = { "wrap remote file into KBufFile with this buffer-size", NULL };
#define OPTION_SLEEP "sleep"
#define ALIAS_SLEEP "i"
-static const char * sleep_usage[] = { "sleep inbetween requests by this amount of ms", NULL };
+static const char * sleep_usage[] = { "sleep inbetween requests by this amount of ms", NULL };
#define OPTION_TIMEOUT "timeout"
#define ALIAS_TIMEOUT "m"
-static const char * timeout_usage[] = { "use timed read with tis amount of ms as timeout", NULL };
+static const char * timeout_usage[] = { "use timed read with tis amount of ms as timeout", NULL };
+
+#define OPTION_CCOMPL "cache-complete"
+#define ALIAS_CCOMPL "a"
+static const char * ccompl_usage[] = { "check completeness on open cacheteefile", NULL };
+
+#define OPTION_FULL "full"
+#define ALIAS_FULL "f"
+static const char * full_usage[] = { "download via one http-request, not partial requests in a loop", NULL };
OptDef MyOptions[] =
{
-/* name alias fkt usage-txt, cnt, needs value, required */
- { OPTION_VERB, ALIAS_VERB, NULL, verb_usage, 1, false, false },
- { OPTION_BLOCK, ALIAS_BLOCK, NULL, block_usage, 1, true, false },
- { OPTION_SHOW, ALIAS_SHOW, NULL, show_usage, 1, false, false },
- { OPTION_CACHE, ALIAS_CACHE, NULL, cache_usage, 1, true, false },
- { OPTION_RAND, ALIAS_RAND, NULL, random_usage, 1, false, false },
- { OPTION_REP, ALIAS_REP, NULL, repeat_usage, 1, false, false },
- { OPTION_CREPORT, ALIAS_CREPORT, NULL, creport_usage, 1, false, false },
- { OPTION_BUFFER, ALIAS_BUFFER, NULL, buffer_usage, 1, true, false },
- { OPTION_SLEEP, ALIAS_SLEEP, NULL, sleep_usage, 1, true, false },
- { OPTION_TIMEOUT, ALIAS_TIMEOUT, NULL, timeout_usage, 1, true, false },
- { OPTION_COMPLETE,NULL, NULL, complete_usage, 1, false, false },
- { OPTION_TRUNC, NULL, NULL, truncate_usage, 1, false, false },
- { OPTION_START, NULL, NULL, start_usage, 1, true, false },
- { OPTION_COUNT, NULL, NULL, count_usage, 1, true, false },
- { OPTION_PROGRESS,NULL, NULL, progress_usage, 1, false, false },
- { OPTION_RELIABLE,NULL, NULL, reliable_usage, 1, false, false }
+/* name alias fkt usage-txt, cnt, needs value, required */
+ { OPTION_VERB, ALIAS_VERB, NULL, verb_usage, 1, false, false },
+ { OPTION_BLOCK, ALIAS_BLOCK, NULL, block_usage, 1, true, false },
+ { OPTION_SHOW, ALIAS_SHOW, NULL, show_usage, 1, false, false },
+ { OPTION_CACHE, ALIAS_CACHE, NULL, cache_usage, 1, true, false },
+ { OPTION_CACHE_BLK, NULL, NULL, cache_blk_usage, 1, true, false },
+ { OPTION_PROXY, NULL, NULL, proxy_usage, 1, true, false },
+ { OPTION_RAND, ALIAS_RAND, NULL, random_usage, 1, false, false },
+ { OPTION_REP, ALIAS_REP, NULL, repeat_usage, 1, false, false },
+ { OPTION_CREPORT, ALIAS_CREPORT, NULL, creport_usage, 1, false, false },
+ { OPTION_BUFFER, ALIAS_BUFFER, NULL, buffer_usage, 1, true, false },
+ { OPTION_SLEEP, ALIAS_SLEEP, NULL, sleep_usage, 1, true, false },
+ { OPTION_TIMEOUT, ALIAS_TIMEOUT, NULL, timeout_usage, 1, true, false },
+ { OPTION_COMPLETE, NULL, NULL, complete_usage, 1, false, false },
+ { OPTION_CCOMPL, ALIAS_CCOMPL, NULL, ccompl_usage, 1, false, false },
+ { OPTION_TRUNC, NULL, NULL, truncate_usage, 1, false, false },
+ { OPTION_START, NULL, NULL, start_usage, 1, true, false },
+ { OPTION_COUNT, NULL, NULL, count_usage, 1, true, false },
+ { OPTION_PROGRESS, NULL, NULL, progress_usage, 1, false, false },
+ { OPTION_RELIABLE, NULL, NULL, reliable_usage, 1, false, false },
+ { OPTION_FULL, ALIAS_FULL, NULL, full_usage, 1, false, false }
};
rc_t CC Usage ( const Args * args )
@@ -194,11 +213,13 @@ typedef struct fetch_ctx
const char *url;
const char *destination;
const char *cache_file;
+ const char * proxy;
size_t blocksize;
size_t start, count;
- size_t buffer_size;
- size_t sleep_time;
- size_t timeout_time;
+ size_t buffer_size;
+ size_t sleep_time;
+ size_t timeout_time;
+ size_t cache_blk;
bool verbose;
bool show_filesize;
bool random;
@@ -206,34 +227,36 @@ typedef struct fetch_ctx
bool show_curl_version;
bool report_cache;
bool check_cache_complete;
+ bool check_completeness;
bool truncate_cache;
bool local_read_only;
bool show_progress;
- bool reliable;
+ bool reliable;
+ bool full_download;
} fetch_ctx;
static rc_t src_2_dst( const KFile *src, KFile *dst, char * buffer,
- uint64_t pos, size_t * num_read, fetch_ctx * ctx )
+ uint64_t pos, size_t * num_read, fetch_ctx * ctx )
{
- rc_t rc;
- size_t n_transfer = ( ctx->count == 0 ? ctx->blocksize : ctx->count );
-
- if ( ctx->timeout_time == 0 )
- rc = KFileReadAll ( src, pos, buffer, n_transfer, num_read );
- else
- {
- timeout_t tm;
- rc = TimeoutInit ( &tm, ctx->timeout_time );
- if ( rc == 0 )
- rc = KFileTimedReadAll ( src, pos, buffer, n_transfer, num_read, &tm );
- }
- if ( rc == 0 && *num_read > 0 )
- {
- size_t num_writ;
- rc = KFileWriteAll ( dst, pos, buffer, *num_read, &num_writ );
- }
- return rc;
+ rc_t rc;
+ size_t n_transfer = ( ctx->count == 0 ? ctx->blocksize : ctx->count );
+
+ if ( ctx->timeout_time == 0 )
+ rc = KFileReadAll ( src, pos, buffer, n_transfer, num_read );
+ else
+ {
+ timeout_t tm;
+ rc = TimeoutInit ( &tm, ctx->timeout_time );
+ if ( rc == 0 )
+ rc = KFileTimedReadAll ( src, pos, buffer, n_transfer, num_read, &tm );
+ }
+ if ( rc == 0 && *num_read > 0 )
+ {
+ size_t num_writ;
+ rc = KFileWriteAll ( dst, pos, buffer, *num_read, &num_writ );
+ }
+ return rc;
}
@@ -242,20 +265,20 @@ static rc_t block_loop_in_order( const KFile *src, KFile *dst, char * buffer,
{
rc_t rc = 0;
uint64_t pos = 0;
- uint32_t blocks = 0;
+ uint32_t blocks = 0;
size_t num_read = 1;
KOutMsg( "copy-mode : linear read/write\n" );
while ( rc == 0 && num_read > 0 )
{
- rc = src_2_dst( src, dst, buffer, pos, &num_read, ctx );
- if ( rc == 0 ) pos += num_read;
- if ( ctx->show_progress && ( ( blocks & 0x0F ) == 0 ) ) KOutMsg( "." );
- blocks++;
- if ( ctx->sleep_time > 0 ) KSleepMs( ctx->sleep_time );
+ rc = src_2_dst( src, dst, buffer, pos, &num_read, ctx );
+ if ( rc == 0 ) pos += num_read;
+ if ( ctx->show_progress && ( ( blocks & 0x0F ) == 0 ) ) KOutMsg( "." );
+ blocks++;
+ if ( ctx->sleep_time > 0 ) KSleepMs( ctx->sleep_time );
}
*bytes_copied = pos;
if ( ctx->show_progress ) KOutMsg( "\n" );
- KOutMsg( "%d blocks a %d bytes\n", blocks, ctx->blocksize );
+ KOutMsg( "%d blocks a %d bytes\n", blocks, ctx->blocksize );
return rc;
}
@@ -284,36 +307,36 @@ static rc_t block_loop_random( const KFile *src, KFile *dst, char * buffer,
if ( block_vector != NULL )
{
uint32_t loop;
-
- /* fill the block_vector with ascending numbers */
- for ( loop = 0; loop < block_count; loop++ )
- block_vector[ loop ] = loop;
-
- /* randomize them */
- for ( loop = 0; loop < block_count; loop++ )
- {
- uint32_t src_idx = randr( 0, block_count - 1 );
- uint32_t dst_idx = randr( 0, block_count - 1 );
- /* swap it... */
- uint32_t tmp = block_vector[ dst_idx ];
- block_vector[ dst_idx ] = block_vector[ src_idx ];
- block_vector[ src_idx ] = tmp;
- }
-
- for ( loop = 0; rc == 0 && loop < block_count; loop++ )
- {
- size_t num_read;
- uint64_t pos = ctx->blocksize;
- pos *= block_vector[ loop ];
- rc = src_2_dst( src, dst, buffer, pos, &num_read, ctx );
- if ( rc == 0 ) *bytes_copied += num_read;
- if ( ctx->show_progress && ( ( loop & 0x0F ) == 0 ) ) KOutMsg( "." );
- if ( ctx->sleep_time > 0 ) KSleepMs( ctx->sleep_time );
-
- }
+
+ /* fill the block_vector with ascending numbers */
+ for ( loop = 0; loop < block_count; loop++ )
+ block_vector[ loop ] = loop;
+
+ /* randomize them */
+ for ( loop = 0; loop < block_count; loop++ )
+ {
+ uint32_t src_idx = randr( 0, block_count - 1 );
+ uint32_t dst_idx = randr( 0, block_count - 1 );
+ /* swap it... */
+ uint32_t tmp = block_vector[ dst_idx ];
+ block_vector[ dst_idx ] = block_vector[ src_idx ];
+ block_vector[ src_idx ] = tmp;
+ }
+
+ for ( loop = 0; rc == 0 && loop < block_count; loop++ )
+ {
+ size_t num_read;
+ uint64_t pos = ctx->blocksize;
+ pos *= block_vector[ loop ];
+ rc = src_2_dst( src, dst, buffer, pos, &num_read, ctx );
+ if ( rc == 0 ) *bytes_copied += num_read;
+ if ( ctx->show_progress && ( ( loop & 0x0F ) == 0 ) ) KOutMsg( "." );
+ if ( ctx->sleep_time > 0 ) KSleepMs( ctx->sleep_time );
+
+ }
free( block_vector );
- if ( ctx->show_progress ) KOutMsg( "\n" );
- KOutMsg( "%d blocks a %d bytes\n", loop, ctx->blocksize );
+ if ( ctx->show_progress ) KOutMsg( "\n" );
+ KOutMsg( "%d blocks a %d bytes\n", loop, ctx->blocksize );
}
}
}
@@ -344,8 +367,8 @@ static rc_t copy_file( const KFile * src, KFile * dst, fetch_ctx * ctx )
else
{
size_t num_read;
- rc = src_2_dst( src, dst, buffer, ctx->start, &num_read, ctx );
- if ( rc == 0 ) bytes_copied = num_read;
+ rc = src_2_dst( src, dst, buffer, ctx->start, &num_read, ctx );
+ if ( rc == 0 ) bytes_copied = num_read;
}
KOutMsg( "%lu bytes copied\n", bytes_copied );
free( buffer );
@@ -354,25 +377,27 @@ static rc_t copy_file( const KFile * src, KFile * dst, fetch_ctx * ctx )
}
+#define CACHE_TEE_DEFAULT_BLOCKSIZE ( 32 * 1024 * 4 )
+
static rc_t fetch_cached( KDirectory *dir, const KFile *src, KFile *dst, fetch_ctx *ctx )
{
- rc_t rc = 0;
- const KFile *tee; /* this is the file that forks persistent_content with remote */
- size_t cache_tee_block = 0; /* ctx->blocksize; */
-
- KOutMsg( "persistent cache created\n" );
-
- rc = KDirectoryMakeCacheTee ( dir, /* the KDirectory for the the sparse-file */
- &tee, /* the newly created cache-tee-file */
- src, /* the file that we are wrapping ( usually the remote http-file ) */
- cache_tee_block, /* how big one block in the cache-tee-file will be */
- ctx->cache_file ); /* the sparse-file we use write to */
- if ( rc == 0 )
- {
- KOutMsg( "cache tee created\n" );
- rc = copy_file( tee, dst, ctx );
- KFileRelease( tee );
- }
+ size_t bs = ctx->cache_blk == 0 ? CACHE_TEE_DEFAULT_BLOCKSIZE : ctx->cache_blk;
+ rc_t rc = KOutMsg( "persistent cache created : '%s' (blk-size: %d)\n", ctx->cache_file, bs );
+ if ( rc == 0 )
+ {
+ const KFile *tee; /* this is the file that forks persistent_content with remote */
+ rc = KDirectoryMakeCacheTee ( dir, /* the KDirectory for the the sparse-file */
+ &tee, /* the newly created cache-tee-file */
+ src, /* the file that we are wrapping ( usually the remote http-file ) */
+ ctx->cache_blk, /* how big one block in the cache-tee-file will be */
+ ctx->cache_file ); /* the sparse-file we use write to */
+ if ( rc == 0 )
+ {
+ KOutMsg( "cache tee created\n" );
+ rc = copy_file( tee, dst, ctx );
+ KFileRelease( tee );
+ }
+ }
return rc;
}
@@ -388,70 +413,66 @@ static void extract_name( char ** dst, const char * url )
static rc_t fetch_from( KDirectory *dir, fetch_ctx *ctx, char * outfile,
- const KFile * src )
+ const KFile * src )
{
- uint64_t file_size;
+ uint64_t file_size;
rc_t rc = KFileSize( src, &file_size );
- if ( rc != 0 )
- {
- KOutMsg( "cannot disover src-size >%R<\n", rc );
- }
- else
- {
- KFile *dst;
- KOutMsg( "src-size = %lu\n", file_size );
- rc = KDirectoryCreateFile ( dir, &dst, false, 0664, kcmInit, outfile );
- if ( rc == 0 )
- {
- KOutMsg( "dst >%s< created\n", outfile );
- if ( rc == 0 )
- {
- if ( ctx->cache_file != NULL )
- rc = fetch_cached( dir, src, dst, ctx );
- else
- rc = copy_file( src, dst, ctx );
- }
- KFileRelease( dst );
- }
- }
- return rc;
+ if ( rc != 0 )
+ KOutMsg( "cannot disover src-size >%R<\n", rc );
+ else
+ {
+ KFile *dst;
+ KOutMsg( "src-size = %lu\n", file_size );
+ rc = KDirectoryCreateFile ( dir, &dst, false, 0664, kcmInit, outfile );
+ if ( rc == 0 )
+ {
+ KOutMsg( "dst >%s< created\n", outfile );
+ if ( rc == 0 )
+ {
+ if ( ctx->cache_file != NULL )
+ rc = fetch_cached( dir, src, dst, ctx );
+ else
+ rc = copy_file( src, dst, ctx );
+ }
+ KFileRelease( dst );
+ }
+ }
+ return rc;
}
static rc_t make_remote_file( struct KNSManager * kns_mgr, const KFile ** src, fetch_ctx * ctx )
{
- rc_t rc;
-
- KNSManagerSetVerbose( kns_mgr, ctx->verbose );
- if ( ctx->reliable )
- rc = KNSManagerMakeReliableHttpFile( kns_mgr, src, NULL, 0x01010000, ctx->url );
- else
- rc = KNSManagerMakeHttpFile ( kns_mgr, src, NULL, 0x01010000, ctx->url );
-
- if ( rc != 0 )
- {
- if ( ctx->reliable )
- (void)LOGERR( klogInt, rc, "KNSManagerMakeReliableHttpFile() failed" );
- else
- (void)LOGERR( klogInt, rc, "KNSManagerMakeHttpFile() failed" );
- }
- else
- {
- const KFile * temp_file;
+ rc_t rc;
+
+ KNSManagerSetVerbose( kns_mgr, ctx->verbose );
+ if ( ctx->reliable )
+ rc = KNSManagerMakeReliableHttpFile( kns_mgr, src, NULL, 0x01010000, ctx->url );
+ else
+ rc = KNSManagerMakeHttpFile( kns_mgr, src, NULL, 0x01010000, ctx->url );
+
+ if ( rc != 0 )
+ {
+ if ( ctx->reliable )
+ (void)LOGERR( klogInt, rc, "KNSManagerMakeReliableHttpFile() failed" );
+ else
+ (void)LOGERR( klogInt, rc, "KNSManagerMakeHttpFile() failed" );
+ }
+ else
+ {
if ( ctx->buffer_size > 0 )
{
- /* there is no cache_location! just wrap the remote file in a buffer */
- /* rc = KBigBlockReaderMake ( &temp_file, *src, ctx->buffer_size ); */
+ const KFile * temp_file;
rc = KBufFileMakeRead ( & temp_file, *src, ctx->buffer_size );
- if ( rc == 0 )
- {
- KOutMsg( "remote-file wrapped in new big-block-reader of size %d\n", ctx->buffer_size );
- KFileRelease ( *src );
- *src = temp_file;
- }
+ if ( rc == 0 )
+ {
+ KOutMsg( "remote-file wrapped in new big-block-reader of size %d\n", ctx->buffer_size );
+ KFileRelease ( *src );
+ *src = temp_file;
+ }
}
- }
- return rc;
+ }
+ return rc;
}
@@ -460,8 +481,8 @@ static rc_t fetch( KDirectory *dir, fetch_ctx *ctx )
rc_t rc = 0;
char * outfile;
const KFile * remote;
- struct KNSManager * kns_mgr;
-
+ struct KNSManager * kns_mgr;
+
if ( ctx->destination == NULL )
extract_name( &outfile, ctx->url );
else
@@ -472,83 +493,311 @@ static rc_t fetch( KDirectory *dir, fetch_ctx *ctx )
if ( ctx->count > 0 )
KOutMsg( "range : %u.%u\n", ctx->start, ctx->count );
- rc = KNSManagerMake ( &kns_mgr );
- if ( rc != 0 )
- (void)LOGERR( klogInt, rc, "KNSManagerMake() failed" );
- else
- {
- rc = make_remote_file( kns_mgr, &remote, ctx );
- if ( rc == 0 )
- {
- rc = fetch_from( dir, ctx, outfile, remote );
- KFileRelease( remote );
- }
- KNSManagerRelease( kns_mgr );
- }
-
- free( outfile );
+ rc = KNSManagerMake ( &kns_mgr );
+ if ( rc != 0 )
+ (void)LOGERR( klogInt, rc, "KNSManagerMake() failed" );
+ else
+ {
+ if ( ctx->proxy != NULL )
+ {
+ rc = KNSManagerSetHTTPProxyPath( kns_mgr, "%s", ctx->proxy );
+ if ( rc != 0 )
+ (void)LOGERR( klogInt, rc, "KNSManagerSetHTTPProxyPath() failed" );
+ }
+ if ( rc == 0 )
+ {
+ rc = make_remote_file( kns_mgr, &remote, ctx );
+ if ( rc == 0 )
+ {
+ rc = fetch_from( dir, ctx, outfile, remote );
+ KFileRelease( remote );
+ }
+ }
+ KNSManagerRelease( kns_mgr );
+ }
+
+ free( outfile );
return rc;
}
+
+/* -------------------------------------------------------------------------------------------------------------------- */
+
+
+/* Print the source URL and the size reported for the remote file; nothing is downloaded.
+ * Returns the first non-zero rc encountered, 0 otherwise. */
static rc_t show_size( KDirectory *dir, fetch_ctx *ctx )
{
- rc_t rc = 0;
- const KFile * remote;
- struct KNSManager * kns_mgr;
-
- KOutMsg( "source: >%s<\n", ctx->url );
- rc = KNSManagerMake ( &kns_mgr );
- if ( rc != 0 )
- (void)LOGERR( klogInt, rc, "KNSManagerMake() failed" );
- else
- {
- rc = make_remote_file( kns_mgr, &remote, ctx );
- if ( rc == 0 )
- {
- uint64_t file_size;
- rc = KFileSize( remote, &file_size );
- KOutMsg( "file-size = %u\n", file_size );
- KFileRelease( remote );
- }
- KNSManagerRelease( kns_mgr );
- }
+ rc_t rc = KOutMsg( "source: >%s<\n", ctx->url );
+ if ( rc == 0 )
+ {
+ struct KNSManager * kns_mgr;
+ rc = KNSManagerMake ( &kns_mgr );
+ if ( rc != 0 )
+ (void)LOGERR( klogInt, rc, "KNSManagerMake() failed" );
+ else
+ {
+ const KFile * remote;
+ rc = make_remote_file( kns_mgr, &remote, ctx );
+ if ( rc == 0 )
+ {
+ uint64_t file_size;
+ rc = KFileSize( remote, &file_size );
+ /* file_size is only set on success; %lu is what this file uses for 64-bit values */
+ if ( rc == 0 )
+ KOutMsg( "file-size = %lu\n", file_size );
+ KFileRelease( remote );
+ }
+ KNSManagerRelease( kns_mgr );
+ }
+ }
return rc;
}
+/* -------------------------------------------------------------------------------------------------------------------- */
+
+
+/* Check cache completeness on a raw file in the filesystem.
+ * ctx->url is used as the local path of the cache file; the outcome is printed.
+ * Returns the first non-zero rc encountered, 0 otherwise. */
static rc_t check_cache_complete( KDirectory *dir, fetch_ctx *ctx )
{
+    const KFile *cache;
+    bool all_cached;
+    rc_t rc = KOutMsg( "checking if this cache file >%s< is complete\n", ctx->url );
+    if ( rc != 0 )
+        return rc;
+
+    rc = KDirectoryOpenFileRead( dir, &cache, "%s", ctx->url );
+    if ( rc != 0 )
+        return rc;
+
+    /* from here on the file is open: every path below falls through to the release */
+    rc = IsCacheFileComplete( cache, &all_cached );
+    if ( rc != 0 )
+        KOutMsg( "error performing IsCacheFileComplete() %R\n", rc );
+    else if ( all_cached )
+        KOutMsg( "the file is complete\n" );
+    else
+    {
+        float percent = 0;
+        uint64_t bytes_cached;
+        rc = GetCacheCompleteness( cache, &percent, &bytes_cached );
+        if ( rc == 0 )
+            KOutMsg( "the file is %f%% complete ( %lu bytes are cached )\n", percent, bytes_cached );
+    }
+    KFileRelease( cache );
+    return rc;
+}
+
+
+/* -------------------------------------------------------------------------------------------------------------------- */
+
+
+/* Read src from offset 0 until a read returns no bytes or fails; the data is discarded.
+ * The chunk size is ctx->count when that is non-zero, ctx->blocksize otherwise.
+ * Prints the number of bytes read and returns the rc of the last operation. */
+static rc_t fetch_loop( const KFile * src, fetch_ctx *ctx )
+{
+    rc_t rc = 0;
+    uint64_t offset = 0;
+    size_t chunk_size = ctx->blocksize;
+    char * chunk;
+
+    if ( ctx->count != 0 )
+        chunk_size = ctx->count;
+
+    chunk = malloc( chunk_size );
+    if ( chunk == NULL )
+    {
+        rc = RC( rcExe, rcFile, rcPacking, rcMemory, rcExhausted );
+        KOutMsg( "cant make buffer of size %u\n", chunk_size );
+        return rc;
+    }
+
+    for ( ;; )
+    {
+        size_t got = 0;
+        rc = KFileReadAll( src, offset, chunk, chunk_size, &got );
+        if ( rc != 0 || got == 0 )
+            break;
+        offset += got;
+    }
+
+    KOutMsg( "%lu bytes copied\n", offset );
+    free( chunk );
+    return rc;
+}
- KOutMsg( "checking if this cache file >%s< is complete\n", ctx->url );
- rc = KDirectoryOpenFileRead( dir, &f, "%s", ctx->url );
+/* Check cache completeness on an open cache-tee file.
+ *
+ * Wraps the remote file ( ctx->url ) in a cache-tee backed by ctx->cache_file,
+ * reads the tee from start to end via fetch_loop() and then prints what
+ * IsCacheTeeComplete() reports.
+ * Returns the first non-zero rc encountered, 0 otherwise. */
+static rc_t check_cache_completeness( KDirectory *dir, fetch_ctx *ctx )
+{
+ rc_t rc = KOutMsg( "check if IsCacheTeeComplete() works as intended\n" );
if ( rc == 0 )
{
- bool is_complete;
- rc = IsCacheFileComplete( f, &is_complete );
+ struct KNSManager * kns_mgr;
+ rc = KNSManagerMake ( &kns_mgr );
if ( rc != 0 )
- KOutMsg( "error performing IsCacheFileComplete() %R\n", rc );
+ (void)LOGERR( klogInt, rc, "KNSManagerMake() failed" );
else
{
- if ( is_complete )
- KOutMsg( "the file is complete\n" );
- else
+ const KFile * remote;
+ rc = make_remote_file( kns_mgr, &remote, ctx );
+ if ( rc == 0 )
{
- float percent = 0;
- uint64_t bytes_cached;
- rc = GetCacheCompleteness( f, &percent, &bytes_cached );
+ const KFile *tee; /* this is the file that forks persistent_content with remote */
+ rc = KDirectoryMakeCacheTee ( dir, /* the KDirectory for the sparse-file */
+ &tee, /* the newly created cache-tee-file */
+ remote, /* the file that we are wrapping ( usually the remote http-file ) */
+ ctx->cache_blk, /* how big one block in the cache-tee-file will be */
+ ctx->cache_file ); /* the sparse-file we write to */
if ( rc == 0 )
- KOutMsg( "the file is %f%% complete ( %lu bytes are cached )\n", percent, bytes_cached );
+ {
+ rc = fetch_loop( tee, ctx );
+ if ( rc == 0 )
+ {
+ bool complete = false;
+ rc = IsCacheTeeComplete( tee, &complete );
+ KOutMsg( "IsCacheTeeComplete() -> %R, complete = %s\n", rc, complete ? "YES" : "NO" );
+ }
+ /* the tee exists once KDirectoryMakeCacheTee() succeeded: release it even if reading failed */
+ KFileRelease( tee );
+ }
+ KFileRelease( remote );
}
+ KNSManagerRelease( kns_mgr );
}
- KFileRelease( f );
}
return rc;
}
+/* -------------------------------------------------------------------------------------------------------------------- */
+
+/* this is 'borrowed' from libs/kns/http-priv.h :
+ - that is a private header inside the source-directory
+ - without it the public function KNSManagerMakeClientHttp( ... ) cannot be used
+ ( unless the user writes their own URL-parsing )
+*/
+
+typedef enum
+{
+ st_NONE,
+ st_HTTP,
+ st_S3
+} SchemeType;
+
+typedef struct URLBlock URLBlock;
+struct URLBlock
+{
+ String scheme;
+ String host;
+ String path; /* Path includes any parameter portion */
+ String query;
+ String fragment;
+
+ uint32_t port;
+
+ SchemeType scheme_type;
+};
+extern void URLBlockInit ( URLBlock *self );
+extern rc_t ParseUrl ( URLBlock * b, const char * url, size_t url_size );
+
+
+/* Download ctx->url with a single plain HTTP GET request ( no partial access ).
+ *
+ * The reply body is written to ctx->destination or, when that is NULL, to the
+ * name extract_name() derives from the URL.
+ * Returns the first non-zero rc encountered. A reply status other than 200 is
+ * only printed: no file is written and rc stays 0. */
+static rc_t full_download( KDirectory *dir, fetch_ctx *ctx )
+{
+    rc_t rc = KOutMsg( "make full download without partial access\n" );
+    if ( rc == 0 )
+    {
+        struct KNSManager * kns_mgr;
+        rc = KNSManagerMake ( &kns_mgr );
+        if ( rc != 0 )
+            (void)LOGERR( klogInt, rc, "KNSManagerMake() failed" );
+        else
+        {
+            struct URLBlock url;
+            URLBlockInit( &url );
+            rc = ParseUrl( &url, ctx->url, string_size( ctx->url ) );
+            if ( rc == 0 )
+            {
+                KClientHttp * http;
+                rc = KNSManagerMakeClientHttp( kns_mgr, &http, NULL, 0x01010000, &url.host, url.port );
+                if ( rc == 0 )
+                {
+                    KClientHttpRequest * req;
+                    KOutMsg( "connection open!\n" );
+                    rc = KClientHttpMakeRequest( http, &req, ctx->url );
+                    if ( rc == 0 )
+                    {
+                        KClientHttpResult *rslt;
+
+                        KOutMsg( "request made!\n" );
+
+                        KClientHttpRequestConnection( req, true );
+                        KClientHttpRequestSetNoCache( req );
+
+                        rc = KClientHttpRequestGET( req, &rslt );
+                        if ( rc == 0 )
+                        {
+                            uint32_t result_code;
+                            size_t msg_size;
+                            char buffer[ 4096 * 32 ]; /* 128k */
+
+                            KOutMsg( "reply received!\n" );
+                            /* buffer first receives the status message, later it is reused for the payload */
+                            rc = KClientHttpResultStatus( rslt, &result_code, buffer, sizeof buffer, &msg_size );
+                            if ( rc == 0 )
+                            {
+                                struct KStream *content;
+                                KOutMsg( "result-code = %d\n", result_code );
+                                if ( result_code == 200 )
+                                {
+                                    rc = KClientHttpResultGetInputStream( rslt, &content );
+                                    if ( rc == 0 )
+                                    {
+                                        KFile *dst;
+                                        char * outfile;
+
+                                        if ( ctx->destination == NULL )
+                                            extract_name( &outfile, ctx->url );
+                                        else
+                                            /* keep the copy: it names the output file and is freed below */
+                                            outfile = string_dup_measure( ctx->destination, NULL );
+
+                                        rc = KDirectoryCreateFile ( dir, &dst, false, 0664, kcmInit, outfile );
+                                        if ( rc == 0 )
+                                        {
+                                            uint64_t pos = 0;
+                                            size_t num_read;
+                                            struct timeout_t timeout;
+
+                                            KOutMsg( "dst >%s< created\n", outfile );
+
+                                            rc = TimeoutInit( &timeout, 2000 );
+                                            while ( rc == 0 )
+                                            {
+                                                size_t num_writ;
+                                                rc = KStreamTimedRead( content, buffer, sizeof buffer, &num_read, &timeout );
+                                                /* a failed read or an empty read ends the transfer */
+                                                if ( rc != 0 || num_read == 0 )
+                                                    break;
+                                                rc = KFileWriteAll( dst, pos, buffer, num_read, &num_writ );
+                                                if ( rc == 0 )
+                                                    pos += num_read;
+                                            }
+
+                                            /* %lu is what this file uses for 64-bit values */
+                                            KOutMsg( "%lu bytes read!\n", pos );
+                                            KFileRelease( dst );
+                                        }
+                                        free( outfile );
+                                        KStreamRelease( content );
+                                    }
+                                }
+                            }
+                            KClientHttpResultRelease( rslt );
+                        }
+                        KClientHttpRequestRelease( req );
+                    }
+                    KClientHttpRelease ( http );
+                }
+            }
+            KNSManagerRelease( kns_mgr );
+        }
+    }
+    return rc;
+}
+
+
+/* -------------------------------------------------------------------------------------------------------------------- */
+
+
static rc_t truncate_cache( KDirectory *dir, fetch_ctx *ctx )
{
rc_t rc = 0;
@@ -570,6 +819,9 @@ static rc_t truncate_cache( KDirectory *dir, fetch_ctx *ctx )
}
+/* -------------------------------------------------------------------------------------------------------------------- */
+
+
rc_t get_bool( Args * args, const char *option, bool *value )
{
uint32_t count;
@@ -596,7 +848,42 @@ rc_t get_size_t( Args * args, const char *option, size_t *value, size_t dflt )
rc_t rc = get_str( args, option, &s );
*value = dflt;
if ( rc == 0 && s != NULL )
- *value = atoi( s );
+ {
+ size_t l = string_size( s );
+ if ( l == 0 )
+ *value = dflt;
+ else
+ {
+ size_t multipl = 1;
+ switch( s[ l - 1 ] )
+ {
+ case 'k' :
+ case 'K' : multipl = 1024; break;
+ case 'm' :
+ case 'M' : multipl = 1024 * 1024; break;
+ case 'g' :
+ case 'G' : multipl = 1024 * 1024 * 1024; break;
+ }
+
+ if ( multipl > 1 )
+ {
+ char * src = string_dup( s, l - 1 );
+ if ( src != NULL )
+ {
+ char * endptr;
+ *value = strtol( src, &endptr, 0 ) * multipl;
+ free( src );
+ }
+ else
+ *value = dflt;
+ }
+ else
+ {
+ char * endptr;
+ *value = strtol( s, &endptr, 0 );
+ }
+ }
+ }
else
*value = dflt;
return rc;
@@ -626,20 +913,24 @@ rc_t get_fetch_ctx( Args * args, fetch_ctx * ctx )
if ( rc == 0 ) rc = get_bool( args, OPTION_VERB, &ctx->verbose );
if ( rc == 0 ) rc = get_bool( args, OPTION_SHOW, &ctx->show_filesize );
if ( rc == 0 ) rc = get_str( args, OPTION_CACHE, &ctx->cache_file );
+ if ( rc == 0 ) rc = get_size_t( args, OPTION_CACHE_BLK, &ctx->cache_blk, 0 );
+ if ( rc == 0 ) rc = get_str( args, OPTION_PROXY, &ctx->proxy );
if ( rc == 0 ) rc = get_bool( args, OPTION_RAND, &ctx->random );
if ( rc == 0 ) rc = get_bool( args, OPTION_REP, &ctx->with_repeats );
if ( rc == 0 ) rc = get_bool( args, OPTION_CREPORT, &ctx->report_cache );
if ( rc == 0 ) rc = get_size_t( args, OPTION_BLOCK, &ctx->blocksize, ( 32 * 1024 ) );
- if ( rc == 0 ) rc = get_size_t( args, OPTION_BUFFER, &ctx->buffer_size, 0 );
- if ( rc == 0 ) rc = get_size_t( args, OPTION_SLEEP, &ctx->sleep_time, 0 );
- if ( rc == 0 ) rc = get_size_t( args, OPTION_TIMEOUT, &ctx->timeout_time, 0 );
+ if ( rc == 0 ) rc = get_size_t( args, OPTION_BUFFER, &ctx->buffer_size, 0 );
+ if ( rc == 0 ) rc = get_size_t( args, OPTION_SLEEP, &ctx->sleep_time, 0 );
+ if ( rc == 0 ) rc = get_size_t( args, OPTION_TIMEOUT, &ctx->timeout_time, 0 );
if ( rc == 0 ) rc = get_bool( args, OPTION_COMPLETE, &ctx->check_cache_complete );
+ if ( rc == 0 ) rc = get_bool( args, OPTION_CCOMPL, &ctx->check_completeness );
if ( rc == 0 ) rc = get_bool( args, OPTION_TRUNC, &ctx->truncate_cache );
if ( rc == 0 ) rc = get_size_t( args, OPTION_START, &ctx->start, 0 );
if ( rc == 0 ) rc = get_size_t( args, OPTION_COUNT, &ctx->count, 0 );
if ( rc == 0 ) rc = get_bool( args, OPTION_PROGRESS, &ctx->show_progress );
if ( rc == 0 ) rc = get_bool( args, OPTION_RELIABLE, &ctx->reliable );
-
+ if ( rc == 0 ) rc = get_bool( args, OPTION_FULL, &ctx->full_download );
+
return rc;
}
@@ -659,8 +950,8 @@ rc_t CC KMain ( int argc, char *argv [] )
if ( rc == 0 )
{
if ( ctx.url == NULL )
- KOutMsg( "URL is missing!\n" );
- else
+ KOutMsg( "URL is missing!\n" );
+ else
{
KDirectory *dir;
rc = KDirectoryNativeDir ( &dir );
@@ -668,10 +959,14 @@ rc_t CC KMain ( int argc, char *argv [] )
{
if ( ctx.check_cache_complete )
rc = check_cache_complete( dir, &ctx );
+ else if ( ctx.check_completeness )
+ rc = check_cache_completeness( dir, &ctx );
else if ( ctx.truncate_cache )
rc = truncate_cache( dir, &ctx );
else if ( ctx.show_filesize )
rc = show_size( dir, &ctx );
+ else if ( ctx.full_download )
+ rc = full_download( dir, &ctx );
else
rc = fetch( dir, &ctx );
diff --git a/tools/kget/kget.vers b/tools/kget/kget.vers
index 3eefcb9..9084fa2 100644
--- a/tools/kget/kget.vers
+++ b/tools/kget/kget.vers
@@ -1 +1 @@
-1.0.0
+1.1.0
diff --git a/tools/kqsh/kqsh.vers b/tools/kqsh/kqsh.vers
index 35d16fb..097a15a 100644
--- a/tools/kqsh/kqsh.vers
+++ b/tools/kqsh/kqsh.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/nenctool/nenctest.vers b/tools/nenctool/nenctest.vers
index 35d16fb..097a15a 100644
--- a/tools/nenctool/nenctest.vers
+++ b/tools/nenctool/nenctest.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/nenctool/nenctool.vers b/tools/nenctool/nenctool.vers
index 35d16fb..097a15a 100644
--- a/tools/nenctool/nenctool.vers
+++ b/tools/nenctool/nenctool.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/nencvalid/nencvalid.vers b/tools/nencvalid/nencvalid.vers
index 35d16fb..097a15a 100644
--- a/tools/nencvalid/nencvalid.vers
+++ b/tools/nencvalid/nencvalid.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/pacbio-load/pacbio-load.c b/tools/pacbio-load/pacbio-load.c
index 6e3256a..3eb2c09 100644
--- a/tools/pacbio-load/pacbio-load.c
+++ b/tools/pacbio-load/pacbio-load.c
@@ -405,6 +405,20 @@ static rc_t pacbio_load_multipart( context * ctx, KDirectory * wd, VDatabase * d
}
+/* Append entry idx of src to dst, unless the entry is NULL, empty or the
+ * lookup in dst reports anything other than "not found".
+ * Returns the rc of reading the entry or of the append; a failing lookup
+ * is not reported. */
+static rc_t add_unique_to_namelist( const VNamelist * src, VNamelist * dst, int32_t idx )
+{
+    const char * name;
+    uint32_t pos;
+    rc_t lookup_rc;
+    rc_t rc = VNameListGet( src, idx, &name );
+    if ( rc != 0 )
+        return rc;
+
+    /* nothing to add for a missing or empty entry */
+    if ( name == NULL || name[ 0 ] == 0 )
+        return rc;
+
+    lookup_rc = VNamelistIndexOf( dst, name, &pos );
+    if ( GetRCState( lookup_rc ) != rcNotFound )
+        return rc;
+
+    return VNamelistAppend( dst, name );
+}
+
static rc_t pacbio_load( context *ctx, KDirectory * wd, ld_context *lctx, const char * toolname )
{
VDBManager * vdb_mgr = NULL;
@@ -440,46 +454,59 @@ static rc_t pacbio_load( context *ctx, KDirectory * wd, ld_context *lctx, const
/* creates the 4 output vdb tables... SEQUENCE, CONSENSUS, PASSES and METRICS */
if ( rc == 0 )
{
- bool consensus_present = false;
- uint32_t count;
- KDirectory * hdf5_src;
-
- rc = VNameListCount ( ctx->src_paths, &count );
- if ( rc == 0 && count > 0 )
+ bool consensus_present = false;
+ VNamelist * to_process;
+ rc = VNamelistMake ( &to_process, 5 );
+ if ( rc == 0 )
{
- rc = pacbio_get_hdf5_src( wd, ctx->src_paths, 0, &hdf5_src );
- if ( rc == 0 )
+ KDirectory * hdf5_src;
+ uint32_t count, idx;
+
+ rc = VNameListCount ( ctx->src_paths, &count );
+ for ( idx = 0; rc == 0 && idx < count; ++idx )
{
- if ( pacbio_has_MultiParts( hdf5_src ) )
+ /* open the source this iteration is about, not always the first one */
+ rc = pacbio_get_hdf5_src( wd, ctx->src_paths, idx, &hdf5_src );
+ if ( rc == 0 )
{
- VNamelist * parts;
- rc = VNamelistMake ( &parts, 5 );
- if ( rc == 0 )
+ if ( pacbio_has_MultiParts( hdf5_src ) )
{
- rc = pacbio_get_MultiParts( hdf5_src, parts );
+ VNamelist * parts;
+ rc = VNamelistMake ( &parts, 5 );
if ( rc == 0 )
{
- VNamelistRelease ( ctx->src_paths );
- ctx->src_paths = parts;
+ rc = pacbio_get_MultiParts( hdf5_src, parts );
+ if ( rc == 0 )
+ {
+ uint32_t p_count, p_idx;
+ /* the loop below walks 'parts', so its length is the bound */
+ rc = VNameListCount ( parts, &p_count );
+ for ( p_idx = 0; rc == 0 && p_idx < p_count; ++p_idx )
+ rc = add_unique_to_namelist( parts, to_process, p_idx );
+ }
+ VNamelistRelease ( parts );
}
}
+ else
+ rc = add_unique_to_namelist( ctx->src_paths, to_process, idx );
+ KDirectoryRelease( hdf5_src );
}
- KDirectoryRelease( hdf5_src );
}
- }
-
- if ( rc == 0 )
- {
- rc = VNameListCount ( ctx->src_paths, &count );
- if ( rc == 0 && count > 0 )
+ VNamelistRelease ( ctx->src_paths );
+ ctx->src_paths = to_process;
+
+ if ( rc == 0 )
{
- ctx_show( ctx );
- rc = pacbio_get_hdf5_src( wd, ctx->src_paths, 0, &hdf5_src );
- if ( rc == 0 )
- rc = pacbio_load_multipart( ctx, wd, database, &hdf5_src, &consensus_present, lctx, count );
+ rc = VNameListCount ( ctx->src_paths, &count );
+ if ( rc == 0 && count > 0 )
+ {
+ ctx_show( ctx );
+ rc = pacbio_get_hdf5_src( wd, ctx->src_paths, 0, &hdf5_src );
+ if ( rc == 0 )
+ rc = pacbio_load_multipart( ctx, wd, database, &hdf5_src, &consensus_present, lctx, count );
+ }
}
+
}
-
+
if ( !consensus_present )
VDatabaseDropTable ( database, "CONSENSUS" );
}
diff --git a/tools/pacbio-load/pacbio-load.vers b/tools/pacbio-load/pacbio-load.vers
index 35d16fb..097a15a 100644
--- a/tools/pacbio-load/pacbio-load.vers
+++ b/tools/pacbio-load/pacbio-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/pileup-stats/pileup-stats.cpp b/tools/pileup-stats/pileup-stats.cpp
index 6923e0e..07c09af 100644
--- a/tools/pileup-stats/pileup-stats.cpp
+++ b/tools/pileup-stats/pileup-stats.cpp
@@ -33,6 +33,7 @@
#include <kapp/main.h>
#include <klib/printf.h>
+#include <klib/sra-release-version.h>
#include <iomanip>
#define DFLT_BUFFER_SIZE ( 32 * 1024 )
@@ -49,9 +50,6 @@
#include <sys/types.h>
#include <unistd.h>
-// log via general-writer API rather than stderr
-#define USE_GW_LOGMESSAGE 1
-
using namespace ngs;
namespace ncbi
@@ -79,6 +77,7 @@ namespace ncbi
static uint32_t num_significant_bits = 4;
static uint32_t verbosity;
+ static bool use_gw_logmessage = true; // log via general-writer API rather than stderr
const bool need_write_true = false;
@@ -136,22 +135,24 @@ namespace ncbi
break;
case 1:
if ( ( ref_zpos % 1000000 ) == 0 )
-#if USE_GW_LOGMESSAGE
- out . progMsg ( refName, PILEUP_STATS_VERS, ref_zpos + refLengthSubTotal , refLengthTotal );
-#else
- std :: cerr << "# " << std :: setw ( 9 ) << ref_zpos << '\n';
-#endif
+ {
+ if ( ncbi :: use_gw_logmessage )
+ out . progMsg ( refName, PILEUP_STATS_VERS, ref_zpos + refLengthSubTotal , refLengthTotal );
+ else
+ std :: cerr << "# " << std :: setw ( 9 ) << ref_zpos << '\n';
+ }
break;
default:
if ( ( ref_zpos % 5000 ) == 0 )
{
-#if USE_GW_LOGMESSAGE
- out . progMsg ( refName, PILEUP_STATS_VERS, ref_zpos + refLengthSubTotal , refLengthTotal );
-#else
- if ( ( ref_zpos % 500000 ) == 0 )
- std :: cerr << "\n# " << std :: setw ( 9 ) << ref_zpos << ' ';
- std :: cerr << '.';
-#endif
+ if ( ncbi :: use_gw_logmessage )
+ out . progMsg ( refName, PILEUP_STATS_VERS, ref_zpos + refLengthSubTotal , refLengthTotal );
+ else
+ {
+ if ( ( ref_zpos % 500000 ) == 0 )
+ std :: cerr << "\n# " << std :: setw ( 9 ) << ref_zpos << ' ';
+ std :: cerr << '.';
+ }
}
}
@@ -332,21 +333,24 @@ namespace ncbi
static
void run ( const char * spec, const char *outfile, const char *_remote_db, size_t buffer_size, Alignment :: AlignmentCategory cat )
{
-#if ! USE_GW_LOGMESSAGE
- if ( verbosity > 0 )
- std :: cerr << "# Opening run '" << spec << "'\n";
-#endif
+ if ( ! ncbi :: use_gw_logmessage )
+ {
+ if ( verbosity > 0 )
+ std :: cerr << "# Opening run '" << spec << "'\n";
+ }
ReadCollection obj = ncbi :: NGS :: openReadCollection ( spec );
String runName = obj . getName ();
-#if ! USE_GW_LOGMESSAGE
- if ( verbosity > 0 )
+ if ( ! ncbi :: use_gw_logmessage )
{
- std :: cerr << "# Preparing version " << GW_CURRENT_VERSION << " pipe to stdout\n";
- if ( ( integer_column_flag_bits & 1 ) != 0 )
- std :: cerr << "# USING INTEGER PACKING\n";
+ if ( verbosity > 0 )
+ {
+ std :: cerr << "# Preparing version " << GW_CURRENT_VERSION << " pipe to stdout\n";
+ if ( ( integer_column_flag_bits & 1 ) != 0 )
+ std :: cerr << "# USING INTEGER PACKING\n";
+ }
}
-#endif
+
std :: string remote_db;
if ( _remote_db == NULL )
remote_db = runName + ".pileup_stat";
@@ -371,11 +375,12 @@ namespace ncbi
prepareOutput ( out, runName );
if ( verbosity > 0 )
-#if USE_GW_LOGMESSAGE
- out . logMsg ( "Accessing all references" );
-#else
- std :: cerr << "# Accessing all references\n";
-#endif
+ {
+ if ( ncbi :: use_gw_logmessage )
+ out . logMsg ( "Accessing all references" );
+ else
+ std :: cerr << "# Accessing all references\n";
+ }
@@ -398,29 +403,30 @@ namespace ncbi
uint64_t refLength = ref . getLength ();
if ( verbosity > 0 )
-#if USE_GW_LOGMESSAGE
{
- out . logMsg ( "Processing reference '" + refName + "'" );
+ if ( ncbi :: use_gw_logmessage )
+ out . logMsg ( "Processing reference '" + refName + "'" );
+ else
+ std :: cerr << "# Processing reference '" << refName << "'\n";
}
-#else
- std :: cerr << "# Processing reference '" << refName << "'\n";
-#endif
out . columnDefault ( column_id [ col_REFERENCE_SPEC ], 8, refName . data (), refName . size () );
if ( verbosity > 0 )
-#if USE_GW_LOGMESSAGE
- out . logMsg ( "Accessing all pileups" );
-#else
- std :: cerr << "# Accessing all pileups\n";
-#endif
+ {
+ if ( ncbi :: use_gw_logmessage )
+ out . logMsg ( "Accessing all pileups" );
+ else
+ std :: cerr << "# Accessing all pileups\n";
+ }
PileupIterator pileup = ref . getPileups ( cat );
run ( out, runName, refName, pileup, refLength, refLengthSubTotal, totalRefLength );
refLengthSubTotal += refLength;
-#if ! USE_GW_LOGMESSAGE
- if ( verbosity > 1 )
- std :: cerr << '\n';
-#endif
+ if ( ! ncbi :: use_gw_logmessage )
+ {
+ if ( verbosity > 1 )
+ std :: cerr << '\n';
+ }
}
}
catch ( ErrorMsg & x )
@@ -507,7 +513,9 @@ extern "C"
<< " -U|--unpack-integer don't pack integers in output pipe - uses more bandwidth\n"
<< " -h|--help output brief explanation of the program\n"
<< " -v|--verbose increase the verbosity of the program.\n"
<< " use multiple times for more verbosity.\n"
+ << " -V|--version display the version of the program then quit.\n"
+ << " --log-stderr log via stderr rather than general-writer API (default - general-writer API)\n"
<< '\n'
<< appName << " : "
<< ( vers >> 24 )
@@ -520,6 +528,32 @@ extern "C"
;
}
+ static void handle_version ( const char *progname )
+ {
+ char cSra [ 512 ] = "";
+ SraReleaseVersion sraVersion;
+ memset ( & sraVersion, 0, sizeof sraVersion );
+
+ rc_t rc = SraReleaseVersionGet ( & sraVersion );
+ if ( rc == 0 )
+ {
+ rc = SraReleaseVersionPrint ( & sraVersion, cSra, sizeof cSra, NULL );
+ }
+
+ ::ver_t vers = ::KAppVersion();
+
+ std::cout
+ << std::endl
+ << progname << " : "
+ << ( vers >> 24 )
+ << '.'
+ << ( ( vers >> 16 ) & 0xFF )
+ << '.'
+ << ( vers & 0xFFFF )
+ << " ( " << cSra << " )"
+ << std::endl << std::endl;
+ }
+
static void CC handle_error ( const char *arg, void *message )
{
throw ( const char * ) message;
@@ -591,6 +625,9 @@ extern "C"
case '?':
handle_help ( argv [ 0 ] );
return 0;
+ case 'V':
+ handle_version ( argv [ 0 ] );
+ return 0;
case '-':
++ arg;
if ( strcmp ( arg, "output-file" ) == 0 )
@@ -651,11 +688,20 @@ extern "C"
{
++ ncbi :: verbosity;
}
+ else if ( strcmp ( arg, "log-stderr" ) == 0 )
+ {
+ ncbi :: use_gw_logmessage = false;
+ }
else if ( strcmp ( arg, "help" ) == 0 )
{
handle_help ( argv [ 0 ] );
return 0;
}
+ else if ( strcmp ( arg, "version" ) == 0 )
+ {
+ handle_version ( argv [ 0 ] );
+ return 0;
+ }
else
{
throw "Invalid Argument";
@@ -681,37 +727,40 @@ extern "C"
}
catch ( ErrorMsg & x )
{
-#if ! USE_GW_LOGMESSAGE
- std :: cerr
- << "ERROR: "
- << argv [ 0 ]
- << ": "
- << x . what ()
- << '\n'
- ;
-#endif
+ if ( ! ncbi :: use_gw_logmessage )
+ {
+ std :: cerr
+ << "ERROR: "
+ << argv [ 0 ]
+ << ": "
+ << x . what ()
+ << '\n'
+ ;
+ }
}
catch ( const char x [] )
{
-#if ! USE_GW_LOGMESSAGE
- std :: cerr
- << "ERROR: "
- << argv [ 0 ]
- << ": "
- << x
- << '\n'
- ;
-#endif
+ if ( ! ncbi :: use_gw_logmessage )
+ {
+ std :: cerr
+ << "ERROR: "
+ << argv [ 0 ]
+ << ": "
+ << x
+ << '\n'
+ ;
+ }
}
catch ( ... )
{
-#if ! USE_GW_LOGMESSAGE
- std :: cerr
- << "ERROR: "
- << argv [ 0 ]
- << ": unknown\n"
- ;
-#endif
+ if ( ! ncbi :: use_gw_logmessage )
+ {
+ std :: cerr
+ << "ERROR: "
+ << argv [ 0 ]
+ << ": unknown\n"
+ ;
+ }
}
return rc;
diff --git a/tools/pileup-stats/pileup-stats.vers b/tools/pileup-stats/pileup-stats.vers
index 35d16fb..097a15a 100644
--- a/tools/pileup-stats/pileup-stats.vers
+++ b/tools/pileup-stats/pileup-stats.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/prefetch/prefetch.c b/tools/prefetch/prefetch.c
index d9818f4..c9bbd7e 100644
--- a/tools/prefetch/prefetch.c
+++ b/tools/prefetch/prefetch.c
@@ -28,6 +28,7 @@
#include "prefetch.vers.h"
+#include <kapp/args-conv.h> /* ArgsConvFilepath */
#include <kapp/main.h> /* KAppVersion */
#include <kdb/manager.h> /* kptDatabase */
@@ -267,7 +268,9 @@ bool _StringIsFasp(const String *self, const char **withoutScheme)
*withoutScheme = NULL;
- if (memcmp(self->addr, fasp, sizeof fasp - 1) == 0) {
+ if (string_cmp(self->addr, self->len, fasp, sizeof fasp - 1,
+ sizeof fasp - 1) == 0)
+ {
*withoutScheme = self->addr + sizeof fasp - 1;
return true;
}
@@ -1309,7 +1312,6 @@ static rc_t MainDependenciesList(const Main *self,
rc_t rc = 0;
bool isDb = true;
const VDatabase *db = NULL;
- const char *path = NULL;
const String *str = NULL;
KPathType type = kptNotFound;
@@ -1318,8 +1320,6 @@ static rc_t MainDependenciesList(const Main *self,
str = resolved->path.str;
assert(str && str->addr);
- path = str->addr;
-
rc = _VDBManagerSetDbGapCtx(self->mgr, resolved->resolver);
STSMSG(STS_DBG, ("Listing '%S's dependencies...", str));
@@ -1617,7 +1617,9 @@ static rc_t ItemInitResolved(Item *self, VResolver *resolver, KDirectory *dir,
assert(resolved->type != eRunTypeUnknown);
- if (self->desc != NULL) { /* object name is specified (not kart item) */
+ if (!self->isDependency &&
+ self->desc != NULL) /* object name is specified (not kart item) */
+ {
KPathType type = KDirectoryPathType(dir, "%s", self->desc) & ~kptAlias;
if (type == kptFile || type == kptDir) {
rc = VPathStrInitStr(&resolved->path, self->desc, 0);
@@ -1712,6 +1714,95 @@ static rc_t ItemResolve(Item *item, int32_t row) {
return rc;
}
+static bool maxSzPrntd = false;
+
+static void logMaxSize(size_t maxSize) {
+ if (maxSzPrntd) {
+ return;
+ }
+
+ maxSzPrntd = true;
+
+ if (maxSize == 0) {
+/* OUTMSG(("Maximum file size download limit is unlimited\n")); */
+ return;
+ }
+
+ if (maxSize / 1024 < 10) {
+ PLOGMSG(klogWarn, (klogWarn,
+ "Maximum file size download limit is $(size)B\n",
+ "size=%zu", maxSize));
+ return;
+ }
+
+ maxSize /= 1024;
+ if (maxSize / 1024 < 10) {
+ PLOGMSG(klogWarn, (klogWarn,
+ "Maximum file size download limit is $(size)KB\n",
+ "size=%zu", maxSize));
+ return;
+ }
+
+ maxSize /= 1024;
+ if (maxSize / 1024 < 10) {
+ PLOGMSG(klogWarn, (klogWarn,
+ "Maximum file size download limit is $(size)MB\n",
+ "size=%zu", maxSize));
+ return;
+ }
+
+ maxSize /= 1024;
+ if (maxSize / 1024 < 10) {
+ PLOGMSG(klogWarn, (klogWarn,
+ "Maximum file size download limit is $(size)GB\n",
+ "size=%zu", maxSize));
+ return;
+ }
+
+ maxSize /= 1024;
+ PLOGMSG(klogWarn, (klogWarn,
+ "Maximum file size download limit is $(size)TB\n",
+ "size=%zu", maxSize));
+}
+
+static void logBigFile(int n, const char *name, size_t size) {
+ if (size / 1024 < 10) {
+ STSMSG(STS_TOP,
+ ("%d) '%s' (%,zuB) is larger than maximum allowed: skipped\n",
+ n, name, size));
+ return;
+ }
+
+ size /= 1024;
+ if (size / 1024 < 10) {
+ STSMSG(STS_TOP,
+ ("%d) '%s' (%,zuKB) is larger than maximum allowed: skipped\n",
+ n, name, size));
+ return;
+ }
+
+ size /= 1024;
+ if (size / 1024 < 10) {
+ STSMSG(STS_TOP,
+ ("%d) '%s' (%,zuMB) is larger than maximum allowed: skipped\n",
+ n, name, size));
+ return;
+ }
+
+ size /= 1024;
+ if (size / 1024 < 10) {
+ STSMSG(STS_TOP,
+ ("%d) '%s' (%,zuGB) is larger than maximum allowed: skipped\n",
+ n, name, size));
+ return;
+ }
+
+ size /= 1024;
+ STSMSG(STS_TOP,
+ ("%d) '%s' (%,zuTB) is larger than maximum allowed: skipped\n",
+ n, name, size));
+}
+
/* download if not found; obey size restriction */
static rc_t ItemDownload(Item *item) {
bool isLocal = false;
@@ -1725,12 +1816,10 @@ static rc_t ItemDownload(Item *item) {
if (rc == 0) {
bool skip = false;
-
if (self->existing) { /* the path is a path to an existing local file */
rc = VPathStrInitStr(&self->path, item->desc, 0);
return rc;
}
-
if (self->undersized) {
STSMSG(STS_TOP,
("%d) '%s' (%,zu KB) is smaller than minimum allowed: skipped\n",
@@ -1738,9 +1827,8 @@ static rc_t ItemDownload(Item *item) {
skip = true;
}
else if (self->oversized) {
- STSMSG(STS_TOP,
- ("%d) '%s' (%,zu KB) is larger than maximum allowed: skipped\n",
- n, self->name, self->remoteSz / 1024));
+ logMaxSize(item->main->maxSize);
+ logBigFile(n, self->name, self->remoteSz);
skip = true;
}
@@ -2500,6 +2588,9 @@ static size_t _sizeFromString(const char *val) {
else if (*val == 'g' || *val == 'G') {
s *= 1024L * 1024 * 1024;
}
+ else if (*val == 't' || *val == 'T') {
+ s *= 1024L * 1024 * 1024 * 1024;
+ }
else if (*val == 'u' || *val == 'U') { /* unlimited */
s = 0;
}
@@ -2572,9 +2663,9 @@ static const char* SZ_L_USAGE[] =
#define TRANS_OPTION "transport"
#define TRASN_ALIAS "t"
-static const char* TRANS_USAGE[] = { "transport: one of: ascp; http; both.",
- "(ascp only; http only; first try ascp, "
- "use http if cannot download by ascp).",
+static const char* TRANS_USAGE[] = { "transport: one of: fasp; http; both.",
+ "(fasp only; http only; first try fasp (ascp), "
+ "use http if cannot download using fasp).",
"Default: both", NULL };
#define DEFAULT_MAX_FILE_SIZE "20G"
@@ -2627,6 +2718,8 @@ static OptDef Options[] = {
,{ CHECK_ALL_OPTION , CHECK_ALL_ALIAS , NULL, CHECK_ALL_USAGE, 1, false, false}
};
+static ParamDef Parameters[] = { { ArgsConvFilepath } };
+
static rc_t MainProcessArgs(Main *self, int argc, char *argv[]) {
rc_t rc = 0;
@@ -2634,8 +2727,9 @@ static rc_t MainProcessArgs(Main *self, int argc, char *argv[]) {
assert(self);
- rc = ArgsMakeAndHandle(&self->args, argc, argv, 1,
- Options, sizeof Options / sizeof (OptDef));
+ rc = ArgsMakeAndHandle2(&self->args, argc, argv,
+ Parameters, sizeof Parameters / sizeof Parameters[0],
+ 1, Options, sizeof Options / sizeof Options[0]);
if (rc != 0) {
DISP_RC(rc, "ArgsMakeAndHandle");
return rc;
@@ -2914,19 +3008,50 @@ static rc_t MainProcessArgs(Main *self, int argc, char *argv[]) {
}
if (pcount > 0) {
+ bool ok = false;
const char *val = NULL;
- rc = ArgsOptionValue(self->args, TRANS_OPTION, 0, (const void **)&val);
+ rc = ArgsOptionValue
+ (self->args, TRANS_OPTION, 0, (const void **)&val);
if (rc != 0) {
LOGERR(klogErr, rc,
"Failure to get '" TRANS_OPTION "' argument value");
break;
}
assert(val);
- if (val[0] == 'a') {
- self->noHttp = true;
+ switch (val[0]) {
+ case 'a':
+ case 'f': {
+ const char ascp[] = "ascp";
+ const char fasp[] = "fasp";
+ if (string_cmp(val, string_measure(val, NULL),
+ ascp, sizeof ascp - 1, sizeof ascp - 1) == 0
+ ||
+ string_cmp(val, string_measure(val, NULL),
+ fasp, sizeof fasp - 1, sizeof fasp - 1) == 0
+ ||
+ (val[0] == 'a' && val[1] == '\0'))
+ {
+ self->noHttp = true;
+ ok = true;
+ }
+ break;
+ }
+ case 'h': {
+ const char http[] = "http";
+ if (string_cmp(val, string_measure(val, NULL),
+ http, sizeof http - 1, sizeof http - 1) == 0
+ || val[1] == '\0')
+ {
+ self->noAscp = true;
+ ok = true;
+ }
+ break;
+ }
}
- else if (val[0] == 'h') {
- self->noAscp = true;
+ if (!ok) {
+ rc = RC(rcExe, rcArgv, rcParsing, rcParam, rcInvalid);
+ LOGERR(klogErr, rc, "Bad '" TRANS_OPTION "' argument value");
+ break;
}
}
@@ -3192,6 +3317,7 @@ static rc_t MainInit(int argc, char *argv[], Main *self) {
rc = VFSManagerGetResolver(self->vfsMgr, &resolver);
DISP_RC(rc, "VFSManagerGetResolver");
VResolverRemoteEnable(resolver, vrAlwaysEnable);
+ VResolverCacheEnable(resolver, vrAlwaysEnable);
RELEASE(VResolver, resolver);
}
@@ -3235,7 +3361,6 @@ static rc_t MainInit(int argc, char *argv[], Main *self) {
/*********** Process one command line argument **********/
static rc_t MainRun(Main *self, const char *arg, const char *realArg) {
ERunType type = eRunTypeDownload;
- static bool maxSzPrntd = false;
rc_t rc = 0;
Iterator it;
assert(self && realArg);
@@ -3292,17 +3417,6 @@ static rc_t MainRun(Main *self, const char *arg, const char *realArg) {
}
}
else {
- if (!maxSzPrntd) {
- maxSzPrntd = true;
- if (self->maxSize == 0) {
- OUTMSG((
- "Maximum file size download limit is unlimited\n"));
- }
- else {
- OUTMSG(("Maximum file size download limit is %,zuKB\n",
- self->maxSize / 1024));
- }
- }
if (it.kart != NULL) {
OUTMSG(("Downloading kart file '%s'\n", realArg));
if (type == eRunTypeGetSize) {
@@ -3311,7 +3425,7 @@ static rc_t MainRun(Main *self, const char *arg, const char *realArg) {
}
OUTMSG(("\n"));
}
-
+
for (n = 1; ; ++n) {
rc_t rc2 = 0;
rc_t rc3 = 0;
@@ -3356,9 +3470,9 @@ static rc_t MainRun(Main *self, const char *arg, const char *realArg) {
else if (item->resolved.oversized &&
type == eRunTypeGetSize)
{
- STSMSG(STS_TOP,
- ("%d) '%s' (%,zu KB) is larger than maximum allowed: skipped\n",
- n, item->resolved.name, item->resolved.remoteSz / 1024));
+ logMaxSize(self->maxSize);
+ logBigFile(n, item->resolved.name,
+ item->resolved.remoteSz);
}
else {
total += item->resolved.remoteSz;
diff --git a/tools/prefetch/prefetch.vers b/tools/prefetch/prefetch.vers
index 35d16fb..097a15a 100644
--- a/tools/prefetch/prefetch.vers
+++ b/tools/prefetch/prefetch.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/prefetch/prefetch.vers.h b/tools/prefetch/prefetch.vers.h
index dae07c4..1fdd187 100644
--- a/tools/prefetch/prefetch.vers.h
+++ b/tools/prefetch/prefetch.vers.h
@@ -1 +1 @@
-#define PREFETCH_VERS 0x02050007
+#define PREFETCH_VERS 0x02060002
diff --git a/tools/qual-recalib-stat/qual-recalib-stat.vers b/tools/qual-recalib-stat/qual-recalib-stat.vers
index 35d16fb..097a15a 100644
--- a/tools/qual-recalib-stat/qual-recalib-stat.vers
+++ b/tools/qual-recalib-stat/qual-recalib-stat.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/qual-recalib-stat/qual-recalib-stat.vers.h b/tools/qual-recalib-stat/qual-recalib-stat.vers.h
index fb98dec..6e217ad 100644
--- a/tools/qual-recalib-stat/qual-recalib-stat.vers.h
+++ b/tools/qual-recalib-stat/qual-recalib-stat.vers.h
@@ -1 +1 @@
-#define QUAL_RECALIB_STAT_VERS 0x02050007
+#define QUAL_RECALIB_STAT_VERS 0x02060002
diff --git a/tools/rd-filter-redact/rd-filter-redact.vers b/tools/rd-filter-redact/rd-filter-redact.vers
index 35d16fb..097a15a 100644
--- a/tools/rd-filter-redact/rd-filter-redact.vers
+++ b/tools/rd-filter-redact/rd-filter-redact.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/rd-filter-redact/read-filter-redact.vers b/tools/rd-filter-redact/read-filter-redact.vers
index 35d16fb..097a15a 100644
--- a/tools/rd-filter-redact/read-filter-redact.vers
+++ b/tools/rd-filter-redact/read-filter-redact.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/rd-filter-redact/read-filter-redact.vers.h b/tools/rd-filter-redact/read-filter-redact.vers.h
index 21caf52..7b2743b 100644
--- a/tools/rd-filter-redact/read-filter-redact.vers.h
+++ b/tools/rd-filter-redact/read-filter-redact.vers.h
@@ -1 +1 @@
-#define READ_FILTER_REDACT_VERS 0x02050007
+#define READ_FILTER_REDACT_VERS 0x02060002
diff --git a/tools/rd-filter-redact/test-read-write-cursor.vers b/tools/rd-filter-redact/test-read-write-cursor.vers
index 35d16fb..097a15a 100644
--- a/tools/rd-filter-redact/test-read-write-cursor.vers
+++ b/tools/rd-filter-redact/test-read-write-cursor.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/ref-variation/common.cpp b/tools/ref-variation/common.cpp
index 8a4f758..8f17fa9 100644
--- a/tools/ref-variation/common.cpp
+++ b/tools/ref-variation/common.cpp
@@ -26,11 +26,10 @@
#include "common.h"
#include "helper.h"
-#include <search/grep.h>
namespace Common
{
- bool find_variation_core_step (KSearch::CVRefVariation& obj,
+ bool find_variation_core_step (KSearch::CVRefVariation& obj, ::RefVarAlg alg,
char const* ref_slice, size_t ref_slice_size,
size_t& ref_pos_in_slice,
char const* var, size_t var_len, size_t var_len_on_ref,
@@ -46,7 +45,7 @@ namespace Common
}
else
{
- obj = KSearch::VRefVariationIUPACMake ( refvarAlgRA,
+ obj = KSearch::VRefVariationIUPACMake ( alg,
ref_slice, ref_slice_size,
ref_pos_in_slice, var, var_len, var_len_on_ref, bases_start );
diff --git a/tools/ref-variation/common.h b/tools/ref-variation/common.h
index a01413d..ea92c33 100644
--- a/tools/ref-variation/common.h
+++ b/tools/ref-variation/common.h
@@ -25,6 +25,7 @@
*/
#include <stdlib.h>
+#include "helper.h"
namespace KSearch
{
@@ -33,7 +34,7 @@ namespace KSearch
namespace Common
{
- bool find_variation_core_step (KSearch::CVRefVariation& obj,
+ bool find_variation_core_step (KSearch::CVRefVariation& obj, ::RefVarAlg alg,
char const* ref_slice, size_t ref_slice_size,
size_t& ref_pos_in_slice,
char const* var, size_t var_len, size_t var_len_on_ref,
diff --git a/tools/ref-variation/helper.cpp b/tools/ref-variation/helper.cpp
index 2b31d0e..fea8112 100644
--- a/tools/ref-variation/helper.cpp
+++ b/tools/ref-variation/helper.cpp
@@ -35,7 +35,6 @@
#include <vdb/vdb-priv.h>
#include <klib/rc.h>
-#include <search/grep.h>
#include <kdb/table.h>
#include <kproc/thread.h>
@@ -83,10 +82,12 @@ namespace KLib
}
}
+#if USING_UINT64_BITMAP == 2
size_t const RECORD_SIZE_IN_BITS = 2;
uint64_t const BIT_SET_MASK = 0x2;
uint64_t const BIT_VALUE_MASK = 0x1;
uint64_t const BIT_RECORD_MASK = BIT_SET_MASK | BIT_VALUE_MASK;
+#endif
void CKVector::SetBool(uint64_t key, bool value)
{
@@ -1077,37 +1078,11 @@ namespace Utils
namespace KSearch
{
-#if 0 // turning off old code
- void FindRefVariationRegionAscii (
- char const* ref, size_t ref_size, size_t ref_pos_var,
- char const* variation, size_t variation_size, size_t var_len_on_ref,
- size_t* p_ref_start, size_t* p_ref_len
- )
- {
- rc_t rc = ::FindRefVariationRegionIUPAC ( ref, ref_size, ref_pos_var,
- variation, variation_size, var_len_on_ref, p_ref_start, p_ref_len );
-
- if (rc)
- throw Utils::CErrorMsg(rc, "FindRefVariationRegionAscii");
- }
-
- // TODO: use pointers for return parameters, parameters returned by pointers should be
- // in the beginning
- void FindRefVariationRegionAscii (
- std::string const& ref, size_t ref_pos_var,
- char const* variation, size_t variation_size, size_t var_len_on_ref,
- size_t& ref_start, size_t& ref_len
- )
- {
- FindRefVariationRegionAscii ( ref.c_str(), ref.size(), ref_pos_var,
- variation, variation_size, var_len_on_ref, & ref_start, & ref_len );
- }
-#endif
-
////////////////////////////////////////////////
- CVRefVariation::CVRefVariation() : m_pSelf(NULL)
+ CVRefVariation::CVRefVariation()
{
+ ClearMembers();
}
CVRefVariation::~CVRefVariation()
@@ -1129,6 +1104,20 @@ namespace KSearch
return *this;
}
+ void CVRefVariation::ClearMembers()
+ {
+ m_pSelf = NULL;
+ m_bases_start = KSearch::UNINITIALIZED_POSITION;
+ m_allele = NULL;
+ m_query = NULL;
+ m_allele_len = KSearch::UNINITIALIZED_POSITION;
+ m_allele_len_on_ref = KSearch::UNINITIALIZED_POSITION;
+ m_query_len = KSearch::UNINITIALIZED_POSITION;
+ m_query_len_on_ref = KSearch::UNINITIALIZED_POSITION;
+ m_allele_start = KSearch::UNINITIALIZED_POSITION;
+ m_query_start = KSearch::UNINITIALIZED_POSITION;
+ }
+
void CVRefVariation::Release()
{
if (m_pSelf)
@@ -1136,17 +1125,26 @@ namespace KSearch
#if DEBUG_PRINT != 0
printf("Releasing VRefVariation %p\n", m_pSelf);
#endif
- ::VRefVariationIUPACRelease(m_pSelf);
- m_pSelf = NULL;
+ ::RefVariationRelease(m_pSelf); // don't throw an exception since this function is called in the destructor
+ ClearMembers();
}
}
void CVRefVariation::Clone(CVRefVariation const& x)
{
m_pSelf = x.m_pSelf;
- ::VRefVariationIUPACAddRef ( m_pSelf );
+ ::RefVariationAddRef ( m_pSelf ); // TODO: maybe we have to check rc and throw here
m_bases_start = x.m_bases_start;
+ m_allele = x.m_allele;
+ m_query = x.m_query;
+ m_allele_len = x.m_allele_len;
+ m_allele_len_on_ref = x.m_allele_len_on_ref;
+ m_query_len = x.m_query_len;
+ m_query_len_on_ref = x.m_query_len_on_ref;
+ m_allele_start = x.m_allele_start;
+ m_query_start = x.m_query_start;
+
#if DEBUG_PRINT != 0
printf ("CLONING VRefVariation %p\n", m_pSelf);
#endif
@@ -1154,87 +1152,143 @@ namespace KSearch
char const* CVRefVariation::GetSearchQuery() const
{
if ( m_pSelf == NULL )
- return "";
- char const* ret = ::VRefVariationIUPACGetSearchQuery ( m_pSelf );
- return ret == NULL ? "" : ret;
+ throw Utils::CErrorMsg("GetSearchQuery on uninitialized RefVariation");
+
+ if ( m_query == NULL )
+ {
+ rc_t rc = ::RefVariationGetIUPACSearchQuery ( m_pSelf, & m_query, & m_query_len, & m_query_start );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetIUPACSearchQuery");
+ }
+
+ return m_query;
}
size_t CVRefVariation::GetSearchQueryStartRelative() const
{
if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetSearchQueryStart ( m_pSelf );
+ throw Utils::CErrorMsg("GetSearchQueryStartRelative on uninitialized RefVariation");
+
+ if ( m_query_start == KSearch::UNINITIALIZED_POSITION )
+ {
+ rc_t rc = ::RefVariationGetIUPACSearchQuery ( m_pSelf, & m_query, & m_query_len, & m_query_start );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetIUPACSearchQuery");
+ }
+
+ return m_query_start;
}
size_t CVRefVariation::GetSearchQueryStartAbsolute() const
{
- if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetSearchQueryStart ( m_pSelf ) + m_bases_start;
+ return GetSearchQueryStartRelative() + m_bases_start;
}
size_t CVRefVariation::GetSearchQuerySize() const
{
if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetSearchQuerySize ( m_pSelf );
+ throw Utils::CErrorMsg("GetSearchQuerySize on uninitialized RefVariation");
+
+ if ( m_query_len == KSearch::UNINITIALIZED_POSITION )
+ {
+ rc_t rc = ::RefVariationGetIUPACSearchQuery ( m_pSelf, & m_query, & m_query_len, & m_query_start );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetIUPACSearchQuery");
+ }
+
+ return m_query_len;
}
size_t CVRefVariation::GetSearchQueryLenOnRef() const
{
if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetSearchQueryLenOnRef ( m_pSelf );
+ throw Utils::CErrorMsg("GetSearchQueryLenOnRef on uninitialized RefVariation");
+
+ if ( m_query_len_on_ref == KSearch::UNINITIALIZED_POSITION )
+ {
+ rc_t rc = ::RefVariationGetSearchQueryLenOnRef ( m_pSelf, & m_query_len_on_ref );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetSearchQueryLenOnRef");
+ }
+
+ return m_query_len_on_ref;
}
- char const* CVRefVariation::GetAllele( size_t& ret_size ) const
+ char const* CVRefVariation::GetAllele() const
{
if ( m_pSelf == NULL )
+ throw Utils::CErrorMsg("GetAllele on uninitialized RefVariation");
+
+ if ( m_allele == NULL )
{
- ret_size = 0;
- return "";
+ rc_t rc = ::RefVariationGetAllele ( m_pSelf, & m_allele, & m_allele_len, & m_allele_start );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetAllele");
}
- return ::VRefVariationIUPACGetAllele ( m_pSelf, & ret_size );
+
+ return m_allele;
}
size_t CVRefVariation::GetAlleleStartRelative() const
{
if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetAlleleStart ( m_pSelf );
+ throw Utils::CErrorMsg("GetAllele on uninitialized RefVariation");
+
+ if ( m_allele_start == KSearch::UNINITIALIZED_POSITION )
+ {
+ rc_t rc = ::RefVariationGetAllele ( m_pSelf, & m_allele, & m_allele_len, & m_allele_start );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetAllele");
+ }
+
+ return m_allele_start;
}
size_t CVRefVariation::GetAlleleStartAbsolute() const
{
- if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetAlleleStart ( m_pSelf ) + m_bases_start;
+ return GetAlleleStartRelative() + m_bases_start;
}
size_t CVRefVariation::GetAlleleSize() const
{
if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetAlleleSize ( m_pSelf );
+ throw Utils::CErrorMsg("GetAlleleSize on uninitialized RefVariation");
+
+ if ( m_allele_len == KSearch::UNINITIALIZED_POSITION )
+ {
+ rc_t rc = ::RefVariationGetAllele ( m_pSelf, & m_allele, & m_allele_len, & m_allele_start );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetAllele");
+ }
+
+ return m_allele_len;
}
size_t CVRefVariation::GetAlleleLenOnRef() const
{
if ( m_pSelf == NULL )
- return 0;
- return ::VRefVariationIUPACGetAlleleLenOnRef ( m_pSelf );
+ throw Utils::CErrorMsg("GetAlleleLenOnRef on uninitialized RefVariation");
+
+ if ( m_allele_len_on_ref == KSearch::UNINITIALIZED_POSITION )
+ {
+ rc_t rc = ::RefVariationGetAlleleLenOnRef ( m_pSelf, & m_allele_len_on_ref );
+ if (rc)
+ throw Utils::CErrorMsg(rc, "RefVariationGetAlleleLenOnRef");
+ }
+
+ return m_allele_len_on_ref;
}
- CVRefVariation VRefVariationIUPACMake ( uint32_t alg, char const* ref, size_t ref_size,
+ CVRefVariation VRefVariationIUPACMake ( ::RefVarAlg alg, char const* ref, size_t ref_size,
size_t ref_pos_var, char const* variation, size_t variation_size,
size_t var_len_on_ref, size_t bases_start)
{
CVRefVariation obj;
- rc_t rc = ::VRefVariationIUPACMake ( & obj.m_pSelf, alg,
- ref, ref_size, ref_pos_var, variation, variation_size, var_len_on_ref);
+ rc_t rc = ::RefVariationIUPACMake ( & obj.m_pSelf,
+ ref, ref_size, ref_pos_var, var_len_on_ref, variation, variation_size, alg );
if (rc)
- throw Utils::CErrorMsg(rc, "VRefVariationIUPACMake");
+ throw Utils::CErrorMsg(rc, "RefVariationIUPACMake");
#if DEBUG_PRINT != 0
printf("Created RefVariation (rd) %p\n", obj.m_pSelf);
diff --git a/tools/ref-variation/helper.h b/tools/ref-variation/helper.h
index cadc28d..88605cf 100644
--- a/tools/ref-variation/helper.h
+++ b/tools/ref-variation/helper.h
@@ -24,6 +24,9 @@
*
*/
+#ifndef HELPER_H
+#define HELPER_H
+
// helper.h
#include <exception>
@@ -44,6 +47,7 @@
#include <vdb/vdb-priv.h>
#include <kdb/index.h>
+#include <search/ref-variation.h>
#ifndef countof
#define countof(arr) (sizeof(arr)/sizeof(arr[0]))
@@ -476,24 +480,12 @@ struct VRefVariation;
namespace KSearch
{
-#if 0 // turning off old code
- void FindRefVariationRegionAscii (
- char const* ref, size_t ref_size, size_t ref_pos_var,
- char const* variation, size_t variation_size, size_t var_len_on_ref,
- size_t* p_ref_start, size_t* p_ref_len
- );
-
- void FindRefVariationRegionAscii (
- std::string const& ref, size_t ref_pos_var,
- char const* variation, size_t variation_size, size_t var_len_on_ref,
- size_t& ref_start, size_t& ref_len
- );
-#endif
+ enum { UNINITIALIZED_POSITION = (size_t)-1 };
class CVRefVariation
{
public:
- friend CVRefVariation VRefVariationIUPACMake ( uint32_t alg,
+ friend CVRefVariation VRefVariationIUPACMake ( ::RefVarAlg alg,
char const* ref, size_t ref_size,
size_t ref_pos_var, char const* variation, size_t variation_size,
size_t var_len_on_ref, size_t bases_start);
@@ -511,7 +503,7 @@ namespace KSearch
size_t GetSearchQuerySize() const;
size_t GetSearchQueryLenOnRef() const;
- char const* GetAllele( size_t& ret_size ) const;
+ char const* GetAllele() const;
size_t GetAlleleStartRelative() const; // relative to search region
size_t GetAlleleStartAbsolute() const; // in absolute reference coordinates
size_t GetAlleleSize() const;
@@ -519,12 +511,17 @@ namespace KSearch
private:
void Clone(CVRefVariation const& x);
- ::VRefVariation* m_pSelf;
+ void ClearMembers();
+ ::RefVariation* m_pSelf;
size_t m_bases_start; // the absolute position on the reference
// starting at which the search was initiated
+
+ mutable char const* m_allele, *m_query;
+ mutable size_t m_allele_len, m_allele_len_on_ref, m_query_len, m_query_len_on_ref;
+ mutable size_t m_allele_start, m_query_start;
};
- CVRefVariation VRefVariationIUPACMake ( uint32_t alg, char const* ref, size_t ref_size,
+ CVRefVariation VRefVariationIUPACMake ( ::RefVarAlg alg, char const* ref, size_t ref_size,
size_t ref_pos_var, char const* variation, size_t variation_size,
size_t var_len_on_ref, size_t bases_start);
@@ -602,4 +599,5 @@ namespace KProc
private:
TLockable & m_lock;
};
-}
\ No newline at end of file
+}
+#endif
\ No newline at end of file
diff --git a/tools/ref-variation/ref-variation.cpp b/tools/ref-variation/ref-variation.cpp
index b41417d..ebf7ab7 100644
--- a/tools/ref-variation/ref-variation.cpp
+++ b/tools/ref-variation/ref-variation.cpp
@@ -46,7 +46,6 @@
#include "helper.h"
#include "common.h"
-#include <search/grep.h>
#define CPP_THREADS 0
@@ -76,7 +75,7 @@ public:
#endif
-namespace RefVariation
+namespace NSRefVariation
{
#define COUNT_STRAND_NONE_STR "none"
@@ -451,7 +450,7 @@ namespace RefVariation
TLock* lock_cout, size_t thread_num,
coverage_info* pcoverage_count, size_t index )
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -489,7 +488,7 @@ namespace RefVariation
uint64_t id_count;
bool found = kindex.FindText ( ref_name, & ref_id_start, & id_count, NULL, NULL );
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -518,7 +517,7 @@ namespace RefVariation
// but maybe we also need to report matches and total alignments
// here (0 0)
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -539,7 +538,7 @@ namespace RefVariation
if ( depth == 0 )
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -569,8 +568,8 @@ namespace RefVariation
count = cursor.ReadItems ( pos + ref_id_start, PileupColumnIndex[idx_MISMATCH_COUNTS], mismatch, sizeof mismatch );
assert ( count == 0 || count == 4 );
- size_t allele_size;
- char const* allele = obj.GetAllele(allele_size);
+ size_t allele_size = obj.GetAlleleSize();
+ char const* allele = obj.GetAllele();
assert (count == 0 || pos - ref_pos < allele_size );
size_t alignments_matched = count == 0 ? 0 :
mismatch [base2na_to_index(allele[pos - ref_pos])];
@@ -649,14 +648,14 @@ namespace RefVariation
#endif
}
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_SOME_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_SOME_DETAILS )
{
char run_name[64];
uint32_t count = cursor.ReadItems ( id_first, PileupColumnIndex[idx_RUN_NAME], run_name, countof(run_name)-1 );
assert (count < countof(run_name));
run_name [count] = '\0';
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_SOME_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_SOME_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -675,7 +674,7 @@ namespace RefVariation
if ( e.getRC() == SILENT_RC(rcVFS,rcMgr,rcOpening,rcDirectory,rcNotFound)
|| e.getRC() == SILENT_RC(rcVFS,rcTree,rcResolving,rcPath,rcNotFound))
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -688,7 +687,7 @@ namespace RefVariation
}
else if ( e.getRC() == SILENT_RC(rcDB,rcMgr,rcOpening,rcDatabase,rcIncorrect))
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogWarn,
@@ -740,7 +739,7 @@ namespace RefVariation
TLock* lock_cout, size_t thread_num,
coverage_info* pcoverage_count, size_t index)
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -825,7 +824,7 @@ namespace RefVariation
if (is_negative)
++ alignments_matched_negative;
}
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_SOME_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_SOME_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -850,7 +849,7 @@ BREAK_ALIGNMENT_ITER:
}
else
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -943,7 +942,7 @@ BREAK_ALIGNMENT_ITER:
char const* path = input_run.GetRunPath().c_str();
char const* pileup_path = input_run.GetPileupStatsPath().c_str();
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogInfo,
@@ -964,7 +963,7 @@ BREAK_ALIGNMENT_ITER:
{
if ( strstr (e.what(), "Cannot open accession") == e.what() )
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_MORE_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_MORE_DETAILS )
{
LOCK_GUARD l(*lock_cout);
PLOGMSG ( klogWarn,
@@ -1081,7 +1080,7 @@ BREAK_ALIGNMENT_ITER:
else
{
// split
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_SOME_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_SOME_DETAILS )
{
PLOGMSG ( klogInfo,
( klogInfo,
@@ -1168,7 +1167,7 @@ BREAK_ALIGNMENT_ITER:
void print_variation_specs ( char const* ref_slice, size_t ref_slice_size,
KSearch::CVRefVariation const& obj, const char* query, size_t query_len )
{
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_SOME_DETAILS )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_SOME_DETAILS )
{
size_t ref_start = obj.GetAlleleStartAbsolute();
size_t ref_len = obj.GetAlleleLenOnRef();
@@ -1185,7 +1184,7 @@ BREAK_ALIGNMENT_ITER:
));
}
- if ( g_Params.verbosity >= RefVariation::VERBOSITY_PRINT_VAR_SPEC )
+ if ( g_Params.verbosity >= NSRefVariation::VERBOSITY_PRINT_VAR_SPEC )
{
PLOGMSG ( klogWarn,
( klogWarn,
@@ -1195,8 +1194,8 @@ BREAK_ALIGNMENT_ITER:
g_Params.var_len_on_ref, query_len, query
));
- size_t allele_size;
- char const* allele = obj.GetAllele ( allele_size );
+ size_t allele_size = obj.GetAlleleSize();
+ char const* allele = obj.GetAllele();
PLOGMSG ( klogWarn,
( klogWarn,
"Adjusted variation spec: $(REFACC):$(REFPOSVAR):$(VARLENONREF):$(ALLELE)",
@@ -1233,7 +1232,7 @@ BREAK_ALIGNMENT_ITER:
(int)g_Params.var_len_on_ref, ref_chunk.data() + ref_pos_in_slice );
}
- cont = Common::find_variation_core_step ( obj,
+ cont = Common::find_variation_core_step ( obj, g_Params.alg,
ref_chunk.data(), ref_chunk.size(), ref_pos_in_slice,
query, var_len, g_Params.var_len_on_ref,
chunk_size, chunk_no_last, bases_start, chunk_no_start, chunk_no_end );
@@ -1253,7 +1252,7 @@ BREAK_ALIGNMENT_ITER:
ref_slice = ref_seq.getReferenceBases (
bases_start, (chunk_no_end - chunk_no_start + 1)*chunk_size );
- cont = Common::find_variation_core_step ( obj,
+ cont = Common::find_variation_core_step ( obj, g_Params.alg,
ref_slice.c_str(), ref_slice.size(), ref_pos_in_slice,
query, var_len, g_Params.var_len_on_ref,
chunk_size, chunk_no_last, bases_start, chunk_no_start, chunk_no_end );
@@ -1292,9 +1291,11 @@ BREAK_ALIGNMENT_ITER:
if ( first.GetAlleleStartAbsolute() != cur.GetAlleleStartAbsolute()
|| first.GetAlleleLenOnRef() != cur.GetAlleleLenOnRef() )
{
- size_t allele_size_first, allele_size_cur;
- char const* allele_first = first.GetAllele ( allele_size_first );
- char const* allele_cur = cur.GetAllele ( allele_size_cur );
+ size_t allele_size_first = first.GetAlleleSize();
+ char const* allele_first = first.GetAllele();
+
+ size_t allele_size_cur = cur.GetAlleleSize();
+ char const* allele_cur = cur.GetAllele();
PLOGMSG( klogWarn, (klogWarn,
"Inconsistent variations found: (start=$(STARTFIRST), len=$(LENFIRST), allele=$(ALLELEFIRST)) vs (start=$(STARTCUR), len=$(LENCUR), allele=$(ALLELECUR))",
@@ -2024,18 +2025,18 @@ extern "C"
OUTMSG (("\nOptions:\n"));
- HelpOptionLine (RefVariation::ALIAS_REFERENCE_ACC, RefVariation::OPTION_REFERENCE_ACC, "acc", RefVariation::USAGE_REFERENCE_ACC);
- HelpOptionLine (RefVariation::ALIAS_REF_POS, RefVariation::OPTION_REF_POS, "value", RefVariation::USAGE_REF_POS);
- HelpOptionLine (NULL, RefVariation::OPTION_QUERY, "string", RefVariation::USAGE_QUERY);
- HelpOptionLine (RefVariation::ALIAS_VAR_LEN_ON_REF, RefVariation::OPTION_VAR_LEN_ON_REF, "value", RefVariation::USAGE_VAR_LEN_ON_REF);
- HelpOptionLine (RefVariation::ALIAS_THREADS, RefVariation::OPTION_THREADS, "value", RefVariation::USAGE_THREADS);
- HelpOptionLine (RefVariation::ALIAS_COVERAGE, RefVariation::OPTION_COVERAGE, "", RefVariation::USAGE_COVERAGE);
- HelpOptionLine (RefVariation::ALIAS_INPUT_FILE, RefVariation::OPTION_INPUT_FILE, "string", RefVariation::USAGE_INPUT_FILE);
- HelpOptionLine (NULL, RefVariation::OPTION_COUNT_STRAND, "value", RefVariation::USAGE_COUNT_STRAND);
- HelpOptionLine (NULL, RefVariation::OPTION_ALG, "value", RefVariation::USAGE_ALG);
- //HelpOptionLine (RefVariation::ALIAS_VERBOSITY, RefVariation::OPTION_VERBOSITY, "", RefVariation::USAGE_VERBOSITY);
+ HelpOptionLine (NSRefVariation::ALIAS_REFERENCE_ACC, NSRefVariation::OPTION_REFERENCE_ACC, "acc", NSRefVariation::USAGE_REFERENCE_ACC);
+ HelpOptionLine (NSRefVariation::ALIAS_REF_POS, NSRefVariation::OPTION_REF_POS, "value", NSRefVariation::USAGE_REF_POS);
+ HelpOptionLine (NULL, NSRefVariation::OPTION_QUERY, "string", NSRefVariation::USAGE_QUERY);
+ HelpOptionLine (NSRefVariation::ALIAS_VAR_LEN_ON_REF, NSRefVariation::OPTION_VAR_LEN_ON_REF, "value", NSRefVariation::USAGE_VAR_LEN_ON_REF);
+ HelpOptionLine (NSRefVariation::ALIAS_THREADS, NSRefVariation::OPTION_THREADS, "value", NSRefVariation::USAGE_THREADS);
+ HelpOptionLine (NSRefVariation::ALIAS_COVERAGE, NSRefVariation::OPTION_COVERAGE, "", NSRefVariation::USAGE_COVERAGE);
+ HelpOptionLine (NSRefVariation::ALIAS_INPUT_FILE, NSRefVariation::OPTION_INPUT_FILE, "string", NSRefVariation::USAGE_INPUT_FILE);
+ HelpOptionLine (NULL, NSRefVariation::OPTION_COUNT_STRAND, "value", NSRefVariation::USAGE_COUNT_STRAND);
+ HelpOptionLine (NULL, NSRefVariation::OPTION_ALG, "value", NSRefVariation::USAGE_ALG);
+ //HelpOptionLine (NSRefVariation::ALIAS_VERBOSITY, NSRefVariation::OPTION_VERBOSITY, "", NSRefVariation::USAGE_VERBOSITY);
#if SECRET_OPTION != 0
- HelpOptionLine (NULL, RefVariation::OPTION_SECRET, NULL, RefVariation::USAGE_SECRET);
+ HelpOptionLine (NULL, NSRefVariation::OPTION_SECRET, NULL, NSRefVariation::USAGE_SECRET);
#endif
XMLLogger_Usage();
@@ -2097,6 +2098,6 @@ extern "C"
*/
- return RefVariation::find_variation_region ( argc, argv );
+ return NSRefVariation::find_variation_region ( argc, argv );
}
}
diff --git a/tools/ref-variation/ref-variation.vers b/tools/ref-variation/ref-variation.vers
index 8acdd82..097a15a 100644
--- a/tools/ref-variation/ref-variation.vers
+++ b/tools/ref-variation/ref-variation.vers
@@ -1 +1 @@
-0.0.1
+2.6.2
diff --git a/tools/ref-variation/ref-variation.vers.h b/tools/ref-variation/ref-variation.vers.h
index d0b2ae4..9d66d52 100644
--- a/tools/ref-variation/ref-variation.vers.h
+++ b/tools/ref-variation/ref-variation.vers.h
@@ -1 +1 @@
-#define REF_VARIATION_VERS 0x00000001
+#define REF_VARIATION_VERS 0x02060002
diff --git a/tools/ref-variation/var-expand.cpp b/tools/ref-variation/var-expand.cpp
index ec9c34f..f36f822 100644
--- a/tools/ref-variation/var-expand.cpp
+++ b/tools/ref-variation/var-expand.cpp
@@ -2,7 +2,6 @@
#include <kapp/main.h>
#include <klib/rc.h>
-#include <klib/printf.h>
#include <iostream>
#include <stdio.h>
@@ -12,11 +11,16 @@
#include "helper.h"
#include "common.h"
-#include <search/grep.h>
#define PARAM_ALG_SW "sw"
#define PARAM_ALG_RA "ra"
+#ifdef _WIN32
+#define PRSIZE_T "I"
+#else
+#define PRSIZE_T "z"
+#endif
+
namespace VarExpand
{
struct Params
@@ -38,6 +42,35 @@ namespace VarExpand
{ OPTION_ALG, NULL, NULL, USAGE_ALG, 1, true, false }
};
+ template <class TObject, typename TKey> class CNGSObject
+ {
+ TObject* m_self;
+ TKey m_key;
+
+ public:
+ CNGSObject() : m_self(NULL) {}
+ ~CNGSObject() { Release(); }
+ CNGSObject ( CNGSObject const& x);
+ CNGSObject& operator=(CNGSObject const& x);
+
+ void Release()
+ {
+ delete m_self;
+ m_self = NULL;
+ }
+
+ void Init(TObject const& obj, TKey const& key)
+ {
+ TObject* p = new TObject(obj); // may throw ?
+
+ Release();
+ m_self = p;
+ m_key = key;
+ }
+
+ TKey const& GetKey() const { return m_key; }
+ TObject const* GetSelfPtr() const { return m_self; }
+ };
bool check_ref_slice ( char const* ref, size_t ref_size )
{
@@ -81,7 +114,7 @@ namespace VarExpand
(int)del_len, ref_chunk.data() + ref_pos_in_slice ));
}
- cont = Common::find_variation_core_step ( obj,
+ cont = Common::find_variation_core_step ( obj, g_Params.alg,
ref_chunk.data(), ref_chunk.size(), ref_pos_in_slice,
allele, var_len, del_len,
chunk_size, chunk_no_last, bases_start, chunk_no_start, chunk_no_end );
@@ -99,7 +132,7 @@ namespace VarExpand
ref_slice = ref_seq.getReferenceBases (
bases_start, (chunk_no_end - chunk_no_start + 1)*chunk_size );
- cont = Common::find_variation_core_step ( obj,
+ cont = Common::find_variation_core_step ( obj, g_Params.alg,
ref_slice.c_str(), ref_slice.size(), ref_pos_in_slice,
allele, var_len, del_len,
chunk_size, chunk_no_last, bases_start, chunk_no_start, chunk_no_end );
@@ -108,12 +141,30 @@ namespace VarExpand
}
}
- void expand_variation ( ngs::ReferenceSequence const& ref_seq,
- char const* key, size_t key_len,
- char const* ref_name, size_t ref_name_len,
- size_t ref_pos, size_t del_len,
- char const* allele, size_t allele_len )
+ void expand_variation ( //ngs::ReferenceSequence const& ref_seq,
+ CNGSObject <ngs::ReferenceSequence, ncbi::String>& ref_obj,
+ char const* key, size_t key_len,
+ char const* ref_name, size_t ref_name_len,
+ size_t ref_pos, size_t del_len,
+ char const* allele, size_t allele_len )
{
+ ncbi::String sref_name ( ref_name, ref_name_len );
+ if ( ref_obj.GetSelfPtr() == NULL || ref_obj.GetKey() != sref_name )
+ {
+ try
+ {
+ ref_obj.Init (ncbi::NGS::openReferenceSequence(sref_name), sref_name);
+ }
+ catch (ngs::ErrorMsg const& e)
+ {
+ if ( strstr ( e.what(), "failed to open table" ) == NULL )
+ throw;
+ return;
+ }
+ }
+
+ ngs::ReferenceSequence const& ref_seq = * ref_obj.GetSelfPtr();
+
KSearch::CVRefVariation obj;
std::string ref_allele;
@@ -122,9 +173,8 @@ namespace VarExpand
if ( ref_allele.size() == 0 )
ref_allele = "-";
- char buf[512];
- size_t new_allele_size;
- char const* new_allele = obj.GetAllele ( new_allele_size );
+ size_t new_allele_size = obj.GetAlleleSize();
+ char const* new_allele = obj.GetAllele();
if ( new_allele_size == 0)
{
@@ -132,8 +182,8 @@ namespace VarExpand
new_allele_size = 1;
}
- string_printf ( buf, countof(buf), NULL,
- "%.*s\t%.*s:%zu:%zu:%.*s\t%.*s:%zu:%zu:%.*s\t%.*s:%zu:%zu:%s",
+ printf (
+ "%.*s\t%.*s:%"PRSIZE_T"u:%"PRSIZE_T"u:%.*s\t%.*s:%"PRSIZE_T"u:%"PRSIZE_T"u:%.*s\t%.*s:%"PRSIZE_T"u:%"PRSIZE_T"u:%s\n",
(int)key_len, key,
(int)ref_name_len, ref_name,
@@ -148,8 +198,6 @@ namespace VarExpand
obj.GetAlleleStartAbsolute(), obj.GetAlleleLenOnRef(),
ref_allele.c_str()
);
- buf [countof(buf) - 1] = '\0';
- printf ("%s\n", buf);
}
@@ -249,8 +297,10 @@ namespace VarExpand
return true;
}
-#if 0
- void process_input_line ( char const* line, size_t line_size )
+
+ void process_input_line (
+ CNGSObject <ngs::ReferenceSequence, ncbi::String>& ref_obj,
+ char const* line, size_t line_size )
{
char const* key, *ref_name, *allele;
size_t key_len, ref_name_len, allele_len, ref_pos, del_len;
@@ -261,96 +311,21 @@ namespace VarExpand
& allele, & allele_len,
& ref_pos, & del_len ) )
{
- expand_variation ( key, key_len,
+ expand_variation ( ref_obj, key, key_len,
ref_name, ref_name_len,
ref_pos, del_len,
allele, allele_len );
}
}
-#endif
+
int expand_variations_impl ( )
{
std::string line;
- bool end_of_stream = false;
-
- ncbi::String sref_name, sref_name_prev;
- char const* key, *ref_name, *allele;
- size_t key_len, ref_name_len, allele_len, ref_pos, del_len;
-
- while ( ! end_of_stream )
- {
- end_of_stream = std::getline ( std::cin, line ).eof();
- if (line.size() > 0 && parse_input_line ( line.c_str(), line.size(),
- & key, & key_len,
- & ref_name, & ref_name_len,
- & allele, & allele_len,
- & ref_pos, & del_len ))
- {
- try // really only trying to open the first reference
- {
- sref_name.assign ( ref_name, ref_name_len );
- ngs::ReferenceSequence ref_seq = ncbi::NGS::openReferenceSequence(sref_name);
- break;
- }
- catch (ngs::ErrorMsg const& e)
- {
- if ( strstr ( e.what(), "failed to open table" ) == NULL )
- throw;
- else
- continue;
- }
- }
- }
-
- if ( end_of_stream )
- return 0;
-
- // here we have the first good reference name in sref_name (shall be opened with no exceptions)
- ngs::ReferenceSequence ref_seq = ncbi::NGS::openReferenceSequence(sref_name);
- expand_variation ( ref_seq, key, key_len,
- ref_name, ref_name_len,
- ref_pos, del_len,
- allele, allele_len );
- sref_name_prev = sref_name;
-
- // process the next input lines
+ CNGSObject <ngs::ReferenceSequence, ncbi::String> ref_obj;
while ( std::getline ( std::cin, line ) )
{
- if (line.size() > 0 && parse_input_line ( line.c_str(), line.size(),
- & key, & key_len,
- & ref_name, & ref_name_len,
- & allele, & allele_len,
- & ref_pos, & del_len ))
- {
- sref_name.assign ( ref_name, ref_name_len );
- if (sref_name == sref_name_prev)
- {
- expand_variation ( ref_seq, key, key_len,
- ref_name, ref_name_len,
- ref_pos, del_len,
- allele, allele_len );
- }
- else
- {
- try
- {
-
- ref_seq = ncbi::NGS::openReferenceSequence(sref_name);
- expand_variation ( ref_seq, key, key_len,
- ref_name, ref_name_len,
- ref_pos, del_len,
- allele, allele_len );
- sref_name_prev = sref_name;
- }
- catch (ngs::ErrorMsg const& e)
- {
- if ( strstr ( e.what(), "failed to open table" ) == NULL )
- throw;
- else
- continue;
- }
- }
- }
+ if (line.size() > 0)
+ process_input_line ( ref_obj, line.c_str(), line.size() );
}
return 0;
diff --git a/tools/ref-variation/var-expand.vers b/tools/ref-variation/var-expand.vers
index 8acdd82..097a15a 100644
--- a/tools/ref-variation/var-expand.vers
+++ b/tools/ref-variation/var-expand.vers
@@ -1 +1 @@
-0.0.1
+2.6.2
diff --git a/tools/ref-variation/var-expand.vers.h b/tools/ref-variation/var-expand.vers.h
index faa4618..8d3b773 100644
--- a/tools/ref-variation/var-expand.vers.h
+++ b/tools/ref-variation/var-expand.vers.h
@@ -1 +1 @@
-#define VAR_EXPAND_VERS 0x00000001
+#define VAR_EXPAND_VERS 0x02060002
diff --git a/tools/refseq-load/refseq-load.vers b/tools/refseq-load/refseq-load.vers
index da6b0a8..ecd7ee5 100644
--- a/tools/refseq-load/refseq-load.vers
+++ b/tools/refseq-load/refseq-load.vers
@@ -1 +1 @@
-2.5.6
+2.5.8
diff --git a/tools/sra-dump/abi-dump.vers b/tools/sra-dump/abi-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-dump/abi-dump.vers
+++ b/tools/sra-dump/abi-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-dump/abi-dump.vers.h b/tools/sra-dump/abi-dump.vers.h
index 5b8896f..5cb1220 100644
--- a/tools/sra-dump/abi-dump.vers.h
+++ b/tools/sra-dump/abi-dump.vers.h
@@ -1 +1 @@
-#define ABI_DUMP_VERS 0x02050007
+#define ABI_DUMP_VERS 0x02060002
diff --git a/tools/sra-dump/core.c b/tools/sra-dump/core.c
index 9224799..d955a92 100644
--- a/tools/sra-dump/core.c
+++ b/tools/sra-dump/core.c
@@ -1134,11 +1134,11 @@ rc_t CC KMain ( int argc, char* argv[] )
}
else if ( SRADumper_GetArg( &fmt, "N", "minSpotId", &i, argc, argv, &arg ) )
{
- minSpotId = AsciiToU32( arg, NULL, NULL );
+ minSpotId = AsciiToU64( arg, NULL, NULL );
}
else if ( SRADumper_GetArg( &fmt, "X", "maxSpotId", &i, argc, argv, &arg ) )
{
- maxSpotId = AsciiToU32( arg, NULL, NULL );
+ maxSpotId = AsciiToU64( arg, NULL, NULL );
}
else if ( SRADumper_GetArg( &fmt, "G", "spot-group", &i, argc, argv, NULL ) )
{
diff --git a/tools/sra-dump/fastq-dump.vers b/tools/sra-dump/fastq-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-dump/fastq-dump.vers
+++ b/tools/sra-dump/fastq-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-dump/fastq-dump.vers.h b/tools/sra-dump/fastq-dump.vers.h
index 22da127..2e4eac7 100644
--- a/tools/sra-dump/fastq-dump.vers.h
+++ b/tools/sra-dump/fastq-dump.vers.h
@@ -1 +1 @@
-#define FASTQ_DUMP_VERS 0x02050007
+#define FASTQ_DUMP_VERS 0x02060002
diff --git a/tools/sra-dump/illumina-dump.vers b/tools/sra-dump/illumina-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-dump/illumina-dump.vers
+++ b/tools/sra-dump/illumina-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-dump/illumina-dump.vers.h b/tools/sra-dump/illumina-dump.vers.h
index 2b6b70b..5578f1e 100644
--- a/tools/sra-dump/illumina-dump.vers.h
+++ b/tools/sra-dump/illumina-dump.vers.h
@@ -1 +1 @@
-#define ILLUMINA_DUMP_VERS 0x02050007
+#define ILLUMINA_DUMP_VERS 0x02060002
diff --git a/tools/sra-dump/sff-dump.vers b/tools/sra-dump/sff-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-dump/sff-dump.vers
+++ b/tools/sra-dump/sff-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-dump/sff-dump.vers.h b/tools/sra-dump/sff-dump.vers.h
index ef4cf1f..edc31de 100644
--- a/tools/sra-dump/sff-dump.vers.h
+++ b/tools/sra-dump/sff-dump.vers.h
@@ -1 +1 @@
-#define SFF_DUMP_VERS 0x02050007
+#define SFF_DUMP_VERS 0x02060002
diff --git a/tools/sra-dump/sra-dump.vers b/tools/sra-dump/sra-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-dump/sra-dump.vers
+++ b/tools/sra-dump/sra-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-load/abi-load.vers b/tools/sra-load/abi-load.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-load/abi-load.vers
+++ b/tools/sra-load/abi-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-load/fastq-fmt.c b/tools/sra-load/fastq-fmt.c
index 35c3090..e76f15f 100644
--- a/tools/sra-load/fastq-fmt.c
+++ b/tools/sra-load/fastq-fmt.c
@@ -609,7 +609,7 @@ rc_t FastqLoaderFmt_WriteData(FastqLoaderFmt* self, uint32_t argc, const SRALoad
}
} else if( spot.nreads != 1 ) {
rc = RC(rcSRA, rcFormatter, rcReading, rcData, rcUnsupported);
- SRALoaderFile_LOG(files[i].file, klogErr, rc, "$(msg)", "msg=multiple reads for this platform");
+ SRALoaderFile_LOG(files[g].file, klogErr, rc, "$(msg)", "msg=multiple reads for this platform");
} else if( self->wIonTorrent != NULL ) {
rc = SRAWriterIonTorrent_WriteRead(self->wIonTorrent, argv[0], spot.name,
spot.reads[0].seq, spot.reads[0].qual, NULL, NULL, 0, 0, 0, 0);
diff --git a/tools/sra-load/fastq-load.vers b/tools/sra-load/fastq-load.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-load/fastq-load.vers
+++ b/tools/sra-load/fastq-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-load/helicos-load.vers b/tools/sra-load/helicos-load.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-load/helicos-load.vers
+++ b/tools/sra-load/helicos-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-load/illumina-load.vers b/tools/sra-load/illumina-load.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-load/illumina-load.vers
+++ b/tools/sra-load/illumina-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-load/pacbio-loadxml.vers b/tools/sra-load/pacbio-loadxml.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-load/pacbio-loadxml.vers
+++ b/tools/sra-load/pacbio-loadxml.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-load/sff-load.vers b/tools/sra-load/sff-load.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-load/sff-load.vers
+++ b/tools/sra-load/sff-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-load/srf-load.vers b/tools/sra-load/srf-load.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-load/srf-load.vers
+++ b/tools/sra-load/srf-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-pileup/read_fkt.c b/tools/sra-pileup/read_fkt.c
index 63db54f..4f2ae53 100644
--- a/tools/sra-pileup/read_fkt.c
+++ b/tools/sra-pileup/read_fkt.c
@@ -31,59 +31,59 @@
rc_t read_bool( int64_t row_id, const VCursor * cursor, uint32_t idx, bool *res, bool dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) bool",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const bool * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) bool failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ const bool * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) bool failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_bool_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const bool **res, uint32_t *res_len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) bool ptr",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- bool * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( res_len != NULL )
- *res_len = row_len;
- }
- }
+ }
+ else
+ {
+ bool * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( res_len != NULL )
+ *res_len = row_len;
+ }
+ }
return rc;
}
@@ -93,59 +93,59 @@ rc_t read_bool_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const
rc_t read_uint8( int64_t row_id, const VCursor * cursor, uint32_t idx, uint8_t *res, uint8_t dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) uint8",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const uint8_t * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) char_ptr failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ const uint8_t * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) char_ptr failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_uint8_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const uint8_t **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) uint8 ptr",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const uint8_t * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) char_ptr failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ const uint8_t * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) char_ptr failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -155,59 +155,59 @@ rc_t read_uint8_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const
rc_t read_uint32( int64_t row_id, const VCursor * cursor, uint32_t idx, uint32_t *res, uint32_t dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) uint32",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- uint32_t * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ uint32_t * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_uint32_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const uint32_t **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) uint32 ptr",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- uint32_t * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ uint32_t * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -217,59 +217,59 @@ rc_t read_uint32_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, cons
rc_t read_int32( int64_t row_id, const VCursor * cursor, uint32_t idx, int32_t *res, int32_t dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) int32",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- int32_t * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ int32_t * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_int32_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const int32_t **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) int32 ptr",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- int32_t * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ int32_t * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint32_t (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -279,59 +279,59 @@ rc_t read_int32_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const
rc_t read_int64( int64_t row_id, const VCursor * cursor, uint32_t idx, int64_t *res, int64_t dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) int64",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const int64_t *value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) int64 failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ const int64_t *value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) int64 failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_int64_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const int64_t **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) int64 ptr",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- int64_t * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint64_t (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ int64_t * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) uint64_t (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -341,31 +341,31 @@ rc_t read_int64_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const
rc_t read_char_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const char **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) char ptr",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const char * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) char_ptr failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ const char * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) char_ptr failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -375,59 +375,59 @@ rc_t read_char_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const
rc_t read_INSDC_coord_zero( int64_t row_id, const VCursor * cursor, uint32_t idx, INSDC_coord_zero *res, INSDC_coord_zero dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_coord_zero",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- INSDC_coord_zero * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_zero failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ INSDC_coord_zero * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_zero failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_INSDC_coord_zero_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const INSDC_coord_zero **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_coord_zero (ptr)",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const INSDC_coord_zero * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_zero (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ const INSDC_coord_zero * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_zero (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -437,59 +437,59 @@ rc_t read_INSDC_coord_zero_ptr( int64_t row_id, const VCursor * cursor, uint32_t
rc_t read_INSDC_coord_one( int64_t row_id, const VCursor * cursor, uint32_t idx, INSDC_coord_one *res, INSDC_coord_one dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_coord_one",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- INSDC_coord_one * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_one failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ INSDC_coord_one * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_one failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_INSDC_coord_one_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, INSDC_coord_one **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_coord_one (ptr)",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- INSDC_coord_one * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_one (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ INSDC_coord_one * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_one (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -500,59 +500,59 @@ rc_t read_INSDC_coord_one_ptr( int64_t row_id, const VCursor * cursor, uint32_t
rc_t read_INSDC_coord_len( int64_t row_id, const VCursor * cursor, uint32_t idx, INSDC_coord_len *res, INSDC_coord_len dflt, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_coord_len",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- INSDC_coord_len * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_len failed",
- "tr=%li,ti=%u,hi=$u", row_id, idx, hint ) );
- }
- else
- {
- *res = ( row_len > 0 ) ? *value : dflt;
- }
- }
+ }
+ else
+ {
+ INSDC_coord_len * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_len failed",
+ "tr=%li,ti=%u,hi=$u", row_id, idx, hint ) );
+ }
+ else
+ {
+ *res = ( row_len > 0 ) ? *value : dflt;
+ }
+ }
return rc;
}
rc_t read_INSDC_coord_len_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const INSDC_coord_len **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_coord_len (ptr)",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const INSDC_coord_len * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_len (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ const INSDC_coord_len * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_coord_len (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -562,93 +562,93 @@ rc_t read_INSDC_coord_len_ptr( int64_t row_id, const VCursor * cursor, uint32_t
rc_t read_INSDC_read_type_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const INSDC_read_type **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_read_type (ptr)",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const INSDC_read_type * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_read_type (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ const INSDC_read_type * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_read_type (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
rc_t read_INSDC_read_filter_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const INSDC_read_filter **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_read_filter (ptr)",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const INSDC_read_filter * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_read_filter (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ const INSDC_read_filter * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_read_filter (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
rc_t read_INSDC_dna_text_ptr( int64_t row_id, const VCursor * cursor, uint32_t idx, const INSDC_dna_text **res, uint32_t *len, const char * hint )
{
- rc_t rc;
- if ( idx == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
+ rc_t rc;
+ if ( idx == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcReading, rcItem, rcInvalid );
(void)PLOGERR( klogInt, ( klogInt, rc, "column idx invalid at row#$(tr) . $(hi) ) INSDC_dna_text (ptr)",
"tr=%li,hi=%s", row_id, hint ) );
- }
- else
- {
- const INSDC_dna_text * value;
- uint32_t elem_bits, boff, row_len;
- rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_dna_text (ptr) failed",
- "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
- }
- else
- {
- if ( row_len > 0 )
- *res = value;
- if ( len != NULL )
- *len = row_len;
- }
- }
+ }
+ else
+ {
+ const INSDC_dna_text * value;
+ uint32_t elem_bits, boff, row_len;
+ rc = VCursorCellDataDirect( cursor, row_id, idx, &elem_bits, (const void**)&value, &boff, &row_len );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogInt, ( klogInt, rc, "VCursorCellDataDirect( row#$(tr) . idx#$(ti) . $(hi) ) INSDC_dna_text (ptr) failed",
+ "tr=%li,ti=%u,hi=%s", row_id, idx, hint ) );
+ }
+ else
+ {
+ if ( row_len > 0 )
+ *res = value;
+ if ( len != NULL )
+ *len = row_len;
+ }
+ }
return rc;
}
@@ -695,11 +695,11 @@ rc_t add_column( const VCursor * cursor, const char *colname, uint32_t * idx )
static const char * col_name_without_type( const char * colname )
{
- const char * res = colname;
- const char * s = string_chr( colname, string_size( colname ), ')' );
- if ( s != NULL )
- res = ++s;
- return res;
+ const char * res = colname;
+ const char * s = string_chr( colname, string_size( colname ), ')' );
+ if ( s != NULL )
+ res = ++s;
+ return res;
}
void add_opt_column( const VCursor * cursor, const KNamelist *names, const char *colname, uint32_t * idx )
diff --git a/tools/sra-pileup/sam-aligned.c b/tools/sra-pileup/sam-aligned.c
index aa84c7d..ce1b601 100644
--- a/tools/sra-pileup/sam-aligned.c
+++ b/tools/sra-pileup/sam-aligned.c
@@ -73,7 +73,7 @@ const char * EV_AL_TABLE = "EVIDENCE_ALIGNMENT";
MAPQ X
ALIGN_GROUP X - - -
RNA_ORIENTATION X X
-
+ LINKAGE_GROUP X X
-------------------------------------------------------------------------------------------*/
@@ -107,6 +107,7 @@ const char * EV_AL_TABLE = "EVIDENCE_ALIGNMENT";
#define COL_MAPQ "(I32)MAPQ"
#define COL_ALIGN_GROUP "(ascii)ALIGN_GROUP"
#define COL_RNA_ORIENTATION "(ascii)RNA_ORIENTATION"
+#define COL_LNK_GROUP "(ascii)LINKAGE_GROUP"
enum align_table_type
{
@@ -166,6 +167,7 @@ typedef struct align_table_context
/* this is only in prim */
uint32_t al_group_idx;
+ uint32_t lnk_group_idx;
/* this is specific to ev-interval/ev-alignmnet */
uint32_t ploidy_idx;
@@ -212,6 +214,7 @@ static void invalidate_all_column_idx( align_table_context * const atx )
atx->seq_name_idx = COL_NOT_AVAILABLE;
atx->mapq_idx = COL_NOT_AVAILABLE;
atx->al_group_idx = COL_NOT_AVAILABLE;
+ atx->lnk_group_idx = COL_NOT_AVAILABLE;
invalidate_all_cmn_column_idx( &atx->cmn );
invalidate_all_cmn_column_idx( &atx->eval );
}
@@ -290,58 +293,58 @@ static rc_t prepare_cmn_table_rows( const samdump_opts * const opts,
const VCursor * cursor = cmn->cursor;
if ( src == 'P' || src == 'S' )
- rc = add_column( cursor, COL_SEQ_SPOT_ID, &cmn->seq_spot_id_idx );
+ rc = add_column( cursor, COL_SEQ_SPOT_ID, &cmn->seq_spot_id_idx ); /* read_fkt.c */
if ( rc == 0 )
{
if ( opts->use_long_cigar )
{
- rc = add_column( cursor, COL_LONG_CIGAR, &cmn->cigar_idx );
+ rc = add_column( cursor, COL_LONG_CIGAR, &cmn->cigar_idx ); /* read_fkt.c */
if ( rc == 0 && ( src == 'I' || src == 'A' ) )
- rc = add_column( cursor, COL_CIGAR_LONG_LEN, &cmn->cigar_len_idx );
+ rc = add_column( cursor, COL_CIGAR_LONG_LEN, &cmn->cigar_len_idx ); /* read_fkt.c */
}
else
{
- rc = add_column( cursor, COL_SHORT_CIGAR, &cmn->cigar_idx );
+ rc = add_column( cursor, COL_SHORT_CIGAR, &cmn->cigar_idx ); /* read_fkt.c */
if ( rc == 0 && ( src == 'I' || src == 'A' ) )
- rc = add_column( cursor, COL_CIGAR_SHORT_LEN, &cmn->cigar_len_idx );
+ rc = add_column( cursor, COL_CIGAR_SHORT_LEN, &cmn->cigar_len_idx ); /* read_fkt.c */
}
}
if ( rc == 0 )
{
if ( opts->print_matches_as_equal_sign )
- rc = add_column( cursor, COL_MISMATCH_READ, &cmn->read_idx );
+ rc = add_column( cursor, COL_MISMATCH_READ, &cmn->read_idx ); /* read_fkt.c */
else
- rc = add_column( cursor, COL_READ, &cmn->read_idx );
+ rc = add_column( cursor, COL_READ, &cmn->read_idx ); /* read_fkt.c */
}
if ( rc == 0 )
- rc = add_column( cursor, COL_READ_LEN, &cmn->read_len_idx );
+ rc = add_column( cursor, COL_READ_LEN, &cmn->read_len_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_SAM_QUALITY, &cmn->sam_quality_idx );
+ rc = add_column( cursor, COL_SAM_QUALITY, &cmn->sam_quality_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_REF_ORIENTATION, &cmn->ref_orientation_idx );
+ rc = add_column( cursor, COL_REF_ORIENTATION, &cmn->ref_orientation_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_EDIT_DIST, &cmn->edit_dist_idx );
+ rc = add_column( cursor, COL_EDIT_DIST, &cmn->edit_dist_idx ); /* read_fkt.c */
if ( rc == 0 && ( src == 'P' || src == 'S' || src == 'A' ) )
- rc = add_column( cursor, COL_SEQ_SPOT_GROUP, &cmn->seq_spot_group_idx );
+ rc = add_column( cursor, COL_SEQ_SPOT_GROUP, &cmn->seq_spot_group_idx ); /* read_fkt.c */
if ( rc == 0 && ( src == 'P' || src == 'S' || src == 'A' ) )
- rc = add_column( cursor, COL_SEQ_READ_ID, &cmn->seq_read_id_idx );
+ rc = add_column( cursor, COL_SEQ_READ_ID, &cmn->seq_read_id_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_RAW_READ, &cmn->raw_read_idx );
+ rc = add_column( cursor, COL_RAW_READ, &cmn->raw_read_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_READ_FILTER, &cmn->read_filter_idx );
+ rc = add_column( cursor, COL_READ_FILTER, &cmn->read_filter_idx ); /* read_fkt.c */
if ( rc == 0 && ( src == 'P' || src == 'S' || src == 'A' ) )
- add_opt_column( cursor, available_columns, COL_AL_COUNT, &cmn->al_count_idx );
+ add_opt_column( cursor, available_columns, COL_AL_COUNT, &cmn->al_count_idx ); /* read_fkt.c */
return rc;
}
@@ -388,7 +391,7 @@ static rc_t prepare_prim_sec_table_cursor( const samdump_opts * const opts,
rc = prepare_cmn_table_rows( opts, tbl, &atx->cmn, table_char, available_columns );
if ( rc == 0 )
- rc = add_column( cursor, COL_SAM_FLAGS, &atx->sam_flags_idx );
+ rc = add_column( cursor, COL_SAM_FLAGS, &atx->sam_flags_idx ); /* read_fkt.c */
/* i don't have to add REF_NAME or REF_SEQ_ID, because i have it from the ref_obj later
i don't have to add REF_POS, because i have it from the iterator later
@@ -396,19 +399,25 @@ static rc_t prepare_prim_sec_table_cursor( const samdump_opts * const opts,
... when walking the iterator ...
*/
if ( rc == 0 )
- rc = add_column( cursor, COL_MATE_ALIGN_ID, &atx->mate_align_id_idx );
+ rc = add_column( cursor, COL_MATE_ALIGN_ID, &atx->mate_align_id_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_MATE_REF_NAME, &atx->mate_ref_name_idx );
+ rc = add_column( cursor, COL_MATE_REF_NAME, &atx->mate_ref_name_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_MATE_REF_POS, &atx->mate_ref_pos_idx );
+ rc = add_column( cursor, COL_MATE_REF_POS, &atx->mate_ref_pos_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( cursor, COL_TEMPLATE_LEN, &atx->tlen_idx );
+ rc = add_column( cursor, COL_TEMPLATE_LEN, &atx->tlen_idx ); /* read_fkt.c */
if ( rc == 0 )
- add_opt_column( cursor, available_columns, COL_RNA_ORIENTATION, &atx->rna_orientation_idx );
+ add_opt_column( cursor, available_columns, COL_RNA_ORIENTATION, &atx->rna_orientation_idx ); /* read_fkt.c */
+
+ if ( table_char == 'P' )
+ {
+ if ( rc == 0 )
+ add_opt_column( cursor, available_columns, COL_ALIGN_GROUP, &atx->al_group_idx ); /* read_fkt.c */
- if ( rc == 0 && ( table_char == 'P' ) )
- add_opt_column( cursor, available_columns, COL_ALIGN_GROUP, &atx->al_group_idx );
-
+ if ( rc == 0 )
+ add_opt_column( cursor, available_columns, COL_LNK_GROUP, &atx->lnk_group_idx ); /* read_fkt.c */
+ }
+
KNamelistRelease( available_columns );
}
if ( rc != 0 )
@@ -463,13 +472,13 @@ static rc_t prepare_sub_ev_alignment_table_cursor( const samdump_opts * const op
if ( rc == 0 )
{
/* special to ev-align */
- rc = add_column( atx->eval.cursor, COL_REF_POS, &atx->ref_pos_idx );
+ rc = add_column( atx->eval.cursor, COL_REF_POS, &atx->ref_pos_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( atx->eval.cursor, COL_REF_PLOIDY, &atx->ref_ploidy_idx );
+ rc = add_column( atx->eval.cursor, COL_REF_PLOIDY, &atx->ref_ploidy_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( atx->eval.cursor, COL_SEQ_NAME, &atx->seq_name_idx );
+ rc = add_column( atx->eval.cursor, COL_SEQ_NAME, &atx->seq_name_idx ); /* read_fkt.c */
if ( rc == 0 )
- rc = add_column( atx->eval.cursor, COL_MAPQ, &atx->mapq_idx );
+ rc = add_column( atx->eval.cursor, COL_MAPQ, &atx->mapq_idx ); /* read_fkt.c */
}
rc = VCursorOpen( atx->eval.cursor );
if ( rc != 0 )
@@ -523,7 +532,7 @@ static rc_t prepare_evidence_table_cursor( const samdump_opts * const opts,
}
if ( rc == 0 )
- rc = add_column( atx->cmn.cursor, COL_PLOIDY, &atx->ploidy_idx );
+ rc = add_column( atx->cmn.cursor, COL_PLOIDY, &atx->ploidy_idx ); /* read_fkt.c */
if ( rc == 0 && ( opts->dump_cg_sam || opts->dump_cg_ev_dnb ) )
rc = prepare_sub_ev_alignment_table_cursor( opts, db, atx );
@@ -852,13 +861,13 @@ static rc_t print_qslice( const samdump_opts * const opts,
rc = dump_quality_33( opts, ptr, len, reverse ); /* sam-dump-opts.c */
if ( rc == 0 )
{
- rc = KOutMsg( "\t" );
+ rc = KOutMsg( "" );
if ( rc == 0 )
*source_offset += len;
}
}
else
- rc = KOutMsg( "*\t" );
+ rc = KOutMsg( "*" );
}
return rc;
}
@@ -966,6 +975,29 @@ static rc_t cg_cigar_treatments( enum cigar_treatment what_treatment,
}
+static rc_t print_quality_or_star( const samdump_opts * const opts,
+ const char * const q,
+ uint32_t q_len,
+ uint32_t r_len )
+{
+ rc_t rc;
+ bool star_qual = ( q_len == 0 || q_len != r_len );
+ if ( !star_qual && q[ 0 ] == 255 )
+ {
+ uint32_t i = 0;
+ while ( i < q_len && q[ i ] == 255 ) i++;
+ star_qual = ( i == q_len );
+ }
+
+ if ( star_qual )
+ rc = KOutMsg( "*" );
+ else
+ rc = dump_quality_33( opts, q, q_len, false ); /* sam-dump-opts.c */
+
+ return rc;
+}
+
+
/* triggered by option "--CG-SAM" */
static rc_t print_evidence_alignment_cg_sam( const samdump_opts * const opts,
const PlacementRecord * const rec,
@@ -1054,9 +1086,9 @@ static rc_t print_evidence_alignment_cg_sam( const samdump_opts * const opts,
rc = KOutMsg( "*\t0\t0\t%.*s\t", cgc_output.p_read.len, cgc_output.p_read.ptr );
/* SAM-FIELD: QUAL SRA-column: SAM_QUALITY */
- if ( rc == 0 && cgc_output.p_quality.len > 0 )
- rc = dump_quality_33( opts, cgc_output.p_quality.ptr, cgc_output.p_quality.len, false ); /* sam-dump-opts.c */
-
+ if ( rc == 0 )
+ rc = print_quality_or_star( opts, cgc_output.p_quality.ptr, cgc_output.p_quality.len, cgc_output.p_read.len ); /* above */
+
/* OPT SAM-FIELD: RG SRA-column: SEQ_SPOT_GROUP */
if ( rc == 0 && spot_group_len > 0 )
rc = KOutMsg( "\tRG:Z:%.*s", spot_group_len, spot_group );
@@ -1168,7 +1200,7 @@ static rc_t print_evidence_alignment_cg_ev_dnb( const samdump_opts * const opts,
rc = cg_cigar_treatments( opts->cigar_treatment, &cgc_input, &cgc_output, align_id, &atx->eval );
if ( rc == 0 )
rc = cg_canonical_print_cigar( cgc_output.p_cigar.ptr, cgc_output.p_cigar.len);
- if(rc == 0) rc = KOutMsg( "\t");
+ if(rc == 0) rc = KOutMsg( "\t");
}
/* SAM-FIELD: RNEXT SRA-column: MATE_REF_NAME '*' no mates! */
@@ -1179,9 +1211,9 @@ static rc_t print_evidence_alignment_cg_ev_dnb( const samdump_opts * const opts,
rc = KOutMsg( "*\t0\t0\t%.*s\t", cgc_output.p_read.len, cgc_output.p_read.ptr );
/* SAM-FIELD: QUAL SRA-column: SAM_QUALITY */
- if ( rc == 0 && cgc_output.p_quality.len > 0 )
- rc = dump_quality_33( opts, cgc_output.p_quality.ptr, cgc_output.p_quality.len, false ); /* sam-dump-opts.c */
-
+ if ( rc == 0 )
+ rc = print_quality_or_star( opts, cgc_output.p_quality.ptr, cgc_output.p_quality.len, cgc_output.p_read.len ); /* above */
+
/* OPT SAM-FIELD: RG SRA-column: SEQ_SPOT_GROUP */
if ( rc == 0 && spot_group_len > 0 )
rc = KOutMsg( "\tRG:Z:%.*s", spot_group_len, spot_group );
@@ -1295,11 +1327,16 @@ static rc_t print_alignment_sam_ev( const samdump_opts * const opts,
/* SAM-FIELD: QUAL SRA-column: SAM_QUALITY sliced!!! */
if ( rc == 0 )
- rc = print_qslice( opts, false, quality, quality_str_len, &quality_offset, read_len_vector, read_len_vector_len, ploidy_idx );
-
+ {
+ if ( quality_str_len == read_slice_len )
+ rc = print_qslice( opts, false, quality, quality_str_len, &quality_offset, read_len_vector, read_len_vector_len, ploidy_idx );
+ else
+ rc = KOutMsg( "*" );
+ }
+
/* OPT SAM-FIELD: RG SRA-column: ploidy_idx */
if ( rc == 0 )
- rc = KOutMsg( "RG:Z:ALLELE_%u", ploidy_idx + 1 );
+ rc = KOutMsg( "\tRG:Z:ALLELE_%u", ploidy_idx + 1 );
/* OPT SAM-FIELD: XI SRA-column: ALIGN_ID */
if ( rc == 0 && opts->print_alignment_id_in_column_xi )
@@ -1359,6 +1396,27 @@ static rc_t print_alignment_sam_ev( const samdump_opts * const opts,
}
+static rc_t opt_field_spot_group( const VCursor * cursor, uint32_t col_id, int64_t row_id )
+{
+ const char * value = NULL;
+ uint32_t len;
+ rc_t rc = read_char_ptr( row_id, cursor, col_id, &value, &len, "SPOT_GROUP" );
+ if ( rc == 0 && len > 0 )
+ rc = KOutMsg( "\tRG:Z:%.*s", len, value );
+ return rc;
+}
+
+
+static rc_t opt_field_lnk_group( const VCursor * cursor, uint32_t col_id, int64_t row_id )
+{
+ const char * value = NULL;
+ uint32_t len;
+ rc_t rc = read_char_ptr( row_id, cursor, col_id, &value, &len, "LINKAGE_GROUP" );
+ if ( rc == 0 && len > 0 )
+ rc = KOutMsg( "\tBX:Z:%.*s", len, value );
+ return rc;
+}
+
static rc_t print_alignment_sam_ps( const samdump_opts * const opts,
const char * ref_name,
INSDC_coord_zero pos,
@@ -1550,15 +1608,15 @@ static rc_t print_alignment_sam_ps( const samdump_opts * const opts,
if ( opts->rna_splicing )
{
- { /*** reset previous identification of N to D ***/
- int i;
- char *c = ( char * )cgc_output.p_cigar.ptr;
- for( i = 0; i < cgc_output.p_cigar.len; i++ )
- {
- if ( c[ i ] == 'N' ) c[ i ] = 'D';
- }
- }
-
+ { /*** reset previous identification of N to D ***/
+ int i;
+ char *c = ( char * )cgc_output.p_cigar.ptr;
+ for( i = 0; i < cgc_output.p_cigar.len; i++ )
+ {
+ if ( c[ i ] == 'N' ) c[ i ] = 'D';
+ }
+ }
+
/* discover which cigar-operations could be a RNA-splice ( it is a D-operation with min length of 10 ) */
rc = discover_rna_splicing_candidates( cgc_output.p_cigar.len, cgc_output.p_cigar.ptr, 10, &candidates ); /* cg_tools.c */
if ( rc == 0 && candidates.count > 0 )
@@ -1640,22 +1698,15 @@ static rc_t print_alignment_sam_ps( const samdump_opts * const opts,
/* SAM-FIELD: QUAL SRA-column: SAM_QUALITY */
if ( rc == 0 )
- {
- if ( cgc_output.p_quality.len > 0 )
- rc = dump_quality_33( opts, cgc_output.p_quality.ptr, cgc_output.p_quality.len, false );
- else
- rc = KOutMsg( "*" );
- }
+ rc = print_quality_or_star( opts, cgc_output.p_quality.ptr, cgc_output.p_quality.len, cgc_output.p_read.len ); /* above */
/* OPT SAM-FIELD: RG SRA-column: SPOT_GROUP */
if ( rc == 0 && ( atx->cmn.seq_spot_group_idx != COL_NOT_AVAILABLE ) )
- {
- const char * spot_grp = NULL;
- uint32_t spot_grp_len;
- rc = read_char_ptr( id, cursor, atx->cmn.seq_spot_group_idx, &spot_grp, &spot_grp_len, "SPOT_GROUP" );
- if ( rc == 0 && spot_grp_len > 0 )
- rc = KOutMsg( "\tRG:Z:%.*s", spot_grp_len, spot_grp );
- }
+ rc = opt_field_spot_group( cursor, atx->cmn.seq_spot_group_idx, id );
+
+ /* OPT SAM-FIELD: BZ SRA-column: LINKAGE_GROUP */
+ if ( rc == 0 && ( atx->lnk_group_idx != COL_NOT_AVAILABLE ) )
+ rc = opt_field_lnk_group( cursor, atx->lnk_group_idx, id );
if ( rc == 0 && cgc_output.p_tags.len > 0 )
rc = KOutMsg( "\t%.*s", cgc_output.p_tags.len, cgc_output.p_tags.ptr );
@@ -1711,16 +1762,6 @@ static rc_t print_alignment_sam_ps( const samdump_opts * const opts,
else
rc = KOutMsg( "\tXS:A:-" );
}
-/*
- uint32_t i;
- KOutMsg( "\tXS:A:" );
- for ( i = 0; i < candidates.count; ++i )
- {
- rna_splice_candidate * rsc = &candidates.candidates[ i ];
- KOutMsg( "( offs=%u | len=%u | op_idx=%u | matech=%u )", rsc->offset, rsc->len, rsc->op_idx, rsc->matched );
- }
-*/
-
}
else
{
@@ -1739,26 +1780,26 @@ static rc_t print_alignment_sam_ps( const samdump_opts * const opts,
}
}
- /* OPT SAM_FIELD: MD reports Mismatches and Deletions */
- if ( rc == 0 && opts->with_md_flag )
- {
- uint8_t * alig_ref = malloc( rec->len );
- if ( alig_ref == NULL )
- rc = RC( rcExe, rcNoTarg, rcAllocating, rcMemory, rcExhausted );
- else
- {
- INSDC_coord_len ref_len;
- rc = ReferenceObj_Read( rec->ref, pos, rec->len, alig_ref, &ref_len );
- if ( rc == 0 )
- {
- rc = kout_md_tag_from_cigar_string( cgc_output.p_cigar.ptr, cgc_output.p_cigar.len, /* cigar */
- cgc_output.p_read.ptr, cgc_output.p_read.len, /* read */
- alig_ref, ref_len ); /* reference */
- }
- free( alig_ref );
- }
- }
-
+ /* OPT SAM_FIELD: MD reports Mismatches and Deletions */
+ if ( rc == 0 && opts->with_md_flag )
+ {
+ uint8_t * alig_ref = malloc( rec->len );
+ if ( alig_ref == NULL )
+ rc = RC( rcExe, rcNoTarg, rcAllocating, rcMemory, rcExhausted );
+ else
+ {
+ INSDC_coord_len ref_len;
+ rc = ReferenceObj_Read( rec->ref, pos, rec->len, alig_ref, &ref_len );
+ if ( rc == 0 )
+ {
+ rc = kout_md_tag_from_cigar_string( cgc_output.p_cigar.ptr, cgc_output.p_cigar.len, /* cigar */
+ cgc_output.p_read.ptr, cgc_output.p_read.len, /* read */
+ alig_ref, ref_len ); /* reference */
+ }
+ free( alig_ref );
+ }
+ }
+
if ( rc == 0 )
rc = KOutMsg( "\n" );
diff --git a/tools/sra-pileup/sam-dump.vers b/tools/sra-pileup/sam-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-pileup/sam-dump.vers
+++ b/tools/sra-pileup/sam-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-pileup/sam-dump.vers.h b/tools/sra-pileup/sam-dump.vers.h
index 5724448..b2db389 100644
--- a/tools/sra-pileup/sam-dump.vers.h
+++ b/tools/sra-pileup/sam-dump.vers.h
@@ -1 +1 @@
-#define SAM_DUMP_VERS 0x02050007
+#define SAM_DUMP_VERS 0x02060002
diff --git a/tools/sra-pileup/sam-dump2.vers b/tools/sra-pileup/sam-dump2.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-pileup/sam-dump2.vers
+++ b/tools/sra-pileup/sam-dump2.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-pileup/sam-hdr1.c b/tools/sra-pileup/sam-hdr1.c
index 6b67fbe..3195707 100644
--- a/tools/sra-pileup/sam-hdr1.c
+++ b/tools/sra-pileup/sam-hdr1.c
@@ -37,116 +37,116 @@
typedef struct headers
{
- VNamelist * SQ_Lines_1;
- VNamelist * SQ_Lines_2;
-
- VNamelist * RG_Lines_1;
- VNamelist * RG_Lines_2;
-
- VNamelist * Other_Lines;
- VNamelist * HD_Lines;
+ VNamelist * SQ_Lines_1;
+ VNamelist * SQ_Lines_2;
+
+ VNamelist * RG_Lines_1;
+ VNamelist * RG_Lines_2;
+
+ VNamelist * Other_Lines;
+ VNamelist * HD_Lines;
} headers;
static void release_lines( VNamelist ** lines )
{
- if ( *lines != NULL )
- {
- VNamelistRelease ( *lines );
- lines = NULL;
- }
+ if ( *lines != NULL )
+ {
+ VNamelistRelease ( *lines );
+ lines = NULL;
+ }
}
static void release_headers( headers * h )
{
- release_lines( &h->HD_Lines );
- release_lines( &h->Other_Lines );
- release_lines( &h->RG_Lines_1 );
- release_lines( &h->RG_Lines_2 );
- release_lines( &h->SQ_Lines_1 );
- release_lines( &h->SQ_Lines_2 );
+ release_lines( &h->HD_Lines );
+ release_lines( &h->Other_Lines );
+ release_lines( &h->RG_Lines_1 );
+ release_lines( &h->RG_Lines_2 );
+ release_lines( &h->SQ_Lines_1 );
+ release_lines( &h->SQ_Lines_2 );
}
static rc_t init_headers( headers * h, uint32_t blocksize )
{
- rc_t rc;
- h->SQ_Lines_1 = NULL;
- h->SQ_Lines_2 = NULL;
- h->RG_Lines_1 = NULL;
- h->RG_Lines_2 = NULL;
- h->Other_Lines = NULL;
- h->HD_Lines = NULL;
-
- rc = VNamelistMake( &h->SQ_Lines_1, blocksize );
- if ( rc == 0 )
- rc = VNamelistMake( &h->SQ_Lines_2, blocksize );
- if ( rc == 0 )
- rc = VNamelistMake( &h->RG_Lines_1, blocksize );
- if ( rc == 0 )
- rc = VNamelistMake( &h->RG_Lines_2, blocksize );
- if ( rc == 0 )
- rc = VNamelistMake( &h->Other_Lines, blocksize );
- if ( rc == 0 )
- rc = VNamelistMake( &h->HD_Lines, blocksize );
-
- if ( rc != 0 )
- release_headers( h );
-
- return rc;
+ rc_t rc;
+ h->SQ_Lines_1 = NULL;
+ h->SQ_Lines_2 = NULL;
+ h->RG_Lines_1 = NULL;
+ h->RG_Lines_2 = NULL;
+ h->Other_Lines = NULL;
+ h->HD_Lines = NULL;
+
+ rc = VNamelistMake( &h->SQ_Lines_1, blocksize );
+ if ( rc == 0 )
+ rc = VNamelistMake( &h->SQ_Lines_2, blocksize );
+ if ( rc == 0 )
+ rc = VNamelistMake( &h->RG_Lines_1, blocksize );
+ if ( rc == 0 )
+ rc = VNamelistMake( &h->RG_Lines_2, blocksize );
+ if ( rc == 0 )
+ rc = VNamelistMake( &h->Other_Lines, blocksize );
+ if ( rc == 0 )
+ rc = VNamelistMake( &h->HD_Lines, blocksize );
+
+ if ( rc != 0 )
+ release_headers( h );
+
+ return rc;
}
static void process_line( headers * h, int idx, const char * line, size_t len )
{
- if ( len > 3 && line[ 0 ] == '@' )
- {
- if ( line[ 1 ] == 'S' && line[ 2 ] == 'Q' )
- {
- if ( idx == 1 )
- VNamelistAppend( h->SQ_Lines_1, line );
- else
- VNamelistAppend( h->SQ_Lines_2, line );
- }
- else if ( line[ 1 ] == 'R' && line[ 2 ] == 'G' )
- {
- if ( idx == 1 )
- VNamelistAppend( h->RG_Lines_1, line );
- else
- VNamelistAppend( h->RG_Lines_2, line );
- }
- else if ( line[ 1 ] == 'H' && line[ 2 ] == 'D' )
- VNamelistAppend( h->HD_Lines, line );
- else
- VNamelistAppend( h->Other_Lines, line );
- }
+ if ( len > 3 && line[ 0 ] == '@' )
+ {
+ if ( line[ 1 ] == 'S' && line[ 2 ] == 'Q' )
+ {
+ if ( idx == 1 )
+ VNamelistAppend( h->SQ_Lines_1, line );
+ else
+ VNamelistAppend( h->SQ_Lines_2, line );
+ }
+ else if ( line[ 1 ] == 'R' && line[ 2 ] == 'G' )
+ {
+ if ( idx == 1 )
+ VNamelistAppend( h->RG_Lines_1, line );
+ else
+ VNamelistAppend( h->RG_Lines_2, line );
+ }
+ else if ( line[ 1 ] == 'H' && line[ 2 ] == 'D' )
+ VNamelistAppend( h->HD_Lines, line );
+ else
+ VNamelistAppend( h->Other_Lines, line );
+ }
}
static rc_t process_lines( headers * h, int idx, VNamelist * content, const char * identifier )
{
- uint32_t i, count;
- rc_t rc = VNameListCount( content, &count );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogErr, ( klogErr, rc, "cant get count for content of '$(t)'", "t=%s", identifier ) );
- }
- else
- {
- const char * line = NULL;
- for ( i = 0; i < count && rc == 0; ++i )
- {
- rc = VNameListGet( content, i, &line );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogErr, ( klogErr, rc, "cant get line #$(t) from content", "t=%u", i ) );
- }
- else
- process_line( h, idx, line, string_measure( line, NULL ) );
- }
- }
- return rc;
+ uint32_t i, count;
+ rc_t rc = VNameListCount( content, &count );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogErr, ( klogErr, rc, "cant get count for content of '$(t)'", "t=%s", identifier ) );
+ }
+ else
+ {
+ const char * line = NULL;
+ for ( i = 0; i < count && rc == 0; ++i )
+ {
+ rc = VNameListGet( content, i, &line );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogErr, ( klogErr, rc, "cant get line #$(t) from content", "t=%u", i ) );
+ }
+ else
+ process_line( h, idx, line, string_measure( line, NULL ) );
+ }
+ }
+ return rc;
}
@@ -156,8 +156,8 @@ static rc_t process_lines( headers * h, int idx, VNamelist * content, const char
typedef struct buffer_range
{
- const char * start;
- uint32_t processed, count, state;
+ const char * buffer;
+ size_t size, state;
} buffer_range;
@@ -166,13 +166,13 @@ static const char empty_str[ 2 ] = { ' ', 0 };
static void LoadFromBuffer( VNamelist * nl, buffer_range * range )
{
- uint32_t idx;
- const char * p = range->start;
+ size_t idx;
+ const char * p = range->buffer;
String S;
S.addr = p;
- S.len = S.size = range->processed;
- for ( idx = range->processed; idx < range->count; ++idx )
+ S.len = S.size = 0;
+ for ( idx = 0; idx < range->size; ++idx )
{
switch( p[ idx ] )
{
@@ -230,198 +230,171 @@ static void LoadFromBuffer( VNamelist * nl, buffer_range * range )
break;
}
}
- if ( range->state == STATE_ALPHA )
- {
- range->start = S.addr;
- range->count = S.len;
- }
- else
- range->count = 0;
+ if ( range->state == STATE_ALPHA && S.len > 0 )
+ VNamelistAppendString ( nl, &S );
}
-static rc_t Load_Namelist_From_Node( VNamelist * nl, const KMDataNode * node )
+static rc_t Load_Namelist_From_Node( VNamelist * dst, const KMDataNode * node )
{
- rc_t rc = 0;
- size_t pos = 0, num_read, remaining = ~0;
- char buffer[ 4096 ];
- buffer_range range;
-
- range.start = buffer;
- range.count = 0;
- range.processed = 0;
- range.state = STATE_ALPHA;
-
- do
+ size_t num_read, remaining;
+ char b[ 10 ];
+ rc_t rc = KMDataNodeRead( node, 0, b, sizeof( b ), &num_read, &remaining );
+ if ( rc == 0 )
{
- rc = KMDataNodeRead( node, pos, buffer, sizeof( buffer ), &num_read, &remaining );
- if ( rc == 0 )
+ size_t bsize = num_read + remaining;
+ buffer_range range;
+ range.buffer = malloc( bsize );
+ if ( range.buffer != NULL )
{
- if ( num_read > 0 )
- {
- range.start = buffer;
- range.count = range.processed + num_read;
-
- LoadFromBuffer( nl, &range );
- if ( range.count > 0 )
- {
- memmove ( buffer, range.start, range.count );
- }
- range.start = buffer;
- range.processed = range.count;
-
- pos += num_read;
- }
-
- if ( remaining == 0 && range.state == STATE_ALPHA )
+ rc = KMDataNodeRead( node, 0, ( void * )range.buffer, bsize, &range.size, &remaining );
+ if ( rc == 0 )
{
- String S;
- S.addr = range.start;
- S.len = S.size = range.count;
- VNamelistAppendString ( nl, &S );
+ range.state = STATE_ALPHA;
+ LoadFromBuffer( dst, &range );
}
+ free( ( void * ) range.buffer );
}
- } while ( rc == 0 && remaining > 0 );
-
+ }
return rc;
}
static rc_t collect_from_BAM_HEADER( headers * h, int hdr_idx, input_files * ifs )
{
- rc_t rc = 0;
- if ( ifs->database_count > 0 )
- {
- uint32_t idx;
- for ( idx = 0; idx < ifs->database_count && rc == 0; ++idx )
- {
- input_database * id = VectorGet( &ifs->dbs, idx );
- if ( id != NULL )
- {
- const KMetadata * meta;
- rc = VDatabaseOpenMetadataRead( id->db, &meta );
- if ( rc == 0 )
- {
- const KMDataNode * node;
- rc = KMetadataOpenNodeRead( meta, &node, "BAM_HEADER" );
- if ( rc == 0 )
- {
- VNamelist * content;
- rc = VNamelistMake ( &content, 25 );
- if ( rc != 0 )
- {
- (void)PLOGERR( klogErr, ( klogErr, rc, "cant create container for '$(t)'", "t=%s", id->path ) );
- }
- else
- {
- rc = Load_Namelist_From_Node( content, node );
- if ( rc == 0 )
- rc = process_lines( h, hdr_idx, content, id->path );
- VNamelistRelease( content );
- }
- KMDataNodeRelease( node );
- }
- else
- rc = 0;
- KMetadataRelease( meta );
- }
- }
- }
- }
- return rc;
+ rc_t rc = 0;
+ if ( ifs->database_count > 0 )
+ {
+ uint32_t idx;
+ for ( idx = 0; idx < ifs->database_count && rc == 0; ++idx )
+ {
+ input_database * id = VectorGet( &ifs->dbs, idx );
+ if ( id != NULL )
+ {
+ const KMetadata * meta;
+ rc = VDatabaseOpenMetadataRead( id->db, &meta );
+ if ( rc == 0 )
+ {
+ const KMDataNode * node;
+ rc = KMetadataOpenNodeRead( meta, &node, "BAM_HEADER" );
+ if ( rc == 0 )
+ {
+ VNamelist * content;
+ rc = VNamelistMake ( &content, 25 );
+ if ( rc != 0 )
+ {
+ (void)PLOGERR( klogErr, ( klogErr, rc, "cant create container for '$(t)'", "t=%s", id->path ) );
+ }
+ else
+ {
+ rc = Load_Namelist_From_Node( content, node );
+ if ( rc == 0 )
+ rc = process_lines( h, hdr_idx, content, id->path );
+ VNamelistRelease( content );
+ }
+ KMDataNodeRelease( node );
+ }
+ else
+ rc = 0;
+ KMetadataRelease( meta );
+ }
+ }
+ }
+ }
+ return rc;
}
static rc_t collect_from_spotgroup_stats( headers * h, int hdr_idx, const KMDataNode * node, const KNamelist * spot_groups )
{
- uint32_t count;
- rc_t rc = KNamelistCount( spot_groups, &count );
- if ( rc == 0 && count > 0 )
- {
- uint32_t i;
- for ( i = 0; i < count && rc == 0; ++i )
- {
- const char * name = NULL; /* this is the name of the node !!!NOT!!! the name of the spotgroup! */
- rc = KNamelistGet( spot_groups, i, &name );
- if ( rc == 0 && name != NULL )
- {
- const KMDataNode * spot_count_node;
- rc = KMDataNodeOpenNodeRead( node, &spot_count_node, "%s/SPOT_COUNT", name );
- if ( rc == 0 )
- {
- uint64_t spot_count = 0;
- rc = KMDataNodeReadAsU64( spot_count_node, &spot_count );
- if ( rc == 0 )
- {
- if ( spot_count > 0 )
- {
- const KMDataNode * spot_group_node;
- rc = KMDataNodeOpenNodeRead( node, &spot_group_node, name );
- if ( rc == 0 )
- {
- char name_attr[ 2048 ];
- char buffer[ 2048 ];
- size_t num_writ;
- rc = KMDataNodeReadAttr( spot_group_node, "name", name_attr, sizeof name_attr, &num_writ );
- rc = string_printf( buffer, sizeof buffer, &num_writ,
- "@RG\tID:%s", rc == 0 ? name_attr : name );
- if ( rc == 0 )
- process_line( h, hdr_idx, buffer, num_writ );
- KMDataNodeRelease( spot_group_node );
- }
- }
- }
- else
- rc = 0;
- KMDataNodeRelease( spot_count_node );
- }
- }
- }
- }
- return rc;
+ uint32_t count;
+ rc_t rc = KNamelistCount( spot_groups, &count );
+ if ( rc == 0 && count > 0 )
+ {
+ uint32_t i;
+ for ( i = 0; i < count && rc == 0; ++i )
+ {
+ const char * name = NULL; /* this is the name of the node !!!NOT!!! the name of the spotgroup! */
+ rc = KNamelistGet( spot_groups, i, &name );
+ if ( rc == 0 && name != NULL )
+ {
+ const KMDataNode * spot_count_node;
+ rc = KMDataNodeOpenNodeRead( node, &spot_count_node, "%s/SPOT_COUNT", name );
+ if ( rc == 0 )
+ {
+ uint64_t spot_count = 0;
+ rc = KMDataNodeReadAsU64( spot_count_node, &spot_count );
+ if ( rc == 0 )
+ {
+ if ( spot_count > 0 )
+ {
+ const KMDataNode * spot_group_node;
+ rc = KMDataNodeOpenNodeRead( node, &spot_group_node, name );
+ if ( rc == 0 )
+ {
+ char name_attr[ 2048 ];
+ char buffer[ 2048 ];
+ size_t num_writ;
+ rc = KMDataNodeReadAttr( spot_group_node, "name", name_attr, sizeof name_attr, &num_writ );
+ rc = string_printf( buffer, sizeof buffer, &num_writ,
+ "@RG\tID:%s", rc == 0 ? name_attr : name );
+ if ( rc == 0 )
+ process_line( h, hdr_idx, buffer, num_writ );
+ KMDataNodeRelease( spot_group_node );
+ }
+ }
+ }
+ else
+ rc = 0;
+ KMDataNodeRelease( spot_count_node );
+ }
+ }
+ }
+ }
+ return rc;
}
static rc_t collect_from_stats( headers * h, int hdr_idx, input_files * ifs )
{
- rc_t rc = 0;
- if ( ifs->database_count > 0 )
- {
- uint32_t idx;
- for ( idx = 0; idx < ifs->database_count && rc == 0; ++idx )
- {
- input_database * id = VectorGet( &ifs->dbs, idx );
- if ( id != NULL )
- {
- const VTable * seq_table = NULL;
- rc = VDatabaseOpenTableRead( id->db, &seq_table, "SEQUENCE" );
- if ( rc == 0 )
- {
- const KMetadata * meta;
- rc = VTableOpenMetadataRead( seq_table, &meta );
- if ( rc == 0 )
- {
- const KMDataNode * node;
- rc = KMetadataOpenNodeRead( meta, &node, "STATS/SPOT_GROUP" );
- if ( rc == 0 )
- {
- KNamelist * spot_groups;
- rc = KMDataNodeListChildren( node, &spot_groups );
- if ( rc == 0 )
- {
- rc = collect_from_spotgroup_stats( h, hdr_idx, node, spot_groups );
- KNamelistRelease( spot_groups );
- }
- KMDataNodeRelease( node );
- }
- KMetadataRelease( meta );
- }
- VTableRelease( seq_table );
- }
- }
- }
- }
- return rc;
+ rc_t rc = 0;
+ if ( ifs->database_count > 0 )
+ {
+ uint32_t idx;
+ for ( idx = 0; idx < ifs->database_count && rc == 0; ++idx )
+ {
+ input_database * id = VectorGet( &ifs->dbs, idx );
+ if ( id != NULL )
+ {
+ const VTable * seq_table = NULL;
+ rc = VDatabaseOpenTableRead( id->db, &seq_table, "SEQUENCE" );
+ if ( rc == 0 )
+ {
+ const KMetadata * meta;
+ rc = VTableOpenMetadataRead( seq_table, &meta );
+ if ( rc == 0 )
+ {
+ const KMDataNode * node;
+ rc = KMetadataOpenNodeRead( meta, &node, "STATS/SPOT_GROUP" );
+ if ( rc == 0 )
+ {
+ KNamelist * spot_groups;
+ rc = KMDataNodeListChildren( node, &spot_groups );
+ if ( rc == 0 )
+ {
+ rc = collect_from_spotgroup_stats( h, hdr_idx, node, spot_groups );
+ KNamelistRelease( spot_groups );
+ }
+ KMDataNodeRelease( node );
+ }
+ KMetadataRelease( meta );
+ }
+ VTableRelease( seq_table );
+ }
+ }
+ }
+ }
+ return rc;
}
@@ -457,7 +430,7 @@ static rc_t collect_from_file( headers * h, int hdr_idx, const char * filename )
(void)PLOGERR( klogErr, ( klogErr, rc, "cant load file '$(t)' into container", "t=%s", filename ) );
}
else
- rc = process_lines( h, hdr_idx, content, filename );
+ rc = process_lines( h, hdr_idx, content, filename );
VNamelistRelease( content );
}
KFileRelease ( f );
@@ -500,354 +473,400 @@ static rc_t collect_from_references( headers * h, int hdr_idx, input_files * ifs
rc = ReferenceObj_SeqLength( ref_obj, &seq_len );
if ( rc == 0 )
{
- char buffer[ 2048 ];
- size_t num_writ;
- rc = string_printf( buffer, sizeof buffer, &num_writ, "@SQ\tSN:%s\tLN:%lu", name, seq_len );
- if ( rc == 0 )
- process_line( h, hdr_idx, buffer, num_writ );
+ char buffer[ 2048 ];
+ size_t num_writ;
+ rc = string_printf( buffer, sizeof buffer, &num_writ, "@SQ\tSN:%s\tLN:%lu", name, seq_len );
+ if ( rc == 0 )
+ process_line( h, hdr_idx, buffer, num_writ );
}
- }
+ }
}
}
}
}
}
}
- return rc;
+ return rc;
}
typedef struct hdr_tag
{
- String key, value;
+ String key, value;
} hdr_tag;
typedef struct hdr_line
{
- String line_key;
- hdr_tag tags[ 32 ];
- uint32_t n_tags;
+ String line_key;
+ hdr_tag tags[ 32 ];
+ uint32_t n_tags;
} hdr_line;
static bool parse_hdr_line( hdr_line * hl, const char * line )
{
- size_t i, tl = 0, start = 3, len = string_size( line );
- bool res = ( len > start && line[ 0 ] == '@' );
- hl->n_tags = 0;
- if ( res )
- {
- uint32_t colons = 0;
- StringInit( &hl->line_key, &line[ 1 ], 2, 2 );
- for( i = start; i < len; ++i )
- {
- switch ( line[ i ] )
- {
- case '\t' : if ( tl > 0 )
- {
- StringInit( &hl->tags[ hl->n_tags ].value, &line[ start ], tl, tl );
- ( hl->n_tags )++;
- tl = 0;
- }
- start = i + 1;
- colons = 0;
- break;
-
- case ':' : if ( colons == 0 )
- {
- if ( tl > 0 )
- {
- StringInit( &hl->tags[ hl->n_tags ].key, &line[ start ], tl, tl );
- tl = 0;
- }
- start = i + 1;
- }
- else
- tl++;
- colons++;
- break;
-
- default : tl++;
- break;
- }
- }
- if ( tl > 0 )
- {
- StringInit( &hl->tags[ hl->n_tags ].value, &line[ start ], tl, tl );
- ( hl->n_tags )++;
- }
- }
- else
- StringInit( &hl->line_key, NULL, 0, 0 );
- return res;
-}
-
+ size_t i, tl = 0, start = 3, len = string_size( line );
+ bool res = ( len > start && line[ 0 ] == '@' );
+ hl->n_tags = 0;
+ if ( res )
+ {
+ uint32_t colons = 0;
+ StringInit( &hl->line_key, &line[ 1 ], 2, 2 );
+ for( i = start; i < len; ++i )
+ {
+ switch ( line[ i ] )
+ {
+ case '\t' : if ( tl > 0 )
+ {
+ StringInit( &hl->tags[ hl->n_tags ].value, &line[ start ], tl, tl );
+ ( hl->n_tags )++;
+ tl = 0;
+ }
+ start = i + 1;
+ colons = 0;
+ break;
-static rc_t print_hdr_line( char * buffer, size_t buflen, const hdr_line * hl )
-{
- rc_t rc = 0;
- buffer[ 0 ] = 0;
- if ( hl->line_key.len > 0 )
- {
- size_t num_writ, total_writ = 0, i;
- rc = string_printf( buffer, buflen, &num_writ, "@%S", &hl->line_key );
- for ( i = 0; i < hl->n_tags && rc == 0; ++i )
- {
- total_writ += num_writ;
- if ( buflen > total_writ )
- rc = string_printf( &buffer[ total_writ ], buflen - total_writ, &num_writ,
- "\t%S:%S", &hl->tags[ i ].key, &hl->tags[ i ].value );
- }
- }
- return rc;
+ case ':' : if ( colons == 0 )
+ {
+ if ( tl > 0 )
+ {
+ StringInit( &hl->tags[ hl->n_tags ].key, &line[ start ], tl, tl );
+ tl = 0;
+ }
+ start = i + 1;
+ }
+ else
+ tl++;
+ colons++;
+ break;
+
+ default : tl++;
+ break;
+ }
+ }
+ if ( tl > 0 )
+ {
+ StringInit( &hl->tags[ hl->n_tags ].value, &line[ start ], tl, tl );
+ ( hl->n_tags )++;
+ }
+ }
+ else
+ StringInit( &hl->line_key, NULL, 0, 0 );
+ return res;
}
-static rc_t append_hdr_line( VNamelist * dst, const hdr_line * hl )
+static rc_t print_hdr_line( char * buffer, size_t buflen, const hdr_line * hl )
{
- rc_t rc = 0;
- if ( dst != NULL && hl != NULL && hl->line_key.len > 0 )
- {
- char buffer[ 2048 ];
- rc = print_hdr_line( buffer, sizeof buffer, hl );
- if ( rc == 0 && buffer[ 0 ] != 0 )
- rc = VNamelistAppend( dst, buffer );
- }
- return rc;
+ rc_t rc = 0;
+ buffer[ 0 ] = 0;
+ if ( hl->line_key.len > 0 )
+ {
+ size_t num_writ, total_writ = 0, i;
+ rc = string_printf( buffer, buflen, &num_writ, "@%S", &hl->line_key );
+ for ( i = 0; i < hl->n_tags && rc == 0; ++i )
+ {
+ total_writ += num_writ;
+ if ( buflen > total_writ )
+ rc = string_printf( &buffer[ total_writ ], buflen - total_writ, &num_writ,
+ "\t%S:%S", &hl->tags[ i ].key, &hl->tags[ i ].value );
+ }
+ }
+ return rc;
}
-
static bool sam_hdr_id( const hdr_line * hl1, const hdr_line * hl2 )
{
- bool res = ( hl1 != NULL && hl2 != NULL );
- if ( res )
- res = ( ( hl1->n_tags > 0 ) && ( hl2->n_tags > 0 ) );
- if ( res )
- res = ( 0 == StringCompare( &hl1->tags[ 0 ].value, &hl2->tags[ 0 ].value ) );
- return res;
+ bool res = ( hl1 != NULL && hl2 != NULL );
+ if ( res )
+ res = ( ( hl1->n_tags > 0 ) && ( hl2->n_tags > 0 ) );
+ if ( res )
+ res = ( 0 == StringCompare( &hl1->tags[ 0 ].value, &hl2->tags[ 0 ].value ) );
+ return res;
}
-
-#if 0
-static void copy_hdr_line( const hdr_line * src, hdr_line * dst )
-{
- if ( src != NULL && dst != NULL )
- {
- uint32_t i;
-
- memset( dst, 0, sizeof *dst );
- dst->line_key = src->line_key;
- for ( i = 0; i < src->n_tags; ++i )
- dst->tags[ i ] = src->tags[ i ];
- dst->n_tags = src->n_tags;
- }
-}
-#endif
-
-
static bool has_tag( const String * tag_id, hdr_line * dst )
{
- bool res = false;
- uint32_t i;
- for ( i = 0; i < dst->n_tags && !res; ++i )
- res = ( 0 == StringCompare( tag_id, &dst->tags[ i ].key ) );
- return res;
+ bool res = false;
+ uint32_t i;
+ for ( i = 0; i < dst->n_tags && !res; ++i )
+ res = ( 0 == StringCompare( tag_id, &dst->tags[ i ].key ) );
+ return res;
}
-
static void merge_tag( const hdr_tag * tag, hdr_line * dst )
{
- if ( !has_tag( &tag->key, dst ) )
- {
- dst->tags[ dst->n_tags ] = *tag;
- ( dst->n_tags )++;
- }
+ if ( !has_tag( &tag->key, dst ) )
+ {
+ dst->tags[ dst->n_tags ] = *tag;
+ ( dst->n_tags )++;
+ }
}
static void merge_hdr_line( const hdr_line * src, hdr_line * dst )
{
- if ( src != NULL && dst != NULL )
- {
- uint32_t i;
- for ( i = 0; i < src->n_tags; ++i )
- merge_tag( &src->tags[ i ], dst );
- }
+ if ( src != NULL && dst != NULL )
+ {
+ uint32_t i;
+ for ( i = 0; i < src->n_tags; ++i )
+ merge_tag( &src->tags[ i ], dst );
+ }
}
static rc_t for_each_line( const VNamelist * src,
- rc_t ( * f ) ( const char * line, void * context ),
- void * context )
-{
- uint32_t idx, count;
- rc_t rc = VNameListCount( src, &count );
- for ( idx = 0; idx < count && rc == 0; ++idx )
- {
- const char * line = NULL;
- rc = VNameListGet( src, idx, &line );
- if ( rc == 0 )
- rc = f( line, context );
- }
- return rc;
+ rc_t ( * f ) ( const char * line, void * context ),
+ void * context )
+{
+ uint32_t idx, count;
+ rc_t rc = VNameListCount( src, &count );
+ for ( idx = 0; idx < count && rc == 0; ++idx )
+ {
+ const char * line = NULL;
+ rc = VNameListGet( src, idx, &line );
+ if ( rc == 0 )
+ rc = f( line, context );
+ }
+ return rc;
}
-static rc_t merge_callback_2( const char * line, void * context )
+static rc_t merge_header_tags( const char * line, void * context )
{
- hdr_line * h_parent = context;
- hdr_line h;
- if ( parse_hdr_line( &h, line ) )
- {
- if ( sam_hdr_id( h_parent, &h ) )
- {
- /* merge the tags! */
- merge_hdr_line( h_parent, &h );
- }
- }
- return 0;
+ hdr_line * h_parent = context;
+ hdr_line h;
+ if ( parse_hdr_line( &h, line ) )
+ {
+ /* merge the tags! */
+ if ( sam_hdr_id( h_parent, &h ) )
+ merge_hdr_line( h_parent, &h );
+ }
+ return 0;
}
typedef struct merge_ctx
{
- VNamelist * dst;
- VNamelist * other;
+ VNamelist * dst;
+ const VNamelist * other;
+ bool unique;
} merge_ctx;
-static rc_t merge_callback_1( const char * line, void * context )
+static rc_t merge_callback( const char * line, void * context )
{
- rc_t rc = 0;
- merge_ctx * mc = context;
- hdr_line h;
- if ( parse_hdr_line( &h, line ) )
- {
- rc = for_each_line( mc->other, merge_callback_2, &h );
- if ( rc == 0 )
- rc = append_hdr_line( mc->dst, &h );
- }
- return rc;
+ rc_t rc = 0;
+ merge_ctx * mc = context;
+ hdr_line h;
+ if ( parse_hdr_line( &h, line ) )
+ {
+ rc = for_each_line( mc->other, merge_header_tags, &h );
+ if ( rc == 0 )
+ {
+ char buffer[ 2048 ];
+ rc = print_hdr_line( buffer, sizeof buffer, &h );
+ if ( rc == 0 && buffer[ 0 ] != 0 )
+ {
+ bool do_append = true;
+ if ( mc->unique )
+ {
+ uint32_t found;
+ rc_t rc1 = VNamelistIndexOf( mc->dst, buffer, &found );
+ do_append = ( rc1 != 0 );
+ }
+ if ( do_append )
+ rc = VNamelistAppend( mc->dst, buffer );
+ }
+ }
+ }
+ return rc;
}
/* SQ-lines have to be uniue by the SN-tag */
/* RG-lines have to be uniue by the ID-tag */
-static rc_t merge_lines( VNamelist ** lines_1, VNamelist * lines_2 )
-{
- rc_t rc;
- merge_ctx mc;
- memset( &mc, 0, sizeof mc );
- rc = VNamelistMake( &mc.dst, 25 );
- if ( rc == 0 )
- {
- mc.other = lines_2;
- rc = for_each_line( *lines_1, merge_callback_1, &mc );
- if ( rc == 0 )
- {
- VNamelistRelease( *lines_1 );
- *lines_1 = mc.dst;
- }
- }
- return rc;
+static rc_t merge_lines( VNamelist ** lines_1, const VNamelist * lines_2, bool unique )
+{
+ rc_t rc;
+ merge_ctx mc;
+ memset( &mc, 0, sizeof mc );
+ mc.unique = unique;
+ rc = VNamelistMake( &mc.dst, 25 );
+ if ( rc == 0 )
+ {
+ mc.other = lines_2;
+ rc = for_each_line( *lines_1, merge_callback, &mc );
+ if ( rc == 0 )
+ {
+ VNamelistRelease( *lines_1 );
+ *lines_1 = mc.dst;
+ }
+ }
+ return rc;
}
static rc_t collect_from_bam_hdr( headers * h, input_files * ifs )
{
- uint32_t count;
- rc_t rc = collect_from_BAM_HEADER( h, 1, ifs );
- if ( rc == 0 )
- {
- rc = VNameListCount( h->SQ_Lines_1, &count );
- if ( rc == 0 && count == 0 )
- rc = collect_from_references( h, 1, ifs );
- }
- if ( rc == 0 )
- {
- rc = VNameListCount( h->RG_Lines_1, &count );
- if ( rc == 0 && count == 0 )
- rc = collect_from_stats( h, 1, ifs );
- }
- return rc;
+ uint32_t count;
+ rc_t rc = collect_from_BAM_HEADER( h, 1, ifs );
+ if ( rc == 0 )
+ {
+ rc = VNameListCount( h->SQ_Lines_1, &count );
+ if ( rc == 0 && count == 0 )
+ rc = collect_from_references( h, 1, ifs );
+ }
+ if ( rc == 0 )
+ {
+ rc = VNameListCount( h->RG_Lines_1, &count );
+ if ( rc == 0 && count == 0 )
+ rc = collect_from_stats( h, 1, ifs );
+ }
+ return rc;
}
static rc_t collect_by_recalc( headers * h, input_files * ifs )
{
- rc_t rc = collect_from_references( h, 1, ifs );
- if ( rc == 0 )
- rc = collect_from_stats( h, 1, ifs );
- return rc;
+ rc_t rc = collect_from_references( h, 1, ifs );
+ if ( rc == 0 )
+ rc = collect_from_stats( h, 1, ifs );
+ return rc;
}
static rc_t collect_from_src_and_files( headers * h, input_files * ifs, const char * filename )
{
- rc_t rc = collect_from_bam_hdr( h, ifs );
- if ( rc == 0 && filename != NULL )
- rc = collect_from_file( h, 2, filename );
- return rc;
+ rc_t rc = collect_from_bam_hdr( h, ifs );
+ if ( rc == 0 && filename != NULL )
+ rc = collect_from_file( h, 2, filename );
+ return rc;
}
static rc_t print_HD_line( const VNamelist * lines )
{
- uint32_t count;
- rc_t rc = VNameListCount( lines, &count );
- if ( rc == 0 && count > 0 )
- {
- const char * line = NULL;
- rc = VNameListGet( lines, 0, &line );
- if ( rc == 0 && line != NULL )
- rc = KOutMsg( "%s\n", line );
- else
- rc = KOutMsg( "@HD\tVN:1.2\tSO:coordinate\n" );
- }
- else
- rc = KOutMsg( "@HD\tVN:1.2\tSO:coordinate\n" );
- return rc;
+ uint32_t count;
+ rc_t rc = VNameListCount( lines, &count );
+ if ( rc == 0 && count > 0 )
+ {
+ const char * line = NULL;
+ rc = VNameListGet( lines, 0, &line );
+ if ( rc == 0 && line != NULL )
+ rc = KOutMsg( "%s\n", line );
+ else
+ rc = KOutMsg( "@HD\tVN:1.2\tSO:coordinate\n" );
+ }
+ else
+ rc = KOutMsg( "@HD\tVN:1.2\tSO:coordinate\n" );
+ return rc;
}
+
static rc_t print_callback( const char * line, void * context ) { return KOutMsg( "%s\n", line ); }
+/*
+static void print_header_info( const headers * h )
+{
+ uint32_t count;
+
+ VNameListCount( h->SQ_Lines_1, &count );
+ KOutMsg( "h->SQ_Lines_1 = %d\n", count );
+
+ VNameListCount( h->SQ_Lines_2, &count );
+ KOutMsg( "h->SQ_Lines_2 = %d\n", count );
+
+ VNameListCount( h->RG_Lines_1, &count );
+ KOutMsg( "h->RG_Lines_1 = %d\n", count );
+
+ VNameListCount( h->RG_Lines_2, &count );
+ KOutMsg( "h->RG_Lines_2 = %d\n", count );
+
+ VNameListCount( h->Other_Lines, &count );
+ KOutMsg( "h->Other_Lines = %d\n", count );
+
+ VNameListCount( h->HD_Lines, &count );
+ KOutMsg( "h->HD_Lines = %d\n", count );
+}
+*/
+
+static rc_t merge_and_print( VNamelist ** L1, const VNamelist * L2 )
+{
+ uint32_t count1, count2;
+
+ rc_t rc = VNameListCount( *L1, &count1 );
+ if ( rc == 0 )
+ rc = VNameListCount( L2, &count2 );
+ if ( rc == 0 )
+ {
+ if ( count1 > 0 && count2 > 0 )
+ {
+ if ( rc == 0 )
+ rc = merge_lines( L1, L2, true );
+ if ( rc == 0 )
+ rc = for_each_line( *L1, print_callback, NULL );
+ }
+ else if ( count1 > 0 )
+ {
+ rc = for_each_line( *L1, print_callback, NULL );
+ }
+ else if ( count2 > 0 )
+ {
+ rc = for_each_line( L2, print_callback, NULL );
+ }
+ }
+ return rc;
+}
+
+
rc_t print_headers_1( const samdump_opts * opts, input_files * ifs )
{
- headers h;
- rc_t rc = init_headers( &h, 25 );
- if ( rc == 0 )
- {
- /* collect ... */
-
- switch( opts->header_mode )
- {
- case hm_dump : rc = collect_from_bam_hdr( &h, ifs ); break;
-
- case hm_recalc : rc = collect_by_recalc( &h, ifs ); break;
-
- case hm_file : rc = collect_from_src_and_files( &h, ifs, opts->header_file ); break;
-
- case hm_none : break; /* to not let the compiler complain about not handled enum */
- }
-
- /* merge ... */
- if ( rc == 0 )
- rc = merge_lines( &h.SQ_Lines_1, h.SQ_Lines_2 );
- if ( rc == 0 )
- rc = merge_lines( &h.RG_Lines_1, h.RG_Lines_2 );
-
-
- /* print ... */
- if ( rc == 0 )
- rc = print_HD_line( h.HD_Lines );
- if ( rc == 0 )
- rc = for_each_line( h.SQ_Lines_1, print_callback, NULL );
- if ( rc == 0 )
- rc = for_each_line( h.RG_Lines_1, print_callback, NULL );
- if ( rc == 0 )
- rc = for_each_line( h.Other_Lines, print_callback, NULL );
-
- release_headers( &h );
- }
- return rc;
+ headers h;
+ rc_t rc = init_headers( &h, 25 );
+ if ( rc == 0 )
+ {
+ /* collect ... */
+
+ switch( opts->header_mode )
+ {
+ case hm_dump : rc = collect_from_bam_hdr( &h, ifs ); break;
+
+ case hm_recalc : rc = collect_by_recalc( &h, ifs ); break;
+
+ case hm_file : rc = collect_from_src_and_files( &h, ifs, opts->header_file ); break;
+
+ case hm_none : break; /* to not let the compiler complain about not handled enum */
+ }
+
+ if ( rc == 0 )
+ rc = print_HD_line( h.HD_Lines );
+
+ if ( rc == 0 )
+ rc = merge_and_print( &h.SQ_Lines_1, h.SQ_Lines_2 );
+
+ if ( rc == 0 )
+ rc = merge_and_print( &h.RG_Lines_1, h.RG_Lines_2 );
+
+ /* merge ... */
+ /*
+ if ( rc == 0 )
+ rc = merge_lines( &h.SQ_Lines_1, h.SQ_Lines_2, true );
+ if ( rc == 0 )
+ rc = merge_lines( &h.RG_Lines_1, h.RG_Lines_2, true );
+ */
+
+ /* print ... */
+ /*
+ if ( rc == 0 )
+ rc = for_each_line( h.SQ_Lines_1, print_callback, NULL );
+ if ( rc == 0 )
+ rc = for_each_line( h.RG_Lines_1, print_callback, NULL );
+ */
+ if ( rc == 0 )
+ rc = for_each_line( h.Other_Lines, print_callback, NULL );
+
+ release_headers( &h );
+ }
+ return rc;
}
\ No newline at end of file
diff --git a/tools/sra-pileup/sam-unaligned.c b/tools/sra-pileup/sam-unaligned.c
index 9511f40..9d0d72a 100644
--- a/tools/sra-pileup/sam-unaligned.c
+++ b/tools/sra-pileup/sam-unaligned.c
@@ -90,6 +90,7 @@ static rc_t prepare_prim_table_ctx( const samdump_opts * const opts,
#define COL_QUALITY "(INSDC:quality:phred)QUALITY"
#define COL_SPOT_GROUP "(ascii)SPOT_GROUP"
#define COL_NAME "(ascii)NAME"
+#define COL_LNK_GROUP "(ascii)LINKAGE_GROUP"
typedef struct seq_table_ctx
{
@@ -105,6 +106,7 @@ typedef struct seq_table_ctx
uint32_t quality_idx;
uint32_t spot_group_idx;
uint32_t name_idx;
+ uint32_t lnk_group_idx;
} seq_table_ctx;
@@ -134,7 +136,8 @@ static rc_t prepare_seq_table_ctx( const samdump_opts * const opts,
add_opt_column( stx->cursor, available_columns, COL_ALIGN_COUNT, &stx->align_count_idx ); /* read_fkt.c */
add_opt_column( stx->cursor, available_columns, COL_PRIM_AL_ID, &stx->prim_al_id_idx );
add_opt_column( stx->cursor, available_columns, COL_NAME, &stx->name_idx );
-
+ add_opt_column( stx->cursor, available_columns, COL_LNK_GROUP, &stx->lnk_group_idx );
+
if ( rc == 0 )
rc = add_column( stx->cursor, COL_READ_TYPE, &stx->read_type_idx ); /* read_fkt.c */
if ( rc == 0 )
@@ -563,6 +566,26 @@ static bool calc_reverse_flag( const samdump_opts * const opts,
}
+static rc_t opt_field_spot_group( const seq_table_ctx * const stx, int64_t row_id )
+{
+ const char * spot_group = NULL;
+ uint32_t spot_group_len;
+ rc_t rc = read_char_ptr( row_id, stx->cursor, stx->spot_group_idx, &spot_group, &spot_group_len, "SPOT_GROUP" );
+ if ( rc == 0 && spot_group_len > 0 )
+ rc = KOutMsg( "\tRG:Z:%.*s", spot_group_len, spot_group );
+ return rc;
+}
+
+static rc_t opt_field_lnk_group( const seq_table_ctx * const stx, int64_t row_id )
+{
+ const char * lnk_grp;
+ uint32_t lnk_grp_len;
+ rc_t rc = read_char_ptr( row_id, stx->cursor, stx->lnk_group_idx, &lnk_grp, &lnk_grp_len, "LINKAGE_GROUP" );
+ if ( rc == 0 && lnk_grp_len > 0 )
+ rc = KOutMsg( "\tBX:Z:%.*s", lnk_grp_len, lnk_grp );
+ return rc;
+}
+
static rc_t dump_seq_row_sam_filtered( const samdump_opts * const opts,
const seq_table_ctx * const stx,
const prim_table_ctx * const ptx,
@@ -571,9 +594,8 @@ static rc_t dump_seq_row_sam_filtered( const samdump_opts * const opts,
const int64_t row_id,
const uint32_t nreads )
{
- uint32_t read_idx, rd_len, prim_align_ids_len, spot_group_len;
+ uint32_t read_idx, rd_len, prim_align_ids_len;
const int64_t * prim_align_ids;
- const char * spot_group = NULL;
const char * quality = NULL;
const INSDC_dna_text * read = NULL;
const INSDC_read_type * read_type = NULL;
@@ -683,16 +705,18 @@ static rc_t dump_seq_row_sam_filtered( const samdump_opts * const opts,
if ( rc == 0 )
rc = print_sliced_quality( opts, quality, read_idx, reverse, read_start, read_len );
- /* OPT SAM-FIIELD: SRA-column: ALIGN_ID */
+ /* OPT SAM-FIELD: SRA-column: ALIGN_ID */
if ( rc == 0 && opts->print_alignment_id_in_column_xi )
rc = KOutMsg( "\tXI:i:%u", row_id );
- /* OPT SAM-FIIELD: SRA-column: SPOT_GROUP */
- if ( rc == 0 && spot_group == NULL )
- rc = read_char_ptr( row_id, stx->cursor, stx->spot_group_idx, &spot_group, &spot_group_len, "SPOT_GROUP" );
- if ( rc == 0 && spot_group_len > 0 )
- rc = KOutMsg( "\tRG:Z:%.*s", spot_group_len, spot_group );
+ /* OPT SAM-FIELD: SRA-column: SPOT_GROUP */
+ if ( rc == 0 && stx->spot_group_idx != COL_NOT_AVAILABLE )
+ rc = opt_field_spot_group( stx, row_id );
+ /* OPT SAM-FIELD: SRA-column: LINKAGE_GROUP */
+ if ( rc == 0 && stx->lnk_group_idx != COL_NOT_AVAILABLE )
+ rc = opt_field_lnk_group( stx, row_id );
+
if ( rc == 0 )
rc = KOutMsg( "\n" );
}
@@ -717,9 +741,8 @@ static rc_t dump_seq_prim_row_sam( const samdump_opts * const opts,
const int64_t row_id,
const uint32_t nreads )
{
- uint32_t read_idx, rd_len, prim_align_ids_len, spot_group_len;
+ uint32_t read_idx, rd_len, prim_align_ids_len;
const int64_t * prim_align_ids;
- const char * spot_group = NULL;
const char * quality = NULL;
const INSDC_dna_text * read = NULL;
const INSDC_read_type * read_type = NULL;
@@ -850,10 +873,12 @@ static rc_t dump_seq_prim_row_sam( const samdump_opts * const opts,
rc = KOutMsg( "\tXI:i:%u", row_id );
/* OPT SAM-FIIELD: SRA-column: SPOT_GROUP */
- if ( rc == 0 && spot_group == NULL )
- rc = read_char_ptr( row_id, stx->cursor, stx->spot_group_idx, &spot_group, &spot_group_len, "SPOT_GROUP" );
- if ( rc == 0 && spot_group_len > 0 )
- rc = KOutMsg( "\tRG:Z:%.*s", spot_group_len, spot_group );
+ if ( rc == 0 && stx->spot_group_idx != COL_NOT_AVAILABLE )
+ rc = opt_field_spot_group( stx, row_id );
+
+ /* OPT SAM-FIELD: SRA-column: LINKAGE_GROUP */
+ if ( rc == 0 && stx->lnk_group_idx != COL_NOT_AVAILABLE )
+ rc = opt_field_lnk_group( stx, row_id );
if ( rc == 0 )
rc = KOutMsg( "\n" );
@@ -869,8 +894,7 @@ static rc_t dump_seq_row_sam( const samdump_opts * const opts,
const int64_t row_id,
const uint32_t nreads )
{
- uint32_t read_idx, rd_len, spot_group_len, name_len;
- const char * spot_group = NULL;
+ uint32_t read_idx, rd_len, name_len;
const char * quality = NULL;
const char * name = NULL;
const INSDC_dna_text * read = NULL;
@@ -963,10 +987,12 @@ static rc_t dump_seq_row_sam( const samdump_opts * const opts,
rc = KOutMsg( "\tXI:i:%u", row_id );
/* OPT SAM-FIIELD: SRA-column: SPOT_GROUP */
- if ( rc == 0 && spot_group == NULL )
- rc = read_char_ptr( row_id, stx->cursor, stx->spot_group_idx, &spot_group, &spot_group_len, "SPOT_GROUP" );
- if ( rc == 0 && ( spot_group != NULL ) && ( spot_group_len > 0 ) )
- rc = KOutMsg( "\tRG:Z:%.*s", spot_group_len, spot_group );
+ if ( rc == 0 && stx->spot_group_idx != COL_NOT_AVAILABLE )
+ rc = opt_field_spot_group( stx, row_id );
+
+ /* OPT SAM-FIELD: SRA-column: LINKAGE_GROUP */
+ if ( rc == 0 && stx->lnk_group_idx != COL_NOT_AVAILABLE )
+ rc = opt_field_lnk_group( stx, row_id );
if ( rc == 0 )
rc = KOutMsg( "\n" );
diff --git a/tools/sra-pileup/sra-pileup.vers b/tools/sra-pileup/sra-pileup.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-pileup/sra-pileup.vers
+++ b/tools/sra-pileup/sra-pileup.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-pileup/sra-pileup.vers.h b/tools/sra-pileup/sra-pileup.vers.h
index 4de089c..41689b9 100644
--- a/tools/sra-pileup/sra-pileup.vers.h
+++ b/tools/sra-pileup/sra-pileup.vers.h
@@ -1 +1 @@
-#define SRA_PILEUP_VERS 0x02050007
+#define SRA_PILEUP_VERS 0x02060002
diff --git a/tools/sra-sort/buff-writer.c b/tools/sra-sort/buff-writer.c
index 22af68a..c1ee43d 100644
--- a/tools/sra-sort/buff-writer.c
+++ b/tools/sra-sort/buff-writer.c
@@ -201,18 +201,22 @@ void BufferedPairColWriterMapValues ( BufferedPairColWriter *self, const ctx_t *
STATUS ( 3, "randomly mapping old=>new values" );
for ( i = 0; i < self -> num_immed; ++ i )
{
+ uint32_t *base;
+
id = self -> u . data [ i ] . val . imm;
+
+ ON_FAIL ( base = MemBankAlloc ( self -> mbank, ctx, sizeof id + sizeof * base, false ) )
+ return;
+
if ( id != 0 )
{
- uint32_t *base;
- ON_FAIL ( base = MemBankAlloc ( self -> mbank, ctx, sizeof id + sizeof * base, false ) )
- return;
ON_FAIL ( id = MapFileMapSingleOldToNew ( self -> idx, ctx, id, assign_ids ) )
return;
- memcpy ( & base [ 1 ], & id, sizeof id );
- base [ 0 ] = 1;
- self -> u . data [ i ] . val . ptr = base;
}
+
+ memcpy ( & base [ 1 ], & id, sizeof id );
+ base [ 0 ] = 1;
+ self -> u . data [ i ] . val . ptr = base;
}
for ( self -> num_immed = 0; i < self -> num_items; ++ i )
{
diff --git a/tools/sra-sort/col-pair.c b/tools/sra-sort/col-pair.c
index 3bde514..4394bff 100644
--- a/tools/sra-sort/col-pair.c
+++ b/tools/sra-sort/col-pair.c
@@ -478,10 +478,12 @@ ColumnWriter *TablePairMakeColumnWriter ( TablePair *self, const ctx_t *ctx,
ERROR ( rc, "failed to add column 'dst.%s.%s' to cursor", self -> full_spec, colspec );
else
{
- VCursorSuspendTriggers ( curs );
+ VCursorSuspendTriggers ( curs );
+
rc = VCursorOpen ( curs );
if ( rc != 0 )
ERROR ( rc, "failed to open cursor on column 'dst.%s.%s'", self -> full_spec, colspec );
+ else
{
SimpleColumnWriter *col;
size_t full_spec_size = self -> full_spec_size + string_size ( colspec ) + sizeof "dst.." - 1;
diff --git a/tools/sra-sort/csra-pair.c b/tools/sra-sort/csra-pair.c
index 579863c..f58d9ff 100644
--- a/tools/sra-sort/csra-pair.c
+++ b/tools/sra-sort/csra-pair.c
@@ -105,7 +105,8 @@ void cSRAPairExplode ( cSRAPair *self, const ctx_t *ctx )
{
FUNC_ENTRY ( ctx );
- TablePair *tbl;
+ DirPair * dir;
+ TablePair * tbl;
TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, "REFERENCE", NULL, 0, true, false, cSRATblPairMakeRef ) )
{
@@ -115,28 +116,39 @@ void cSRAPairExplode ( cSRAPair *self, const ctx_t *ctx )
{
self -> prim_align = tbl;
- TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, "SECONDARY_ALIGNMENT", NULL, 2, false, true, cSRATblPairMakeAlign ) )
+#if SEQUENCE_BEFORE_SECONDARY
+ TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, NULL, "SEQUENCE", 0, false, true, cSRATblPairMakeSeq ) )
{
- self -> sec_align = tbl;
+ self -> sequence = tbl;
+#endif
- TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, NULL, "EVIDENCE_ALIGNMENT", 3, false, false, cSRATblPairMakeAlign ) )
+ TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, "SECONDARY_ALIGNMENT", NULL, 2, false, true, cSRATblPairMakeAlign ) )
{
- self -> evidence_align = tbl;
+ self -> sec_align = tbl;
- TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, NULL, "SEQUENCE", 0, false, true, cSRATblPairMakeSeq ) )
+ TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, NULL, "EVIDENCE_ALIGNMENT", 3, false, false, cSRATblPairMakeAlign ) )
{
- DirPair *dir;
-
- self -> sequence = tbl;
+ self -> evidence_align = tbl;
- TRY ( dir = DbPairMakeDirPair ( & self -> dad, ctx, "extra", false, DbPairMakeStdDirPair ) )
+#if ! SEQUENCE_BEFORE_SECONDARY
+ TRY ( tbl = cSRAPairMakeTablePair ( self, ctx, NULL, "SEQUENCE", 0, false, true, cSRATblPairMakeSeq ) )
{
- ON_FAIL ( DbPairAddDirPair ( & self -> dad, ctx, dir ) )
- DirPairRelease ( dir, ctx );
+ self -> sequence = tbl;
+#endif
+
+ TRY ( dir = DbPairMakeDirPair ( & self -> dad, ctx, "extra", false, DbPairMakeStdDirPair ) )
+ {
+ ON_FAIL ( DbPairAddDirPair ( & self -> dad, ctx, dir ) )
+ DirPairRelease ( dir, ctx );
+ }
+#if ! SEQUENCE_BEFORE_SECONDARY
}
+#endif
}
}
+#if SEQUENCE_BEFORE_SECONDARY
}
+#endif
}
}
}
diff --git a/tools/sra-sort/csra-pair.h b/tools/sra-sort/csra-pair.h
index a9de62e..48791d0 100644
--- a/tools/sra-sort/csra-pair.h
+++ b/tools/sra-sort/csra-pair.h
@@ -32,6 +32,9 @@
#endif
+#define SEQUENCE_BEFORE_SECONDARY 1
+
+
/*--------------------------------------------------------------------------
* forwards
*/
diff --git a/tools/sra-sort/csra-tbl.c b/tools/sra-sort/csra-tbl.c
index f8db2ea..5b3401a 100644
--- a/tools/sra-sort/csra-tbl.c
+++ b/tools/sra-sort/csra-tbl.c
@@ -199,6 +199,21 @@ static TablePair_vt cSRATblPair_Ref_vt =
*/
static
+void cSRATblPairWhackMappingIdx ( cSRATblPair * self, const ctx_t * ctx )
+{
+    /* the two map files hang off the owning cSRAPair, not off this table pair */
+    cSRAPair * owner = self -> csra;
+
+    /* drop this table pair's row-set iterator and forget the pointer */
+    RowSetIteratorRelease ( self -> rsi, ctx );
+    self -> rsi = NULL;
+
+    /* drop the pa_idx map file */
+    MapFileRelease ( owner -> pa_idx, ctx );
+    owner -> pa_idx = NULL;
+
+    /* drop the seq_idx map file */
+    MapFileRelease ( owner -> seq_idx, ctx );
+    owner -> seq_idx = NULL;
+}
+
+static
ColumnPair *cSRATblPairMakeSeqSpotIdColPairPrim ( cSRATblPair *self, const ctx_t *ctx )
{
FUNC_ENTRY ( ctx );
@@ -447,6 +462,11 @@ void cSRATblPairPostCopyAlign ( cSRATblPair *self, const ctx_t *ctx )
break;
case 2:
CrossCheckRefAlignTbl ( ctx, csra -> reference -> dtbl, csra -> sec_align -> dtbl, "SECONDARY_ALIGNMENT" );
+
+#if SEQUENCE_BEFORE_SECONDARY
+ cSRATblPairWhackMappingIdx ( self, ctx );
+#endif
+
break;
}
}
@@ -593,14 +613,9 @@ void cSRATblPairPostCopySeq ( cSRATblPair *self, const ctx_t *ctx )
cSRAPair *csra = self -> csra;
- RowSetIteratorRelease ( self -> rsi, ctx );
- self -> rsi = NULL;
-
- MapFileRelease ( csra -> pa_idx, ctx );
- csra -> pa_idx = NULL;
-
- MapFileRelease ( csra -> seq_idx, ctx );
- csra -> seq_idx = NULL;
+#if ! SEQUENCE_BEFORE_SECONDARY
+ cSRATblPairWhackMappingIdx ( self, ctx );
+#endif
/* record markers in metadata */
if ( ! FAILED () && ( csra -> first_half_aligned_spot != 0 || csra -> first_unaligned_spot != 0 ) )
diff --git a/tools/sra-sort/sra-sort.vers b/tools/sra-sort/sra-sort.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-sort/sra-sort.vers
+++ b/tools/sra-sort/sra-sort.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-sort/xcheck-ref-align.c b/tools/sra-sort/xcheck-ref-align.c
index 2c7a1cf..4a0288b 100644
--- a/tools/sra-sort/xcheck-ref-align.c
+++ b/tools/sra-sort/xcheck-ref-align.c
@@ -33,6 +33,7 @@
#include <kapp/main.h>
#include <vdb/table.h>
#include <vdb/cursor.h>
+#include <vdb/vdb-priv.h>
#include <kproc/thread.h>
#include <klib/rc.h>
@@ -306,7 +307,13 @@ void CrossCheckRefAlignTblInt ( const ctx_t *ctx,
INTERNAL_ERROR ( rc, "VTableCreateCursorRead - failed to open cursor on %s table", align_name );
else
{
- CrossCheckRefAlignCurs ( ctx, ref_curs, align_curs, align_name );
+ rc = VCursorLinkedCursorSet(align_curs,"REFERENCE",ref_curs);
+ if ( rc != 0 )
+ INTERNAL_ERROR ( rc, "VCursorLinkedCursorSet - failed to link cursor on REFERENCE table" );
+ else
+ {
+ CrossCheckRefAlignCurs ( ctx, ref_curs, align_curs, align_name );
+ }
VCursorRelease ( align_curs );
}
diff --git a/tools/sra-stat/sra-stat.c b/tools/sra-stat/sra-stat.c
index dedebe5..6a02a43 100644
--- a/tools/sra-stat/sra-stat.c
+++ b/tools/sra-stat/sra-stat.c
@@ -93,6 +93,8 @@
#define MAX_NREADS 2*1024
+#define DEFAULT_CURSOR_CAPACITY (1024*1024*1024UL)
+
/********** _XMLLogger_Encode : copied from kapp/log-xml.c (-lload) ***********/
static
@@ -306,6 +308,11 @@ static void Statistics2Print(const Statistics2* selfs,
OUTMSG(("%s</Statistics2>\n", indent));
}
+/* Returns true when rc is one of the two return codes this tool treats as
+ * "the requested column does not exist":
+ *   (rcVDB, rcCursor, rcOpening,  rcColumn, rcUndefined)
+ *   (rcVDB, rcCursor, rcUpdating, rcColumn, rcNotFound) */
+static bool columnUndefined(rc_t rc) {
+    if (rc == SILENT_RC(rcVDB, rcCursor, rcOpening , rcColumn, rcUndefined)) {
+        return true;
+    }
+    return rc == SILENT_RC(rcVDB, rcCursor, rcUpdating, rcColumn, rcNotFound );
+}
+
typedef struct {
uint64_t cnt[5];
bool CS_NATIVE;
@@ -314,6 +321,7 @@ typedef struct {
bool finalized;
} Bases;
+
static rc_t BasesInit(Bases *self, const VTable *vtbl) {
rc_t rc = 0;
@@ -329,38 +337,52 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
self->CS_NATIVE = true;
- rc = VTableCreateCursorRead(vtbl, &curs);
- DISP_RC(rc, "Cannot VTableCreateCursorRead");
+ rc = VTableCreateCachedCursorRead(vtbl, &curs, DEFAULT_CURSOR_CAPACITY);
+ DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
if (rc == 0) {
- rc = VCursorAddColumn(curs, &idx, "%s", name);
- DISP_RC(rc, "Cannot VCursorAddColumn(CS_NATIVE)");
+ rc = VCursorPermitPostOpenAdd(curs);
+ DISP_RC(rc, "Cannot VCursorPermitPostOpenAdd");
}
if (rc == 0) {
rc = VCursorOpen(curs);
- DISP_RC(rc, "Cannot VCursorOpen(CS_NATIVE)");
+ DISP_RC(rc, "Cannot VCursorOpen");
}
if (rc == 0) {
- bitsz_t boff = ~0;
- bitsz_t row_bits = ~0;
+ rc = VCursorAddColumn(curs, &idx, "%s", name);
+ if (rc != 0) {
+ if (columnUndefined(rc)) {
+ self->CS_NATIVE = false;
+ rc = 0;
+ }
+ else {
+ DISP_RC(rc, "Cannot VCursorAddColumn(CS_NATIVE)");
+ }
+ }
+ else {
+ bitsz_t boff = ~0;
+ bitsz_t row_bits = ~0;
- uint32_t elem_bits = 0, elem_off = 0, elem_cnt = 0;
- rc = VCursorCellDataDirect(curs, 1, idx,
- &elem_bits, &base, &elem_off, &elem_cnt);
- boff = elem_off * elem_bits;
- row_bits = elem_cnt * elem_bits;
+ uint32_t elem_bits = 0, elem_off = 0, elem_cnt = 0;
+ rc = VCursorCellDataDirect(curs, 1, idx,
+ &elem_bits, &base, &elem_off, &elem_cnt);
+ boff = elem_off * elem_bits;
+ row_bits = elem_cnt * elem_bits;
- if (boff != 0 || row_bits != 8) {
- rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
- PLOGERR(klogInt, (klogErr, rc, "invalid boff or row_bits "
- "while VCursorCellDataDirect($(name))", "name=%s", name));
- }
- }
+ if (boff != 0 || row_bits != 8) {
+ rc = RC(rcExe, rcColumn, rcReading, rcData, rcInvalid);
+ PLOGERR(klogInt, (klogErr, rc, "invalid boff or row_bits "
+ "while VCursorCellDataDirect($(name))",
+ "name=%s", name));
+ }
- if (rc == 0) {
- self->CS_NATIVE = *((bool*)base);
+ if (rc == 0) {
+ self->CS_NATIVE = *((bool*)base);
+ }
+
+ }
}
RELEASE(VCursor, curs);
@@ -370,8 +392,8 @@ static rc_t BasesInit(Bases *self, const VTable *vtbl) {
const char *name = self->CS_NATIVE ? "CSREAD" : "READ";
const char *datatype
= self->CS_NATIVE ? "INSDC:x2cs:bin" : "INSDC:x2na:bin";
- rc = VTableCreateCursorRead(vtbl, &self->curs);
- DISP_RC(rc, "Cannot VTableCreateCursorRead");
+ rc = VTableCreateCachedCursorRead(vtbl, &self->curs, DEFAULT_CURSOR_CAPACITY);
+ DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
if (rc == 0) {
rc = VCursorAddColumn(self->curs,
&self->idx, "(%s)%s", datatype, name);
@@ -490,15 +512,6 @@ static rc_t BasesPrint(const Bases *self,
return rc;
}
- if (self->cnt[0] + self->cnt[1] + self->cnt[2] +
- self->cnt[3] + self->cnt[4] != base_count)
- {
- rc = RC(rcExe, rcNumeral, rcComparing, rcData, rcInvalid);
- LOGERR(klogErr, rc,
- "BASE_COUNT MISMATCH DURING BASES COUNT CALCULATION");
- return rc;
- }
-
name = self->CS_NATIVE ? "0123." : "ACGTN";
OUTMSG(("%s<%s cs_native=\"%s\" count=\"%lu\">\n",
@@ -511,6 +524,13 @@ static rc_t BasesPrint(const Bases *self,
OUTMSG(("%s</%s>\n", indent, tag));
+ if (self->cnt[0] + self->cnt[1] + self->cnt[2] +
+ self->cnt[3] + self->cnt[4] != base_count)
+ {
+ rc = RC(rcExe, rcNumeral, rcComparing, rcData, rcInvalid);
+ LOGERR(klogErr, rc, "stored base count did not match observed base count");
+ }
+
return rc;
}
@@ -2276,15 +2296,16 @@ rc_t print_results(const Ctx* ctx)
}
if (ctx->meta->found && ! ctx->pb->quick) {
- bool mismatch = false;
+/* bool mismatch = false; */
SraStats* ss = (SraStats*)BSTreeFind(ctx->tr, "", srastats_cmp);
const SraStatsMeta* m = &ctx->meta->table;
if (ctx->total->BASE_COUNT != m->BASE_COUNT)
{ mismatch = true; }
if (ctx->total->BIO_BASE_COUNT != m->BIO_BASE_COUNT)
{ mismatch = true; }
- if (ctx->total->spot_count != m->spot_count ||
- ctx->total->total_cmp_len != m->CMP_BASE_COUNT)
+ if (ctx->total->spot_count != m->spot_count)
+ { mismatch = true; }
+ if (ctx->total->total_cmp_len != m->CMP_BASE_COUNT)
{ mismatch = true; }
if (ss != NULL) {
const SraStatsMeta* m = &ctx->meta->table;
@@ -2383,11 +2404,12 @@ rc_t print_results(const Ctx* ctx)
const SraStatsMeta* m = &ctx->meta->table;
if (ctx->pb->total.BASE_COUNT != m->BASE_COUNT
|| ctx->pb->total.BIO_BASE_COUNT != m->BIO_BASE_COUNT
- || ctx->pb->total.spot_count != m->spot_count
- || ctx->pb->total.total_cmp_len != m->CMP_BASE_COUNT)
+ || ctx->pb->total.spot_count != m->spot_count)
{
mismatch = true;
}
+ if (ctx->pb->total.total_cmp_len != m->CMP_BASE_COUNT)
+ { mismatch = true; }
}
if (ctx->pb->total.spot_count != ctx->total->spot_count ||
ctx->pb->total.spot_count_mates != ctx->total->spot_count_mates ||
@@ -2514,11 +2536,6 @@ int64_t CC srastats_sort ( const BSTNode *item, const BSTNode *n )
return srastats_cmp(ss->spot_group,n);
}
-static bool columnUndefined(rc_t rc) {
- return rc == SILENT_RC(rcVDB, rcCursor, rcOpening , rcColumn, rcUndefined)
- || rc == SILENT_RC(rcVDB, rcCursor, rcUpdating, rcColumn, rcNotFound );
-}
-
static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
SraStatsTotal* total, const VTable *vtbl)
{
@@ -2551,8 +2568,8 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
memset(g_totalREAD_LEN, 0, sizeof g_totalREAD_LEN);
memset(g_nonZeroLenReads, 0, sizeof g_nonZeroLenReads);
- rc = VTableCreateCursorRead(vtbl, &curs);
- DISP_RC(rc, "Cannot VTableCreateCursorRead");
+ rc = VTableCreateCachedCursorRead(vtbl, &curs, DEFAULT_CURSOR_CAPACITY);
+ DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
if (rc == 0) {
rc = VCursorPermitPostOpenAdd(curs);
@@ -3065,8 +3082,8 @@ static rc_t sra_stat(srastat_parms* pb, BSTree* tr,
average[i] = (double)g_totalREAD_LEN[i] / n_spots;
}
- rc = VTableCreateCursorRead(vtbl, &curs);
- DISP_RC(rc, "Cannot VTableCreateCursorRead");
+ rc = VTableCreateCachedCursorRead(vtbl, &curs, DEFAULT_CURSOR_CAPACITY);
+ DISP_RC(rc, "Cannot VTableCreateCachedCursorRead");
if (rc == 0) {
const char* name = READ_LEN;
diff --git a/tools/sra-stat/sra-stat.vers b/tools/sra-stat/sra-stat.vers
index 35d16fb..097a15a 100644
--- a/tools/sra-stat/sra-stat.vers
+++ b/tools/sra-stat/sra-stat.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/sra-stat/sra-stat.vers.h b/tools/sra-stat/sra-stat.vers.h
index bbfbbe5..ffd88d8 100644
--- a/tools/sra-stat/sra-stat.vers.h
+++ b/tools/sra-stat/sra-stat.vers.h
@@ -1 +1 @@
-#define SRA_STAT_VERS 0x02050007
+#define SRA_STAT_VERS 0x02060002
diff --git a/tools/sra/table-vers.vers b/tools/sra/table-vers.vers
index 35d16fb..097a15a 100644
--- a/tools/sra/table-vers.vers
+++ b/tools/sra/table-vers.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/srapath/srapath.vers b/tools/srapath/srapath.vers
index 35d16fb..097a15a 100644
--- a/tools/srapath/srapath.vers
+++ b/tools/srapath/srapath.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/srapath/srapath.vers.h b/tools/srapath/srapath.vers.h
index 217c0dd..94ba19b 100644
--- a/tools/srapath/srapath.vers.h
+++ b/tools/srapath/srapath.vers.h
@@ -1 +1 @@
-#define SRAPATH_VERS 0x02050007
+#define SRAPATH_VERS 0x02060002
diff --git a/tools/update-schema/vdb-update-schema.vers b/tools/update-schema/vdb-update-schema.vers
index 35d16fb..097a15a 100644
--- a/tools/update-schema/vdb-update-schema.vers
+++ b/tools/update-schema/vdb-update-schema.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/Makefile b/tools/util/Makefile
index 0e64e0a..418c00d 100644
--- a/tools/util/Makefile
+++ b/tools/util/Makefile
@@ -38,7 +38,8 @@ INT_TOOLS = \
pacbio-correct \
crc32sum \
samview \
- kdb-index
+ kdb-index \
+ pseudo-aligner
EXT_TOOLS = \
rcexplain \
@@ -296,6 +297,20 @@ $(BINDIR)/samview: $(SAMVIEW_OBJ)
$(LD) --exe -o $@ $^ $(SAMVIEW_LIB)
+#-------------------------------------------------------------------------------
+# pseudo-aligner
+# for generating test data
+#
+SUDO_ALGN_SRC = \
+ pseudo-aligner
+
+SUDO_ALGN_OBJ = \
+ $(addsuffix .$(OBJX),$(SUDO_ALGN_SRC))
+
+$(BINDIR)/pseudo-aligner: $(SUDO_ALGN_OBJ)
+ $(LP) --exe -o $@ $^
+
+
#----------------------------------------------------------------
# sortreadtest
#
diff --git a/tools/util/kdb-index.vers b/tools/util/kdb-index.vers
index 35d16fb..097a15a 100644
--- a/tools/util/kdb-index.vers
+++ b/tools/util/kdb-index.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/kdbmeta.vers b/tools/util/kdbmeta.vers
index 35d16fb..097a15a 100644
--- a/tools/util/kdbmeta.vers
+++ b/tools/util/kdbmeta.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/kdbmeta.vers.h b/tools/util/kdbmeta.vers.h
index a7a4be3..2a17913 100644
--- a/tools/util/kdbmeta.vers.h
+++ b/tools/util/kdbmeta.vers.h
@@ -1 +1 @@
-#define KDBMETA_VERS 0x02050007
+#define KDBMETA_VERS 0x02060002
diff --git a/tools/util/md5cp.vers b/tools/util/md5cp.vers
index 35d16fb..097a15a 100644
--- a/tools/util/md5cp.vers
+++ b/tools/util/md5cp.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/md5cp.vers.h b/tools/util/md5cp.vers.h
index eebf593..4fe95b6 100644
--- a/tools/util/md5cp.vers.h
+++ b/tools/util/md5cp.vers.h
@@ -1 +1 @@
-#define MD5CP_VERS 0x02050007
+#define MD5CP_VERS 0x02060002
diff --git a/tools/util/pacbio-correct.vers b/tools/util/pacbio-correct.vers
index 35d16fb..097a15a 100644
--- a/tools/util/pacbio-correct.vers
+++ b/tools/util/pacbio-correct.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/pseudo-aligner.cpp b/tools/util/pseudo-aligner.cpp
new file mode 100644
index 0000000..375955e
--- /dev/null
+++ b/tools/util/pseudo-aligner.cpp
@@ -0,0 +1,515 @@
+/* ===========================================================================
+ *
+ * PUBLIC DOMAIN NOTICE
+ * National Center for Biotechnology Information
+ *
+ * This software/database is a "United States Government Work" under the
+ * terms of the United States Copyright Act. It was written as part of
+ * the author's official duties as a United States Government employee and
+ * thus cannot be copyrighted. This software/database is freely available
+ * to the public for use. The National Library of Medicine and the U.S.
+ * Government have not placed any restriction on its use or reproduction.
+ *
+ * Although all reasonable efforts have been taken to ensure the accuracy
+ * and reliability of the software and data, the NLM and the U.S.
+ * Government do not and cannot warrant the performance or results that
+ * may be obtained by using this software or data. The NLM and the U.S.
+ * Government disclaim all warranties, express or implied, including
+ * warranties of performance, merchantability or fitness for any particular
+ * purpose.
+ *
+ * Please cite the author in any work or product based on this material.
+ *
+ * ===========================================================================
+ *
+ */
+
+#include <iostream>
+#include <string>
+#include <fstream>
+#include <sstream>
+#include <map>
+#include <stdexcept>
+#include <vector>
+#include <utility>
+#include <math.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+
+using namespace std;
+
+/* Converts the whole of `str` to an unsigned int using strtoul().
+ *
+ * base is passed straight to strtoul(); the default 0 lets strtoul pick
+ * the radix from the prefix.
+ *
+ * Throws length_error when str is empty, invalid_argument when strtoul
+ * stops before the end of the string, and range_error when the parsed
+ * value does not fit in an unsigned int.
+ */
+static unsigned stringtoui(string const &str, int base = 0)
+{
+    if (str.empty())
+        throw length_error("empty");
+
+    char *stop = NULL;
+    unsigned long const parsed = strtoul(str.c_str(), &stop, base);
+
+    bool const consumedAll = (*stop == '\0');
+    if (!consumedAll)
+        throw invalid_argument("'" + str + "' is not convertible to an integer");
+
+    unsigned long const limit = unsigned(-1L);
+    if (parsed > limit)
+        throw range_error("'" + str + "' is too big");
+
+    return (unsigned)parsed;
+}
+
+/* Run-time configuration for the pseudo-aligner.
+ *
+ * Built from a name=value map; the "coverage" and "length" entries are
+ * parsed with stringtoui(), so its exceptions propagate out of the
+ * constructor. The object owns both output streams and deletes them in
+ * its destructor.
+ */
+struct Settings {
+    ostream *refSeqStream;  // opened on "<refname>.fasta"
+    ostream *fastqStream;   // opened on args["fastq"]
+
+    string refName;
+
+    int coverageTarget;
+    int lengthTarget;
+    int readLength;
+    int templateLength;
+
+    // Members are initialised in declaration order; the initializer list is
+    // written in that same order so it reads the way it executes.
+    Settings(map<string, string> &args)
+    : coverageTarget(stringtoui(args["coverage"]))
+    , lengthTarget(stringtoui(args["length"]))
+    , readLength(150)
+    , templateLength(600)
+    {
+        refName = args["refname"];
+        refSeqStream = new ofstream((refName + ".fasta").c_str());
+        fastqStream = new ofstream(args["fastq"].c_str());
+    }
+    ~Settings() {
+        delete refSeqStream;
+        delete fastqStream;
+    }
+
+    // Names of the accepted name=value parameters, in display order.
+    static vector<string> const &paramNames() {
+        static char const *values[] = {
+            "refname", "fastq", "coverage", "length"
+        };
+        static vector<string> const value(values, values + sizeof values / sizeof values[0]);
+
+        return value;
+    }
+    // Type labels shown in the usage text; parallel to paramNames().
+    static vector<string> const &paramTypes() {
+        static char const *values[] = {
+            "name", "file", "integer", "integer"
+        };
+        static vector<string> const value(values, values + sizeof values / sizeof values[0]);
+
+        return value;
+    }
+};
+
+static Settings const *settings;
+
+/* Returns random() rescaled as random() / 2^30 - 1.0.
+ * With random() in [0, 2^31) this lies in [-1.0, 1.0). */
+static double frand()
+{
+    double const halfRange = double(1 << 30);
+
+    return double(random()) / halfRange - 1.0;
+}
+
+/* Draws points (x, y) from frand() until 0 < x*x + y*y <= 1, then returns
+ * both coordinates scaled by sqrt(-2 * log(r) / r), r being that squared
+ * radius. Each attempt calls frand() twice, x first. */
+static pair<double, double> normalRandom()
+{
+    double x, y, r;
+
+    do {
+        x = frand();
+        y = frand();
+        r = x * x + y * y;
+    } while (!(0.0 < r && r <= 1.0));
+
+    double const scale = sqrt(-2.0 * log(r) / r);
+    return make_pair(x * scale, y * scale);
+}
+
+// One position of the synthetic reference.
+//   base  - index into "NACGT"; 0 means no base has been assigned yet
+//           (printReference substitutes a random base in that case)
+//   count - number of reads placed over this position, capped at 0xFF
+struct refbase {
+ uint8_t base, count;
+};
+// The whole reference, one refbase per position.
+typedef vector<refbase> ref_t;
+
+/* Writes the reference as FASTA to settings->refSeqStream: a '>' header
+ * line followed by the bases, 70 per line. A position whose base code is
+ * still 0 is printed as a random base drawn with random(). */
+static void printReference(ref_t const &ref)
+{
+    static char const alphabet[] = "NACGT";
+    int const lineWidth = 70;
+    ostream &out = *settings->refSeqStream;
+
+    out << '>' << settings->refName << " A.randomus random sequence" << endl;
+
+    int column = 0;
+    for (ref_t::size_type n = 0; n < ref.size(); ++n) {
+        uint8_t const base = ref[n].base;
+        // random() is consumed only for unassigned positions
+        long const code = (base != 0) ? long(base) : (random() % 4 + 1);
+
+        out << alphabet[code];
+        if (++column == lineWidth) {
+            column = 0;
+            out << endl;
+        }
+    }
+    // terminate a partially filled last line
+    if (column != 0)
+        out << endl;
+}
+
+/* Maps a base code in 1..4 to 5 - code (1<->4, 2<->3); any other code
+ * yields 0. */
+static uint8_t complement(uint8_t const base)
+{
+    if (base >= 1 && base <= 4)
+        return uint8_t(5 - base);
+    return 0;
+}
+
+/* Returns true when any of the settings->readLength positions starting at
+ * pos already has a count greater than twice settings->coverageTarget. */
+static bool isOverCovered(ref_t const &ref, int pos)
+{
+    int const limit = 2 * settings->coverageTarget;
+    int const end = pos + settings->readLength;
+
+    for (int at = pos; at < end; ++at) {
+        if (int(ref[at].count) > limit)
+            return true;
+    }
+    return false;
+}
+
+/* Finds the uncovered core of the read window [pos, pos + readLength).
+ *
+ * On return `left` is the number of leading positions whose count is
+ * non-zero and `right` is one past the last position whose count is zero,
+ * both relative to pos. When every position in the window is covered the
+ * result is left == right == readLength, which callers detect with
+ * `right <= left`.
+ *
+ * Both scans are bounded by the window, so a fully covered window can no
+ * longer walk `left` past the read or `right` below zero.
+ */
+static void covered(ref_t const &ref, int pos, int &left, int &right)
+{
+    int const readLength = settings->readLength;
+
+    left = 0;
+    right = readLength;
+    while (left < readLength && ref[pos + left].count != 0) {
+        ++left;
+    }
+    while (right > left && ref[pos + right - 1].count != 0) {
+        --right;
+    }
+}
+
+/* Places one read on the reference at pos.
+ *
+ * Every position in the window that has no coverage yet gets a random
+ * base: two frand() draws choose between codes 2/3 (first draw > 0.2) and
+ * 1/4, and between the first and second code of that pair (second draw
+ * > 0.0). Each position's count is then incremented, capped at 0xFF.
+ */
+static void fillReference(ref_t &ref, int pos)
+{
+    int const n = settings->readLength;
+
+    for (int i = 0; i < n; ++i) {
+        refbase &slot = ref[pos + i];
+        int const seen = slot.count;
+
+        if (seen == 0) {
+            // draw order matters: GC first, then AC
+            bool const GC = frand() > 0.2;
+            bool const AC = frand() > 0.0;
+
+            if (GC)
+                slot.base = AC ? 2 : 3;
+            else
+                slot.base = AC ? 1 : 4;
+        }
+        int const bumped = seen + 1;
+        slot.count = bumped < 0xFF ? bumped : 0xFF;
+    }
+}
+
+/* Places a copy of read SEQ on the reference at pos.
+ *
+ * Positions with no coverage yet take their base from SEQ (read backwards
+ * and complemented when `reverse` is set), or a random base when the
+ * mutation test -10 * log10(frand()) > 15 passes. Each position's count is
+ * then incremented, capped at 0xFF.
+ * NOTE(review): frand() can be negative, in which case log10() presumably
+ * yields NaN and the test is false - confirm this is intended.
+ */
+static void copyReference(ref_t &ref, int pos, vector<uint8_t> const &SEQ, bool reverse)
+{
+    int const n = settings->readLength;
+
+    for (int i = 0; i < n; ++i) {
+        refbase &slot = ref[pos + i];
+        int const seen = slot.count;
+
+        if (seen == 0) {
+            bool const mutate = -10.0 * log10(frand()) > 15.0;
+            uint8_t value;
+
+            // random() is consumed only when the position mutates
+            if (mutate)
+                value = uint8_t(random() % 4 + 1);
+            else
+                value = SEQ[reverse ? (n - 1 - i) : i];
+
+            slot.base = reverse ? complement(value) : value;
+        }
+        int const bumped = seen + 1;
+        slot.count = bumped < 0xFF ? bumped : 0xFF;
+    }
+}
+
+/* Writes the first settings->readLength base codes of SEQ to `out` as
+ * letters from "NACGT". With `reverse` set the read is written back to
+ * front with each code complemented. Returns `out`. */
+static ostream &printSequence(ostream &out, vector<uint8_t> const &SEQ, bool reverse)
+{
+    int const n = settings->readLength;
+
+    for (int i = 0; i < n; ++i) {
+        uint8_t const code = reverse ? complement(SEQ[n - 1 - i]) : SEQ[i];
+
+        out << "NACGT"[code];
+    }
+    return out;
+}
+
+/* Writes the first settings->readLength quality values of QUAL to `out`,
+ * each as the character with code value + 33. With `reverse` set the
+ * values are written back to front. Returns `out`. */
+static ostream &printQuality(ostream &out, vector<uint8_t> const &QUAL, bool reverse)
+{
+    int const n = settings->readLength;
+
+    for (int i = 0; i < n; ++i) {
+        int const from = reverse ? (n - 1 - i) : i;
+
+        out << char(QUAL[from] + 33);
+    }
+    return out;
+}
+
+/* Writes one four-line FASTQ record to settings->fastqStream:
+ * "@<serialNo>/<readNo>", the sequence, "+", and the quality string.
+ * Sequence and quality are written in stored (not reversed) order. */
+static void print1FastQ(uint64_t serialNo, int readNo, vector<uint8_t> const &SEQ, vector<uint8_t> const &QUAL)
+{
+    ostream &fastq = *settings->fastqStream;
+
+    fastq << '@' << serialNo << '/' << readNo << endl;
+    printSequence(fastq, SEQ, false) << endl;
+    fastq << '+' << endl;
+    printQuality(fastq, QUAL, false) << endl;
+}
+
+/* Writes both mates of a pair as FASTQ records: read 1 first, then
+ * read 2, both under the same serial number. */
+static void printFastQ(uint64_t serialNo,
+                       vector<uint8_t> const &SEQ1, vector<uint8_t> const &SEQ2,
+                       vector<uint8_t> const &QUAL1, vector<uint8_t> const &QUAL2)
+{
+    int const firstMate = 1;
+    int const secondMate = 2;
+
+    print1FastQ(serialNo, firstMate, SEQ1, QUAL1);
+    print1FastQ(serialNo, secondMate, SEQ2, QUAL2);
+}
+
+/* Computes the signed template-length value that printSAM writes for a
+ * read at pos whose mate is at mpos.
+ *
+ * Left and right end points are derived for this read (a*) and its mate
+ * (b*) from settings->readLength and the orientation. The result is the
+ * distance between the smallest left and the largest right end point,
+ * negated when this read supplies the largest right end point.
+ */
+static int templateLength(int pos, int mpos, bool reversed)
+{
+    int const len = settings->readLength;
+    int aleft, aright, bleft, bright;
+
+    if (reversed) {
+        aleft = pos + len - 1;
+        aright = pos - 1;
+        bleft = mpos;
+        bright = mpos + len;
+    }
+    else {
+        aleft = pos;
+        aright = pos + len;
+        bleft = mpos + len - 1;
+        bright = mpos - 1;
+    }
+
+    int const left = aleft < bleft ? aleft : bleft;
+    int const right = aright > bright ? aright : bright;
+    int const span = right - left;
+
+    return (aright == right) ? -span : span;
+}
+
+/* Writes one tab-separated SAM alignment line to `out`.
+ *
+ * FLAG always has 0x1 and 0x2 set, plus 0x10 when this read is reversed or
+ * 0x20 otherwise, 0x40 / 0x80 for readNo 1 / 2, and 0x100 for secondary
+ * records. POS is pos + 1 + lclip; MAPQ is 3 for secondary records and 30
+ * otherwise; the CIGAR is an optional left clip, one M run, and an
+ * optional right clip, with clips written as H when hardClip is set and
+ * S otherwise. The mate reference is "=" and the mate position mpos + 1.
+ */
+static void printSAM(ostream &out, uint64_t name, int readNo, bool reversed, bool secondary, int pos, int lclip, int rclip, bool hardClip, int mpos, vector<uint8_t> const &SEQ, vector<uint8_t> const &QUAL)
+{
+    int flag = 0x1 | 0x2;
+    flag |= reversed ? 0x10 : 0x20;
+    if (readNo == 1)
+        flag |= 0x40;
+    if (readNo == 2)
+        flag |= 0x80;
+    if (secondary)
+        flag |= 0x100;
+
+    char const clipOp = hardClip ? 'H' : 'S';
+    int const mapq = secondary ? 3 : 30;
+    int const matched = settings->readLength - lclip - rclip;
+
+    // QNAME, FLAG, RNAME, POS, MAPQ
+    out << name << '\t' << flag << '\t' << settings->refName << '\t' << pos + 1 + lclip << '\t' << mapq << '\t';
+
+    // CIGAR
+    if (lclip)
+        out << lclip << clipOp;
+    out << matched << 'M';
+    if (rclip)
+        out << rclip << clipOp;
+
+    // RNEXT, PNEXT, TLEN
+    out << "\t=\t" << mpos + 1 << '\t' << templateLength(pos, mpos, reversed) << '\t';
+
+    // SEQ, QUAL
+    printSequence(out, SEQ, reversed) << '\t';
+    printQuality(out, QUAL, reversed) << endl;
+}
+
+/* Returns settings->readLength quality values. Each starts at 38, has
+ * 5 * normalRandom().first added, is truncated to int, and is clamped to
+ * the range 10..40. */
+static vector<uint8_t> randomQuality()
+{
+    int const n = settings->readLength;
+    vector<uint8_t> quality(n, 38);
+
+    for (int k = 0; k < n; ++k) {
+        int q = int(quality[k] + normalRandom().first * 5);
+
+        if (q > 40)
+            q = 40;
+        if (q < 10)
+            q = 10;
+        quality[k] = uint8_t(q);
+    }
+    return quality;
+}
+
+/* Produces a read of settings->readLength base codes from the reference
+ * at pos.
+ *
+ * Each position passes through the test -10 * log10(frand()) > 30; when it
+ * passes, a random base replaces the reference base. With `reverse` set
+ * the read is stored back to front with each base complemented.
+ */
+static vector<uint8_t> readReference(ref_t const &ref, int pos, bool reverse)
+{
+    int const n = settings->readLength;
+    vector<uint8_t> read(n);
+
+    for (int i = 0; i < n; ++i) {
+        bool const mutate = -10.0 * log10(frand()) > 30.0;
+        // random() is consumed only when the position mutates
+        uint8_t const base = mutate ? (random() % 4 + 1) : ref[pos + i].base;
+
+        if (!reverse)
+            read[i] = base;
+        else
+            read[n - 1 - i] = complement(base);
+    }
+    return read;
+}
+
+/* Maps a serial number to a scrambled 64-bit value used as the read name.
+ *
+ * The first call builds a 256-entry byte table: the identity table is
+ * shuffled by 256 passes in which every entry is swapped with an entry
+ * chosen by random(). The eight bytes of serialNo, lowest first, are then
+ * each XORed with a running carry (initially 0x55), looked up in the
+ * table, and shifted into the result from the top; the looked-up byte
+ * becomes the next carry.
+ */
+static uint64_t scramble(uint64_t const serialNo)
+{
+    static uint8_t table[256];
+    static bool ready = false;
+
+    if (!ready) {
+        for (int i = 0; i < 256; ++i)
+            table[i] = uint8_t(i);
+
+        for (int round = 0; round < 256; ++round) {
+            for (int i = 0; i < 256; ++i) {
+                int const j = random() % 256;
+                uint8_t const held = table[i];
+
+                table[i] = table[j];
+                table[j] = held;
+            }
+        }
+        ready = true;
+    }
+
+    uint64_t mixed = 0;
+    uint8_t carry = 0x55;
+    for (int shift = 0; shift < 64; shift += 8) {
+        uint8_t const in = uint8_t(serialNo >> shift);
+        uint8_t const picked = table[in ^ carry];
+
+        mixed = (uint64_t(picked) << 56) | (mixed >> 8);
+        carry = picked;
+    }
+    return mixed;
+}
+
+/* Draws two distances from one normalRandom() pair.
+ *
+ * The first is templateLength plus a deviation scaled by
+ * templateLength / 10; the second is the first plus a deviation scaled by
+ * templateLength / 100. Draws are repeated until both exceed
+ * settings->readLength.
+ */
+static pair<unsigned, unsigned> make_ipd_pair(void)
+{
+    int const tlen = settings->templateLength;
+    int const rlen = settings->readLength;
+    int ipd1, ipd2;
+
+    do {
+        pair<double, double> const z = normalRandom();
+
+        ipd1 = int(z.first * tlen / 10.0) + tlen;
+        ipd2 = int(z.second * tlen / 100.0) + ipd1;
+    } while (!(ipd1 > rlen && ipd2 > rlen));
+
+    return make_pair((unsigned)ipd1, (unsigned)ipd2);
+}
+
+/* Picks start positions for the two reads of a pair separated by `ipd`.
+ *
+ * One frand() draw supplies both the orientation (negative = first read
+ * lies to the right of its mate) and, by magnitude, the first read's
+ * position on a reference of settings->lengthTarget bases. Draws are
+ * repeated until both read windows lie inside the reference, so the
+ * reference must be long enough for a pair to fit at all.
+ *
+ * The reversed branch now also requires the first read to end inside the
+ * reference; before, a first read at or near the reference end was
+ * returned and later indexed past the end of the reference vector.
+ *
+ * Returns (first position, second position).
+ */
+static pair<unsigned, unsigned> make_read_pair(unsigned const ipd)
+{
+/*
+ *           |<-------ipd------->|         |rrrrrrrrr|
+ * |fffffffff|-------------------|
+ *           |-------------------|fffffffff|
+ * |rrrrrrrrr|
+ * ^second   ^first                        ^second
+ */
+    unsigned const readLength = settings->readLength;
+    unsigned const refLength = settings->lengthTarget;
+
+    for ( ; ; ) {
+        double const r1 = frand();
+        bool const reversed = r1 < 0;
+        unsigned const pos1 = unsigned((reversed ? -r1 : r1) * settings->lengthTarget);
+
+        if (reversed && pos1 > readLength + ipd && pos1 + readLength <= refLength) {
+            unsigned const pos2 = pos1 - readLength - ipd;
+            return make_pair(pos1, pos2);
+        }
+        else {
+            // forward layout; also the fallback when a reversed draw does not fit
+            unsigned const pos2 = pos1 + readLength + ipd;
+            if (pos2 + readLength <= refLength)
+                return make_pair(pos1, pos2);
+        }
+    }
+}
+
+// Generates read pairs over a reference of settings->lengthTarget
+// positions until total placed bases divided by the reference size
+// exceeds settings->coverageTarget. Each accepted pair is written as FASTQ
+// (printFastQ) and as two SAM records on cout; some pairs also get a
+// second, secondary placement. The reference is printed at the end.
+// Always returns 0.
+static int run(void)
+{
+ ref_t ref(settings->lengthTarget);
+ uint64_t coverage = 0;
+ uint64_t serialNo = 0;
+
+ for ( ; ; ) {
+ // ipd.first spaces the primary placement, ipd.second the secondary one
+ pair<unsigned, unsigned> const ipd = make_ipd_pair();
+ pair<unsigned, unsigned> const pos = make_read_pair(ipd.first);
+ bool const rev = pos.first > pos.second;
+
+ // skip placements that land on an already over-covered window
+ if (isOverCovered(ref, pos.first) || isOverCovered(ref, pos.second))
+ continue;
+
+ // assign bases to uncovered positions and count the new coverage
+ fillReference(ref, pos.first);
+ fillReference(ref, pos.second);
+
+ vector<uint8_t> const SEQ1 = readReference(ref, pos.first, rev);
+ vector<uint8_t> const SEQ2 = readReference(ref, pos.second, !rev);
+ vector<uint8_t> const QUAL1 = randomQuality();
+ vector<uint8_t> const QUAL2 = randomQuality();
+ uint64_t const name = scramble(++serialNo);
+
+ printFastQ(name, SEQ1, SEQ2, QUAL1, QUAL2);
+ printSAM(cout, name, 1, rev, false, pos.first, 0, 0, false, pos.second, SEQ1, QUAL1);
+ printSAM(cout, name, 2, !rev, false, pos.second, 0, 0, false, pos.first, SEQ2, QUAL2);
+
+ coverage += 2 * settings->readLength;
+
+ // Runs at most once: every path ends in break. It attempts one
+ // secondary placement of the same pair; pos and rev here shadow the
+ // outer ones.
+ for ( ; ; ) {
+ pair<unsigned, unsigned> const pos = make_read_pair(ipd.second);
+ bool const rev = pos.first > pos.second;
+
+ if (isOverCovered(ref, pos.first) || isOverCovered(ref, pos.second))
+ break;
+
+ // give up unless at least half of each read window is uncovered
+ int left1, right1;
+ covered(ref, pos.first, left1, right1);
+ if (right1 <= left1 || (right1 - left1) * 2 < settings->readLength)
+ break;
+
+ int left2, right2;
+ covered(ref, pos.second, left2, right2);
+ if (right2 <= left2 || (right2 - left2) * 2 < settings->readLength)
+ break;
+
+ // write the reads' bases into the uncovered positions
+ copyReference(ref, pos.first, SEQ1, rev);
+ copyReference(ref, pos.second, SEQ2, !rev);
+
+ // secondary records; the covered flanks are passed as clip lengths
+ printSAM(cout, name, 1, rev, true, pos.first, left1, settings->readLength - right1, false, pos.second, SEQ1, QUAL1);
+ printSAM(cout, name, 2, !rev, true, pos.second, left2, settings->readLength - right2, false, pos.first, SEQ2, QUAL2);
+
+ coverage += 2 * settings->readLength;
+ break;
+ }
+
+ // stop once the average depth exceeds the target
+ if (double(coverage)/double(ref.size()) > settings->coverageTarget)
+ break;
+ }
+
+ printReference(ref);
+ return 0;
+}
+
+/* Returns the part of argv0 after its last '/', or all of argv0 when it
+ * contains no '/'. */
+static string getProgName(string const &argv0)
+{
+    string::size_type const slash = argv0.rfind('/');
+
+    return (slash == string::npos) ? argv0 : argv0.substr(slash + 1);
+}
+
+/* Rebuilds the command line as one string: progname followed by
+ * argv[1] .. argv[argc - 1], each preceded by a single space.
+ *
+ * The loop is index-bounded, so argc == 0 yields just progname; the
+ * previous `while (++argv, --argc)` form ran past the end of argv in that
+ * case.
+ */
+static string command_line(string const &progname, int argc, char *argv[])
+{
+    ostringstream oss;
+
+    oss << progname;
+    for (int i = 1; i < argc; ++i) {
+        oss << ' ' << argv[i];
+    }
+    return oss.str();
+}
+
+/* Prints the one-line usage summary and exits.
+ *
+ * The whole line goes to a single stream: cerr with EXIT_FAILURE when
+ * `error` is set, cout with EXIT_SUCCESS otherwise. Before, "Usage: "
+ * went to cerr and the parameter list to cout, which split the line
+ * whenever the two streams were redirected separately.
+ *
+ * Does not return.
+ */
+static void usage(string const &progname, bool error)
+{
+    ostream &out = error ? cerr : cout;
+    vector<string> const &names = Settings::paramNames();
+    vector<string> const &types = Settings::paramTypes();
+
+    out << "Usage: " << progname;
+    for (vector<string>::size_type i = 0; i < names.size(); ++i) {
+        out << " [" << names[i] << "=<" << types[i] << ">]";
+    }
+    out << endl;
+    exit(error ? EXIT_FAILURE : EXIT_SUCCESS);
+}
+
+/* Parses argv[1..] into a name -> value map.
+ *
+ * "--help", "-h" and "-?" print the usage text and exit successfully.
+ * Every other argument must start with "<name>=" for one of
+ * Settings::paramNames(); the rest of the argument becomes that
+ * parameter's value. Anything else is reported on cerr and ends the
+ * program through usage(progname, true).
+ */
+static map<string, string> loadArgs(int argc, char *argv[], string const &progname)
+{
+    vector<string> const &names = Settings::paramNames();
+    map<string, string> args;
+
+    while (++argv, --argc) {
+        string const arg(*argv);
+
+        if (arg == "--help" || arg == "-h" || arg == "-?")
+            usage(progname, false);
+
+        // first parameter name whose "<name>=" prefix matches wins
+        vector<string>::size_type n = 0;
+        for ( ; n < names.size(); ++n) {
+            string const key = names[n] + "=";
+
+            if (arg.compare(0, key.length(), key) == 0) {
+                args[names[n]] = arg.substr(key.length());
+                break;
+            }
+        }
+        if (n == names.size()) {
+            cerr << "Error: unknown parameter: '" << arg << "'" << endl;
+            usage(progname, true);
+        }
+    }
+    return args;
+}
+
+/* Entry point: parses name=value arguments, fills in defaults, writes the
+ * SAM header to cout, seeds the random generator from the clock, and runs
+ * the simulation.
+ *
+ * Changes from the previous version:
+ *  - range_error from stringtoui() is a runtime_error, so it escaped the
+ *    logic_error handler and terminated the program; runtime_error is now
+ *    reported the same way.
+ *  - the @SQ LN: field prints the parsed length instead of echoing the
+ *    argument text, so input such as "0x1000" yields a decimal number.
+ *  - the Settings object is deleted after the run, which destroys both
+ *    output file streams.
+ */
+int main(int argc, char *argv[])
+{
+    string const progname = getProgName(argv[0]);
+    string const CL = command_line(progname, argc, argv);
+    map<string, string> args = loadArgs(argc, argv, progname);
+
+    // set defaults; insert() leaves keys given on the command line untouched
+    (void)args.insert(make_pair("refname", "R"));
+    (void)args.insert(make_pair("fastq", progname + ".fastq"));
+    (void)args.insert(make_pair("coverage", "30"));
+    (void)args.insert(make_pair("length", "500000"));
+
+    try {
+        settings = new Settings(args);
+    }
+    catch (logic_error const &e) {
+        cerr << "Error: " << e.what() << endl;
+        usage(progname, true);
+    }
+    catch (runtime_error const &e) {
+        cerr << "Error: " << e.what() << endl;
+        usage(progname, true);
+    }
+
+    cout << "@HD\tVN:1.0\tSO:queryname" << endl;
+    cout << "@SQ\tSN:" << args["refname"] << "\tLN:" << settings->lengthTarget << "\tUR:" << args["refname"] << ".fasta" << endl;
+    cout << "@PG\tID:1\tPN:" << progname << "\tCL:" << CL << endl;
+    cout << "@CO\targuments: { ";
+    for (map<string, string>::const_iterator i = args.begin(); i != args.end(); ++i) {
+        cout << i->first << "=" << i->second << "; ";
+    }
+    cout << "}" << endl;
+
+    srandom((unsigned long)(time(0)));
+
+    int const rc = run();
+
+    delete settings;
+    settings = NULL;
+    return rc;
+}
diff --git a/tools/util/rcexplain.vers b/tools/util/rcexplain.vers
index 35d16fb..097a15a 100644
--- a/tools/util/rcexplain.vers
+++ b/tools/util/rcexplain.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/rcexplain.vers.h b/tools/util/rcexplain.vers.h
index e27a8b3..7f877fa 100644
--- a/tools/util/rcexplain.vers.h
+++ b/tools/util/rcexplain.vers.h
@@ -1 +1 @@
-#define RCEXPLAIN_VERS 0x02050007
+#define RCEXPLAIN_VERS 0x02060002
diff --git a/tools/util/rowwritetest.vers b/tools/util/rowwritetest.vers
index 35d16fb..097a15a 100644
--- a/tools/util/rowwritetest.vers
+++ b/tools/util/rowwritetest.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/schema-replace.vers b/tools/util/schema-replace.vers
index 35d16fb..097a15a 100644
--- a/tools/util/schema-replace.vers
+++ b/tools/util/schema-replace.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/schema-replace.vers.h b/tools/util/schema-replace.vers.h
index a617fcb..3e4e977 100644
--- a/tools/util/schema-replace.vers.h
+++ b/tools/util/schema-replace.vers.h
@@ -1 +1 @@
-#define SCHEMA_REPLACE_VERS 0x02050007
+#define SCHEMA_REPLACE_VERS 0x02060002
diff --git a/tools/util/test-sra.vers b/tools/util/test-sra.vers
index 35d16fb..097a15a 100644
--- a/tools/util/test-sra.vers
+++ b/tools/util/test-sra.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/test-sra.vers.h b/tools/util/test-sra.vers.h
index 8a4eb6a..853d01a 100644
--- a/tools/util/test-sra.vers.h
+++ b/tools/util/test-sra.vers.h
@@ -1 +1 @@
-#define TEST_SRA_VERS 0x02050007
+#define TEST_SRA_VERS 0x02060002
diff --git a/tools/util/testld.vers b/tools/util/testld.vers
index 35d16fb..097a15a 100644
--- a/tools/util/testld.vers
+++ b/tools/util/testld.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/txt2kdb.vers b/tools/util/txt2kdb.vers
index 35d16fb..097a15a 100644
--- a/tools/util/txt2kdb.vers
+++ b/tools/util/txt2kdb.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/vdb-lock.vers b/tools/util/vdb-lock.vers
index 35d16fb..097a15a 100644
--- a/tools/util/vdb-lock.vers
+++ b/tools/util/vdb-lock.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/vdb-lock.vers.h b/tools/util/vdb-lock.vers.h
index 40e9880..c72842d 100644
--- a/tools/util/vdb-lock.vers.h
+++ b/tools/util/vdb-lock.vers.h
@@ -1 +1 @@
-#define VDB_LOCK_VERS 0x02050007
+#define VDB_LOCK_VERS 0x02060002
diff --git a/tools/util/vdb-passwd.vers b/tools/util/vdb-passwd.vers
index 35d16fb..097a15a 100644
--- a/tools/util/vdb-passwd.vers
+++ b/tools/util/vdb-passwd.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/vdb-passwd.vers.h b/tools/util/vdb-passwd.vers.h
index c206a34..be91a7b 100644
--- a/tools/util/vdb-passwd.vers.h
+++ b/tools/util/vdb-passwd.vers.h
@@ -1 +1 @@
-#define VDB_PASSWD_VERS 0x02050007
+#define VDB_PASSWD_VERS 0x02060002
diff --git a/tools/util/vdb-unlock.vers b/tools/util/vdb-unlock.vers
index 35d16fb..097a15a 100644
--- a/tools/util/vdb-unlock.vers
+++ b/tools/util/vdb-unlock.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/util/vdb-unlock.vers.h b/tools/util/vdb-unlock.vers.h
index e9e1a51..921e5a2 100644
--- a/tools/util/vdb-unlock.vers.h
+++ b/tools/util/vdb-unlock.vers.h
@@ -1 +1 @@
-#define VDB_UNLOCK_VERS 0x02050007
+#define VDB_UNLOCK_VERS 0x02060002
diff --git a/tools/vcf-loader/vcf-load.vers b/tools/vcf-loader/vcf-load.vers
index 35d16fb..097a15a 100644
--- a/tools/vcf-loader/vcf-load.vers
+++ b/tools/vcf-loader/vcf-load.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-config/vdb-config.c b/tools/vdb-config/vdb-config.c
index f24178a..84f5e10 100644
--- a/tools/vdb-config/vdb-config.c
+++ b/tools/vdb-config/vdb-config.c
@@ -127,6 +127,11 @@ static const char* USAGE_OUT[] = { "output type: one of (x n), "
#define OPTION_PCF "cfg"
static const char* USAGE_PCF[] = { "print current configuration", NULL };
+#define ALIAS_CDR NULL
+#define OPTION_CDR "cfg-dir"
+static const char* USAGE_CDR[]
+ = { "set directory to load configuration", NULL };
+
#define ALIAS_PRD NULL
#define OPTION_PRD "proxy-disable"
static const char* USAGE_PRD[] = { "enable/disable using HTTP proxy", NULL };
@@ -166,6 +171,7 @@ rc_t WorkspaceDirPathConv(const Args * args, uint32_t arg_index, const char * ar
OptDef Options[] =
{ /* needs_value, required, converter */
{ OPTION_ALL, ALIAS_ALL, NULL, USAGE_ALL, 1, false, false, NULL }
+ , { OPTION_CDR, ALIAS_CDR, NULL, USAGE_CDR, 1, true , false, NULL }
, { OPTION_CFG, ALIAS_CFG, NULL, USAGE_CFG, 1, false, false, NULL }
, { OPTION_CFM, ALIAS_CFM, NULL, USAGE_CFM, 1, true , false, NULL }
, { OPTION_DIR, ALIAS_DIR, NULL, USAGE_DIR, 1, false, false, NULL }
@@ -218,6 +224,7 @@ rc_t CC Usage(const Args* args) {
HelpOptionLine (ALIAS_ALL, OPTION_ALL, NULL, USAGE_ALL);
HelpOptionLine (ALIAS_PCF, OPTION_PCF, NULL, USAGE_PCF);
HelpOptionLine (ALIAS_FIL, OPTION_FIL, NULL, USAGE_FIL);
+ HelpOptionLine (ALIAS_DIR, OPTION_DIR, NULL, USAGE_DIR);
HelpOptionLine (ALIAS_ENV, OPTION_ENV, NULL, USAGE_ENV);
HelpOptionLine (ALIAS_MOD, OPTION_MOD, NULL, USAGE_MOD);
KOutMsg ("\n");
@@ -233,6 +240,8 @@ rc_t CC Usage(const Args* args) {
HelpOptionLine (ALIAS_PRX, OPTION_PRX, "uri[:port]", USAGE_PRX);
HelpOptionLine (ALIAS_PRD, OPTION_PRD, "yes | no", USAGE_PRD);
KOutMsg ("\n");
+ HelpOptionLine (ALIAS_CDR, OPTION_CDR, "path", USAGE_CDR);
+ KOutMsg ("\n");
HelpOptionLine (ALIAS_ROOT,OPTION_ROOT,NULL, USAGE_ROOT);
KOutMsg ("\n");
@@ -416,6 +425,8 @@ typedef struct Params {
uint32_t argsParamIdx;
uint32_t argsParamCnt;
+ const char *cfg_dir;
+
bool xml;
const char *setValue;
@@ -503,6 +514,22 @@ static rc_t ParamsConstruct(int argc, char* argv[], Params* prm) {
++count;
}
}
+ { // OPTION_CDR
+ rc = ArgsOptionCount(args, OPTION_CDR, &pcount);
+ if (rc != 0) {
+ LOGERR(klogErr, rc, "Failure to get '" OPTION_CDR "' argument");
+ break;
+ }
+ if (pcount > 0) {
+ rc = ArgsOptionValue
+ (args, OPTION_CDR, 0, (const void **)&prm->cfg_dir);
+ if (rc) {
+ LOGERR(klogErr, rc,
+ "Failure to get '" OPTION_CDR "' argument");
+ break;
+ }
+ }
+ }
{ // OPTION_FIL
rc = ArgsOptionCount(args, OPTION_FIL, &pcount);
if (rc) {
@@ -1387,9 +1414,12 @@ static void ShowEnv(const Params* prm) {
bool hasAny = false;
const char * env_list [] = {
"KLIB_CONFIG",
- "VDB_CONFIG",
+ "LD_LIBRARY_PATH",
+ "NCBI_HOME",
+ "NCBI_SETTINGS",
+ "NCBI_VDB_CONFIG",
"VDBCONFIG",
- "LD_LIBRARY_PATH"
+ "VDB_CONFIG",
};
int i = 0;
@@ -1626,8 +1656,21 @@ rc_t CC KMain(int argc, char* argv[]) {
}
if (rc == 0) {
- rc = KConfigMake(&cfg, NULL);
- DISP_RC(rc, "while calling KConfigMake");
+ const KDirectory *d = NULL;
+ if (prm.cfg_dir != NULL) {
+ KDirectory *n = NULL;
+ rc = KDirectoryNativeDir(&n);
+ if (rc == 0) {
+ rc = KDirectoryOpenDirRead(n, &d, false, prm.cfg_dir);
+ DISP_RC2(rc, "while opening", prm.cfg_dir);
+ }
+ RELEASE(KDirectory, n);
+ }
+ if (rc == 0) {
+ rc = KConfigMake(&cfg, d);
+ DISP_RC(rc, "while calling KConfigMake");
+ }
+ RELEASE(KDirectory, d);
}
if (rc == 0 && prm.showMultiple && prm.xml) {
diff --git a/tools/vdb-config/vdb-config.vers b/tools/vdb-config/vdb-config.vers
index 35d16fb..097a15a 100644
--- a/tools/vdb-config/vdb-config.vers
+++ b/tools/vdb-config/vdb-config.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-config/vdb-config.vers.h b/tools/vdb-config/vdb-config.vers.h
index 1abbcbd..7a7a5bf 100644
--- a/tools/vdb-config/vdb-config.vers.h
+++ b/tools/vdb-config/vdb-config.vers.h
@@ -1 +1 @@
-#define VDB_CONFIG_VERS 0x02050007
+#define VDB_CONFIG_VERS 0x02060002
diff --git a/tools/vdb-copy/vdb-copy.vers b/tools/vdb-copy/vdb-copy.vers
index 35d16fb..097a15a 100644
--- a/tools/vdb-copy/vdb-copy.vers
+++ b/tools/vdb-copy/vdb-copy.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-copy/vdb-copy.vers.h b/tools/vdb-copy/vdb-copy.vers.h
index fda4806..2051ddd 100644
--- a/tools/vdb-copy/vdb-copy.vers.h
+++ b/tools/vdb-copy/vdb-copy.vers.h
@@ -1 +1 @@
-#define VDB_COPY_VERS 0x02050007
+#define VDB_COPY_VERS 0x02060002
diff --git a/tools/vdb-decrypt/vdb-decrypt.vers b/tools/vdb-decrypt/vdb-decrypt.vers
index 35d16fb..097a15a 100644
--- a/tools/vdb-decrypt/vdb-decrypt.vers
+++ b/tools/vdb-decrypt/vdb-decrypt.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-decrypt/vdb-decrypt.vers.h b/tools/vdb-decrypt/vdb-decrypt.vers.h
index b09ac25..6867585 100644
--- a/tools/vdb-decrypt/vdb-decrypt.vers.h
+++ b/tools/vdb-decrypt/vdb-decrypt.vers.h
@@ -1 +1 @@
-#define VDB_DECRYPT_VERS 0x02050007
+#define VDB_DECRYPT_VERS 0x02060002
diff --git a/tools/vdb-decrypt/vdb-encrypt.vers b/tools/vdb-decrypt/vdb-encrypt.vers
index 35d16fb..097a15a 100644
--- a/tools/vdb-decrypt/vdb-encrypt.vers
+++ b/tools/vdb-decrypt/vdb-encrypt.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-decrypt/vdb-encrypt.vers.h b/tools/vdb-decrypt/vdb-encrypt.vers.h
index 419c60c..8619a5a 100644
--- a/tools/vdb-decrypt/vdb-encrypt.vers.h
+++ b/tools/vdb-decrypt/vdb-encrypt.vers.h
@@ -1 +1 @@
-#define VDB_ENCRYPT_VERS 0x02050007
+#define VDB_ENCRYPT_VERS 0x02060002
diff --git a/tools/vdb-dump/Makefile b/tools/vdb-dump/Makefile
index d789ebf..0375eea 100644
--- a/tools/vdb-dump/Makefile
+++ b/tools/vdb-dump/Makefile
@@ -95,6 +95,9 @@ VDB_DUMP_SRC = \
vdb-dump-redir \
vdb-dump-fastq \
vdb-dump-bin \
+ vdb-dump-interact \
+ vdb-dump-repo \
+ vdb-dump-print \
vdb_info \
vdb-dump
diff --git a/tools/vdb-dump/vdb-boot.vers b/tools/vdb-dump/vdb-boot.vers
index 35d16fb..097a15a 100644
--- a/tools/vdb-dump/vdb-boot.vers
+++ b/tools/vdb-dump/vdb-boot.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-dump/vdb-dump-context.c b/tools/vdb-dump/vdb-dump-context.c
index 3ce366c..e3ac2d7 100644
--- a/tools/vdb-dump/vdb-dump-context.c
+++ b/tools/vdb-dump/vdb-dump-context.c
@@ -51,32 +51,51 @@ static rc_t vdco_set_str( char **dst, const char *src )
{
size_t len;
if ( dst == NULL )
- {
return RC( rcVDB, rcNoTarg, rcWriting, rcParam, rcNull );
- }
if ( *dst != NULL )
{
free( *dst );
*dst = NULL;
}
if ( src == NULL )
- {
return RC( rcVDB, rcNoTarg, rcWriting, rcParam, rcNull );
- }
+
len = string_size( src );
if ( len == 0 )
- {
return RC( rcVDB, rcNoTarg, rcWriting, rcItem, rcEmpty );
- }
+
*dst = (char*)malloc( len + 1 );
if ( *dst == NULL )
- {
return RC( rcVDB, rcNoTarg, rcWriting, rcMemory, rcExhausted );
- }
+
string_copy( *dst, len + 1, src, len );
return 0;
}
+
+/* vdco_set_String
+ *  replaces the heap string held in *dst with a NUL-terminated copy of src
+ *
+ *  "dst" [ IN/OUT ] - address of an owned char pointer; a previous value is
+ *  freed and the pointer reset to NULL before src is examined
+ *
+ *  "src" [ IN ] - String to copy; must be non-NULL and non-empty
+ *
+ *  returns 0 on success, otherwise an RC for a NULL parameter, an empty
+ *  source or a failed allocation ( *dst is NULL in every error case )
+ */
+static rc_t vdco_set_String( char **dst, const String *src )
+{
+    if ( dst == NULL )
+        return RC( rcVDB, rcNoTarg, rcWriting, rcParam, rcNull );
+    if ( *dst != NULL )
+    {
+        free( *dst );
+        *dst = NULL;
+    }
+    if ( src == NULL )
+        return RC( rcVDB, rcNoTarg, rcWriting, rcParam, rcNull );
+
+    if ( src->len == 0 || src->size == 0 )
+        return RC( rcVDB, rcNoTarg, rcWriting, rcItem, rcEmpty );
+
+    *dst = (char*)malloc( src->size + 1 );
+    if ( *dst == NULL )
+        return RC( rcVDB, rcNoTarg, rcWriting, rcMemory, rcExhausted );
+
+    /* the buffer was sized from src->size ( bytes ), so the copy has to be
+       bounded by bytes as well; src->len counts characters and cut
+       multi-byte UTF-8 text short */
+    string_copy( *dst, src->size + 1, src->addr, src->size );
+    return 0;
+}
+
static void vdco_init_values( p_dump_context ctx )
{
ctx->path = NULL;
@@ -84,9 +103,9 @@ static void vdco_init_values( p_dump_context ctx )
ctx->columns = NULL;
ctx->excluded_columns = NULL;
ctx->filter = NULL;
- ctx->idx_range = NULL;
- ctx->output_file = NULL;
- ctx->output_path = NULL;
+ ctx->idx_range = NULL;
+ ctx->output_file = NULL;
+ ctx->output_path = NULL;
ctx->rows = NULL;
ctx->print_row_id = true;
@@ -97,6 +116,7 @@ static void vdco_init_values( p_dump_context ctx )
ctx->max_line_len = 0;
ctx->indented_line_len = 0;
ctx->phase = 0;
+ ctx->slice_depth = 0;
ctx->help_requested = false;
ctx->usage_requested = false;
@@ -111,14 +131,14 @@ static void vdco_init_values( p_dump_context ctx )
ctx->objver_requested = false;
ctx->objts_requested = false;
ctx->objtype_requested = false;
- ctx->idx_enum_requested = false;
- ctx->idx_range_requested = false;
+ ctx->idx_enum_requested = false;
+ ctx->idx_range_requested = false;
ctx->disable_multithreading = false;
- ctx->table_defined = false;
- ctx->diff = false;
- ctx->show_spotgroups = false;
- /*ctx->force_sra_schema = false;*/
- ctx->show_spread = false;
+ ctx->table_defined = false;
+ ctx->diff = false;
+ ctx->show_spotgroups = false;
+ ctx->show_spread = false;
+ ctx->interactive = false;
}
rc_t vdco_init( dump_context **ctx )
@@ -250,14 +270,12 @@ static rc_t vdco_set_filter( p_dump_context ctx, const char *src )
}
/* not static because can be called directly from vdb-dump.c */
-rc_t vdco_set_table( p_dump_context ctx, const char *src )
+rc_t vdco_set_table( p_dump_context ctx, const char * src )
{
- rc_t rc = 0;
+ rc_t rc;
if ( ( ctx == NULL )||( src == NULL ) )
- {
rc = RC( rcVDB, rcNoTarg, rcWriting, rcParam, rcNull );
- }
- if ( rc == 0 )
+ else
{
rc = vdco_set_str( (char**)&(ctx->table), src );
DISP_RC( rc, "vdco_set_str() failed" );
@@ -265,6 +283,21 @@ rc_t vdco_set_table( p_dump_context ctx, const char *src )
return rc;
}
+
+/* vdco_set_table_String
+ *  String flavour of vdco_set_table(): stores a copy of "src" as the
+ *  table name of the dump context
+ *
+ *  "ctx" [ IN/OUT ] - dump context whose "table" member is replaced
+ *
+ *  "src" [ IN ] - table name to copy
+ *
+ *  returns an RC if either parameter is NULL, otherwise the result of
+ *  vdco_set_String()
+ */
+rc_t vdco_set_table_String( p_dump_context ctx, const String * src )
+{
+    rc_t rc;
+    if ( ( ctx == NULL )||( src == NULL ) )
+        rc = RC( rcVDB, rcNoTarg, rcWriting, rcParam, rcNull );
+    else
+    {
+        rc = vdco_set_String( (char**)&(ctx->table), src );
+        /* name the function that actually failed ( was "vdco_set_str()" ) */
+        DISP_RC( rc, "vdco_set_String() failed" );
+    }
+    return rc;
+}
+
+
static rc_t vdco_set_columns( p_dump_context ctx, const char *src )
{
rc_t rc = 0;
@@ -389,13 +422,17 @@ static bool vdco_set_format( p_dump_context ctx, const char *src )
ctx->format = df_fastq;
else if ( strcmp( src, "fastq1" ) == 0 )
ctx->format = df_fastq1;
- else if ( strcmp( src, "fasta" ) == 0 )
+ else if ( strcmp( src, "fasta" ) == 0 )
ctx->format = df_fasta;
else if ( strcmp( src, "fasta1" ) == 0 )
ctx->format = df_fasta1;
else if ( strcmp( src, "fasta2" ) == 0 )
ctx->format = df_fasta2;
- else if ( strcmp( src, "bin" ) == 0 )
+ else if ( strcmp( src, "qual" ) == 0 )
+ ctx->format = df_qual;
+ else if ( strcmp( src, "qual1" ) == 0 )
+ ctx->format = df_qual1;
+ else if ( strcmp( src, "bin" ) == 0 )
ctx->format = df_bin;
else if ( strcmp( src, "sql" ) == 0 )
ctx->format = df_sql;
@@ -564,11 +601,13 @@ static void vdco_evaluate_options( const Args *my_args,
ctx->disable_multithreading = vdco_get_bool_option( my_args, OPTION_NO_MULTITHREAD, false );
ctx->print_info = vdco_get_bool_option( my_args, OPTION_INFO, false );
ctx->diff = vdco_get_bool_option( my_args, OPTION_DIFF, false );
- ctx->show_spotgroups = vdco_get_bool_option( my_args, OPTION_SPOTGROUPS, false );
- /*ctx->force_sra_schema = vdco_get_bool_option( my_args, OPTION_SRASCHEMA, false );*/
- ctx->merge_ranges = vdco_get_bool_option( my_args, OPTION_MERGE_RANGES, false );
- ctx->show_spread = vdco_get_bool_option( my_args, OPTION_SPREAD, false );
-
+ ctx->show_spotgroups = vdco_get_bool_option( my_args, OPTION_SPOTGROUPS, false );
+ /*ctx->force_sra_schema = vdco_get_bool_option( my_args, OPTION_SRASCHEMA, false );*/
+ ctx->merge_ranges = vdco_get_bool_option( my_args, OPTION_MERGE_RANGES, false );
+ ctx->show_spread = vdco_get_bool_option( my_args, OPTION_SPREAD, false );
+ ctx->interactive = vdco_get_bool_option( my_args, OPTION_INTERACTIVE, false );
+ ctx->slice_depth = vdco_get_uint16_option( my_args, OPTION_SLICE, 0 );
+
ctx->cur_cache_size = vdco_get_size_t_option( my_args, OPTION_CUR_CACHE, CURSOR_CACHE_SIZE );
ctx->output_buffer_size = vdco_get_size_t_option( my_args, OPTION_OUT_BUF_SIZE, DEF_OPTION_OUT_BUF_SIZE );
@@ -578,18 +617,18 @@ static void vdco_evaluate_options( const Args *my_args,
ctx->compress_mode = orm_bzip2;
else
ctx->compress_mode = orm_uncompressed;
-
+
vdco_set_table( ctx, vdco_get_str_option( my_args, OPTION_TABLE ) );
- ctx->table_defined = ( ctx->table != NULL );
-
+ ctx->table_defined = ( ctx->table != NULL );
+
vdco_set_columns( ctx, vdco_get_str_option( my_args, OPTION_COLUMNS ) );
vdco_set_excluded_columns( ctx, vdco_get_str_option( my_args, OPTION_EXCLUDED_COLUMNS ) );
vdco_set_row_range( ctx, vdco_get_str_option( my_args, OPTION_ROWS ) );
- vdco_set_idx_range( ctx, vdco_get_str_option( my_args, OPTION_IDX_RANGE ) );
+ vdco_set_idx_range( ctx, vdco_get_str_option( my_args, OPTION_IDX_RANGE ) );
vdco_set_output_file( ctx, vdco_get_str_option( my_args, OPTION_OUT_FILE ) );
vdco_set_output_path( ctx, vdco_get_str_option( my_args, OPTION_OUT_PATH ) );
- ctx->idx_range_requested = ( ctx->idx_range != NULL );
+ ctx->idx_range_requested = ( ctx->idx_range != NULL );
vdco_set_schemas( my_args, ctx );
vdco_set_filter( ctx, vdco_get_str_option( my_args, OPTION_FILTER ) );
vdco_set_boolean_char( ctx, vdco_get_str_option( my_args, OPTION_BOOLEAN ) );
diff --git a/tools/vdb-dump/vdb-dump-context.h b/tools/vdb-dump/vdb-dump-context.h
index edb9ddf..2bd5d54 100644
--- a/tools/vdb-dump/vdb-dump-context.h
+++ b/tools/vdb-dump/vdb-dump-context.h
@@ -83,8 +83,9 @@ extern "C" {
#define OPTION_SPOTGROUPS "spotgroups"
/*#define OPTION_SRASCHEMA "sraschema"*/
#define OPTION_MERGE_RANGES "merge-ranges"
-#define OPTION_SPREAD "spread"
-#define OPTION_SLICE "slice"
+#define OPTION_SPREAD "spread"
+#define OPTION_SLICE "slice"
+#define OPTION_INTERACTIVE "interactive"
#define ALIAS_ROW_ID_ON "I"
#define ALIAS_LINE_FEED "l"
@@ -126,10 +127,12 @@ typedef enum dump_format_t
df_sra_dump,
df_tab,
df_fastq,
- df_fastq1,
+ df_fastq1,
df_fasta,
df_fasta1,
df_fasta2,
+ df_qual,
+ df_qual1,
df_bin,
df_sql
} dump_format_t;
@@ -145,7 +148,7 @@ typedef struct dump_context
const char *columns;
const char *excluded_columns;
const char *filter;
- const char *idx_range;
+ const char *idx_range;
const char *row_range;
const char *output_file;
const char *output_path;
@@ -156,7 +159,7 @@ typedef struct dump_context
uint16_t indented_line_len;
uint16_t phase;
uint32_t generic_idx;
- uint32_t slice_depth;
+ uint32_t slice_depth;
size_t cur_cache_size;
size_t output_buffer_size;
dump_format_t format;
@@ -183,16 +186,16 @@ typedef struct dump_context
bool show_blobbing;
bool enum_phys;
bool enum_readable;
- bool idx_enum_requested;
- bool idx_range_requested;
+ bool idx_enum_requested;
+ bool idx_range_requested;
bool disable_multithreading;
bool print_info;
- bool table_defined;
- bool diff;
- bool show_spotgroups;
- /* bool force_sra_schema; */
- bool merge_ranges;
- bool show_spread;
+ bool table_defined;
+ bool diff;
+ bool show_spotgroups;
+ bool merge_ranges;
+ bool show_spread;
+ bool interactive;
} dump_context;
typedef dump_context* p_dump_context;
@@ -206,6 +209,7 @@ void vdco_show_usage( p_dump_context ctx );
void vdco_show_help( p_dump_context ctx );
rc_t vdco_set_table( p_dump_context ctx, const char *src );
+rc_t vdco_set_table_String( p_dump_context ctx, const String * src );
rc_t vdco_capture_arguments_and_options( const Args * args, dump_context *ctx );
diff --git a/tools/vdb-dump/vdb-dump-fastq.c b/tools/vdb-dump/vdb-dump-fastq.c
index 58c2cbe..d02fef3 100644
--- a/tools/vdb-dump/vdb-dump-fastq.c
+++ b/tools/vdb-dump/vdb-dump-fastq.c
@@ -33,6 +33,7 @@
#include <vdb/vdb-priv.h>
#include <klib/log.h>
#include <klib/out.h>
+#include <klib/printf.h>
#include <klib/num-gen.h>
#include <insdc/sra.h> /* for filter/types */
@@ -46,18 +47,18 @@ rc_t CC Quitting ( void );
typedef struct fastq_ctx
{
const char * run_name;
- const VTable * tbl;
+ const VTable * tbl;
const VCursor * cursor;
- const struct num_gen_iter * row_iter;
- dump_format_t format;
- size_t cur_cache_size;
- uint32_t max_line_len;
+ const struct num_gen_iter * row_iter;
+ dump_format_t format;
+ size_t cur_cache_size;
+ uint32_t max_line_len;
uint32_t idx_read;
uint32_t idx_qual;
uint32_t idx_name;
- uint32_t idx_read_start;
- uint32_t idx_read_len;
- uint32_t idx_read_type;
+ uint32_t idx_read_start;
+ uint32_t idx_read_len;
+ uint32_t idx_read_type;
} fastq_ctx;
@@ -73,19 +74,19 @@ static char * vdb_fastq_extract_run_name( const char * acc_or_path )
static void init_fastq_ctx( const p_dump_context ctx, fastq_ctx * fctx, const char * acc_or_path )
{
- fctx->run_name = vdb_fastq_extract_run_name( acc_or_path );
- fctx->tbl = NULL;
- fctx->cursor = NULL;
- fctx->row_iter = NULL;
- fctx->max_line_len = ctx->max_line_len;
- fctx->format = ctx->format;
- fctx->cur_cache_size = ctx->cur_cache_size;
- fctx->idx_read = INVALID_COLUMN;
- fctx->idx_qual = INVALID_COLUMN;
- fctx->idx_name = INVALID_COLUMN;
- fctx->idx_read_start = INVALID_COLUMN;
- fctx->idx_read_len = INVALID_COLUMN;
- fctx->idx_read_type = INVALID_COLUMN;
+ fctx->run_name = vdb_fastq_extract_run_name( acc_or_path );
+ fctx->tbl = NULL;
+ fctx->cursor = NULL;
+ fctx->row_iter = NULL;
+ fctx->max_line_len = ctx->max_line_len;
+ fctx->format = ctx->format;
+ fctx->cur_cache_size = ctx->cur_cache_size;
+ fctx->idx_read = INVALID_COLUMN;
+ fctx->idx_qual = INVALID_COLUMN;
+ fctx->idx_name = INVALID_COLUMN;
+ fctx->idx_read_start = INVALID_COLUMN;
+ fctx->idx_read_len = INVALID_COLUMN;
+ fctx->idx_read_type = INVALID_COLUMN;
}
@@ -97,618 +98,802 @@ static void vdb_fastq_row_error( const char * fmt, rc_t rc, int64_t row_id )
static bool is_name_in_list( KNamelist * col_names, const char * to_find )
{
- bool res = false;
- uint32_t count;
- rc_t rc = KNamelistCount( col_names, &count );
- DISP_RC( rc, "KNamelistCount() failed" );
- if ( rc == 0 )
- {
- uint32_t i;
- size_t to_find_len = string_size( to_find );
- for ( i = 0; i < count && rc == 0 && !res; ++i )
- {
- const char * col_name;
- rc = KNamelistGet( col_names, i, &col_name );
- DISP_RC( rc, "KNamelistGet() failed" );
- if ( rc == 0 )
- {
- size_t col_name_len = string_size( col_name );
- if ( col_name_len == to_find_len )
- res = ( string_cmp( to_find, to_find_len, col_name, col_name_len, col_name_len ) == 0 );
- }
- }
- }
- return res;
+ bool res = false;
+ uint32_t count;
+ rc_t rc = KNamelistCount( col_names, &count );
+ DISP_RC( rc, "KNamelistCount() failed" );
+ if ( rc == 0 )
+ {
+ uint32_t i;
+ size_t to_find_len = string_size( to_find );
+ for ( i = 0; i < count && rc == 0 && !res; ++i )
+ {
+ const char * col_name;
+ rc = KNamelistGet( col_names, i, &col_name );
+ DISP_RC( rc, "KNamelistGet() failed" );
+ if ( rc == 0 )
+ {
+ size_t col_name_len = string_size( col_name );
+ if ( col_name_len == to_find_len )
+ res = ( string_cmp( to_find, to_find_len, col_name, col_name_len, col_name_len ) == 0 );
+ }
+ }
+ }
+ return res;
}
static rc_t prepare_column( fastq_ctx * fctx, KNamelist * col_names, uint32_t * col_idx,
- const char * to_find, const char * col_spec )
-{
- rc_t rc = 0;
- if ( is_name_in_list( col_names, to_find ) )
- {
- rc = VCursorAddColumn( fctx->cursor, col_idx, col_spec );
- if ( rc != 0 )
- {
- *col_idx = INVALID_COLUMN;
- PLOGERR( klogInt, ( klogInt, rc, "VCurosrAddColumn( '$(col)' ) failed", "col=%s", col_spec ) );
- }
- }
- return rc;
+ const char * to_find, const char * col_spec )
+{
+ rc_t rc = 0;
+ if ( is_name_in_list( col_names, to_find ) )
+ {
+ rc = VCursorAddColumn( fctx->cursor, col_idx, col_spec );
+ if ( rc != 0 )
+ {
+ *col_idx = INVALID_COLUMN;
+ PLOGERR( klogInt, ( klogInt, rc, "VCurosrAddColumn( '$(col)' ) failed", "col=%s", col_spec ) );
+ }
+ }
+ return rc;
}
static rc_t vdb_prepare_cursor( fastq_ctx * fctx )
{
- KNamelist * col_names;
- rc_t rc = VTableListCol( fctx->tbl, &col_names );
- DISP_RC( rc, "VTableListCol() failed" );
- if ( rc == 0 )
- {
- rc = VTableCreateCachedCursorRead( fctx->tbl, &fctx->cursor, fctx->cur_cache_size );
- DISP_RC( rc, "VTableCreateCursorRead( fasta/fastq ) failed" );
- if ( rc == 0 )
- rc = prepare_column( fctx, col_names, &fctx->idx_read, "READ", "(INSDC:dna:text)READ" );
-
- if ( rc == 0 && ( fctx->format == df_fastq || fctx->format == df_fastq1 ) )
- rc = prepare_column( fctx, col_names, &fctx->idx_qual, "QUALITY", "(INSDC:quality:text:phred_33)QUALITY" );
-
- if ( rc == 0 )
- {
- if ( fctx->format == df_fasta2 )
- rc = prepare_column( fctx, col_names, &fctx->idx_name, "SEQ_ID", "(ascii)SEQ_ID" );
- if ( rc == 0 && fctx->idx_name == INVALID_COLUMN )
- rc = prepare_column( fctx, col_names, &fctx->idx_name, "NAME", "(ascii)NAME" );
- }
-
- if ( rc == 0 )
- rc = prepare_column( fctx, col_names, &fctx->idx_read_start, "READ_START", "(INSDC:coord:zero)READ_START" );
-
- if ( rc == 0 )
- rc = prepare_column( fctx, col_names, &fctx->idx_read_len, "READ_LEN", "(INSDC:coord:len)READ_LEN" );
-
- if ( rc == 0 )
- rc = prepare_column( fctx, col_names, &fctx->idx_read_type, "READ_TYPE", "(INSDC:SRA:xread_type)READ_TYPE" );
-
- if ( rc == 0 )
- {
- rc = VCursorOpen ( fctx->cursor );
- DISP_RC( rc, "VCursorOpen( fasta/fastq ) failed" );
- }
- KNamelistRelease( col_names );
- }
+ KNamelist * col_names;
+ rc_t rc = VTableListCol( fctx->tbl, &col_names );
+ DISP_RC( rc, "VTableListCol() failed" );
+ if ( rc == 0 )
+ {
+ rc = VTableCreateCachedCursorRead( fctx->tbl, &fctx->cursor, fctx->cur_cache_size );
+ DISP_RC( rc, "VTableCreateCursorRead( fasta/fastq ) failed" );
+ if ( rc == 0 )
+ rc = prepare_column( fctx, col_names, &fctx->idx_read, "READ", "(INSDC:dna:text)READ" );
+
+ if ( rc == 0 && ( fctx->format == df_fastq || fctx->format == df_fastq1 ) )
+ rc = prepare_column( fctx, col_names, &fctx->idx_qual, "QUALITY", "(INSDC:quality:text:phred_33)QUALITY" );
+
+ if ( rc == 0 && ( fctx->format == df_qual || fctx->format == df_qual1 ) )
+ rc = prepare_column( fctx, col_names, &fctx->idx_qual, "QUALITY", "(INSDC:quality:phred)QUALITY" );
+
+ if ( rc == 0 )
+ {
+ if ( fctx->format == df_fasta2 )
+ rc = prepare_column( fctx, col_names, &fctx->idx_name, "SEQ_ID", "(ascii)SEQ_ID" );
+ if ( rc == 0 && fctx->idx_name == INVALID_COLUMN )
+ rc = prepare_column( fctx, col_names, &fctx->idx_name, "NAME", "(ascii)NAME" );
+ }
+
+ if ( rc == 0 )
+ rc = prepare_column( fctx, col_names, &fctx->idx_read_start, "READ_START", "(INSDC:coord:zero)READ_START" );
+
+ if ( rc == 0 )
+ rc = prepare_column( fctx, col_names, &fctx->idx_read_len, "READ_LEN", "(INSDC:coord:len)READ_LEN" );
+
+ if ( rc == 0 )
+ rc = prepare_column( fctx, col_names, &fctx->idx_read_type, "READ_TYPE", "(INSDC:SRA:xread_type)READ_TYPE" );
+
+ if ( rc == 0 )
+ {
+ rc = VCursorOpen ( fctx->cursor );
+ DISP_RC( rc, "VCursorOpen( fasta/fastq ) failed" );
+ }
+ KNamelistRelease( col_names );
+ }
return rc;
}
typedef struct fastq_spot
{
- const char * name;
- const char * bases;
- const char * qual;
- const uint32_t * rd_start;
- const uint32_t * rd_len;
- const uint8_t * rd_type;
- uint32_t name_len;
- uint32_t num_bases;
- uint32_t num_qual;
- uint32_t num_rd_start;
- uint32_t num_rd_len;
- uint32_t num_rd_type;
+ const char * name;
+ const char * bases;
+ const char * qual;
+ const uint32_t * rd_start;
+ const uint32_t * rd_len;
+ const uint8_t * rd_type;
+ uint32_t name_len;
+ uint32_t num_bases;
+ uint32_t num_qual;
+ uint32_t num_rd_start;
+ uint32_t num_rd_len;
+ uint32_t num_rd_type;
} fastq_spot;
static rc_t read_spot( const fastq_ctx * fctx, int64_t row_id, fastq_spot * spot )
{
- rc_t rc = 0;
- uint32_t elem_bits, boff;
- if ( fctx->idx_name != INVALID_COLUMN )
- {
- rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_name, &elem_bits,
- (const void**)&spot->name, &boff, &spot->name_len );
- if ( rc != 0 )
- vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), NAME ) failed", rc, row_id );
- }
-
- if ( rc == 0 && fctx->idx_read != INVALID_COLUMN )
- {
- rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read, &elem_bits,
- (const void**)&spot->bases, &boff, &spot->num_bases );
- if ( rc != 0 )
- vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ ) failed", rc, row_id );
- }
-
- if ( rc == 0 && fctx->idx_qual != INVALID_COLUMN )
- {
- rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_qual, &elem_bits,
- (const void**)&spot->qual, &boff, &spot->num_qual );
- if ( rc != 0 )
- vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), QUALITY ) failed", rc, row_id );
- }
-
- if ( rc == 0 && fctx->idx_read_start != INVALID_COLUMN )
- {
- rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read_start, &elem_bits,
- (const void**)&spot->rd_start, &boff, &spot->num_rd_start );
- if ( rc != 0 )
- vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ_START ) failed", rc, row_id );
- }
-
- if ( rc == 0 && fctx->idx_read_len != INVALID_COLUMN )
- {
- rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read_len, &elem_bits,
- (const void**)&spot->rd_len, &boff, &spot->num_rd_len );
- if ( rc != 0 )
- vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ_LEN ) failed", rc, row_id );
- }
-
- if ( rc == 0 && fctx->idx_read_type != INVALID_COLUMN )
- {
- rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read_type, &elem_bits,
- (const void**)&spot->rd_type, &boff, &spot->num_rd_type );
- if ( rc != 0 )
- vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ_TYPE ) failed", rc, row_id );
- }
-
- return rc;
+ rc_t rc = 0;
+ uint32_t elem_bits, boff;
+ if ( fctx->idx_name != INVALID_COLUMN )
+ {
+ rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_name, &elem_bits,
+ (const void**)&spot->name, &boff, &spot->name_len );
+ if ( rc != 0 )
+ vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), NAME ) failed", rc, row_id );
+ }
+
+ if ( rc == 0 && fctx->idx_read != INVALID_COLUMN )
+ {
+ rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read, &elem_bits,
+ (const void**)&spot->bases, &boff, &spot->num_bases );
+ if ( rc != 0 )
+ vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ ) failed", rc, row_id );
+ }
+
+ if ( rc == 0 && fctx->idx_qual != INVALID_COLUMN )
+ {
+ rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_qual, &elem_bits,
+ (const void**)&spot->qual, &boff, &spot->num_qual );
+ if ( rc != 0 )
+ vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), QUALITY ) failed", rc, row_id );
+ }
+
+ if ( rc == 0 && fctx->idx_read_start != INVALID_COLUMN )
+ {
+ rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read_start, &elem_bits,
+ (const void**)&spot->rd_start, &boff, &spot->num_rd_start );
+ if ( rc != 0 )
+ vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ_START ) failed", rc, row_id );
+ }
+
+ if ( rc == 0 && fctx->idx_read_len != INVALID_COLUMN )
+ {
+ rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read_len, &elem_bits,
+ (const void**)&spot->rd_len, &boff, &spot->num_rd_len );
+ if ( rc != 0 )
+ vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ_LEN ) failed", rc, row_id );
+ }
+
+ if ( rc == 0 && fctx->idx_read_type != INVALID_COLUMN )
+ {
+ rc = VCursorCellDataDirect( fctx->cursor, row_id, fctx->idx_read_type, &elem_bits,
+ (const void**)&spot->rd_type, &boff, &spot->num_rd_type );
+ if ( rc != 0 )
+ vdb_fastq_row_error( "VCursorCellDataDirect( row#$(row_nr), READ_TYPE ) failed", rc, row_id );
+ }
+
+ return rc;
}
static rc_t vdb_fastq1_frag_type_checked( fastq_spot * spot, int64_t row_id, const fastq_ctx * fctx )
{
- rc_t rc = 0;
- if ( spot->num_bases != spot->num_qual )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- PLOGERR( klogInt,
- ( klogInt, rc, "invalid spot #$(row): bases.len( $(n_bases) ) != qual.len( $(n_qual)",
- "row=%li,n_bases=%d,n_qual=%d", row_id, spot->num_bases, spot->num_qual ) );
- }
- else if ( spot->num_rd_start != spot->num_rd_len ||
- spot->num_rd_start != spot->num_rd_type )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- PLOGERR( klogInt,
- ( klogInt, rc,
- "invalid spot #$(row): #READ_START=$(rd_start), #READ_LEN=$(rd_len), #READ_TYPE=$(rd_type)",
- "row=%li,rd_start=%d,rd_len=%d,rd_type=%d",
- row_id, spot->num_rd_start, spot->num_rd_len, spot->num_rd_type ) );
- }
- else
- {
- uint32_t idx, frag, ofs;
- for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot->num_rd_start; ++idx )
- {
- if ( ( ( spot->rd_type[ idx ] & READ_TYPE_BIOLOGICAL ) == READ_TYPE_BIOLOGICAL ) &&
- spot->rd_len[ idx ] > 0 )
- {
- rc = KOutMsg( "@%s.%li.%d %.*s length=%u\n%.*s\n+%s.%li.%d %.*s length=%u\n%.*s\n",
- fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
- spot->rd_len[ idx ], &( spot->bases[ ofs ] ),
- fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
- spot->rd_len[ idx ], &( spot->qual[ ofs ] )
- );
- frag++;
- }
- ofs += spot->rd_len[ idx ];
- }
- }
- return rc;
+ rc_t rc = 0;
+ if ( spot->num_bases != spot->num_qual )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ PLOGERR( klogInt,
+ ( klogInt, rc, "invalid spot #$(row): bases.len( $(n_bases) ) != qual.len( $(n_qual)",
+ "row=%li,n_bases=%d,n_qual=%d", row_id, spot->num_bases, spot->num_qual ) );
+ }
+ else if ( spot->num_rd_start != spot->num_rd_len ||
+ spot->num_rd_start != spot->num_rd_type )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ PLOGERR( klogInt,
+ ( klogInt, rc,
+ "invalid spot #$(row): #READ_START=$(rd_start), #READ_LEN=$(rd_len), #READ_TYPE=$(rd_type)",
+ "row=%li,rd_start=%d,rd_len=%d,rd_type=%d",
+ row_id, spot->num_rd_start, spot->num_rd_len, spot->num_rd_type ) );
+ }
+ else
+ {
+ uint32_t idx, frag, ofs;
+ for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot->num_rd_start; ++idx )
+ {
+ if ( ( ( spot->rd_type[ idx ] & READ_TYPE_BIOLOGICAL ) == READ_TYPE_BIOLOGICAL ) &&
+ spot->rd_len[ idx ] > 0 )
+ {
+ rc = KOutMsg( "@%s.%li.%d %.*s length=%u\n%.*s\n+%s.%li.%d %.*s length=%u\n%.*s\n",
+ fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
+ spot->rd_len[ idx ], &( spot->bases[ ofs ] ),
+ fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
+ spot->rd_len[ idx ], &( spot->qual[ ofs ] )
+ );
+ frag++;
+ }
+ ofs += spot->rd_len[ idx ];
+ }
+ }
+ return rc;
}
static rc_t vdb_fastq1_frag_not_type_checked( fastq_spot * spot, int64_t row_id, const fastq_ctx * fctx )
{
- rc_t rc = 0;
- if ( spot->num_bases != spot->num_qual )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- PLOGERR( klogInt,
- ( klogInt, rc, "invalid spot #$(row): bases.len( $(n_bases) ) != qual.len( $(n_qual)",
- "row=%li,n_bases=%d,n_qual=%d", row_id, spot->num_bases, spot->num_qual ) );
- }
- else if ( spot->num_rd_start != spot->num_rd_len )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- PLOGERR( klogInt,
- ( klogInt, rc,
- "invalid spot #$(row): #READ_START=$(rd_start), #READ_LEN=$(rd_len)",
- "row=%li,rd_start=%d,rd_len=%d",
- row_id, spot->num_rd_start, spot->num_rd_len ) );
- }
- else
- {
- uint32_t idx, frag, ofs;
- for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot->num_rd_start; ++idx )
- {
- if ( spot->rd_len[ idx ] > 0 )
- {
- rc = KOutMsg( "@%s.%li.%d %.*s length=%u\n%.*s\n+%s.%li.%d %.*s length=%u\n%.*s\n",
- fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
- spot->rd_len[ idx ], &( spot->bases[ ofs ] ),
- fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
- spot->rd_len[ idx ], &( spot->qual[ ofs ] )
- );
- frag++;
- }
- ofs += spot->rd_len[ idx ];
- }
- }
- return rc;
+ rc_t rc = 0;
+ if ( spot->num_bases != spot->num_qual )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ PLOGERR( klogInt,
+ ( klogInt, rc, "invalid spot #$(row): bases.len( $(n_bases) ) != qual.len( $(n_qual)",
+ "row=%li,n_bases=%d,n_qual=%d", row_id, spot->num_bases, spot->num_qual ) );
+ }
+ else if ( spot->num_rd_start != spot->num_rd_len )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ PLOGERR( klogInt,
+ ( klogInt, rc,
+ "invalid spot #$(row): #READ_START=$(rd_start), #READ_LEN=$(rd_len)",
+ "row=%li,rd_start=%d,rd_len=%d",
+ row_id, spot->num_rd_start, spot->num_rd_len ) );
+ }
+ else
+ {
+ uint32_t idx, frag, ofs;
+ for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot->num_rd_start; ++idx )
+ {
+ if ( spot->rd_len[ idx ] > 0 )
+ {
+ rc = KOutMsg( "@%s.%li.%d %.*s length=%u\n%.*s\n+%s.%li.%d %.*s length=%u\n%.*s\n",
+ fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
+ spot->rd_len[ idx ], &( spot->bases[ ofs ] ),
+ fctx->run_name, row_id, frag, spot->name_len, spot->name, spot->rd_len[ idx ],
+ spot->rd_len[ idx ], &( spot->qual[ ofs ] )
+ );
+ frag++;
+ }
+ ofs += spot->rd_len[ idx ];
+ }
+ }
+ return rc;
}
static rc_t vdb_fastq1_loop( const fastq_ctx * fctx )
{
- rc_t rc = 0;
- if ( fctx->idx_read == INVALID_COLUMN || fctx->idx_name == INVALID_COLUMN ||
- fctx->idx_qual == INVALID_COLUMN || fctx->idx_read_start == INVALID_COLUMN ||
- fctx->idx_read_len == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- DISP_RC( rc, "cannot generate fasta-format, at least one of these columns not found: READ, NAME, QUALITY, READ_START, READ_LEN" );
- }
- else
- {
- bool has_type = ( fctx->idx_read_type == INVALID_COLUMN );
- int64_t row_id;
- while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
- {
- if ( rc == 0 )
- rc = Quitting();
- if ( rc == 0 )
- {
- fastq_spot spot;
- rc = read_spot( fctx, row_id, &spot );
- if ( rc == 0 )
- {
- if ( has_type )
- rc = vdb_fastq1_frag_type_checked( &spot, row_id, fctx );
- else
- rc = vdb_fastq1_frag_not_type_checked( &spot, row_id, fctx );
- }
- }
- }
- }
- return rc;
+ rc_t rc = 0; /* writes one FASTQ record per fragment for every row delivered by fctx->row_iter */
+ if ( fctx->idx_read == INVALID_COLUMN || fctx->idx_name == INVALID_COLUMN ||
+ fctx->idx_qual == INVALID_COLUMN || fctx->idx_read_start == INVALID_COLUMN ||
+ fctx->idx_read_len == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ DISP_RC( rc, "cannot generate fastq1-format, at least one of these columns not found: READ, NAME, QUALITY, READ_START, READ_LEN" );
+ }
+ else
+ {
+ bool has_type = ( fctx->idx_read_type != INVALID_COLUMN ); /* READ_TYPE is optional: filter by type only if the column exists */
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting(); /* stop early if the user asked to quit */
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ if ( has_type )
+ rc = vdb_fastq1_frag_type_checked( &spot, row_id, fctx ); /* only biological fragments */
+ else
+ rc = vdb_fastq1_frag_not_type_checked( &spot, row_id, fctx ); /* every non-empty fragment */
+ }
+ }
+ }
+ }
+ return rc;
}
static rc_t vdb_fastq_loop( const fastq_ctx * fctx )
{
- rc_t rc = 0;
- if ( fctx->idx_read == INVALID_COLUMN || fctx->idx_qual == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- DISP_RC( rc, "cannot generate fasta-format: READ and/or QUALITY column not found" );
- }
- else
- {
- bool has_name = ( fctx->idx_name != INVALID_COLUMN );
- int64_t row_id;
- while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
- {
- if ( rc == 0 )
- rc = Quitting();
- if ( rc == 0 )
- {
- fastq_spot spot;
- rc = read_spot( fctx, row_id, &spot );
- if ( rc == 0 )
- {
- if ( has_name )
- rc = KOutMsg( "@%s.%li %.*s length=%u\n%.*s\n+%s.%li %.*s length=%u\n%.*s\n",
- fctx->run_name, row_id, spot.name_len, spot.name, spot.num_bases,
- spot.num_bases, spot.bases,
- fctx->run_name, row_id, spot.name_len, spot.name, spot.num_qual,
- spot.num_qual, spot.qual );
- else
-
- rc = KOutMsg( "@%s.%li %li length=%u\n%.*s\n+%s.%li %li length=%u\n%.*s\n",
- fctx->run_name, row_id, row_id, spot.num_bases,
- spot.num_bases, spot.bases,
- fctx->run_name, row_id, row_id, spot.num_bases,
- spot.num_qual, spot.qual );
- }
- }
- }
- }
+ rc_t rc = 0;
+ if ( fctx->idx_read == INVALID_COLUMN || fctx->idx_qual == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ DISP_RC( rc, "cannot generate fasta-format: READ and/or QUALITY column not found" );
+ }
+ else
+ {
+ bool has_name = ( fctx->idx_name != INVALID_COLUMN );
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ if ( has_name )
+ rc = KOutMsg( "@%s.%li %.*s length=%u\n%.*s\n+%s.%li %.*s length=%u\n%.*s\n",
+ fctx->run_name, row_id, spot.name_len, spot.name, spot.num_bases,
+ spot.num_bases, spot.bases,
+ fctx->run_name, row_id, spot.name_len, spot.name, spot.num_qual,
+ spot.num_qual, spot.qual );
+ else
+
+ rc = KOutMsg( "@%s.%li %li length=%u\n%.*s\n+%s.%li %li length=%u\n%.*s\n",
+ fctx->run_name, row_id, row_id, spot.num_bases,
+ spot.num_bases, spot.bases,
+ fctx->run_name, row_id, row_id, spot.num_bases,
+ spot.num_qual, spot.qual );
+ }
+ }
+ }
+ }
return rc;
}
static rc_t print_bases( const char * bases, uint32_t num_bases, uint32_t max_line_len )
{
- rc_t rc;
- if ( max_line_len == 0 )
- rc = KOutMsg( "%.*s\n", num_bases, bases );
- else
- {
- uint32_t idx = 0, to_print = num_bases;
- rc = 0;
- while ( rc == 0 && idx < num_bases )
- {
- if ( to_print > max_line_len )
- to_print = max_line_len;
-
- rc = KOutMsg( "%.*s\n", to_print, &bases[ idx ] );
- if ( rc == 0 )
- {
- idx += to_print;
- to_print = ( num_bases - idx );
- }
- }
- }
- return rc;
+ rc_t rc;
+ if ( max_line_len == 0 )
+ rc = KOutMsg( "%.*s\n", num_bases, bases );
+ else
+ {
+ uint32_t idx = 0, to_print = num_bases;
+ rc = 0;
+ while ( rc == 0 && idx < num_bases )
+ {
+ if ( to_print > max_line_len )
+ to_print = max_line_len;
+
+ rc = KOutMsg( "%.*s\n", to_print, &bases[ idx ] );
+ if ( rc == 0 )
+ {
+ idx += to_print;
+ to_print = ( num_bases - idx );
+ }
+ }
+ }
+ return rc;
+}
+
+/* print 'count' quality values as space-separated decimals, starting a new line before max_line_len would be reached */
+static rc_t print_qual( const char * qual, uint32_t count, uint32_t max_line_len )
+{
+ rc_t rc = 0;
+ uint32_t i = 0, on_line = 0; /* on_line = number of characters already written on the current line */
+ while ( rc == 0 && i < count )
+ {
+ char buffer[ 16 ];
+ size_t num_writ;
+ rc = string_printf( buffer, sizeof buffer, &num_writ, "%d", qual[ i ] );
+ if ( rc == 0 )
+ {
+ if ( on_line == 0 )
+ {
+ rc = KOutMsg( "%s", buffer ); /* first value of the output: no separator in front */
+ on_line = num_writ;
+ }
+ else if ( ( on_line + num_writ + 1 ) < max_line_len ) /* 'else' is required: otherwise the first value is printed twice */
+ {
+ rc = KOutMsg( " %s", buffer );
+ on_line += ( num_writ + 1 );
+ }
+ else
+ {
+ rc = KOutMsg( "\n%s", buffer ); /* value does not fit any more: continue on a new line */
+ on_line = num_writ;
+ }
+ i++;
+ }
+ }
+ { rc_t rc2 = KOutMsg( "\n" ); if ( rc == 0 ) rc = rc2; } /* always terminate the line, but report the first error */
+ return rc;
}
static rc_t vdb_fasta_frag_type_checked_loop( const fastq_ctx * fctx )
{
- rc_t rc = 0;
- bool has_name = ( fctx->idx_name != INVALID_COLUMN );
- int64_t row_id;
- while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
- {
- if ( rc == 0 )
- rc = Quitting();
- if ( rc == 0 )
- {
- fastq_spot spot;
- rc = read_spot( fctx, row_id, &spot );
- if ( rc == 0 )
- {
- uint32_t idx, frag, ofs;
- for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot.num_rd_start; ++idx )
- {
- uint32_t frag_len = spot.rd_len[ idx ];
- if ( frag_len > 0 &&
- ( ( spot.rd_type[ idx ] & READ_TYPE_BIOLOGICAL ) == READ_TYPE_BIOLOGICAL ) )
- {
- if ( has_name )
- rc = KOutMsg( ">%s.%li.%d %.*s length=%u\n",
- fctx->run_name, row_id, frag, spot.name_len, spot.name, frag_len );
- else
- rc = KOutMsg( ">%s.%li.%d %li length=%u\n",
- fctx->run_name, row_id, frag, row_id, frag_len );
-
- if ( rc == 0 )
- rc = print_bases( &( spot.bases[ ofs ] ), frag_len, fctx->max_line_len );
-
- frag++;
- }
- ofs += frag_len;
- }
- }
- }
- }
- return rc;
+ rc_t rc = 0;
+ bool has_name = ( fctx->idx_name != INVALID_COLUMN );
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ uint32_t idx, frag, ofs;
+ for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot.num_rd_start; ++idx )
+ {
+ uint32_t frag_len = spot.rd_len[ idx ];
+ if ( frag_len > 0 &&
+ ( ( spot.rd_type[ idx ] & READ_TYPE_BIOLOGICAL ) == READ_TYPE_BIOLOGICAL ) )
+ {
+ if ( has_name )
+ rc = KOutMsg( ">%s.%li.%d %.*s length=%u\n",
+ fctx->run_name, row_id, frag, spot.name_len, spot.name, frag_len );
+ else
+ rc = KOutMsg( ">%s.%li.%d %li length=%u\n",
+ fctx->run_name, row_id, frag, row_id, frag_len );
+
+ if ( rc == 0 )
+ rc = print_bases( &( spot.bases[ ofs ] ), frag_len, fctx->max_line_len );
+
+ frag++;
+ }
+ ofs += frag_len;
+ }
+ }
+ }
+ }
+ return rc;
}
static rc_t vdb_fasta_frag_no_type_check_loop( const fastq_ctx * fctx )
{
- rc_t rc = 0;
- bool has_name = ( fctx->idx_name != INVALID_COLUMN );
- int64_t row_id;
- while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
- {
- if ( rc == 0 )
- rc = Quitting();
- if ( rc == 0 )
- {
- fastq_spot spot;
- rc = read_spot( fctx, row_id, &spot );
- if ( rc == 0 )
- {
- uint32_t idx, frag, ofs;
- for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot.num_rd_start; ++idx )
- {
- uint32_t frag_len = spot.rd_len[ idx ];
- if ( frag_len > 0 )
- {
- if ( has_name )
- rc = KOutMsg( ">%s.%li.%d %.*s length=%u\n",
- fctx->run_name, row_id, frag, spot.name_len, spot.name, frag_len );
- else
- rc = KOutMsg( ">%s.%li.%d %li length=%u\n",
- fctx->run_name, row_id, frag, row_id, frag_len );
-
- if ( rc == 0 )
- rc = print_bases( &( spot.bases[ ofs ] ), frag_len, fctx->max_line_len );
-
- frag++;
- }
- ofs += frag_len;
- }
- }
- }
- }
- return rc;
+ rc_t rc = 0;
+ bool has_name = ( fctx->idx_name != INVALID_COLUMN );
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ uint32_t idx, frag, ofs;
+ for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot.num_rd_start; ++idx )
+ {
+ uint32_t frag_len = spot.rd_len[ idx ];
+ if ( frag_len > 0 )
+ {
+ if ( has_name )
+ rc = KOutMsg( ">%s.%li.%d %.*s length=%u\n",
+ fctx->run_name, row_id, frag, spot.name_len, spot.name, frag_len );
+ else
+ rc = KOutMsg( ">%s.%li.%d %li length=%u\n",
+ fctx->run_name, row_id, frag, row_id, frag_len );
+
+ if ( rc == 0 )
+ rc = print_bases( &( spot.bases[ ofs ] ), frag_len, fctx->max_line_len );
+
+ frag++;
+ }
+ ofs += frag_len;
+ }
+ }
+ }
+ }
+ return rc;
}
static rc_t vdb_fasta_spot_loop( const fastq_ctx * fctx )
{
- rc_t rc = 0;
- bool has_name = ( fctx->idx_name != INVALID_COLUMN );
- int64_t row_id;
- while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
- {
- if ( rc == 0 )
- rc = Quitting();
- if ( rc == 0 )
- {
- fastq_spot spot;
- rc = read_spot( fctx, row_id, &spot );
- if ( rc == 0 )
- {
- if ( has_name )
- rc = KOutMsg( ">%s.%li %.*s length=%u\n",
- fctx->run_name, row_id, spot.name_len, spot.name, spot.num_bases );
- else
- rc = KOutMsg( ">%s.%li %li length=%u\n", fctx->run_name, row_id, row_id, spot.num_bases );
-
- if ( rc == 0 )
- rc = print_bases( spot.bases, spot.num_bases, fctx->max_line_len );
- }
- }
- }
- return rc;
+ rc_t rc = 0;
+ bool has_name = ( fctx->idx_name != INVALID_COLUMN );
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ if ( has_name )
+ rc = KOutMsg( ">%s.%li %.*s length=%u\n",
+ fctx->run_name, row_id, spot.name_len, spot.name, spot.num_bases );
+ else
+ rc = KOutMsg( ">%s.%li %li length=%u\n", fctx->run_name, row_id, row_id, spot.num_bases );
+
+ if ( rc == 0 )
+ rc = print_bases( spot.bases, spot.num_bases, fctx->max_line_len );
+ }
+ }
+ }
+ return rc;
}
+
static rc_t vdb_fasta_loop( const fastq_ctx * fctx )
{
- rc_t rc = 0;
- if ( fctx->idx_read == INVALID_COLUMN )
- {
- /* we actually only need a READ-column, everything else name/splitting etc. is optional... */
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- DISP_RC( rc, "cannot generate fasta-format: READ column not found" );
- }
- else
- {
- bool can_split = ( fctx->idx_read_start != INVALID_COLUMN && fctx->idx_read_len != INVALID_COLUMN );
- if ( can_split )
- {
- bool has_type = ( fctx->idx_read_type != INVALID_COLUMN );
- if ( has_type )
- rc = vdb_fasta_frag_type_checked_loop( fctx );
- else
- rc = vdb_fasta_frag_no_type_check_loop( fctx );
- }
- else
- rc = vdb_fasta_spot_loop( fctx );
- }
+ rc_t rc = 0;
+ if ( fctx->idx_read == INVALID_COLUMN )
+ {
+ /* we actually only need a READ-column, everything else name/splitting etc. is optional... */
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ DISP_RC( rc, "cannot generate fasta-format: READ column not found" );
+ }
+ else
+ {
+ bool can_split = ( fctx->idx_read_start != INVALID_COLUMN && fctx->idx_read_len != INVALID_COLUMN );
+ if ( can_split )
+ {
+ bool has_type = ( fctx->idx_read_type != INVALID_COLUMN );
+ if ( has_type )
+ rc = vdb_fasta_frag_type_checked_loop( fctx );
+ else
+ rc = vdb_fasta_frag_no_type_check_loop( fctx );
+ }
+ else
+ rc = vdb_fasta_spot_loop( fctx );
+ }
+ return rc;
+}
+
+
+/* -------------------------------------------------------------------------------------------------------------- */
+
+static rc_t vdb_qual_frag_type_checked_loop( const fastq_ctx * fctx )
+{
+ rc_t rc = 0;
+ bool has_name = ( fctx->idx_name != INVALID_COLUMN );
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ uint32_t idx, frag, ofs;
+ for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot.num_rd_start; ++idx )
+ {
+ uint32_t frag_len = spot.rd_len[ idx ];
+ if ( frag_len > 0 &&
+ ( ( spot.rd_type[ idx ] & READ_TYPE_BIOLOGICAL ) == READ_TYPE_BIOLOGICAL ) )
+ {
+ if ( has_name )
+ rc = KOutMsg( ">%s.%li.%d %.*s length=%u\n",
+ fctx->run_name, row_id, frag, spot.name_len, spot.name, frag_len );
+ else
+ rc = KOutMsg( ">%s.%li.%d %li length=%u\n",
+ fctx->run_name, row_id, frag, row_id, frag_len );
+
+ if ( rc == 0 )
+ rc = print_qual( &( spot.qual[ ofs ] ), frag_len, fctx->max_line_len );
+
+ frag++;
+ }
+ ofs += frag_len;
+ }
+ }
+ }
+ }
+ return rc;
+}
+
+
+static rc_t vdb_qual_frag_no_type_check_loop( const fastq_ctx * fctx )
+{
+ rc_t rc = 0;
+ bool has_name = ( fctx->idx_name != INVALID_COLUMN );
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ uint32_t idx, frag, ofs;
+ for ( idx = 0, frag = 1, ofs = 0; rc == 0 && idx < spot.num_rd_start; ++idx )
+ {
+ uint32_t frag_len = spot.rd_len[ idx ];
+ if ( frag_len > 0 )
+ {
+ if ( has_name )
+ rc = KOutMsg( ">%s.%li.%d %.*s length=%u\n",
+ fctx->run_name, row_id, frag, spot.name_len, spot.name, frag_len );
+ else
+ rc = KOutMsg( ">%s.%li.%d %li length=%u\n",
+ fctx->run_name, row_id, frag, row_id, frag_len );
+
+ if ( rc == 0 )
+ rc = print_qual( &( spot.qual[ ofs ] ), frag_len, fctx->max_line_len );
+
+ frag++;
+ }
+ ofs += frag_len;
+ }
+ }
+ }
+ }
+ return rc;
+}
+
+
+static rc_t vdb_qual_spot_loop( const fastq_ctx * fctx )
+{
+ rc_t rc = 0;
+ bool has_name = ( fctx->idx_name != INVALID_COLUMN );
+ int64_t row_id;
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting();
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ if ( has_name )
+ rc = KOutMsg( ">%s.%li %.*s length=%u\n",
+ fctx->run_name, row_id, spot.name_len, spot.name, spot.num_qual );
+ else
+ rc = KOutMsg( ">%s.%li %li length=%u\n", fctx->run_name, row_id, row_id, spot.num_qual );
+
+ if ( rc == 0 )
+ rc = print_qual( spot.qual, spot.num_qual, fctx->max_line_len );
+ }
+ }
+ }
return rc;
}
+static rc_t vdb_qual_loop( const fastq_ctx * fctx ) /* picks the QUAL writer matching the available columns */
+{
+ rc_t rc = 0;
+ if ( fctx->idx_qual == INVALID_COLUMN )
+ {
+ /* we actually only need a QUAL-column, everything else name/splitting etc. is optional... */
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ DISP_RC( rc, "cannot generate qual-format: QUALITY column not found" );
+ }
+ else
+ {
+ bool can_split = ( fctx->idx_read_start != INVALID_COLUMN && fctx->idx_read_len != INVALID_COLUMN ); /* splitting into fragments needs both columns */
+ if ( can_split )
+ {
+ bool has_type = ( fctx->idx_read_type != INVALID_COLUMN ); /* READ_TYPE allows to skip non-biological fragments */
+ if ( has_type )
+ rc = vdb_qual_frag_type_checked_loop( fctx );
+ else
+ rc = vdb_qual_frag_no_type_check_loop( fctx );
+ }
+ else
+ rc = vdb_qual_spot_loop( fctx ); /* no split information: one record per spot */
+ }
+ return rc;
+}
+
+
+/* -------------------------------------------------------------------------------------------------------------- */
+
static rc_t vdb_fasta_accumulated( const char * bases, uint32_t num_bases,
- int32_t * chars_left_on_line, uint32_t max_line_len )
-{
- rc_t rc = 0;
- if ( num_bases < ( *chars_left_on_line ) )
- {
- rc = KOutMsg( "%.*s", num_bases, bases );
- ( *chars_left_on_line ) -= num_bases;
- }
- else if ( num_bases == ( *chars_left_on_line ) )
- {
- rc = KOutMsg( "%.*s\n", num_bases, bases );
- ( *chars_left_on_line ) = max_line_len;
- }
- else
- {
- uint32_t ofs = 0;
- int32_t remaining = num_bases;
- while( rc == 0 && ofs < num_bases )
- {
- if ( remaining >= ( *chars_left_on_line ) )
- {
- rc = KOutMsg( "%.*s\n", ( *chars_left_on_line ), &bases[ ofs ] );
- ofs += ( *chars_left_on_line );
- remaining -= ( *chars_left_on_line );
- ( *chars_left_on_line ) = max_line_len;
- }
- else
- {
- rc = KOutMsg( "%.*s", remaining, &bases[ ofs ] );
- ofs += remaining;
- ( *chars_left_on_line ) -= remaining;
- remaining = 0;
- }
- }
- }
- return rc;
+ int32_t * chars_left_on_line, uint32_t max_line_len )
+{
+ rc_t rc = 0;
+ if ( num_bases < ( *chars_left_on_line ) )
+ {
+ rc = KOutMsg( "%.*s", num_bases, bases );
+ ( *chars_left_on_line ) -= num_bases;
+ }
+ else if ( num_bases == ( *chars_left_on_line ) )
+ {
+ rc = KOutMsg( "%.*s\n", num_bases, bases );
+ ( *chars_left_on_line ) = max_line_len;
+ }
+ else
+ {
+ uint32_t ofs = 0;
+ int32_t remaining = num_bases;
+ while( rc == 0 && ofs < num_bases )
+ {
+ if ( remaining >= ( *chars_left_on_line ) )
+ {
+ rc = KOutMsg( "%.*s\n", ( *chars_left_on_line ), &bases[ ofs ] );
+ ofs += ( *chars_left_on_line );
+ remaining -= ( *chars_left_on_line );
+ ( *chars_left_on_line ) = max_line_len;
+ }
+ else
+ {
+ rc = KOutMsg( "%.*s", remaining, &bases[ ofs ] );
+ ofs += remaining;
+ ( *chars_left_on_line ) -= remaining;
+ remaining = 0;
+ }
+ }
+ }
+ return rc;
}
static rc_t vdb_fasta1_loop( const fastq_ctx * fctx )
{
- rc_t rc;
- if ( fctx->idx_read == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- DISP_RC( rc, "cannot generate fasta1-format: READ column not found" );
- }
- else
- {
- int64_t row_id;
- int32_t chars_left_on_line = fctx->max_line_len;
-
- rc = KOutMsg( ">%s\n", fctx->run_name );
- while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
- {
- if ( rc == 0 )
- rc = Quitting();
- if ( rc == 0 )
- {
- fastq_spot spot;
- rc = read_spot( fctx, row_id, &spot );
- if ( rc == 0 )
- rc = vdb_fasta_accumulated( spot.bases, spot.num_bases, &chars_left_on_line, fctx->max_line_len );
- }
- }
- rc = KOutMsg( "\n" );
- }
+ rc_t rc; /* writes all rows as one FASTA record named after the run */
+ if ( fctx->idx_read == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ DISP_RC( rc, "cannot generate fasta1-format: READ column not found" );
+ }
+ else
+ {
+ int64_t row_id;
+ int32_t chars_left_on_line = fctx->max_line_len; /* shared across rows so lines are filled continuously */
+
+ rc = KOutMsg( ">%s\n", fctx->run_name );
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting(); /* stop early if the user asked to quit */
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ rc = vdb_fasta_accumulated( spot.bases, spot.num_bases, &chars_left_on_line, fctx->max_line_len );
+ }
+ }
+ { rc_t rc2 = KOutMsg( "\n" ); if ( rc == 0 ) rc = rc2; } /* still terminate the output, but do not mask an earlier error */
+ }
return rc;
}
static rc_t vdb_fasta2_loop( const fastq_ctx * fctx )
{
- rc_t rc = 0;
- if ( fctx->idx_name == INVALID_COLUMN || fctx->idx_read == INVALID_COLUMN )
- {
- rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
- DISP_RC( rc, "cannot generate fasta2-format: READ and/or NAME column not found" );
- }
- else
- {
- char last_name[ 1024 ];
- size_t last_name_len = 0;
- int64_t row_id;
- int32_t chars_left_on_line = fctx->max_line_len;
-
- while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
- {
- if ( rc == 0 )
- rc = Quitting();
- if ( rc == 0 )
- {
- fastq_spot spot;
- rc = read_spot( fctx, row_id, &spot );
- if ( rc == 0 )
- {
- bool print_ref_name = ( last_name_len == 0 );
- if ( !print_ref_name )
- {
- print_ref_name = ( last_name_len != spot.name_len );
- if ( !print_ref_name )
- print_ref_name = ( string_cmp( last_name, last_name_len, spot.name, spot.name_len, spot.name_len ) != 0 );
- }
-
- if ( print_ref_name )
- {
- if ( chars_left_on_line == fctx->max_line_len )
- rc = KOutMsg( ">%.*s\n", spot.name_len, spot.name );
- else
- {
- rc = KOutMsg( "\n>%.*s\n", spot.name_len, spot.name );
- chars_left_on_line = fctx->max_line_len;
- }
- last_name_len = string_copy ( last_name, sizeof last_name, spot.name, spot.name_len );
- }
-
- if ( rc == 0 )
- rc = vdb_fasta_accumulated( spot.bases, spot.num_bases, &chars_left_on_line, fctx->max_line_len );
- }
- }
- }
- rc = KOutMsg( "\n" );
- }
+ rc_t rc = 0; /* writes one FASTA record per run of rows sharing the same NAME */
+ if ( fctx->idx_name == INVALID_COLUMN || fctx->idx_read == INVALID_COLUMN )
+ {
+ rc = RC( rcExe, rcNoTarg, rcConstructing, rcNoObj, rcInvalid );
+ DISP_RC( rc, "cannot generate fasta2-format: READ and/or NAME column not found" );
+ }
+ else
+ {
+ char last_name[ 1024 ]; /* name of the record currently being written */
+ size_t last_name_len = 0;
+ int64_t row_id;
+ int32_t chars_left_on_line = fctx->max_line_len;
+
+ while ( rc == 0 && num_gen_iterator_next( fctx->row_iter, &row_id, &rc ) )
+ {
+ if ( rc == 0 )
+ rc = Quitting(); /* stop early if the user asked to quit */
+ if ( rc == 0 )
+ {
+ fastq_spot spot;
+ rc = read_spot( fctx, row_id, &spot );
+ if ( rc == 0 )
+ {
+ bool print_ref_name = ( last_name_len == 0 ); /* very first row always starts a record */
+ if ( !print_ref_name )
+ {
+ print_ref_name = ( last_name_len != spot.name_len );
+ if ( !print_ref_name )
+ print_ref_name = ( string_cmp( last_name, last_name_len, spot.name, spot.name_len, spot.name_len ) != 0 );
+ }
+
+ if ( print_ref_name )
+ {
+ if ( chars_left_on_line == fctx->max_line_len )
+ rc = KOutMsg( ">%.*s\n", spot.name_len, spot.name );
+ else
+ {
+ rc = KOutMsg( "\n>%.*s\n", spot.name_len, spot.name ); /* close the partially filled line first */
+ chars_left_on_line = fctx->max_line_len;
+ }
+ last_name_len = string_copy ( last_name, sizeof last_name, spot.name, spot.name_len );
+ }
+
+ if ( rc == 0 )
+ rc = vdb_fasta_accumulated( spot.bases, spot.num_bases, &chars_left_on_line, fctx->max_line_len );
+ }
+ }
+ }
+ { rc_t rc2 = KOutMsg( "\n" ); if ( rc == 0 ) rc = rc2; } /* still terminate the output, but do not mask an earlier error */
+ }
return rc;
}
@@ -721,7 +906,7 @@ static rc_t vdb_fastq_tbl( const p_dump_context ctx, fastq_ctx * fctx )
{
int64_t first;
uint64_t count;
- /* READ is the colum we have in all cases... */
+ /* READ is the column we have in all cases... */
rc = VCursorIdRange( fctx->cursor, fctx->idx_read, &first, &count );
DISP_RC( rc, "VCursorIdRange() failed" );
if ( rc == 0 )
@@ -747,39 +932,47 @@ static rc_t vdb_fastq_tbl( const p_dump_context ctx, fastq_ctx * fctx )
if ( rc == 0 && !num_gen_empty( ctx->rows ) )
{
- rc = num_gen_iterator_make( ctx->rows, &fctx->row_iter );
- DISP_RC( rc, "num_gen_iterator_make() failed" );
- if ( rc == 0 )
- {
- if ( fctx->max_line_len == 0 )
- fctx->max_line_len = DEF_FASTA_LEN;
-
- switch( fctx->format )
- {
- /* one FASTQ-record ( 4 liner ) per READ/SPOT */
- case df_fastq : rc = vdb_fastq_loop( fctx ); /* <--- */
- break;
-
- /* one FASTQ-record ( 4 liner ) per FRAGMENT/ALIGNMENT */
- case df_fastq1 : rc = vdb_fastq1_loop( fctx ); /* <--- */
- break;
-
- /* one FASTA-record ( 2 liner ) per READ/SPOT */
- case df_fasta : rc = vdb_fasta_loop( fctx ); /* <--- */
- break;
-
- /* one FASTA-record ( many lines ) for the whole accession ( REFSEQ-accession ) */
- case df_fasta1 : rc = vdb_fasta1_loop( fctx ); /* <--- */
- break;
-
- /* one FASTA-record ( many lines ) for each REFERENCE used in a cSRA-database */
- case df_fasta2 : rc = vdb_fasta2_loop( fctx ); /* <--- */
- break;
-
- default : break;
- }
- num_gen_iterator_destroy( fctx->row_iter );
- }
+ rc = num_gen_iterator_make( ctx->rows, &fctx->row_iter );
+ DISP_RC( rc, "num_gen_iterator_make() failed" );
+ if ( rc == 0 )
+ {
+ if ( fctx->max_line_len == 0 )
+ fctx->max_line_len = DEF_FASTA_LEN;
+
+ switch( fctx->format )
+ {
+ /* one FASTQ-record ( 4 liner ) per READ/SPOT */
+ case df_fastq : rc = vdb_fastq_loop( fctx ); /* <--- */
+ break;
+
+ /* one FASTQ-record ( 4 liner ) per FRAGMENT/ALIGNMENT */
+ case df_fastq1 : rc = vdb_fastq1_loop( fctx ); /* <--- */
+ break;
+
+ /* one FASTA-record ( 2 liner ) per READ/SPOT */
+ case df_fasta : rc = vdb_fasta_loop( fctx ); /* <--- */
+ break;
+
+ /* one FASTA-record ( many lines ) for the whole accession ( REFSEQ-accession ) */
+ case df_fasta1 : rc = vdb_fasta1_loop( fctx ); /* <--- */
+ break;
+
+ /* one FASTA-record ( many lines ) for each REFERENCE used in a cSRA-database */
+ case df_fasta2 : rc = vdb_fasta2_loop( fctx ); /* <--- */
+ break;
+
+ /* one QUAL-record ( 2 liner ) per whole READ/SPOT */
+ case df_qual : rc = vdb_qual_spot_loop( fctx ); /* <--- */
+ break;
+
+ /* one QUAL-record ( 2 liner ) per FRAGMENT/ALIGNMENT */
+ case df_qual1 : rc = vdb_qual_loop( fctx ); /* <--- */
+ break;
+
+ default : break;
+ }
+ num_gen_iterator_destroy( fctx->row_iter );
+ }
}
else
rc = RC( rcExe, rcDatabase, rcReading, rcRange, rcEmpty );
@@ -807,9 +1000,9 @@ static rc_t vdb_fastq_table( const p_dump_context ctx,
rc = vdb_fastq_tbl( ctx, fctx );
VTableRelease( fctx->tbl );
}
-
- if ( schema != NULL )
- VSchemaRelease( schema );
+
+ if ( schema != NULL )
+ VSchemaRelease( schema );
return rc;
}
@@ -851,8 +1044,8 @@ static rc_t vdb_fastq_database( const p_dump_context ctx,
VDatabaseRelease( db );
}
- if ( schema != NULL )
- VSchemaRelease( schema );
+ if ( schema != NULL )
+ VSchemaRelease( schema );
return rc;
}
@@ -923,9 +1116,9 @@ rc_t vdf_main( const p_dump_context ctx, const VDBManager * mgr, const char * ac
{
rc_t rc = 0;
fastq_ctx fctx;
- init_fastq_ctx( ctx, &fctx, acc_or_path );
+ init_fastq_ctx( ctx, &fctx, acc_or_path );
ctx->path = string_dup_measure ( acc_or_path, NULL );
-
+
if ( USE_PATHTYPE_TO_DETECT_DB_OR_TAB ) /* in vdb-dump-context.h */
rc = vdb_fastq_by_pathtype( ctx, mgr, &fctx );
else
diff --git a/tools/vdb-dump/vdb-dump-formats.c b/tools/vdb-dump/vdb-dump-formats.c
index 1a820dc..5364a87 100644
--- a/tools/vdb-dump/vdb-dump-formats.c
+++ b/tools/vdb-dump/vdb-dump-formats.c
@@ -136,9 +136,9 @@ static rc_t vdfo_print_row_csv( const p_row_context r_ctx )
{
rc_t rc = vds_clear( &(r_ctx->s_col) );
DISP_RC( rc, "dump_str_clear() failed" )
- if ( rc == 0 && r_ctx->ctx->print_row_id )
- rc = KOutMsg( "%u", r_ctx->row_id );
-
+ if ( rc == 0 && r_ctx->ctx->print_row_id )
+ rc = KOutMsg( "%u", r_ctx->row_id );
+
if ( rc == 0 )
{
r_ctx->col_nr = 0;
@@ -350,9 +350,9 @@ static rc_t vdfo_print_row_tab( const p_row_context r_ctx )
{
rc_t rc = vds_clear( &(r_ctx->s_col) );
DISP_RC( rc, "dump_str_clear() failed" )
- if ( rc == 0 && r_ctx->ctx->print_row_id )
- rc = KOutMsg( "%u", r_ctx->row_id );
-
+ if ( rc == 0 && r_ctx->ctx->print_row_id )
+ rc = KOutMsg( "%u", r_ctx->row_id );
+
if ( rc == 0 )
{
r_ctx->col_nr = 0;
diff --git a/tools/vdb-dump/vdb-dump-helper.c b/tools/vdb-dump/vdb-dump-helper.c
index d3ef011..5200dc6 100644
--- a/tools/vdb-dump/vdb-dump-helper.c
+++ b/tools/vdb-dump/vdb-dump-helper.c
@@ -27,6 +27,7 @@
#include <klib/log.h>
#include <klib/rc.h>
#include <klib/text.h>
+#include <klib/printf.h>
#include <kfs/directory.h>
#include <kfs/file.h>
@@ -48,6 +49,21 @@
#include <assert.h>
#include <stdarg.h>
+rc_t ErrMsg( const char * fmt, ... )
+{
+ rc_t rc;
+ char buffer[ 4096 ];
+ size_t num_writ;
+
+ va_list list;
+ va_start( list, fmt );
+ rc = string_vprintf( buffer, sizeof buffer, &num_writ, fmt, list );
+ if ( rc == 0 )
+ rc = pLogMsg( klogErr, "$(E)", "E=%s", buffer );
+ va_end( list );
+ return rc;
+}
+
/********************************************************************
helper function to display the version of the vdb-manager
********************************************************************/
@@ -55,8 +71,9 @@ rc_t vdh_show_manager_version( const VDBManager *my_manager )
{
uint32_t version;
rc_t rc = VDBManagerVersion( my_manager, &version );
- DISP_RC( rc, "VDBManagerVersion() failed" );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VDBManagerVersion() -> %R", rc );
+ else
{
PLOGMSG ( klogInfo, ( klogInfo, "manager-version = $(maj).$(min).$(rel)",
"vers=0x%X,maj=%u,min=%u,rel=%u",
@@ -75,43 +92,43 @@ static void CC vdh_parse_1_schema( void *item, void *data )
if ( ( item != NULL )&&( my_schema != NULL ) )
{
rc_t rc = VSchemaParseFile( my_schema, "%s", s );
- DISP_RC( rc, "VSchemaParseFile() failed" );
+ if ( rc != 0 )
+ ErrMsg( "VSchemaParseFile() -> %R", rc );
}
}
rc_t vdh_parse_schema( const VDBManager *my_manager,
VSchema **new_schema,
Vector *schema_list,
- bool with_sra_schema )
+ bool with_sra_schema )
{
rc_t rc = 0;
if ( my_manager == NULL )
- {
return RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcNull );
- }
+
if ( new_schema == NULL )
- {
return RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcNull );
- }
- *new_schema = NULL;
-
- if ( with_sra_schema )
- {
- rc = VDBManagerMakeSRASchema( my_manager, new_schema );
- DISP_RC( rc, "VDBManagerMakeSRASchema() failed" );
- }
-
+ *new_schema = NULL;
+
+ if ( with_sra_schema )
+ {
+ rc = VDBManagerMakeSRASchema( my_manager, new_schema );
+ if ( rc != 0 )
+ ErrMsg( "VDBManagerMakeSRASchema() -> %R", rc );
+ }
+
if ( ( rc == 0 )&&( schema_list != NULL ) )
{
- if ( *new_schema == NULL )
- {
- rc = VDBManagerMakeSchema( my_manager, new_schema );
- DISP_RC( rc, "VDBManagerMakeSchema() failed" );
- }
- if ( rc == 0 )
- VectorForEach( schema_list, false, vdh_parse_1_schema, *new_schema );
+ if ( *new_schema == NULL )
+ {
+ rc = VDBManagerMakeSchema( my_manager, new_schema );
+ if ( rc != 0 )
+ ErrMsg( "VDBManagerMakeSchema() -> %R", rc );
+ }
+ if ( rc == 0 )
+ VectorForEach( schema_list, false, vdh_parse_1_schema, *new_schema );
}
return rc;
}
@@ -126,24 +143,22 @@ bool vdh_is_path_table( const VDBManager *my_manager, const char *path,
const VTable *my_table;
VSchema *my_schema = NULL;
rc_t rc;
-
- rc = vdh_parse_schema( my_manager, &my_schema, schema_list, false );
- DISP_RC( rc, "helper_parse_schema() failed" );
+
+ vdh_parse_schema( my_manager, &my_schema, schema_list, false );
rc = VDBManagerOpenTableRead( my_manager, &my_table, my_schema, "%s", path );
- DISP_RC( rc, "VDBManagerOpenTableRead() failed" );
if ( rc == 0 )
- {
+ {
res = true; /* yes we are able to open the table ---> path is a table */
VTableRelease( my_table );
- }
+ }
if ( my_schema != NULL )
{
rc = VSchemaRelease( my_schema );
- DISP_RC( rc, "VSchemaRelease() failed" );
+ if ( rc != 0 )
+ ErrMsg( "VSchemaRelease() -> %R", rc );
}
-
return res;
}
@@ -167,8 +182,9 @@ bool vdh_is_path_column( const VDBManager *my_manager, const char *path,
{
KDirectory *my_directory;
rc_t rc = KDirectoryNativeDir( &my_directory );
- DISP_RC( rc, "KDirectoryNativeDir() failed" );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "KDirectoryNativeDir() -> %R", rc );
+ else
{
string_copy( pp_path, path_len + 20, path, path_len );
string_copy( &pp_path[ path_len ], 20, backback, string_size( backback ) );
@@ -194,19 +210,23 @@ bool vdh_is_path_database( const VDBManager *my_manager, const char *path,
VSchema *my_schema = NULL;
rc_t rc;
- rc = vdh_parse_schema( my_manager, &my_schema, schema_list, false );
- DISP_RC( rc, "helper_parse_schema() failed" );
+ vdh_parse_schema( my_manager, &my_schema, schema_list, false );
rc = VDBManagerOpenDBRead( my_manager, &my_database, my_schema, "%s", path );
if ( rc == 0 )
- {
+ {
res = true; /* yes we are able to open the database ---> path is a database */
- VDatabaseRelease( my_database );
- }
+ rc = VDatabaseRelease( my_database );
+ if ( rc != 0 )
+ ErrMsg( "VDatabaseRelease() -> %R", rc );
+ }
if ( my_schema != NULL )
- VSchemaRelease( my_schema );
-
+ {
+ rc = VSchemaRelease( my_schema );
+ if ( rc != 0 )
+ ErrMsg( "VSchemaRelease() -> %R", rc );
+ }
return res;
}
@@ -215,41 +235,36 @@ bool vdh_is_path_database( const VDBManager *my_manager, const char *path,
helper-function to extract the name of the first table of a database
and put it into the dump-context
*************************************************************************************/
-bool vdh_take_1st_table_from_db( dump_context *ctx, const VDatabase *my_database )
+bool vdh_take_1st_table_from_db( dump_context *ctx, const KNamelist * tbl_names )
{
bool we_found_a_table = false;
- KNamelist *tbl_names;
- rc_t rc = VDatabaseListTbl( my_database, &tbl_names );
- DISP_RC( rc, "VDatabaseListTbl() failed" );
- if ( rc == 0 )
+ uint32_t count;
+ rc_t rc = KNamelistCount( tbl_names, &count );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistCount() -> %R", rc );
+ else if ( count > 0 )
{
- uint32_t n;
- rc = KNamelistCount( tbl_names, &n );
- DISP_RC( rc, "KNamelistCount() failed" );
- if ( ( rc == 0 )&&( n > 0 ) )
+ const char *tbl_name;
+ rc = KNamelistGet( tbl_names, 0, &tbl_name );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistGet( 0 ) -> %R", rc );
+ else
{
- const char *tbl_name;
- rc = KNamelistGet( tbl_names, 0, &tbl_name );
- DISP_RC( rc, "KNamelistGet() failed" );
- if ( rc == 0 )
- {
- vdco_set_table( ctx, tbl_name );
- we_found_a_table = true;
- }
+ vdco_set_table( ctx, tbl_name );
+ we_found_a_table = true;
}
- rc = KNamelistRelease( tbl_names );
- DISP_RC( rc, "KNamelistRelease() failed" );
}
return we_found_a_table;
}
-
+/*
static int vdh_str_cmp( const char *a, const char *b )
{
size_t asize = string_size ( a );
size_t bsize = string_size ( b );
return strcase_cmp ( a, asize, b, bsize, ( asize > bsize ) ? asize : bsize );
}
+*/
static bool vdh_str_starts_with( const char *a, const char *b )
{
@@ -264,44 +279,110 @@ static bool vdh_str_starts_with( const char *a, const char *b )
return res;
}
-/*************************************************************************************
-helper-function to check if a given table is in the list of tables
-if found put that name into the dump-context
-*************************************************************************************/
-bool vdh_take_this_table_from_db( dump_context *ctx, const VDatabase *my_database,
- const char * table_to_find )
+
+bool list_contains_value( const KNamelist * list, const String * value )
{
- bool we_found_a_table = false;
- KNamelist *tbl_names;
- rc_t rc = VDatabaseListTbl( my_database, &tbl_names );
- DISP_RC( rc, "VDatabaseListTbl() failed" );
- if ( rc == 0 )
+ bool found = false;
+ uint32_t count;
+ rc_t rc = KNamelistCount( list, &count );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistCount() -> %R", rc );
+ else if ( count > 0 )
{
- uint32_t n;
- rc = KNamelistCount( tbl_names, &n );
- DISP_RC( rc, "KNamelistCount() failed" );
- if ( ( rc == 0 )&&( n > 0 ) )
+ uint32_t i;
+ for ( i = 0; i < count && rc == 0 && !found; ++i )
{
- uint32_t i;
- for ( i = 0; i < n && rc == 0 && !we_found_a_table; ++i )
+ const char *s;
+ rc = KNamelistGet( list, i, &s );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistGet( %d ) -> %R", i, rc );
+ else
{
- const char *tbl_name;
- rc = KNamelistGet( tbl_names, i, &tbl_name );
- DISP_RC( rc, "KNamelistGet() failed" );
- if ( rc == 0 )
+ String item;
+ StringInitCString( &item, s );
+ found = ( StringCompare ( &item, value ) == 0 );
+ }
+ }
+ }
+ return found;
+}
+
+
+static bool list_contains_value_starting_with( const KNamelist * list, const String * value, String * found )
+{
+ bool res = false;
+ uint32_t count;
+ rc_t rc = KNamelistCount( list, &count );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistCount() -> %R", rc );
+ else if ( count > 0 )
+ {
+ uint32_t i;
+ for ( i = 0; i < count && rc == 0 && !res; ++i )
+ {
+ const char *s;
+ rc = KNamelistGet( list, i, &s );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistGet( %d ) -> %R", i, rc );
+ else
+ {
+ String item;
+ StringInitCString( &item, s );
+ if ( value->len <= item.len )
{
- if ( vdh_str_cmp( tbl_name, table_to_find ) == 0 )
- {
- vdco_set_table( ctx, tbl_name );
- we_found_a_table = true;
- }
+ item.len = value->len;
+ item.size = value->size;
+ res = ( StringCompare ( &item, value ) == 0 );
+ if ( res )
+ StringInitCString( found, s );
}
}
}
+ }
+ return res;
+}
+
+/*************************************************************************************
+helper-function to check if a given table is in the list of tables
+if found put that name into the dump-context
+*************************************************************************************/
+bool vdh_take_this_table_from_list( dump_context *ctx, const KNamelist * tbl_names,
+ const char * table_to_find )
+{
+ bool res = false;
+ String to_find;
+
+ StringInitCString( &to_find, table_to_find );
+ res = list_contains_value( tbl_names, &to_find ); /* 1st attempt: an entry equal to table_to_find */
+ if ( res )
+ vdco_set_table_String( ctx, &to_find );
+ else
+ {
+ String found;
+ res = list_contains_value_starting_with( tbl_names, &to_find, &found ); /* 2nd attempt: first entry that begins with table_to_find */
+ if ( res )
+ vdco_set_table_String( ctx, &found ); /* the complete entry from the list is used, not just the prefix */
+ }
+ return res; /* true if either attempt selected a table */
+}
+
+
+bool vdh_take_this_table_from_db( dump_context *ctx, const VDatabase * db,
+ const char * table_to_find )
+{
+ bool we_found_the_table = false;
+ KNamelist *tbl_names;
+ rc_t rc = VDatabaseListTbl( db, &tbl_names );
+ if ( rc != 0 )
+ ErrMsg( "VDatabaseListTbl() -> %R", rc );
+ else
+ {
+ we_found_the_table = vdh_take_this_table_from_list( ctx, tbl_names, table_to_find );
rc = KNamelistRelease( tbl_names );
- DISP_RC( rc, "KNamelistRelease() failed" );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistRelease() -> %R", rc );
}
- return we_found_a_table;
+ return we_found_the_table;
}
@@ -332,10 +413,10 @@ static rc_t vdh_print_full_col_info( dump_context *ctx,
if ( rc == 0 && my_schema )
{
char buf[64];
- rc = VTypedeclToText( &(col_def->type_decl), my_schema,
- buf, sizeof(buf) );
- DISP_RC( rc, "VTypedeclToText() failed" );
- if ( rc == 0 )
+ rc = VTypedeclToText( &(col_def->type_decl), my_schema, buf, sizeof( buf ) );
+ if ( rc != 0 )
+ ErrMsg( "VTypedeclToText() -> %R", rc );
+ else
rc = KOutMsg( "\n (%s)", buf );
}
if ( rc == 0 )
@@ -344,20 +425,17 @@ static rc_t vdh_print_full_col_info( dump_context *ctx,
else
{
if ( ctx->table == NULL )
- {
rc = KOutMsg( "error: no table-name in print_column_info()" );
- }
+
if ( col_def->name == NULL )
- {
rc = KOutMsg( "error: no column-name in print_column_info()" );
- }
+
}
free( s_domain );
}
else
- {
rc = KOutMsg( "error: making domain-text in print_column_info()" );
- }
+
return rc;
}
@@ -369,16 +447,15 @@ static rc_t vdh_print_short_col_info( const p_col_def col_def,
if ( col_def->name != NULL )
{
rc = KOutMsg( "%s", col_def->name );
- if ( my_schema )
+ if ( rc == 0 && my_schema != NULL )
{
- char buf[64];
+ char buf[ 64 ];
rc = VTypedeclToText( &(col_def->type_decl), my_schema,
buf, sizeof(buf) );
- DISP_RC( rc, "VTypedeclToText() failed" );
- if ( rc == 0 )
- {
+ if ( rc != 0 )
+ ErrMsg( "VTypedeclToText() -> %R", rc );
+ else
rc = KOutMsg( " (%s)", buf );
- }
}
if ( rc == 0 )
rc = KOutMsg( "\n" );
@@ -402,22 +479,80 @@ rc_t vdh_print_col_info( dump_context *ctx,
return rc;
}
+rc_t resolve_remote_accession( const char * accession, char * dst, size_t dst_size )
+{ /* asks the VFS resolver for the remote location of 'accession' and stores it as NUL-terminated text in dst; dst_size has to be >= 1 */
+ VFSManager * vfs_mgr;
+ rc_t rc = VFSManagerMake( &vfs_mgr );
+ dst[ 0 ] = 0; /* dst stays empty unless a remote path is found */
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerMake() -> %R", rc );
+ else
+ {
+ VResolver * resolver;
+ rc = VFSManagerGetResolver( vfs_mgr, &resolver );
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerGetResolver() -> %R", rc );
+ else
+ {
+ VPath * vpath;
+ rc = VFSManagerMakePath( vfs_mgr, &vpath, "ncbi-acc:%s", accession );
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerMakePath( %s ) -> %R", accession, rc );
+ else
+ {
+ const VPath * remote = NULL;
+ VResolverRemoteEnable( resolver, vrAlwaysEnable );
+ rc = VResolverQuery ( resolver, eProtocolHttp, vpath, NULL, &remote, NULL ); /* only the remote location is requested */
+ if ( rc == 0 && remote != NULL )
+ {
+ const String * path;
+ rc = VPathMakeString( remote, &path );
+ if ( rc == 0 && path != NULL )
+ {
+ size_t n = string_copy ( dst, dst_size, path->addr, path->size ); /* n = bytes actually copied into dst */
+ dst[ ( n < dst_size ) ? n : dst_size - 1 ] = 0; /* terminate inside dst, even if the path did not fit completely */
+ StringWhack ( path );
+ }
+ if ( remote != NULL )
+ VPathRelease ( remote );
+ }
+ VPathRelease ( vpath );
+ }
+ VResolverRelease( resolver );
+ }
+ VFSManagerRelease ( vfs_mgr );
+ }
+ /* remove a leading "ncbi-acc:" ( 9 characters ) from the result */
+ if ( rc == 0 && vdh_str_starts_with( dst, "ncbi-acc:" ) )
+ {
+ size_t l = string_size ( dst );
+ memmove( dst, &( dst[ 9 ] ), l - 9 );
+ dst[ l - 9 ] = 0;
+ }
+ return rc;
+}
rc_t resolve_accession( const char * accession, char * dst, size_t dst_size, bool remotely )
{
VFSManager * vfs_mgr;
rc_t rc = VFSManagerMake( &vfs_mgr );
dst[ 0 ] = 0;
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerMake() -> %R", rc );
+ else
{
VResolver * resolver;
rc = VFSManagerGetResolver( vfs_mgr, &resolver );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerGetResolver() -> %R", rc );
+ else
{
VPath * vpath;
rc = VFSManagerMakePath( vfs_mgr, &vpath, "ncbi-acc:%s", accession );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerMakePath( %s ) -> %R", accession, rc );
+ else
{
const VPath * local = NULL;
const VPath * remote = NULL;
@@ -467,24 +602,30 @@ rc_t resolve_cache( const char * accession, char * dst, size_t dst_size )
VFSManager * vfs_mgr;
rc_t rc = VFSManagerMake( &vfs_mgr );
dst[ 0 ] = 0;
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerMake() -> %R", rc );
+ else
{
VResolver * resolver;
rc = VFSManagerGetResolver( vfs_mgr, &resolver );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerGetResolver() -> %R", rc );
+ else
{
VPath * vpath;
rc = VFSManagerMakePath( vfs_mgr, &vpath, "ncbi-acc:%s", accession );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VFSManagerMakePath( %s ) -> %R", accession, rc );
+ else
{
const VPath * local = NULL;
const VPath * remote = NULL;
- const VPath * cache = NULL;
- rc = VResolverQuery ( resolver, eProtocolHttp, vpath, &local, &remote, &cache );
+ const VPath * cache = NULL;
+ rc = VResolverQuery ( resolver, eProtocolHttp, vpath, &local, &remote, &cache );
if ( rc == 0 && cache != NULL )
{
const String * path;
- rc = VPathMakeString( cache, &path );
+ rc = VPathMakeString( cache, &path );
if ( rc == 0 && path != NULL )
{
@@ -512,33 +653,120 @@ rc_t resolve_cache( const char * accession, char * dst, size_t dst_size )
rc_t check_cache_comleteness( const char * path, float * percent, uint64_t * bytes_in_cache )
{
- rc_t rc = 0;
- if ( percent != NULL ) { ( * percent ) = 0.0; }
- if ( bytes_in_cache != NULL ) { ( * bytes_in_cache ) = 0; }
- if ( path != NULL && path[ 0 ] != 0 )
- {
- KDirectory * dir;
- rc_t rc = KDirectoryNativeDir( &dir );
- if ( rc == 0 )
- {
- const KFile * f = NULL;
- rc = KDirectoryOpenFileRead( dir, &f, "%s.cache", path );
- if ( rc == 0 )
- {
- rc = GetCacheCompleteness( f, percent, bytes_in_cache );
- }
- else
- {
- rc = KDirectoryOpenFileRead( dir, &f, "%s", path );
- if ( rc == 0 )
- {
- if ( percent != NULL ) ( * percent ) = 100.0;
- if ( bytes_in_cache != NULL ) rc = KFileSize ( f, bytes_in_cache );
- }
- }
- if ( f != NULL ) KFileRelease( f );
- KDirectoryRelease( dir );
- }
- }
- return rc;
+ rc_t rc = 0; /* reports how much of 'path' is cached: *percent and *bytes_in_cache ( both optional ) default to 0 */
+ if ( percent != NULL ) { ( * percent ) = 0.0; }
+ if ( bytes_in_cache != NULL ) { ( * bytes_in_cache ) = 0; }
+ if ( path != NULL && path[ 0 ] != 0 ) /* a NULL or empty path is not an error, the defaults are returned */
+ {
+ KDirectory * dir;
+ rc = KDirectoryNativeDir( &dir ); /* assigns the function-wide rc; a second declaration here would hide every failure from the caller */
+ if ( rc != 0 )
+ ErrMsg( "KDirectoryNativeDir() -> %R", rc);
+ else
+ {
+ const KFile * f = NULL;
+ rc = KDirectoryOpenFileRead( dir, &f, "%s.cache", path ); /* 1st choice: the "<path>.cache" file */
+ if ( rc == 0 )
+ rc = GetCacheCompleteness( f, percent, bytes_in_cache );
+ else
+ {
+ rc = KDirectoryOpenFileRead( dir, &f, "%s", path ); /* 2nd choice: the file itself, reported as 100 percent */
+ if ( rc == 0 )
+ {
+ if ( percent != NULL ) ( * percent ) = 100.0;
+ if ( bytes_in_cache != NULL ) rc = KFileSize ( f, bytes_in_cache );
+ }
+ }
+ if ( f != NULL ) KFileRelease( f );
+ KDirectoryRelease( dir );
+ }
+ }
+ return rc; /* 0 on success, otherwise the rc of the call that failed */
+}
+
+/* true if cmd and pattern agree over the full length of both ( see the two length checks below ) */
+static bool matches( const String * cmd, const String * pattern )
+{
+ char buffer[ 256 ];
+ String match;
+ uint32_t matching;
+
+ StringInit( &match, buffer, sizeof buffer, 0 );
+ matching = StringMatch( &match, cmd, pattern ); /* presumably the number of leading characters both have in common - confirm in klib/text.h */
+ return ( matching == pattern->len && matching == cmd->len ); /* a partial match on either side is rejected */
+}
+
+/* returns the zero-based position of the first of the variadic C-strings that matches word, -1 if none does or word is NULL */
+int32_t index_of_match( const String * word, uint32_t num, ... )
+{
+ int32_t res = -1;
+ if ( word != NULL )
+ {
+ uint32_t idx;
+ va_list args;
+
+ va_start ( args, num );
+ for ( idx = 0; idx < num && res < 0; ++idx ) /* stops at the first hit; at most num arguments are read */
+ {
+ const char * arg = va_arg ( args, const char * ); /* every variadic argument has to be a const char * */
+ if ( arg != NULL )
+ {
+ String S;
+ StringInitCString( &S, arg );
+ if ( matches( word, &S ) ) res = idx;
+ }
+ }
+ va_end ( args );
+ }
+ return res;
+}
+
+
+static void CC destroy_String( void * item, void * data ) { free( item ); }
+void destroy_String_vector( Vector * v ) { VectorWhack( v, destroy_String, NULL ); }
+
+uint32_t copy_String_2_vector( Vector * v, const String * S )
+{ /* appends a newly allocated String descriptor for S to v; returns 1 if an entry was added, 0 otherwise */
+ uint32_t res = 0;
+ if ( S->len > 0 && S->addr != NULL ) /* empty strings are not stored */
+ {
+ String * S1 = malloc( sizeof * S1 );
+ if ( S1 != NULL )
+ {
+ rc_t rc;
+ StringInit( S1, S->addr, S->size, S->len ); /* only the descriptor is new: S1 refers to the same characters as S */
+ rc = VectorAppend( v, NULL, S1 );
+ if ( rc == 0 ) res++; else free( S1 ); /* the descriptor is given back if it could not be stored */
+ }
+ }
+ return res;
+}
+
+/* initializes v and fills it with the non-empty pieces of S found between characters of delim; returns the number of pieces stored */
+uint32_t split_buffer( Vector * v, const String * S, const char * delim )
+{
+ uint32_t i, res = 0;
+ size_t delim_len = string_size( delim );
+ String temp; /* describes the piece currently collected; it points into S, no characters are copied */
+
+ StringInit( &temp, NULL, 0, 0 );
+ VectorInit( v, 0, 10 );
+ for( i = 0; i < S->len; ++i )
+ {
+ if ( string_chr( delim, delim_len, S->addr[ i ] ) != NULL )
+ {
+ /* delimiter found */
+ res += copy_String_2_vector( v, &temp ); /* adds nothing while temp is still empty */
+ StringInit( &temp, NULL, 0, 0 );
+ }
+ else
+ {
+ /* normal char in line */
+ if ( temp.addr == NULL ) temp.addr = &( S->addr[ i ] );
+ temp.size++; /* size and len grow together: one byte per character is assumed */
+ temp.len++;
+ }
+ }
+ res += copy_String_2_vector( v, &temp ); /* the piece after the last delimiter */
+ return res;
}
diff --git a/tools/vdb-dump/vdb-dump-helper.h b/tools/vdb-dump/vdb-dump-helper.h
index 2337c81..da456b0 100644
--- a/tools/vdb-dump/vdb-dump-helper.h
+++ b/tools/vdb-dump/vdb-dump-helper.h
@@ -37,6 +37,8 @@ extern "C" {
#include <klib/out.h>
#include <klib/rc.h>
+#include <klib/vector.h>
+#include <klib/text.h>
#include <vfs/manager.h>
#include <vfs/path.h>
@@ -58,12 +60,14 @@ extern "C" {
#define DISP_RC2(rc,err,succ) \
(void)((rc != 0)? 0 : (succ) ? LOGMSG( klogInfo, succ ) : LOGERR( klogInt, rc, err ))
+rc_t ErrMsg( const char * fmt, ... );
+
rc_t vdh_show_manager_version( const VDBManager *my_manager );
rc_t vdh_parse_schema( const VDBManager *my_manager,
VSchema **new_schema,
Vector *schema_list,
- bool with_sra_schema );
+ bool with_sra_schema );
bool vdh_is_path_table( const VDBManager *my_manager, const char *path,
Vector *schema_list );
@@ -72,20 +76,30 @@ bool vdh_is_path_column( const VDBManager *my_manager, const char *path,
bool vdh_is_path_database( const VDBManager *my_manager, const char *path,
Vector *schema_list );
-bool vdh_take_1st_table_from_db( dump_context *ctx,
- const VDatabase *my_database );
+bool list_contains_value( const KNamelist * list, const String * value );
+
+bool vdh_take_1st_table_from_db( dump_context *ctx, const KNamelist *tbl_names );
-bool vdh_take_this_table_from_db( dump_context *ctx, const VDatabase *my_database,
+bool vdh_take_this_table_from_list( dump_context *ctx, const KNamelist *tbl_names,
+ const char * table_to_find );
+
+bool vdh_take_this_table_from_db( dump_context *ctx, const VDatabase *db,
const char * table_to_find );
rc_t vdh_print_col_info( dump_context *ctx,
const p_col_def col_def,
const VSchema *my_schema );
+rc_t resolve_remote_accession( const char * accession, char * dst, size_t dst_size );
rc_t resolve_accession( const char * accession, char * dst, size_t dst_size, bool remotely );
rc_t resolve_cache( const char * accession, char * dst, size_t dst_size );
rc_t check_cache_comleteness( const char * path, float * percent, uint64_t * bytes_in_cache );
+int32_t index_of_match( const String * word, uint32_t num, ... );
+void destroy_String_vector( Vector * v );
+uint32_t copy_String_2_vector( Vector * v, const String * S );
+uint32_t split_buffer( Vector * v, const String * S, const char * delim );
+
#ifdef __cplusplus
}
#endif
diff --git a/tools/vdb-dump/vdb-dump-interact.c b/tools/vdb-dump/vdb-dump-interact.c
new file mode 100644
index 0000000..f2be0c0
--- /dev/null
+++ b/tools/vdb-dump/vdb-dump-interact.c
@@ -0,0 +1,265 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "vdb-dump-interact.h"
+#include "vdb-dump-repo.h"
+#include "vdb-dump-helper.h"
+#include "vdb-dump-print.h"
+
+#include <klib/vector.h>
+#include <klib/text.h>
+#include <kfs/file.h>
+#include <kfs/filetools.h>
+
+rc_t Quitting();
+
+#define INPUTLINE_SIZE 4096
+
+typedef struct ictx
+{
+ const dump_context * ctx;
+ const Args * args;
+ const KFile * std_in;
+ struct vdp_src_context * vsctx;
+ Vector history;
+ char inputline[ INPUTLINE_SIZE ];
+ String PROMPT;
+ String SInputLine;
+
+ bool interactive, done;
+} ictx;
+
+/* prepares ictx for a session: opens stdin, records ctx/args, sets up history, prompt and line-buffer, creates the print-context */
+static rc_t init_ictx( struct ictx * ictx, const dump_context * ctx, const Args * args )
+{
+ rc_t rc = KFileMakeStdIn ( &( ictx->std_in ) );
+ DISP_RC( rc, "KFileMakeStdIn() failed" );
+ if ( rc == 0 )
+ {
+ ictx->ctx = ctx;
+ ictx->args = args;
+ VectorInit( &ictx->history, 0, 10 );
+ ictx->interactive = ( KFileType ( ictx->std_in ) == kfdCharDev ); /* interactive only if stdin is a character device */
+ ictx->done = false;
+
+ CONST_STRING( &(ictx->PROMPT), "\nvdb $" );
+ StringInit( &(ictx->SInputLine), &( ictx->inputline[0] ), sizeof( ictx->inputline ), 0 ); /* size = capacity of the buffer, len = 0 */
+ rc = vdp_init_ctx( &ictx->vsctx, args );
+ if ( rc != 0 ) KFileRelease( ictx->std_in ); /* vdi_main() calls release_ictx() only after success, so stdin is given back here */
+ }
+ return rc;
+}
+
+
+static void release_ictx( ictx * ctx )
+{
+ destroy_String_vector( &ctx->history );
+ KFileRelease( ctx->std_in );
+ vdp_release_ctx( ctx->vsctx );
+}
+
+
+static rc_t vdi_test( const Vector * v )
+{
+ rc_t rc = 0;
+ uint32_t start = VectorStart( v );
+ uint32_t len = VectorLength( v );
+ uint32_t idx;
+ for ( idx = start; rc == 0 && idx < ( start + len ); ++idx )
+ rc = KOutMsg( "{%S} ", VectorGet( v, idx ) );
+ return rc;
+}
+
+
+static rc_t vdi_top_help()
+{
+ rc_t rc = KOutMsg( "help:\n" );
+ if ( rc == 0 )
+ rc = KOutMsg( "quit / exit : terminate program\n" );
+ if ( rc == 0 )
+ rc = KOutMsg( "help [cmd] : print help [ for this topic ]\n" );
+ if ( rc == 0 )
+ rc = KOutMsg( "repo : manage repositories\n" );
+ return rc;
+}
+
+
+static rc_t vdi_help_on_help()
+{
+ rc_t rc = KOutMsg( "help: [help]\n" );
+ return rc;
+}
+
+static rc_t vdi_help_on_repo()
+{
+ rc_t rc = KOutMsg( "help: [repo]\n" );
+ return rc;
+}
+
+static rc_t vdi_help_on_print()
+{
+ rc_t rc = KOutMsg( "help: [print]\n" );
+ return rc;
+}
+
+static rc_t vdi_help( const Vector * v )
+{ /* "help" alone prints the overview, "help <topic>" the text for one of the topics help, repo or print */
+ rc_t rc = 0;
+ if ( VectorLength( v ) < 2 )
+ rc = vdi_top_help();
+ else
+ {
+ int32_t cmd_idx = index_of_match( VectorGet( v, 1 ), 3, /* 3 = number of topics on the next line, all of them have to be counted */
+ "help", "repo", "print" );
+ switch( cmd_idx ) /* an unknown topic ( -1 ) prints nothing */
+ {
+ case 0 : rc = vdi_help_on_help(); break;
+ case 1 : rc = vdi_help_on_repo(); break;
+ case 2 : rc = vdi_help_on_print(); break;
+ }
+ }
+ return rc;
+}
+
+/* executes one input line: the 1st word selects the command, the whole word-list is handed to the command-handler */
+static rc_t vdi_on_newline( ictx * ctx, const String * Line )
+{
+ rc_t rc = 0;
+ Vector v;
+ uint32_t args = split_buffer( &v, Line, " \t" ); /* from vdb-dump-helper.c */
+ if ( args > 0 ) /* a line without any word does nothing, not even a new prompt */
+ {
+ const String * S = VectorGet( &v, 0 );
+ if ( S != NULL )
+ {
+ int32_t cmd_idx = index_of_match( S, 6,
+ "quit", "exit", "help", "repo", "test", "print" );
+
+ ctx->done = ( cmd_idx == 0 || cmd_idx == 1 ); /* quit or exit */
+ if ( !ctx->done )
+ {
+ switch( cmd_idx ) /* the case-values are the positions in the list above */
+ {
+ case 2 : rc = vdi_help( &v ); break; /* above */
+ case 3 : rc = vdi_repo( &v ); break; /* in vdb-dump-repo */
+ case 4 : rc = vdi_test( &v ); break;
+ case 5 : rc = vdp_print_interactive( &v, ctx->vsctx ); break; /* in vdb-dump-print */
+ default : rc = KOutMsg( "??? {%S}", S ); break; /* unknown command: echo the word */
+ }
+ if ( rc == 0 )
+ {
+ if ( ctx->interactive )
+ rc = KOutMsg( "%S", &( ctx->PROMPT ) ); /* ask for the next command */
+ else
+ rc = KOutMsg( "\n" );
+ }
+ }
+ }
+ }
+ destroy_String_vector( &v ); /* from vdb-dump-helper.c */
+ return rc;
+}
+
+/* appends c to the pending input line; if the line-buffer is full the line is discarded and a message is printed */
+static rc_t vdi_on_char( ictx * ctx, const char c )
+{
+ rc_t rc = 0;
+ if ( ctx->SInputLine.len < ( ( ctx->SInputLine.size ) - 1 ) )
+ {
+ ctx->inputline[ ctx->SInputLine.len++ ] = c;
+ }
+ else
+ {
+ rc = KOutMsg( "\ntoo long!%S", &( ctx->PROMPT ) ); /* PROMPT is a String, so %S ( not %s ) is the matching conversion */
+ ctx->SInputLine.len = 0; /* start over with an empty line */
+ }
+ return rc;
+}
+
+/* runs the collected input line as a command, records it in the history and empties the line-buffer */
+static rc_t vdi_interactive_newline( ictx * ctx )
+{
+ rc_t rc = vdi_on_newline( ctx, &(ctx->SInputLine) );
+ copy_String_2_vector( &(ctx->history), &(ctx->SInputLine) ); /* NOTE(review): copy_String_2_vector() stores addr/size/len only, so the entry refers to ctx->inputline, which the next line overwrites - confirm this is intended */
+ ctx->SInputLine.len = 0;
+ return rc;
+}
+
+/* reads stdin one byte at a time: a newline executes the collected line, every other byte is appended to it */
+static rc_t vdi_interactive_loop( ictx * ctx )
+{
+ char cc[ 4 ];
+ uint64_t pos = 0;
+ rc_t rc = KOutMsg( "%S", &( ctx->PROMPT ) );
+ while ( rc == 0 && !ctx->done && ( 0 == Quitting() ) )
+ {
+ size_t num_read;
+ rc = KFileRead( ctx->std_in, pos, cc, 1, &num_read );
+ if ( rc != 0 )
+ LOGERR ( klogErr, rc, "failed to read stdin" );
+ else if ( num_read == 0 )
+ ctx->done = true; /* success with nothing read means end of input: leave the loop instead of polling forever */
+ else
+ {
+ pos += num_read;
+ if ( cc[ 0 ] == '\n' )
+ rc = vdi_interactive_newline( ctx );
+ else
+ rc = vdi_on_char( ctx, cc[ 0 ] );
+ }
+ }
+ if ( rc == 0 ) rc = KOutMsg( "\n" );
+ return rc;
+}
+
+
+static rc_t on_line( const String * line, void * data )
+{
+ rc_t rc = Quitting();
+ if ( rc == 0 )
+ {
+ ictx * ctx = data;
+ rc = vdi_on_newline( ctx, line );
+ }
+ return rc;
+}
+
+/* entry point: if stdin is a character device the byte-wise interactive loop is used, otherwise stdin is processed line by line */
+rc_t vdi_main( const dump_context * ctx, const Args * args )
+{
+ ictx ictx;
+ rc_t rc = init_ictx( &ictx, ctx, args );
+ if ( rc == 0 )
+ {
+ if ( ictx.interactive )
+ rc = vdi_interactive_loop( &ictx );
+ else
+ rc = ProcessFileLineByLine( ictx.std_in, on_line, &ictx ); /* from kfs/filetools.h */
+
+ release_ictx( &ictx ); /* only reached if init_ictx() succeeded */
+ }
+ return rc;
+}
\ No newline at end of file
diff --git a/test/samline/refbases.h b/tools/vdb-dump/vdb-dump-interact.h
similarity index 84%
copy from test/samline/refbases.h
copy to tools/vdb-dump/vdb-dump-interact.h
index 94136fb..88f777b 100644
--- a/test/samline/refbases.h
+++ b/tools/vdb-dump/vdb-dump-interact.h
@@ -24,15 +24,24 @@
*
*/
-#ifndef _h_refbases_
-#define _h_refbases_
+#ifndef _h_vdb_dump_interact_
+#define _h_vdb_dump_interact_
#ifdef __cplusplus
extern "C" {
#endif
+#if 0
+}
+#endif
+
+
+#include <kapp/args.h>
+#include <klib/rc.h>
+#include <klib/text.h>
+
+#include "vdb-dump-context.h"
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
+rc_t vdi_main( const dump_context * ctx, const Args * args );
#ifdef __cplusplus
}
diff --git a/tools/vdb-dump/vdb-dump-print.c b/tools/vdb-dump/vdb-dump-print.c
index e6e7744..0288dae 100644
--- a/tools/vdb-dump/vdb-dump-print.c
+++ b/tools/vdb-dump/vdb-dump-print.c
@@ -25,11 +25,21 @@
*/
#include "vdb-dump-print.h"
+#include "vdb-dump-helper.h"
#include <klib/rc.h>
#include <klib/printf.h>
#include <klib/out.h>
#include <klib/pack.h>
+#include <klib/num-gen.h>
+
+#include <kfs/directory.h>
+
+#include <kdb/manager.h>
+
+#include <vdb/cursor.h>
+#include <vdb/table.h>
+#include <vdb/database.h>
#include <stdlib.h>
#include <string.h>
@@ -65,7 +75,7 @@ static rc_t vdp_print( vdp_context * vdp_ctx, const char * fmt, ... )
size_t available = ( vdp_ctx->buf_size - vdp_ctx->printed_so_far );
va_start ( args, fmt );
- rc = string_vprintf ( &( vdp_ctx->buf[ vdp_ctx->printed_so_far ]), available, &num_writ, fmt, args );
+ rc = string_vprintf( &( vdp_ctx->buf[ vdp_ctx->printed_so_far ]), available, &num_writ, fmt, args );
vdp_ctx->printed_so_far += num_writ;
va_end ( args );
}
@@ -90,7 +100,7 @@ static rc_t vdp_print_string( vdp_context * vdp_ctx, const char * s )
size_t num_writ;
size_t available = ( vdp_ctx->buf_size - vdp_ctx->printed_so_far );
- rc = string_printf ( &( vdp_ctx->buf[ vdp_ctx->printed_so_far ]), available, &num_writ, s );
+ rc = string_printf ( &( vdp_ctx->buf[ vdp_ctx->printed_so_far ] ), available, &num_writ, s );
vdp_ctx->printed_so_far += num_writ;
}
@@ -173,7 +183,7 @@ static uint64_t vdp_bitlength_2_mask( const size_t n_bits )
static void vdp_move_to_value( void* dst, vdp_context * vdp_ctx, const uint32_t n_bits )
{
- char *src_ptr = ( char* )vdp_ctx->buf + BYTE_OFFSET( vdp_ctx->offset_in_bits );
+ uint8_t *src_ptr = ( uint8_t * )vdp_ctx->base + BYTE_OFFSET( vdp_ctx->offset_in_bits );
if ( BIT_OFFSET( vdp_ctx->offset_in_bits ) == 0 )
{
memmove( dst, src_ptr, vdp_bitlength_2_bytes( n_bits ) );
@@ -200,7 +210,6 @@ static uint64_t vdp_move_to_uint64( vdp_context * vdp_ctx )
return value;
}
-
static rc_t vdp_boolean( vdp_context * vdp_ctx )
{
rc_t rc;
@@ -232,11 +241,28 @@ static const char * uint_hex_fmt = "0x%lX";
static const char * uint_dec_fmt = "%lu";
static const char * int_dec_fmt = "%ld";
+
+static rc_t vdp_print_uint64( vdp_context * vdp_ctx, const char * fmt, uint64_t value )
+{
+ if ( vdp_ctx->buf == NULL )
+ return KOutMsg( fmt, value );
+ else
+ return vdp_print( vdp_ctx, fmt, value );
+}
+
+static rc_t vdp_print_int64( vdp_context * vdp_ctx, const char * fmt, int64_t value )
+{
+ if ( vdp_ctx->buf == NULL )
+ return KOutMsg( fmt, value );
+ else
+ return vdp_print( vdp_ctx, fmt, value );
+}
+
static rc_t vdp_uint( vdp_context * vdp_ctx )
{
rc_t rc = 0;
uint64_t value = vdp_move_to_uint64( vdp_ctx );
- if ( ( vdp_ctx->opts->without_sra_types == false )/*&&( def->value_trans_fct != NULL )*/ )
+ if ( ( vdp_ctx->opts->translate_sra_types )/*&&( def->value_trans_fct != NULL )*/ )
{
/*
const char *txt = def->value_trans_fct( (uint32_t)value );
@@ -245,16 +271,9 @@ static rc_t vdp_uint( vdp_context * vdp_ctx )
}
else
{
- const char * fmt;
- if ( vdp_ctx->opts->in_hex )
- fmt = uint_hex_fmt;
- else
- fmt = uint_dec_fmt;
-
- if ( vdp_ctx->buf == NULL )
- rc = KOutMsg( fmt, value );
- else
- rc = vdp_print( vdp_ctx, fmt, value );
+ rc = vdp_print_uint64( vdp_ctx,
+ vdp_ctx->opts->in_hex ? uint_hex_fmt : uint_dec_fmt,
+ value ) ;
}
return rc;
}
@@ -264,7 +283,7 @@ static rc_t vdp_int( vdp_context * vdp_ctx )
{
rc_t rc = 0;
int64_t value = (int64_t)vdp_move_to_uint64( vdp_ctx );
- if ( ( vdp_ctx->opts->without_sra_types == false )/*&&( def->value_trans_fct != NULL )*/ )
+ if ( ( vdp_ctx->opts->translate_sra_types )/*&&( def->value_trans_fct != NULL )*/ )
{
/*
const char *txt = def->value_trans_fct( (uint32_t)value );
@@ -273,8 +292,6 @@ static rc_t vdp_int( vdp_context * vdp_ctx )
}
else
{
- const char * fmt;
-
switch ( vdp_ctx->type_desc->intrinsic_bits )
{
case 8 : { int8_t temp = (int8_t)value;
@@ -287,16 +304,9 @@ static rc_t vdp_int( vdp_context * vdp_ctx )
value = temp; }
break;
}
-
- if ( vdp_ctx->opts->in_hex )
- fmt = uint_hex_fmt;
- else
- fmt = int_dec_fmt;
-
- if ( vdp_ctx->buf == NULL )
- rc = KOutMsg( fmt, value );
- else
- rc = vdp_print( vdp_ctx, fmt, value );
+ rc = vdp_print_int64( vdp_ctx,
+ vdp_ctx->opts->in_hex ? uint_hex_fmt : int_dec_fmt,
+ value ) ;
}
return rc;
}
@@ -334,9 +344,8 @@ static rc_t vdp_float( vdp_context * vdp_ctx )
rc = vdp_print( vdp_ctx, float_fmt, value );
}
else
- {
rc = vdp_print_string( vdp_ctx, unknown_float_fmt );
- }
+
vdp_ctx->offset_in_bits += n_bits;
}
return rc;
@@ -348,7 +357,7 @@ static const char * txt_fmt = "%.*s";
static rc_t vdp_txt_ascii( vdp_context * vdp_ctx )
{
rc_t rc;
- char *src_ptr = (char*)vdp_ctx->buf + BYTE_OFFSET( vdp_ctx->offset_in_bits );
+ char *src_ptr = (char*)vdp_ctx->base + BYTE_OFFSET( vdp_ctx->offset_in_bits );
if ( vdp_ctx->buf == NULL )
rc = KOutMsg( txt_fmt, vdp_ctx->row_len, src_ptr );
else
@@ -375,20 +384,20 @@ static rc_t vdp_hex_char( char * temp, uint32_t * idx, const uint8_t c )
static rc_t vdp_hex_ascii( vdp_context * vdp_ctx )
{
rc_t rc = 0;
- char *src_ptr = (char*)vdp_ctx->buf + BYTE_OFFSET( vdp_ctx->offset_in_bits );
+ char *src_ptr = (char*)vdp_ctx->base + BYTE_OFFSET( vdp_ctx->offset_in_bits );
char *tmp = malloc( ( vdp_ctx->row_len + 1 ) * 4 );
if ( tmp != NULL )
{
- uint32_t i, dst = 0;
+ uint32_t i, dst_idx = 0;
for ( i = 0; i < vdp_ctx->row_len && rc == 0; ++i )
- rc = vdp_hex_char( tmp, &dst, src_ptr[ i ] );
- src_ptr[ dst ] = 0;
+ rc = vdp_hex_char( tmp, &dst_idx, src_ptr[ i ] );
+ tmp[ dst_idx ] = 0;
if ( rc == 0 )
{
if ( vdp_ctx->buf == NULL )
- rc = KOutMsg( txt_fmt, dst, tmp );
+ rc = KOutMsg( txt_fmt, dst_idx, tmp );
else
- rc = vdp_print( vdp_ctx, txt_fmt, dst, tmp );
+ rc = vdp_print( vdp_ctx, txt_fmt, dst_idx, tmp );
}
free( tmp );
}
@@ -433,11 +442,92 @@ vdp_fkt vdp_dispatch[] =
vdp_unicode
};
+static rc_t vdp_print_dim( vdp_context * vdp_ctx, uint32_t dimension, uint32_t selection )
+{ /* prints 'dimension' values through vdp_dispatch[ selection ], separated by ", " unless short booleans are requested */
+ rc_t rc = 0;
+ uint32_t i = 0; /* same type as 'dimension', no signed/unsigned comparison in the loop */
+ bool print_comma = true;
+
+ if ( selection == 0 ) /* cell-type == boolean */
+ {
+ /* if long form "false" or "true" separate elements by comma */
+ print_comma = ( vdp_ctx->opts->c_boolean == 0 );
+ }
+
+ while ( ( i < dimension )&&( rc == 0 ) )
+ {
+ /* selection 0 ... boolean */
+ if ( print_comma && ( i > 0 ) )
+ rc = vdp_print( vdp_ctx, ", " );
+
+ if ( rc == 0 )
+ rc = vdp_dispatch[ selection ]( vdp_ctx );
+
+ i++;
+ }
+ return rc;
+}
+
+
+static char dna_chars[ 4 ] = { 'A', 'C', 'G', 'T' };
+
+/* special function to translate dim=2,bits=1 into a DNA-base: 2 bits of cell data are used as index into dna_chars */
+static rc_t vdp_print_1_base( vdp_context * vdp_ctx )
+{
+ uint64_t value = 0; /* vdp_move_to_value() may fill only part of it, the remaining bytes have to be defined */
+ vdp_move_to_value( &value, vdp_ctx, 2 ); /* move 2 bits into value */
+ value &= 3; /* keeps the index inside dna_chars */
+ if ( vdp_ctx->buf == NULL )
+ return KOutMsg( "%c", dna_chars[ value ] ); /* no buffer: print directly */
+ else
+ return vdp_print( vdp_ctx, "%c", dna_chars[ value ] );
+}
+/* vdp_print_elem
+ *  prints one element; dimension and domain are taken from vdp_ctx->type_desc
+ *  - dimension == 1 : the value is printed by the dispatch-table entry of its domain
+ *  - dimension  > 1 : either one DNA-base is printed ( print_dna_bases ) or all
+ *                     values of the element via vdp_print_dim(), enclosed in
+ *                     brackets if the row has more than one element
+ */
static rc_t vdp_print_elem( vdp_context * vdp_ctx )
{
rc_t rc = 0;
+ uint32_t dimension = vdp_ctx->type_desc->intrinsic_dim;
+ /* index into vdp_dispatch[] is domain - 1 */
+ uint32_t selection = vdp_ctx->type_desc->domain - 1;
+
+ if ( dimension == 1 )
+ {
+ /* we have only 1 dimension ---> just print this value */
+ /* NOTE(review): 6 is presumably the number of entries in vdp_dispatch[] --- confirm */
+ if ( selection < 6 )
+ rc = vdp_dispatch[ selection ]( vdp_ctx );
+ }
+ else
+ {
+ /* we have more than 1 dimension ---> repeat printing values */
+ if ( vdp_ctx->print_dna_bases )
+ rc = vdp_print_1_base( vdp_ctx );
+ else
+ {
+ /*
+ bool trans = ( ( vdp_ctx->opts->without_sra_types == false )&&
+ ( def->dim_trans_fct ) );
+ bool paren = ( ( src->number_of_elements > 1 )||( !trans ) );
+ */
+ /* brackets are only printed if the row has more than one element */
+ bool paren = ( vdp_ctx->row_len > 1 );
+
+ if ( paren )
+ rc = vdp_print( vdp_ctx, "[" );
+ /* rc = vds_append_str( &(def->content), bracket ? "[" : "{" ); */
+
+ if ( rc == 0 )
+ {
+ /*
+ if ( trans )
+ rc = vdt_dump_dim_trans( src, def, dimension );
+ else
+ */
+ /* NOTE(review): unlike the 1-dimensional path above, 'selection' is not
+ checked against the size of the dispatch-table on this path */
+ rc = vdp_print_dim( vdp_ctx, dimension, selection );
+ }
+
+ if ( paren && ( rc == 0 ) )
+ rc = vdp_print( vdp_ctx, "]" );
+ }
+ }
return rc;
}
@@ -471,9 +561,7 @@ rc_t vdp_print_cell_cmn( char * buf, size_t buf_size, size_t *num_written,
vdp_ctx.offset_in_bits = 0;
if ( ( type_desc->domain < vtdBool ) || ( type_desc->domain > vtdUnicode ) )
- {
rc = vdp_print_string( &vdp_ctx, "unknown data-type" );
- }
else
{
bool print_comma = true;
@@ -484,18 +572,14 @@ rc_t vdp_print_cell_cmn( char * buf, size_t buf_size, size_t *num_written,
( type_desc->intrinsic_bits == 1 ) );
if ( ( type_desc->domain == vtdBool ) && opts->c_boolean )
- {
print_comma = false;
- }
while( ( vdp_ctx.elem_idx < row_len ) && ( rc == 0 ) && ( !vdp_ctx.buf_filled ) )
{
uint32_t eidx = vdp_ctx.elem_idx;
- if ( ( eidx > 0 )&& ( vdp_ctx.print_dna_bases == false ) && print_comma )
- {
+ if ( ( eidx > 0 ) && ( vdp_ctx.print_dna_bases == false ) && print_comma )
rc = vdp_print_string( &vdp_ctx, ", " );
- }
/* dumps the basic data-types, implementation above
>>> that means it appends or prints to stdout the element-string
@@ -505,9 +589,7 @@ rc_t vdp_print_cell_cmn( char * buf, size_t buf_size, size_t *num_written,
/* insurance against endless loop */
if ( eidx == vdp_ctx.elem_idx )
- {
vdp_ctx.elem_idx++;
- }
}
}
}
@@ -541,10 +623,727 @@ rc_t vdp_print_cell( const uint32_t elem_bits, const void * base, uint32_t boff,
if ( base == NULL || type_desc == NULL || opts == NULL )
{
rc = RC( rcVDB, rcNoTarg, rcVisiting, rcParam, rcNull );
+ KOutMsg( "base/type_desc/otps is NULL\n" );
}
else
- {
rc = vdp_print_cell_cmn( NULL, 0, NULL, elem_bits, base, boff, row_len, type_desc, opts );
+ return rc;
+}
+
+/* -----------------------------------------------------------------------------------------------*/
+
+
+/* -----------------------------------------------------------------------------------------------*/
+
+/* context for printing from sources: made by vdp_init_ctx(), released by vdp_release_ctx() */
+typedef struct vdp_src_context
+{
+ KDirectory *dir; /* set by KDirectoryNativeDir() in vdp_init_ctx() */
+ const VDBManager *mgr; /* set by VDBManagerMakeRead() in vdp_init_ctx() */
+ VSchema *schema; /* handed to vdh_parse_schema() and to the open-functions of the manager */
+ Vector sources; /* vdp_source-objects, appended by vdp_init_source() */
+ bool print_info; /* if true: messages about opened sources/databases/tables/columns are printed */
+ vdp_opts opts; /* sources, databases and tables point to these options */
+} vdp_src_context;
+
+
+/* an opened database with its tables and sub-databases, made by vdp_open_database() */
+typedef struct vdp_database
+{
+ const String * name; /* copy of the name, whacked in release_database() */
+ const VDatabase *database; /* released in release_database() */
+ Vector sub_databases; /* vdp_database-objects, filled by the recursion in vdp_open_database() */
+ Vector sub_tables; /* vdp_table-objects, appended by vdp_open_table() */
+ vdp_opts * opts; /* points to the options of the vdp_src_context */
+} vdp_database;
+
+/* an opened table with its cursor and columns, made by vdp_open_table() */
+typedef struct vdp_table
+{
+ const String * name; /* copy of the name, whacked in release_table() */
+ const VTable *table; /* released in release_table() */
+ const VCursor *cursor; /* read-cursor the columns are added to, released in release_table() */
+ vdp_opts * opts; /* points to the options of the vdp_src_context */
+ Vector columns; /* vdp_column-objects, appended by vdp_add_column() */
+ uint32_t max_col_name_len; /* length of the longest column-name, used to pad the names when printing a row */
+} vdp_table;
+
+
+/* one column of a table, made by vdp_add_column() */
+typedef struct vdp_column
+{
+ const String * name; /* copy of the name, whacked in release_column() */
+ uint32_t id; /* index of the column in the cursor, written by VCursorAddColumn() */
+ vdp_table * tab; /* the table this column was added to */
+ struct VTypedecl type; /* written by VCursorDatatype() in vdp_get_column_type() */
+ struct VTypedesc desc; /* written by VCursorDatatype(), handed to vdp_print_cell() */
+} vdp_column;
+
+
+/* one source, made by vdp_init_source(): it owns either a table or a database */
+typedef struct vdp_source
+{
+ const String * path; /* copy of the path, whacked in release_source() */
+ int path_type; /* result of VDBManagerPathType() with the kptAlias-bit removed */
+ vdp_table * tbl; /* set by vdp_open_table() if the source is a table, otherwise NULL */
+ vdp_database * db; /* set by vdp_open_database() if the source is a database, otherwise NULL */
+ vdp_opts * opts; /* points to the options of the vdp_src_context */
+} vdp_source;
+
+
+/* -----------------------------------------------------------------------------------------------*/
+
+/* callback for VectorWhack() on the columns of a table:
+   whacks the name of the column and frees the column, 'data' is not used */
+static void CC release_column( void *item, void * data )
+{
+    if ( item != NULL )
+    {
+        vdp_column * col = item;
+        StringWhack ( col->name );
+    }
+    free( item );
+}
+
+/* vdp_add_column
+ *  makes a vdp_column for 'name', adds it to the cursor of the table and
+ *  appends it to tbl->columns
+ *
+ *  tbl        ... the table the column belongs to, tbl->cursor is used
+ *  name       ... name of the column; name->addr is handed to VCursorAddColumn()
+ *                 via "%s" ( assumes a NUL-terminated name --- the visible
+ *                 caller makes it with StringInitCString() )
+ *  print_info ... if true: a message is printed after the column was added
+ *
+ *  returns 0 on success; on failure the column is released and the rc of
+ *  the failing step is returned
+ */
+static rc_t vdp_add_column( vdp_table * tbl, const String * name, bool print_info )
+{
+    rc_t rc = 0;
+    /* calloc instead of malloc: 'type' and 'desc' are zero until
+       vdp_get_column_type() fills them, never random memory */
+    vdp_column * col = calloc( 1, sizeof * col );
+    if ( col == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        KOutMsg( "vdp_add_column( '%S' ) -> %R\n", name, rc );
+    }
+    else
+    {
+        rc = StringCopy( &col->name, name );
+        if ( rc != 0 )
+        {
+            free( ( void * ) col );
+            KOutMsg( "StringCopy( '%S' ) -> %R\n", name, rc );
+        }
+        else
+        {
+            col->tab = tbl;
+            rc = VCursorAddColumn( tbl->cursor, &col->id, "%s", name->addr );
+            if ( rc != 0 )
+                KOutMsg( "VCursorAddColumn( '%S.%S' ) -> %R\n", tbl->name, name, rc );
+            else
+                rc = VectorAppend( &tbl->columns, NULL, col );
+
+            if ( rc != 0 )
+                release_column( col, NULL );
+            else
+            {
+                /* only a column that made it into the table widens the name-padding */
+                if ( name->len > tbl->max_col_name_len )
+                    tbl->max_col_name_len = name->len;
+                if ( print_info )
+                    KOutMsg( "column: '%S.%S' added\n", tbl->name, name );
+            }
+        }
+    }
+    return rc;
+}
+
+/* asks the cursor of the table for type-declaration and type-description of
+   the column and stores both in the column; a failure is reported via KOutMsg() */
+static rc_t vdp_get_column_type( vdp_table * tbl, vdp_column * col )
+{
+    rc_t rc;
+
+    rc = VCursorDatatype( tbl->cursor, col->id, &col->type, &col->desc );
+    if ( rc == 0 )
+        return 0;
+
+    KOutMsg( "VCursorDatatype( '%S.%S' ) -> %R\n", tbl->name, col->name, rc );
+    return rc;
+}
+
+/* -----------------------------------------------------------------------------------------------*/
+
+/* callback for VectorWhack() / destructor of a vdp_table: releases the columns,
+   the name, the cursor and the table, then frees the object; 'data' is not used */
+static void CC release_table( void *item, void * data )
+{
+    vdp_table * t = item;
+
+    if ( t == NULL )
+        return;
+
+    VectorWhack( &t->columns, release_column, NULL );
+    StringWhack ( t->name );
+    VCursorRelease( t->cursor );
+    VTableRelease ( t->table );
+    free( item );
+}
+
+/* vdp_open_table
+ *  we can open a table from an accession or from a database...
+ *  opens the table, makes a read-cursor, adds every column the table lists,
+ *  opens the cursor and looks up the type of the columns
+ *
+ *  vctx      ... supplies manager, schema, options and the print_info-flag
+ *  acc       ... if not NULL: the table is opened via the manager and stored in acc->tbl
+ *  parent_db ... used if acc is NULL: the table is opened from parent_db->database
+ *                and appended to parent_db->sub_tables
+ *  name      ... name of the table, name->addr is handed over via "%s"
+ *
+ *  returns 0 on success; on failure everything made in here is released again
+ */
+static rc_t vdp_open_table( vdp_src_context * vctx,
+                vdp_source * acc, vdp_database * parent_db, const String * name )
+{
+    rc_t rc = 0;
+    vdp_table * tbl = malloc( sizeof * tbl );
+    if ( tbl == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        KOutMsg( "vdp_open_table( '%S' ) -> %R\n", name, rc );
+    }
+    else
+    {
+        rc = StringCopy( &tbl->name, name );
+        if ( rc != 0 )
+        {
+            free( ( void * ) tbl );
+            KOutMsg( "StringCopy( '%S' ) -> %R\n", name, rc );
+        }
+        else
+        {
+            tbl->opts = &vctx->opts;
+            VectorInit( &tbl->columns, 0, 20 );
+            tbl->max_col_name_len = 0;
+            /* release_table() releases both: they must not be random memory
+               if opening the table or making the cursor fails */
+            tbl->table = NULL;
+            tbl->cursor = NULL;
+
+            /* open the table: either from manager or from database */
+            if ( acc != NULL )
+                rc = VDBManagerOpenTableRead( vctx->mgr, &tbl->table, vctx->schema, "%s", name->addr );
+            else
+                rc = VDatabaseOpenTableRead( parent_db->database, &tbl->table, "%s", name->addr );
+
+            /* enumerate columns, create cursor, add columns */
+            if ( rc == 0 )
+            {
+                rc = VTableCreateCursorRead( tbl->table, &tbl->cursor );
+                if ( rc == 0 )
+                {
+                    KNamelist * column_names;
+                    rc = VTableListCol( tbl->table, &column_names );
+                    if ( rc == 0 )
+                    {
+                        uint32_t count, idx;
+                        rc = KNamelistCount( column_names, &count );
+                        for ( idx = 0; rc == 0 && idx < count; ++idx )
+                        {
+                            const char * column_name;
+                            rc = KNamelistGet( column_names, idx, &column_name );
+                            if ( rc == 0 && column_name != NULL )
+                            {
+                                String temp_str;
+                                StringInitCString( &temp_str, column_name );
+                                rc = vdp_add_column( tbl, &temp_str, vctx->print_info );
+                            }
+                        }
+                        KNamelistRelease( column_names );
+                    }
+                    /* do not hide an earlier error behind the result of VCursorOpen() */
+                    if ( rc == 0 )
+                        rc = VCursorOpen( tbl->cursor );
+                }
+            }
+
+            if ( rc == 0 )
+            {
+                /* update the type for each column: counting from 0 to count
+                   includes the last column and cannot wrap around if the
+                   vector is empty */
+                uint32_t start = VectorStart( &tbl->columns );
+                uint32_t count = VectorLength( &tbl->columns );
+                uint32_t n;
+                for ( n = 0; n < count && rc == 0; ++n )
+                {
+                    vdp_column * column = VectorGet( &tbl->columns, start + n );
+                    if ( column != NULL )
+                        rc = vdp_get_column_type( tbl, column );
+                }
+            }
+
+            if ( rc == 0 )
+            {
+                /* enter the new object: either into source-struct or parent-db */
+                if ( acc != NULL )
+                    acc->tbl = tbl;
+                else
+                    rc = VectorAppend( &parent_db->sub_tables, NULL, tbl );
+            }
+
+            if ( rc != 0 )
+                release_table( tbl, NULL );
+            else if ( vctx->print_info )
+                KOutMsg( "table: '%S' opened\n", name );
+        }
+    }
+    return rc;
+}
+
+/* vdp_table_adjust_ranges
+ *  brings the requested rows in line with the rows the cursor reports:
+ *  - nothing requested ( ranges empty ) : the id-range of the cursor is added
+ *  - otherwise                          : the ranges are trimmed to that id-range
+ *
+ *  returns an rc if tbl or ranges is NULL or if one of the calls fails
+ */
+static rc_t vdp_table_adjust_ranges( vdp_table * tbl, struct num_gen * ranges )
+{
+    rc_t rc = 0;
+    if ( tbl == NULL || ranges == NULL )
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcNull );
+    else
+    {
+        int64_t first;
+        uint64_t count;
+        rc = VCursorIdRange( tbl->cursor, 0, &first, &count );
+        if ( rc != 0 )
+            KOutMsg( "VCursorIdRange( %S ) -> %R\n", tbl->name, rc );
+        else if ( num_gen_empty( ranges ) )
+        {
+            rc = num_gen_add( ranges, first, count );
+            /* first/count are 64 bit wide ---> '%ld' and '%lu', not '%d' */
+            if ( rc != 0 )
+                KOutMsg( "tbl '%S' : num_gen_add( %ld, %lu ) -> %R\n", tbl->name, first, count, rc );
+        }
+        else
+        {
+            rc = num_gen_trim( ranges, first, count );
+            if ( rc != 0 )
+                KOutMsg( "tbl '%S' : num_gen_trim( %ld, %lu ) -> %R\n", tbl->name, first, count, rc );
+        }
+    }
+    return rc;
+}
+
+
+/* vdp_print_table_row
+ *  prints one row of the table: for each visited column a line with the
+ *  padded column-name followed by the content of the cell
+ *
+ *  tbl    ... table with opened cursor and typed columns
+ *  row_id ... the row to print
+ *
+ *  returns the first non-zero rc of printing or of reading a cell
+ */
+static rc_t vdp_print_table_row( vdp_table * tbl, int64_t row_id )
+{
+    rc_t rc = 0;
+    uint32_t start = VectorStart( &tbl->columns );
+    uint32_t count = VectorLength( &tbl->columns );
+    /* same upper bound as before ( start + count - 1 ), but it does not wrap
+       around for a table without columns.
+       NOTE(review): this bound leaves out the last column of the vector ---
+       confirm whether that is intended; before extending it make sure the
+       desc of that column was filled by vdp_get_column_type() */
+    uint32_t end = ( count > 0 ) ? ( start + count - 1 ) : start;
+    uint32_t id;
+
+    for ( id = start; id < end && rc == 0; ++id )
+    {
+        vdp_column * column = VectorGet( &tbl->columns, id );
+        if ( column != NULL )
+        {
+            /* pad the name up to the length of the longest column-name */
+            uint32_t w = tbl->max_col_name_len + 1 - column->name->len;
+            rc = KOutMsg( "%S:%*s", column->name, w, " " );
+            if ( rc == 0 )
+            {
+                uint32_t elem_bits, boff, row_len;
+                const void * base;
+                rc = VCursorCellDataDirect( tbl->cursor, row_id, column->id,
+                                            &elem_bits, &base, &boff, &row_len );
+                if ( rc != 0 )
+                {
+                    /* both names are String-objects ---> '%S', not '%s' */
+                    KOutMsg( "VCursorCellDataDirect( tbl: '%S', row: %ld, col: '%S' ) -> %R\n",
+                             tbl->name, row_id, column->name, rc );
+                }
+                else if ( base != NULL )
+                {
+                    rc = vdp_print_cell( elem_bits, base, boff, row_len, &column->desc, tbl->opts );
+                }
+            }
+            if ( rc == 0 )
+                rc = KOutMsg( "\n" );
+        }
+    }
+    return rc;
+}
+
+/* prints every row the ranges enumerate via vdp_print_table_row();
+   stops at the first error */
+static rc_t vdp_print_table( vdp_table * tbl, struct num_gen * ranges )
+{
+    const struct num_gen_iter * iter;
+    rc_t rc = num_gen_iterator_make( ranges, &iter );
+    if ( rc == 0 )
+    {
+        int64_t row_id;
+        for ( ; ; )
+        {
+            /* no more rows ? */
+            if ( !num_gen_iterator_next( iter, &row_id, &rc ) )
+                break;
+            /* the iterator reported an error ? */
+            if ( rc != 0 )
+                break;
+            rc = vdp_print_table_row( tbl, row_id );
+        }
+        num_gen_iterator_destroy( iter );
+    }
+    else
+        KOutMsg( "num_gen_iterator_make() -> %R", rc );
+    return rc;
+}
+
+/* -----------------------------------------------------------------------------------------------*/
+
+/* callback for VectorWhack() / destructor of a vdp_database: releases the name,
+   the tables, the sub-databases and the database, then frees the object;
+   'data' is not used */
+static void CC release_database( void *item, void * data )
+{
+    vdp_database * d = item;
+
+    if ( d == NULL )
+        return;
+
+    StringWhack( d->name );
+    VectorWhack( &d->sub_tables, release_table, NULL );
+    VectorWhack( &d->sub_databases, release_database, NULL ); /* !! recursion */
+    VDatabaseRelease( d->database );
+    free( item );
+}
+
+/* vdp_open_database
+ *  we can open a database from an accession or from a database...
+ *  opens the database, then opens every table and ( recursively ) every
+ *  sub-database it lists
+ *
+ *  vctx      ... supplies manager, schema, options and the print_info-flag
+ *  acc       ... if not NULL: the database is opened via the manager and stored in acc->db
+ *  parent_db ... used if acc is NULL: the database is opened from parent_db->database
+ *                and appended to parent_db->sub_databases
+ *  name      ... name of the database, name->addr is handed over via "%s"
+ *
+ *  returns 0 on success; on failure everything made in here is released again
+ */
+static rc_t vdp_open_database( vdp_src_context * vctx,
+                vdp_source * acc, vdp_database * parent_db, const String * name )
+{
+    rc_t rc = 0;
+    vdp_database * db = malloc( sizeof * db );
+    if ( db == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        KOutMsg( "vdp_open_database( '%S' ) -> %R\n", name, rc );
+    }
+    else
+    {
+        rc = StringCopy( &db->name, name );
+        if ( rc != 0 )
+        {
+            free( ( void * ) db );
+            /* the call that failed is StringCopy() */
+            KOutMsg( "StringCopy( '%S' ) -> %R\n", name, rc );
+        }
+        else
+        {
+            VectorInit( &db->sub_databases, 0, 5 );
+            VectorInit( &db->sub_tables, 0, 5 );
+            db->opts = &vctx->opts;
+            /* release_database() releases it: it must not be random memory
+               if opening the database fails */
+            db->database = NULL;
+
+            /* open the database: either from manager or from database */
+            if ( acc != NULL )
+                rc = VDBManagerOpenDBRead( vctx->mgr, &db->database, vctx->schema, "%s", name->addr );
+            else
+                rc = VDatabaseOpenDBRead( parent_db->database, &db->database, "%s", name->addr );
+
+            /* enumerate tables, open tables */
+            if ( rc == 0 )
+            {
+                KNamelist * table_names;
+                /* a failing list-call is not treated as an error, the tables are just skipped */
+                rc_t rc1 = VDatabaseListTbl( db->database, &table_names );
+                if ( rc1 == 0 )
+                {
+                    uint32_t count, idx;
+                    rc = KNamelistCount( table_names, &count );
+                    for ( idx = 0; rc == 0 && idx < count; ++idx )
+                    {
+                        const char * table_name;
+                        rc = KNamelistGet( table_names, idx, &table_name );
+                        if ( rc == 0 && table_name != NULL )
+                        {
+                            String temp_str;
+                            StringInitCString( &temp_str, table_name );
+                            rc = vdp_open_table( vctx, NULL, db, &temp_str );
+                        }
+                    }
+                    KNamelistRelease( table_names );
+                }
+            }
+
+            /* enumerate sub-db's, open sub-db's */
+            if ( rc == 0 )
+            {
+                KNamelist * sub_db_names;
+                /* same as above: a failing list-call just skips the sub-databases */
+                rc_t rc1 = VDatabaseListDB( db->database, &sub_db_names );
+                if ( rc1 == 0 )
+                {
+                    uint32_t count, idx;
+                    rc = KNamelistCount( sub_db_names, &count );
+                    for ( idx = 0; rc == 0 && idx < count; ++idx )
+                    {
+                        const char * sub_db_name;
+                        rc = KNamelistGet( sub_db_names, idx, &sub_db_name );
+                        if ( rc == 0 && sub_db_name != NULL )
+                        {
+                            String temp_str;
+                            StringInitCString( &temp_str, sub_db_name );
+                            rc = vdp_open_database( vctx, NULL, db, &temp_str ); /* !! recursion !! */
+                        }
+                    }
+                    KNamelistRelease( sub_db_names );
+                }
+            }
+
+            if ( rc == 0 )
+            {
+                /* enter the new object: either into source-struct or parent-db */
+                if ( acc != NULL )
+                    acc->db = db;
+                else
+                    rc = VectorAppend( &parent_db->sub_databases, NULL, db );
+            }
+
+            if ( rc != 0 )
+                release_database( db, NULL );
+            else if ( vctx->print_info )
+                KOutMsg( "database: '%S' opened\n", name );
+        }
+    }
+    return rc;
+}
+
+/*
+KLIB_EXTERN void* CC VectorFind ( const Vector *self, const void *key, uint32_t *idx,
+ int64_t ( CC * cmp ) ( const void *key, const void *n ) );
+*/
+/* compare-callback for VectorFind(): 'key' is the name to look for ( a String ),
+   'n' is a vdp_table out of the vector; the result is that of StringCompare() */
+static int64_t CC vdp_db_find_table( const void *key, const void * n )
+{
+    const vdp_table * candidate = n;
+    return StringCompare( ( const String * )key, candidate->name );
+}
+
+static const char * DFLT_TABLE = "SEQUENCE";
+
+/* vdp_db_get_table
+ *  looks up a table of the database by name
+ *
+ *  db   ... the database to search in, NULL results in NULL
+ *  path ... name of the table; if NULL or empty the table named DFLT_TABLE
+ *           is looked up, and if there is none, the first table of the database
+ *
+ *  returns the table or NULL
+ *
+ *  NOTE(review): VectorFind() presumably needs the vector to be ordered by the
+ *  compare-function, sub_tables is filled in listing-order --- confirm
+ */
+static vdp_table * vdp_db_get_table( vdp_database * db, const String * path )
+{
+    vdp_table * res = NULL;
+    if ( db != NULL )
+    {
+        if ( path == NULL || path->len == 0 )
+        {
+            /* DFLT_TABLE is a pointer: 'sizeof DFLT_TABLE' would be the size of
+               the pointer, not the length of the name ---> measure the string */
+            String tmp;
+            StringInitCString( &tmp, DFLT_TABLE );
+            res = vdp_db_get_table( db, &tmp ); /* recursion */
+            if ( res == NULL )
+                res = VectorGet( &db->sub_tables, 0 );
+        }
+        else
+        {
+            uint32_t found;
+            res = VectorFind( &db->sub_tables, path, &found, vdp_db_find_table );
+        }
+    }
+    return res;
+}
+
+/* -----------------------------------------------------------------------------------------------*/
+
+/* callback for VectorWhack() / destructor of a vdp_source: releases the table
+   and the database ( whichever exists ) and the path, then frees the object;
+   'data' is not used */
+static void CC release_source( void *item, void * data )
+{
+    vdp_source * s = item;
+
+    if ( s == NULL )
+        return;
+
+    if ( s->tbl != NULL )
+        release_table( s->tbl, NULL );
+    if ( s->db != NULL )
+        release_database( s->db, NULL );
+    StringWhack ( s->path );
+    free( ( void * ) item );
+}
+
+/* vdp_init_source
+ *  makes a vdp_source for the path, opens it as database or as table
+ *  ( depending on what VDBManagerPathType() reports ) and appends it to
+ *  vctx->sources
+ *
+ *  vctx ... context with manager and the vector of sources
+ *  path ... path of the source, a copy is kept
+ *
+ *  returns 0 on success; on failure the source is released again
+ */
+static rc_t vdp_init_source( vdp_src_context * vctx, const String * path )
+{
+    rc_t rc = 0;
+    vdp_source * vsrc = malloc( sizeof * vsrc );
+    if ( vsrc == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+        KOutMsg( "vdp_init_source( '%S' ) -> %R\n", path, rc );
+    }
+    else
+    {
+        rc = StringCopy ( &vsrc->path, path );
+        if ( rc != 0 )
+        {
+            free( ( void * ) vsrc );
+            KOutMsg( "StringCopy( '%S' ) -> %R\n", path, rc );
+        }
+        else
+        {
+            vsrc->path_type = ( VDBManagerPathType ( vctx->mgr, "%s", vsrc->path->addr ) & ~ kptAlias );
+            vsrc->tbl = NULL;
+            vsrc->db = NULL;
+            vsrc->opts = &vctx->opts;
+            /* types defined in <kdb/manager.h> */
+            switch ( vsrc->path_type )
+            {
+                case kptDatabase      : rc = vdp_open_database( vctx, vsrc, NULL, vsrc->path ); break;
+                case kptPrereleaseTbl :
+                case kptTable         : rc = vdp_open_table( vctx, vsrc, NULL, vsrc->path ); break;
+                default               : rc = RC( rcVDB, rcNoTarg, rcConstructing, rcFormat, rcUnknown ); break;
+            }
+
+            if ( rc != 0 )
+                KOutMsg( "cannot open source '%S' -> %R\n", path, rc );
+            else
+                rc = VectorAppend( &vctx->sources, NULL, vsrc );
+
+            if ( rc != 0 )
+            {
+                /* the source did not make it into the vector: nobody else
+                   would release it ( and what it opened ) */
+                release_source( vsrc, NULL );
+            }
+            else if ( vctx->print_info )
+                KOutMsg( "source '%S' opened\n", path );
+        }
+    }
+    return rc;
+}
+
+/* -----------------------------------------------------------------------------------------------*/
+
+/* vdp_release_ctx
+ *  releases all sources of the context, then directory, manager and schema
+ *
+ *  vctx ... the context to release, NULL is accepted
+ *
+ *  every release is attempted even if an earlier one failed; the first
+ *  non-zero rc is returned
+ *
+ *  NOTE(review): the context-struct itself is not freed in here --- confirm
+ *  who is responsible for that
+ */
+rc_t vdp_release_ctx( vdp_src_context * vctx )
+{
+    rc_t rc = 0;
+    if ( vctx != NULL )
+    {
+        rc_t rc2;
+
+        /* release all sources */
+        VectorWhack( &vctx->sources, release_source, NULL );
+
+        if ( vctx->dir != NULL )
+            rc = KDirectoryRelease( vctx->dir );
+
+        if ( vctx->mgr != NULL )
+        {
+            rc2 = VDBManagerRelease( vctx->mgr );
+            if ( rc == 0 ) rc = rc2;
+        }
+
+        if ( vctx->schema != NULL )
+        {
+            rc2 = VSchemaRelease( vctx->schema );
+            if ( rc == 0 ) rc = rc2;
+        }
+    }
+    return rc;
+}
+
+/* vdp_init_ctx
+ *  makes a context: native directory, VDB-manager, schema, and one source
+ *  for each non-empty parameter of the args
+ *
+ *  vctx ... out: receives the new context on success, NULL on failure
+ *  args ... the parameters are the paths of the sources
+ *
+ *  returns 0 on success; on failure everything made in here is released
+ *  and the rc of the failing step is returned
+ */
+rc_t vdp_init_ctx( vdp_src_context ** vctx, const Args * args )
+{
+    rc_t rc = 0;
+    if ( vctx == NULL )
+    {
+        rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+        KOutMsg( "vdp_init_ctx() -> %R\n", rc );
+    }
+    else
+    {
+        vdp_src_context * o = malloc( sizeof *o );
+        *vctx = NULL;   /* defined result for the caller, even if we fail */
+        if ( o == NULL )
+        {
+            rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+            KOutMsg( "vdp_init_ctx() -> %R\n", rc );
+        }
+        else
+        {
+            VectorInit( &o->sources, 0, 5 );
+            /* vdp_release_ctx() looks at all 3 of them: none may be random memory */
+            o->dir = NULL;
+            o->mgr = NULL;
+            o->schema = NULL;
+            o->print_info = false;
+            o->opts.print_dna_bases = false;
+            o->opts.in_hex = false;
+            o->opts.translate_sra_types = false;
+            o->opts.c_boolean = '1';
+
+            rc = KDirectoryNativeDir( &o->dir );
+            if ( rc != 0 )
+            {
+                KOutMsg( "KDirectoryNativeDir() -> %R\n", rc );
+                o->dir = NULL;
+            }
+            else
+            {
+                rc = VDBManagerMakeRead ( &o->mgr, o->dir );
+                if ( rc != 0 )
+                {
+                    KOutMsg( "VDBManagerMakeRead() -> %R\n", rc );
+                    o->mgr = NULL;
+                }
+                else
+                {
+                    uint32_t count, idx;
+                    vdh_parse_schema( o->mgr, &o->schema, NULL, true /*ctx->force_sra_schema*/ );
+                    rc = ArgsParamCount( args, &count );
+                    if ( rc != 0 )
+                        KOutMsg( "ArgsParamCount() -> %R\n", rc );
+                    else
+                    {
+                        for ( idx = 0; rc == 0 && idx < count; ++idx )
+                        {
+                            const char *arg = NULL;
+                            rc = ArgsParamValue( args, idx, (const void **)&arg );
+                            if ( rc != 0 )
+                                KOutMsg( "ArgsParamValue() -> %R\n", rc );
+                            else if ( arg != NULL && arg[ 0 ] != 0 )
+                            {
+                                String temp_str;
+                                StringInitCString( &temp_str, arg );
+                                rc = vdp_init_source( o, &temp_str );
+                            }
+                        }
+                    }
+                }
+            }
+            if ( rc == 0 )
+                *vctx = o;
+            else
+            {
+                /* vdp_release_ctx() releases what the context holds, but not
+                   the struct itself ---> free it here */
+                vdp_release_ctx( o );
+                free( o );
+            }
+        }
}
return rc;
-}
\ No newline at end of file
+}
+
+
+/* returns the table to print from for the source with the given id:
+   - the source is a table    ---> this table
+   - the source is a database ---> what vdp_db_get_table() finds for 'path'
+   - no context / no such source / neither table nor database ---> NULL */
+static vdp_table * vdp_get_table( vdp_src_context * vctx, uint32_t src_id, String * path )
+{
+    vdp_source * src;
+
+    if ( vctx == NULL )
+        return NULL;
+
+    src = VectorGet( &vctx->sources, src_id );
+    if ( src == NULL )
+        return NULL;
+
+    if ( src->tbl != NULL )
+        return src->tbl;                            /* source has only this table */
+    if ( src->db != NULL )
+        return vdp_db_get_table( src->db, path );   /* source is a database */
+    return NULL;
+}
+
+/* -----------------------------------------------------------------------------------------------*/
+/* lets num_gen_parse_S() parse 'range' into 'ranges', a failure is reported via KOutMsg() */
+static rc_t vdb_print_parse_range( struct num_gen * ranges, const String * range )
+{
+    rc_t rc;
+
+    rc = num_gen_parse_S( ranges, range );
+    if ( rc == 0 )
+        return 0;
+
+    KOutMsg( "num_gen_parse_S( %S ) -> %R\n", range, rc );
+    return rc;
+}
+
+/* splits S at the first '.':
+   - no '.' in S   ---> the whole of S is parsed as ranges, src_id is not touched
+   - text after '.' ---> parsed as ranges
+   - text before '.' ---> converted into the source-id; if the conversion fails
+     or the value is bigger than 0xFFFF the source-id is set to 0
+   S == NULL is accepted and does nothing */
+static rc_t vdb_print_get_src_and_ranges( const String * S,
+                struct num_gen * ranges, uint32_t * src_id )
+{
+    rc_t rc = 0;
+    char * dot;
+    uint32_t dot_idx;
+    String range_part, src_part;
+
+    if ( S == NULL )
+        return 0;
+
+    dot = string_chr( S->addr, S->len, '.' );
+    if ( dot == NULL )
+        return vdb_print_parse_range( ranges, S );
+
+    dot_idx = ( dot - S->addr );
+
+    /* the part behind the dot: the ranges */
+    if ( dot_idx < S->len )
+    {
+        String * behind = StringSubstr( S, &range_part, dot_idx + 1, S->len - dot_idx );
+        if ( behind != NULL )
+            rc = vdb_print_parse_range( ranges, behind );
+    }
+
+    /* the part in front of the dot: the source-id */
+    if ( rc == 0 && dot_idx > 0 )
+    {
+        String * front = StringSubstr( S, &src_part, 0, dot_idx );
+        if ( front != NULL )
+        {
+            rc_t rc1;
+            uint64_t v = StringToU64( front, &rc1 );
+            *src_id = ( rc1 != 0 || v > 0xFFFF ) ? 0 : ( v & 0xFFFF );
+        }
+    }
+    return rc;
+}
+
+
+/* prints the source-id and the ranges ( as text made by num_gen_as_string() ) */
+static rc_t vdp_print_show_src_and_ranges( struct num_gen * ranges, uint32_t src_id )
+{
+    char text[ 1024 ];
+    rc_t rc;
+
+    text[ 0 ] = 0;
+    rc = num_gen_as_string( ranges, text, sizeof text, NULL, true );
+    if ( rc != 0 )
+        return rc;
+    return KOutMsg( "src-id = %d, ranges = %s\n", src_id, text );
+}
+
+
+/* called from vdb-dump-interact.c, v is a vector of String-objects
+   the String at index 1 of v selects source and rows ( see
+   vdb_print_get_src_and_ranges() ); the rows are adjusted to what the table
+   has and printed; an unknown source is reported, but is not an error */
+rc_t vdp_print_interactive( const Vector * v, vdp_src_context * vctx )
+{
+    uint32_t src_id = 0; /* per default use the first ( mostly only ) source */
+    struct num_gen * ranges;
+    rc_t rc = num_gen_make_sorted( &ranges, true );
+    if ( rc != 0 )
+    {
+        KOutMsg( "num_gen_make_sorted() -> %R\n", rc );
+        return rc;
+    }
+
+    rc = vdb_print_get_src_and_ranges( VectorGet ( v, 1 ), ranges, &src_id );
+    if ( rc == 0 )
+    {
+        vdp_table * tbl = vdp_get_table( vctx, src_id, NULL );
+        if ( tbl != NULL )
+        {
+            rc = vdp_table_adjust_ranges( tbl, ranges );
+
+            if ( rc == 0 )
+                rc = KOutMsg( "tbl: %S\n", tbl->name );
+
+            if ( rc == 0 )
+                rc = vdp_print_table( tbl, ranges );
+        }
+        else
+            KOutMsg( "invalid source #%d\n", src_id );
+    }
+    num_gen_destroy( ranges );
+    return rc;
+}
diff --git a/tools/vdb-dump/vdb-dump-print.h b/tools/vdb-dump/vdb-dump-print.h
index e493ca8..af8f0e1 100644
--- a/tools/vdb-dump/vdb-dump-print.h
+++ b/tools/vdb-dump/vdb-dump-print.h
@@ -29,6 +29,10 @@
#define _h_vdb_dump_print_
#include <vdb/schema.h> /* for VTypedesc */
+#include <vdb/manager.h>
+
+#include <klib/vector.h>
+#include <kapp/args.h>
#ifdef __cplusplus
extern "C" {
@@ -38,11 +42,14 @@ typedef struct vdp_opts
{
bool print_dna_bases;
bool in_hex;
- bool without_sra_types;
+ bool translate_sra_types;
char c_boolean; /* how boolean is printed '1' ... 0/1, 'T' ... T/F, /0 ... true/false */
} vdp_opts;
+struct vdp_src_context;
+
+
/* vdp_print_cell
* prints the content of a cursor-cell to stdout
*
@@ -66,6 +73,12 @@ rc_t vdp_print_cell( const uint32_t elem_bits, const void * base, uint32_t boff,
const VTypedesc * type_desc, vdp_opts * opts );
+rc_t vdp_init_ctx( struct vdp_src_context ** vctx, const Args * args );
+rc_t vdp_release_ctx( struct vdp_src_context * vctx );
+
+/* v is a vector of String objects... */
+rc_t vdp_print_interactive( const Vector * v, struct vdp_src_context * vctx );
+
#ifdef __cplusplus
}
#endif
diff --git a/tools/vdb-dump/vdb-dump-repo.c b/tools/vdb-dump/vdb-dump-repo.c
new file mode 100644
index 0000000..66baca6
--- /dev/null
+++ b/tools/vdb-dump/vdb-dump-repo.c
@@ -0,0 +1,281 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "vdb-dump-repo.h"
+#include "vdb-dump-helper.h"
+
+#include <klib/text.h>
+#include <klib/out.h>
+#include <kfg/config.h>
+#include <kfg/repository.h>
+
+/* ------------------------------------------------------------------------------------------------- */
+
+static const KRepCategory id_to_repo_cat[ 4 ] = { krepUserCategory, krepSiteCategory, krepRemoteCategory, krepBadCategory };
+
+static const char * s_BadCategory = "BadCategory";
+static const char * s_UserCategory = "UserCategory";
+static const char * s_SiteCategory = "SiteCategory";
+static const char * s_RemoteCategory = "RemoteCategory";
+static const char * s_UnknownCategory = "unknow category";
+
+/* translates a repository-category into its name;
+   a value without a case below results in s_UnknownCategory */
+static const char * KRepCategory_to_str( const KRepCategory c )
+{
+    const char * name = s_UnknownCategory;
+    switch( c )
+    {
+        case krepBadCategory    : name = s_BadCategory; break;
+        case krepUserCategory   : name = s_UserCategory; break;
+        case krepSiteCategory   : name = s_SiteCategory; break;
+        case krepRemoteCategory : name = s_RemoteCategory; break;
+        default                 : break;
+    }
+    return name;
+}
+
+static const char * s_Prefix_bad = "bad";
+static const char * s_Prefix_user = "user";
+static const char * s_Prefix_site = "site";
+static const char * s_Prefix_remote = "remote";
+static const char * s_Prefix_unknown = "unknow";
+
+/* translates a repository-category into the short prefix used in the output;
+   a value without a case below results in s_Prefix_unknown */
+static const char * KRepCategory_to_prefix( const KRepCategory c )
+{
+    const char * prefix = s_Prefix_unknown;
+    switch( c )
+    {
+        case krepBadCategory    : prefix = s_Prefix_bad; break;
+        case krepUserCategory   : prefix = s_Prefix_user; break;
+        case krepSiteCategory   : prefix = s_Prefix_site; break;
+        case krepRemoteCategory : prefix = s_Prefix_remote; break;
+        default                 : break;
+    }
+    return prefix;
+}
+
+
+static const char * s_BadSubCategory = "BadSubCategory";
+static const char * s_MainSubCategory = "MainSubCategory";
+static const char * s_AuxSubCategory = "AuxSubCategory";
+static const char * s_ProtectedSubCategory = "ProtectedSubCategory";
+
+/* translates a repository-sub-category into its name;
+   a value without a case below results in s_UnknownCategory */
+static const char * KRepSubCategory_to_str( const KRepSubCategory c )
+{
+    const char * name = s_UnknownCategory;
+    switch( c )
+    {
+        case krepBadSubCategory       : name = s_BadSubCategory; break;
+        case krepMainSubCategory      : name = s_MainSubCategory; break;
+        case krepAuxSubCategory       : name = s_AuxSubCategory; break;
+        case krepProtectedSubCategory : name = s_ProtectedSubCategory; break;
+        default                       : break;
+    }
+    return name;
+}
+
+static const char * s_yes = "yes";
+static const char * s_no = "no";
+
+/* translates a flag into the text "yes" ( true ) or "no" ( false ) */
+static const char * yes_or_no( const bool flag )
+{
+    if ( flag )
+        return s_yes;
+    return s_no;
+}
+
+typedef rc_t ( CC * repofunc )( const KRepository *self, char *buffer, size_t bsize, size_t * size );
+
+/* asks the repository for one text-property via 'f' and prints it as
+   "elem_name : value"; if there is no function or the function fails,
+   nothing is printed and 0 is returned */
+static rc_t vdi_report_repo_str( const KRepository * repo, const char * elem_name, repofunc f )
+{
+    char value[ 4096 ];
+
+    if ( f == NULL )
+        return 0;
+    if ( f( repo, value, sizeof value, NULL ) != 0 )
+        return 0;
+    return KOutMsg( " - %s : %s\n", elem_name, value );
+}
+
+/* prints the report for one repository: category, name and disabled-state;
+   if 'full' is set also display-name, root, resolver, cache-state, ticket,
+   key, key-file, description and project-id ( the id only if it is available ) */
+static rc_t vdi_report_repository( const KRepository * repo, const char * prefix, int idx, bool full )
+{
+    /* the text-properties of the full report, in the order they are printed */
+    const struct { const char * label; repofunc get; } before_cached[ 3 ] =
+    {
+        { "displayname", KRepositoryDisplayName },
+        { "root", KRepositoryRoot },
+        { "resolver", KRepositoryResolver }
+    };
+    const struct { const char * label; repofunc get; } after_cached[ 4 ] =
+    {
+        { "ticket", KRepositoryDownloadTicket },
+        { "key", KRepositoryEncryptionKey },
+        { "keyfile", KRepositoryEncryptionKeyFile },
+        { "desc", KRepositoryDescription }
+    };
+    KRepCategory cat = KRepositoryCategory( repo );
+    KRepSubCategory subcat = KRepositorySubCategory( repo );
+    rc_t rc;
+
+    rc = KOutMsg( " repo.%s #%d:\n", prefix, idx );
+    if ( rc == 0 )
+        rc = KOutMsg( " - category : %s.%s\n", KRepCategory_to_str( cat ), KRepSubCategory_to_str( subcat ) );
+    if ( rc == 0 )
+        rc = vdi_report_repo_str( repo, "name", KRepositoryName );
+    if ( rc == 0 )
+        rc = KOutMsg( " - disabled : %s\n", yes_or_no( KRepositoryDisabled( repo ) ) );
+
+    if ( full )
+    {
+        uint32_t i;
+
+        for ( i = 0; rc == 0 && i < 3; ++i )
+            rc = vdi_report_repo_str( repo, before_cached[ i ].label, before_cached[ i ].get );
+
+        if ( rc == 0 )
+            rc = KOutMsg( " - cached : %s\n", yes_or_no( KRepositoryCacheEnabled( repo ) ) );
+
+        for ( i = 0; rc == 0 && i < 4; ++i )
+            rc = vdi_report_repo_str( repo, after_cached[ i ].label, after_cached[ i ].get );
+
+        if ( rc == 0 )
+        {
+            /* a repository without project-id is not an error: just no line for it */
+            uint32_t prj_id;
+            if ( KRepositoryProjectId( repo, &prj_id ) == 0 )
+                rc = KOutMsg( " - prj-id : %d\n", prj_id );
+        }
+    }
+
+    if ( rc == 0 )
+        rc = KOutMsg( "\n" );
+    return rc;
+}
+
+typedef rc_t ( CC * catfunc )( const KRepositoryMgr *self, KRepositoryVector *user_repositories );
+
+/* selects the function of the repository-manager that lists the repositories
+   of the given category; NULL for every category other than user/site/remote */
+static catfunc vdi_get_catfunc( const KRepCategory cat )
+{
+    catfunc f = NULL;
+    switch( cat )
+    {
+        case krepUserCategory   : f = KRepositoryMgrUserRepositories; break;
+        case krepSiteCategory   : f = KRepositoryMgrSiteRepositories; break;
+        case krepRemoteCategory : f = KRepositoryMgrRemoteRepositories; break;
+        default                 : break;
+    }
+    return f;
+}
+
+/* prints a headline for the category and reports its repositories:
+   - full == false ---> every repository in short form
+   - full == true  ---> only the repository whose index is equal to 'select'
+   a category without list-function prints nothing and returns 0 */
+static rc_t vdi_report_repo_vector( const KRepositoryMgr * repomgr, const KRepCategory cat,
+                                    int32_t select, bool full )
+{
+    KRepositoryVector v;
+    const char * prefix;
+    uint32_t idx, len;
+    bool disabled;
+    rc_t rc;
+    catfunc f = vdi_get_catfunc( cat );
+
+    if ( f == NULL )
+        return 0;
+
+    rc = f( repomgr, &v );
+    if ( rc != 0 )
+        return rc;
+
+    prefix = KRepCategory_to_prefix( cat );
+    len = VectorLength( &v );
+    disabled = KRepositoryMgrCategoryDisabled( repomgr, cat );
+
+    rc = KOutMsg( "repo.%s --> disabled: %s, %d subrepositories )\n", prefix, yes_or_no( disabled ), len );
+    for ( idx = 0; rc == 0 && idx < len; ++idx )
+    {
+        if ( !full || select == idx )
+            rc = vdi_report_repository( VectorGet( &v, idx ), prefix, idx, full );
+    }
+
+    KRepositoryVectorWhack( &v );
+    return rc;
+}
+
+
+/* reports the user-, site- and remote-repositories ( in this order ),
+   stops at the first error */
+static rc_t vdi_repo_all( const KRepositoryMgr * repomgr, bool full )
+{
+    const KRepCategory order[ 3 ] = { krepUserCategory, krepSiteCategory, krepRemoteCategory };
+    rc_t rc = 0;
+    uint32_t i;
+
+    for ( i = 0; rc == 0 && i < 3; ++i )
+        rc = vdi_report_repo_vector( repomgr, order[ i ], -1, full );
+    return rc;
+}
+
+
+/* enables or disables a whole repository-category and prints whether that
+   worked; the returned rc is the one of printing the message */
+static rc_t vdi_repo_switch( const KRepositoryMgr * repomgr, const KRepCategory cat, bool disabled )
+{
+    const char * s_cat = KRepCategory_to_prefix( cat );
+    const char * state = disabled ? "disabled" : "enabled";
+    rc_t rc = KRepositoryMgrCategorySetDisabled( repomgr, cat, disabled );
+
+    if ( rc != 0 )
+        return KOutMsg( "repository '%s' not %s: '%R'", s_cat, state, rc );
+    return KOutMsg( "repository '%s' successfully %s", s_cat, state );
+}
+
+
+/* handles "repo <which> [ on | off | <number> ]":
+   - no 3rd word ---> short report of the category ( or of all categories if
+                      repo_id is 3 )
+   - "on"/"off"  ---> the category gets enabled/disabled
+   - other text  ---> taken as number: full report of the repository with
+                      this index
+   repo_id is used as index into id_to_repo_cat[], 'which_repo' is not used */
+static rc_t vdi_sub_repo( const KRepositoryMgr * repomgr, const Vector * v, const String * which_repo, int32_t repo_id )
+{
+    const KRepCategory cat = id_to_repo_cat[ repo_id ];
+    const String * which_sub;
+    int32_t repo_func;
+    rc_t rc = 0;
+
+    if ( !( VectorLength( v ) > 2 ) )
+    {
+        if ( repo_id < 3 )
+            return vdi_report_repo_vector( repomgr, cat, -1, false );
+        return vdi_repo_all( repomgr, false );
+    }
+
+    which_sub = VectorGet( v, 2 );
+    repo_func = index_of_match( which_sub, 2, "on", "off" );
+    if ( repo_func == 0 )
+        rc = vdi_repo_switch( repomgr, cat, false );
+    else if ( repo_func == 1 )
+        rc = vdi_repo_switch( repomgr, cat, true );
+    else if ( repo_func == -1 )
+    {
+        int32_t select = ( int32_t )string_to_I64( which_sub->addr, which_sub->len, NULL );
+        rc = vdi_report_repo_vector( repomgr, cat, select, true );
+    }
+    return rc;
+}
+
+
+/* vdi_repo
+ *  entry-point of the "repo"-command; v is a vector of String-objects
+ *  - less than 2 words ---> full report of all repositories
+ *  - 2nd word is one of "user", "site", "remote", "all" ---> vdi_sub_repo()
+ *  - any other 2nd word is reported as unknown
+ *
+ *  returns the rc of making the config / the repository-manager or of the
+ *  selected action
+ */
+rc_t vdi_repo( const Vector * v )
+{
+    KConfig * cfg;
+    rc_t rc = KConfigMake( &cfg, NULL );
+    if ( rc == 0 )
+    {
+        const KRepositoryMgr * repomgr;
+        rc = KConfigMakeRepositoryMgrRead( cfg, &repomgr );
+        /* without this check a failure would leave 'repomgr' undefined,
+           but it would be used and released anyway */
+        if ( rc == 0 )
+        {
+            if ( VectorLength( v ) < 2 )
+            {
+                rc = vdi_repo_all( repomgr, true );
+            }
+            else
+            {
+                const String * which_repo = VectorGet( v, 1 );
+                if ( which_repo != NULL )
+                {
+                    int32_t repo_id = index_of_match( which_repo, 4, "user", "site", "remote", "all" );
+                    if ( repo_id < 0 || repo_id > 3 )
+                        rc = KOutMsg( "unknow repository '%S'", which_repo );
+                    else
+                        rc = vdi_sub_repo( repomgr, v, which_repo, repo_id );
+                }
+            }
+
+            KRepositoryMgrRelease( repomgr );
+        }
+        KConfigRelease ( cfg );
+    }
+    return rc;
+}
diff --git a/test/samline/refbases.h b/tools/vdb-dump/vdb-dump-repo.h
similarity index 87%
copy from test/samline/refbases.h
copy to tools/vdb-dump/vdb-dump-repo.h
index 94136fb..d2a2e64 100644
--- a/test/samline/refbases.h
+++ b/tools/vdb-dump/vdb-dump-repo.h
@@ -1,41 +1,46 @@
-/*===========================================================================
-*
-* PUBLIC DOMAIN NOTICE
-* National Center for Biotechnology Information
-*
-* This software/database is a "United States Government Work" under the
-* terms of the United States Copyright Act. It was written as part of
-* the author's official duties as a United States Government employee and
-* thus cannot be copyrighted. This software/database is freely available
-* to the public for use. The National Library of Medicine and the U.S.
-* Government have not placed any restriction on its use or reproduction.
-*
-* Although all reasonable efforts have been taken to ensure the accuracy
-* and reliability of the software and data, the NLM and the U.S.
-* Government do not and cannot warrant the performance or results that
-* may be obtained by using this software or data. The NLM and the U.S.
-* Government disclaim all warranties, express or implied, including
-* warranties of performance, merchantability or fitness for any particular
-* purpose.
-*
-* Please cite the author in any work or product based on this material.
-*
-* ===========================================================================
-*
-*/
-
-#ifndef _h_refbases_
-#define _h_refbases_
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-char * read_refbases( const char * refname, uint32_t ref_pos_1_based,
- uint32_t ref_len, uint32_t * bases_in_ref );
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#ifndef _h_vdb_dump_repo_
+#define _h_vdb_dump_repo_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+}
+#endif
+/* NOTE(review): the "#if 0" block above is never compiled; presumably it only
+ * balances the brace opened by extern "C" for brace-matching editors. */
+#include <klib/rc.h>
+#include <klib/vector.h>
+/* vdi_repo: runs the "repo" command.
+ * v holds the parsed command as String pointers: element 1 ( optional ) is the
+ * repository category ( "user", "site", "remote", "all" ), element 2
+ * ( optional ) is "on", "off" or a number. With fewer than 2 elements all
+ * repositories are reported. Returns 0 or the rc of the failing call.
+ */
+rc_t vdi_repo( const Vector * v );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _h_vdb_dump_repo_ */
diff --git a/tools/vdb-dump/vdb-dump-str.c b/tools/vdb-dump/vdb-dump-str.c
index 488cc7e..8dac7e7 100644
--- a/tools/vdb-dump/vdb-dump-str.c
+++ b/tools/vdb-dump/vdb-dump-str.c
@@ -98,35 +98,43 @@ char *vds_ptr( p_dump_str s )
static rc_t vds_inc_buffer( p_dump_str s, const size_t by_len )
{
+ rc_t rc = 0; /* single exit: every path below only sets rc */
if ( s == NULL )
{
- return RC( rcVDB, rcNoTarg, rcAllocating, rcParam, rcNull );
+ rc = RC( rcVDB, rcNoTarg, rcAllocating, rcParam, rcNull );
}
- if ( ( s->str_len + by_len ) >= s->buf_size )
+ else
{
- size_t new_len = by_len + s->str_len + 1;
- while( s->buf_size < new_len ) s->buf_size += s->buf_inc;
- s->buf = realloc( s->buf, s->buf_size );
- if ( s->buf == NULL )
+ size_t needed = ( s->str_len + by_len + 1 ); /* current length + requested growth + 1 ( presumably the terminator ) */
+ if ( needed >= s->buf_size ) /* grow only if the request does not fit below the current size */
{
- return RC( rcVDB, rcNoTarg, rcAllocating, rcMemory, rcExhausted );
+ char * tmp;
+ size_t new_size = s->buf_size * 2;
+ /* doubling is the default; if that is still too small, take the needed size plus one increment */
+ if ( new_size < needed ) new_size = needed + s->buf_inc;
+ tmp = realloc( s->buf, new_size ); /* into a temporary: s->buf stays untouched if realloc fails */
+ if ( tmp == NULL )
+ {
+ rc = RC( rcVDB, rcNoTarg, rcAllocating, rcMemory, rcExhausted );
+ }
+ else
+ {
+ s->buf = tmp;
+ s->buf_size = new_size; /* size is recorded only after a successful realloc */
+ }
}
}
- return 0;
+ return rc;
}
-static rc_t vds_truncate( p_dump_str s )
+static rc_t vds_truncate( p_dump_str s, const size_t appended )
{
- if ( s == NULL )
+ if ( s == NULL || s->buf == NULL )
{
return RC( rcVDB, rcNoTarg, rcResizing, rcParam, rcNull );
}
- if ( s->buf == NULL )
- {
- return RC( rcVDB, rcNoTarg, rcResizing, rcParam, rcNull );
- }
- s->str_len = string_size( s->buf );
+ s->str_len += appended;
if ( ( s->str_limit > 0 )&&( s->str_len > s->str_limit ) )
{
s->buf[ s->str_limit ] = 0;
@@ -139,32 +147,34 @@ static rc_t vds_truncate( p_dump_str s )
+/* vds_append_fmt: appends printf-style formatted text to the dump-string.
+ *
+ * s .......... dump-string to append to
+ * aprox_len .. estimated length of the formatted text, used to grow the buffer
+ * fmt ........ format string, must not be NULL or empty
+ *
+ * If a limit is set and already reached, only the truncated-flag is set and
+ * 0 is returned. Otherwise the rc of vds_inc_buffer(), string_vprintf() or
+ * vds_truncate() is returned.
+ */
rc_t vds_append_fmt( p_dump_str s, const size_t aprox_len, const char *fmt, ... )
{
- va_list argp;
- rc_t rc;
- if ( s == NULL )
- {
- return RC( rcVDB, rcNoTarg, rcInserting, rcParam, rcNull );
- }
- if ( fmt == NULL )
+ rc_t rc = 0;
+ if ( s == NULL || fmt == NULL )
{
- return RC( rcVDB, rcNoTarg, rcInserting, rcParam, rcNull );
+ rc = RC( rcVDB, rcNoTarg, rcInserting, rcParam, rcNull );
}
- if ( fmt[0] == 0 )
+ else if ( fmt[ 0 ] == 0 )
{
- return RC( rcVDB, rcNoTarg, rcInserting, rcParam, rcEmpty );
+ rc = RC( rcVDB, rcNoTarg, rcInserting, rcParam, rcEmpty );
}
- if ( ( s->str_limit > 0 )&&( s->str_len >= s->str_limit ) )
+ else if ( ( s->str_limit > 0 )&&( s->str_len >= s->str_limit ) )
{
s->truncated = true;
- return 0;
}
- rc = vds_inc_buffer( s, aprox_len );
- if ( rc == 0 )
+ else
{
- va_start( argp, fmt );
- string_vprintf( s->buf + s->str_len, s->buf_size-1, NULL, fmt, argp );
- va_end( argp );
- rc = vds_truncate( s ); /* adjusts str_len */
+ rc = vds_inc_buffer( s, aprox_len );
+ if ( rc == 0 )
+ {
+ va_list argp;
+ size_t num_writ;
+
+ va_start( argp, fmt );
+ /* the write starts at offset str_len, so only the space behind it
+ may be offered as capacity; aprox_len is just an estimate and
+ offering buf_size - 1 would allow a write past the allocation */
+ rc = string_vprintf( s->buf + s->str_len, s->buf_size - s->str_len, &num_writ, fmt, argp );
+ va_end( argp );
+
+ if ( rc == 0 )
+ rc = vds_truncate( s, num_writ ); /* adjusts str_len */
+ }
}
return rc;
}
@@ -192,9 +202,9 @@ rc_t vds_append_str( p_dump_str s, const char *s1 )
rc = vds_inc_buffer( s, append_len );
if ( rc == 0 )
{
- size_t l = string_size( s->buf );
- string_copy( s->buf + l, s->buf_size - l, s1, append_len );
- rc = vds_truncate( s ); /* adjusts str_len */
+ size_t l = s->str_len;
+ size_t appended = string_copy( s->buf + l, s->buf_size - l, s1, append_len );
+ rc = vds_truncate( s, appended ); /* adjusts str_len */
}
}
}
@@ -220,8 +230,8 @@ rc_t vds_append_str_no_limit_check( p_dump_str s, const char *s1 )
if ( rc == 0 )
{
size_t l = string_size( s->buf );
- string_copy( s->buf + l, s->buf_size - l, s1, append_len );
- s->str_len += append_len;
+ size_t appended = string_copy( s->buf + l, s->buf_size - l, s1, append_len );
+ s->str_len += appended;
}
return rc;
}
@@ -340,15 +350,15 @@ rc_t vds_enclose_string( p_dump_str s, const char c_left, const char c_right )
{
return RC( rcVDB, rcNoTarg, rcInserting, rcParam, rcNull );
}
- to_move = string_size( s->buf ) + 1;
+ to_move = s->str_len + 1;
rc = vds_inc_buffer( s, 2 );
if ( rc == 0 )
{
memmove( s->buf + 1, s->buf, to_move );
- s->buf[0]=c_left;
- s->buf[to_move]=c_right;
- s->buf[to_move+1]=0;
- s->str_len = string_size( s->buf );
+ s->buf[ 0 ] = c_left;
+ s->buf[ to_move ] = c_right;
+ s->buf[ to_move + 1 ] = 0;
+ s->str_len += 2;
}
return rc;
}
diff --git a/tools/vdb-dump/vdb-dump.c b/tools/vdb-dump/vdb-dump.c
index 597f190..b296183 100644
--- a/tools/vdb-dump/vdb-dump.c
+++ b/tools/vdb-dump/vdb-dump.c
@@ -75,102 +75,102 @@
#include "vdb-dump-fastq.h"
#include "vdb-dump-redir.h"
#include "vdb-dump-bin.h"
+#include "vdb-dump-interact.h"
#include "vdb_info.h"
-static const char * row_id_on_usage[] = { "print row id", NULL };
-static const char * line_feed_usage[] = { "line-feed's inbetween rows", NULL };
-static const char * colname_off_usage[] = { "do not print column-names", NULL };
-static const char * in_hex_usage[] = { "print numbers in hex", NULL };
-static const char * table_usage[] = { "table-name", NULL };
-static const char * rows_usage[] = { "rows (default = all)", NULL };
-static const char * columns_usage[] = { "columns (default = all)", NULL };
-static const char * schema_usage[] = { "schema-name", NULL };
-static const char * schema_dump_usage[] = { "dumps the schema", NULL };
-static const char * table_enum_usage[] = { "enumerates tables", NULL };
-static const char * column_enum_usage[] = { "enumerates columns in extended form", NULL };
-static const char * column_short_usage[] = { "enumerates columns in short form", NULL };
-static const char * dna_bases_usage[] = { "print dna-bases", NULL };
-static const char * max_line_len_usage[] = { "limits line length", NULL };
-static const char * line_indent_usage[] = { "indents the line", NULL };
-static const char * filter_usage[] = { "filters lines", NULL };
-static const char * format_usage[] = { "output format:", NULL };
-static const char * id_range_usage[] = { "prints id-range", NULL };
-static const char * without_sra_usage[] = { "without sra-type-translation", NULL };
-static const char * excluded_columns_usage[] = { "exclude these columns", NULL };
-static const char * boolean_usage[] = { "defines how boolean's are printed (1,T)", NULL };
-static const char * objver_usage[] = { "request vdb-version", NULL };
-static const char * objts_usage[] = { "request object modification date", NULL };
-static const char * numelem_usage[] = { "print only element-count", NULL };
-static const char * numelemsum_usage[] = { "sum element-count", NULL };
-static const char * show_blobbing_usage[] = { "show blobbing", NULL };
-static const char * enum_phys_usage[] = { "enumerate physical columns", NULL };
-static const char * enum_readable_usage[] = { "enumerate readable columns", NULL };
-static const char * objtype_usage[] = { "report type of object", NULL };
-static const char * idx_enum_usage[] = { "enumerate all available index", NULL };
-static const char * idx_range_usage[] = { "enumerate values and row-ranges of one index", NULL };
-static const char * cur_cache_usage[] = { "size of cursor cache", NULL };
-static const char * out_file_usage[] = { "write output to this file", NULL };
-static const char * out_path_usage[] = { "write output to this directory", NULL };
-static const char * gzip_usage[] = { "compress output using gzip", NULL };
-static const char * bzip2_usage[] = { "compress output using bzip2", NULL };
-static const char * outbuf_size_usage[] = { "size of output-buffer, 0...none", NULL };
-static const char * disable_mt_usage[] = { "disable multithreading", NULL };
-static const char * info_usage[] = { "print info about run", NULL };
-static const char * spotgroup_usage[] = { "show spotgroups", NULL };
-/*static const char * sraschema_usage[] = { "force use of dflt. sra-schema", NULL }; */
-static const char * merge_ranges_usage[] = { "merge and sort row-ranges", NULL };
-static const char * spread_usage[] = { "show spread of integer values", NULL };
-static const char * slice_usage[] = { "find a slice of given depth", NULL };
+static const char * row_id_on_usage[] = { "print row id", NULL };
+static const char * line_feed_usage[] = { "line-feed's inbetween rows", NULL };
+static const char * colname_off_usage[] = { "do not print column-names", NULL };
+static const char * in_hex_usage[] = { "print numbers in hex", NULL };
+static const char * table_usage[] = { "table-name", NULL };
+static const char * rows_usage[] = { "rows (default = all)", NULL };
+static const char * columns_usage[] = { "columns (default = all)", NULL };
+static const char * schema_usage[] = { "schema-name", NULL };
+static const char * schema_dump_usage[] = { "dumps the schema", NULL };
+static const char * table_enum_usage[] = { "enumerates tables", NULL };
+static const char * column_enum_usage[] = { "enumerates columns in extended form", NULL };
+static const char * column_short_usage[] = { "enumerates columns in short form", NULL };
+static const char * dna_bases_usage[] = { "print dna-bases", NULL };
+static const char * max_line_len_usage[] = { "limits line length", NULL };
+static const char * line_indent_usage[] = { "indents the line", NULL };
+static const char * filter_usage[] = { "filters lines", NULL };
+static const char * format_usage[] = { "output format:", NULL };
+static const char * id_range_usage[] = { "prints id-range", NULL };
+static const char * without_sra_usage[] = { "without sra-type-translation", NULL };
+static const char * excluded_columns_usage[] = { "exclude these columns", NULL };
+static const char * boolean_usage[] = { "defines how boolean's are printed (1,T)", NULL };
+static const char * objver_usage[] = { "request vdb-version", NULL };
+static const char * objts_usage[] = { "request object modification date", NULL };
+static const char * numelem_usage[] = { "print only element-count", NULL };
+static const char * numelemsum_usage[] = { "sum element-count", NULL };
+static const char * show_blobbing_usage[] = { "show blobbing", NULL };
+static const char * enum_phys_usage[] = { "enumerate physical columns", NULL };
+static const char * enum_readable_usage[] = { "enumerate readable columns", NULL };
+static const char * objtype_usage[] = { "report type of object", NULL };
+static const char * idx_enum_usage[] = { "enumerate all available index", NULL };
+static const char * idx_range_usage[] = { "enumerate values and row-ranges of one index", NULL };
+static const char * cur_cache_usage[] = { "size of cursor cache", NULL };
+static const char * out_file_usage[] = { "write output to this file", NULL };
+static const char * out_path_usage[] = { "write output to this directory", NULL };
+static const char * gzip_usage[] = { "compress output using gzip", NULL };
+static const char * bzip2_usage[] = { "compress output using bzip2", NULL };
+static const char * outbuf_size_usage[] = { "size of output-buffer, 0...none", NULL };
+static const char * disable_mt_usage[] = { "disable multithreading", NULL };
+static const char * info_usage[] = { "print info about run", NULL };
+static const char * spotgroup_usage[] = { "show spotgroups", NULL };
+static const char * merge_ranges_usage[] = { "merge and sort row-ranges", NULL };
+static const char * spread_usage[] = { "show spread of integer values", NULL };
+static const char * slice_usage[] = { "find a slice of given depth", NULL };
+static const char * interactive_usage[] = { "interactive mode", NULL }; /* usage text of OPTION_INTERACTIVE in DumpOptions; NOTE(review): no HelpOptionLine() for it is visible in this patch - confirm whether the option is meant to stay undocumented */
OptDef DumpOptions[] =
{
- { OPTION_ROW_ID_ON, ALIAS_ROW_ID_ON, NULL, row_id_on_usage, 1, false, false },
- { OPTION_LINE_FEED, ALIAS_LINE_FEED, NULL, line_feed_usage, 1, true, false },
- { OPTION_COLNAME_OFF, ALIAS_COLNAME_OFF, NULL, colname_off_usage, 1, false, false },
- { OPTION_IN_HEX, ALIAS_IN_HEX, NULL, in_hex_usage, 1, false, false },
- { OPTION_TABLE, ALIAS_TABLE, NULL, table_usage, 1, true, false },
- { OPTION_ROWS, ALIAS_ROWS, NULL, rows_usage, 1, true, false },
- { OPTION_COLUMNS, ALIAS_COLUMNS, NULL, columns_usage, 1, true, false },
-/* limit of 5 schemas? */
- { OPTION_SCHEMA, ALIAS_SCHEMA, NULL, schema_usage, 5, true, false },
- { OPTION_SCHEMA_DUMP, ALIAS_SCHEMA_DUMP, NULL, schema_dump_usage, 1, false, false },
- { OPTION_TABLE_ENUM, ALIAS_TABLE_ENUM, NULL, table_enum_usage, 1, false, false },
- { OPTION_COLUMN_ENUM, ALIAS_COLUMN_ENUM, NULL, column_enum_usage, 1, false, false },
- { OPTION_COLUMN_SHORT, ALIAS_COLUMN_SHORT, NULL, column_short_usage, 1, false, false },
- { OPTION_DNA_BASES, ALIAS_DNA_BASES, NULL, dna_bases_usage, 1, false, false },
- { OPTION_MAX_LINE_LEN, ALIAS_MAX_LINE_LEN, NULL, max_line_len_usage, 1, true, false },
- { OPTION_LINE_INDENT, ALIAS_LINE_INDENT, NULL, line_indent_usage, 1, true, false },
- { OPTION_FILTER, ALIAS_FILTER, NULL, filter_usage, 1, true, false },
- { OPTION_FORMAT, ALIAS_FORMAT, NULL, format_usage, 1, true, false },
- { OPTION_ID_RANGE, ALIAS_ID_RANGE, NULL, id_range_usage, 1, false, false },
- { OPTION_WITHOUT_SRA, ALIAS_WITHOUT_SRA, NULL, without_sra_usage, 1, false, false },
- { OPTION_EXCLUDED_COLUMNS, ALIAS_EXCLUDED_COLUMNS, NULL, excluded_columns_usage, 1, true, false },
- { OPTION_BOOLEAN, ALIAS_BOOLEAN, NULL, boolean_usage, 1, true, false },
- { OPTION_NUMELEM, ALIAS_NUMELEM, NULL, numelem_usage, 1, false, false },
- { OPTION_NUMELEMSUM, ALIAS_NUMELEMSUM, NULL, numelemsum_usage, 1, false, false },
- { OPTION_SHOW_BLOBBING, NULL, NULL, show_blobbing_usage, 1, false, false },
- { OPTION_ENUM_PHYS, NULL, NULL, enum_phys_usage, 1, false, false },
- { OPTION_ENUM_READABLE, NULL, NULL, enum_readable_usage, 1, false, false },
- { OPTION_OBJVER, ALIAS_OBJVER, NULL, objver_usage, 1, false, false },
- { OPTION_OBJTS, NULL, NULL, objts_usage, 1, false, false },
- { OPTION_OBJTYPE, ALIAS_OBJTYPE, NULL, objtype_usage, 1, false, false },
- { OPTION_IDX_ENUM, NULL, NULL, idx_enum_usage, 1, false, false },
- { OPTION_IDX_RANGE, NULL, NULL, idx_range_usage, 1, true, false },
- { OPTION_CUR_CACHE, NULL, NULL, cur_cache_usage, 1, true, false },
- { OPTION_OUT_FILE, NULL, NULL, out_file_usage, 1, true, false },
- { OPTION_OUT_PATH, NULL, NULL, out_path_usage, 1, true, false },
- { OPTION_PHASE, NULL, NULL, NULL, 1, true, false },
- { OPTION_GZIP, NULL, NULL, gzip_usage, 1, false, false },
- { OPTION_BZIP2, NULL, NULL, bzip2_usage, 1, false, false },
- { OPTION_OUT_BUF_SIZE, NULL, NULL, outbuf_size_usage, 1, true, false },
- { OPTION_NO_MULTITHREAD, NULL, NULL, disable_mt_usage, 1, false, false },
- { OPTION_INFO, NULL, NULL, info_usage, 1, false, false },
- { OPTION_DIFF, NULL, NULL, NULL, 1, false, false },
- { OPTION_SPOTGROUPS, NULL, NULL, spotgroup_usage, 1, false, false },
- /*{ OPTION_SRASCHEMA, NULL, NULL, sraschema_usage, 1, false, false }, */
- { OPTION_MERGE_RANGES, NULL, NULL, merge_ranges_usage, 1, false, false },
- { OPTION_SPREAD, NULL, NULL, spread_usage, 1, false, false },
- { OPTION_SLICE, NULL, NULL, slice_usage, 1, true, false }
+ { OPTION_ROW_ID_ON, ALIAS_ROW_ID_ON, NULL, row_id_on_usage, 1, false, false },
+ { OPTION_LINE_FEED, ALIAS_LINE_FEED, NULL, line_feed_usage, 1, true, false },
+ { OPTION_COLNAME_OFF, ALIAS_COLNAME_OFF, NULL, colname_off_usage, 1, false, false },
+ { OPTION_IN_HEX, ALIAS_IN_HEX, NULL, in_hex_usage, 1, false, false },
+ { OPTION_TABLE, ALIAS_TABLE, NULL, table_usage, 1, true, false },
+ { OPTION_ROWS, ALIAS_ROWS, NULL, rows_usage, 1, true, false },
+ { OPTION_COLUMNS, ALIAS_COLUMNS, NULL, columns_usage, 1, true, false },
+ { OPTION_SCHEMA, ALIAS_SCHEMA, NULL, schema_usage, 5, true, false },
+ { OPTION_SCHEMA_DUMP, ALIAS_SCHEMA_DUMP, NULL, schema_dump_usage, 1, false, false },
+ { OPTION_TABLE_ENUM, ALIAS_TABLE_ENUM, NULL, table_enum_usage, 1, false, false },
+ { OPTION_COLUMN_ENUM, ALIAS_COLUMN_ENUM, NULL, column_enum_usage, 1, false, false },
+ { OPTION_COLUMN_SHORT, ALIAS_COLUMN_SHORT, NULL, column_short_usage, 1, false, false },
+ { OPTION_DNA_BASES, ALIAS_DNA_BASES, NULL, dna_bases_usage, 1, false, false },
+ { OPTION_MAX_LINE_LEN, ALIAS_MAX_LINE_LEN, NULL, max_line_len_usage, 1, true, false },
+ { OPTION_LINE_INDENT, ALIAS_LINE_INDENT, NULL, line_indent_usage, 1, true, false },
+ { OPTION_FILTER, ALIAS_FILTER, NULL, filter_usage, 1, true, false },
+ { OPTION_FORMAT, ALIAS_FORMAT, NULL, format_usage, 1, true, false },
+ { OPTION_ID_RANGE, ALIAS_ID_RANGE, NULL, id_range_usage, 1, false, false },
+ { OPTION_WITHOUT_SRA, ALIAS_WITHOUT_SRA, NULL, without_sra_usage, 1, false, false },
+ { OPTION_EXCLUDED_COLUMNS, ALIAS_EXCLUDED_COLUMNS, NULL, excluded_columns_usage, 1, true, false },
+ { OPTION_BOOLEAN, ALIAS_BOOLEAN, NULL, boolean_usage, 1, true, false },
+ { OPTION_NUMELEM, ALIAS_NUMELEM, NULL, numelem_usage, 1, false, false },
+ { OPTION_NUMELEMSUM, ALIAS_NUMELEMSUM, NULL, numelemsum_usage, 1, false, false },
+ { OPTION_SHOW_BLOBBING, NULL, NULL, show_blobbing_usage, 1, false, false },
+ { OPTION_ENUM_PHYS, NULL, NULL, enum_phys_usage, 1, false, false },
+ { OPTION_ENUM_READABLE, NULL, NULL, enum_readable_usage, 1, false, false },
+ { OPTION_OBJVER, ALIAS_OBJVER, NULL, objver_usage, 1, false, false },
+ { OPTION_OBJTS, NULL, NULL, objts_usage, 1, false, false },
+ { OPTION_OBJTYPE, ALIAS_OBJTYPE, NULL, objtype_usage, 1, false, false },
+ { OPTION_IDX_ENUM, NULL, NULL, idx_enum_usage, 1, false, false },
+ { OPTION_IDX_RANGE, NULL, NULL, idx_range_usage, 1, true, false },
+ { OPTION_CUR_CACHE, NULL, NULL, cur_cache_usage, 1, true, false },
+ { OPTION_OUT_FILE, NULL, NULL, out_file_usage, 1, true, false },
+ { OPTION_OUT_PATH, NULL, NULL, out_path_usage, 1, true, false },
+ { OPTION_PHASE, NULL, NULL, NULL, 1, true, false },
+ { OPTION_GZIP, NULL, NULL, gzip_usage, 1, false, false },
+ { OPTION_BZIP2, NULL, NULL, bzip2_usage, 1, false, false },
+ { OPTION_OUT_BUF_SIZE, NULL, NULL, outbuf_size_usage, 1, true, false },
+ { OPTION_NO_MULTITHREAD, NULL, NULL, disable_mt_usage, 1, false, false },
+ { OPTION_INFO, NULL, NULL, info_usage, 1, false, false },
+ { OPTION_DIFF, NULL, NULL, NULL, 1, false, false },
+ { OPTION_SPOTGROUPS, NULL, NULL, spotgroup_usage, 1, false, false },
+ { OPTION_MERGE_RANGES, NULL, NULL, merge_ranges_usage, 1, false, false },
+ { OPTION_SPREAD, NULL, NULL, spread_usage, 1, false, false },
+ { OPTION_INTERACTIVE, NULL, NULL, interactive_usage, 1, false, false },
+ { OPTION_SLICE, NULL, NULL, slice_usage, 1, true, false }
};
const char UsageDefaultName[] = "vdb-dump";
@@ -203,61 +203,62 @@ rc_t CC Usage ( const Args * args )
KOutMsg ( "Options:\n" );
- HelpOptionLine ( ALIAS_ROW_ID_ON, OPTION_ROW_ID_ON, NULL, row_id_on_usage );
- HelpOptionLine ( ALIAS_LINE_FEED, OPTION_LINE_FEED, "line_feed", line_feed_usage );
- HelpOptionLine ( ALIAS_COLNAME_OFF, OPTION_COLNAME_OFF, NULL, colname_off_usage );
- HelpOptionLine ( ALIAS_IN_HEX, OPTION_IN_HEX, NULL, in_hex_usage );
- HelpOptionLine ( ALIAS_TABLE, OPTION_TABLE, "table", table_usage );
- HelpOptionLine ( ALIAS_ROWS, OPTION_ROWS, "rows", rows_usage );
- HelpOptionLine ( ALIAS_COLUMNS, OPTION_COLUMNS, "columns", columns_usage );
- HelpOptionLine ( ALIAS_SCHEMA, OPTION_SCHEMA, "schema", schema_usage );
- HelpOptionLine ( ALIAS_SCHEMA_DUMP, OPTION_SCHEMA_DUMP, NULL, schema_dump_usage );
- HelpOptionLine ( ALIAS_TABLE_ENUM, OPTION_TABLE_ENUM, NULL, table_enum_usage );
- HelpOptionLine ( ALIAS_COLUMN_ENUM, OPTION_COLUMN_ENUM, NULL, column_enum_usage );
- HelpOptionLine ( ALIAS_COLUMN_SHORT, OPTION_COLUMN_SHORT, NULL, column_short_usage );
- HelpOptionLine ( ALIAS_DNA_BASES, OPTION_DNA_BASES, "dna_bases", dna_bases_usage );
- HelpOptionLine ( ALIAS_MAX_LINE_LEN, OPTION_MAX_LINE_LEN, "max_length", max_line_len_usage );
- HelpOptionLine ( ALIAS_LINE_INDENT, OPTION_LINE_INDENT, "indent_width", line_indent_usage );
- HelpOptionLine ( ALIAS_FORMAT, OPTION_FORMAT, "format", format_usage );
-
- KOutMsg( " csv ..... comma separated values on one line\n" );
- KOutMsg( " xml ..... xml-style without complete xml-frame\n" );
- KOutMsg( " json .... json-style\n" );
- KOutMsg( " piped ... 1 line per cell: row-id, column-name: value\n" );
- KOutMsg( " tab ..... 1 line per row: tab-separated values only\n" );
- KOutMsg( " fastq ... FASTQ( 4 lines ) for each row\n" );
- KOutMsg( " fastq1 .. FASTQ( 4 lines ) for each fragment\n" );
- KOutMsg( " fasta ... FASTA( 2 lines ) for each fragment if possible\n" );
- KOutMsg( " fasta1 .. one FASTA-record for the whole accession (REFSEQ)\n" );
- KOutMsg( " fasta2 .. one FASTA-record for each REFERENCE in cSRA\n\n" );
-
- HelpOptionLine ( ALIAS_ID_RANGE, OPTION_ID_RANGE, NULL, id_range_usage );
- HelpOptionLine ( ALIAS_WITHOUT_SRA, OPTION_WITHOUT_SRA, NULL, without_sra_usage );
- HelpOptionLine ( ALIAS_EXCLUDED_COLUMNS, OPTION_EXCLUDED_COLUMNS, NULL, excluded_columns_usage );
- HelpOptionLine ( ALIAS_BOOLEAN, OPTION_BOOLEAN, NULL, boolean_usage );
- HelpOptionLine ( ALIAS_OBJVER, OPTION_OBJVER, NULL, objver_usage );
- HelpOptionLine ( NULL, OPTION_OBJTS, NULL, objts_usage );
- HelpOptionLine ( ALIAS_OBJTYPE, OPTION_OBJTYPE, NULL, objtype_usage );
- HelpOptionLine ( ALIAS_NUMELEM, OPTION_NUMELEM, NULL, numelem_usage );
- HelpOptionLine ( ALIAS_NUMELEMSUM, OPTION_NUMELEMSUM, NULL, numelemsum_usage );
- HelpOptionLine ( NULL, OPTION_SHOW_BLOBBING, NULL, show_blobbing_usage );
- HelpOptionLine ( NULL, OPTION_ENUM_PHYS, NULL, enum_phys_usage );
- HelpOptionLine ( NULL, OPTION_ENUM_READABLE, NULL, enum_readable_usage );
- HelpOptionLine ( NULL, OPTION_IDX_ENUM, NULL, idx_enum_usage );
- HelpOptionLine ( NULL, OPTION_IDX_RANGE, NULL, idx_range_usage );
- HelpOptionLine ( NULL, OPTION_CUR_CACHE, NULL, cur_cache_usage );
- HelpOptionLine ( NULL, OPTION_OUT_FILE, NULL, out_file_usage );
- HelpOptionLine ( NULL, OPTION_OUT_PATH, NULL, out_path_usage );
- HelpOptionLine ( NULL, OPTION_GZIP, NULL, gzip_usage );
- HelpOptionLine ( NULL, OPTION_BZIP2, NULL, bzip2_usage );
- HelpOptionLine ( NULL, OPTION_OUT_BUF_SIZE, NULL, outbuf_size_usage );
- HelpOptionLine ( NULL, OPTION_NO_MULTITHREAD, NULL, disable_mt_usage );
- HelpOptionLine ( NULL, OPTION_INFO, NULL, info_usage );
- HelpOptionLine ( NULL, OPTION_SPOTGROUPS, NULL, spotgroup_usage );
- /* HelpOptionLine ( NULL, OPTION_SRASCHEMA, NULL, sraschema_usage ); */
- HelpOptionLine ( NULL, OPTION_MERGE_RANGES, NULL, merge_ranges_usage );
- HelpOptionLine ( NULL, OPTION_SPREAD, NULL, spread_usage );
-
+ HelpOptionLine ( ALIAS_ROW_ID_ON, OPTION_ROW_ID_ON, NULL, row_id_on_usage );
+ HelpOptionLine ( ALIAS_LINE_FEED, OPTION_LINE_FEED, "line_feed", line_feed_usage );
+ HelpOptionLine ( ALIAS_COLNAME_OFF, OPTION_COLNAME_OFF, NULL, colname_off_usage );
+ HelpOptionLine ( ALIAS_IN_HEX, OPTION_IN_HEX, NULL, in_hex_usage );
+ HelpOptionLine ( ALIAS_TABLE, OPTION_TABLE, "table", table_usage );
+ HelpOptionLine ( ALIAS_ROWS, OPTION_ROWS, "rows", rows_usage );
+ HelpOptionLine ( ALIAS_COLUMNS, OPTION_COLUMNS, "columns", columns_usage );
+ HelpOptionLine ( ALIAS_SCHEMA, OPTION_SCHEMA, "schema", schema_usage );
+ HelpOptionLine ( ALIAS_SCHEMA_DUMP, OPTION_SCHEMA_DUMP, NULL, schema_dump_usage );
+ HelpOptionLine ( ALIAS_TABLE_ENUM, OPTION_TABLE_ENUM, NULL, table_enum_usage );
+ HelpOptionLine ( ALIAS_COLUMN_ENUM, OPTION_COLUMN_ENUM, NULL, column_enum_usage );
+ HelpOptionLine ( ALIAS_COLUMN_SHORT, OPTION_COLUMN_SHORT, NULL, column_short_usage );
+ HelpOptionLine ( ALIAS_DNA_BASES, OPTION_DNA_BASES, "dna_bases", dna_bases_usage );
+ HelpOptionLine ( ALIAS_MAX_LINE_LEN, OPTION_MAX_LINE_LEN, "max_length", max_line_len_usage );
+ HelpOptionLine ( ALIAS_LINE_INDENT, OPTION_LINE_INDENT, "indent_width", line_indent_usage );
+ HelpOptionLine ( ALIAS_FORMAT, OPTION_FORMAT, "format", format_usage );
+
+ KOutMsg( " csv ..... comma separated values on one line\n" );
+ KOutMsg( " xml ..... xml-style without complete xml-frame\n" );
+ KOutMsg( " json .... json-style\n" );
+ KOutMsg( " piped ... 1 line per cell: row-id, column-name: value\n" );
+ KOutMsg( " tab ..... 1 line per row: tab-separated values only\n" );
+ KOutMsg( " fastq ... FASTQ( 4 lines ) for each row\n" );
+ KOutMsg( " fastq1 .. FASTQ( 4 lines ) for each fragment\n" );
+ KOutMsg( " fasta ... FASTA( 2 lines ) for each fragment if possible\n" );
+ KOutMsg( " fasta1 .. one FASTA-record for the whole accession (REFSEQ)\n" );
+ KOutMsg( " fasta2 .. one FASTA-record for each REFERENCE in cSRA\n" );
+ KOutMsg( " qual .... QUAL( 2 lines ) for each row\n" );
+ KOutMsg( " qual1 ... QUAL( 2 lines ) for each fragment if possible\n\n" );
+
+ HelpOptionLine ( ALIAS_ID_RANGE, OPTION_ID_RANGE, NULL, id_range_usage );
+ HelpOptionLine ( ALIAS_WITHOUT_SRA, OPTION_WITHOUT_SRA, NULL, without_sra_usage );
+ HelpOptionLine ( ALIAS_EXCLUDED_COLUMNS, OPTION_EXCLUDED_COLUMNS,NULL, excluded_columns_usage );
+ HelpOptionLine ( ALIAS_BOOLEAN, OPTION_BOOLEAN, NULL, boolean_usage );
+ HelpOptionLine ( ALIAS_OBJVER, OPTION_OBJVER, NULL, objver_usage );
+ HelpOptionLine ( NULL, OPTION_OBJTS, NULL, objts_usage );
+ HelpOptionLine ( ALIAS_OBJTYPE, OPTION_OBJTYPE, NULL, objtype_usage );
+ HelpOptionLine ( ALIAS_NUMELEM, OPTION_NUMELEM, NULL, numelem_usage );
+ HelpOptionLine ( ALIAS_NUMELEMSUM, OPTION_NUMELEMSUM, NULL, numelemsum_usage );
+ HelpOptionLine ( NULL, OPTION_SHOW_BLOBBING, NULL, show_blobbing_usage );
+ HelpOptionLine ( NULL, OPTION_ENUM_PHYS, NULL, enum_phys_usage );
+ HelpOptionLine ( NULL, OPTION_ENUM_READABLE, NULL, enum_readable_usage );
+ HelpOptionLine ( NULL, OPTION_IDX_ENUM, NULL, idx_enum_usage );
+ HelpOptionLine ( NULL, OPTION_IDX_RANGE, NULL, idx_range_usage );
+ HelpOptionLine ( NULL, OPTION_CUR_CACHE, NULL, cur_cache_usage );
+ HelpOptionLine ( NULL, OPTION_OUT_FILE, NULL, out_file_usage );
+ HelpOptionLine ( NULL, OPTION_OUT_PATH, NULL, out_path_usage );
+ HelpOptionLine ( NULL, OPTION_GZIP, NULL, gzip_usage );
+ HelpOptionLine ( NULL, OPTION_BZIP2, NULL, bzip2_usage );
+ HelpOptionLine ( NULL, OPTION_OUT_BUF_SIZE, NULL, outbuf_size_usage );
+ HelpOptionLine ( NULL, OPTION_NO_MULTITHREAD, NULL, disable_mt_usage );
+ HelpOptionLine ( NULL, OPTION_INFO, NULL, info_usage );
+ HelpOptionLine ( NULL, OPTION_SPOTGROUPS, NULL, spotgroup_usage );
+ HelpOptionLine ( NULL, OPTION_MERGE_RANGES, NULL, merge_ranges_usage );
+ HelpOptionLine ( NULL, OPTION_SPREAD, NULL, spread_usage );
+
HelpOptionsStandard ();
HelpVersion ( fullpath, KAppVersion() );
@@ -734,13 +735,13 @@ static rc_t vdm_walk_sections( const VDatabase * base_db, const VDatabase ** sub
static void vdm_clear_recorded_errors( void )
{
- rc_t rc;
- const char * filename;
- const char * funcname;
- uint32_t line_nr;
- while ( GetUnreadRCInfo ( &rc, &filename, &funcname, &line_nr ) )
- {
- }
+ rc_t rc; /* the four locals only receive the output of GetUnreadRCInfo(); none is read afterwards */
+ const char * filename;
+ const char * funcname;
+ uint32_t line_nr;
+ while ( GetUnreadRCInfo ( &rc, &filename, &funcname, &line_nr ) ) /* repeat until it reports false */
+ {
+ } /* empty on purpose: presumably each call consumes one recorded entry, the values are discarded */
}
@@ -751,7 +752,7 @@ static rc_t vdm_check_table_empty( const VTable * tab )
DISP_RC( rc, "VTableIsEmpty() failed" );
if ( rc == 0 && empty )
{
- vdm_clear_recorded_errors();
+ vdm_clear_recorded_errors();
KOutMsg( "the requested table is empty!\n" );
rc = RC( rcVDB, rcNoTarg, rcConstructing, rcTable, rcEmpty );
}
@@ -815,71 +816,71 @@ static rc_t vdm_dump_opened_database( const p_dump_context ctx,
/* ********************************************************************** */
static rc_t vdm_show_tab_spread( const p_dump_context ctx,
- const VTable *my_table )
+ const VTable *my_table ) /* opens a cursor on my_table for the selected columns and runs vdcd_collect_spread() over the selected rows */
{
- const VCursor * cursor;
- rc_t rc = VTableCreateCachedCursorRead( my_table, &cursor, ctx->cur_cache_size );
- DISP_RC( rc, "VTableCreateCursorRead() failed" );
- if ( rc == 0 )
- {
- col_defs * cols;
- if ( !vdcd_init( &cols, ctx->max_line_len ) )
- {
- rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
- DISP_RC( rc, "col_defs_init() failed" );
- }
- if ( rc == 0 )
- {
- uint32_t n = vdm_extract_or_parse_columns( ctx, my_table, cols );
- if ( n < 1 )
- rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
- else
- {
- n = vdcd_add_to_cursor( cols, cursor );
- if ( n < 1 )
- rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
- else
- {
- rc = VCursorOpen( cursor );
- DISP_RC( rc, "VCursorOpen() failed" );
- if ( rc == 0 )
- {
- int64_t first;
- uint64_t count;
- rc = VCursorIdRange( cursor, 0, &first, &count );
- DISP_RC( rc, "VCursorIdRange( spread ) failed" );
- if ( rc == 0 )
- {
- if ( ctx->rows == NULL )
- {
- rc = num_gen_make_from_range( &ctx->rows, first, count );
- DISP_RC( rc, "num_gen_make_from_range() failed" );
- }
- else
- {
- if ( count > 0 )
- {
- rc = num_gen_trim( ctx->rows, first, count );
- DISP_RC( rc, "num_gen_trim() failed" );
- }
- }
-
- if ( rc == 0 )
- {
- if ( num_gen_empty( ctx->rows ) )
- rc = RC( rcExe, rcDatabase, rcReading, rcRange, rcEmpty );
- else
- rc = vdcd_collect_spread( ctx->rows, cols, cursor ); /* is in vdb-dump-coldefs.c */
- }
- }
- }
- }
- }
- vdcd_destroy( cols );
- }
- VCursorRelease( cursor );
- }
- return rc;
+ const VCursor * cursor;
+ rc_t rc = VTableCreateCachedCursorRead( my_table, &cursor, ctx->cur_cache_size );
+ DISP_RC( rc, "VTableCreateCursorRead() failed" );
+ if ( rc == 0 )
+ {
+ col_defs * cols;
+ if ( !vdcd_init( &cols, ctx->max_line_len ) )
+ {
+ rc = RC( rcVDB, rcNoTarg, rcConstructing, rcMemory, rcExhausted );
+ DISP_RC( rc, "col_defs_init() failed" );
+ }
+ if ( rc == 0 ) /* cols is only used and destroyed inside this branch, i.e. after vdcd_init() succeeded */
+ {
+ uint32_t n = vdm_extract_or_parse_columns( ctx, my_table, cols );
+ if ( n < 1 ) /* a result below 1 is treated as an invalid parameter */
+ rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+ else
+ {
+ n = vdcd_add_to_cursor( cols, cursor );
+ if ( n < 1 ) /* same rule for the result of adding the columns to the cursor */
+ rc = RC( rcVDB, rcNoTarg, rcConstructing, rcParam, rcInvalid );
+ else
+ {
+ rc = VCursorOpen( cursor );
+ DISP_RC( rc, "VCursorOpen() failed" );
+ if ( rc == 0 )
+ {
+ int64_t first;
+ uint64_t count;
+ rc = VCursorIdRange( cursor, 0, &first, &count );
+ DISP_RC( rc, "VCursorIdRange( spread ) failed" );
+ if ( rc == 0 )
+ {
+ if ( ctx->rows == NULL ) /* no row selection yet: build one from the id range of the cursor */
+ {
+ rc = num_gen_make_from_range( &ctx->rows, first, count );
+ DISP_RC( rc, "num_gen_make_from_range() failed" );
+ }
+ else
+ {
+ if ( count > 0 ) /* presumably clips the existing selection to the id range - confirm in num_gen_trim() */
+ {
+ rc = num_gen_trim( ctx->rows, first, count );
+ DISP_RC( rc, "num_gen_trim() failed" );
+ }
+ }
+
+ if ( rc == 0 )
+ {
+ if ( num_gen_empty( ctx->rows ) ) /* an empty selection is reported as an error instead of being processed */
+ rc = RC( rcExe, rcDatabase, rcReading, rcRange, rcEmpty );
+ else
+ rc = vdcd_collect_spread( ctx->rows, cols, cursor ); /* is in vdb-dump-coldefs.c */
+ }
+ }
+ }
+ }
+ }
+ vdcd_destroy( cols );
+ }
+ VCursorRelease( cursor ); /* released on every path on which the cursor was created */
+ }
+ return rc;
}
static rc_t vdm_show_db_spread( const p_dump_context ctx,
@@ -924,24 +925,24 @@ static rc_t vdm_dump_tab_schema( const p_dump_context ctx,
DISP_RC( rc, "VTableOpenSchema() failed" );
if ( rc == 0 )
{
- if ( ctx->columns == NULL )
- {
- /* the user did not ask to inspect a specific object, we look for
- the Typespec of the table... */
- char buffer[ 4096 ];
- rc = VTableTypespec ( my_table, buffer, sizeof buffer );
- DISP_RC( rc, "VTableTypespec() failed" );
- if ( rc == 0 )
- rc = VSchemaDump( my_schema, sdmPrint, buffer,
- vdm_schema_dump_flush, stdout );
-
- }
- else
- {
- /* the user did ask to inspect a specific object */
- rc = VSchemaDump( my_schema, sdmPrint, ctx->columns,
- vdm_schema_dump_flush, stdout );
- }
+ if ( ctx->columns == NULL )
+ {
+ /* the user did not ask to inspect a specific object, we look for
+ the Typespec of the table... */
+ char buffer[ 4096 ];
+ rc = VTableTypespec ( my_table, buffer, sizeof buffer );
+ DISP_RC( rc, "VTableTypespec() failed" );
+ if ( rc == 0 )
+ rc = VSchemaDump( my_schema, sdmPrint, buffer,
+ vdm_schema_dump_flush, stdout );
+
+ }
+ else
+ {
+ /* the user did ask to inspect a specific object */
+ rc = VSchemaDump( my_schema, sdmPrint, ctx->columns,
+ vdm_schema_dump_flush, stdout );
+ }
DISP_RC( rc, "VSchemaDump() failed" );
VSchemaRelease( my_schema );
}
@@ -961,47 +962,47 @@ my_database [IN] ... open database needed for vdb-calls
static rc_t vdm_dump_db_schema( const p_dump_context ctx,
const VDatabase *my_database )
{
- rc_t rc = 0;
- if ( ctx->table_defined )
- {
- /* the user has given a database as object, but asks to inspect a given table */
- const VTable *my_table;
+ rc_t rc = 0;
+ if ( ctx->table_defined )
+ {
+ /* the user has given a database as object, but asks to inspect a given table */
+ const VTable *my_table;
rc = vdm_open_table_by_path( my_database, ctx->table, &my_table );
- if ( rc == 0 )
- {
- rc = vdm_dump_tab_schema( ctx, my_table );
- VTableRelease( my_table );
- }
- }
- else
- {
- /* the user has given a database as object, but did not ask for a specific table */
- const VSchema * my_schema;
- rc = VDatabaseOpenSchema( my_database, &my_schema );
- DISP_RC( rc, "VDatabaseOpenSchema() failed" );
- if ( rc == 0 )
- {
- if ( ctx->columns == NULL )
- {
- /* the used did not ask to inspect a specifiy object, we look for
- the Typespec of the database... */
- char buffer[ 4096 ];
- rc = VDatabaseTypespec ( my_database, buffer, sizeof buffer );
- DISP_RC( rc, "VDatabaseTypespec() failed" );
- if ( rc == 0 )
- rc = VSchemaDump( my_schema, sdmPrint, buffer,
- vdm_schema_dump_flush, stdout );
- }
- else
- {
- /* the user did ask to inspect a specific object */
- rc = VSchemaDump( my_schema, sdmPrint, ctx->columns,
- vdm_schema_dump_flush, stdout );
- }
- DISP_RC( rc, "VSchemaDump() failed" );
- VSchemaRelease( my_schema );
- }
- }
+ if ( rc == 0 )
+ {
+ rc = vdm_dump_tab_schema( ctx, my_table );
+ VTableRelease( my_table );
+ }
+ }
+ else
+ {
+ /* the user has given a database as object, but did not ask for a specific table */
+ const VSchema * my_schema;
+ rc = VDatabaseOpenSchema( my_database, &my_schema );
+ DISP_RC( rc, "VDatabaseOpenSchema() failed" );
+ if ( rc == 0 )
+ {
+ if ( ctx->columns == NULL )
+ {
+ /* the used did not ask to inspect a specifiy object, we look for
+ the Typespec of the database... */
+ char buffer[ 4096 ];
+ rc = VDatabaseTypespec ( my_database, buffer, sizeof buffer );
+ DISP_RC( rc, "VDatabaseTypespec() failed" );
+ if ( rc == 0 )
+ rc = VSchemaDump( my_schema, sdmPrint, buffer,
+ vdm_schema_dump_flush, stdout );
+ }
+ else
+ {
+ /* the user did ask to inspect a specific object */
+ rc = VSchemaDump( my_schema, sdmPrint, ctx->columns,
+ vdm_schema_dump_flush, stdout );
+ }
+ DISP_RC( rc, "VSchemaDump() failed" );
+ VSchemaRelease( my_schema );
+ }
+ }
return rc;
}
@@ -1445,13 +1446,13 @@ static rc_t vdm_print_tab_id_range( const p_dump_context ctx, const VTable *my_t
uint64_t count;
uint32_t idx = 0;
-
+
/* calling with idx = 0 means: let the cursor find out the min/max values of
- all open columns...
-
- vdcd_get_first_none_static_column_idx( my_col_defs, my_cursor, &idx );
- */
-
+ all open columns...
+
+ vdcd_get_first_none_static_column_idx( my_col_defs, my_cursor, &idx );
+ */
+
rc = VCursorIdRange( my_cursor, idx, &first, &count );
DISP_RC( rc, "VCursorIdRange() failed" );
if ( rc == 0 )
@@ -1494,67 +1495,67 @@ static rc_t vdm_print_db_id_range( const p_dump_context ctx, const VDatabase *my
static rc_t vdm_enum_index( const KTable * my_ktable, uint32_t idx_nr, const char * idx_name )
{
- rc_t rc = KOutMsg( "idx #%u: %s", idx_nr + 1, idx_name );
- if ( rc == 0 )
- {
- const KIndex * my_idx;
- rc = KTableOpenIndexRead ( my_ktable, &my_idx, "%s", idx_name );
- if ( rc != 0 )
- rc = KOutMsg( " (cannot open)" );
- else
- {
- uint32_t idx_version;
- rc = KIndexVersion ( my_idx, &idx_version );
- if ( rc != 0 )
- rc = KOutMsg( " V?.?.?" );
- else
- rc = KOutMsg( " V%V", idx_version );
-
- if ( rc == 0 )
- {
- KIdxType idx_type;
- rc = KIndexType ( my_idx, &idx_type );
- if ( rc != 0 )
- rc = KOutMsg( " type = ?" );
- else
- {
- switch ( idx_type &~ kitProj )
- {
- case kitText : rc = KOutMsg( " type = Text" ); break;
- case kitU64 : rc = KOutMsg( " type = U64" ); break;
- default : rc = KOutMsg( " type = unknown" ); break;
- }
- if ( rc == 0 && ( ( idx_type & kitProj ) == kitProj ) )
- rc = KOutMsg( " reverse" );
- }
- }
-
- if ( rc == 0 )
- {
- bool locked = KIndexLocked ( my_idx );
- if ( locked )
- rc = KOutMsg( " locked" );
- }
- KIndexRelease( my_idx );
- }
- }
- if ( rc == 0 )
- rc = KOutMsg( "\n" );
- return rc;
+ rc_t rc = KOutMsg( "idx #%u: %s", idx_nr + 1, idx_name );
+ if ( rc == 0 )
+ {
+ const KIndex * my_idx;
+ rc = KTableOpenIndexRead ( my_ktable, &my_idx, "%s", idx_name );
+ if ( rc != 0 )
+ rc = KOutMsg( " (cannot open)" );
+ else
+ {
+ uint32_t idx_version;
+ rc = KIndexVersion ( my_idx, &idx_version );
+ if ( rc != 0 )
+ rc = KOutMsg( " V?.?.?" );
+ else
+ rc = KOutMsg( " V%V", idx_version );
+
+ if ( rc == 0 )
+ {
+ KIdxType idx_type;
+ rc = KIndexType ( my_idx, &idx_type );
+ if ( rc != 0 )
+ rc = KOutMsg( " type = ?" );
+ else
+ {
+ switch ( idx_type &~ kitProj )
+ {
+ case kitText : rc = KOutMsg( " type = Text" ); break;
+ case kitU64 : rc = KOutMsg( " type = U64" ); break;
+ default : rc = KOutMsg( " type = unknown" ); break;
+ }
+ if ( rc == 0 && ( ( idx_type & kitProj ) == kitProj ) )
+ rc = KOutMsg( " reverse" );
+ }
+ }
+
+ if ( rc == 0 )
+ {
+ bool locked = KIndexLocked ( my_idx );
+ if ( locked )
+ rc = KOutMsg( " locked" );
+ }
+ KIndexRelease( my_idx );
+ }
+ }
+ if ( rc == 0 )
+ rc = KOutMsg( "\n" );
+ return rc;
}
static rc_t vdm_enum_tab_index( const p_dump_context ctx, const VTable *my_table )
{
- const KTable * my_ktable;
- rc_t rc = VTableOpenKTableRead( my_table, &my_ktable );
- DISP_RC( rc, "VTableOpenKTableRead() failed" );
- if ( rc == 0 )
- {
- KNamelist *idx_names;
- rc = KTableListIdx ( my_ktable, &idx_names );
- if ( rc == 0 )
- {
+ const KTable * my_ktable;
+ rc_t rc = VTableOpenKTableRead( my_table, &my_ktable );
+ DISP_RC( rc, "VTableOpenKTableRead() failed" );
+ if ( rc == 0 )
+ {
+ KNamelist *idx_names;
+ rc = KTableListIdx ( my_ktable, &idx_names );
+ if ( rc == 0 )
+ {
uint32_t count;
rc = KNamelistCount( idx_names, &count );
if ( rc == 0 )
@@ -1565,16 +1566,16 @@ static rc_t vdm_enum_tab_index( const p_dump_context ctx, const VTable *my_table
const char * idx_name = NULL;
rc = KNamelistGet( idx_names, i, &idx_name );
if ( rc == 0 && idx_name != NULL )
- rc = vdm_enum_index( my_ktable, i, idx_name );
+ rc = vdm_enum_index( my_ktable, i, idx_name );
}
}
- KNamelistRelease( idx_names );
- }
- else
- rc = KOutMsg( "no index available\n" );
- KTableRelease( my_ktable );
- }
- return rc;
+ KNamelistRelease( idx_names );
+ }
+ else
+ rc = KOutMsg( "no index available\n" );
+ KTableRelease( my_ktable );
+ }
+ return rc;
}
static rc_t vdm_enum_db_index( const p_dump_context ctx, const VDatabase *my_database )
@@ -1594,20 +1595,21 @@ static rc_t vdm_enum_db_index( const p_dump_context ctx, const VDatabase *my_dat
static rc_t vdm_range_tab_index( const p_dump_context ctx, const VTable *my_table )
{
- const KTable * my_ktable;
- rc_t rc = VTableOpenKTableRead( my_table, &my_ktable );
- DISP_RC( rc, "VTableOpenKTableRead() failed" );
- if ( rc == 0 )
- {
+ const KTable * my_ktable;
+ rc_t rc = VTableOpenKTableRead( my_table, &my_ktable );
+ if ( rc != 0 )
+ ErrMsg( "VTableOpenKTableRead() -> %R", rc );
+ else
+ {
const KIndex * my_kindex;
rc = KTableOpenIndexRead ( my_ktable, &my_kindex, "%s", ctx->idx_range );
- DISP_RC( rc, "KTableOpenIndexRead() failed" );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "KTableOpenIndexRead() -> %R", rc );
+ else
{
int64_t start;
uint64_t count;
rc_t rc2 = 0;
-
for ( start = 1; rc2 == 0 && rc == 0; start += count )
{
size_t key_size;
@@ -1617,12 +1619,11 @@ static rc_t vdm_range_tab_index( const p_dump_context ctx, const VTable *my_tabl
if ( rc2 == 0 )
rc = KOutMsg( "%.*s : %lu ... %lu\n", ( int )key_size, key, start, start + count - 1 );
}
-
KIndexRelease( my_kindex );
}
- KTableRelease( my_ktable );
- }
- return rc;
+ KTableRelease( my_ktable );
+ }
+ return rc;
}
@@ -1643,71 +1644,83 @@ static rc_t vdm_range_db_index( const p_dump_context ctx, const VDatabase *my_da
static rc_t vdm_show_tab_spotgroups( const p_dump_context ctx, const VTable *my_table )
{
- const KMetadata * meta = NULL;
- rc_t rc = VTableOpenMetadataRead( my_table, &meta );
- DISP_RC( rc, "VTableOpenMetadataRead() failed" );
- if ( rc == 0 )
- {
- const KMDataNode * spot_groups_node;
- rc = KMetadataOpenNodeRead( meta, &spot_groups_node, "STATS/SPOT_GROUP" );
- DISP_RC( rc, "KMetadataOpenNodeRead( STATS/SPOT_GROUP ) failed" );
- if ( rc == 0 )
- {
- KNamelist * spot_groups;
- rc = KMDataNodeListChildren( spot_groups_node, &spot_groups );
- DISP_RC( rc, "KMDataNodeListChildren() failed" );
- if ( rc == 0 )
- {
- uint32_t count;
- rc = KNamelistCount( spot_groups, &count );
- if ( rc == 0 && count > 0 )
- {
- uint32_t i;
- for ( i = 0; i < count && rc == 0; ++i )
- {
- const char * name = NULL;
- rc = KNamelistGet( spot_groups, i, &name );
- if ( rc == 0 && name != NULL )
- {
- const KMDataNode * spot_count_node;
- rc = KMDataNodeOpenNodeRead( spot_groups_node, &spot_count_node, "%s/SPOT_COUNT", name );
- DISP_RC( rc, "KMDataNodeOpenNodeRead() failed" );
- if ( rc == 0 )
- {
- uint64_t spot_count = 0;
- rc = KMDataNodeReadAsU64( spot_count_node, &spot_count );
- if ( rc == 0 )
- {
- if ( spot_count > 0 )
- {
- const KMDataNode * spot_group_node;
- rc = KMDataNodeOpenNodeRead( spot_groups_node, &spot_group_node, name );
- if ( rc == 0 )
- {
- char name_attr[ 2048 ];
- size_t num_writ;
- rc = KMDataNodeReadAttr( spot_group_node, "name", name_attr, sizeof name_attr, &num_writ );
- rc = KOutMsg( "%s\t%,lu\n", rc == 0 ? name_attr : name, spot_count );
- KMDataNodeRelease( spot_group_node );
- }
-
- }
- }
- else
- vdm_clear_recorded_errors();
-
- KMDataNodeRelease( spot_count_node );
- }
- }
- }
- }
- KNamelistRelease( spot_groups );
- }
- KMDataNodeRelease( spot_groups_node );
- }
- KMetadataRelease ( meta );
- }
- return rc;
+ const KMetadata * meta = NULL;
+ rc_t rc = VTableOpenMetadataRead( my_table, &meta );
+ if ( rc != 0 )
+ ErrMsg( "VTableOpenMetadataRead() -> %R", rc );
+ else
+ {
+ const KMDataNode * spot_groups_node;
+ rc = KMetadataOpenNodeRead( meta, &spot_groups_node, "STATS/SPOT_GROUP" );
+ if ( rc != 0 )
+ ErrMsg( "KMetadataOpenNodeRead( STATS/SPOT_GROUP ) -> %R", rc );
+ else
+ {
+ KNamelist * spot_groups;
+ rc = KMDataNodeListChildren( spot_groups_node, &spot_groups );
+ if ( rc != 0 )
+ ErrMsg( "KMDataNodeListChildren() -> %R", rc );
+ else
+ {
+ uint32_t count;
+ rc = KNamelistCount( spot_groups, &count );
+ if ( rc != 0 )
+ ErrMsg( "KNamelistCount() -> %R", rc );
+ else if ( count > 0 )
+ {
+ uint32_t i;
+ for ( i = 0; i < count && rc == 0; ++i )
+ {
+ const char * name = NULL;
+ rc = KNamelistGet( spot_groups, i, &name );
+ if ( rc != 0 ) /* name the call that actually failed; i is a uint32_t, hence %u */
+ ErrMsg( "KNamelistGet( %u ) -> %R", i, rc );
+ else if ( name != NULL )
+ {
+ const KMDataNode * spot_count_node;
+ rc = KMDataNodeOpenNodeRead( spot_groups_node, &spot_count_node, "%s/SPOT_COUNT", name );
+ if ( rc != 0 )
+ ErrMsg( "KMDataNodeOpenNodeRead() -> %R", rc );
+ else
+ {
+ uint64_t spot_count = 0;
+ rc = KMDataNodeReadAsU64( spot_count_node, &spot_count );
+ if ( rc != 0 )
+ {
+ ErrMsg( "KMDataNodeReadAsU64() -> %R", rc );
+ vdm_clear_recorded_errors();
+ }
+ else
+ {
+ if ( spot_count > 0 )
+ {
+ const KMDataNode * spot_group_node;
+ rc = KMDataNodeOpenNodeRead( spot_groups_node, &spot_group_node, name );
+ if ( rc != 0 )
+ ErrMsg( "KMDataNodeOpenNodeRead( '%s' ) -> %R", name, rc );
+ else
+ {
+ char name_attr[ 2048 ];
+ size_t num_writ;
+ rc = KMDataNodeReadAttr( spot_group_node, "name", name_attr, sizeof name_attr, &num_writ );
+ rc = KOutMsg( "%s\t%,lu\n", rc == 0 ? name_attr : name, spot_count );
+ KMDataNodeRelease( spot_group_node );
+ }
+
+ }
+ }
+ KMDataNodeRelease( spot_count_node );
+ }
+ }
+ }
+ }
+ KNamelistRelease( spot_groups );
+ }
+ KMDataNodeRelease( spot_groups_node );
+ }
+ KMetadataRelease ( meta );
+ }
+ return rc;
}
static rc_t vdm_show_db_spotgroups( const p_dump_context ctx, const VDatabase *my_database )
@@ -1751,8 +1764,9 @@ static rc_t vdm_dump_tab_fkt( const p_dump_context ctx,
vdh_parse_schema( my_manager, &my_schema, &(ctx->schema_list), true /*ctx->force_sra_schema*/ );
rc = VDBManagerOpenTableRead( my_manager, &my_table, my_schema, "%s", ctx->path );
- DISP_RC( rc, "VDBManagerOpenTableRead() failed" );
- if ( rc == 0 )
+ if ( rc != 0 ) /* ctx->path is a C string: print it with %s, %R is reserved for the rc_t */
+ ErrMsg( "VDBManagerOpenTableRead( '%s' ) -> %R", ctx->path, rc );
+ else
{
rc = vdm_check_table_empty( my_table );
if ( rc == 0 )
@@ -1761,8 +1775,11 @@ static rc_t vdm_dump_tab_fkt( const p_dump_context ctx,
}
if ( my_schema != NULL )
- VSchemaRelease( my_schema );
-
+ {
+ rc_t rc2 = VSchemaRelease( my_schema ); /* separate code: do not overwrite the rc computed above */
+ if ( rc2 != 0 ) ErrMsg( "VSchemaRelease() -> %R", rc2 );
+ if ( rc == 0 ) rc = rc2; /* a release failure is returned only if nothing failed before */
+ }
return rc;
}
@@ -1798,7 +1815,7 @@ static rc_t vdm_dump_table( const p_dump_context ctx, const VDBManager *my_manag
else if ( ctx->table_enum_requested )
{
KOutMsg( "cannot enum tables of a table-object\n" );
- vdm_clear_recorded_errors();
+ vdm_clear_recorded_errors();
rc = 0;
}
else if ( enum_col_request( ctx ) )
@@ -1817,19 +1834,18 @@ static rc_t vdm_dump_table( const p_dump_context ctx, const VDBManager *my_manag
{
rc = vdm_dump_tab_fkt( ctx, my_manager, vdm_range_tab_index );
}
- else if ( ctx->show_spotgroups )
- {
- rc = vdm_dump_tab_fkt( ctx, my_manager, vdm_show_tab_spotgroups );
- }
- else if ( ctx->show_spread )
- {
- rc = vdm_dump_tab_fkt( ctx, my_manager, vdm_show_tab_spread );
- }
+ else if ( ctx->show_spotgroups )
+ {
+ rc = vdm_dump_tab_fkt( ctx, my_manager, vdm_show_tab_spotgroups );
+ }
+ else if ( ctx->show_spread )
+ {
+ rc = vdm_dump_tab_fkt( ctx, my_manager, vdm_show_tab_spread );
+ }
else
{
rc = vdm_dump_tab_fkt( ctx, my_manager, vdm_dump_opened_table );
}
-
return rc;
}
@@ -1856,43 +1872,69 @@ ctx [IN] ... contains source-path, tablename, columns and row-range as ascii-
db_fkt [IN] ... function to be called if directory, manager and database are open
*************************************************************************************/
static rc_t vdm_dump_db_fkt( const p_dump_context ctx,
- const VDBManager *my_manager,
+ const VDBManager * mgr,
const db_fkt_t db_fkt )
{
- const VDatabase *my_database;
- VSchema *my_schema = NULL;
+ const VDatabase *db;
+ VSchema *schema = NULL;
- rc_t rc;
+ rc_t rc, rc2; /* rc: result handed back to the caller, rc2: result of the release calls */
- vdh_parse_schema( my_manager, &my_schema, &(ctx->schema_list), true /* ctx->force_sra_schema */ );
+ vdh_parse_schema( mgr, &schema, &(ctx->schema_list), true /* ctx->force_sra_schema */ );
- rc = VDBManagerOpenDBRead( my_manager, &my_database, my_schema, "%s", ctx->path );
- DISP_RC( rc, "VDBManagerOpenDBRead() failed" );
- if ( rc == 0 )
+ rc = VDBManagerOpenDBRead( mgr, &db, schema, "%s", ctx->path );
+ if ( rc != 0 )
+ ErrMsg( "VDBManagerOpenDBRead( '%s' ) -> %R", ctx->path, rc );
+ else
{
- bool table_defined = ( ctx->table != NULL );
- if ( !table_defined )
- {
- table_defined = vdh_take_this_table_from_db( ctx, my_database, "SEQUENCE" );
- if ( !table_defined )
- table_defined = vdh_take_1st_table_from_db( ctx, my_database );
- }
- if ( table_defined || ctx->table_enum_requested )
- {
- rc = db_fkt( ctx, my_database ); /* fkt-pointer is called */
- }
+ KNamelist *tbl_names;
+ rc = VDatabaseListTbl( db, &tbl_names );
+ if ( rc != 0 )
+ ErrMsg( "VDatabaseListTbl( '%s' ) -> %R", ctx->path, rc );
else
{
- LOGMSG( klogInfo, "opened as vdb-database, but no table found" );
- ctx->usage_requested = true;
+ if ( ctx->table == NULL )
+ {
+ /* the user DID NOT specify a table: by default assume the SEQUENCE-table */
+ bool table_found = vdh_take_this_table_from_list( ctx, tbl_names, "SEQUENCE" );
+ /* if there is no SEQUENCE-table, just pick the first table available... */
+ if ( !table_found )
+ vdh_take_1st_table_from_db( ctx, tbl_names );
+ }
+ else
+ {
+ /* the user DID specify a table: check if the database has a table with this name,
+ if not try with a sub-string */
+ String value;
+ StringInitCString( &value, ctx->table );
+ if ( !list_contains_value( tbl_names, &value ) )
+ vdh_take_this_table_from_list( ctx, tbl_names, ctx->table );
+ }
+
+ if ( ctx->table != NULL || ctx->table_enum_requested )
+ {
+ rc = db_fkt( ctx, db ); /* fkt-pointer is called */
+ }
+ else
+ {
+ LOGMSG( klogInfo, "opened as vdb-database, but no table found" );
+ ctx->usage_requested = true;
+ }
+ rc2 = KNamelistRelease( tbl_names ); /* must not overwrite the result of db_fkt */
+ if ( rc2 != 0 ) ErrMsg( "KNamelistRelease() -> %R", rc2 );
+ if ( rc == 0 ) rc = rc2; /* a release failure is returned only if nothing failed before */
}
- VDatabaseRelease( my_database );
+ rc2 = VDatabaseRelease( db );
+ if ( rc2 != 0 ) ErrMsg( "VDatabaseRelease() -> %R", rc2 );
+ if ( rc == 0 ) rc = rc2;
}
- if ( my_schema != NULL )
+ if ( schema != NULL )
{
- VSchemaRelease( my_schema );
+ rc2 = VSchemaRelease( schema );
+ if ( rc2 != 0 ) ErrMsg( "VSchemaRelease() -> %R", rc2 );
+ if ( rc == 0 ) rc = rc2;
}
-
+
return rc;
}
@@ -1955,8 +1997,9 @@ static rc_t vdm_print_objver( const p_dump_context ctx, const VDBManager *mgr )
{
ver_t version;
rc_t rc = VDBManagerGetObjVersion ( mgr, &version, ctx->path );
- DISP_RC( rc, "VDBManagerGetObjVersion() failed" );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "VDBManagerGetObjVersion( '%s' ) -> %R", ctx->path, rc );
+ else
rc = KOutMsg( "%V\n", version );
return rc;
}
@@ -1964,8 +2007,9 @@ static rc_t vdm_print_objver( const p_dump_context ctx, const VDBManager *mgr )
static rc_t vdm_print_objts ( const p_dump_context ctx, const VDBManager *mgr )
{
KTime_t timestamp;
- rc_t rc = VDBManagerGetObjModDate ( mgr, &timestamp, ctx-> path );
- DISP_RC ( rc, "VDBManagerGetObjModDate () failed" );
+ rc_t rc = VDBManagerGetObjModDate ( mgr, &timestamp, ctx->path );
+ if ( rc != 0 )
+ ErrMsg( "VDBManagerGetObjModDate( '%s' ) -> %R", ctx->path, rc );
if ( rc == 0 )
{
KTime kt;
@@ -2029,7 +2073,7 @@ static rc_t vdb_main_one_obj_by_pathtype( const p_dump_context ctx,
"p=%s", ctx->path ) );
if ( vdco_schema_count( ctx ) == 0 )
{
- LOGERR( klogInt, rc, "Maybe it is a legacy table. If so, specify a schema with the -S option" );
+ LOGERR( klogInt, rc, "Maybe it is a legacy table. If so, specify a schema with the -S option" );
}
break;
}
@@ -2120,38 +2164,38 @@ ctx [IN] ... contains path, tablename, columns, row-range etc.
***************************************************************************/
static rc_t vdm_main( const p_dump_context ctx, Args * args )
{
- rc_t rc, rc1;
KDirectory *dir;
-
- rc = KDirectoryNativeDir( &dir );
- DISP_RC( rc, "KDirectoryNativeDir() failed" );
- if ( rc == 0 )
+ rc_t rc1, rc = KDirectoryNativeDir( &dir );
+ if ( rc != 0 )
+ ErrMsg( "KDirectoryNativeDir() -> %R", rc );
+ else
{
const VDBManager *mgr;
-
- rc = VDBManagerMakeRead ( &mgr, dir );
- DISP_RC( rc, "VDBManagerMakeRead() failed" );
- if ( rc == 0 )
+ rc = VDBManagerMakeRead( &mgr, dir );
+ if ( rc != 0 )
+ ErrMsg( "VDBManagerMakeRead() -> %R", rc );
+ else
{
if ( ctx->disable_multithreading )
{
- rc = VDBManagerDisablePagemapThread ( mgr );
- DISP_RC( rc, "VDBManagerDisablePagemapThread() failed" );
- rc = 0;
+ rc = VDBManagerDisablePagemapThread( mgr );
+ if ( rc != 0 )
+ {
+ ErrMsg( "VDBManagerDisablePagemapThread() -> %R", rc );
+ rc = 0;
+ }
}
/* show manager is independend form db or tab */
if ( ctx->version_requested )
- {
rc = vdh_show_manager_version( mgr );
- DISP_RC( rc, "show_manager_version() failed" );
- }
else
{
uint32_t count;
rc = ArgsParamCount( args, &count );
- DISP_RC( rc, "ArgsParamCount() failed" );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "ArgsParamCount() -> %R", rc );
+ else
{
if ( count > 0 )
{
@@ -2160,21 +2204,22 @@ static rc_t vdm_main( const p_dump_context ctx, Args * args )
{
const char *value = NULL;
rc = ArgsParamValue( args, idx, (const void **)&value );
- DISP_RC( rc, "ArgsParamValue() failed" );
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "ArgsParamValue() -> %R", rc );
+ else
{
if ( ctx->print_info )
- {
rc = vdb_info( &(ctx->schema_list), ctx->format, mgr,
value, ctx->rows ); /* in vdb_info.c */
- }
else switch( ctx->format )
{
case df_fastq : ;
- case df_fastq1 : ;
+ case df_fastq1 : ;
case df_fasta : ;
case df_fasta1 : ;
- case df_fasta2 : vdf_main( ctx, mgr, value ); break;
+ case df_fasta2 : ;
+ case df_qual : ;
+ case df_qual1 : vdf_main( ctx, mgr, value ); break; /* in vdb-dump-fastq.c */
default : rc = vdm_main_one_obj( ctx, mgr, value ); break;
}
}
@@ -2188,10 +2233,12 @@ static rc_t vdm_main( const p_dump_context ctx, Args * args )
}
}
rc1 = VDBManagerRelease( mgr );
- DISP_RC( rc1, "VDBManagerRelease() failed" );
+ if ( rc1 != 0 ) /* rc1 holds the release result; rc stays the result of the dump itself */
+ ErrMsg( "VDBManagerRelease() -> %R", rc1 );
}
rc1 = KDirectoryRelease( dir );
- DISP_RC( rc1, "KDirectoryRelease() failed" );
+ if ( rc1 != 0 )
+ ErrMsg( "KDirectoryRelease() -> %R", rc1 );
}
return rc;
}
@@ -2199,29 +2246,32 @@ static rc_t vdm_main( const p_dump_context ctx, Args * args )
static rc_t diff_files( Args * args )
{
- uint32_t count;
- rc_t rc = ArgsParamCount( args, &count );
- DISP_RC( rc, "ArgsParamCount() failed" );
- if ( rc == 0 )
- {
- if ( count != 2 )
- KOutMsg( "this function needs exactly 2 files to diff\n" );
- else
- {
- const char * f1;
- rc = ArgsParamValue( args, 0, (const void **)&f1 );
- DISP_RC( rc, "ArgsParamValue( 0 ) failed" );
- if ( rc == 0 )
- {
- const char * f2;
- rc = ArgsParamValue( args, 1, (const void **)&f2 );
- DISP_RC( rc, "ArgsParamValue( 1 ) failed" );
- if ( rc == 0 )
- rc = vds_diff( f1, f2 ); /* in vdb-dump-str.c */
- }
- }
- }
- return rc;
+ uint32_t count;
+ rc_t rc = ArgsParamCount( args, &count );
+ if ( rc != 0 )
+ ErrMsg( "ArgsParamCount() -> %R", rc );
+ else
+ {
+ if ( count != 2 )
+ KOutMsg( "this function needs exactly 2 files to diff\n" );
+ else
+ {
+ const char * f1;
+ rc = ArgsParamValue( args, 0, (const void **)&f1 );
+ if ( rc != 0 )
+ ErrMsg( "ArgsParamValue( 0 ) -> %R", rc );
+ else
+ {
+ const char * f2;
+ rc = ArgsParamValue( args, 1, (const void **)&f2 );
+ if ( rc != 0 )
+ ErrMsg( "ArgsParamValue( 1 ) -> %R", rc );
+ else
+ rc = vds_diff( f1, f2 ); /* in vdb-dump-str.c */
+ }
+ }
+ }
+ return rc;
}
@@ -2246,52 +2296,54 @@ rc_t CC write_to_FILE ( void *f, const char *buffer, size_t bytes, size_t *num_w
rc_t CC KMain ( int argc, char *argv [] )
{
Args * args;
- rc_t rc;
-
- rc = KOutHandlerSet ( write_to_FILE, stdout );
- if ( rc == 0 )
+ rc_t rc = KOutHandlerSet( write_to_FILE, stdout );
+ if ( rc != 0 )
+ ErrMsg( "KOutHandlerSet() -> %R", rc );
+ else
{
- rc = ArgsMakeAndHandle (&args, argc, argv,
+ rc = ArgsMakeAndHandle( &args, argc, argv,
1, DumpOptions, sizeof DumpOptions / sizeof DumpOptions [ 0 ] );
- }
- if ( rc == 0 )
- {
- dump_context *ctx;
-
- rc = vdco_init( &ctx );
- DISP_RC( rc, "dump_context_init() failed" );
-
- if ( rc == 0 )
+ if ( rc != 0 )
+ ErrMsg( "ArgsMakeAndHandle() -> %R", rc );
+ else
{
- rc = vdco_capture_arguments_and_options( args, ctx );
+ dump_context *ctx;
+
+ rc = vdco_init( &ctx );
if ( rc == 0 )
{
- out_redir redir; /* vdb-dump-redir.h */
-
- KLogHandlerSetStdErr();
- rc = init_out_redir( &redir,
- ctx->compress_mode,
- ctx->output_file,
- ctx->output_buffer_size ); /* vdb-dump-redir.c */
+ rc = vdco_capture_arguments_and_options( args, ctx );
if ( rc == 0 )
{
- if ( ctx->phase > 0 )
- rc = vdi_bin_phase( ctx, args ); /* vdb-dump-bin.c */
- else if ( ctx->diff )
- rc = diff_files( args ); /* above calls into vdb-dump-str.c */
- else if ( ctx->slice_depth > 0 )
- rc = find_slice( ctx, args ); /* vdb-dump-str.c */
- else
- rc = vdm_main( ctx, args );
-
- release_out_redir( &redir ); /* vdb-dump-redir.c */
+ out_redir redir; /* vdb-dump-redir.h */
+
+ KLogHandlerSetStdErr();
+ rc = init_out_redir( &redir,
+ ctx->compress_mode,
+ ctx->output_file,
+ ctx->interactive ? 0 : ctx->output_buffer_size ); /* vdb-dump-redir.c */
+
+ if ( rc == 0 )
+ {
+ if ( ctx->phase > 0 )
+ rc = vdi_bin_phase( ctx, args ); /* vdb-dump-bin.c */
+ else if ( ctx->diff )
+ rc = diff_files( args ); /* above calls into vdb-dump-str.c */
+ else if ( ctx->interactive )
+ rc = vdi_main( ctx, args ); /* vdb-dump-interact.c */
+ else if ( ctx->slice_depth > 0 )
+ rc = find_slice( ctx, args ); /* vdb-dump-str.c */
+ else
+ rc = vdm_main( ctx, args );
+
+ release_out_redir( &redir ); /* vdb-dump-redir.c */
+ }
}
+ vdco_destroy( ctx );
}
- vdco_destroy( ctx );
+ ArgsWhack( args );
}
- ArgsWhack (args);
}
-
return rc;
}
diff --git a/tools/vdb-dump/vdb-dump.vers b/tools/vdb-dump/vdb-dump.vers
index 35d16fb..097a15a 100644
--- a/tools/vdb-dump/vdb-dump.vers
+++ b/tools/vdb-dump/vdb-dump.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-dump/vdb-dump.vers.h b/tools/vdb-dump/vdb-dump.vers.h
index ce77f40..a7aa35d 100644
--- a/tools/vdb-dump/vdb-dump.vers.h
+++ b/tools/vdb-dump/vdb-dump.vers.h
@@ -1 +1 @@
-#define VDB_DUMP_VERS 0x02050007
+#define VDB_DUMP_VERS 0x02060002
diff --git a/tools/vdb-dump/vdb_info.c b/tools/vdb-dump/vdb_info.c
index 252f583..d8a5318 100644
--- a/tools/vdb-dump/vdb_info.c
+++ b/tools/vdb-dump/vdb_info.c
@@ -87,13 +87,13 @@ typedef struct vdb_info_event
typedef struct vdb_info_bam_hdr
{
- bool present;
- size_t hdr_bytes;
- uint32_t total_lines;
- uint32_t HD_lines;
- uint32_t SQ_lines;
- uint32_t RG_lines;
- uint32_t PG_lines;
+ bool present;
+ size_t hdr_bytes;
+ uint32_t total_lines;
+ uint32_t HD_lines;
+ uint32_t SQ_lines;
+ uint32_t RG_lines;
+ uint32_t PG_lines;
} vdb_info_bam_hdr;
typedef struct vdb_info_data
@@ -102,21 +102,22 @@ typedef struct vdb_info_data
const char * s_path_type;
const char * s_platform;
- char path[ 1024 ];
- char cache[ 1024 ];
+ char path[ 4096 ];
+ char remote_path[ 4096 ];
+ char cache[ 1024 ];
char schema_name[ 1024 ];
- char species[ 1024 ];
-
+ char species[ 1024 ];
+
vdb_info_event formatter;
vdb_info_event loader;
vdb_info_event update;
vdb_info_date ts;
- vdb_info_bam_hdr bam_hdr;
-
- float cache_percent;
- uint64_t bytes_in_cache;
+ vdb_info_bam_hdr bam_hdr;
+
+ float cache_percent;
+ uint64_t bytes_in_cache;
uint64_t seq_rows;
uint64_t ref_rows;
@@ -357,98 +358,98 @@ static void split_date( vdb_info_date * d )
/* ----------------------------------------------------------------------------- */
static bool has_col( const VTable * tab, const char * colname )
{
- bool res = false;
- struct KNamelist * columns;
- rc_t rc = VTableListReadableColumns( tab, &columns );
- if ( rc == 0 )
- {
- uint32_t count;
- rc = KNamelistCount( columns, &count );
- if ( rc == 0 && count > 0 )
- {
- uint32_t idx;
- size_t colname_size = string_size( colname );
- for ( idx = 0; idx < count && rc == 0 && !res; ++idx )
- {
- const char * a_name;
- rc = KNamelistGet ( columns, idx, &a_name );
- if ( rc == 0 )
- {
- int cmp;
- size_t a_name_size = string_size( a_name );
- uint32_t max_chars = ( uint32_t )colname_size;
- if ( a_name_size > max_chars ) max_chars = ( uint32_t )a_name_size;
- cmp = strcase_cmp ( colname, colname_size,
- a_name, a_name_size,
- max_chars );
- res = ( cmp == 0 );
- }
- }
- }
- KNamelistRelease( columns );
- }
- return res;
+ bool res = false;
+ struct KNamelist * columns;
+ rc_t rc = VTableListReadableColumns( tab, &columns );
+ if ( rc == 0 )
+ {
+ uint32_t count;
+ rc = KNamelistCount( columns, &count );
+ if ( rc == 0 && count > 0 )
+ {
+ uint32_t idx;
+ size_t colname_size = string_size( colname );
+ for ( idx = 0; idx < count && rc == 0 && !res; ++idx )
+ {
+ const char * a_name;
+ rc = KNamelistGet ( columns, idx, &a_name );
+ if ( rc == 0 )
+ {
+ int cmp;
+ size_t a_name_size = string_size( a_name );
+ uint32_t max_chars = ( uint32_t )colname_size;
+ if ( a_name_size > max_chars ) max_chars = ( uint32_t )a_name_size;
+ cmp = strcase_cmp ( colname, colname_size,
+ a_name, a_name_size,
+ max_chars );
+ res = ( cmp == 0 );
+ }
+ }
+ }
+ KNamelistRelease( columns );
+ }
+ return res;
}
static const char * get_platform( const VTable * tab )
{
const char * res = PT_NONE;
- if ( has_col( tab, "PLATFORM" ) )
- {
- const VCursor * cur;
- rc_t rc = VTableCreateCursorRead( tab, &cur );
- if ( rc == 0 )
- {
- uint32_t idx;
- rc = VCursorAddColumn( cur, &idx, "PLATFORM" );
- if ( rc == 0 )
- {
- rc = VCursorOpen( cur );
- if ( rc == 0 )
- {
- const uint8_t * pf;
- rc = VCursorCellDataDirect( cur, 1, idx, NULL, (const void**)&pf, NULL, NULL );
- if ( rc == 0 )
- {
- res = vdcd_get_platform_txt( *pf );
- }
- }
- }
- VCursorRelease( cur );
- }
- }
+ if ( has_col( tab, "PLATFORM" ) )
+ {
+ const VCursor * cur;
+ rc_t rc = VTableCreateCursorRead( tab, &cur );
+ if ( rc == 0 )
+ {
+ uint32_t idx;
+ rc = VCursorAddColumn( cur, &idx, "PLATFORM" );
+ if ( rc == 0 )
+ {
+ rc = VCursorOpen( cur );
+ if ( rc == 0 )
+ {
+ const uint8_t * pf;
+ rc = VCursorCellDataDirect( cur, 1, idx, NULL, (const void**)&pf, NULL, NULL );
+ if ( rc == 0 )
+ {
+ res = vdcd_get_platform_txt( *pf );
+ }
+ }
+ }
+ VCursorRelease( cur );
+ }
+ }
return res;
}
static void get_string_cell( char * buffer, size_t buffer_size, const VTable * tab, int64_t row, const char * column )
{
- if ( has_col( tab, column ) )
- {
- const VCursor * cur;
- rc_t rc = VTableCreateCursorRead( tab, &cur );
- if ( rc == 0 )
- {
- uint32_t idx;
- rc = VCursorAddColumn( cur, &idx, column );
- if ( rc == 0 )
- {
- rc = VCursorOpen( cur );
- if ( rc == 0 )
- {
- const char * src;
- uint32_t row_len;
- rc = VCursorCellDataDirect( cur, row, idx, NULL, (const void**)&src, NULL, &row_len );
- if ( rc == 0 )
- {
- size_t num_writ;
- string_printf( buffer, buffer_size, &num_writ, "%.*s", row_len, src );
- }
- }
- }
- VCursorRelease( cur );
- }
- }
+ if ( has_col( tab, column ) )
+ {
+ const VCursor * cur;
+ rc_t rc = VTableCreateCursorRead( tab, &cur );
+ if ( rc == 0 )
+ {
+ uint32_t idx;
+ rc = VCursorAddColumn( cur, &idx, column );
+ if ( rc == 0 )
+ {
+ rc = VCursorOpen( cur );
+ if ( rc == 0 )
+ {
+ const char * src;
+ uint32_t row_len;
+ rc = VCursorCellDataDirect( cur, row, idx, NULL, (const void**)&src, NULL, &row_len );
+ if ( rc == 0 )
+ {
+ size_t num_writ;
+ string_printf( buffer, buffer_size, &num_writ, "%.*s", row_len, src );
+ }
+ }
+ }
+ VCursorRelease( cur );
+ }
+ }
}
@@ -525,92 +526,92 @@ static void get_meta_event( const KMetadata * meta, const char * node_path, vdb_
static size_t get_node_size( const KMDataNode * node )
{
- char buffer[ 10 ];
- size_t num_read, remaining, res = 0;
- rc_t rc = KMDataNodeRead( node, 0, buffer, sizeof( buffer ), &num_read, &remaining );
- if ( rc == 0 ) res = num_read + remaining;
- return res;
+ char buffer[ 10 ];
+ size_t num_read, remaining, res = 0;
+ rc_t rc = KMDataNodeRead( node, 0, buffer, sizeof( buffer ), &num_read, &remaining );
+ if ( rc == 0 ) res = num_read + remaining;
+ return res;
}
static bool is_newline( const char c ) { return ( c == 0x0A || c == 0x0D ); }
static void inspect_line( vdb_info_bam_hdr * bam_hdr, char * line, size_t len )
{
- bam_hdr->total_lines++;
- if ( len > 3 && line[ 0 ] == '@' )
- {
- switch( line[ 1 ] )
- {
- case 'H' : if ( line[ 2 ] == 'D' ) bam_hdr->HD_lines++; break;
- case 'S' : if ( line[ 2 ] == 'Q' ) bam_hdr->SQ_lines++; break;
- case 'R' : if ( line[ 2 ] == 'G' ) bam_hdr->RG_lines++; break;
- case 'P' : if ( line[ 2 ] == 'G' ) bam_hdr->PG_lines++; break;
- }
- }
+ bam_hdr->total_lines++;
+ if ( len > 3 && line[ 0 ] == '@' )
+ {
+ switch( line[ 1 ] )
+ {
+ case 'H' : if ( line[ 2 ] == 'D' ) bam_hdr->HD_lines++; break;
+ case 'S' : if ( line[ 2 ] == 'Q' ) bam_hdr->SQ_lines++; break;
+ case 'R' : if ( line[ 2 ] == 'G' ) bam_hdr->RG_lines++; break;
+ case 'P' : if ( line[ 2 ] == 'G' ) bam_hdr->PG_lines++; break;
+ }
+ }
}
static void parse_buffer( vdb_info_bam_hdr * bam_hdr, char * buffer, size_t len )
{
- char * line;
- size_t idx, line_len, state = 0;
- for ( idx = 0; idx < len; ++idx )
- {
- switch( state )
- {
- case 0 : if ( is_newline( buffer[ idx ] ) ) /* init */
- state = 2;
- else
- {
- line = &( buffer[ idx ] );
- line_len = 1;
- state = 1;
- }
- break;
-
- case 1 : if ( is_newline( buffer[ idx ] ) ) /* content */
- {
- inspect_line( bam_hdr, line, line_len );
- state = 2;
- }
- else
- line_len++;
- break;
-
- case 2 : if ( !is_newline( buffer[ idx ] ) ) /* newline */
- {
- line = &( buffer[ idx ] );
- line_len = 1;
- state = 1;
- }
- break;
- }
- }
+ char * line;
+ size_t idx, line_len, state = 0;
+ for ( idx = 0; idx < len; ++idx )
+ {
+ switch( state )
+ {
+ case 0 : if ( is_newline( buffer[ idx ] ) ) /* init */
+ state = 2;
+ else
+ {
+ line = &( buffer[ idx ] );
+ line_len = 1;
+ state = 1;
+ }
+ break;
+
+ case 1 : if ( is_newline( buffer[ idx ] ) ) /* content */
+ {
+ inspect_line( bam_hdr, line, line_len );
+ state = 2;
+ }
+ else
+ line_len++;
+ break;
+
+ case 2 : if ( !is_newline( buffer[ idx ] ) ) /* newline */
+ {
+ line = &( buffer[ idx ] );
+ line_len = 1;
+ state = 1;
+ }
+ break;
+ }
+ }
}
static void get_meta_bam_hdr( vdb_info_bam_hdr * bam_hdr, const KMetadata * meta )
{
const KMDataNode * node;
rc_t rc = KMetadataOpenNodeRead ( meta, &node, "BAM_HEADER" );
- bam_hdr -> present = ( rc == 0 );
+ bam_hdr -> present = ( rc == 0 );
if ( bam_hdr -> present )
{
- bam_hdr->hdr_bytes = get_node_size( node );
- if ( bam_hdr->hdr_bytes > 0 )
- {
- char * buffer = malloc( bam_hdr->hdr_bytes );
- if ( buffer != NULL )
- {
- size_t num_read, remaining;
- rc = KMDataNodeRead( node, 0, buffer, bam_hdr->hdr_bytes, &num_read, &remaining );
- if ( rc == 0 )
- {
- parse_buffer( bam_hdr, buffer, bam_hdr->hdr_bytes );
- }
- free( buffer );
- }
- }
- KMDataNodeRelease ( node );
- }
+ bam_hdr->hdr_bytes = get_node_size( node );
+ if ( bam_hdr->hdr_bytes > 0 )
+ {
+ char * buffer = malloc( bam_hdr->hdr_bytes );
+ if ( buffer != NULL )
+ {
+ size_t num_read, remaining;
+ rc = KMDataNodeRead( node, 0, buffer, bam_hdr->hdr_bytes, &num_read, &remaining );
+ if ( rc == 0 )
+ {
+ parse_buffer( bam_hdr, buffer, bam_hdr->hdr_bytes );
+ }
+ free( buffer );
+ }
+ }
+ KMDataNodeRelease ( node );
+ }
}
static void get_meta_info( vdb_info_data * data, const KMetadata * meta )
@@ -646,7 +647,7 @@ static void get_meta_info( vdb_info_data * data, const KMetadata * meta )
get_meta_event( meta, "SOFTWARE/formatter", &data->formatter );
get_meta_event( meta, "SOFTWARE/loader", &data->loader );
get_meta_event( meta, "SOFTWARE/update", &data->update );
- get_meta_bam_hdr( &data->bam_hdr, meta );
+ get_meta_bam_hdr( &data->bam_hdr, meta );
}
@@ -670,42 +671,42 @@ static const char * get_path_type( const VDBManager *mgr, const char * acc_or_pa
static rc_t make_remote_file( const KFile ** f, const char * url )
{
- KNSManager * kns_mgr;
- rc_t rc = KNSManagerMake ( & kns_mgr );
- *f = NULL;
- if ( rc == 0 )
- {
- rc = KNSManagerMakeHttpFile ( kns_mgr, f, NULL, 0x01010000, "%s", url );
- KNSManagerRelease ( kns_mgr );
- }
- return rc;
+ KNSManager * kns_mgr;
+ rc_t rc = KNSManagerMake ( & kns_mgr );
+ *f = NULL;
+ if ( rc == 0 )
+ {
+ rc = KNSManagerMakeHttpFile ( kns_mgr, f, NULL, 0x01010000, "%s", url );
+ KNSManagerRelease ( kns_mgr );
+ }
+ return rc;
}
static rc_t make_local_file( const KFile ** f, const char * path )
{
- KDirectory * dir;
- rc_t rc = KDirectoryNativeDir( &dir );
- *f = NULL;
- if ( rc == 0 )
- {
- rc = KDirectoryOpenFileRead( dir, f, "%s", path );
- KDirectoryRelease( dir );
- }
- return rc;
+ KDirectory * dir;
+ rc_t rc = KDirectoryNativeDir( &dir );
+ *f = NULL;
+ if ( rc == 0 )
+ {
+ rc = KDirectoryOpenFileRead( dir, f, "%s", path );
+ KDirectoryRelease( dir );
+ }
+ return rc;
}
static uint64_t get_file_size( const char * path, bool remotely )
{
uint64_t res = 0;
- const KFile * f;
- rc_t rc = ( remotely ) ? make_remote_file( &f, path ) : make_local_file( &f, path );
- if ( rc == 0 )
- {
- KFileSize ( f, &res );
- KFileRelease( f );
- }
+ const KFile * f;
+ rc_t rc = ( remotely ) ? make_remote_file( &f, path ) : make_local_file( &f, path );
+ if ( rc == 0 )
+ {
+ KFileSize ( f, &res );
+ KFileRelease( f );
+ }
return res;
}
@@ -720,7 +721,7 @@ static rc_t vdb_info_tab( vdb_info_data * data, VSchema * schema, const VDBManag
data->s_platform = get_platform( tab );
data->seq_rows = get_rowcount( tab );
- get_string_cell( data->species, sizeof data->species, tab, 1, "DEF_LINE" );
+ get_string_cell( data->species, sizeof data->species, tab, 1, "DEF_LINE" );
rc = VTableOpenMetadataRead ( tab, &meta );
if ( rc == 0 )
@@ -755,20 +756,20 @@ static void get_species( char * buffer, size_t buffer_size, const VDatabase * db
rc_t rc = VDatabaseOpenTableRead( db, &tab, "REFERENCE" );
if ( rc == 0 )
{
- char seq_id[ 1024 ];
-
- seq_id[ 0 ] = 0;
- get_string_cell( seq_id, sizeof seq_id, tab, 1, "SEQ_ID" );
- VTableRelease( tab );
- if ( seq_id[ 0 ] != 0 )
- {
- rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", seq_id );
- if ( rc == 0 )
- {
- get_string_cell( buffer, buffer_size, tab, 1, "DEF_LINE" );
- VTableRelease( tab );
- }
- }
+ char seq_id[ 1024 ];
+
+ seq_id[ 0 ] = 0;
+ get_string_cell( seq_id, sizeof seq_id, tab, 1, "SEQ_ID" );
+ VTableRelease( tab );
+ if ( seq_id[ 0 ] != 0 )
+ {
+ rc = VDBManagerOpenTableRead( mgr, &tab, NULL, "%s", seq_id );
+ if ( rc == 0 )
+ {
+ get_string_cell( buffer, buffer_size, tab, 1, "DEF_LINE" );
+ VTableRelease( tab );
+ }
+ }
}
}
@@ -798,9 +799,9 @@ static rc_t vdb_info_db( vdb_info_data * data, VSchema * schema, const VDBManage
data->passes_rows = get_tab_row_count( db, "PASSES" );
data->metrics_rows = get_tab_row_count( db, "ZMW_METRICS" );
- if ( data->ref_rows > 0 )
- get_species( data->species, sizeof data->species, db, mgr );
-
+ if ( data->ref_rows > 0 )
+ get_species( data->species, sizeof data->species, db, mgr );
+
rc = VDatabaseOpenMetadataRead ( db, &meta );
if ( rc == 0 )
{
@@ -907,9 +908,9 @@ static rc_t vdb_info_print_xml( vdb_info_data * data )
rc = KOutMsg( "<MINUTE>%.02d</MINUTE>\n", data->ts.minute );
}
- if ( rc == 0 && data->species[ 0 ] != 0 )
- rc = vdb_info_print_xml_s( "SPECIES", data->species );
-
+ if ( rc == 0 && data->species[ 0 ] != 0 )
+ rc = vdb_info_print_xml_s( "SPECIES", data->species );
+
if ( rc == 0 )
rc = vdb_info_print_xml_event( "FORMATTER", &data->formatter );
if ( rc == 0 )
@@ -1014,9 +1015,9 @@ static rc_t vdb_info_print_json( vdb_info_data * data )
rc = KOutMsg( "\"MINUTE\":%d,\n", data->ts.minute );
}
- if ( rc == 0 && data->species[ 0 ] != 0 )
- rc = vdb_info_print_json_s( "SPECIES", data->species );
-
+ if ( rc == 0 && data->species[ 0 ] != 0 )
+ rc = vdb_info_print_json_s( "SPECIES", data->species );
+
if ( rc == 0 )
rc = vdb_info_print_json_event( "FORMATTER", &data->formatter );
if ( rc == 0 )
@@ -1038,10 +1039,10 @@ static const char dflt_event_name[] = "-";
static rc_t vdb_info_print_sep_event( vdb_info_event * event, const char sep, bool last )
{
rc_t rc;
- const char * ev_name = event->name;
- if ( ev_name == NULL || ev_name[ 0 ] == 0 )
- ev_name = dflt_event_name;
-
+ const char * ev_name = event->name;
+ if ( ev_name == NULL || ev_name[ 0 ] == 0 )
+ ev_name = dflt_event_name;
+
if ( last )
{
rc = KOutMsg( "'%s'%c%d%c%d%c%d%c%d%c%d%c%d%c%d%c%d%c%d",
@@ -1075,25 +1076,25 @@ static rc_t vdb_info_print_sep( vdb_info_data * data, const char sep )
data->passes_rows, sep, data->metrics_rows, sep );
if ( rc == 0 )
rc = KOutMsg( "'%s'%c%d%c%d%c%d%c%d%c%d%c",
- data->schema_name, sep,
+ data->schema_name, sep,
data->ts.month, sep, data->ts.day, sep, data->ts.year, sep,
data->ts.hour, sep, data->ts.minute, sep );
if ( rc == 0 )
- {
- if ( data->species[ 0 ] != 0 )
- rc = KOutMsg( "'%s'%c", data->species, sep );
- else
- rc = KOutMsg( "-%c", sep );
- }
-
+ {
+ if ( data->species[ 0 ] != 0 )
+ rc = KOutMsg( "'%s'%c", data->species, sep );
+ else
+ rc = KOutMsg( "-%c", sep );
+ }
+
if ( rc == 0 )
rc = vdb_info_print_sep_event( &data->formatter, sep, false );
if ( rc == 0 )
rc = vdb_info_print_sep_event( &data->loader, sep, false );
if ( rc == 0 )
rc = vdb_info_print_sep_event( &data->update, sep, true );
-
+
if ( rc == 0 )
rc = KOutMsg( "\n" );
@@ -1137,22 +1138,25 @@ static rc_t vdb_info_print_dflt( vdb_info_data * data )
if ( rc == 0 && data->path[ 0 ] != 0 )
rc = KOutMsg( "path : %s\n", data->path );
- if ( rc == 0 && data->file_size != 0 )
+ if ( rc == 0 && data->remote_path[ 0 ] != 0 )
+ rc = KOutMsg( "remote : %s\n", data->remote_path );
+
+ if ( rc == 0 && data->file_size != 0 )
rc = KOutMsg( "size : %,lu\n", data->file_size );
- if ( rc == 0 && data->cache[ 0 ] != 0 )
- {
+ if ( rc == 0 && data->cache[ 0 ] != 0 )
+ {
rc = KOutMsg( "cache : %s\n", data->cache );
- if ( rc == 0 )
- rc = KOutMsg( "percent: %f\n", data->cache_percent );
- if ( rc == 0 )
- rc = KOutMsg( "bytes : %,lu\n", data->bytes_in_cache );
- }
-
- if ( rc == 0 && data->s_path_type[ 0 ] != 0 )
+ if ( rc == 0 )
+ rc = KOutMsg( "percent: %f\n", data->cache_percent );
+ if ( rc == 0 )
+ rc = KOutMsg( "bytes : %,lu\n", data->bytes_in_cache );
+ }
+
+ if ( rc == 0 && data->s_path_type[ 0 ] != 0 )
rc = KOutMsg( "type : %s\n", data->s_path_type );
- if ( rc == 0 && data->s_platform[ 0 ] != 0 )
+ if ( rc == 0 && data->s_platform[ 0 ] != 0 )
rc = KOutMsg( "platf : %s\n", data->s_platform );
if ( rc == 0 && data->seq_rows != 0 )
@@ -1192,7 +1196,7 @@ static rc_t vdb_info_print_dflt( vdb_info_data * data )
if ( rc == 0 && data->species[ 0 ] != 0 )
rc = KOutMsg( "SPECIES: %s\n", data->species );
-
+
if ( rc == 0 )
vdb_info_print_dflt_event( &data->formatter, "FMT" );
if ( rc == 0 )
@@ -1200,19 +1204,19 @@ static rc_t vdb_info_print_dflt( vdb_info_data * data )
if ( rc == 0 )
vdb_info_print_dflt_event( &data->update, "UPD" );
- if ( rc == 0 && data->bam_hdr.present )
- {
- rc = KOutMsg( "BAMHDR : %d bytes / %d lines\n", data->bam_hdr.hdr_bytes, data->bam_hdr.total_lines );
- if ( rc == 0 && data->bam_hdr.HD_lines > 0 )
- rc = KOutMsg( "BAMHDR : %d HD-lines\n", data->bam_hdr.HD_lines );
- if ( rc == 0 && data->bam_hdr.SQ_lines > 0 )
- rc = KOutMsg( "BAMHDR : %d SQ-lines\n", data->bam_hdr.SQ_lines );
- if ( rc == 0 && data->bam_hdr.RG_lines > 0 )
- rc = KOutMsg( "BAMHDR : %d RG-lines\n", data->bam_hdr.RG_lines );
- if ( rc == 0 && data->bam_hdr.PG_lines > 0 )
- rc = KOutMsg( "BAMHDR : %d PG-lines\n", data->bam_hdr.PG_lines );
- }
-
+ if ( rc == 0 && data->bam_hdr.present )
+ {
+ rc = KOutMsg( "BAMHDR : %d bytes / %d lines\n", data->bam_hdr.hdr_bytes, data->bam_hdr.total_lines );
+ if ( rc == 0 && data->bam_hdr.HD_lines > 0 )
+ rc = KOutMsg( "BAMHDR : %d HD-lines\n", data->bam_hdr.HD_lines );
+ if ( rc == 0 && data->bam_hdr.SQ_lines > 0 )
+ rc = KOutMsg( "BAMHDR : %d SQ-lines\n", data->bam_hdr.SQ_lines );
+ if ( rc == 0 && data->bam_hdr.RG_lines > 0 )
+ rc = KOutMsg( "BAMHDR : %d RG-lines\n", data->bam_hdr.RG_lines );
+ if ( rc == 0 && data->bam_hdr.PG_lines > 0 )
+ rc = KOutMsg( "BAMHDR : %d PG-lines\n", data->bam_hdr.PG_lines );
+ }
+
return rc;
}
@@ -1426,27 +1430,30 @@ static rc_t vdb_info_1( VSchema * schema, dump_format_t format, const VDBManager
case 'T' : vdb_info_tab( &data, schema, mgr ); break;
}
- /* try to resolve the path locally */
+ /* try to resolve the path locally */
rc1 = resolve_accession( acc_or_path, data.path, sizeof data.path, false ); /* vdb-dump-helper.c */
if ( rc1 == 0 )
+ {
data.file_size = get_file_size( data.path, false );
- else
- {
- /* try to resolve the path remotely */
- rc1 = resolve_accession( acc_or_path, data.path, sizeof data.path, true ); /* vdb-dump-helper.c */
- if ( rc1 == 0 )
- {
- data.file_size = get_file_size( data.path, true );
- /* try to find out the cache-file */
- rc1 = resolve_cache( acc_or_path, data.cache, sizeof data.cache ); /* vdb-dump-helper.c */
- if ( rc1 == 0 )
- {
- /* try to find out cache completeness */
- check_cache_comleteness( data.cache, &data.cache_percent, &data.bytes_in_cache );
- }
- }
- }
-
+ resolve_remote_accession( acc_or_path, data.remote_path, sizeof data.remote_path ); /* vdb-dump-helper.c */
+ }
+ else
+ {
+ /* try to resolve the path remotely */
+ rc1 = resolve_accession( acc_or_path, data.path, sizeof data.path, true ); /* vdb-dump-helper.c */
+ if ( rc1 == 0 )
+ {
+ data.file_size = get_file_size( data.path, true );
+ /* try to find out the cache-file */
+ rc1 = resolve_cache( acc_or_path, data.cache, sizeof data.cache ); /* vdb-dump-helper.c */
+ if ( rc1 == 0 )
+ {
+ /* try to find out cache completeness */
+ check_cache_comleteness( data.cache, &data.cache_percent, &data.bytes_in_cache );
+ }
+ }
+ }
+
switch ( format )
{
case df_xml : rc = vdb_info_print_xml( &data ); break;
diff --git a/tools/vdb-validate/Makefile b/tools/vdb-validate/Makefile
index 6107218..2778fdb 100644
--- a/tools/vdb-validate/Makefile
+++ b/tools/vdb-validate/Makefile
@@ -34,7 +34,8 @@ INT_TOOLS = \
EXT_TOOLS = \
- vdb-validate
+ vdb-validate \
+ check-corrupt
ALL_TOOLS = \
$(INT_TOOLS) \
@@ -100,3 +101,21 @@ vdb-validate.vers.h: vdb-validate.vers
$(BINDIR)/vdb-validate: $(VDB_VALIDATE_OBJ)
$(LD) --exe --vers $(SRCDIR) -o $@ $^ $(VDB_VALIDATE_LIB)
+
+
+#-------------------------------------------------------------------------------
+# check-corrupt
+#
+CHECK_CORRUPT_SRC = \
+ check-corrupt
+
+CHECK_CORRUPT_OBJ = \
+ $(addsuffix .$(OBJX),$(CHECK_CORRUPT_SRC))
+
+CHECK_CORRUPT_LIB = \
+ -lkapp \
+ -sncbi-vdb \
+ -lm
+
+$(BINDIR)/check-corrupt: $(CHECK_CORRUPT_OBJ)
+ $(LP) --exe --vers $(SRCDIR) -o $@ $^ $(CHECK_CORRUPT_LIB)
\ No newline at end of file
diff --git a/tools/vdb-validate/check-corrupt.cpp b/tools/vdb-validate/check-corrupt.cpp
new file mode 100644
index 0000000..d959b9e
--- /dev/null
+++ b/tools/vdb-validate/check-corrupt.cpp
@@ -0,0 +1,644 @@
+/*==============================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+#include "check-corrupt.vers.h"
+
+#include <klib/rc.h>
+#include <klib/log.h>
+#include <klib/out.h>
+#include <klib/writer.h>
+#include <kfs/directory.h>
+#include <vdb/manager.h>
+#include <vdb/database.h>
+#include <vdb/table.h>
+#include <vdb/cursor.h>
+#include <vdb/vdb-priv.h>
+#include <kdb/manager.h>
+
+#include <sstream>
+
+#include <cmath>
+
+#define SA_TABLE_LOOKUP_LIMIT 100000
+#define PA_LONGER_SA_LIMIT 0.01
+
+typedef struct CheckCorruptConfig
+{
+ double cutoff_percent; // negative when not used
+ uint64_t cutoff_number; // only used when cutoff_percent is negative
+
+ double pa_len_threshold_percent; // negative when not used
+ uint64_t pa_len_threshold_number; // only used when pa_len_threshold_percent is negative
+} CheckCorruptConfig;
+
+struct VDB_ERROR
+{
+ VDB_ERROR (const char * _msg, rc_t _rc)
+ : msg (_msg), rc (_rc)
+ {}
+
+ const char * msg;
+ rc_t rc;
+};
+
+struct VDB_ROW_ERROR
+{
+ VDB_ROW_ERROR (const char * _msg, int64_t _row_id, rc_t _rc)
+ : row_id ( _row_id ), msg (_msg), rc (_rc)
+ {}
+
+ int64_t row_id;
+ const char * msg;
+ rc_t rc;
+};
+
+struct DATA_ERROR
+{
+ DATA_ERROR (const std::string & _msg)
+ : msg(_msg)
+ {}
+
+ std::string msg;
+};
+
+/**
+ * runs all consistency checks; returns normally when they pass, throws VDB_ERROR, VDB_ROW_ERROR or DATA_ERROR when one fails
+ */
+void runChecks ( const char * accession, const CheckCorruptConfig * config, const VCursor * pa_cursor, const VCursor * sa_cursor, const VCursor * seq_cursor )
+{
+ rc_t rc;
+ uint32_t pa_has_ref_offset_idx;
+ uint32_t sa_has_ref_offset_idx;
+ uint32_t sa_seq_spot_id_idx;
+ uint32_t sa_seq_read_id_idx;
+ uint32_t sa_pa_id_idx;
+ uint32_t sa_tmp_mismatch_idx;
+ uint32_t seq_read_len_idx;
+ bool has_tmp_mismatch;
+
+ /* add columns to cursor */
+#define add_column(tbl_name, cursor, idx, col_spec) \
+ rc = VCursorAddColumn( cursor, &idx, col_spec ); \
+ if ( rc != 0 ) \
+ throw VDB_ERROR("VCursorAddColumn() failed for " tbl_name " table, " col_spec " column", rc);
+
+ add_column( "PRIMARY_ALIGNMENT", pa_cursor, pa_has_ref_offset_idx, "(bool)HAS_REF_OFFSET" );
+ add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_has_ref_offset_idx, "(bool)HAS_REF_OFFSET" );
+ add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_seq_spot_id_idx, "SEQ_SPOT_ID" );
+ add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_seq_read_id_idx, "SEQ_READ_ID" );
+ add_column( "SECONDARY_ALIGNMENT", sa_cursor, sa_pa_id_idx, "PRIMARY_ALIGNMENT_ID" );
+ add_column( "SEQUENCE", seq_cursor, seq_read_len_idx, "READ_LEN" );
+
+ // optional columns
+ rc = VCursorAddColumn( sa_cursor, &sa_tmp_mismatch_idx, "TMP_MISMATCH" );
+ if ( rc == 0 )
+ has_tmp_mismatch = true;
+ else
+ {
+ has_tmp_mismatch = false;
+ rc = 0;
+ }
+
+
+#undef add_column
+
+ rc = VCursorOpen( pa_cursor );
+ if (rc != 0)
+ throw VDB_ERROR("VCursorOpen() failed for PRIMARY_ALIGNMENT table", rc);
+ rc = VCursorOpen( sa_cursor );
+ if (rc != 0)
+ throw VDB_ERROR("VCursorOpen() failed for SECONDARY_ALIGNMENT table", rc);
+ rc = VCursorOpen( seq_cursor );
+ if (rc != 0)
+ throw VDB_ERROR("VCursorOpen() failed for SEQUENCE table", rc);
+
+ int64_t sa_id_first;
+ uint64_t sa_row_count;
+
+ rc = VCursorIdRange( sa_cursor, sa_pa_id_idx, &sa_id_first, &sa_row_count );
+ if (rc != 0)
+ throw VDB_ERROR("VCursorIdRange() failed for SECONDARY_ALIGNMENT table, PRIMARY_ALIGNMENT_ID column", rc);
+
+ bool reported_about_no_pa = false;
+ uint64_t pa_longer_sa_rows = 0;
+ uint64_t pa_longer_sa_limit;
+ if (config->pa_len_threshold_percent > 0)
+ pa_longer_sa_limit = ceil( config->pa_len_threshold_percent * sa_row_count );
+ else if (config->pa_len_threshold_number == 0 || config->pa_len_threshold_number > sa_row_count)
+ pa_longer_sa_limit = sa_row_count;
+ else
+ pa_longer_sa_limit = config->pa_len_threshold_number;
+
+ uint64_t sa_row_limit;
+ if (config->cutoff_percent > 0)
+ sa_row_limit = ceil( config->cutoff_percent * sa_row_count );
+ else if (config->cutoff_number == 0 || config->cutoff_number > sa_row_count)
+ sa_row_limit = sa_row_count;
+ else
+ sa_row_limit = config->cutoff_number;
+
+ for ( uint64_t i = 0; i < sa_row_count && i < sa_row_limit; ++i )
+ {
+ int64_t sa_row_id = i + sa_id_first;
+ const void * data_ptr = NULL;
+ uint32_t data_len;
+ uint32_t pa_row_len;
+ uint32_t sa_row_len;
+ uint32_t seq_read_len_len;
+
+ // SA:HAS_REF_OFFSET
+ rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_has_ref_offset_idx, NULL, (const void**)&data_ptr, NULL, &sa_row_len );
+ if ( rc != 0 )
+ throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, HAS_REF_OFFSET column", sa_row_id, rc);
+
+ const int64_t * p_seq_spot_id;
+ uint32_t seq_spot_id_len;
+ // SA:SEQ_SPOT_ID
+ rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_seq_spot_id_idx, NULL, (const void**)&p_seq_spot_id, NULL, &seq_spot_id_len );
+ if ( rc != 0 || p_seq_spot_id == NULL || seq_spot_id_len != 1 )
+ throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_SPOT_ID column", sa_row_id, rc);
+
+ int64_t seq_spot_id = *p_seq_spot_id;
+ if (seq_spot_id == 0)
+ {
+ std::stringstream ss;
+ ss << "SECONDARY_ALIGNMENT:" << sa_row_id << " has SEQ_SPOT_ID = " << seq_spot_id;
+
+ throw DATA_ERROR(ss.str());
+ }
+
+ if ( has_tmp_mismatch )
+ {
+ const char * p_sa_tmp_mismatch;
+ // SA:TMP_MISMATCH
+ rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_tmp_mismatch_idx, NULL, (const void**)&p_sa_tmp_mismatch, NULL, &data_len );
+ if ( rc != 0 || p_sa_tmp_mismatch == NULL )
+ throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, TMP_MISMATCH column", sa_row_id, rc);
+
+ for ( uint32_t j = 0; j < data_len; ++j )
+ {
+ if ( p_sa_tmp_mismatch[j] == '=' )
+ {
+ std::stringstream ss;
+ ss << "SECONDARY_ALIGNMENT:" << sa_row_id << " TMP_MISMATCH contains '='";
+
+ throw DATA_ERROR(ss.str());
+ }
+ }
+ }
+
+ const int64_t * p_pa_row_id;
+ // SA:PRIMARY_ALIGNMENT_ID
+ rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_pa_id_idx, NULL, (const void**)&p_pa_row_id, NULL, &data_len );
+ if ( rc != 0 || p_pa_row_id == NULL || data_len != 1 )
+ throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, PRIMARY_ALIGNMENT_ID column", sa_row_id, rc);
+
+ int64_t pa_row_id = *p_pa_row_id;
+ if (pa_row_id == 0)
+ {
+ if (!reported_about_no_pa)
+ {
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) has secondary alignments without primary", "ACC=%s", accession));
+ reported_about_no_pa = true;
+ }
+ continue;
+ }
+
+ // PA:HAS_REF_OFFSET
+ rc = VCursorCellDataDirect ( pa_cursor, pa_row_id, pa_has_ref_offset_idx, NULL, &data_ptr, NULL, &pa_row_len );
+ if ( rc != 0 )
+ throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on PRIMARY_ALIGNMENT table, HAS_REF_OFFSET column", pa_row_id, rc);
+
+ // move on when PA.len equal to SA.len
+ if (pa_row_len == sa_row_len)
+ continue;
+
+ if (pa_row_len < sa_row_len)
+ {
+ std::stringstream ss;
+ ss << "PRIMARY_ALIGNMENT:" << pa_row_id << " HAS_REF_OFFSET length (" << pa_row_len << ") less than SECONDARY_ALIGNMENT:" << sa_row_id << " HAS_REF_OFFSET length (" << sa_row_len << ")";
+
+ throw DATA_ERROR(ss.str());
+ }
+
+ // we already know that pa_row_len > sa_row_len
+ ++pa_longer_sa_rows;
+
+ const int32_t * p_seq_read_id;
+ // SA:SEQ_READ_ID
+ rc = VCursorCellDataDirect ( sa_cursor, sa_row_id, sa_seq_read_id_idx, NULL, (const void**)&p_seq_read_id, NULL, &data_len );
+ if ( rc != 0 || p_seq_read_id == NULL || data_len != 1 )
+ throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_READ_ID column", sa_row_id, rc);
+
+ // one-based read index
+ int32_t seq_read_id = *p_seq_read_id;
+
+ const uint32_t * p_seq_read_len;
+ // SEQ:READ_LEN
+ rc = VCursorCellDataDirect ( seq_cursor, seq_spot_id, seq_read_len_idx, NULL, (const void**)&p_seq_read_len, NULL, &seq_read_len_len );
+ if ( rc != 0 || p_seq_read_len == NULL )
+ throw VDB_ROW_ERROR("VCursorCellDataDirect() failed on SEQUENCE table, READ_LEN column", seq_spot_id, rc);
+
+ if ( seq_read_id < 1 || (uint32_t)seq_read_id > seq_read_len_len )
+ {
+ std::stringstream ss;
+ ss << "SECONDARY:" << sa_row_id << " SEQ_READ_ID value (" << seq_read_id << ") - 1 based, is out of SEQUENCE:" << seq_spot_id << " READ_LEN range (" << seq_read_len_len << ")";
+
+ throw DATA_ERROR(ss.str());
+ }
+
+ if (pa_row_len != p_seq_read_len[seq_read_id - 1])
+ {
+ std::stringstream ss;
+ ss << "PRIMARY_ALIGNMENT:" << pa_row_id << " HAS_REF_OFFSET length (" << pa_row_len << ") does not match its SEQUENCE:" << seq_spot_id << " READ_LEN[" << seq_read_id - 1 << "] value (" << p_seq_read_len[seq_read_id - 1] << ")";
+
+ throw DATA_ERROR(ss.str());
+ }
+
+ if (pa_longer_sa_rows >= pa_longer_sa_limit)
+ {
+ std::stringstream ss;
+ ss << "Limit violation (pa_longer_sa): there are at least " << pa_longer_sa_rows << " alignments where HAS_REF_OFFSET column is longer in PRIMARY_ALIGNMENT than in SECONDARY_ALIGNMENT";
+
+ throw DATA_ERROR(ss.str());
+ }
+ }
+}
+
+/**
+ * opens the accession and runs runChecks() on it; returns true if it passed or was skipped (not a database / a required table is missing), false on any VDB or data error
+ */
+bool checkAccession ( const char * accession, const CheckCorruptConfig * config )
+{
+ rc_t rc;
+ KDirectory * cur_dir;
+ const VDBManager * manager;
+ const VDatabase * database;
+ const VTable * pa_table;
+ const VTable * sa_table;
+ const VTable * seq_table;
+
+ const VCursor * pa_cursor;
+ const VCursor * sa_cursor;
+ const VCursor * seq_cursor;
+
+ rc = KDirectoryNativeDir( &cur_dir );
+ if ( rc != 0 )
+ PLOGERR( klogInt, (klogInt, rc, "$(ACC) KDirectoryNativeDir() failed", "ACC=%s", accession));
+ else
+ {
+ rc = VDBManagerMakeRead ( &manager, cur_dir );
+ if ( rc != 0 )
+ PLOGERR( klogInt, (klogInt, rc, "$(ACC) VDBManagerMakeRead() failed", "ACC=%s", accession));
+ else
+ {
+ int type = VDBManagerPathType ( manager, "%s", accession );
+ if ( ( type & ~ kptAlias ) != kptDatabase )
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) SKIPPING - can't be opened as a database", "ACC=%s", accession));
+ else
+ {
+ rc = VDBManagerOpenDBRead( manager, &database, NULL, "%s", accession );
+ if (rc != 0)
+ PLOGERR( klogInt, (klogInt, rc, "$(ACC) VDBManagerOpenDBRead() failed", "ACC=%s", accession));
+ else
+ {
+ rc = VDatabaseOpenTableRead( database, &pa_table, "%s", "PRIMARY_ALIGNMENT" );
+ if ( rc != 0 )
+ {
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) SKIPPING - failed to open PRIMARY_ALIGNMENT table", "ACC=%s", accession));
+ rc = 0;
+ }
+ else
+ {
+ rc = VTableCreateCursorRead( pa_table, &pa_cursor );
+ if ( rc != 0 )
+ PLOGERR( klogInt, (klogInt, rc, "$(ACC) VTableCreateCursorRead() failed for PRIMARY_ALIGNMENT cursor", "ACC=%s", accession));
+ else
+ {
+ rc = VDatabaseOpenTableRead( database, &sa_table, "%s", "SECONDARY_ALIGNMENT" );
+ if ( rc != 0 )
+ {
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) SKIPPING - failed to open SECONDARY_ALIGNMENT table", "ACC=%s", accession));
+ rc = 0;
+ }
+ else
+ {
+ rc = VTableCreateCursorRead( sa_table, &sa_cursor );
+ if ( rc != 0 )
+ PLOGERR( klogInt, (klogInt, rc, "$(ACC) VTableCreateCursorRead() failed for SECONDARY_ALIGNMENT cursor", "ACC=%s", accession));
+ else
+ {
+ rc = VDatabaseOpenTableRead( database, &seq_table, "%s", "SEQUENCE" );
+ if ( rc != 0 )
+ {
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) SKIPPING - failed to open SEQUENCE table", "ACC=%s", accession));
+ rc = 0;
+ }
+ else
+ {
+ rc = VTableCreateCursorRead( seq_table, &seq_cursor );
+ if ( rc != 0 )
+ PLOGERR( klogInt, (klogInt, rc, "$(ACC) VTableCreateCursorRead() failed for SEQUENCE cursor", "ACC=%s", accession));
+ else
+ {
+ try {
+ runChecks( accession, config, pa_cursor, sa_cursor, seq_cursor );
+ if (config->cutoff_percent > 0)
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) looks good (based on first $(CUTOFF)% of SECONDARY_ALIGNMENT rows)", "ACC=%s,CUTOFF=%f", accession, config->cutoff_percent * 100));
+ else if (config->cutoff_number == 0)
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) looks good", "ACC=%s", accession));
+ else
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) looks good (based on first $(CUTOFF) SECONDARY_ALIGNMENT rows)", "ACC=%s,CUTOFF=%lu", accession, config->cutoff_number));
+ } catch ( VDB_ERROR & x ) {
+ PLOGERR (klogErr, (klogErr, x.rc, "$(ACC) VDB error: $(MSG)", "ACC=%s,MSG=%s", accession, x.msg));
+ rc = 1;
+ } catch ( VDB_ROW_ERROR & x ) {
+ PLOGERR (klogErr, (klogErr, x.rc, "$(ACC) VDB error: $(MSG) row_id: $(ROW_ID)", "ACC=%s,MSG=%s,ROW_ID=%ld", accession, x.msg, x.row_id));
+ rc = 1;
+ } catch ( DATA_ERROR & x ) {
+ KOutMsg("%s\n", accession);
+ PLOGMSG (klogInfo, (klogInfo, "$(ACC) Invalid data: $(MSG) ", "ACC=%s,MSG=%s", accession, x.msg.c_str()));
+ rc = 1;
+ }
+ VCursorRelease( seq_cursor );
+ }
+ VTableRelease( seq_table );
+ }
+ VCursorRelease( sa_cursor );
+ }
+ VTableRelease( sa_table );
+ }
+ VCursorRelease( pa_cursor );
+ }
+ VTableRelease( pa_table );
+ }
+ VDatabaseRelease( database );
+ }
+ }
+ VDBManagerRelease( manager );
+ }
+ KDirectoryRelease( cur_dir );
+ }
+ return rc == 0;
+}
+
+//////////////////////////////////////////// Main
+extern "C"
+{
+
+#include <kapp/args.h>
+#include <kapp/log-xml.h>
+#include "check-corrupt.vers.h"
+
+const char UsageDefaultName[] = "check-corrupt"; /* fallback program name used by Usage() when ArgsProgram() cannot supply one */
+
+#define ALIAS_SA_CUTOFF NULL
+#define OPTION_SA_CUTOFF "sa-cutoff"
+
+static const char * sa_cutoff_usage[] = { "specify maximum amount of secondary alignment rows to look at before saying accession is good, default 100000.",
+ "Specifying '0' will iterate the whole table. Can be in percent (e.g. 5%)",
+ NULL };
+
+#define ALIAS_SA_SHORT_THRESHOLD NULL
+#define OPTION_SA_SHORT_THRESHOLD "sa-short-threshold"
+static const char * sa_short_threshold_usage[] = { "specify amount of secondary alignment which are shorter (hard-clipped) than corresponding primaries, default 1%.",
+ NULL };
+
+OptDef Options[] = {
+ { OPTION_SA_CUTOFF , ALIAS_SA_CUTOFF , NULL, sa_cutoff_usage , 1, true , false },
+ { OPTION_SA_SHORT_THRESHOLD , ALIAS_SA_SHORT_THRESHOLD , NULL, sa_short_threshold_usage , 1, true , false }
+};
+
+ver_t CC KAppVersion ( void )
+{
+ return CHECK_CORRUPT_VERS;
+}
+rc_t CC UsageSummary (const char * progname)
+{
+ /* print the short usage banner for progname; the KOutMsg() result is the return value */
+ return KOutMsg (
+ "\n"
+ "Usage:\n"
+ " %s [options] path [path ...]\n"
+ "\n"
+ "Summary:\n"
+ " Validate a list of runs for corrupted data\n"
+ "\n", progname);
+}
+
+rc_t CC Usage ( const Args * args )
+{
+ const char * progname = UsageDefaultName;
+ const char * fullpath = UsageDefaultName;
+ rc_t rc;
+
+ if (args == NULL)
+ rc = RC (rcApp, rcArgv, rcAccessing, rcSelf, rcNull);
+ else
+ rc = ArgsProgram (args, &fullpath, &progname);
+ if (rc)
+ progname = fullpath = UsageDefaultName;
+
+ UsageSummary (progname);
+
+ KOutMsg ("Options:\n");
+
+ HelpOptionLine(ALIAS_SA_CUTOFF , OPTION_SA_CUTOFF , "cutoff" , sa_cutoff_usage);
+ HelpOptionLine(ALIAS_SA_SHORT_THRESHOLD , OPTION_SA_SHORT_THRESHOLD , "threshold" , sa_short_threshold_usage);
+ XMLLogger_Usage();
+
+ KOutMsg ("\n");
+
+ HelpOptionsStandard ();
+
+ HelpVersion (fullpath, KAppVersion());
+
+ return rc;
+}
+
+rc_t parseArgs ( Args * args, CheckCorruptConfig * config )
+{
+ rc_t rc;
+ uint32_t opt_count;
+ rc = ArgsOptionCount ( args, OPTION_SA_CUTOFF, &opt_count );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionCount() failed for " OPTION_SA_CUTOFF);
+ return rc;
+ }
+
+ if (opt_count > 0)
+ {
+ const char * value;
+ size_t value_size;
+ rc = ArgsOptionValue ( args, OPTION_SA_CUTOFF, 0, (const void **) &value );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionValue() failed for " OPTION_SA_CUTOFF);
+ return rc;
+ }
+
+ value_size = string_size ( value );
+ if ( value_size >= 1 && value[value_size - 1] == '%' )
+ {
+ config->cutoff_percent = string_to_U64 ( value, value_size - 1, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SA_CUTOFF);
+ return rc;
+ }
+ else if (config->cutoff_percent == 0 || config->cutoff_percent > 100)
+ {
+ LOGERR (klogInt, rc, OPTION_SA_CUTOFF " has illegal percentage value (has to be 1-100%)" );
+ return 1;
+ }
+ config->cutoff_percent /= 100;
+ }
+ else
+ {
+ config->cutoff_percent = -1;
+ config->cutoff_number = string_to_U64 ( value, value_size, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SA_CUTOFF);
+ return rc;
+ }
+ }
+ }
+
+ rc = ArgsOptionCount ( args, OPTION_SA_SHORT_THRESHOLD, &opt_count );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionCount() failed for " OPTION_SA_SHORT_THRESHOLD);
+ return rc;
+ }
+
+ if (opt_count > 0)
+ {
+ const char * value;
+ size_t value_size;
+ rc = ArgsOptionValue ( args, OPTION_SA_SHORT_THRESHOLD, 0, (const void **) &value );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionValue() failed for " OPTION_SA_SHORT_THRESHOLD);
+ return rc;
+ }
+
+ value_size = string_size ( value );
+ if ( value_size >= 1 && value[value_size - 1] == '%' )
+ {
+ config->pa_len_threshold_percent = string_to_U64 ( value, value_size - 1, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SA_SHORT_THRESHOLD);
+ return rc;
+ }
+ else if (config->pa_len_threshold_percent == 0 || config->pa_len_threshold_percent > 100)
+ {
+ LOGERR (klogInt, rc, OPTION_SA_SHORT_THRESHOLD " has illegal percentage value (has to be 1-100%)" );
+ return 1;
+ }
+ config->pa_len_threshold_percent /= 100;
+ }
+ else
+ {
+ config->pa_len_threshold_percent = -1;
+ config->pa_len_threshold_number = string_to_U64 ( value, value_size, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SA_SHORT_THRESHOLD);
+ return rc;
+ }
+ }
+ }
+
+ return 0;
+}
+
+rc_t CC KMain ( int argc, char *argv [] )
+{
+ XMLLogger const *xlogger = NULL;
+ Args * args;
+ rc_t rc;
+ bool any_failed = false;
+ CheckCorruptConfig config = { -1.0, SA_TABLE_LOOKUP_LIMIT, PA_LONGER_SA_LIMIT, 0 };
+
+ KLogLevelSet(klogInfo);
+
+ rc = ArgsMakeAndHandle (&args, argc, argv, 2, Options,
+ sizeof (Options) / sizeof (Options[0]),
+ XMLLogger_Args, XMLLogger_ArgsQty);
+ if (rc)
+ LOGERR (klogInt, rc, "failed to parse command line parameters");
+ else
+ {
+ rc = XMLLogger_Make(&xlogger, NULL, args);
+ if (rc)
+ LOGERR (klogInt, rc, "failed to make xml logger");
+ else
+ {
+ rc = parseArgs ( args, &config );
+ if (rc == 0)
+ {
+ uint32_t pcount;
+ rc = ArgsParamCount ( args, &pcount );
+ if (rc)
+ LOGERR (klogInt, rc, "ArgsParamCount() failed");
+ else
+ {
+ if ( pcount == 0 )
+ LOGMSG (klogErr, "no accessions were passed in");
+ else
+ {
+ for ( uint32_t i = 0; i < pcount; ++i )
+ {
+ const char * accession;
+ rc = ArgsParamValue ( args, i, (const void **)&accession );
+ if (rc)
+ {
+ PLOGERR (klogInt, (klogInt, rc, "failed to get $(PARAM_I) accession from command line", "PARAM_I=%d", i));
+ any_failed = true;
+ }
+ else
+ {
+ if (!checkAccession ( accession, &config ))
+ any_failed = true;
+ }
+ }
+
+ if (!any_failed)
+ LOGMSG (klogInfo, "All accessions are good!");
+ }
+ }
+ }
+ XMLLogger_Release(xlogger);
+ }
+ ArgsWhack ( args );
+ }
+ return rc != 0 || any_failed ? 1 : 0;
+}
+
+}
diff --git a/tools/vdb-validate/check-corrupt.vers b/tools/vdb-validate/check-corrupt.vers
new file mode 100644
index 0000000..097a15a
--- /dev/null
+++ b/tools/vdb-validate/check-corrupt.vers
@@ -0,0 +1 @@
+2.6.2
diff --git a/tools/vdb-validate/check-corrupt.vers.h b/tools/vdb-validate/check-corrupt.vers.h
new file mode 100644
index 0000000..581f280
--- /dev/null
+++ b/tools/vdb-validate/check-corrupt.vers.h
@@ -0,0 +1 @@
+#define CHECK_CORRUPT_VERS 0x02060002
diff --git a/tools/vdb-validate/vdb-validate.c b/tools/vdb-validate/vdb-validate.c
index 6953772..a042be8 100644
--- a/tools/vdb-validate/vdb-validate.c
+++ b/tools/vdb-validate/vdb-validate.c
@@ -31,6 +31,7 @@
#include <kapp/main.h>
#include <kapp/args.h>
+#include <kapp/log-xml.h>
#include <kdb/manager.h>
#include <kdb/database.h>
@@ -80,12 +81,29 @@
#include <string.h>
#include <ctype.h>
#include <assert.h>
+#include <math.h>
#include "vdb-validate.vers.h"
+/* Fallback MIN/MAX, defined only when no earlier header provided them.
+   Each argument appears twice in the expansion, so arguments with side
+   effects (e.g. i++) must not be passed. */
+#ifndef MIN
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#ifndef MAX
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#endif
+
#define RELEASE(type, obj) do { rc_t rc2 = type##Release(obj); \
if (rc2 != 0 && rc == 0) { rc = rc2; } obj = NULL; } while (false)
+/* Upper bound on the number of SECONDARY_ALIGNMENT rows buffered per pass of
+   the secondary data checks. The replacement list is parenthesized so the
+   macro behaves as a single value in any expression (e.g. x / MACRO). */
+#define SDC_ROW_CHUNK_MAX (8ull*1024ull*1024ull)
+
+/* Debug tracing: change the 0 to 1 to route DBG_MSG((fmt, ...)) to KOutMsg;
+   otherwise the macro expands to nothing. Note the double parentheses at
+   the call site. */
+#if 0
+#define DBG_MSG(args) KOutMsg args
+#else
+#define DBG_MSG(args)
+#endif
+
static bool exhaustive;
static bool md5_required;
static bool ref_int_check;
@@ -704,6 +722,7 @@ static rc_t get_schema_info(KMetadata const *meta, char buffer[], size_t bsz,
}
}
}
+ KMDataNodeRelease(node);
}
return rc;
}
@@ -726,7 +745,12 @@ static rc_t get_db_schema_info(VDatabase const *db, char buffer[], size_t bsz,
rc_t rc = VDatabaseOpenMetadataRead(db, &meta);
*(*vers = &buffer[0]) = '\0';
- if (rc == 0) rc = get_schema_info(meta, buffer, bsz, vers);
+ if (rc == 0)
+ {
+ rc = get_schema_info(meta, buffer, bsz, vers);
+ KMetadataRelease(meta);
+ }
+
return rc;
}
@@ -749,6 +773,22 @@ struct vdb_validate_params
bool index_chk;
bool consist_check;
bool exhaustive;
+
+ // secondary data checks parameters
+ bool sdc_enabled;
+ bool sdc_rows_in_percent;
+ union
+ {
+ double percent;
+ uint64_t number;
+ } sdc_rows;
+
+ bool sdc_pa_len_thold_in_percent;
+ union
+ {
+ double percent;
+ uint64_t number;
+ } sdc_pa_len_thold;
};
static rc_t tableConsistCheck(const vdb_validate_params *pb, const VTable *tbl)
@@ -1528,9 +1568,451 @@ static rc_t ric_align_seq_and_pri(char const dbname[],
return rc;
}
+/* referential integrity and data checks for secondary alignment table
+ *
+ *  For every checked SECONDARY_ALIGNMENT row the function
+ *   - follows SEQ_SPOT_ID / SEQ_READ_ID into SEQUENCE to pick the matching
+ *     PRIMARY_ALIGNMENT_ID and READ_LEN entries,
+ *   - compares the HAS_REF_OFFSET cell length of the secondary row with that
+ *     of its primary row (a primary shorter than its secondary is an error),
+ *   - when the primary is longer, requires its length to equal READ_LEN and
+ *     counts the row against the "pa_longer_sa" limit,
+ *   - rejects a TMP_MISMATCH cell that contains '=' (the column is optional).
+ *
+ *  pb      supplies the row limit (sdc_rows) and the "primary longer than
+ *          secondary" limit (sdc_pa_len_thold), each absolute or in percent
+ *  dbname  database name, used in log messages only
+ *  seq, pri, sec
+ *          SEQUENCE, PRIMARY_ALIGNMENT and SECONDARY_ALIGNMENT tables
+ *
+ *  Returns 0 when all checked rows pass, otherwise the rc of the first
+ *  failure; processing stops at that failure.
+ *
+ *  Rows are processed in chunks of at most SDC_ROW_CHUNK_MAX rows. Within a
+ *  chunk the foreign ids are collected first and, when not already ascending,
+ *  handed to sort_key_pairs() before the referenced table is read
+ *  (presumably this orders the pairs by .first so reads are sequential -
+ *  sort_key_pairs() is defined elsewhere, verify there).
+ */
+static rc_t ridc_align_sec(const vdb_validate_params *pb,
+                           char const dbname[],
+                           VTable const *seq,
+                           VTable const *pri,
+                           VTable const *sec)
+{
+    rc_t rc = 0, rc2;
+    VCursor const *seq_cursor = NULL;
+    VCursor const *pri_cursor = NULL;
+    VCursor const *sec_cursor = NULL;
+    VCursor const *sec_cursor2 = NULL;
+
+    uint32_t seq_read_len_idx;
+    uint32_t seq_pa_id_idx;
+    uint32_t pri_has_ref_offset_idx;
+    uint32_t sec_has_ref_offset_idx;
+    uint32_t sec_seq_spot_id_idx;
+    uint32_t sec_seq_read_id_idx;
+    uint32_t sec_tmp_mismatch_idx;
+    bool has_tmp_mismatch = false;
+
+    int64_t sec_id_first;
+    uint64_t sec_row_count;
+
+    size_t chunk_size;
+    id_pair_t *pri_id_pairs = NULL;           /* (primary row id, secondary row id), sortable   */
+    id_pair_t *pri_len_pairs = NULL;          /* (primary row id, primary length), chunk order  */
+    id_pair_t *seq_spot_id_pairs = NULL;      /* (spot id, secondary row id), sortable          */
+    id_pair_t *seq_spot_read_id_pairs = NULL; /* (spot id, 1-based read id), chunk order        */
+    uint32_t *seq_read_lens = NULL;           /* READ_LEN of the referenced read, chunk order   */
+
+    // SEQUENCE cursor
+    if (rc == 0)
+    {
+        rc2 = VTableCreateCursorRead(seq, &seq_cursor);
+        if (rc2 == 0)
+            rc2 = VCursorAddColumn(seq_cursor, &seq_read_len_idx, "%s", "READ_LEN");
+        if (rc2 == 0)
+            rc2 = VCursorAddColumn(seq_cursor, &seq_pa_id_idx, "%s", "PRIMARY_ALIGNMENT_ID");
+        if (rc2 == 0)
+            rc2 = VCursorOpen(seq_cursor);
+        if (rc2 != 0)
+        {
+            rc = rc2;
+            (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                "alignment table SEQUENCE can not be read", "name=%s", dbname));
+        }
+    }
+
+    // PRIMARY_ALIGNMENT cursor
+    if (rc == 0)
+    {
+        rc2 = VTableCreateCursorRead(pri, &pri_cursor);
+        if (rc2 == 0)
+            rc2 = VCursorAddColumn(pri_cursor, &pri_has_ref_offset_idx, "%s", "(bool)HAS_REF_OFFSET");
+        if (rc2 == 0)
+            rc2 = VCursorOpen(pri_cursor);
+        if (rc2 != 0)
+        {
+            rc = rc2;
+            (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                "alignment table PRIMARY_ALIGNMENT can not be read", "name=%s", dbname));
+        }
+    }
+
+    // SECONDARY_ALIGNMENT cursor
+    if (rc == 0)
+    {
+        rc2 = VTableCreateCursorRead(sec, &sec_cursor);
+        if (rc2 == 0)
+            rc2 = VCursorAddColumn(sec_cursor, &sec_has_ref_offset_idx, "%s", "(bool)HAS_REF_OFFSET");
+        if (rc2 == 0)
+        {
+            // TMP_MISMATCH is optional: failing to add it only disables its check
+            rc2 = VCursorAddColumn(sec_cursor, &sec_tmp_mismatch_idx, "%s", "TMP_MISMATCH");
+            has_tmp_mismatch = (rc2 == 0);
+            rc2 = 0;
+        }
+        if (rc2 == 0)
+            rc2 = VCursorOpen(sec_cursor);
+        if (rc2 != 0)
+        {
+            rc = rc2;
+            (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                "alignment table SECONDARY_ALIGNMENT can not be read", "name=%s", dbname));
+        }
+    }
+
+    // second SECONDARY_ALIGNMENT cursor, used for the SEQ_SPOT_ID / SEQ_READ_ID pass
+    if (rc == 0)
+    {
+        rc2 = VTableCreateCursorRead(sec, &sec_cursor2);
+        if (rc2 == 0)
+            rc2 = VCursorAddColumn(sec_cursor2, &sec_seq_spot_id_idx, "%s", "SEQ_SPOT_ID");
+        if (rc2 == 0)
+            rc2 = VCursorAddColumn(sec_cursor2, &sec_seq_read_id_idx, "%s", "SEQ_READ_ID");
+        if (rc2 == 0)
+            rc2 = VCursorOpen(sec_cursor2);
+        if (rc2 != 0)
+        {
+            rc = rc2;
+            (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                "alignment table SECONDARY_ALIGNMENT can not be read", "name=%s", dbname));
+        }
+    }
+
+    if (rc == 0)
+        rc = VCursorIdRange(sec_cursor, sec_has_ref_offset_idx, &sec_id_first, &sec_row_count);
+    if (rc != 0)
+        (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+            "alignment table can not be read", "name=%s", dbname));
+
+    if (rc == 0)
+    {
+        chunk_size = sec_row_count > SDC_ROW_CHUNK_MAX ? SDC_ROW_CHUNK_MAX : sec_row_count;
+
+        pri_id_pairs = malloc(sizeof(*pri_id_pairs) * chunk_size);
+        pri_len_pairs = malloc(sizeof(*pri_len_pairs) * chunk_size);
+        seq_spot_id_pairs = malloc(sizeof(*seq_spot_id_pairs) * chunk_size);
+        seq_spot_read_id_pairs = malloc(sizeof(*seq_spot_read_id_pairs) * chunk_size);
+        seq_read_lens = malloc(sizeof(*seq_read_lens) * chunk_size);
+
+        // every buffer is indexed below, so every allocation has to be checked;
+        // malloc(0) may return NULL, which is not a failure for an empty table
+        if (chunk_size != 0 &&
+            (pri_id_pairs == NULL ||
+             pri_len_pairs == NULL ||
+             seq_spot_id_pairs == NULL ||
+             seq_spot_read_id_pairs == NULL ||
+             seq_read_lens == NULL))
+        {
+            rc = RC(rcExe, rcDatabase, rcValidating, rcMemory, rcExhausted);
+        }
+    }
+
+    if (rc == 0)
+    {
+        bool reported_about_no_pa = false;
+        uint64_t pa_longer_sa_rows = 0;
+        uint64_t pa_longer_sa_limit;
+        uint64_t sec_row_limit;
+
+        int64_t sec_row_id_start = sec_id_first;
+        int64_t sec_row_id_end;
+
+        int64_t chunk;
+
+        // set limits from params; an absolute value of 0 (or one above the
+        // row count) means "the whole table"
+        if (pb->sdc_pa_len_thold_in_percent)
+            pa_longer_sa_limit = ceil( pb->sdc_pa_len_thold.percent * sec_row_count );
+        else if (pb->sdc_pa_len_thold.number == 0 || pb->sdc_pa_len_thold.number > sec_row_count)
+            pa_longer_sa_limit = sec_row_count;
+        else
+            pa_longer_sa_limit = pb->sdc_pa_len_thold.number;
+
+        if (pb->sdc_rows_in_percent)
+            sec_row_limit = ceil( pb->sdc_rows.percent * sec_row_count );
+        else if (pb->sdc_rows.number == 0 || pb->sdc_rows.number > sec_row_count)
+            sec_row_limit = sec_row_count;
+        else
+            sec_row_limit = pb->sdc_rows.number;
+
+        sec_row_id_end = sec_id_first + MIN(sec_row_count, sec_row_limit);
+
+        // 'chunk' is the row id of the first SECONDARY_ALIGNMENT row of the current chunk
+        for ( chunk = sec_row_id_start; chunk < sec_row_id_end; chunk += chunk_size )
+        {
+            int64_t i;
+            int64_t i_count = MIN(chunk_size, sec_row_id_end - chunk);
+            const void * data_ptr = NULL;
+            uint32_t data_len;
+            int64_t last_seq_spot_id = INT64_MIN;
+            int64_t last_pri_row_id = INT64_MIN;
+            bool ordered = true;
+
+            // Load chunk of SEQ_SPOT_ID and sort ids for faster data retrieval
+            for ( i = 0; i < i_count; ++i )
+            {
+                int64_t seq_spot_id;
+                int64_t sec_row_id = i + chunk;
+
+                // SECONDARY_ALIGNMENT:SEQ_SPOT_ID
+                rc = VCursorCellDataDirect ( sec_cursor2, sec_row_id, sec_seq_spot_id_idx, NULL, (const void**)&data_ptr, NULL, &data_len );
+                if ( rc != 0 || data_ptr == NULL || data_len != 1 )
+                {
+                    if (rc == 0)
+                        rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_SPOT_ID column, row_id: $(ROW_ID)",
+                        "name=%s,ROW_ID=%ld", dbname, sec_row_id));
+                    break;
+                }
+
+                seq_spot_id = *(const int64_t *)data_ptr;
+                DBG_MSG(("SECONDARY_ALIGNMENT:%ld SEQ_SPOT_ID column = %ld\n", sec_row_id, seq_spot_id));
+                if (seq_spot_id == 0)
+                {
+                    rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "SECONDARY_ALIGNMENT:$(ROW_ID) has SEQ_SPOT_ID = 0", "name=%s,ROW_ID=%ld", dbname, sec_row_id));
+                    break;
+                }
+
+                ordered &= last_seq_spot_id <= seq_spot_id;
+                last_seq_spot_id = seq_spot_id;
+
+                seq_spot_id_pairs[i].first = seq_spot_id;
+                seq_spot_id_pairs[i].second = sec_row_id;
+
+                // SECONDARY_ALIGNMENT:SEQ_READ_ID
+                rc = VCursorCellDataDirect ( sec_cursor2, sec_row_id, sec_seq_read_id_idx, NULL, (const void**)&data_ptr, NULL, &data_len );
+                if ( rc != 0 || data_ptr == NULL || data_len != 1 )
+                {
+                    if (rc == 0)
+                        rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, SEQ_READ_ID column, row_id: $(ROW_ID)",
+                        "name=%s,ROW_ID=%ld", dbname, sec_row_id));
+                    break;
+                }
+                DBG_MSG(("SECONDARY_ALIGNMENT:%ld SEQ_READ_ID column = %d\n", sec_row_id, *(const int32_t *)data_ptr));
+
+                // one-based read index; this array stays in chunk order (never sorted)
+                seq_spot_read_id_pairs[i].first = seq_spot_id;
+                seq_spot_read_id_pairs[i].second = *(const int32_t *)data_ptr;
+            }
+            if (rc != 0)
+                break;
+
+            if (!ordered)
+            {
+                sort_key_pairs(i_count, seq_spot_id_pairs);
+            }
+
+            // Load chunk of PRIMARY_ALIGNMENT_ID (and some other fields) and sort ids for faster data retrieval
+            ordered = true;
+            for ( i = 0; i < i_count; ++i )
+            {
+                int64_t pri_row_id;
+                int64_t sec_row_id = seq_spot_id_pairs[i].second;
+                int64_t seq_spot_id = seq_spot_id_pairs[i].first;
+                // (sec_row_id - chunk) maps back to the row's position in chunk order
+                int32_t seq_read_id = seq_spot_read_id_pairs[sec_row_id - chunk].second;
+
+                // SEQUENCE:PRIMARY_ALIGNMENT_ID
+                rc = VCursorCellDataDirect ( seq_cursor, seq_spot_id, seq_pa_id_idx, NULL, (const void**)&data_ptr, NULL, &data_len );
+                if ( rc != 0 || data_ptr == NULL )
+                {
+                    if (rc == 0)
+                        rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "VCursorCellDataDirect() failed on SEQUENCE table, PRIMARY_ALIGNMENT_ID column, spot_id: $(SPOT_ID)",
+                        "name=%s,SPOT_ID=%ld", dbname, seq_spot_id));
+                    break;
+                }
+
+                if ( seq_read_id < 1 || (uint32_t)seq_read_id > data_len )
+                {
+                    rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "SECONDARY_ALIGNMENT:$(SEC_ROW_ID) SEQ_READ_ID value ($(SEQ_READ_ID)) - 1 based, is out of SEQUENCE:$(SEQ_SPOT_ID) PRIMARY_ALIGNMENT range ($(PRIMARY_ALIGNMENT_LEN))",
+                        "name=%s,SEC_ROW_ID=%ld,SEQ_READ_ID=%d,SEQ_SPOT_ID=%ld,PRIMARY_ALIGNMENT_LEN=%u", dbname, sec_row_id, seq_read_id, seq_spot_id, data_len));
+                    break;
+                }
+
+                pri_row_id = ((const int64_t *)data_ptr)[seq_read_id - 1];
+                DBG_MSG(("SEQUENCE:%ld PRIMARY_ALIGNMENT_ID column = %ld\n", seq_spot_id, pri_row_id));
+                if (pri_row_id == 0)
+                {
+                    // a missing primary is only warned about, once per database
+                    if (!reported_about_no_pa)
+                    {
+                        PLOGMSG (klogWarn, (klogWarn, "Database '$(name)' has secondary alignments without primary", "name=%s", dbname));
+                        reported_about_no_pa = true;
+                    }
+                }
+
+                ordered &= last_pri_row_id <= pri_row_id;
+                last_pri_row_id = pri_row_id;
+
+                pri_id_pairs[i].first = pri_row_id;
+                pri_id_pairs[i].second = sec_row_id;
+
+                // SEQUENCE:READ_LEN
+                rc = VCursorCellDataDirect ( seq_cursor, seq_spot_id, seq_read_len_idx, NULL, (const void**)&data_ptr, NULL, &data_len );
+                if ( rc != 0 || data_ptr == NULL )
+                {
+                    if (rc == 0)
+                        rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "VCursorCellDataDirect() failed on SEQUENCE table, READ_LEN column, row_id: $(ROW_ID)",
+                        "name=%s,ROW_ID=%ld", dbname, seq_spot_id));
+                    break;
+                }
+
+                if ( seq_read_id < 1 || (uint32_t)seq_read_id > data_len )
+                {
+                    rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "SECONDARY_ALIGNMENT:$(SEC_ROW_ID) SEQ_READ_ID value ($(SEQ_READ_ID)) - 1 based, is out of SEQUENCE:$(SEQ_SPOT_ID) READ_LEN range ($(SEQ_READ_LEN_LEN))",
+                        "name=%s,SEC_ROW_ID=%ld,SEQ_READ_ID=%d,SEQ_SPOT_ID=%ld,SEQ_READ_LEN_LEN=%u", dbname, sec_row_id, seq_read_id, seq_spot_id, data_len));
+                    break;
+                }
+
+                seq_read_lens[sec_row_id - chunk] = ((const uint32_t *)data_ptr)[seq_read_id - 1];
+                DBG_MSG(("SEQUENCE:%ld READ_LEN column = %u\n", seq_spot_id, seq_read_lens[sec_row_id - chunk]));
+            }
+
+            if (rc != 0)
+                break;
+
+            if (!ordered)
+            {
+                sort_key_pairs(i_count, pri_id_pairs);
+            }
+
+            // Read primary lengths in pri_id_pairs order, store them back in chunk order
+            for ( i = 0; i < i_count; ++i )
+            {
+                uint32_t pri_len;
+                int sec_i_orig = pri_id_pairs[i].second - chunk;
+                pri_len_pairs[sec_i_orig].first = pri_id_pairs[i].first;
+                if (pri_id_pairs[i].first == 0)
+                {
+                    // no primary alignment: nothing to read, length is a placeholder
+                    pri_len_pairs[sec_i_orig].second = -1;
+                    continue;
+                }
+
+                // PRIMARY_ALIGNMENT:HAS_REF_OFFSET
+                rc = VCursorCellDataDirect ( pri_cursor, pri_len_pairs[sec_i_orig].first, pri_has_ref_offset_idx, NULL, &data_ptr, NULL, &pri_len );
+                if ( rc != 0 )
+                {
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "VCursorCellDataDirect() failed on PRIMARY_ALIGNMENT table, HAS_REF_OFFSET column, row_id: $(ROW_ID)",
+                        "name=%s,ROW_ID=%ld", dbname, pri_len_pairs[sec_i_orig].first));
+                    break;
+                }
+                pri_len_pairs[sec_i_orig].second = pri_len;
+                DBG_MSG(("PRIMARY_ALIGNMENT:%ld HAS_REF_OFFSET column len = %u\n", pri_len_pairs[sec_i_orig].first, pri_len_pairs[sec_i_orig].second));
+            }
+            if (rc != 0)
+                break;
+
+            // Iterate over SECONDARY_ALIGNMENT chunk, having data from other table chunks already loaded
+            for ( i = 0; i < i_count; ++i )
+            {
+                int64_t pri_row_id = pri_len_pairs[i].first;
+                int64_t sec_row_id = i + chunk;
+
+                int64_t seq_spot_id = seq_spot_read_id_pairs[i].first;
+                int32_t seq_read_id = seq_spot_read_id_pairs[i].second;
+
+                uint32_t seq_read_len = seq_read_lens[i];
+
+                uint32_t pri_row_len = pri_len_pairs[i].second;
+                uint32_t sec_row_len;
+
+                // SECONDARY_ALIGNMENT:HAS_REF_OFFSET
+                rc = VCursorCellDataDirect ( sec_cursor, sec_row_id, sec_has_ref_offset_idx, NULL, (const void**)&data_ptr, NULL, &sec_row_len );
+                if ( rc != 0 )
+                {
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, HAS_REF_OFFSET column, row_id: $(ROW_ID)",
+                        "name=%s,ROW_ID=%ld", dbname, sec_row_id));
+                    break;
+                }
+                DBG_MSG(("SECONDARY_ALIGNMENT:%ld HAS_REF_OFFSET column len = %u\n", sec_row_id, sec_row_len));
+
+                if ( has_tmp_mismatch )
+                {
+                    const char * p_sa_tmp_mismatch;
+                    // SECONDARY_ALIGNMENT:TMP_MISMATCH
+                    rc = VCursorCellDataDirect ( sec_cursor, sec_row_id, sec_tmp_mismatch_idx, NULL, (const void**)&p_sa_tmp_mismatch, NULL, &data_len );
+                    if ( rc != 0 || p_sa_tmp_mismatch == NULL )
+                    {
+                        if (rc == 0)
+                            rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                        (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                            "VCursorCellDataDirect() failed on SECONDARY_ALIGNMENT table, TMP_MISMATCH column, row_id: $(ROW_ID)",
+                            "name=%s,ROW_ID=%ld", dbname, sec_row_id));
+                        break;
+                    }
+
+                    if (string_chr(p_sa_tmp_mismatch, data_len, '=') != NULL)
+                    {
+                        rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                        (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                            "SECONDARY_ALIGNMENT:$(ROW_ID) TMP_MISMATCH column contains '='",
+                            "name=%s,ROW_ID=%ld", dbname, sec_row_id));
+                        break;
+                    }
+                }
+
+                DBG_MSG(("Performing length check SA:%ld len = %u\t PA:%ld len = %u\t SEQ:%ld len = %u\n", sec_row_id, sec_row_len, pri_row_id, pri_row_len, seq_spot_id, seq_read_len));
+                // move on when there is no primary or PRIMARY_ALIGNMENT.len equal to SECONDARY_ALIGNMENT.len
+                if (pri_row_id == 0 || pri_row_len == sec_row_len)
+                    continue;
+
+                if (pri_row_len < sec_row_len)
+                {
+                    rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "PRIMARY_ALIGNMENT:$(PRI_ROW_ID) HAS_REF_OFFSET length ($(PRI_LEN)) less than SECONDARY_ALIGNMENT:$(SEC_ROW_ID) HAS_REF_OFFSET length ($(SEC_LEN))",
+                        "name=%s,PRI_ROW_ID=%ld,SEC_ROW_ID=%ld,PRI_LEN=%u,SEC_LEN=%u", dbname, pri_row_id, sec_row_id, pri_row_len, sec_row_len));
+                    break;
+                }
+
+                // we already know that pri_row_len > sec_row_len
+                ++pa_longer_sa_rows;
+
+                if (pri_row_len != seq_read_len)
+                {
+                    rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "PRIMARY_ALIGNMENT:$(PRI_ROW_ID) HAS_REF_OFFSET length ($(PRI_LEN)) does not match its SEQUENCE:$(SEQ_SPOT_ID) READ_LEN[$(SEQ_READ_ID)] value ($(SEQ_READ_LEN))",
+                        "name=%s,PRI_ROW_ID=%ld,PRI_LEN=%u,SEQ_SPOT_ID=%ld,SEQ_READ_ID=%d,SEQ_READ_LEN=%u", dbname, pri_row_id, pri_row_len, seq_spot_id, seq_read_id, seq_read_len));
+                    break;
+                }
+
+                if (pa_longer_sa_rows >= pa_longer_sa_limit)
+                {
+                    rc = RC(rcExe, rcDatabase, rcValidating, rcData, rcInconsistent);
+                    (void)PLOGERR(klogErr, (klogErr, rc, "Database '$(name)': "
+                        "Limit violation (pa_longer_sa): there are at least $(PA_LONGER_SA_ROWS) alignments where HAS_REF_OFFSET column is longer in PRIMARY_ALIGNMENT than in SECONDARY_ALIGNMENT",
+                        "name=%s,PA_LONGER_SA_ROWS=%lu", dbname, pa_longer_sa_rows));
+                    break;
+                }
+            }
+
+            // a failure in the pass above must end the chunk loop as well:
+            // the next chunk would overwrite rc and the error would be lost
+            if (rc != 0)
+                break;
+        }
+    }
+
+    free(pri_id_pairs);
+    free(pri_len_pairs);
+    free(seq_spot_id_pairs);
+    free(seq_spot_read_id_pairs);
+    free(seq_read_lens);
+
+    VCursorRelease(sec_cursor2);
+    VCursorRelease(sec_cursor);
+    VCursorRelease(pri_cursor);
+    VCursorRelease(seq_cursor);
+    return rc;
+
+}
+
/* database referential integrity check for alignment database */
-static rc_t dbric_align(char const dbname[],
+static rc_t dbric_align(const vdb_validate_params *pb,
+ char const dbname[],
VTable const *pri,
+ VTable const *sec,
VTable const *seq,
VTable const *ref)
{
@@ -1560,11 +2042,21 @@ static rc_t dbric_align(char const dbname[],
rc = rc2;
}
}
+ if (pb->sdc_enabled && (rc == 0 || exhaustive) && (pri != NULL && sec != NULL && seq != NULL)) {
+ rc_t rc2 = ridc_align_sec(pb, dbname, seq, pri, sec);
+ if (rc2 == 0) {
+ (void)PLOGMSG(klogInfo, (klogInfo, "Database '$(dbname)': "
+ "SECONDARY_ALIGNMENT table checks ok", "dbname=%s", dbname));
+ }
+ if (rc == 0) {
+ rc = rc2;
+ }
+ }
return rc;
}
-static rc_t verify_database_align(VDatabase const *db,
+static rc_t verify_database_align(const vdb_validate_params *pb, VDatabase const *db,
char const name[], node_t const nodes[], char const names[])
{
rc_t rc = 0;
@@ -1652,6 +2144,7 @@ static rc_t verify_database_align(VDatabase const *db,
}
while (ref_int_check) {
VTable const *pri = NULL;
+ VTable const *sec = NULL;
VTable const *seq = NULL;
VTable const *ref = NULL;
@@ -1659,6 +2152,10 @@ static rc_t verify_database_align(VDatabase const *db,
rc = VDatabaseOpenTableRead(db, &pri, "PRIMARY_ALIGNMENT");
if (rc) break;
}
+ if ((tables & tbSecondaryAlignment) != 0) {
+ rc = VDatabaseOpenTableRead(db, &sec, "SECONDARY_ALIGNMENT");
+ if (rc) break;
+ }
if ((tables & tbSequence) != 0) {
rc = VDatabaseOpenTableRead(db, &seq, "SEQUENCE");
if (rc) break;
@@ -1667,10 +2164,11 @@ static rc_t verify_database_align(VDatabase const *db,
rc = VDatabaseOpenTableRead(db, &ref, "REFERENCE");
if (rc) break;
}
- rc = dbric_align(name, pri, seq, ref);
+ rc = dbric_align(pb, name, pri, sec, seq, ref);
RELEASE(VTable, ref);
RELEASE(VTable, seq);
+ RELEASE(VTable, sec);
RELEASE(VTable, pri);
break;
@@ -1679,7 +2177,7 @@ static rc_t verify_database_align(VDatabase const *db,
return rc;
}
-static rc_t verify_database(VDatabase const *db,
+static rc_t verify_database(const vdb_validate_params *pb, VDatabase const *db,
char const name[], node_t const nodes[], char const names[])
{
char schemaName[1024];
@@ -1698,7 +2196,7 @@ static rc_t verify_database(VDatabase const *db,
/* TODO: verify NCBI:WGS:db:* */
}
else if (strncmp(schemaName, "NCBI:align:db:", 14) == 0) {
- rc = verify_database_align(db, name, nodes, names);
+ rc = verify_database_align(pb, db, name, nodes, names);
}
else if (strcmp(schemaName, "NCBI:SRA:PacBio:smrt:db") == 0) {
/* TODO: verify NCBI:SRA:PacBio:smrt:db */
@@ -1726,7 +2224,7 @@ static rc_t verify_mgr_database(const vdb_validate_params *pb,
rc = VDBManagerOpenDBRead(mgr, &child, NULL, "%s", name);
if (rc == 0) {
- rc = verify_database(child, name, nodes, names);
+ rc = verify_database(pb, child, name, nodes, names);
VDatabaseRelease(child);
}
@@ -2194,6 +2692,17 @@ static const char *USAGE_REF_INT[] =
#define ALIAS_REF_INT "I"
#define OPTION_REF_INT "REFERENTIAL-INTEGRITY"
+/* --sdc:rows : how many SECONDARY_ALIGNMENT rows the secondary data checks
+   look at. parse_args() accepts an absolute number or a percentage written
+   with a trailing '%' (1-100), and giving the option enables the checks. */
+#define OPTION_SDC_ROWS "sdc:rows"
+static const char *USAGE_SDC_ROWS[] =
+{ "Specify maximum amount of secondary alignment rows to look at before saying accession is good, default 100000.",
+ "Specifying 0 will iterate the whole table. Can be in percent (e.g. 5%)",
+ NULL };
+
+/* --sdc:plen_thold : limit on secondary alignments whose primary is longer.
+   parse_args() accepts the same two forms as for --sdc:rows (absolute number
+   or 1-100 with a trailing '%'); giving the option also enables the checks. */
+#define OPTION_SDC_PLEN_THOLD "sdc:plen_thold"
+static const char *USAGE_SDC_PLEN_THOLD[] =
+{ "Specify a threshold for amount of secondary alignment which are shorter (hard-clipped) than corresponding primaries, default 1%.", NULL };
+
+
static const char *USAGE_DRI[] =
{ "Do not check data referential integrity for databases", NULL };
@@ -2212,6 +2721,10 @@ static OptDef options [] =
, { OPTION_REF_INT , ALIAS_REF_INT , NULL, USAGE_REF_INT , 1, true , false }
, { OPTION_CNS_CHK , ALIAS_CNS_CHK , NULL, USAGE_CNS_CHK , 1, true , false }
+ /* secondary alignment table data check options */
+ , { OPTION_SDC_ROWS, NULL , NULL, USAGE_SDC_ROWS, 1, true , false }
+ , { OPTION_SDC_PLEN_THOLD, NULL , NULL, USAGE_SDC_PLEN_THOLD, 1, true , false }
+
/* not printed by --help */
, { "dri" , NULL , NULL, USAGE_DRI , 1, false, false }
, { "index-only" ,NULL , NULL, USAGE_IND_ONLY, 1, false, false }
@@ -2221,6 +2734,7 @@ static OptDef options [] =
, { OPTION_blob_crc, ALIAS_blob_crc, NULL, USAGE_BLOB_CRC, 1, false, false }
, { OPTION_ref_int , ALIAS_ref_int , NULL, USAGE_REF_INT , 1, false, false }
};
+
/*
#define NUM_SILENT_TRAILING_OPTIONS 5
@@ -2263,6 +2777,8 @@ rc_t CC Usage ( const Args * args )
HelpOptionLine(ALIAS_REF_INT , OPTION_REF_INT , "yes | no", USAGE_REF_INT);
HelpOptionLine(ALIAS_CNS_CHK , OPTION_CNS_CHK , "yes | no", USAGE_CNS_CHK);
HelpOptionLine(ALIAS_EXHAUSTIVE, OPTION_EXHAUSTIVE, NULL, USAGE_EXHAUSTIVE);
+ HelpOptionLine(NULL , OPTION_SDC_ROWS, "rows" , USAGE_SDC_ROWS);
+ HelpOptionLine(NULL , OPTION_SDC_PLEN_THOLD, "threshold", USAGE_SDC_PLEN_THOLD);
/*
#define NUM_LISTABLE_OPTIONS \
@@ -2297,6 +2813,10 @@ rc_t parse_args ( vdb_validate_params *pb, Args *args )
pb->consist_check = false;
ref_int_check = pb -> blob_crc
= pb -> md5_chk_explicit = md5_required = true;
+ pb -> sdc_rows_in_percent = false;
+ pb -> sdc_rows.number = 100000;
+ pb -> sdc_pa_len_thold_in_percent = true;
+ pb -> sdc_pa_len_thold.percent = 0.01;
{
rc = ArgsOptionCount(args, OPTION_CNS_CHK, &cnt);
@@ -2402,6 +2922,112 @@ rc_t parse_args ( vdb_validate_params *pb, Args *args )
}
}
}
+ {
+ rc = ArgsOptionCount ( args, OPTION_SDC_ROWS, &cnt );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionCount() failed for " OPTION_SDC_ROWS);
+ return rc;
+ }
+
+ if (cnt > 0)
+ {
+ uint64_t value;
+ size_t value_size;
+ rc = ArgsOptionValue ( args, OPTION_SDC_ROWS, 0, (const void **) &dummy );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionValue() failed for " OPTION_SDC_ROWS);
+ return rc;
+ }
+
+ pb->sdc_enabled = true;
+
+ value_size = string_size ( dummy );
+ if ( value_size >= 1 && dummy[value_size - 1] == '%' )
+ {
+ value = string_to_U64 ( dummy, value_size - 1, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SDC_ROWS);
+ return rc;
+ }
+ else if (value == 0 || value > 100)
+ {
+ rc = RC(rcExe, rcArgv, rcParsing, rcParam, rcInvalid);
+ LOGERR (klogInt, rc, OPTION_SDC_ROWS " has illegal percentage value (has to be 1-100%)" );
+ return rc;
+ }
+
+ pb->sdc_rows_in_percent = true;
+ pb->sdc_rows.percent = (double)value / 100;
+ }
+ else
+ {
+ value = string_to_U64 ( dummy, value_size, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SDC_ROWS);
+ return rc;
+ }
+ pb->sdc_rows_in_percent = false;
+ pb->sdc_rows.number = value;
+ }
+ }
+ }
+ {
+ rc = ArgsOptionCount ( args, OPTION_SDC_PLEN_THOLD, &cnt );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionCount() failed for " OPTION_SDC_PLEN_THOLD);
+ return rc;
+ }
+
+ if (cnt > 0)
+ {
+ uint64_t value;
+ size_t value_size;
+ rc = ArgsOptionValue ( args, OPTION_SDC_PLEN_THOLD, 0, (const void **) &dummy );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "ArgsOptionValue() failed for " OPTION_SDC_PLEN_THOLD);
+ return rc;
+ }
+
+ pb->sdc_enabled = true;
+
+ value_size = string_size ( dummy );
+ if ( value_size >= 1 && dummy[value_size - 1] == '%' )
+ {
+ value = string_to_U64 ( dummy, value_size - 1, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SDC_PLEN_THOLD);
+ return rc;
+ }
+ else if (value == 0 || value > 100)
+ {
+ rc = RC(rcExe, rcArgv, rcParsing, rcParam, rcInvalid);
+ LOGERR (klogInt, rc, OPTION_SDC_PLEN_THOLD " has illegal percentage value (has to be 1-100%)" );
+ return rc;
+ }
+
+ pb->sdc_pa_len_thold_in_percent = true;
+ pb->sdc_pa_len_thold.percent = (double)value / 100;
+ }
+ else
+ {
+ value = string_to_U64 ( dummy, value_size, &rc );
+ if (rc)
+ {
+ LOGERR (klogInt, rc, "string_to_U64() failed for " OPTION_SDC_PLEN_THOLD);
+ return rc;
+ }
+ pb->sdc_pa_len_thold_in_percent = false;
+ pb->sdc_pa_len_thold.number = value;
+ }
+ }
+ }
if ( pb -> blob_crc || pb -> index_chk )
pb -> md5_chk = pb -> md5_chk_explicit;
@@ -2445,24 +3071,19 @@ rc_t vdb_validate_params_init ( vdb_validate_params *pb )
return rc;
}
-rc_t CC KMain ( int argc, char *argv [] )
+static rc_t main_with_args(Args *const args)
{
- Args * args;
- rc_t rc = ArgsMakeAndHandle ( & args, argc, argv, 1,
- options, sizeof options / sizeof options [ 0 ] );
- if ( rc != 0 )
- LOGERR ( klogErr, rc, "Failed to parse command line" );
- else
- {
+ XMLLogger const *xlogger = NULL;
+ rc_t rc = XMLLogger_Make(&xlogger, NULL, args);
+
+ if (rc) {
+ LOGERR(klogErr, rc, "Failed to make XML logger");
+ }
+ else {
uint32_t pcount;
rc = ArgsParamCount ( args, & pcount );
if ( rc != 0 )
LOGERR ( klogErr, rc, "Failed to count command line parameters" );
- else if ( argc <= 1 )
- {
- rc = RC ( rcExe, rcPath, rcValidating, rcParam, rcInsufficient );
- MiniUsage ( args );
- }
else if ( pcount == 0 )
{
rc = RC ( rcExe, rcPath, rcValidating, rcParam, rcInsufficient );
@@ -2522,7 +3143,23 @@ rc_t CC KMain ( int argc, char *argv [] )
vdb_validate_params_whack ( & pb );
}
}
+ XMLLogger_Release(xlogger);
+ }
+ return rc;
+}
+
+rc_t CC KMain(int argc, char *argv[])
+{
+ Args *args = NULL;
+ rc_t rc = ArgsMakeAndHandle(&args, argc, argv, 2,
+ options, sizeof(options)/sizeof(options[0]),
+ XMLLogger_Args, XMLLogger_ArgsQty);
+ if ( rc != 0 )
+ LOGERR ( klogErr, rc, "Failed to parse command line" );
+ else
+ {
+ rc = main_with_args(args);
ArgsWhack ( args );
}
diff --git a/tools/vdb-validate/vdb-validate.vers b/tools/vdb-validate/vdb-validate.vers
index 35d16fb..097a15a 100644
--- a/tools/vdb-validate/vdb-validate.vers
+++ b/tools/vdb-validate/vdb-validate.vers
@@ -1 +1 @@
-2.5.7
+2.6.2
diff --git a/tools/vdb-validate/vdb-validate.vers.h b/tools/vdb-validate/vdb-validate.vers.h
index 7d1cfb0..90495a8 100644
--- a/tools/vdb-validate/vdb-validate.vers.h
+++ b/tools/vdb-validate/vdb-validate.vers.h
@@ -1 +1 @@
-#define VDB_VALIDATE_VERS 0x02050007
+#define VDB_VALIDATE_VERS 0x02060002
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/sra-sdk.git
More information about the debian-med-commit
mailing list