[med-svn] [Git][med-team/bowtie2][upstream] New upstream version 2.3.4.2
Alexandre Mestiashvili
gitlab at salsa.debian.org
Tue Aug 14 13:27:45 BST 2018
Alexandre Mestiashvili pushed to branch upstream at Debian Med / bowtie2
Commits:
157dbc20 by Alexandre Mestiashvili at 2018-08-14T09:16:33Z
New upstream version 2.3.4.2
- - - - -
26 changed files:
- + CMakeLists.txt
- MANUAL
- MANUAL.markdown
- Makefile
- NEWS
- VERSION
- aln_sink.cpp
- bowtie2-build
- bowtie2-inspect
- bt2_idx.h
- bt2_search.cpp
- doc/manual.html
- doc/website/manual.ssi
- doc/website/recent_news.ssi
- doc/website/rhsidebar.ssi
- + example/reads/combined_reads.bam
- formats.h
- opts.h
- pat.cpp
- pat.h
- presets.cpp
- read.h
- ref_read.cpp
- sam.cpp
- sam.h
- scripts/test/simple_tests.pl
Changes:
=====================================
CMakeLists.txt
=====================================
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,172 @@
+cmake_minimum_required(VERSION 3.1) # project(VERSION/LANGUAGES), CMP0048, CMAKE_CXX_STANDARD and Threads::Threads all need CMake >= 3.1
+
+cmake_policy(SET CMP0048 NEW) # project() manages the PROJECT_VERSION* variables
+cmake_policy(SET CMP0005 NEW) # add_definitions() values are escaped automatically
+
+project(bowtie2 LANGUAGES CXX VERSION "2.3.4.2")
+
+enable_testing() # allow add_test() registrations in this directory
+
+include(CTest) # CTest module; provides the BUILD_TESTING option
+include(ProcessorCount) # defines the ProcessorCount() function called below
+include(CheckSymbolExists) # NOTE(review): check_symbol_exists() is never called in this file
+
+ProcessorCount(NUM_CORES) # NOTE(review): NUM_CORES is not referenced again in this file
+
+option(BOWTIE_MM "enable bowtie2 memory mapping" ON)
+option(BOWTIE_SHARED_MEM "enable shared memory mapping" ON) # name matches the MINGW override below (was misspelled BOWITE_SHARED_MM)
+
+set(CMAKE_CXX_STANDARD 98)
+set(NO_TBB ${NO_TBB}) # the self-assignments mirror user-supplied -D switches into normal variables
+set(NO_SPINLOCK ${NO_SPINLOCK}) # stray comma removed: it used to set a variable literally named "NO_SPINLOCK,"
+set(WITH_THREAD_PROFILING ${WITH_THREAD_PROFILING})
+set(POPCNT_CAPABILITY ${POPCNT_CAPABILITY})
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -g3 -Wall -msse2") # NOTE(review): -m64/-msse2 are x86-64 specific and applied unconditionally
+set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -funroll-loops")
+set(INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables") # NOTE(review): no install() rule in this file consumes INSTALL_DIR
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS on) # write compile_commands.json into the build tree
+
+if (MINGW) # Windows/MinGW: switch off the features the platform lacks
+ set(BOWTIE_MM OFF) # memory mapped files not supported on Windows; a second option() call cannot change the already-cached ON
+ set(BOWTIE_SHARED_MEM OFF) # shared memory not supported on Windows
+endif()
+
+if (APPLE) # Xcode generator: emit dSYM bundles for debug information
+ set(CMAKE_XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym")
+endif()
+
+set(BOWTIE2_BIN_LIST # names of all six executable targets; used at the end of the file to apply DEBUG_POSTFIX
+ bowtie2-build-s
+ bowtie2-build-l
+ bowtie2-align-s
+ bowtie2-align-l
+ bowtie2-inspect-s
+ bowtie2-inspect-l
+ )
+
+set(SHARED_CPPS # sources compiled into every executable; tinythread.cpp is appended later when no TBB library is found
+ ccnt_lut.cpp
+ ref_read.cpp
+ alphabet.cpp
+ shmem.cpp
+ edit.cpp
+ bt2_idx.cpp
+ bt2_io.cpp
+ bt2_util.cpp
+ reference.cpp
+ ds.cpp
+ multikey_qsort.cpp
+ limit.cpp
+ random_source.cpp
+ )
+
+set(SEARCH_CPPS # sources used only by the bowtie2-align-s / bowtie2-align-l executables
+ qual.cpp pat.cpp sam.cpp
+ read_qseq.cpp aligner_seed_policy.cpp
+ aligner_seed.cpp
+ aligner_seed2.cpp
+ aligner_sw.cpp
+ aligner_sw_driver.cpp aligner_cache.cpp
+ aligner_result.cpp ref_coord.cpp mask.cpp
+ pe.cpp aln_sink.cpp dp_framer.cpp
+ scoring.cpp presets.cpp unique.cpp
+ simple_func.cpp
+ random_util.cpp
+ aligner_bt.cpp sse_util.cpp
+ aligner_swsse.cpp outq.cpp
+ aligner_swsse_loc_i16.cpp
+ aligner_swsse_ee_i16.cpp
+ aligner_swsse_loc_u8.cpp
+ aligner_swsse_ee_u8.cpp
+ aligner_driver.cpp
+ bowtie_main.cpp
+ bt2_search.cpp
+ )
+
+set(BUILD_CPPS # sources used only by the bowtie2-build-s / bowtie2-build-l executables
+ bt2_build.cpp
+ diff_sample.cpp
+ bowtie_build_main.cpp)
+
+set(INSPECT_CPPS # sources used only by the bowtie2-inspect-s / bowtie2-inspect-l executables
+ bt2_inspect.cpp
+ )
+
+string(TIMESTAMP BUILD_DATE) # configure-time timestamp, passed to the code below as BUILD_TIME
+execute_process(COMMAND ${CMAKE_CXX_COMPILER} -v OUTPUT_VARIABLE COMPILER_VERSION ERROR_VARIABLE COMPILER_VERSION) # GCC/Clang print the -v banner on stderr; capture both pipes so it neither leaks into the configure output nor leaves the variable empty
+
+add_definitions( # macros visible to every target defined in this file
+ -DBOWTIE2
+ -DBUILD_HOST="${CMAKE_HOST_SYSTEM}"
+ -DBUILD_TIME="${BUILD_DATE}"
+ -DCOMPILER_VERSION="${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_VERSION}"
+ -DBOWTIE2_VERSION="${PROJECT_VERSION}"
+ -D_LARGEFILE_SOURCE
+ -D_FILE_OFFSET_BITS=64
+ -D_GNU_SOURCE
+ )
+
+if (WITH_AFFINITY) # each user-supplied switch below maps to one preprocessor macro
+ add_definitions(-DWITH_AFFINITY=1)
+endif()
+
+if (NO_SPINLOCK)
+ add_definitions(-DNO_SPINLOCK)
+endif()
+
+if (POPCNT_CAPABILITY)
+ add_definitions(-DPOPCNT_CAPABILITY)
+ include_directories(${PROJECT_SOURCE_DIR}/third_party) # absolute path: a bare "-I third_party" is resolved against the build directory and breaks out-of-source builds
+endif()
+
+if (WITH_THREAD_PROFILING)
+ add_definitions(-DPER_THREAD_TIMING=1)
+endif()
+
+if (CMAKE_BUILD_TYPE STREQUAL "Release") # NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators
+ add_definitions(-DNDEBUG)
+endif()
+
+find_package(Threads REQUIRED) # configuration stops here if no thread library is available
+find_package(ZLIB REQUIRED)
+find_path(TBB_INCLUDE_PATH tbb) # directory that contains an entry named "tbb", if one is found
+find_library(TBB_LIBRARY_PATH tbb) # evaluates as false in if() when the library is not found
+find_library(TBB_MALLOC_LIBRARY_PATH tbbmalloc)
+
+if (ZLIB_FOUND) # directory-scoped: applies to every target created afterwards
+ link_libraries(${ZLIB_LIBRARIES})
+ include_directories(${ZLIB_INCLUDE_DIRS})
+endif()
+
+if (Threads_FOUND)
+ link_libraries(Threads::Threads)
+endif()
+
+if (NO_TBB OR NOT TBB_LIBRARY_PATH) # honour an explicit NO_TBB request even when a TBB library is installed
+ set(SHARED_CPPS ${SHARED_CPPS} tinythread.cpp) # fall back to the bundled tinythread implementation
+else()
+ if (NOT NO_QUEUELOCK) # NOTE(review): with NO_QUEUELOCK set, TBB is linked but WITH_TBB is not defined and tinythread.cpp is not compiled; confirm intended
+ add_definitions(-DWITH_TBB -DWITH_QUEUELOCK=1 -DNO_SPINLOCK)
+ endif()
+ include_directories(${TBB_INCLUDE_PATH})
+ link_libraries(${TBB_LIBRARY_PATH} ${TBB_MALLOC_LIBRARY_PATH})
+endif()
+
+include_directories(${PROJECT_SOURCE_DIR}) # put the project root on the include path of every target
+get_directory_property(COMPILER_DEFS COMPILE_DEFINITIONS) # NOTE(review): COMPILER_DEFS is not used afterwards in this file
+add_definitions(-DCOMPILER_OPTIONS="${CMAKE_CXX_FLAGS}") # hands the compiler flag string to the code as a macro
+
+# Build each tool twice: once with the "-s" suffix and once with the "-l" suffix.
+foreach(IDX_SUFFIX s l)
+ add_executable(bowtie2-align-${IDX_SUFFIX} ${SEARCH_CPPS} ${SHARED_CPPS})
+ add_executable(bowtie2-build-${IDX_SUFFIX} ${BUILD_CPPS} ${SHARED_CPPS})
+ add_executable(bowtie2-inspect-${IDX_SUFFIX} ${INSPECT_CPPS} ${SHARED_CPPS})
+endforeach()
+
+set_property(TARGET bowtie2-align-l bowtie2-build-l bowtie2-inspect-l APPEND PROPERTY COMPILE_DEFINITIONS BOWTIE2_64BIT_INDEX) # APPEND: a second COMPILE_FLAGS assignment used to overwrite this macro for bowtie2-inspect-l
+set_property(TARGET bowtie2-inspect-s bowtie2-inspect-l APPEND PROPERTY COMPILE_DEFINITIONS BOWTIE_INSPECT_MAIN) # both inspect binaries get this; the -l one keeps BOWTIE2_64BIT_INDEX as well
+set_target_properties(${BOWTIE2_BIN_LIST} PROPERTIES DEBUG_POSTFIX "-debug") # Debug builds produce e.g. bowtie2-align-s-debug
+
+add_test(NAME simple-align COMMAND ${PROJECT_SOURCE_DIR}/bowtie2 -x example/index/lambda_virus example/reads/longreads.fq WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) # the example/... paths are relative to the source tree, so run the test from there
=====================================
MANUAL
=====================================
--- a/MANUAL
+++ b/MANUAL
@@ -3,33 +3,35 @@
INTRODUCTION
-What is Bowtie 2?
-
Bowtie 2 is an ultrafast and memory-efficient tool for aligning
sequencing reads to long reference sequences. It is particularly good at
-aligning reads of about 50 up to 100s or 1,000s of characters to
-relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the genome
-with an FM Index (based on the Burrows-Wheeler Transform or BWT) to keep
-its memory footprint small: for the human genome, its memory footprint
-is typically around 3.2 gigabytes of RAM. Bowtie 2 supports gapped,
-local, and paired-end alignment modes. Multiple processors can be used
-simultaneously to achieve greater alignment speed. Cufflinks: a tool for
-transcriptome assembly and isoform quantitiation from Bowtie 2 outputs
-alignments in SAM format, enabling interoperation with a large number of
-other tools (e.g. SAMtools, GATK) that use SAM. Bowtie 2 is distributed
-under the GPLv3 license, and it runs on the command line under Windows,
-Mac OS X and Linux.
+aligning reads of about 50 up to 100s of characters to relatively long
+(e.g. mammalian) genomes. Bowtie 2 indexes the genome with an FM Index
+(based on the Burrows-Wheeler Transform or BWT) to keep its memory
+footprint small: for the human genome, its memory footprint is typically
+around 3.2 gigabytes of RAM. Bowtie 2 supports gapped, local, and
+paired-end alignment modes. Multiple processors can be used
+simultaneously to achieve greater alignment speed.
+
+Bowtie 2 outputs alignments in SAM format, enabling interoperation with
+a large number of other tools (e.g. SAMtools, GATK) that use SAM. Bowtie
+2 is distributed under the GPLv3 license, and it runs on the command
+line under Windows, Mac OS X and Linux.
Bowtie 2 is often the first step in pipelines for comparative genomics,
including for variation calling, ChIP-seq, RNA-seq, BS-seq. Bowtie 2 and
Bowtie (also called “Bowtie 1” here) are also tightly integrated into
-some tools, including TopHat: a fast splice junction mapper for RNA-seq
-reads, RNA-seq reads, Crossbow: a cloud-enabled software tool for
-analyzing resequencing data, and Myrna: a cloud-enabled software tool
-for aligning RNA-seq reads and measuring differential gene expression.
+many other tools, some of which are listed here.
+
+If you use Bowtie 2 for your published research, please cite our work.
+Papers describing Bowtie 2 are:
-If you use Bowtie 2 for your published research, please cite the Bowtie
-paper. Thank you!
+- Langmead B, Wilks C, Antonescu V, Charles R. Scaling read aligners
+ to hundreds of threads on general-purpose processors.
+ _Bioinformatics_. 2018 Jul 18. doi: 10.1093/bioinformatics/bty648.
+
+- Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2.
+ _Nature Methods_. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923.
How is Bowtie 2 different from Bowtie 1?
@@ -86,38 +88,30 @@ Bowtie 1’s.
What isn’t Bowtie 2?
-Bowtie 1 and Bowtie 2 are not general-purpose alignment tools like
-MUMmer, BLAST or Vmatch. Bowtie 2 works best when aligning to large
-genomes, though it supports arbitrarily small reference sequences
-(e.g. amplicons). It handles very long reads (i.e. upwards of 10s or
-100s of kilobases), but it is optimized for the read lengths and error
-modes yielded by recent sequencers, such as the Illumina HiSeq 2000,
-Roche 454, and Ion Torrent instruments.
-
-If your goal is to align two very large sequences (e.g. two genomes),
-consider using MUMmer. If your goal is very sensitive alignment to a
-relatively short reference sequence (e.g. a bacterial genome), this can
-be done with Bowtie 2 but you may want to consider using tools like
-NUCmer, BLAT, or BLAST. These tools can be extremely slow when the
-reference genome is long, but are often adequate when the reference is
-short.
+Bowtie 2 is geared toward aligning relatively short sequencing reads to
+long genomes. That said, it handles arbitrarily small reference
+sequences (e.g. amplicons) and very long reads (i.e. upwards of 10s or
+100s of kilobases), though it is slower in those settings. It is
+optimized for the read lengths and error modes yielded by typical
+Illumina sequencers.
-Bowtie 2 does not support alignment of colorspace reads.
+Bowtie 2 does not support alignment of colorspace reads. (Bowtie 1
+does.)
-What does it mean that some older Bowtie 2 versions are “beta”?
-
-We said those Bowtie 2 versions were in “beta” to convey that it was not
-as polished as a tool that had been around for a while, and was still in
-flux. Since version 2.0.1, we declared Bowtie 2 was no longer “beta”.
+OBTAINING BOWTIE 2
-OBTAINING BOWTIE 2
+Bowtie 2 is available from various package managers, notably Bioconda.
+With Bioconda installed, you should be able to install Bowtie 2 with
+conda install bowtie2.
+Containerized versions of Bowtie 2 are also available via the
+Biocontainers project (e.g. via Docker Hub).
-Download Bowtie 2 sources and binaries from the Download section of the
-Sourceforge site. Binaries are available for the Intel x86_64
+You can also download Bowtie 2 sources and binaries from the Download
+section of the Sourceforge site. Binaries are available for the x86_64
architecture running Linux, Mac OS X, and Windows. If you plan to
compile Bowtie 2 yourself, make sure to get the source package, i.e.,
the filename that ends in “-source.zip”.
@@ -145,8 +139,64 @@ Bowtie 2 can be run on many threads. By default, Bowtie 2 uses the
Threading Building Blocks library (TBB) for this. If TBB is not
available on your system (e.g. make prints an error like
tbb/mutex.h: No such file or directory), you can install it yourself
-(see Threading Building Blocks library) or build Bowtie 2 with
-make NO_TBB=1 to use pthreads or Windows native multithreading instead.
+from source (see Threading Building Blocks library) or install it using
+your operating system’s preferred package manager. The table below lists
+some of the commands for a few of the more popular operating systems.
+
+OS
+Sync Package List
+Search
+Install
+Ubuntu, Mint, Debian
+ apt-get update
+
+ apt-cache search tbb
+
+ apt-get install libtbb-dev
+
+Fedora, CentOS
+ yum check-update
+
+yum search tbb
+
+ yum install tbb-devel.x86_64
+
+Arch
+ pacman -Sy
+
+ pacman -Ss tbb
+
+ pacman -S extra/intel-tbb
+
+Gentoo
+ emerge --sync
+
+ emerge --search tbb
+
+ emerge dev-cpp/tbb
+
+MacOS
+ brew update
+
+ brew search tbb
+
+ brew install tbb
+
+FreeBSD
+ portsnap fetch update
+
+ make search name=tbb
+
+ cd /usr/ports/devel/tbb && make install && make clean
+
+The Bowtie 2 Makefile also includes recipes for basic automatic
+dependency management. Running make static-libs && make STATIC_BUILD=1
+will issue a series of commands that will: 1. download TBB and zlib 2.
+compile them as static libraries 3. link the resulting libraries to the
+compiled Bowtie 2 binaries
+
+If all else fails, Bowtie 2 can be built with make NO_TBB=1 to use pthreads or
+Windows native multithreading instead.
Adding to PATH
@@ -923,6 +973,13 @@ Trim <int> bases from 5’ (left) end of each read before alignment
Trim <int> bases from 3’ (right) end of each read before alignment
(default: 0).
+ --trim-to [3:|5:]<int>
+
+Trim reads exceeding <int> bases. Bases will be trimmed from either the
+3’ (right) or 5’ (left) end of the read. If the read end is not
+specified, bowtie 2 will default to trimming from the 3’ (right) end of
+the read. --trim-to and -3/-5 are mutually exclusive.
+
--phred33
Input qualities are ASCII chars equal to the Phred quality plus 33. This
@@ -1416,7 +1473,8 @@ print an asterisk in those fields instead.
--soft-clipped-unmapped-tlen
-Consider soft-clipped bases unmapped when calculating TLEN.
+Consider soft-clipped bases unmapped when calculating TLEN. Only
+available in --local mode.
--sam-no-qname-trunc
=====================================
MANUAL.markdown
=====================================
--- a/MANUAL.markdown
+++ b/MANUAL.markdown
@@ -4,25 +4,18 @@
! of this text document, or see the HTML manual online.
! -->
----
-title: Bowtie 2 Manual
----
-
Introduction
============
-What is Bowtie 2?
------------------
-
[Bowtie 2] is an ultrafast and memory-efficient tool for aligning sequencing
reads to long reference sequences. It is particularly good at aligning reads of
-about 50 up to 100s or 1,000s of characters to relatively long (e.g. mammalian)
+about 50 up to 100s of characters to relatively long (e.g. mammalian)
genomes. Bowtie 2 indexes the genome with an [FM Index][FM Index Wiki] (based on the
[Burrows-Wheeler Transform] or [BWT]) to keep its memory footprint small: for
the human genome, its memory footprint is typically around 3.2 gigabytes of RAM.
Bowtie 2 supports gapped, local, and paired-end alignment modes. Multiple
-processors can be used simultaneously to achieve greater alignment speed.
-[Cufflinks][]: a tool for transcriptome assembly and isoform quantitiation from
+processors can be used simultaneously to achieve greater alignment speed.
+
Bowtie 2 outputs alignments in [SAM] format, enabling interoperation with a
large number of other tools (e.g. [SAMtools], [GATK]) that use SAM. Bowtie 2 is
distributed under the [GPLv3 license], and it runs on the command line under
@@ -30,14 +23,18 @@ Windows, Mac OS X and Linux.
[Bowtie 2] is often the first step in pipelines for comparative genomics,
including for variation calling, ChIP-seq, RNA-seq, BS-seq. [Bowtie 2] and
-[Bowtie] (also called "[Bowtie 1]" here) are also tightly integrated into some
-tools, including [TopHat]: a fast splice junction mapper for RNA-seq reads,
-RNA-seq reads, [Crossbow]: a cloud-enabled software tool for analyzing
-resequencing data, and [Myrna]: a cloud-enabled software tool for aligning
-RNA-seq reads and measuring differential gene expression.
+[Bowtie] (also called "[Bowtie 1]" here) are also tightly integrated into many
+other tools, some of which [are listed here](http://bowtie-bio.sourceforge.net/bowtie2/other_tools.shtml).
+
+If you use [Bowtie 2] for your published research, please cite our work. Papers
+describing Bowtie 2 are:
-If you use [Bowtie 2] for your published research, please cite the [Bowtie
-paper]. Thank you!
+* Langmead B, Wilks C, Antonescu V, Charles R. [Scaling read aligners to hundreds
+of threads on general-purpose processors](https://doi.org/10.1093/bioinformatics/bty648). _Bioinformatics_. 2018 Jul 18. doi:
+10.1093/bioinformatics/bty648.
+
+* Langmead B, Salzberg SL. [Fast gapped-read alignment with Bowtie 2](https://www.nature.com/articles/nmeth.1923). _Nature
+Methods_. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923.
How is Bowtie 2 different from Bowtie 1?
@@ -91,35 +88,27 @@ arguments and genome index format are both different from Bowtie 1's.
What isn't Bowtie 2?
--------------------
-Bowtie 1 and Bowtie 2 are not general-purpose alignment tools like [MUMmer],
-[BLAST] or [Vmatch]. Bowtie 2 works best when aligning to large genomes, though
-it supports arbitrarily small reference sequences (e.g. amplicons). It handles
-very long reads (i.e. upwards of 10s or 100s of kilobases), but it is optimized
-for the read lengths and error modes yielded by recent sequencers, such as the
-Illumina HiSeq 2000, Roche 454, and Ion Torrent instruments.
-
-If your goal is to align two very large sequences (e.g. two genomes), consider
-using [MUMmer]. If your goal is very sensitive alignment to a relatively short
-reference sequence (e.g. a bacterial genome), this can be done with Bowtie 2 but
-you may want to consider using tools like [NUCmer], [BLAT], or [BLAST]. These
-tools can be extremely slow when the reference genome is long, but are often
-adequate when the reference is short.
-
-Bowtie 2 does not support alignment of colorspace reads.
-
+Bowtie 2 is geared toward aligning relatively short sequencing reads to long
+genomes. That said, it handles arbitrarily small reference sequences (e.g.
+amplicons) and very long reads (i.e. upwards of 10s or 100s of kilobases),
+though it is slower in those settings. It is optimized for the read lengths
+and error modes yielded by typical Illumina sequencers.
-What does it mean that some older Bowtie 2 versions are "beta"?
---------------------------------------------------------------
+Bowtie 2 does not support alignment of colorspace reads. (Bowtie 1 does.)
-We said those Bowtie 2 versions were in "beta" to convey that it was not as
-polished as a tool that had been around for a while, and was still in flux.
-Since version 2.0.1, we declared Bowtie 2 was no longer "beta".
Obtaining Bowtie 2
==================
-Download Bowtie 2 sources and binaries from the [Download] section of the
-Sourceforge site. Binaries are available for the Intel `x86_64` architecture
+Bowtie 2 is available from various package managers, notably [Bioconda](https://anaconda.org/bioconda/bowtie2).
+With Bioconda installed, you should be able to install Bowtie 2 with `conda
+install bowtie2`.
+
+Containerized versions of Bowtie 2 are also available via the [Biocontainers](https://BioContainers.pro)
+project (e.g. [via Docker Hub](https://hub.docker.com/r/biocontainers/bowtie2/)).
+
+You can also download Bowtie 2 sources and binaries from the [Download] section
+of the Sourceforge site. Binaries are available for the `x86_64` architecture
running Linux, Mac OS X, and Windows. If you plan to compile Bowtie 2 yourself,
make sure to get the source package, i.e., the filename that ends in
"-source.zip".
@@ -145,9 +134,118 @@ from the MSYS environment.
Bowtie 2 can be run on many threads. By default, Bowtie 2 uses the Threading
Building Blocks library (TBB) for this. If TBB is not available on your system
(e.g. `make` prints an error like `tbb/mutex.h: No such file or directory`),
-you can install it yourself (see [Threading Building Blocks library]) or build
-Bowtie 2 with `make NO_TBB=1` to use pthreads or Windows native multithreading
-instead.
+you can install it yourself from source (see [Threading Building Blocks library])
+or install it using your operating system's preferred package manager.
+The table below lists some of the commands for a few of the more popular
+operating systems.
+
+<table>
+<tr><th>OS</th><th>Sync Package List</th><th>Search</th><th>Install</th></tr>
+<tr><td>Ubuntu, Mint, Debian</td>
+<td>
+
+ apt-get update
+
+</td>
+<td>
+
+ apt-cache search tbb
+
+</td>
+<td>
+
+ apt-get install libtbb-dev
+
+</td></tr>
+<tr><td>Fedora, CentOS</td>
+<td>
+
+ yum check-update
+
+</td>
+<td>
+ yum search tbb
+
+</td>
+<td>
+
+ yum install tbb-devel.x86_64
+
+</td></tr>
+<tr><td>Arch</td>
+<td>
+
+ pacman -Sy
+
+</td>
+<td>
+
+ pacman -Ss tbb
+
+</td>
+<td>
+
+ pacman -S extra/intel-tbb
+
+</td></tr>
+<tr><td>Gentoo</td>
+<td>
+
+ emerge --sync
+
+</td>
+<td>
+
+ emerge --search tbb
+
+</td>
+<td>
+
+ emerge dev-cpp/tbb
+
+</td></tr>
+<tr><td>MacOS</td>
+<td>
+
+ brew update
+
+</td>
+<td>
+
+ brew search tbb
+
+</td>
+<td>
+
+ brew install tbb
+
+</td></tr>
+<tr><td>FreeBSD</td>
+<td>
+
+ portsnap fetch update
+
+</td>
+<td>
+
+ make search name=tbb
+
+</td>
+<td>
+
+ cd /usr/ports/devel/tbb && make install && make clean
+
+</td></tr></table>
+
+The Bowtie 2 Makefile also includes recipes for basic automatic dependency
+management. Running `make static-libs && make STATIC_BUILD=1` will issue
+a series of commands that will:
+ 1. download TBB and zlib
+ 2. compile them as static libraries
+ 3. link the resulting libraries to the compiled Bowtie 2 binaries
+
+If all else fails, Bowtie 2 can be built with `make NO_TBB=1` to use pthreads
+or Windows native multithreading instead.
Adding to PATH
--------------
@@ -211,7 +309,7 @@ end-to-end mode or in local mode.
Read: GACTGGGCGATCTCGACTTCG
Reference: GACTGCGATCTCGACATCG
-
+
Alignment:
Read: GACTGGGCGATCTCGACTTCG
||||| |||||||||| |||
@@ -227,7 +325,7 @@ in local mode.
Read: ACGGTTGCGTTAATCCGCCACG
Reference: TAACTTGCGTTAAATCCGCCTGG
-
+
Alignment:
Read: ACGGTTGCGTTAA-TCCGCCACG
||||||||| ||||||
@@ -465,7 +563,7 @@ alignment mode.
In general, when we say that a read has an alignment, we mean that it has a
[valid alignment]. When we say that a read has multiple alignments, we mean
-that it has multiple alignments that are valid and distinct from one another.
+that it has multiple alignments that are valid and distinct from one another.
### Distinct alignments map a read to different places
@@ -663,7 +761,7 @@ reflect only one of those reasons.
Alignment summary
------------------
-When Bowtie 2 finishes running, it prints messages summarizing what happened.
+When Bowtie 2 finishes running, it prints messages summarizing what happened.
These messages are printed to the "standard error" ("stderr") filehandle. For
datasets consisting of unpaired reads, the summary might look like this:
@@ -697,7 +795,7 @@ The indentation indicates how subtotals relate to totals.
Wrapper scripts
---------------
-The `bowtie2`, `bowtie2-build` and `bowtie2-inspect` executables are actually
+The `bowtie2`, `bowtie2-build` and `bowtie2-inspect` executables are actually
wrapper scripts that call binary programs as appropriate. The wrappers shield
users from having to distinguish between "small" and "large" index formats,
discussed briefly in the following section. Also, the `bowtie2` wrapper
@@ -985,6 +1083,17 @@ Trim `<int>` bases from 5' (left) end of each read before alignment (default: 0)
Trim `<int>` bases from 3' (right) end of each read before alignment (default: 0).
+</td></tr><tr><td id="bowtie2-options-trim-to">
+
+ --trim-to [3:|5:]<int>
+
+</td><td>
+
+Trim reads exceeding `<int>` bases. Bases will be trimmed from either the 3' (right)
+or 5' (left) end of the read. If the read end is not specified, bowtie 2 will default
+to trimming from the 3' (right) end of the read. [`--trim-to`] and [`-3`]/[`-5`] are
+mutually exclusive.
+
</td></tr><tr><td id="bowtie2-options-phred33-quals">
--phred33
@@ -1198,7 +1307,7 @@ gaps. Default: 15.
</td><td>
-Disallow gaps within `<int>` positions of the beginning or end of the read.
+Disallow gaps within `<int>` positions of the beginning or end of the read.
Default: 4.
</td></tr>
@@ -1209,7 +1318,7 @@ Default: 4.
</td><td>
When calculating a mismatch penalty, always consider the quality value at the
-mismatched position to be the highest possible, regardless of the actual value.
+mismatched position to be the highest possible, regardless of the actual value.
I.e. input is treated as though all quality values are high. This is also the
default behavior when the input doesn't specify quality values (e.g. in [`-f`],
[`-r`], or [`-c`] modes).
@@ -1227,7 +1336,7 @@ not attempt to align unpaired reads against the reverse-complement (Crick)
reference strand. In paired-end mode, `--nofw` and `--norc` pertain to the
fragments; i.e. specifying `--nofw` causes `bowtie2` to explore only those
paired-end configurations corresponding to fragments from the reverse-complement
-(Crick) strand. Default: both strands enabled.
+(Crick) strand. Default: both strands enabled.
</td></tr>
<tr><td id="bowtie2-options-no-1mm-upfront">
@@ -1382,7 +1491,7 @@ alignment equals the sum of the alignment scores of the individual mates. Each
reported read or pair alignment beyond the first has the SAM 'secondary' bit
(which equals 256) set in its FLAGS field. For reads that have more than
`<int>` distinct, valid alignments, `bowtie2` does not guarantee that the
-`<int>` alignments reported are the best possible in terms of alignment score.
+`<int>` alignments reported are the best possible in terms of alignment score.
`-k` is mutually exclusive with [`-a`].
Note: Bowtie 2 is not designed with large values for `-k` in mind, and when
@@ -1459,7 +1568,7 @@ Bowtie 2 scan a larger window to determine if a concordant alignment exists.
For typical fragment length ranges (200 to 400 nucleotides), Bowtie 2 is very
efficient.
-Default: 0 (essentially imposing no minimum)
+Default: 0 (essentially imposing no minimum)
</td></tr>
<tr><td id="bowtie2-options-X">
@@ -1556,7 +1665,7 @@ can contain the other in a concordant alignment.
</td><td>
If one mate alignment overlaps the other at all, consider that to be
-non-concordant. See also: [Mates can overlap, contain or dovetail each other].
+non-concordant. See also: [Mates can overlap, contain or dovetail each other].
Default: mates can overlap in a concordant alignment.
</td></tr></table>
@@ -1571,7 +1680,7 @@ Default: mates can overlap in a concordant alignment.
</td><td>
-Print the wall-clock time required to load the index files and align the reads.
+Print the wall-clock time required to load the index files and align the reads.
This is printed to the "standard error" ("stderr") filehandle. Default: off.
</td></tr>
@@ -1587,10 +1696,10 @@ This is printed to the "standard error" ("stderr") filehandle. Default: off.
Write unpaired reads that fail to align to file at `<path>`. These reads
correspond to the SAM records with the FLAGS `0x4` bit set and neither the
`0x40` nor `0x80` bits set. If `--un-gz` is specified, output will be gzip
-compressed. If `--un-bz2` or `--un-lz4` is specified, output will be bzip2 or
-lz4 compressed. Reads written in this way will appear exactly as they did in
-the input file, without any modification (same sequence, same name, same quality
-string, same quality encoding). Reads will not necessarily appear in the same
+compressed. If `--un-bz2` or `--un-lz4` is specified, output will be bzip2 or
+lz4 compressed. Reads written in this way will appear exactly as they did in
+the input file, without any modification (same sequence, same name, same quality
+string, same quality encoding). Reads will not necessarily appear in the same
order as they did in the input.
</td></tr>
@@ -1606,7 +1715,7 @@ order as they did in the input.
Write unpaired reads that align at least once to file at `<path>`. These reads
correspond to the SAM records with the FLAGS `0x4`, `0x40`, and `0x80` bits
unset. If `--al-gz` is specified, output will be gzip compressed. If `--al-bz2`
-is specified, output will be bzip2 compressed. Similarly if `--al-lz4` is specified,
+is specified, output will be bzip2 compressed. Similarly if `--al-lz4` is specified,
output will be lz4 compressed. Reads written in this way will
appear exactly as they did in the input file, without any modification (same
sequence, same name, same quality string, same quality encoding). Reads will
@@ -1768,13 +1877,14 @@ and `QUAL` strings. Specifying this option causes Bowtie 2 to print an asterisk
in those fields instead.
</td></tr>
-<tr><td id="bowtie2-options-soft-clipped-unmapped-tlen-sec-seq">
+<tr><td id="bowtie2-options-soft-clipped-unmapped-tlen">
--soft-clipped-unmapped-tlen
</td><td>
-Consider soft-clipped bases unmapped when calculating `TLEN`.
+Consider soft-clipped bases unmapped when calculating `TLEN`. Only available
+in [`--local`] mode.
</td></tr>
<tr><td id="bowtie2-options-sam-no-qname-trunc">
@@ -1923,10 +2033,10 @@ Print usage information and quit.
SAM output
----------
-Following is a brief description of the [SAM] format as output by `bowtie2`.
+Following is a brief description of the [SAM] format as output by `bowtie2`.
For more details, see the [SAM format specification][SAM].
-By default, `bowtie2` prints a SAM header with `@HD`, `@SQ` and `@PG` lines.
+By default, `bowtie2` prints a SAM header with `@HD`, `@SQ` and `@PG` lines.
When one or more [`--rg`] arguments are specified, `bowtie2` will also print
an `@RG` line that includes all user-specified [`--rg`] tokens separated by
tabs.
@@ -2088,7 +2198,7 @@ alignment.
</td><td>
-The number of ambiguous bases in the reference covering this alignment.
+The number of ambiguous bases in the reference covering this alignment.
Only present if SAM record is for an aligned read.
</td></tr><tr><td id="bowtie2-build-opt-fields-xm">
@@ -2156,7 +2266,7 @@ pair failed to aligned either concordantly or discordantly.
</td><td>
-A string representation of the mismatched reference bases in the alignment.
+A string representation of the mismatched reference bases in the alignment.
See [SAM Tags format specification][SAMTags] for details. Only present if SAM record is
for an aligned read.
@@ -2169,7 +2279,7 @@ The `bowtie2-build` indexer
`bowtie2-build` builds a Bowtie index from a set of DNA sequences.
`bowtie2-build` outputs a set of 6 files with suffixes `.1.bt2`, `.2.bt2`,
-`.3.bt2`, `.4.bt2`, `.rev.1.bt2`, and `.rev.2.bt2`. In the case of a large
+`.3.bt2`, `.4.bt2`, `.rev.1.bt2`, and `.rev.2.bt2`. In the case of a large
index these suffixes will have a `bt2l` termination. These files together
constitute the index: they are all that is needed to align reads to that
reference. The original sequence [`FASTA`] files are no longer used by Bowtie 2
@@ -2297,7 +2407,7 @@ automatically by default; use [`-a`/`--noauto`] to configure manually.
The maximum number of suffixes allowed in a block. Allowing more suffixes per
block makes indexing faster, but increases peak memory usage. Setting this
-option overrides any previous setting for [`--bmax`], or [`--bmaxdivn`].
+option overrides any previous setting for [`--bmax`], or [`--bmaxdivn`].
Default (in terms of the [`--bmaxdivn`] parameter) is [`--bmaxdivn`] 4 * number of threads. This is
configured automatically by default; use [`-a`/`--noauto`] to configure manually.
@@ -2362,7 +2472,7 @@ paired-end alignment.
To map alignments back to positions on the reference sequences, it's necessary
to annotate ("mark") some or all of the [Burrows-Wheeler] rows with their
-corresponding location on the genome.
+corresponding location on the genome.
[`-o`/`--offrate`](#bowtie2-build-options-o) governs how many rows get marked:
the indexer will mark every 2^`<int>` rows. Marking more rows makes
reference-position lookups faster, but requires more memory to hold the
@@ -2414,7 +2524,7 @@ print only error messages.
By default `bowtie2-build` is using only one thread. Increasing the number
of threads will speed up the index building considerably in most cases.
-
+
</td></tr><tr><td>
-h/--help
@@ -2492,7 +2602,7 @@ Print reference sequence names, one per line, and quit.
</td><td>
Print a summary that includes information about index settings, as well as the
-names and lengths of the input sequences. The summary has this format:
+names and lengths of the input sequences. The summary has this format:
Colorspace <0 or 1>
SA-Sample 1 in <sample>
@@ -2719,7 +2829,6 @@ for more details and variations on this process.
[Threading Building Blocks library]: https://www.threadingbuildingblocks.org
[TopHat]: http://tophat.cbcb.umd.edu/
[UCSC]: http://genome.ucsc.edu/cgi-bin/hgGateway
-[Vmatch]: http://www.vmatch.de/
[Xcode]: http://developer.apple.com/xcode/
[`+I`/`--minins`]: #bowtie2-options-I
[`+I`]: #bowtie2-options-I
@@ -2815,6 +2924,7 @@ for more details and variations on this process.
[`-3`]: #bowtie2-options-3
[`-5`/`--trim5`]: #bowtie2-options-5
[`-5`]: #bowtie2-options-5
+[`--trim-to`]: #bowtie2-options-trim-to
[`-D`]: #bowtie2-options-D
[`-L`]: #bowtie2-options-L
[`-N`]: #bowtie2-options-N
=====================================
Makefile
=====================================
--- a/Makefile
+++ b/Makefile
@@ -36,9 +36,9 @@ HEADERS := $(wildcard *.h)
BOWTIE_MM := 1
BOWTIE_SHARED_MEM :=
-ifdef RELEASE_BUILD
- LDFLAGS += -L$(CURDIR)/.lib
- CPPFLAGS += -I$(CURDIR)/.include
+ifdef STATIC_BUILD
+ LDFLAGS += -L$(CURDIR)/.tmp/lib
+ CPPFLAGS += -I$(CURDIR)/.tmp/include
endif
# Detect Cygwin or MinGW
@@ -58,7 +58,7 @@ ifneq (,$(findstring Darwin,$(shell uname)))
ifeq (1,$(shell uname -r | awk -F. '{ if ($$1 > 12 && $$1 < 16) print 1; }'))
CXXFLAGS += -stdlib=libstdc++
endif
- ifdef RELEASE_BUILD
+ ifdef STATIC_BUILD
CXXFLAGS += -mmacosx-version-min=10.9
endif
endif
@@ -102,7 +102,7 @@ endif
#default is to use Intel TBB
ifneq (1,$(NO_TBB))
LDLIBS += $(PTHREAD_LIB) -ltbb
- ifdef RELEASE_BUILD
+ ifdef STATIC_BUILD
LDLIBS += -ltbbmalloc
else
LDLIBS += -ltbbmalloc_proxy
@@ -443,8 +443,8 @@ bowtie2-inspect.bat:
echo "@echo off" > bowtie2-inspect.bat
echo "python %~dp0/bowtie2-inspect %*" >> bowtie2-inspect.bat
-.PHONY: bowtie2-src
-bowtie2-src: $(SRC_PKG_LIST)
+.PHONY: bowtie2-src-pkg
+bowtie2-src-pkg: $(SRC_PKG_LIST)
chmod a+x scripts/*.sh scripts/*.pl
mkdir .src.tmp
mkdir .src.tmp/bowtie2-$(VERSION)
@@ -455,9 +455,9 @@ bowtie2-src: $(SRC_PKG_LIST)
cp .src.tmp/bowtie2-$(VERSION)-source.zip .
rm -rf .src.tmp
-.PHONY: bowtie2-pkg
-bowtie2-pkg: PKG_DIR := bowtie2-$(VERSION)-$(if $(MACOS),macos,$(if $(MINGW),mingw,linux))-x86_64
-bowtie2-pkg: static-libs $(BIN_PKG_LIST) $(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_DBG)
+.PHONY: bowtie2-bin-pkg
+bowtie2-bin-pkg: PKG_DIR := bowtie2-$(VERSION)-$(if $(MACOS),macos,$(if $(MINGW),mingw,linux))-x86_64
+bowtie2-bin-pkg: static-libs $(BIN_PKG_LIST) $(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_DBG)
chmod a+x scripts/*.sh scripts/*.pl
rm -rf .bin.tmp
mkdir -p .bin.tmp/$(PKG_DIR)
@@ -487,8 +487,9 @@ doc: doc/manual.html MANUAL
doc/manual.html: MANUAL.markdown
echo "<h1>Table of Contents</h1>" > .tmp.head
- pandoc -T "Bowtie 2 Manual" -B .tmp.head \
- --css style.css -o $@ \
+ pandoc -B .tmp.head \
+ --metadata title:"Bowtie 2 Manual"\
+ --css doc/style.css -o $@ \
--from markdown --to HTML \
--table-of-contents $^
rm -f .tmp.head
@@ -505,41 +506,42 @@ install: all
.PHONY: simple-test
simple-test: perl-deps both both-debug both-sanitized
- eval `perl -I $(CURDIR)/.perllib.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.perllib.tmp` ; \
+ eval `perl -I $(CURDIR)/.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.tmp` ; \
sh ./scripts/test/simple_tests.sh
.PHONY: random-test
random-test: all perl-deps
- eval `perl -I $(CURDIR)/.perllib.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.perllib.tmp` ; \
+ eval `perl -I $(CURDIR)/.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.tmp` ; \
sh ./scripts/sim/run.sh $(if $(NUM_CORES), $(NUM_CORES), 2)
.PHONY: perl-deps
perl-deps:
- if [ ! -e .perllib.tmp ]; then \
+ if [ ! -e .tmp ]; then \
DL=$$([ `which wget` ] && echo "wget --no-check-certificate -O-" || echo "curl -L") ; \
- mkdir .perllib.tmp ; \
- $$DL http://cpanmin.us | perl - -l $(CURDIR)/.perllib.tmp App::cpanminus local::lib ; \
- eval `perl -I $(CURDIR)/.perllib.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.perllib.tmp` ; \
- $(CURDIR)/.perllib.tmp/bin/cpanm --force Math::Random Clone Test::Deep Sys::Info ; \
+ mkdir .tmp ; \
+ $$DL http://cpanmin.us | perl - -l $(CURDIR)/.tmp App::cpanminus local::lib ; \
+ eval `perl -I $(CURDIR)/.tmp/lib/perl5 -Mlocal::lib=$(CURDIR)/.tmp` ; \
+ $(CURDIR)/.tmp/bin/cpanm --force Math::Random Clone Test::Deep Sys::Info ; \
fi
.PHONY: static-libs
static-libs:
- if [[ ! -d $(CURDIR)/.lib || ! -d $(CURDIR)/.inc ]]; then \
- mkdir $(CURDIR)/.lib $(CURDIR)/.include ; \
+ if [[ ! -d $(CURDIR)/.tmp/lib || ! -d $(CURDIR)/.tmp/include ]]; then \
+ mkdir -p $(CURDIR)/.tmp/lib $(CURDIR)/.tmp/include ; \
fi ; \
if [[ `uname` = "Darwin" ]]; then \
export CFLAGS=-mmacosx-version-min=10.9 ; \
export CXXFLAGS=-mmacosx-version-min=10.9 ; \
fi ; \
DL=$$([ `which wget` ] && echo "wget --no-check-certificate" || echo "curl -LOk") ; \
- cd /tmp ; \
+ mkdir .tmp ; \
+ cd .tmp ; \
$$DL https://zlib.net/zlib-1.2.11.tar.gz && tar xzf zlib-1.2.11.tar.gz && cd zlib-1.2.11 ; \
- $(if $(MINGW), mingw32-make -f win32/Makefile.gcc, ./configure --static && make) && cp libz.a $(CURDIR)/.lib && cp zconf.h zlib.h $(CURDIR)/.include ; \
+ $(if $(MINGW), mingw32-make -f win32/Makefile.gcc, ./configure --static && make) && cp libz.a $(CURDIR)/.tmp/lib && cp zconf.h zlib.h $(CURDIR)/.tmp/include ; \
cd .. ; \
$$DL https://github.com/01org/tbb/archive/2017_U8.tar.gz && tar xzf 2017_U8.tar.gz && cd tbb-2017_U8; \
$(if $(MINGW), mingw32-make compiler=gcc arch=ia64 runtime=mingw, make) extra_inc=big_iron.inc -j4 \
- && cp -r include/tbb $(CURDIR)/.include && cp build/*_release/*.a $(CURDIR)/.lib
+ && cp -r include/tbb $(CURDIR)/.tmp/include && cp build/*_release/*.a $(CURDIR)/.tmp/lib
.PHONY: test
test: simple-test random-test
@@ -548,8 +550,7 @@ test: simple-test random-test
clean:
rm -f $(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_DBG) $(BOWTIE2_BIN_LIST_SAN) \
$(addsuffix .exe,$(BOWTIE2_BIN_LIST) $(BOWTIE2_BIN_LIST_DBG)) \
- bowtie2-src.zip bowtie2-bin.zip
+ bowtie2-*.zip
rm -f core.* .tmp.head
rm -rf *.dSYM
- rm -rf .perllib.tmp
- rm -rf .include .lib
+ rm -rf .tmp
=====================================
NEWS
=====================================
--- a/NEWS
+++ b/NEWS
@@ -19,10 +19,39 @@ Please report any issues to the Bowtie 2 Github page or using the Sourceforge bu
Version Release History
=======================
+Version 2.3.4.2 - Aug 7, 2018
+
+ * Fixed issue causing `bowtie2` to fail in `--fast-local` mode.
+ * Fixed issue causing `--soft-clipped-unmapped-tlen` to be a positional argument.
+ * New option `--trim-to N` causes `bowtie2` to trim reads longer
+ than `N` bases to exactly `N` bases. Can trim from either 3'
+ or 5' end, e.g. `--trim-to 5:30` trims reads to 30 bases,
+ truncating at the 5' end.
+ * Updated "Building from source" manual section with additional
+ instructions on installing TBB.
+ * Several other updates to manual, including new mentions of
+ [Bioconda](http://bioconda.github.io) and
+ [Biocontainers](https://biocontainers.pro).
+ * Fixed an issue preventing `bowtie2` from processing more than
+ one pattern source when running single threaded.
+ * Fixed an issue causing `bowtie2` and `bowtie2-inspect` to crash if
+ the index contains a gap-only segment.
+ * Added experimental BAM input mode `-b`. Works only with unpaired
+ input reads and BAM files that are sorted by read name (`samtools
+ sort -n`). BAM input mode also supports the following options:
+ o `--preserve-sam-tags`: Preserve any optional fields present in BAM record
+ o `--align-paired-reads`: Paired-end mode for BAM files
+ * Added experimental cmake support
+
+Thread-scaling paper appears - July 19, 2018
+
+ * Our latest work on Bowtie's core thread scaling capabilities
+ [just appeared Open Access in the journal Bioinformatics](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/bty648/5055585)
+
Version 2.3.4.1 - Feb 3, 2018
* Fixed an issue with `--reorder` that caused bowtie2 to crash
- while reordering SAM output
+ while reordering SAM output
Version 2.3.4 - Dec 29, 2017
=====================================
VERSION
=====================================
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.3.4.1
+2.3.4.2
=====================================
aln_sink.cpp
=====================================
--- a/aln_sink.cpp
+++ b/aln_sink.cpp
@@ -2110,6 +2110,7 @@ void AlnSinkSam::appendMate(
prm, // per-read metrics
sc); // scoring scheme
}
+ samc_.printPreservedOptFlags(o, rd);
o.append('\n');
}
=====================================
bowtie2-build
=====================================
--- a/bowtie2-build
+++ b/bowtie2-build
@@ -80,7 +80,7 @@ def main():
argv.appendleft('--wrapper')
argv.appendleft(build_bin_spec)
logging.info('Command: %s' % ' '.join(argv))
- subprocess.call(argv)
+ sys.exit(subprocess.call(argv))
if __name__ == '__main__':
main()
=====================================
bowtie2-inspect
=====================================
--- a/bowtie2-inspect
+++ b/bowtie2-inspect
@@ -77,7 +77,7 @@ def main():
argv.appendleft('--wrapper')
argv.appendleft(inspect_bin_spec)
logging.info('Command: %s' % ' '.join(argv))
- subprocess.call(argv)
+ sys.exit(subprocess.call(argv))
if __name__ == '__main__':
main()
=====================================
bt2_idx.h
=====================================
--- a/bt2_idx.h
+++ b/bt2_idx.h
@@ -2530,6 +2530,9 @@ TStr Ebwt::join(EList<FileBuf*>& l,
while(!l[i]->eof()) {
RefRecord rec = fastaRefReadAppend(*l[i], first, ret, dstoff, rpcp);
first = false;
+ if(rec.first && rec.len == 0) {
+ continue;
+ }
TIndexOffU bases = rec.len;
assert_eq(rec.off, szs[szsi].off);
assert_eq(rec.len, szs[szsi].len);
@@ -2596,7 +2599,7 @@ void Ebwt::joinToDisk(
writeU<TIndexOffU>(out1, this->plen()[npat], this->toBe());
}
this->plen()[++npat] = (szs[i].len + szs[i].off);
- } else {
+ } else if(!szs[i].first) {
// edge case, but we could get here with npat == -1
// e.g. when building from a reference of all Ns
if (npat < 0) npat = 0;
@@ -2639,6 +2642,9 @@ void Ebwt::joinToDisk(
//assert_eq(0, _refnames.back().length());
_refnames.pop_back();
}
+ if(rec.first && rec.len == 0) {
+ continue;
+ }
assert_lt(szsi, szs.size());
assert_eq(rec.off, szs[szsi].off);
assert_eq(rec.len, szs[szsi].len);
@@ -2646,7 +2652,7 @@ void Ebwt::joinToDisk(
assert(rec.first || rec.off > 0);
ASSERT_ONLY(szsi++);
// Increment seqsRead if this is the first fragment
- if(rec.first && rec.len > 0) seqsRead++;
+ if(rec.first) seqsRead++;
if(bases == 0) continue;
assert_leq(bases, this->plen()[seqsRead-1]);
// Reset the patoff if this is the first fragment
=====================================
bt2_search.cpp
=====================================
--- a/bt2_search.cpp
+++ b/bt2_search.cpp
@@ -97,6 +97,7 @@ static int ipause; // pause before maching?
static uint32_t qUpto; // max # of queries to read
static int gTrim5; // amount to trim from 5' end
static int gTrim3; // amount to trim from 3' end
+static pair<short, size_t> trimTo; // trim reads exceeding given length from either 3' or 5'-end
static int offRate; // keep default offRate
static bool solexaQuals; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
static bool phred64Quals; // quality chars are phred, but must subtract 64 (not 33)
@@ -176,6 +177,8 @@ static bool sam_print_zi;
static bool sam_print_zp;
static bool sam_print_zu;
static bool sam_print_zt;
+static bool preserve_sam_tags; // Only applies when aligning BAM files
+static bool align_paired_reads; // Process only the paired reads in BAM file
static bool bwaSwLike;
static bool gSeedLenIsSet;
static float bwaSwLikeC;
@@ -288,6 +291,7 @@ static void resetOptions() {
qUpto = 0xffffffff; // max # of queries to read
gTrim5 = 0; // amount to trim from 5' end
gTrim3 = 0; // amount to trim from 3' end
+ trimTo = pair<short, size_t>(5, 0); // default: don't do any trimming
offRate = -1; // keep default offRate
solexaQuals = false; // quality strings are solexa quals, not phred, and subtract 64 (not 33)
phred64Quals = false; // quality chars are phred, but must subtract 64 (not 33)
@@ -369,6 +373,8 @@ static void resetOptions() {
sam_print_zp = false;
sam_print_zu = false;
sam_print_zt = false;
+ preserve_sam_tags = false;
+ align_paired_reads = false;
bwaSwLike = false;
gSeedLenIsSet = false;
bwaSwLikeC = 5.5f;
@@ -455,7 +461,7 @@ static void resetOptions() {
logDpsOpp.clear(); // log mate-search dynamic programming problems
}
-static const char *short_options = "fF:qbzhcu:rv:s:aP:t3:5:w:p:k:M:1:2:I:X:CQ:N:i:L:U:x:S:g:O:D:R:";
+static const char *short_options = "bfF:qbzhcu:rv:s:aP:t3:5:w:p:k:M:1:2:I:X:CQ:N:i:L:U:x:S:g:O:D:R:";
static struct option long_options[] = {
{(char*)"verbose", no_argument, 0, ARG_VERBOSE},
@@ -643,6 +649,9 @@ static struct option long_options[] = {
{(char*)"xeq", no_argument, 0, ARG_XEQ},
{(char*)"thread-ceiling", required_argument, 0, ARG_THREAD_CEILING},
{(char*)"thread-piddir", required_argument, 0, ARG_THREAD_PIDDIR},
+{(char*)"trim-to", required_argument, 0, ARG_TRIM_TO},
+{(char*)"preserve-sam-tags", no_argument, 0, ARG_PRESERVE_SAM_TAGS},
+{(char*)"align-paired-reads", no_argument, 0, ARG_ALIGN_PAIRED_READS},
{(char*)0, 0, 0, 0} // terminator
};
@@ -736,6 +745,8 @@ static void printUsage(ostream& out) {
<< " -u/--upto <int> stop after first <int> reads/pairs (no limit)" << endl
<< " -5/--trim5 <int> trim <int> bases from 5'/left end of reads (0)" << endl
<< " -3/--trim3 <int> trim <int> bases from 3'/right end of reads (0)" << endl
+ << " --trim-to [3:|5:]<int> trim reads exceeding <int> bases from either 3' or 5' end" << endl
+ << " If the read end is not specified then it defaults to 3 (0)" << endl
<< " --phred33 qualities are Phred+33 (default)" << endl
<< " --phred64 qualities are Phred+64" << endl
<< " --int-quals qualities encoded as space-delimited integers" << endl
@@ -944,6 +955,12 @@ static string applyPreset(const string& sorig, Presets& presets) {
static bool saw_M;
static bool saw_a;
static bool saw_k;
+static bool saw_trim3;
+static bool saw_trim5;
+static bool saw_trim_to;
+static bool saw_bam;
+static bool saw_preserve_sam_tags;
+static bool saw_align_paired_reads;
static EList<string> presetList;
/**
@@ -983,6 +1000,11 @@ static void parseOption(int next_option, const char *arg) {
case ARG_TAB5: tokenize(arg, ",", mates12); format = TAB_MATE5; break;
case ARG_TAB6: tokenize(arg, ",", mates12); format = TAB_MATE6; break;
case ARG_INTERLEAVED_FASTQ: tokenize(arg, ",", mates12); format = INTERLEAVED; break;
+ case 'b': {
+ format = BAM;
+ saw_bam = true;
+ break;
+ }
case 'f': format = FASTA; break;
case 'F': {
format = FASTA_CONT;
@@ -1026,6 +1048,16 @@ static void parseOption(int next_option, const char *arg) {
case ARG_SEED_SUMM: seedSumm = true; break;
case ARG_SC_UNMAPPED: scUnMapped = true; break;
case ARG_XEQ: xeq = true; break;
+ case ARG_PRESERVE_SAM_TAGS: {
+ preserve_sam_tags = true;
+ saw_preserve_sam_tags = true;
+ break;
+ }
+ case ARG_ALIGN_PAIRED_READS: {
+ align_paired_reads = true;
+ saw_align_paired_reads = true;
+ break;
+ }
case ARG_MM: {
#ifdef BOWTIE_MM
useMm = true;
@@ -1101,6 +1133,26 @@ static void parseOption(int next_option, const char *arg) {
break;
case '3': gTrim3 = parseInt(0, "-3/--trim3 arg must be at least 0", arg); break;
case '5': gTrim5 = parseInt(0, "-5/--trim5 arg must be at least 0", arg); break;
+ case ARG_TRIM_TO: {
+ if (strlen(arg) > 1 && arg[1] != ':') {
+ trimTo.first = 3;
+ trimTo.second = parseInt(0, "--trim-to: the number of bases to trim must be at least 0", arg);
+ break;
+ }
+ pair<int, int> res = parsePair<int>(arg, ':');
+ if (res.first != 3 && res.first != 5) {
+ cerr << "--trim-to: trim position must be either 3 or 5" << endl;
+ printUsage(cerr);
+ throw 1;
+ }
+ if(res.second < 0) {
+ cerr << "--trim-to: the number bases to trim must be at least 0" << endl;
+ printUsage(cerr);
+ throw 1;
+ }
+ trimTo = static_cast<pair<short, size_t> >(res);
+ break;
+ }
case 'h': printUsage(cout); throw 0; break;
case ARG_USAGE: printUsage(cout); throw 0; break;
//
@@ -1468,12 +1520,6 @@ static void parseOption(int next_option, const char *arg) {
printUsage(cerr);
throw 1;
}
- if (!localAlign && scUnMapped) {
- scUnMapped = false;
- cerr << "WARNING: --soft-clipped-unmapped-tlen can only be set for "
- << "local alignment... ignoring" << endl;
- }
-
}
/**
@@ -1485,6 +1531,12 @@ static void parseOptions(int argc, const char **argv) {
saw_M = false;
saw_a = false;
saw_k = false;
+ saw_trim3 = false;
+ saw_trim5 = false;
+ saw_trim_to = false;
+ saw_bam = false;
+ saw_preserve_sam_tags = false;
+ saw_align_paired_reads = false;
presetList.clear();
if(startVerbose) { cerr << "Parsing options: "; logTime(cerr, true); }
while(true) {
@@ -1503,6 +1555,27 @@ static void parseOptions(int argc, const char **argv) {
}
parseOption(next_option, arg);
}
+
+ if (!localAlign && scUnMapped) {
+ cerr << "ERROR: --soft-clipped-unmapped-tlen can only be set for local alignments." << endl;
+ exit(1);
+ }
+
+ if ((saw_trim3 || saw_trim5) && saw_trim_to) {
+ cerr << "ERROR: --trim5/--trim3 and --trim-to are mutually exclusive "
+ << "options." << endl;
+ exit(1);
+ }
+
+ if (!saw_bam && saw_preserve_sam_tags) {
+ cerr << "--preserve_sam_tag can only be used when aligning BAM reads." << endl;
+ exit(1);
+ }
+
+ if (!saw_bam && saw_align_paired_reads) {
+ cerr << "--align-paired-reads can only be used when aligning BAM reads." << endl;
+ exit(1);
+ }
// Now parse all the presets. Might want to pick which presets version to
// use according to other parameters.
auto_ptr<Presets> presets(new PresetsV0());
@@ -4717,11 +4790,14 @@ static void driver(
integerQuals, // true -> qualities are space-separated numbers
gTrim5, // amt to hard clip from 5' end
gTrim3, // amt to hard clip from 3' end
+ trimTo, // trim reads exceeding given length from either 3' or 5'-end
fastaContLen, // length of sampled reads for FastaContinuous...
fastaContFreq, // frequency of sampled reads for FastaContinuous...
skipReads, // skip the first 'skip' patterns
nthreads, //number of threads for locking
- outType != OUTPUT_SAM // whether to fix mate names
+ outType != OUTPUT_SAM, // whether to fix mate names
+ preserve_sam_tags, // keep existing SAM tags when aligning BAM files
+ align_paired_reads // Align only the paired reads in BAM file
);
if(gVerbose || startVerbose) {
cerr << "Creating PatternSource: "; logTime(cerr, true);
=====================================
doc/manual.html
=====================================
--- a/doc/manual.html
+++ b/doc/manual.html
@@ -4,14 +4,14 @@
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
- <title>Bowtie 2 Manual – Bowtie 2 Manual</title>
+ <title>Bowtie 2 Manual</title>
<style type="text/css">
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
- div.line-block{white-space: pre-line;}
+ span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
</style>
- <link rel="stylesheet" href="style.css">
+ <link rel="stylesheet" href="doc/style.css" />
<!--[if lt IE 9]>
<script src="//cdnjs.cloudflare.com/ajax/libs/html5shiv/3.7.3/html5shiv-printshiv.min.js"></script>
<![endif]-->
@@ -24,10 +24,8 @@
<nav id="TOC">
<ul>
<li><a href="#introduction">Introduction</a><ul>
-<li><a href="#what-is-bowtie-2">What is Bowtie 2?</a></li>
<li><a href="#how-is-bowtie-2-different-from-bowtie-1">How is Bowtie 2 different from Bowtie 1?</a></li>
<li><a href="#what-isnt-bowtie-2">What isn’t Bowtie 2?</a></li>
-<li><a href="#what-does-it-mean-that-some-older-bowtie-2-versions-are-beta">What does it mean that some older Bowtie 2 versions are “beta”?</a></li>
</ul></li>
<li><a href="#obtaining-bowtie-2">Obtaining Bowtie 2</a><ul>
<li><a href="#building-from-source">Building from source</a></li>
@@ -105,10 +103,14 @@
! of this text document, or see the HTML manual online.
! -->
<h1 id="introduction">Introduction</h1>
-<h2 id="what-is-bowtie-2">What is Bowtie 2?</h2>
-<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an <a href="http://en.wikipedia.org/wiki/FM-index">FM Index</a> (based on the <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">Burrows-Wheeler Transform</a> or <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">BWT</a>) to keep its memory footprint small: for the human genome, its memory footprint is typically around 3.2 gigabytes of RAM. Bowtie 2 supports gapped, local, and paired-end alignment modes. Multiple processors can be used simultaneously to achieve greater alignment speed. <a href="http://cufflinks.cbcb.umd.edu/">Cufflinks</a>: a tool for transcriptome assembly and isoform quantitiation from Bowtie 2 outputs alignments in <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM</a> format, enabling interoperation with a large number of other tools (e.g. <a href="http://samtools.sourceforge.net">SAMtools</a>, <a href="http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit">GATK</a>) that use SAM. Bowtie 2 is distributed under the <a href="http://www.gnu.org/licenses/gpl-3.0.html">GPLv3 license</a>, and it runs on the command line under Windows, Mac OS X and Linux.</p>
-<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is often the first step in pipelines for comparative genomics, including for variation calling, ChIP-seq, RNA-seq, BS-seq. <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> and <a href="http://bowtie-bio.sf.net">Bowtie</a> (also called “<a href="http://bowtie-bio.sf.net">Bowtie 1</a>” here) are also tightly integrated into some tools, including <a href="http://tophat.cbcb.umd.edu/">TopHat</a>: a fast splice junction mapper for RNA-seq reads, RNA-seq reads, <a href="http://bowtie-bio.sf.net/crossbow">Crossbow</a>: a cloud-enabled software tool for analyzing resequencing data, and <a href="http://bowtie-bio.sf.net/myrna">Myrna</a>: a cloud-enabled software tool for aligning RNA-seq reads and measuring differential gene expression.</p>
-<p>If you use <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> for your published research, please cite the <a href="http://genomebiology.com/2009/10/3/R25">Bowtie paper</a>. Thank you!</p>
+<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an <a href="http://en.wikipedia.org/wiki/FM-index">FM Index</a> (based on the <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">Burrows-Wheeler Transform</a> or <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">BWT</a>) to keep its memory footprint small: for the human genome, its memory footprint is typically around 3.2 gigabytes of RAM. Bowtie 2 supports gapped, local, and paired-end alignment modes. Multiple processors can be used simultaneously to achieve greater alignment speed.</p>
+<p>Bowtie 2 outputs alignments in <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM</a> format, enabling interoperation with a large number of other tools (e.g. <a href="http://samtools.sourceforge.net">SAMtools</a>, <a href="http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit">GATK</a>) that use SAM. Bowtie 2 is distributed under the <a href="http://www.gnu.org/licenses/gpl-3.0.html">GPLv3 license</a>, and it runs on the command line under Windows, Mac OS X and Linux.</p>
+<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is often the first step in pipelines for comparative genomics, including for variation calling, ChIP-seq, RNA-seq, BS-seq. <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> and <a href="http://bowtie-bio.sf.net">Bowtie</a> (also called “<a href="http://bowtie-bio.sf.net">Bowtie 1</a>” here) are also tightly integrated into many other tools, some of which <a href="http://bowtie-bio.sourceforge.net/bowtie2/other_tools.shtml">are listed here</a>.</p>
+<p>If you use <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> for your published research, please cite our work. Papers describing Bowtie 2 are:</p>
+<ul>
+<li><p>Langmead B, Wilks C, Antonescu V, Charles R. <a href="https://doi.org/10.1093/bioinformatics/bty648">Scaling read aligners to hundreds of threads on general-purpose processors</a>. <em>Bioinformatics</em>. 2018 Jul 18. doi: 10.1093/bioinformatics/bty648.</p></li>
+<li><p>Langmead B, Salzberg SL. <a href="https://www.nature.com/articles/nmeth.1923">Fast gapped-read alignment with Bowtie 2</a>. <em>Nature Methods</em>. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923.</p></li>
+</ul>
<h2 id="how-is-bowtie-2-different-from-bowtie-1">How is Bowtie 2 different from Bowtie 1?</h2>
<p>Bowtie 1 was released in 2009 and was geared toward aligning the relatively short sequencing reads (up to 25-50 nucleotides) prevalent at the time. Since then, technology has improved both sequencing throughput (more nucleotides produced per sequencer per day) and read length (more nucleotides per read).</p>
<p>The chief differences between Bowtie 1 and Bowtie 2 are:</p>
@@ -125,17 +127,115 @@
</ol>
<p>Bowtie 2 is not a “drop-in” replacement for Bowtie 1. Bowtie 2’s command-line arguments and genome index format are both different from Bowtie 1’s.</p>
<h2 id="what-isnt-bowtie-2">What isn’t Bowtie 2?</h2>
-<p>Bowtie 1 and Bowtie 2 are not general-purpose alignment tools like <a href="http://mummer.sourceforge.net/">MUMmer</a>, <a href="http://blast.ncbi.nlm.nih.gov/Blast.cgi">BLAST</a> or <a href="http://www.vmatch.de/">Vmatch</a>. Bowtie 2 works best when aligning to large genomes, though it supports arbitrarily small reference sequences (e.g. amplicons). It handles very long reads (i.e. upwards of 10s or 100s of kilobases), but it is optimized for the read lengths and error modes yielded by recent sequencers, such as the Illumina HiSeq 2000, Roche 454, and Ion Torrent instruments.</p>
-<p>If your goal is to align two very large sequences (e.g. two genomes), consider using <a href="http://mummer.sourceforge.net/">MUMmer</a>. If your goal is very sensitive alignment to a relatively short reference sequence (e.g. a bacterial genome), this can be done with Bowtie 2 but you may want to consider using tools like <a href="http://mummer.sourceforge.net/manual/#nucmer">NUCmer</a>, <a href="http://genome.ucsc.edu/cgi-bin/hgBlat?command=start">BLAT</a>, or <a href="http://blast.ncbi.nlm.nih.gov/Blast.cgi">BLAST</a>. These tools can be extremely slow when the reference genome is long, but are often adequate when the reference is short.</p>
-<p>Bowtie 2 does not support alignment of colorspace reads.</p>
-<h2 id="what-does-it-mean-that-some-older-bowtie-2-versions-are-beta">What does it mean that some older Bowtie 2 versions are “beta”?</h2>
-<p>We said those Bowtie 2 versions were in “beta” to convey that it was not as polished as a tool that had been around for a while, and was still in flux. Since version 2.0.1, we declared Bowtie 2 was no longer “beta”.</p>
+<p>Bowtie 2 is geared toward aligning relatively short sequencing reads to long genomes. That said, it handles arbitrarily small reference sequences (e.g. amplicons) and very long reads (i.e. upwards of 10s or 100s of kilobases), though it is slower in those settings. It is optimized for the read lengths and error modes yielded by typical Illumina sequencers.</p>
+<p>Bowtie 2 does not support alignment of colorspace reads. (Bowtie 1 does.)</p>
<h1 id="obtaining-bowtie-2">Obtaining Bowtie 2</h1>
-<p>Download Bowtie 2 sources and binaries from the <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/">Download</a> section of the Sourceforge site. Binaries are available for the Intel <code>x86_64</code> architecture running Linux, Mac OS X, and Windows. If you plan to compile Bowtie 2 yourself, make sure to get the source package, i.e., the filename that ends in “-source.zip”.</p>
+<p>Bowtie 2 is available from various package managers, notably <a href="https://anaconda.org/bioconda/bowtie2">Bioconda</a>. With Bioconda installed, you should be able to install Bowtie 2 with <code>conda install bowtie2</code>.</p>
+<p>Containerized versions of Bowtie 2 are also available via the <a href="https://BioContainers.pro">Biocontainers</a> project (e.g. <a href="https://hub.docker.com/r/biocontainers/bowtie2/">via Docker Hub</a>).</p>
+<p>You can also download Bowtie 2 sources and binaries from the <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/">Download</a> section of the Sourceforge site. Binaries are available for the <code>x86_64</code> architecture running Linux, Mac OS X, and Windows. If you plan to compile Bowtie 2 yourself, make sure to get the source package, i.e., the filename that ends in “-source.zip”.</p>
<h2 id="building-from-source">Building from source</h2>
<p>Building Bowtie 2 from source requires a GNU-like environment with GCC, GNU Make and other basics. It should be possible to build Bowtie 2 on most vanilla Linux installations or on a Mac installation with <a href="http://developer.apple.com/xcode/">Xcode</a> installed. (But see note about the TBB library below). Bowtie 2 can also be built on Windows using a 64-bit MinGW distribution and MSYS. In order to simplify the MinGW setup it might be worth investigating popular MinGW personal builds since these are coming already prepared with most of the toolchains needed.</p>
<p>First, download the source package from the <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/">sourceforge site</a>. Make sure you’re getting the source package; the file downloaded should end in <code>-source.zip</code>. Unzip the file, change to the unzipped directory, and build the Bowtie 2 tools by running GNU <code>make</code> (usually with the command <code>make</code>, but sometimes with <code>gmake</code>) with no arguments. If building with MinGW, run <code>make</code> from the MSYS environment.</p>
-<p>Bowtie 2 can be run on many threads. By default, Bowtie 2 uses the Threading Building Blocks library (TBB) for this. If TBB is not available on your system (e.g. <code>make</code> prints an error like <code>tbb/mutex.h: No such file or directory</code>), you can install it yourself (see <a href="https://www.threadingbuildingblocks.org">Threading Building Blocks library</a>) or build Bowtie 2 with <code>make NO_TBB=1</code> to use pthreads or Windows native multithreading instead.</p>
+<p>Bowtie 2 can be run on many threads. By default, Bowtie 2 uses the Threading Building Blocks library (TBB) for this. If TBB is not available on your system (e.g. <code>make</code> prints an error like <code>tbb/mutex.h: No such file or directory</code>), you can install it yourself from source (see <a href="https://www.threadingbuildingblocks.org">Threading Building Blocks library</a>) or install it using your operating system’s preferred package manager. The table below lists some of the commands for a few of the more popular operating systems.</p>
+<table>
+<tr>
+<th>
+OS
+</th>
+<th>
+Sync Package List
+</th>
+<th>
+Search
+</th>
+<th>
+Install
+</th>
+</tr>
+<tr>
+<td>
+Ubuntu, Mint, Debian
+</td>
+<td>
+<pre><code>apt-get update</code></pre>
+</td>
+<td>
+<pre><code>apt-cache search tbb</code></pre>
+</td>
+<td>
+<pre><code>apt-get install libtbb-dev</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+Fedora, CentOS
+</td>
+<td>
+<pre><code>yum check-update</code></pre>
+</td>
+<td>
+<pre><code>yum search tbb</code></pre>
+</td>
+<td>
+<pre><code>yum install tbb-devel.x86_64</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+Arch
+</td>
+<td>
+<pre><code>pacman -Sy</code></pre>
+</td>
+<td>
+<pre><code>pacman -Ss tbb</code></pre>
+</td>
+<td>
+<pre><code>pacman -S extra/intel-tbb</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+Gentoo
+</td>
+<td>
+<pre><code>emerge --sync</code></pre>
+</td>
+<td>
+<pre><code>emerge --search tbb</code></pre>
+</td>
+<td>
+<pre><code>emerge dev-cpp/tbb</code></pre>
+</td>
+<tr>
+<td>
+MacOS
+</td>
+<td>
+<pre><code>brew update</code></pre>
+</td>
+<td>
+<pre><code>brew search tbb</code></pre>
+</td>
+<td>
+<pre><code>brew install tbb</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+FreeBSD
+</td>
+<td>
+<pre><code>portsnap fetch update</code></pre>
+</td>
+<td>
+<pre><code>make search name=tbb</code></pre>
+</td>
+<td>
+<pre><code>cd /usr/ports/devel/tbb && make install && make clean</code></pre>
+</table>
+<p>The Bowtie 2 Makefile also includes recipes for basic automatic dependency management. Running <code>make static-libs && make STATIC_BUILD=1</code> will issue a series of commands that will: (1) download TBB and zlib, (2) compile them as static libraries, and (3) link the resulting libraries to the compiled Bowtie 2 binaries.</p>
+<p>If all else fails, Bowtie 2 can be built with <code>make NO_TBB=1</code> to use pthreads or Windows native multithreading instead.</p>
<h2 id="adding-to-path">Adding to PATH</h2>
<p>By adding your new Bowtie 2 directory to your <a href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH environment variable</a>, you ensure that whenever you run <code>bowtie2</code>, <code>bowtie2-build</code> or <code>bowtie2-inspect</code> from the command line, you will get the version you just installed without having to specify the entire path. This is recommended for most users. To do this, follow your operating system’s instructions for adding the directory to your <a href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH</a>.</p>
<p>If you would like to install Bowtie 2 by copying the Bowtie 2 executable files to an existing directory in your <a href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH</a>, make sure that you copy all the executables, including <code>bowtie2</code>, <code>bowtie2-align-s</code>, <code>bowtie2-align-l</code>, <code>bowtie2-build</code>, <code>bowtie2-build-s</code>, <code>bowtie2-build-l</code>, <code>bowtie2-inspect</code>, <code>bowtie2-inspect-s</code> and <code>bowtie2-inspect-l</code>.</p>
@@ -465,6 +565,14 @@ Reads are substrings (k-mers) extracted from a FASTA file <code><s></code>
</td>
</tr>
<tr>
+<td id="bowtie2-options-trim-to">
+<pre><code>--trim-to [3:|5:]<int></code></pre>
+</td>
+<td>
+<p>Trim reads exceeding <code><int></code> bases. Bases will be trimmed from either the 3’ (right) or 5’ (left) end of the read. If the read end is not specified, Bowtie 2 will default to trimming from the 3’ (right) end of the read. <a href="#bowtie2-options-trim-to"><code>--trim-to</code></a> and <a href="#bowtie2-options-3"><code>-3</code></a>/<a href="#bowtie2-options-5"><code>-5</code></a> are mutually exclusive.</p>
+</td>
+</tr>
+<tr>
<td id="bowtie2-options-phred33-quals">
<pre><code>--phred33</code></pre>
</td>
@@ -969,11 +1077,11 @@ Seed 4 rc: TTATGCATGA</code></pre>
</td>
</tr>
<tr>
-<td id="bowtie2-options-soft-clipped-unmapped-tlen-sec-seq">
+<td id="bowtie2-options-soft-clipped-unmapped-tlen">
<pre><code>--soft-clipped-unmapped-tlen</code></pre>
</td>
<td>
-<p>Consider soft-clipped bases unmapped when calculating <code>TLEN</code>.</p>
+<p>Consider soft-clipped bases unmapped when calculating <code>TLEN</code>. Only available in <a href="#bowtie2-options-local"><code>--local</code></a> mode.</p>
</td>
</tr>
<tr>
=====================================
doc/website/manual.ssi
=====================================
--- a/doc/website/manual.ssi
+++ b/doc/website/manual.ssi
@@ -1,22 +1,9 @@
<h1>Table of Contents</h1>
-<p> Version <b>2.3.4.1</b></p>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
- <title>Bowtie 2 Manual - </title>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
- <meta name="generator" content="pandoc" />
- <link rel="stylesheet" href="style.css" type="text/css" />
-</head>
-<body>
-
-<div id="TOC">
+<nav id="TOC">
<ul>
<li><a href="#introduction">Introduction</a><ul>
-<li><a href="#what-is-bowtie-2">What is Bowtie 2?</a></li>
<li><a href="#how-is-bowtie-2-different-from-bowtie-1">How is Bowtie 2 different from Bowtie 1?</a></li>
<li><a href="#what-isnt-bowtie-2">What isn’t Bowtie 2?</a></li>
-<li><a href="#what-does-it-mean-that-some-older-bowtie-2-versions-are-beta">What does it mean that some older Bowtie 2 versions are “beta”?</a></li>
</ul></li>
<li><a href="#obtaining-bowtie-2">Obtaining Bowtie 2</a><ul>
<li><a href="#building-from-source">Building from source</a></li>
@@ -87,17 +74,21 @@
<li><a href="#using-samtoolsbcftools-downstream">Using SAMtools/BCFtools downstream</a></li>
</ul></li>
</ul>
-</div>
+</nav>
<!--
! This manual is written in "markdown" format and thus contains some
! distracting formatting clutter. See 'MANUAL' for an easier-to-read version
! of this text document, or see the HTML manual online.
! -->
<h1 id="introduction">Introduction</h1>
-<h2 id="what-is-bowtie-2">What is Bowtie 2?</h2>
-<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an <a href="http://en.wikipedia.org/wiki/FM-index">FM Index</a> (based on the <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">Burrows-Wheeler Transform</a> or <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">BWT</a>) to keep its memory footprint small: for the human genome, its memory footprint is typically around 3.2 gigabytes of RAM. Bowtie 2 supports gapped, local, and paired-end alignment modes. Multiple processors can be used simultaneously to achieve greater alignment speed. <a href="http://cufflinks.cbcb.umd.edu/">Cufflinks</a>: a tool for transcriptome assembly and isoform quantitiation from Bowtie 2 outputs alignments in <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM</a> format, enabling interoperation with a large number of other tools (e.g. <a href="http://samtools.sourceforge.net">SAMtools</a>, <a href="http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit">GATK</a>) that use SAM. Bowtie 2 is distributed under the <a href="http://www.gnu.org/licenses/gpl-3.0.html">GPLv3 license</a>, and it runs on the command line under Windows, Mac OS X and Linux.</p>
-<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is often the first step in pipelines for comparative genomics, including for variation calling, ChIP-seq, RNA-seq, BS-seq. <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> and <a href="http://bowtie-bio.sf.net">Bowtie</a> (also called “<a href="http://bowtie-bio.sf.net">Bowtie 1</a>” here) are also tightly integrated into some tools, including <a href="http://tophat.cbcb.umd.edu/">TopHat</a>: a fast splice junction mapper for RNA-seq reads, RNA-seq reads, <a href="http://bowtie-bio.sf.net/crossbow">Crossbow</a>: a cloud-enabled software tool for analyzing resequencing data, and <a href="http://bowtie-bio.sf.net/myrna">Myrna</a>: a cloud-enabled software tool for aligning RNA-seq reads and measuring differential gene expression.</p>
-<p>If you use <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> for your published research, please cite the <a href="http://genomebiology.com/2009/10/3/R25">Bowtie paper</a>. Thank you!</p>
+<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s of characters to relatively long (e.g. mammalian) genomes. Bowtie 2 indexes the genome with an <a href="http://en.wikipedia.org/wiki/FM-index">FM Index</a> (based on the <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">Burrows-Wheeler Transform</a> or <a href="http://en.wikipedia.org/wiki/Burrows-Wheeler_transform">BWT</a>) to keep its memory footprint small: for the human genome, its memory footprint is typically around 3.2 gigabytes of RAM. Bowtie 2 supports gapped, local, and paired-end alignment modes. Multiple processors can be used simultaneously to achieve greater alignment speed.</p>
+<p>Bowtie 2 outputs alignments in <a href="http://samtools.sourceforge.net/SAM1.pdf">SAM</a> format, enabling interoperation with a large number of other tools (e.g. <a href="http://samtools.sourceforge.net">SAMtools</a>, <a href="http://www.broadinstitute.org/gsa/wiki/index.php/The_Genome_Analysis_Toolkit">GATK</a>) that use SAM. Bowtie 2 is distributed under the <a href="http://www.gnu.org/licenses/gpl-3.0.html">GPLv3 license</a>, and it runs on the command line under Windows, Mac OS X and Linux.</p>
+<p><a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> is often the first step in pipelines for comparative genomics, including for variation calling, ChIP-seq, RNA-seq, BS-seq. <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> and <a href="http://bowtie-bio.sf.net">Bowtie</a> (also called “<a href="http://bowtie-bio.sf.net">Bowtie 1</a>” here) are also tightly integrated into many other tools, some of which <a href="http://bowtie-bio.sourceforge.net/bowtie2/other_tools.shtml">are listed here</a>.</p>
+<p>If you use <a href="http://bowtie-bio.sf.net/bowtie2">Bowtie 2</a> for your published research, please cite our work. Papers describing Bowtie 2 are:</p>
+<ul>
+<li><p>Langmead B, Wilks C, Antonescu V, Charles R. <a href="https://doi.org/10.1093/bioinformatics/bty648">Scaling read aligners to hundreds of threads on general-purpose processors</a>. <em>Bioinformatics</em>. 2018 Jul 18. doi: 10.1093/bioinformatics/bty648.</p></li>
+<li><p>Langmead B, Salzberg SL. <a href="https://www.nature.com/articles/nmeth.1923">Fast gapped-read alignment with Bowtie 2</a>. <em>Nature Methods</em>. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923.</p></li>
+</ul>
<h2 id="how-is-bowtie-2-different-from-bowtie-1">How is Bowtie 2 different from Bowtie 1?</h2>
<p>Bowtie 1 was released in 2009 and was geared toward aligning the relatively short sequencing reads (up to 25-50 nucleotides) prevalent at the time. Since then, technology has improved both sequencing throughput (more nucleotides produced per sequencer per day) and read length (more nucleotides per read).</p>
<p>The chief differences between Bowtie 1 and Bowtie 2 are:</p>
@@ -114,17 +105,115 @@
</ol>
<p>Bowtie 2 is not a “drop-in” replacement for Bowtie 1. Bowtie 2’s command-line arguments and genome index format are both different from Bowtie 1’s.</p>
<h2 id="what-isnt-bowtie-2">What isn’t Bowtie 2?</h2>
-<p>Bowtie 1 and Bowtie 2 are not general-purpose alignment tools like <a href="http://mummer.sourceforge.net/">MUMmer</a>, <a href="http://blast.ncbi.nlm.nih.gov/Blast.cgi">BLAST</a> or <a href="http://www.vmatch.de/">Vmatch</a>. Bowtie 2 works best when aligning to large genomes, though it supports arbitrarily small reference sequences (e.g. amplicons). It handles very long reads (i.e. upwards of 10s or 100s of kilobases), but it is optimized for the read lengths and error modes yielded by recent sequencers, such as the Illumina HiSeq 2000, Roche 454, and Ion Torrent instruments.</p>
-<p>If your goal is to align two very large sequences (e.g. two genomes), consider using <a href="http://mummer.sourceforge.net/">MUMmer</a>. If your goal is very sensitive alignment to a relatively short reference sequence (e.g. a bacterial genome), this can be done with Bowtie 2 but you may want to consider using tools like <a href="http://mummer.sourceforge.net/manual/#nucmer">NUCmer</a>, <a href="http://genome.ucsc.edu/cgi-bin/hgBlat?command=start">BLAT</a>, or <a href="http://blast.ncbi.nlm.nih.gov/Blast.cgi">BLAST</a>. These tools can be extremely slow when the reference genome is long, but are often adequate when the reference is short.</p>
-<p>Bowtie 2 does not support alignment of colorspace reads.</p>
-<h2 id="what-does-it-mean-that-some-older-bowtie-2-versions-are-beta">What does it mean that some older Bowtie 2 versions are “beta”?</h2>
-<p>We said those Bowtie 2 versions were in “beta” to convey that it was not as polished as a tool that had been around for a while, and was still in flux. Since version 2.0.1, we declared Bowtie 2 was no longer “beta”.</p>
+<p>Bowtie 2 is geared toward aligning relatively short sequencing reads to long genomes. That said, it handles arbitrarily small reference sequences (e.g. amplicons) and very long reads (i.e. upwards of 10s or 100s of kilobases), though it is slower in those settings. It is optimized for the read lengths and error modes yielded by typical Illumina sequencers.</p>
+<p>Bowtie 2 does not support alignment of colorspace reads. (Bowtie 1 does.)</p>
<h1 id="obtaining-bowtie-2">Obtaining Bowtie 2</h1>
-<p>Download Bowtie 2 sources and binaries from the <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/">Download</a> section of the Sourceforge site. Binaries are available for the Intel <code>x86_64</code> architecture running Linux, Mac OS X, and Windows. If you plan to compile Bowtie 2 yourself, make sure to get the source package, i.e., the filename that ends in “-source.zip”.</p>
+<p>Bowtie 2 is available from various package managers, notably <a href="https://anaconda.org/bioconda/bowtie2">Bioconda</a>. With Bioconda installed, you should be able to install Bowtie 2 with <code>conda install bowtie2</code>.</p>
+<p>Containerized versions of Bowtie 2 are also available via the <a href="https://BioContainers.pro">Biocontainers</a> project (e.g. <a href="https://hub.docker.com/r/biocontainers/bowtie2/">via Docker Hub</a>).</p>
+<p>You can also download Bowtie 2 sources and binaries from the <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/">Download</a> section of the Sourceforge site. Binaries are available for the <code>x86_64</code> architecture running Linux, Mac OS X, and Windows. If you plan to compile Bowtie 2 yourself, make sure to get the source package, i.e., the filename that ends in “-source.zip”.</p>
<h2 id="building-from-source">Building from source</h2>
-<p>Building Bowtie 2 from source requires a GNU-like environment with GCC, GNU Make and other basics. It should be possible to build Bowtie 2 on most vanilla Linux installations or on a Mac installation with <a href="http://developer.apple.com/xcode/">Xcode</a> installed. Bowtie 2 can also be built on Windows using a 64-bit MinGW distribution and MSYS. In order to simplify the MinGW setup it might be worth investigating popular MinGW personal builds since these are coming already prepared with most of the toolchains needed.</p>
+<p>Building Bowtie 2 from source requires a GNU-like environment with GCC, GNU Make and other basics. It should be possible to build Bowtie 2 on most vanilla Linux installations or on a Mac installation with <a href="http://developer.apple.com/xcode/">Xcode</a> installed. (But see note about the TBB library below). Bowtie 2 can also be built on Windows using a 64-bit MinGW distribution and MSYS. In order to simplify the MinGW setup it might be worth investigating popular MinGW personal builds since these are coming already prepared with most of the toolchains needed.</p>
<p>First, download the source package from the <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/">sourceforge site</a>. Make sure you’re getting the source package; the file downloaded should end in <code>-source.zip</code>. Unzip the file, change to the unzipped directory, and build the Bowtie 2 tools by running GNU <code>make</code> (usually with the command <code>make</code>, but sometimes with <code>gmake</code>) with no arguments. If building with MinGW, run <code>make</code> from the MSYS environment.</p>
-<p>Bowtie 2 is using the multithreading software model in order to speed up execution times on SMP architectures where this is possible. The Threading Building Blocks library, TBB, is now the default threading library in Bowtie 2. On POSIX platforms (like Linux, Mac OS, etc.) if TBB is not available the pthread library will be used. Although it is possible to use pthread library on Windows, a non-POSIX platform, due to performance reasons Bowtie 2 will try to use Windows native multithreading if possible. We recommend that you first install the <a href="https://www.threadingbuildingblocks.org">Threading Building Blocks library</a>, but if unable to do so please specify <code>make NO_TBB=1</code>. TBB comes installed by default on many popular Linux distros. Please note, packages built without TBB will have <em>-legacy</em> appended to the name.</p>
+<p>Bowtie 2 can be run on many threads. By default, Bowtie 2 uses the Threading Building Blocks library (TBB) for this. If TBB is not available on your system (e.g. <code>make</code> prints an error like <code>tbb/mutex.h: No such file or directory</code>), you can install it yourself from source (see <a href="https://www.threadingbuildingblocks.org">Threading Building Blocks library</a>) or install it using your operating system’s preferred package manager. The table below lists some of the commands for a few of the more popular operating systems.</p>
+<table>
+<tr>
+<th>
+OS
+</th>
+<th>
+Sync Package List
+</th>
+<th>
+Search
+</th>
+<th>
+Install
+</th>
+</tr>
+<tr>
+<td>
+Ubuntu, Mint, Debian
+</td>
+<td>
+<pre><code>apt-get update</code></pre>
+</td>
+<td>
+<pre><code>apt-cache search tbb</code></pre>
+</td>
+<td>
+<pre><code>apt-get install libtbb-dev</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+Fedora, CentOS
+</td>
+<td>
+<pre><code>yum check-update</code></pre>
+</td>
+<td>
+<pre><code>yum search tbb</code></pre>
+</td>
+<td>
+<pre><code>yum install tbb-devel.x86_64</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+Arch
+</td>
+<td>
+<pre><code>pacman -Sy</code></pre>
+</td>
+<td>
+<pre><code>pacman -Ss tbb</code></pre>
+</td>
+<td>
+<pre><code>pacman -S extra/intel-tbb</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+Gentoo
+</td>
+<td>
+<pre><code>emerge --sync</code></pre>
+</td>
+<td>
+<pre><code>emerge --search tbb</code></pre>
+</td>
+<td>
+<pre><code>emerge dev-cpp/tbb</code></pre>
+</td>
+<tr>
+<td>
+MacOS
+</td>
+<td>
+<pre><code>brew update</code></pre>
+</td>
+<td>
+<pre><code>brew search tbb</code></pre>
+</td>
+<td>
+<pre><code>brew install tbb</code></pre>
+</td>
+</tr>
+<tr>
+<td>
+FreeBSD
+</td>
+<td>
+<pre><code>portsnap fetch update</code></pre>
+</td>
+<td>
+<pre><code>make search name=tbb</code></pre>
+</td>
+<td>
+<pre><code>cd /usr/ports/devel/tbb && make install && make clean</code></pre>
+</table>
+<p>The Bowtie 2 Makefile also includes recipes for basic automatic dependency management. Running <code>make static-libs && make STATIC_BUILD=1</code> will issue a series of commands that will: (1) download TBB and zlib, (2) compile them as static libraries, and (3) link the resulting libraries to the compiled Bowtie 2 binaries.</p>
+<p>If all else fails, Bowtie 2 can be built with <code>make NO_TBB=1</code> to use pthreads or Windows native multithreading instead.</p>
<h2 id="adding-to-path">Adding to PATH</h2>
<p>By adding your new Bowtie 2 directory to your <a href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH environment variable</a>, you ensure that whenever you run <code>bowtie2</code>, <code>bowtie2-build</code> or <code>bowtie2-inspect</code> from the command line, you will get the version you just installed without having to specify the entire path. This is recommended for most users. To do this, follow your operating system’s instructions for adding the directory to your <a href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH</a>.</p>
<p>If you would like to install Bowtie 2 by copying the Bowtie 2 executable files to an existing directory in your <a href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH</a>, make sure that you copy all the executables, including <code>bowtie2</code>, <code>bowtie2-align-s</code>, <code>bowtie2-align-l</code>, <code>bowtie2-build</code>, <code>bowtie2-build-s</code>, <code>bowtie2-build-l</code>, <code>bowtie2-inspect</code>, <code>bowtie2-inspect-s</code> and <code>bowtie2-inspect-l</code>.</p>
@@ -454,6 +543,14 @@ Reads are substrings (k-mers) extracted from a FASTA file <code><s></code>
</td>
</tr>
<tr>
+<td id="bowtie2-options-trim-to">
+<pre><code>--trim-to [3:|5:]<int></code></pre>
+</td>
+<td>
+<p>Trim reads exceeding <code><int></code> bases. Bases will be trimmed from either the 3’ (right) or 5’ (left) end of the read. If the read end is not specified, Bowtie 2 will default to trimming from the 3’ (right) end of the read. <a href="#bowtie2-options-trim-to"><code>--trim-to</code></a> and <a href="#bowtie2-options-3"><code>-3</code></a>/<a href="#bowtie2-options-5"><code>-5</code></a> are mutually exclusive.</p>
+</td>
+</tr>
+<tr>
<td id="bowtie2-options-phred33-quals">
<pre><code>--phred33</code></pre>
</td>
@@ -958,11 +1055,11 @@ Seed 4 rc: TTATGCATGA</code></pre>
</td>
</tr>
<tr>
-<td id="bowtie2-options-soft-clipped-unmapped-tlen-sec-seq">
+<td id="bowtie2-options-soft-clipped-unmapped-tlen">
<pre><code>--soft-clipped-unmapped-tlen</code></pre>
</td>
<td>
-<p>Consider soft-clipped bases unmapped when calculating <code>TLEN</code>.</p>
+<p>Consider soft-clipped bases unmapped when calculating <code>TLEN</code>. Only available in <a href="#bowtie2-options-local"><code>--local</code></a> mode.</p>
</td>
</tr>
<tr>
=====================================
doc/website/recent_news.ssi
=====================================
--- a/doc/website/recent_news.ssi
+++ b/doc/website/recent_news.ssi
@@ -1,3 +1,25 @@
+<h2>Version 2.3.4.2 - August 07, 2018</h2>
+<ul>
+ <li>Fixed issue causing <code>bowtie2</code> to fail in <code><a href="manual.shtml#bowtie2-options-fast-local">--fast-local</a></code> mode.</li>
+ <li>Fixed issue causing <code><a href="manual.shtml#bowtie2-options-soft-clipped-unmapped-tlen">--soft-clipped-unmapped-tlen</a></code> to be a positional argument.</li>
+ <li>New option <code><a href="manual.shtml#bowtie2-options-trim-to">--trim-to</a> N</code> causes <code>bowtie2</code> to trim reads longer than <code>N</code> bases to exactly <code>N</code> bases. Can trim from either 3' or 5' end, e.g. <code><a href="manual.shtml#bowtie2-options-trim-to">--trim-to</a> 5:30</code> trims reads to 30 bases, truncating at the 5' end.</li>
+ <li>Updated <a href="manual.shtml#building-from-source">"Building from source"</a> manual section with additional instructions on installing TBB.</li>
+ <li>Several other updates to manual, including new mentions of <a href="http://bioconda.github.io">Bioconda</a> and <a href="https://biocontainers.pro">Biocontainers</a>.</li>
+ <li>Fixed an issue preventing <code>bowtie2</code> from processing more than one pattern source when running single threaded.</li>
+ <li>Fixed an issue causing <code>bowtie2</code> and <code>bowtie2-inspect</code> to crash if the index contains a gap-only segment.</li>
+ <li>Added <i>experimental</i> BAM input mode <code>-b</code>. Works only with unpaired input reads and BAM files that are sorted by read name (<code>samtools sort -n</code>). BAM input mode also supports the following options:
+ <ul>
+ <li><code>--preserve-sam-tags</code>: Preserve any optional fields present in BAM record</li>
+ <li><code>--align-paired-reads</code>: Paired-end mode for BAM files</li>
+ </ul></li>
+ <li>Add <i>experimental</i> CMake support</li>
+</ul>
+
+<h2>Thread-scaling paper appears - July 19, 2018</h2>
+<ul>
+ <li>Our latest work on Bowtie's core thread scaling capabilities <a href="https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/bty648/5055585">just appeared Open Access in the journal Bioinformatics</a></li>
+</ul>
+
<h2>Version 2.3.4.1 - February 03, 2018</h2>
<ul>
<li>Fixed an issue with <tt><a href="manual.shtml#bowtie2-options-reorder">--reorder</a></tt> that caused bowtie2 to crash while reordering SAM output</li>
=====================================
doc/website/rhsidebar.ssi
=====================================
--- a/doc/website/rhsidebar.ssi
+++ b/doc/website/rhsidebar.ssi
@@ -18,10 +18,10 @@
</tr>
<tr>
<td>
- <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.3.4.1">Bowtie2 2.3.4.1</a>
+ <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.3.4.2">Bowtie2 2.3.4.2</a>
</td>
<td align="right">
- 02/03/18
+ 08/07/18
</td>
</tr>
<tr>
=====================================
example/reads/combined_reads.bam
=====================================
Binary files /dev/null and b/example/reads/combined_reads.bam differ
=====================================
formats.h
=====================================
--- a/formats.h
+++ b/formats.h
@@ -31,6 +31,7 @@ enum file_format {
FASTA_CONT,
FASTQ,
INTERLEAVED,
+ BAM,
TAB_MATE5,
TAB_MATE6,
RAW,
@@ -43,12 +44,14 @@ static const std::string file_format_names[] = {
"FASTA",
"FASTA sampling",
"FASTQ",
+ "Interleaved FASTQ",
+ "BAM",
"Tabbed mated",
"Raw",
"Command line",
+ "Qseq",
"Chain file",
- "Random",
- "Qseq"
+ "Random"
};
#endif /*FORMATS_H_*/
=====================================
opts.h
=====================================
--- a/opts.h
+++ b/opts.h
@@ -156,7 +156,10 @@ enum {
ARG_XEQ, // --xeq
ARG_THREAD_CEILING, // --thread-ceiling
ARG_THREAD_PIDDIR, // --thread-piddir
- ARG_INTERLEAVED_FASTQ // --interleaved
+ ARG_INTERLEAVED_FASTQ, // --interleaved
+ ARG_TRIM_TO, // --trim-to
+ ARG_PRESERVE_SAM_TAGS, // --preserve-sam-tags
+ ARG_ALIGN_PAIRED_READS, // --align-paired-reads
};
#endif
=====================================
pat.cpp
=====================================
--- a/pat.cpp
+++ b/pat.cpp
@@ -18,9 +18,12 @@
*/
#include <cmath>
+#include <stdio.h>
#include <iostream>
+#include <sstream>
#include <string>
#include <stdexcept>
+#include <string.h>
#include "sstring.h"
#include "pat.h"
@@ -98,6 +101,7 @@ PatternSource* PatternSource::patsrcFromStrings(
case FASTA_CONT: return new FastaContinuousPatternSource(qs, p);
case RAW: return new RawPatternSource(qs, p);
case FASTQ: return new FastqPatternSource(qs, p);
+ case BAM: return new BAMPatternSource(qs, p);
case INTERLEAVED: return new FastqPatternSource(qs, p, true /* interleaved */);
case TAB_MATE5: return new TabbedPatternSource(qs, p, false);
case TAB_MATE6: return new TabbedPatternSource(qs, p, true);
@@ -172,8 +176,11 @@ pair<bool, bool> PatternSourcePerThread::nextReadPair() {
}
// Finalize read/pair
if(!buf_.read_b().patFw.empty()) {
+ trim(buf_.read_a());
+ trim(buf_.read_b());
finalizePair(buf_.read_a(), buf_.read_b());
} else {
+ trim(buf_.read_a());
finalize(buf_.read_a());
}
bool this_is_last = buf_.cur_buf_ == static_cast<unsigned int>(last_batch_size_-1);
@@ -268,7 +275,7 @@ pair<bool, int> DualPatternComposer::nextBatch(PerThreadReadBuf& pt) {
}
assert_eq(resa.first, resb.first);
assert_eq(resa.second, resb.second);
- return make_pair(resa.first, resa.second);
+ return make_pair(resa.first && cur == srca_->size() - 1, resa.second);
}
}
assert_leq(cur, srca_->size());
@@ -463,23 +470,22 @@ void CFilePatternSource::open() {
zfp_ = gzdopen(fn, "rb");
}
else {
- compressed_ = false;
- if (is_gzipped_file(infiles_[filecur_])) {
- compressed_ = true;
- zfp_ = gzopen(infiles_[filecur_].c_str(), "rb");
- }
- else {
- fp_ = fopen(infiles_[filecur_].c_str(), "rb");
+ const char* filename = infiles_[filecur_].c_str();
+ compressed_ = is_gzipped_file(filename);
+ if (compressed_) {
+ zfp_ = gzopen(filename, "rb");
+ } else {
+ fp_ = fopen(filename, "rb");
}
if((compressed_ && zfp_ == NULL) || (!compressed_ && fp_ == NULL)) {
if(!errs_[filecur_]) {
cerr << "Warning: Could not open read file \""
- << infiles_[filecur_].c_str()
+ << filename
<< "\" for reading; skipping..." << endl;
errs_[filecur_] = true;
- }
- filecur_++;
- continue;
+ }
+ filecur_++;
+ continue;
}
}
is_open_ = true;
@@ -1125,6 +1131,167 @@ bool FastqPatternSource::parse(Read &r, Read& rb, TReadId rdid) const {
return true;
}
+const int BAMPatternSource::offset[] = { // byte offset of each field, measured from the start of the record bytes that follow block_size
+ 0, // refID (4 bytes, up to the next offset)
+ 4, // pos (4 bytes)
+ 8, // l_read_name (1 byte)
+ 9, // mapq (1 byte)
+ 10, // bin (2 bytes)
+ 12, // n_cigar_op (2 bytes)
+ 14, // flag (2 bytes)
+ 16, // l_seq (4 bytes)
+ 20, // next_refID (4 bytes)
+ 24, // next_pos (4 bytes)
+ 28, // tlen (4 bytes)
+ 32, // read_name (variable length; first byte after the fixed-size fields)
+};
+
+/**
+ * Consume the BAM header so the stream is left at the first alignment record.
+ *
+ * Reads the 4-byte magic string, the header text (l_text bytes) and then
+ * n_ref reference entries (l_name, the name bytes, l_ref). Nothing that is
+ * read is kept; only the resulting stream position matters to the caller.
+ *
+ * Returns true when the whole header was read. Returns false on a wrong
+ * magic string, on a short read, or when a length/count field is negative.
+ */
+bool BAMPatternSource::parse_bam_header() {
+ char magic[4];
+
+ if (zread(magic, 4) != 4 || strncmp(magic, "BAM\001", 4) != 0) {
+ std::cerr << "This file is not a BAM file" << std::endl;
+ return false;
+ }
+
+ int32_t l_text = 0;
+ int32_t n_ref = 0;
+
+ // NOTE(review): the integers are copied in host byte order -- confirm
+ // that only little-endian targets are supported.
+ //
+ // Every length below comes straight from the file. zread() takes an
+ // unsigned count, so a negative value would be converted to a huge read
+ // into a much smaller buffer; reject such values up front.
+ if (zread(&l_text, sizeof(l_text)) != sizeof(l_text) || l_text < 0) {
+ return false;
+ }
+
+ // size_t arithmetic so that the +1 cannot overflow a 32-bit signed int
+ size_t size = static_cast<size_t>(l_text) + 1;
+ // scratch buffer, reused for the header text and for each reference name
+ char* data = new char[size];
+
+ if (zread(data, l_text) != l_text) {
+ delete[] data;
+ return false;
+ }
+ if (zread(&n_ref, sizeof(n_ref)) != sizeof(n_ref) || n_ref < 0) {
+ delete[] data;
+ return false;
+ }
+
+ for (int32_t i = 0; i < n_ref; i++) {
+ int32_t l_name = 0;
+ int32_t l_ref = 0;
+ if (zread(&l_name, sizeof(l_name)) != sizeof(l_name) || l_name < 0) {
+ delete[] data;
+ return false;
+ }
+ // grow the scratch buffer when this name does not fit
+ if (static_cast<size_t>(l_name) > size) {
+ size = static_cast<size_t>(l_name);
+ delete[] data;
+ data = new char[size];
+ }
+ if (zread(data, l_name) != l_name) {
+ delete[] data;
+ return false;
+ }
+ if (zread(&l_ref, sizeof(l_ref)) != sizeof(l_ref)) {
+ delete[] data;
+ return false;
+ }
+ }
+
+ delete[] data;
+
+ return true;
+}
+
+/**
+ * Light-parse a batch of BAM records into the per-thread buffers.
+ *
+ * Each record is copied verbatim into a Read's readOrigBuf; decoding is left
+ * to parse(). Records are selected by their flag bits 0x40 and 0x80:
+ * - without pp_.align_paired_reads, records with either bit set are skipped;
+ * - with pp_.align_paired_reads, records with neither bit set are skipped,
+ * and the remaining ones must alternate 0x40 (stored in pt.bufa_) and
+ * 0x80 (stored in pt.bufb_), otherwise the batch ends with an error.
+ *
+ * pt: buffers to fill; at most pt.max_buf_ slots are used
+ * batch_a: not used by this implementation
+ * readi: index of the first slot to fill
+ *
+ * Returns (done, count): done is true when no further records can be read
+ * from this file (end of file, unreadable header, malformed or truncated
+ * record, or out-of-order mates); count is the slot index reached.
+ */
+std::pair<bool, int> BAMPatternSource::nextBatchFromFile(PerThreadReadBuf& pt,
+ bool batch_a, unsigned readi) {
+ if (first_) {
+ // first call for this file: the header has to be consumed first
+ first_ = false;
+ if (!parse_bam_header()) {
+ std::cerr << "Unable to parse BAM file" << std::endl;
+ return make_pair(true, 0);
+ }
+ }
+
+ bool done = false;
+ bool read1 = true; // true while the next paired record should be the 0x40 mate
+
+ while (readi < pt.max_buf_) {
+ uint16_t flag;
+ int32_t block_size;
+ EList<Read>& readbuf = pp_.align_paired_reads && !read1 ? pt.bufb_ : pt.bufa_;
+
+ if (zread(&block_size, sizeof(block_size)) != sizeof(block_size)) {
+ return make_pair(true, readi);
+ }
+ // A record must at least hold the fixed-size fields that precede
+ // read_name. Anything smaller (including a negative size from a
+ // corrupt file) would make the flag lookup below read outside the
+ // record, so it is handled like a truncated record.
+ if (block_size < offset[BAMField::read_name]) {
+ std::cerr << "Malformed BAM record" << std::endl;
+ done = true;
+ break;
+ }
+ if (readbuf[readi].readOrigBuf.length() < block_size) {
+ readbuf[readi].readOrigBuf.resize(block_size);
+ }
+ if (zread(readbuf[readi].readOrigBuf.wbuf(), block_size) != block_size) {
+ done = true;
+ break;
+ }
+ memcpy(&flag, readbuf[readi].readOrigBuf.buf() + offset[BAMField::flag], sizeof(flag));
+ // unpaired mode: drop records that are marked as a mate
+ if (!pp_.align_paired_reads && ((flag & 0x40) != 0 || (flag & 0x80) != 0)) {
+ readbuf[readi].readOrigBuf.clear();
+ continue;
+ }
+ // paired mode: drop records that are not marked as a mate
+ if (pp_.align_paired_reads && ((flag & 0x40) == 0 && (flag & 0x80) == 0)) {
+ readbuf[readi].readOrigBuf.clear();
+ continue;
+ }
+ if (pp_.align_paired_reads && read1 && (flag & 0x40) == 0) {
+ std::cerr << "Paired reads are out of order" << std::endl;
+ return make_pair(true, readi == 0 ? readi : readi-1);
+ }
+ if (pp_.align_paired_reads && !read1 && (flag & 0x80) == 0) {
+ std::cerr << "Paired reads are out of order" << std::endl;
+ return make_pair(true, readi == 0 ? readi : readi-1);
+ }
+
+ read1 = !read1;
+ // in paired mode the slot index only advances once pt.bufb_ holds
+ // the second mate for this slot
+ readi = (pp_.align_paired_reads
+ && pt.bufb_[readi].readOrigBuf.length() == 0) ? readi : readi + 1;
+ }
+ return make_pair(done, readi);
+}
+
+/**
+ * Decode the raw BAM record held in ra.readOrigBuf into ra's name, sequence
+ * and qualities. When rb holds an unparsed record as well, it is decoded by
+ * a second pass with the two arguments swapped.
+ */
+bool BAMPatternSource::parse(Read& ra, Read& rb, TReadId rdid) const {
+ const char* rec = ra.readOrigBuf.buf();
+ int rec_len = ra.readOrigBuf.length();
+
+ // fixed-size fields needed to locate the variable-length parts
+ uint8_t name_len;
+ uint16_t cigar_ops;
+ int32_t seq_len;
+ memcpy(&name_len, rec + offset[BAMField::l_read_name], sizeof(name_len));
+ memcpy(&cigar_ops, rec + offset[BAMField::n_cigar_op], sizeof(cigar_ops));
+ memcpy(&seq_len, rec + offset[BAMField::l_seq], sizeof(seq_len));
+
+ // read name; the final counted byte is left out
+ int cursor = offset[BAMField::read_name];
+ ra.name.install(rec + cursor, name_len-1);
+
+ // step over the name and the CIGAR operations (one uint32_t each)
+ cursor += (name_len + sizeof(uint32_t) * cigar_ops);
+ const char* packed_seq = rec + cursor;
+
+ // the sequence stores two bases per byte
+ cursor += (seq_len+1)/2;
+ const char* quals = rec + cursor;
+
+ const char* const nibble_to_base = "=ACMGRSVTWYHKDBN";
+ for (int i = 0; i < seq_len; i++) {
+ ra.qual.append(quals[i] + 33);
+ // even positions use the high nibble, odd positions the low nibble
+ const uint8_t packed = static_cast<uint8_t>(packed_seq[i/2]);
+ const int shift = (i % 2 == 0) ? 4 : 0;
+ int base = nibble_to_base[(packed >> shift) & 0xf];
+ ra.patFw.append(asc2dna[base]);
+ }
+
+ if (pp_.preserve_sam_tags) {
+ // whatever follows the qualities is kept verbatim
+ cursor += seq_len;
+ ra.preservedOptFlags.install(rec + cursor, rec_len - cursor);
+ }
+
+ ra.parsed = true;
+
+ const bool mate_pending = !rb.parsed && rb.readOrigBuf.length() != 0;
+ if (mate_pending) {
+ return parse(rb, ra, rdid);
+ }
+ return true;
+}
+
/**
* Light-parse a batch of tabbed-format reads into given buffer.
*/
=====================================
pat.h
=====================================
--- a/pat.h
+++ b/pat.h
@@ -62,11 +62,14 @@ struct PatternParams {
bool intQuals_,
int trim5_,
int trim3_,
+ pair<short, size_t> trimTo_,
int sampleLen_,
int sampleFreq_,
size_t skip_,
int nthreads_,
- bool fixName_) :
+ bool fixName_,
+ bool preserve_sam_tags_,
+ bool align_paired_reads_) :
format(format_),
fileParallel(fileParallel_),
seed(seed_),
@@ -76,11 +79,14 @@ struct PatternParams {
intQuals(intQuals_),
trim5(trim5_),
trim3(trim3_),
+ trimTo(trimTo_),
sampleLen(sampleLen_),
sampleFreq(sampleFreq_),
skip(skip_),
nthreads(nthreads_),
- fixName(fixName_) { }
+ fixName(fixName_),
+ preserve_sam_tags(preserve_sam_tags_),
+ align_paired_reads(align_paired_reads_) { }
int format; // file format
bool fileParallel; // true -> wrap files with separate PatternComposers
@@ -91,11 +97,14 @@ struct PatternParams {
bool intQuals; // true -> qualities are space-separated numbers
int trim5; // amt to hard clip from 5' end
int trim3; // amt to hard clip from 3' end
+ pair<short, size_t> trimTo;
int sampleLen; // length of sampled reads for FastaContinuous...
int sampleFreq; // frequency of sampled reads for FastaContinuous...
size_t skip; // skip the first 'skip' patterns
int nthreads; // number of threads for locking
bool fixName; //
+ bool preserve_sam_tags; // keep existing SAM tags when aligning BAM files
+ bool align_paired_reads;
};
/**
@@ -153,7 +162,7 @@ struct PerThreadReadBuf {
*/
bool exhausted() {
assert_leq(cur_buf_, bufa_.size());
- return cur_buf_ >= bufa_.size()-1;
+ return cur_buf_ >= bufa_.size()-1 || bufa_[cur_buf_+1].readOrigBuf.empty();
}
/**
@@ -403,21 +412,34 @@ protected:
return compressed_ ? gzungetc(c, zfp_) : ungetc(c, fp_);
}
- bool is_gzipped_file(const std::string& filename) {
- struct stat s;
- if (stat(filename.c_str(), &s) != 0) {
- perror("stat");
+ /**
+ * Read up to 'len' bytes from zfp_ into 'buf' using gzread(). When
+ * gzread() returns a negative value, the gzerror() message (if any)
+ * is written to std::cerr. The gzread() result is returned unchanged.
+ */
+ int zread(voidp buf, unsigned len) {
+ int r = gzread(zfp_, buf, len);
+ if (r < 0) {
+ const char *err = gzerror(zfp_, NULL);
+ if (err != NULL) {
+ std::cerr << err << std::endl;
+ }
}
- else {
- if (S_ISFIFO(s.st_mode))
- return true;
+ return r;
+ }
+
+ /**
+ * Return true iff 'filename' begins with the two-byte gzip magic
+ * number (0x1f 0x8b). Returns false if the file cannot be opened or
+ * holds fewer than two bytes.
+ */
+ bool is_gzipped_file(const char* filename) {
+ FILE* f = fopen(filename, "rb");
+ // fopen() reports failure by returning NULL; calling ferror() on a
+ // null stream is undefined, so test the pointer itself.
+ if (f == NULL) {
+ return false;
}
- size_t pos = filename.find_last_of(".");
- std::string ext = (pos == std::string::npos) ? "" : filename.substr(pos + 1);
- if (ext == "" || ext == "gz" || ext == "Z") {
- return true;
+
+ bool ret = false;
+ uint8_t byte1 = 0, byte2 = 0;
+
+ // Require both reads to succeed so that a short or unreadable file
+ // is never judged from bytes that were not actually read.
+ size_t n1 = fread(&byte1, 1, sizeof(uint8_t), f);
+ size_t n2 = fread(&byte2, 1, sizeof(uint8_t), f);
+ if (n1 == 1 && n2 == 1 && byte1 == 0x1f && byte2 == 0x8b) {
+ ret = true;
}
- return false;
+ fclose(f);
+
+ return ret;
}
EList<std::string> infiles_; // filenames for read files
@@ -696,6 +718,68 @@ protected:
bool interleaved_; // fastq reads are interleaved
};
+/**
+ * Pattern source for BAM input files, built on CFilePatternSource.
+ * Batches are light-parsed by nextBatchFromFile(); parse() finishes the
+ * work for an individual read (pair) afterwards.
+ */
+class BAMPatternSource : public CFilePatternSource {
+
+public:
+
+    BAMPatternSource(
+        const EList<std::string>& infiles,
+        const PatternParams& p) :
+        CFilePatternSource(infiles, p),
+        first_(true) {}
+
+    /**
+     * Flag the source as positioned at the first read again, then
+     * reset the base class.
+     */
+    virtual void reset() {
+        first_ = true;
+        CFilePatternSource::reset();
+    }
+
+    /**
+     * Finalize BAM parsing outside critical section.
+     */
+    virtual bool parse(Read& ra, Read& rb, TReadId rdid) const;
+
+protected:
+
+    /**
+     * Light-parse a batch into the given buffer.
+     */
+    virtual std::pair<bool, int> nextBatchFromFile(PerThreadReadBuf& pt, bool batch_a,
+        unsigned readi);
+
+
+    /**
+     * Reset state to be ready for the next file.
+     */
+    virtual void resetForNextFile() {
+        first_ = true;
+    }
+
+    bool first_; // parsing first read in file
+
+private:
+
+    // Defined out of line; presumably consumes the BAM header before the
+    // first alignment record is read -- confirm against the definition.
+    bool parse_bam_header();
+
+    // Names for the fixed-position fields of a BAM alignment record.
+    // NOTE(review): these look like indices into offset[] below -- confirm.
+    struct BAMField {
+        enum aln_rec_field_name {
+            refID,
+            pos,
+            l_read_name,
+            mapq,
+            bin,
+            n_cigar_op,
+            flag,
+            l_seq,
+            next_refID,
+            next_pos,
+            tlen,
+            read_name  // no trailing comma: not permitted before C++11
+        };
+    };
+
+    // Defined out of line; presumably the byte offset of each BAMField
+    // within an alignment record -- confirm against the definition.
+    static const int offset[];
+};
+
+
/**
* Read a Raw-format file (one sequence per line). No quality strings
* allowed. All qualities are assumed to be 'I' (40 on the Phred-33
@@ -988,6 +1072,27 @@ private:
return composer_.parse(ra, rb, buf_.rdid());
}
+    /**
+     * Shorten read 'r' to pp_.trimTo.second bases when it is longer than
+     * that. pp_.trimTo.first selects the end to cut: 3 removes bases from
+     * the end of the sequence, 5 removes them from the beginning. Any
+     * other selector, a target of 0, or a read that is already short
+     * enough leaves 'r' untouched. Qualities are trimmed with the bases.
+     *
+     * NOTE(review): the 3' case records its count in r.trimmed5 and the
+     * 5' case in r.trimmed3 -- confirm this pairing is intended.
+     */
+    void trim(Read& r) {
+        const size_t target = pp_.trimTo.second;
+        if (target == 0 || r.patFw.length() <= target) {
+            return;
+        }
+        if (pp_.trimTo.first == 3) {
+            r.trimmed5 = r.patFw.length() - target;
+            r.patFw.trimEnd(r.trimmed5);
+            r.qual.trimEnd(r.trimmed5);
+        } else if (pp_.trimTo.first == 5) {
+            r.trimmed3 = r.patFw.length() - target;
+            r.patFw.trimBegin(r.trimmed3);
+            r.qual.trimBegin(r.trimmed3);
+        }
+    }
+
PatternComposer& composer_; // pattern composer
PerThreadReadBuf buf_; // read data buffer
const PatternParams& pp_; // pattern-related parameters
=====================================
presets.cpp
=====================================
--- a/presets.cpp
+++ b/presets.cpp
@@ -71,7 +71,8 @@ void PresetsV0::apply(
policy += ";ROUNDS=1";
policy += ";IVAL=S,1,2.00";
} else if(preset == "fast-local") {
- policy += ";SEED=0,22";
+ policy += ";SEED=0";
+ policy += ";SEEDLEN=22";
policy += ";DPS=10";
policy += ";ROUNDS=2";
policy += ";IVAL=S,1,1.75";
=====================================
read.h
=====================================
--- a/read.h
+++ b/read.h
@@ -55,6 +55,7 @@ struct Read {
patRcRev.clear();
qualRev.clear();
name.clear();
+ preservedOptFlags.clear();
filter = '?';
seed = 0;
parsed = false;
@@ -254,6 +255,7 @@ struct Read {
TBuf readOrigBuf;
BTString name; // read name
+ BTString preservedOptFlags;
TReadId rdid; // 0-based id based on pair's offset in read file(s)
int mate; // 0 = single-end, 1 = mate1, 2 = mate2
uint32_t seed; // random seed
=====================================
ref_read.cpp
=====================================
--- a/ref_read.cpp
+++ b/ref_read.cpp
@@ -273,12 +273,16 @@ fastaRefReadSizes(
cerr << e.what() << endl;
throw 1;
}
+ first = false;
// Add the length of this record.
- if(rec.first) numSeqs++;
+ if(rec.len == 0 && rec.first) {
+ continue;
+ } else if(rec.first) {
+ numSeqs++;
+ }
unambigTot += rec.len;
bothTot += rec.len;
bothTot += rec.off;
- first = false;
if(rec.len == 0 && rec.off == 0 && !rec.first) continue;
recs.push_back(rec);
}
=====================================
sam.cpp
=====================================
--- a/sam.cpp
+++ b/sam.cpp
@@ -873,3 +873,92 @@ void SamConfig::printEmptyOptFlags(
printOptFieldNewlineEscapedZ(o, rd.readOrigBuf);
}
}
+
+/**
+ * Append the optional fields preserved from a BAM input record to 'o' as
+ * SAM text, one "\tTG:T:value" field per entry.
+ *
+ * rd.preservedOptFlags is read as a packed run of
+ * <2-char tag><1-char type><value> entries, with values copied in host
+ * byte order:
+ *   A            one character, printed as that character
+ *   c C s S i I  1-, 2- or 4-byte integer, printed with SAM type 'i'
+ *   Z H          NUL-terminated string, printed verbatim
+ *   f B          not printed; the value is stepped over so that the
+ *                entries after it are still read correctly
+ * Reading stops at the first entry that is truncated or whose type is
+ * not listed above, since the width of its value cannot be known.
+ */
+void SamConfig::printPreservedOptFlags(BTString& o, const Read& rd) const {
+    if (rd.preservedOptFlags.length() == 0) {
+        return;
+    }
+    char buf[1024];
+    const char* b = rd.preservedOptFlags.buf();
+    int i = 0, len = rd.preservedOptFlags.length();
+    // Every entry needs at least a 2-byte tag and a 1-byte type.
+    while (i + 3 <= len) {
+        const char* tag = b + i;
+        const char val_type = b[i + 2];
+        i += 3;
+
+        // Value width in bytes: 0 = variable length, -1 = unknown type.
+        int width = -1;
+        bool is_int = false;
+        switch (val_type) {
+            case 'A':           width = 1; break;
+            case 'c': case 'C': width = 1; is_int = true; break;
+            case 's': case 'S': width = 2; is_int = true; break;
+            case 'i': case 'I': width = 4; is_int = true; break;
+            case 'f':           width = 4; break;
+            case 'Z': case 'H': case 'B': width = 0; break;
+            default: break;
+        }
+        if (width < 0 || i + width > len) {
+            break;
+        }
+
+        if (val_type == 'f') {
+            // Not printed: step over the 4-byte value.
+            i += width;
+            continue;
+        }
+        if (val_type == 'B') {
+            // Not printed: <1-byte subtype><int32 count><count elements>.
+            int32_t count;
+            if (i + 1 + (int)sizeof(count) > len) {
+                break;
+            }
+            const char sub = b[i];
+            memcpy(&count, b + i + 1, sizeof(count));
+            int elt = 0;
+            if (sub == 'c' || sub == 'C') {
+                elt = 1;
+            } else if (sub == 's' || sub == 'S') {
+                elt = 2;
+            } else if (sub == 'i' || sub == 'I' || sub == 'f') {
+                elt = 4;
+            }
+            if (elt == 0 || count < 0 || count > (len - i - 5) / elt) {
+                break;
+            }
+            i += 5 + count * elt;
+            continue;
+        }
+
+        o.append('\t');
+        o.append(tag, 2);
+        o.append(':');
+        // SAM text has a single integer type, 'i', for all BAM int widths.
+        o.append(is_int ? 'i' : val_type);
+        o.append(':');
+
+        switch (val_type) {
+            case 'A':
+                // A single printable character, not a number.
+                o.append(b[i]);
+                break;
+            case 'c': {
+                int8_t c_val;
+                memcpy(&c_val, b + i, sizeof(c_val));
+                itoa10<int8_t>(c_val, buf);
+                o.append(buf);
+                break;
+            }
+            case 'C': {
+                uint8_t C_val;
+                memcpy(&C_val, b + i, sizeof(C_val));
+                itoa10<uint8_t>(C_val, buf);
+                o.append(buf);
+                break;
+            }
+            case 's': {
+                int16_t s_val;
+                memcpy(&s_val, b + i, sizeof(s_val));
+                itoa10<int16_t>(s_val, buf);
+                o.append(buf);
+                break;
+            }
+            case 'S': {
+                uint16_t S_val;
+                memcpy(&S_val, b + i, sizeof(S_val));
+                itoa10<uint16_t>(S_val, buf);
+                o.append(buf);
+                break;
+            }
+            case 'i': {
+                int32_t i_val;
+                memcpy(&i_val, b + i, sizeof(i_val));
+                itoa10<int32_t>(i_val, buf);
+                o.append(buf);
+                break;
+            }
+            case 'I': {
+                uint32_t I_val;
+                memcpy(&I_val, b + i, sizeof(I_val));
+                itoa10<uint32_t>(I_val, buf);
+                o.append(buf);
+                break;
+            }
+            case 'Z':
+            case 'H':
+                // Copy up to the terminating NUL, never past the buffer.
+                while (i < len && b[i] != '\0') {
+                    o.append(b[i]);
+                    i++;
+                }
+                i++; // step over the NUL
+                break;
+            default:
+                break;
+        }
+        // Fixed-width values are consumed here; strings advanced above.
+        i += width;
+    }
+}
=====================================
sam.h
=====================================
--- a/sam.h
+++ b/sam.h
@@ -305,6 +305,11 @@ public:
const PerReadMetrics& prm, // per-read metrics
const Scoring& sc) // scoring scheme
const;
+
+ /**
+ * Print optional flags that have been preserved from BAM input
+ */
+ void printPreservedOptFlags(BTString& o, const Read& rd) const;
/**
* Return true iff we should try to obey the SAM spec's recommendations
=====================================
scripts/test/simple_tests.pl
=====================================
--- a/scripts/test/simple_tests.pl
+++ b/scripts/test/simple_tests.pl
@@ -133,8 +133,23 @@ my @cases = (
# -c
+ { name => "Align should not fail when first seq in ref is all Ns",
+ ref => [ "NNNNNNNNNNNNNNN", "AGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG",
+ hits => [{ 2 => 1 }] },
+
+ { name => "Align should not fail when multiple seqs are all Ns",
+ ref => [ "NNNNNNNNNNNNNNN", "NNNNNNNNNNNNNNN", "AGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG",
+ hits => [{ 2 => 1 }] },
+
+ { name => "Should account for stretch of Ns at beginning",
+ ref => [ "NNNNNNNNNNNNNNNAGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG",
+ hits => [{ 17 => 1 }] },
+
{ name => "Cline 1",
- ref => [ "AGCATCGATCAGTATCTGA" ],
+ ref => ["AGCATCGATCAGTATCTGA" ],
cline_reads => "CATCGATCAGTATCTG",
hits => [{ 2 => 1 }] },
@@ -158,6 +173,41 @@ my @cases = (
cline_reads => "CATCGATCAGTATCTG:ABCDEDGHIJKLMNOPQ", # qual too long
should_abort => 1},
+ { name => "trim-to: trim from 5'-end",
+ ref => [ "AGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
+ args => "--trim-to 5:12",
+ norc => 1,
+ hits => [{ 6 => 1 }] },
+
+ { name => "trim-to: trim from 3'-end",
+ ref => [ "AGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
+ args => "--trim-to 3:12",
+ norc => 1,
+ hits => [{ 2 => 1 }] },
+
+ { name => "trim-to: trim from 3'-end (default)",
+ ref => [ "AGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
+ args => "--trim-to 12",
+ norc => 1,
+ hits => [{ 2 => 1 }] },
+
+ { name => "trim-to: invalid position",
+ ref => [ "AGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
+ args => "--trim-to 4:12",
+ norc => 1,
+ should_abort => 1 },
+
+ { name => "trim-to: invalid count",
+ ref => [ "AGCATCGATCAGTATCTGA" ],
+ cline_reads => "CATCGATCAGTATCTG:IIIIIIIIIIIIIIII\n",
+ args => "--trim-to 5:-12",
+ norc => 1,
+ should_abort => 1 },
+
# Part of sequence is trimmed
{ name => "Cline 7",
ref => [ "AGCATCGATCAGTATCTGA" ],
@@ -4518,7 +4568,7 @@ my $idx_type = "";
else {
$binary_type = "--" . $binary_type;
}
-
+
my $cmd;
my $batch_size = int(rand(16) + 1);
if($pe) {
View it on GitLab: https://salsa.debian.org/med-team/bowtie2/commit/157dbc209f47672a9a7167d16e3a324803906000
--
View it on GitLab: https://salsa.debian.org/med-team/bowtie2/commit/157dbc209f47672a9a7167d16e3a324803906000
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180814/1a35eb08/attachment-0001.html>
More information about the debian-med-commit
mailing list