[med-svn] [Git][med-team/bowtie2][upstream] New upstream version 2.5.3
Charles Plessy (@plessy)
gitlab at salsa.debian.org
Wed Apr 17 04:56:31 BST 2024
Charles Plessy pushed to branch upstream at Debian Med / bowtie2
Commits:
886d9d4f by Charles Plessy at 2024-04-17T08:56:07+09:00
New upstream version 2.5.3
- - - - -
17 changed files:
- BOWTIE2_VERSION
- CMakeLists.txt
- MANUAL
- MANUAL.markdown
- Makefile
- NEWS
- README.md
- blockwise_sa.h
- bowtie2-build
- bt2_idx.h
- bt2_search.cpp
- doc/manual.html
- doc/website/manual.ssi
- doc/website/recent_news.ssi
- doc/website/rhsidebar.ssi
- filebuf.h
- pat.cpp
Changes:
=====================================
BOWTIE2_VERSION
=====================================
@@ -1 +1 @@
-2.5.2
+2.5.3
=====================================
CMakeLists.txt
=====================================
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
cmake_policy(SET CMP0048 NEW)
cmake_policy(SET CMP0005 NEW)
@@ -25,6 +25,7 @@ if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
endif()
set(USE_SRA ${USE_SRA})
+set(USE_SAIS ${USE_SAIS})
set(WITH_THREAD_PROFILING ${WITH_THREAD_PROFILING})
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
@@ -152,67 +153,6 @@ elseif (${ARCH} MATCHES amd64|x64_64)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
endif()
-if (USE_SRA)
- set(NGS_VER 2.10.2)
- set(NCBI_VDB_VER 2.10.2)
-
- find_package(Java COMPONENTS Development REQUIRED)
- find_package(PythonInterp REQUIRED)
- find_package(Perl REQUIRED)
-
- find_program(MAKE_EXE NAMES gmake nmake make)
- ExternalProject_add(ngs_project
- URL https://github.com/ncbi/ngs/archive/${NGS_VER}.tar.gz
- PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ngs-${NGS_VER}
- BUILD_IN_SOURCE 1
- CONFIGURE_COMMAND ./configure --prefix=install
- BUILD_COMMAND ${MAKE_EXE}
- INSTALL_COMMAND ${MAKE_EXE} install
- )
-
- ExternalProject_Get_Property(ngs_project SOURCE_DIR)
- set(NGS_INSTALL_DIR ${SOURCE_DIR}/ngs-sdk/install)
-
- link_directories(${NGS_INSTALL_DIR}/lib64)
- include_directories(${NGS_INSTALL_DIR}/include)
-
- add_library(ngs-c++-static STATIC IMPORTED)
- add_dependencies(ngs-c++-static ngs_project)
- find_library(LIBNGS_CPP ngs-c++-static PATHS ${NGS_INSTALL_DIR}/lib64 NO_DEFAULT_PATH)
- set_property(TARGET ngs-c++-static PROPERTY IMPORTED_LOCATION ${LIBNGS_CPP})
-
- link_libraries(ngs-c++-static)
-
- ExternalProject_add(ncbi_vdb_project
- URL https://github.com/ncbi/ncbi-vdb/archive/${NCBI_VDB_VER}.tar.gz
- PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ncbi-vdb-${NCBI_VDB_VER}
- BUILD_IN_SOURCE 1
- CONFIGURE_COMMAND ./configure --prefix=install --with-ngs-sdk=${NGS_INSTALL_DIR}
- BUILD_COMMAND ${MAKE_EXE}
- INSTALL_COMMAND ${MAKE_EXE} install
- DEPENDS ngs_project
- )
-
- ExternalProject_Get_Property(ncbi_vdb_project SOURCE_DIR)
- set(NCBI_VDB_INSTALL_DIR ${SOURCE_DIR}/install)
-
- link_directories(${NCBI_VDB_INSTALL_DIR}/lib64)
- include_directories(${NCBI_VDB_INSTALL_DIR}/include)
-
- add_library(ncbi-ngs-c++-static STATIC IMPORTED)
- add_dependencies(ncbi-ngs-c++-static ncbi_vdb_project)
- find_library(LIBNCBI_NGS_CPP ncbi-ngs-c++-static PATHS ${NCBI_VDB_INSTALL_DIR}/lib64 NO_DEFAULT_ATH)
- set_property(TARGET ncbi-ngs-c++-static PROPERTY IMPORTED_LOCATION ${LIBNCBI_NGS_CPP})
-
- add_library(ncbi-vdb-static STATIC IMPORTED)
- add_dependencies(ncbi-vdb-static ncbi_vdb_project)
- find_library(LIBNCBI_VDB ncbi-vdb-static PATHS ${NCBI_VDB_INSTALL_DIR}/lib64 NO_DEFAULT_PATH)
- set_property(TARGET ncbi-vdb-static PROPERTY IMPORTED_LOCATION ${LIBNCBI_VDB})
-
- link_libraries(ncbi-ngs-c++-static ncbi-vdb-static)
- add_definitions(-DUSE_SRA)
-endif()
-
if (WITH_THREAD_PROFILING)
add_definitions(-DPER_THREAD_TIMING=1)
endif()
@@ -243,6 +183,95 @@ string(REPLACE ";" " -D" COMPILER_DEFS "${COMPILER_DEFS}")
string(REPLACE "\"" "" COMPILER_DEFS "${COMPILER_DEFS}")
add_definitions(-DCOMPILER_OPTIONS="${CMAKE_CXX_FLAGS} -D${COMPILER_DEFS}")
+# Optional SRA support: download and build ncbi-vdb and sra-tools as external
+# projects, install both into THIRD_PARTY_LIBS and link the resulting static
+# libraries into every target defined after this block.
+if (USE_SRA)
+  set(SRA_TOOLS_VER 3.0.9)
+  set(NCBI_VDB_VER 3.0.9)
+
+  # Common install prefix shared by both external projects.
+  set(THIRD_PARTY_LIBS "${PROJECT_SOURCE_DIR}/third_party")
+
+  find_package(Java COMPONENTS Development REQUIRED)
+  find_package(PythonInterp REQUIRED)
+  find_package(Perl REQUIRED)
+  find_program(MAKE_EXE NAMES gmake nmake make)
+
+  # Each option is quoted as a whole argument so a prefix containing spaces
+  # stays intact and no literal quote characters reach ./configure.
+  ExternalProject_add(ncbi_vdb_project
+    URL https://github.com/ncbi/ncbi-vdb/archive/${NCBI_VDB_VER}.tar.gz
+    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ncbi-vdb-${NCBI_VDB_VER}
+    BUILD_IN_SOURCE 1
+    CONFIGURE_COMMAND ./configure "--prefix=${THIRD_PARTY_LIBS}" --without-debug
+    BUILD_COMMAND ${MAKE_EXE}
+    INSTALL_COMMAND ${MAKE_EXE} install
+  )
+
+  # sra-tools is configured against the ncbi-vdb install above, hence DEPENDS.
+  ExternalProject_add(ngs_project
+    URL https://github.com/ncbi/sra-tools/archive/${SRA_TOOLS_VER}.tar.gz
+    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/sra-tools-${SRA_TOOLS_VER}
+    BUILD_IN_SOURCE 1
+    CONFIGURE_COMMAND ./configure "--prefix=${THIRD_PARTY_LIBS}" "--with-ncbi-vdb-prefix=${THIRD_PARTY_LIBS}" --enable-static --without-debug
+    BUILD_COMMAND ${MAKE_EXE}
+    INSTALL_COMMAND ${MAKE_EXE} install
+    DEPENDS ncbi_vdb_project
+  )
+
+  link_directories(${THIRD_PARTY_LIBS}/lib64)
+  include_directories(${THIRD_PARTY_LIBS}/include)
+
+  # The libraries do not exist until the external projects have been built,
+  # so a configure-time find_library() cannot locate them on a fresh tree.
+  # Spell out the expected install locations instead. The variable names that
+  # find_library() used to populate are kept for compatibility.
+  set(LIBNCBI_VDB
+    "${THIRD_PARTY_LIBS}/lib64/${CMAKE_STATIC_LIBRARY_PREFIX}ncbi-vdb-static${CMAKE_STATIC_LIBRARY_SUFFIX}")
+  set(LIBNCBI_NGS
+    "${THIRD_PARTY_LIBS}/lib64/${CMAKE_STATIC_LIBRARY_PREFIX}ncbi-ngs-static${CMAKE_STATIC_LIBRARY_SUFFIX}")
+
+  add_library(ncbi-vdb-static STATIC IMPORTED)
+  add_dependencies(ncbi-vdb-static ncbi_vdb_project)
+  set_property(TARGET ncbi-vdb-static PROPERTY IMPORTED_LOCATION "${LIBNCBI_VDB}")
+
+  add_library(ncbi-ngs-static STATIC IMPORTED)
+  add_dependencies(ncbi-ngs-static ngs_project)
+  set_property(TARGET ncbi-ngs-static PROPERTY IMPORTED_LOCATION "${LIBNCBI_NGS}")
+
+  # CMAKE_DL_LIBS names the platform's dlopen library (empty where libc
+  # already provides it) instead of hard-coding "dl".
+  link_libraries(ncbi-ngs-static ncbi-vdb-static ${CMAKE_DL_LIBS})
+  add_definitions(-DUSE_SRA)
+endif()
+
+# Optional libsais support: download and build libsais as an external project
+# (with OpenMP when the compiler provides it) and link the resulting static
+# library into every target defined after this block.
+if (USE_SAIS)
+  set(LIBSAIS_VER 2.7.3)
+
+  # Extra arguments for libsais' own configure step; stays empty when OpenMP
+  # is not available.
+  set(LIBSAIS_CONFIGURE_ARGS "")
+
+  find_package(OpenMP)
+  if (OpenMP_CXX_FOUND)
+    link_libraries(OpenMP::OpenMP_CXX)
+    list(APPEND LIBSAIS_CONFIGURE_ARGS -DLIBSAIS_USE_OPENMP=1)
+    # Define LIBSAIS_OPENMP only when the library is actually built with
+    # OpenMP, so the macro always agrees with the archive that gets linked.
+    # NOTE(review): libsais' header is expected to gate its *_omp
+    # declarations on this macro -- confirm against the pinned version.
+    add_definitions(-DLIBSAIS_OPENMP)
+  endif()
+
+  # Configure and build in the extracted source tree; there is no install
+  # step, the archive is used straight from SOURCE_DIR below.
+  # ${CMAKE_COMMAND} guarantees the nested build uses the same CMake that is
+  # processing this file rather than whatever "cmake" is first on PATH.
+  ExternalProject_add(libsais_project
+    URL https://github.com/IlyaGrebnov/libsais/archive/refs/tags/v${LIBSAIS_VER}.tar.gz
+    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/libsais-${LIBSAIS_VER}
+    BUILD_IN_SOURCE 1
+    DOWNLOAD_EXTRACT_TIMESTAMP false
+    CONFIGURE_COMMAND ${CMAKE_COMMAND} . ${LIBSAIS_CONFIGURE_ARGS}
+    BUILD_COMMAND ${CMAKE_COMMAND} --build .
+    INSTALL_COMMAND ""
+  )
+
+  add_library(libsais STATIC IMPORTED)
+  add_dependencies(libsais libsais_project)
+
+  ExternalProject_Get_Property(libsais_project SOURCE_DIR)
+  link_directories(${SOURCE_DIR})
+  include_directories(${SOURCE_DIR}/include)
+
+  # The archive only exists after libsais_project has been built, so its
+  # location is spelled out rather than searched for at configure time.
+  set_property(TARGET libsais PROPERTY IMPORTED_LOCATION ${SOURCE_DIR}/liblibsais.a)
+
+  link_libraries(libsais)
+  add_definitions(-DUSE_SAIS)
+endif()
+
add_executable(bowtie2-align-s ${SEARCH_CPPS} ${SHARED_CPPS})
add_executable(bowtie2-align-l ${SEARCH_CPPS} ${SHARED_CPPS})
add_executable(bowtie2-build-s ${BUILD_CPPS} ${SHARED_CPPS})
=====================================
MANUAL
=====================================
@@ -132,12 +132,53 @@ will issue a series of commands that will: 1. download zstd and zlib 2.
compile them as static libraries 3. link the resulting libraries to the
compiled Bowtie 2 binaries
+Building with SRA support
+
As of version 2.3.5 bowtie2 now supports aligning SRA reads. Prepackaged
builds will include a package that supports SRA. If you're building
bowtie2 from source please make sure that the Java runtime is available
on your system. You can then proceed with the build by running
make sra-deps && make USE_SRA=1.
+Building with libsais support
+
+As of version 2.5.3 bowtie2 supports building indexes using the SAIS
+algorithm provided by libsais. SAIS is a state-of-the-art suffix array
+construction algorithm that brings a significant speed-up to
+the overall index building process. There is, however, the downside of a
+significant increase in memory usage compared to the persistent
+blockwise algorithm that bowtie2-build uses by default. When using SAIS,
+small indexes can be built for inputs up to 2GB. The bowtie2-build
+wrapper will help determine the appropriate index type for uncompressed
+and gzipped inputs.
+
+To build bowtie2-build with libsais first make sure that the libsais
+submodule is available. This can be done in one of the following ways: *
+first time cloning bowtie2 --
+git clone --recursive https://github.com/BenLangmead/bowtie2.git *
+existing checkout of bowtie2 --
+git submodule init && git submodule update
+
+Issue the following command line to build libsais: * with OpenMP support
+-- [g]make libsais USE_SAIS_OPENMP=1 * without OpenMP support --
+[g]make libsais USE_SAIS=1
+
+The choice of using OpenMP will determine whether or not the algorithm
+runs multithreaded. The [-p/--threads] argument to bowtie2-build will be
+ignored when libsais is compiled without OpenMP support.
+
+Finally, building the build executable: * with OpenMP support --
+[g]make bowtie2-build-s USE_SAIS_OPENMP=1 * without OpenMP support --
+[g]make bowtie2-build-s USE_SAIS=1
+
+Building with CMake
+
+To build Bowtie2 with SRA and libsais support issue the following
+command: * cmake . -D USE_SRA=1 -D USE_SAIS=1 && cmake --build .
+
+CMake will take care of building and linking against the specified
+dependencies.
+
Adding to PATH
By adding your new Bowtie 2 directory to your PATH environment variable,
@@ -775,7 +816,7 @@ alignment to be considered valid, and x is the read length.
Usage
- bowtie2 [options]* -x <bt2-idx> {-1 <m1> -2 <m2> | -U <r> | --interleaved <i> | --sra-acc <acc> | -b <bam>} -S [<sam>]
+ bowtie2 [options]* -x <bt2-idx> {-1 <m1> -2 <m2> | -U <r> | --interleaved <i> | --sra-acc <acc> | -b <bam>} -S [<sam>]
Main arguments
@@ -882,11 +923,11 @@ When -r is set, the result is as if --ignore-quals is also set.
-F k:<int>,i:<int>
-Reads are substrings (k-mers) extracted from a FASTA file <s>.
-Specifically, for every reference sequence in FASTA file <s>, Bowtie 2
-aligns the k-mers at offsets 1, 1+i, 1+2i, ... until reaching the end of
-the reference. Each k-mer is aligned as a separate read. Quality values
-are set to all Is (40 on Phred scale). Each k-mer (read) is given a name
+Reads are substrings (k-mers) extracted from a FASTA file. Specifically,
+for every reference sequence in the FASTA file, Bowtie 2 aligns the
+k-mers at offsets 1, 1+i, 1+2i, ... until reaching the end of the
+reference. Each k-mer is aligned as a separate read. Quality values are
+set to all Is (40 on Phred scale). Each k-mer (read) is given a name
like <sequence>_<offset>, where <sequence> is the name of the FASTA
sequence it was drawn from and <offset> is its 0-based offset of origin
with respect to the sequence. Only single k-mers, i.e. unpaired reads,
=====================================
MANUAL.markdown
=====================================
@@ -139,11 +139,47 @@ a series of commands that will:
2. compile them as static libraries
3. link the resulting libraries to the compiled Bowtie 2 binaries
+### Building with SRA support ###
+
As of version 2.3.5 bowtie2 now supports aligning SRA reads. Prepackaged
builds will include a package that supports SRA. If you're building bowtie2
from source please make sure that the Java runtime is available on your system.
You can then proceed with the build by running `make sra-deps && make USE_SRA=1`.
+### Building with libsais support ###
+
+As of version 2.5.3 `bowtie2` supports building indexes using the SAIS algorithm
+provided by [libsais]. SAIS is a state-of-the-art suffix array construction algorithm
+that brings a significant speed-up to the overall index building process.
+There is, however, the downside of a significant increase in memory usage compared
+to the persistent blockwise algorithm that `bowtie2-build` uses by default. When using
+SAIS, small indexes can be built for inputs up to 2GB. The `bowtie2-build` wrapper
+will help determine the appropriate index type for uncompressed and gzipped inputs.
+
+To build `bowtie2-build` with [libsais] first make sure that the libsais submodule
+is available. This can be done in one of the following ways:
+* first time cloning bowtie2 -- `git clone --recursive https://github.com/BenLangmead/bowtie2.git`
+* existing checkout of bowtie2 -- `git submodule init && git submodule update`
+
+Issue the following command line to build libsais:
+* with OpenMP support -- `[g]make libsais USE_SAIS_OPENMP=1`
+* without OpenMP support -- `[g]make libsais USE_SAIS=1`
+
+The choice of using OpenMP will determine whether or not the algorithm
+runs multithreaded. The [`-p/--threads`] argument to `bowtie2-build` will
+be ignored when libsais is compiled without OpenMP support.
+
+Finally, building the build executable:
+* with OpenMP support -- `[g]make bowtie2-build-s USE_SAIS_OPENMP=1`
+* without OpenMP support -- `[g]make bowtie2-build-s USE_SAIS=1`
+
+### Building with CMake ###
+
+To build Bowtie2 with SRA and libsais support issue the following command:
+* `cmake . -D USE_SRA=1 -D USE_SAIS=1 && cmake --build .`
+
+CMake will take care of building and linking against the specified dependencies.
+
Adding to PATH
--------------
@@ -957,8 +993,8 @@ per line, without any other information (no read names, no qualities). When
</td><td>
-Reads are substrings (k-mers) extracted from a FASTA file `<s>`.
-Specifically, for every reference sequence in FASTA file `<s>`, Bowtie
+Reads are substrings (k-mers) extracted from a FASTA file.
+Specifically, for every reference sequence in the FASTA file, Bowtie
2 aligns the k-mers at offsets 1, 1+i, 1+2i, ... until reaching the
end of the reference. Each k-mer is aligned as a separate read.
Quality values are set to all Is (40 on Phred scale). Each k-mer
@@ -2965,3 +3001,4 @@ warnings due to the case insensitive nature of markdown URLs -->
[using a pre-built index]: #using-a-pre-built-index
[valid alignment]: #valid-alignments-meet-or-exceed-the-minimum-score-threshold
[yields a larger memory footprint]: #fm-index-memory-footprint
+[libsais]: https://github.com/IlyaGrebnov/libsais
=====================================
Makefile
=====================================
@@ -30,10 +30,14 @@ HEADERS := $(wildcard *.h)
BOWTIE_MM := 1
BOWTIE_SHARED_MEM :=
-CXXFLAGS += -std=c++11
+CXXFLAGS += -std=c++11 -Wall -Wno-unused-but-set-variable
-NGS_VER ?= 2.10.2
-VDB_VER ?= 2.10.2
+SRA_TOOLS_VER ?= 3.0.9
+VDB_VER ?= 3.0.9
+
+ifneq (,$(findstring $(shell uname),Linux))
+ LDLIBS += -lrt
+endif
# Detect Cygwin or MinGW
ifneq (,$(findstring mingw,$(shell $(CXX) --version)))
@@ -42,6 +46,7 @@ ifneq (,$(findstring mingw,$(shell $(CXX) --version)))
endif
ifeq (1, $(MINGW))
+ LDLIBS := $(subst -lrt,,$(LDLIBS))
CXX = x86_64-w64-mingw32-g++-posix
CC = x86_64-w64-mingw32-gcc-posix
AR = x86_64-w64-mingw32-ar
@@ -86,11 +91,14 @@ else ifneq (,$(findstring $(ARCH), aarch64 arm64 s390x powerpc64 powerpc64le ppc
POPCNT_CAPABILITY ?= 0
endif
-ifdef USE_SAIS
+ifeq (1, $(if $(or $(USE_SAIS),$(USE_SAIS_OPENMP)),1))
CPPFLAGS += -Ithird_party/libsais/include
- CXXFLAGS += -fopenmp -DUSE_SAIS
- LDFLAGS += -Lthird_party/libsais/lib
- LDLIBS += -lsais
+ifdef USE_SAIS_OPENMP
+ BUILD_CXXFLAGS += -fopenmp
+endif
+ BUILD_CXXFLAGS += -DUSE_SAIS
+ BUILD_LDFLAGS += -Lthird_party/libsais/lib
+ BUILD_LDLIBS += -lsais
endif
# msys will always be 32 bit so look at the cpu arch instead.
@@ -104,6 +112,7 @@ ifeq (32,$(BITS))
endif
ifdef STATIC_BUILD
+ WITH_ZSTD = 1
LDFLAGS += -L$(CURDIR)/.tmp/lib
CPPFLAGS += -I$(CURDIR)/.tmp/include
endif
@@ -147,10 +156,9 @@ ifeq (1, $(USE_SRA))
ifndef ($(STATIC_BUILD))
CPPFLAGS += -I$(CURDIR)/.tmp/include
endif
- LDLIBS += -lncbi-ngs-c++-static
- LDLIBS += -lngs-c++-static
- LDLIBS += -lncbi-vdb-static
- LDLIBS += -ldl
+ ALIGN_LDLIBS += -lncbi-ngs-static
+ ALIGN_LDLIBS += -lncbi-vdb-static
+ ALIGN_LDLIBS += -ldl
CXXFLAGS += -DUSE_SRA
endif
endif
@@ -322,12 +330,17 @@ else
$(BOWTIE2_BIN_LIST_SAN): $(error "Compiler does not support...")
endif
+bowtie2-build-%: CXXFLAGS += $(BUILD_CXXFLAGS)
+bowtie2-build-%: LDFLAGS += $(BUILD_LDFLAGS)
+bowtie2-build-%: LDLIBS += $(BUILD_LDLIBS)
+
+bowtie2-align-%: LDLIBS += $(ALIGN_LDLIBS)
#
# bowtie2-build targets
#
bowtie2-build-s-sanitized bowtie2-build-s: bt2_build.cpp $(SHARED_CPPS) $(HEADERS)
$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall \
+ $(DEFS) -DBOWTIE2 $(NOASSERT_FLAGS) \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(BUILD_CPPS_MAIN) \
@@ -335,7 +348,7 @@ bowtie2-build-s-sanitized bowtie2-build-s: bt2_build.cpp $(SHARED_CPPS) $(HEADER
bowtie2-build-l-sanitized bowtie2-build-l: bt2_build.cpp $(SHARED_CPPS) $(HEADERS)
$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(BUILD_CPPS_MAIN) \
@@ -343,7 +356,7 @@ bowtie2-build-l-sanitized bowtie2-build-l: bt2_build.cpp $(SHARED_CPPS) $(HEADER
bowtie2-build-s-debug: bt2_build.cpp $(SHARED_CPPS) $(HEADERS)
$(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -Wall \
+ $(DEFS) -DBOWTIE2 \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(BUILD_CPPS_MAIN) \
@@ -351,7 +364,7 @@ bowtie2-build-s-debug: bt2_build.cpp $(SHARED_CPPS) $(HEADERS)
bowtie2-build-l-debug: bt2_build.cpp $(SHARED_CPPS) $(HEADERS)
$(CXX) $(DEBUG_FLAGS) $(DEBUG_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(BUILD_CPPS_MAIN) \
@@ -363,7 +376,7 @@ bowtie2-build-l-debug: bt2_build.cpp $(SHARED_CPPS) $(HEADERS)
bowtie2-align-s-sanitized bowtie2-align-s: bt2_search.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 $(NOASSERT_FLAGS) -Wall \
+ $(DEFS) -DBOWTIE2 $(NOASSERT_FLAGS) \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -371,7 +384,7 @@ bowtie2-align-s-sanitized bowtie2-align-s: bt2_search.cpp $(SEARCH_CPPS) $(SHARE
bowtie2-align-l-sanitized bowtie2-align-l: bt2_search.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
$(CXX) $(RELEASE_FLAGS) $(RELEASE_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX $(NOASSERT_FLAGS) \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -380,7 +393,7 @@ bowtie2-align-l-sanitized bowtie2-align-l: bt2_search.cpp $(SEARCH_CPPS) $(SHARE
bowtie2-align-s-debug: bt2_search.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
$(CXX) $(DEBUG_FLAGS) \
$(DEBUG_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -Wall \
+ $(DEFS) -DBOWTIE2 \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -389,7 +402,7 @@ bowtie2-align-s-debug: bt2_search.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $
bowtie2-align-l-debug: bt2_search.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $(SEARCH_FRAGMENTS)
$(CXX) $(DEBUG_FLAGS) \
$(DEBUG_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) $(SEARCH_CPPS_MAIN) \
@@ -402,7 +415,7 @@ bowtie2-align-l-debug: bt2_search.cpp $(SEARCH_CPPS) $(SHARED_CPPS) $(HEADERS) $
bowtie2-inspect-s-sanitized bowtie2-inspect-s: bt2_inspect.cpp $(HEADERS) $(SHARED_CPPS)
$(CXX) $(RELEASE_FLAGS) \
$(RELEASE_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_INSPECT_MAIN -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_INSPECT_MAIN \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) \
@@ -411,7 +424,7 @@ bowtie2-inspect-s-sanitized bowtie2-inspect-s: bt2_inspect.cpp $(HEADERS) $(SHAR
bowtie2-inspect-l-sanitized bowtie2-inspect-l: bt2_inspect.cpp $(HEADERS) $(SHARED_CPPS)
$(CXX) $(RELEASE_FLAGS) \
$(RELEASE_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_INSPECT_MAIN -DBOWTIE_64BIT_INDEX -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_INSPECT_MAIN -DBOWTIE_64BIT_INDEX \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) \
@@ -420,7 +433,7 @@ bowtie2-inspect-l-sanitized bowtie2-inspect-l: bt2_inspect.cpp $(HEADERS) $(SHAR
bowtie2-inspect-s-debug: bt2_inspect.cpp $(HEADERS) $(SHARED_CPPS)
$(CXX) $(DEBUG_FLAGS) \
$(DEBUG_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_INSPECT_MAIN -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_INSPECT_MAIN \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) \
@@ -429,7 +442,7 @@ bowtie2-inspect-s-debug: bt2_inspect.cpp $(HEADERS) $(SHARED_CPPS)
bowtie2-inspect-l-debug: bt2_inspect.cpp $(HEADERS) $(SHARED_CPPS)
$(CXX) $(DEBUG_FLAGS) \
$(DEBUG_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -DBOWTIE_INSPECT_MAIN -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_64BIT_INDEX -DBOWTIE_INSPECT_MAIN \
$(CPPFLAGS) \
-o $@ $< \
$(SHARED_CPPS) \
@@ -442,7 +455,7 @@ bowtie2-inspect-l-debug: bt2_inspect.cpp $(HEADERS) $(SHARED_CPPS)
bowtie2-dp: bt2_dp.cpp $(HEADERS) $(SHARED_CPPS) $(DP_CPPS)
$(CXX) $(RELEASE_FLAGS) \
$(RELEASE_DEFS) $(CXXFLAGS) $(NOASSERT_FLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_DP_MAIN -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_DP_MAIN \
$(CPPFLAGS) \
-o $@ $< \
$(DP_CPPS) $(SHARED_CPPS) \
@@ -451,7 +464,7 @@ bowtie2-dp: bt2_dp.cpp $(HEADERS) $(SHARED_CPPS) $(DP_CPPS)
bowtie2-dp-debug: bt2_dp.cpp $(HEADERS) $(SHARED_CPPS) $(DP_CPPS)
$(CXX) $(DEBUG_FLAGS) \
$(DEBUG_DEFS) $(CXXFLAGS) \
- $(DEFS) -DBOWTIE2 -DBOWTIE_DP_MAIN -Wall \
+ $(DEFS) -DBOWTIE2 -DBOWTIE_DP_MAIN \
$(CPPFLAGS) \
-o $@ $< \
$(DP_CPPS) $(SHARED_CPPS) \
@@ -502,7 +515,7 @@ bowtie2-seeds-debug: aligner_seed.cpp ccnt_lut.cpp alphabet.cpp aligner_seed.h b
$(CXX) $(DEBUG_FLAGS) \
$(DEBUG_DEFS) $(CXXFLAGS) \
-DSCAN_MAIN \
- $(DEFS) -Wall \
+ $(DEFS) \
$(CPPFLAGS) \
-o $@ $< \
aligner_seed.cpp bt2_idx.cpp ccnt_lut.cpp alphabet.cpp bt2_io.cpp \
@@ -576,6 +589,16 @@ static-libs:
rm -rf zstd-1.5.5* ; \
fi
+.PHONY: libsais
+libsais:
+ if [ ! -d $(CURDIR)/third_party/libsais ]; then \
+ git submodule init && git submodule update --recursive ; \
+ fi ; \
+ if [ -n "$(USE_SAIS_OPENMP)" ]; then \
+ export CFLAGS="$(CFLAGS) -fopenmp -O2" ; \
+ fi ; \
+ cd $(CURDIR)/third_party/libsais && $(MAKE) -B CC=$(CC) && $(MAKE) install PREFIX=$$PWD ;
+
.PHONY: sra-deps
sra-deps:
DL=$$( ( which wget >/dev/null 2>&1 && echo "wget --no-check-certificate" ) || echo "curl -LOk") ; \
@@ -586,25 +609,29 @@ sra-deps:
export CFLAGS=-mmacosx-version-min=10.9 ; \
export CXXFLAGS=-mmacosx-version-min=10.9 ; \
fi ; \
- if [ ! -f "$(CURDIR)/.tmp/include/ngs/Alignment.hpp" ] ; then \
- if [ ! -d "$(CURDIR)/.tmp/ngs-$(NGS_VER)/ngs-sdk" ] ; then \
- cd $(CURDIR)/.tmp ; \
- $$DL https://github.com/ncbi/ngs/archive/$(NGS_VER).tar.gz ; \
- tar xzvf $(NGS_VER).tar.gz ; \
- rm -f $(NGS_VER).tar.gz ; \
- fi ; \
- cd $(CURDIR)/.tmp/ngs-$(NGS_VER) && ./configure --prefix=$(CURDIR)/.tmp --build-prefix=`pwd`/build ; \
- make && make install ; \
- fi ; \
if [ ! -f "$(CURDIR)/.tmp/include/ncbi-vdb/NGS.hpp" ] ; then \
if [ ! -d "$(CURDIR)/.tmp/ncbi-vdb-$(VDB_VER)/vdb3" ] ; then \
+ cd $(CURDIR)/.tmp ; \
+ $$DL https://github.com/ncbi/ncbi-vdb/archive/refs/tags/$(VDB_VER).tar.gz ; \
+ tar zxvf $(VDB_VER).tar.gz ; \
+ rm -f $(VDB_VER).tar.gz ; \
+ fi ; \
+ cd $(CURDIR)/.tmp/ncbi-vdb-$(VDB_VER) \
+ && ./configure --prefix=$(CURDIR)/.tmp --build-prefix=`pwd`/build --without-debug \
+ && make && make install ; \
+ fi ; \
+ if [ ! -f "$(CURDIR)/.tmp/include/ngs/Alignment.hpp" ] ; then \
+ if [ ! -d "$(CURDIR)/.tmp/sra-tools-$(SRA_TOOLS_VER)" ] ; then \
cd $(CURDIR)/.tmp ; \
- $$DL https://github.com/ncbi/ncbi-vdb/archive/$(VDB_VER).tar.gz ; \
- tar zxvf $(VDB_VER).tar.gz ; \
- rm -f $(VDB_VER).tar.gz ; \
- fi ; \
- cd $(CURDIR)/.tmp/ncbi-vdb-$(VDB_VER) && ./configure --prefix=$(CURDIR)/.tmp --build-prefix=`pwd`/build --with-ngs-sdk=$(CURDIR)/.tmp && make && make install ; \
- fi ;
+ $$DL https://github.com/ncbi/sra-tools/archive/$(SRA_TOOLS_VER).tar.gz ; \
+ tar xzvf $(SRA_TOOLS_VER).tar.gz ; \
+ rm -f $(SRA_TOOLS_VER).tar.gz ; \
+ fi ; \
+ cd $(CURDIR)/.tmp/sra-tools-$(SRA_TOOLS_VER) \
+ && ./configure --prefix=$(CURDIR)/.tmp --build-prefix=`pwd`/build \
+ --with-ncbi-vdb-prefix="$(CURDIR)/.tmp" --enable-static --without-debug \
+ && make && make install ; \
+ fi ;
.PHONY: test
test: simple-test random-test
=====================================
NEWS
=====================================
@@ -4,7 +4,7 @@ Bowtie 2 NEWS
Bowtie 2 is available for download from the project website,
http://bowtie-bio.sf.net/bowtie2 and on Github,
https://github.com/BenLangmead/bowtie2/releases. 2.0.0-beta1 is
-the first version released to the public and 2.5.2 is the latest
+the first version released to the public and 2.5.3 is the latest
version. Bowtie 2 is licensed under the GPLv3 license. See `LICENSE'
file for details.
@@ -19,6 +19,15 @@ Please report any issues to the Bowtie 2 Github page or using the Sourceforge bu
Version Release History
=======================
+## Version 2.5.3 - Jan 16, 2024 ##
+
+ * Fixed an issue causing `bowtie2`'s memory usage to increase over time.
+ * Fixed an issue causing `bowtie2` to crash when processing empty FASTQ records.
+ * Fixed an issue that would cause `bowtie2` to erroneously treat `-f` and `-F` as mutually exclusive.
+ * Add optional support for `libsais` to `bowtie2-build` for faster index building.
+ N.B. -- small indexes are capped at 2GB when using `libsais` as opposed to 4GB when using the
+ default blockwise algorithm. See MANUAL for more details.
+
## Version 2.5.2 - Oct 13, 2023 ##
### bowtie2 ###
=====================================
README.md
=====================================
@@ -2,7 +2,7 @@
<!-- badges: start -->
![Random Tests](https://github.com/BenLangmead/bowtie2/actions/workflows/random-tests.yml/badge.svg)
![Simple Tests](https://github.com/BenLangmead/bowtie2/actions/workflows/simple-tests.yml/badge.svg)
-[![Version](https://img.shields.io/badge/version-2.5.2-green.svg)](https://shields.io/)
+[![Version](https://img.shields.io/badge/version-2.5.3-green.svg)](https://shields.io/)
<!-- [![Build Status](https://travis-ci.org/BenLangmead/bowtie2.svg?branch=master)](https://travis-ci.org/BenLangmead/bowtie2) -->
[![License: GPL v3](https://img.shields.io/badge/license-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0)
<!--badges: end -->
=====================================
blockwise_sa.h
=====================================
@@ -204,11 +204,18 @@ public:
__text.wbuf()[__text.length()] = (char)127; // $ is larger than any character in the suffix array
_suffixes.resize(__text.length() + 1);
#ifdef BOWTIE_64BIT_INDEX
+#ifdef _OPENMP
libsais64_omp((const uint8_t *)__text.buf(), (int64_t *)(_suffixes.ptr()), (int64_t)_suffixes.size(), 0, NULL, _nthreads);
+#else
+ libsais64((const uint8_t *)__text.buf(), (int64_t *)(_suffixes.ptr()), (int64_t)_suffixes.size(), 0, NULL);
+#endif
#else
+#ifdef _OPENMP
libsais_omp((const uint8_t *)__text.buf(), (int32_t *)(_suffixes.ptr()), (int32_t)_suffixes.size(), 0, NULL, _nthreads);
-
+#else
+ libsais((const uint8_t *)__text.buf(), (int32_t *)(_suffixes.ptr()), (int32_t)_suffixes.size(), 0, NULL);
+#endif
#endif
_suffixes[__text.length()] = __text.length();
}
@@ -231,7 +238,7 @@ public:
}
bool hasMoreBlocks() const {
- return _i == _suffixes.size();
+ return _i < _suffixes.size();
}
private:
=====================================
bowtie2-build
=====================================
@@ -38,6 +38,11 @@ def get_gunzip_size(fn):
break
return size
+def sais_enabled(build_bin_spec):
+ output = subprocess.check_output([build_bin_spec, "--version"], universal_newlines=True)
+
+ return output.find("USE_SAIS") != -1
+
def main():
parser = argparse.ArgumentParser(add_help = False)
@@ -60,6 +65,9 @@ def main():
ex_path = os.path.dirname(curr_script)
build_bin_spec = os.path.join(ex_path,build_bin_s)
+ if sais_enabled(build_bin_spec):
+ small_index_max_size = 2 * 1024 ** 3 - delta
+
script_options, argv = parser.parse_known_args()
if script_options.verbose:
=====================================
bt2_idx.h
=====================================
@@ -1067,13 +1067,17 @@ public:
bmax = (TIndexOffU)sqrt(s.length());
VMSG_NL("bmax defaulted to: " << bmax);
}
+#ifndef USE_SAIS
int iter = 0;
bool first = true;
streampos out1pos = out1.tellp();
streampos out2pos = out2.tellp();
+#endif
+
// Look for bmax/dcv parameters that work.
thread_pool pool(nthreads - 1);
while(true) {
+#ifndef USE_SAIS
if(!first && bmax < 40 && _passMemExc) {
cerr << "Could not find approrpiate bmax/dcv settings for building this index." << endl;
if(!isPacked()) {
@@ -1107,7 +1111,9 @@ public:
VMSG_NL(" --dcv " << dcv);
}
iter++;
+#endif
try {
+#ifndef USE_SAIS
{
VMSG_NL(" Doing ahead-of-time memory usage test");
// Make a quick-and-dirty attempt to force a bad_alloc iff
@@ -1135,10 +1141,12 @@ public:
}
VMSG_NL("");
}
- VMSG_NL("Constructing suffix-array element generator");
+#endif
#ifdef USE_SAIS
+ VMSG_NL("Using SAIS algorithm for constructing suffix array");
SAISBlockwiseSA<TStr> bsa(s, bmax, nthreads);
#else
+ VMSG_NL("Constructing suffix-array element generator");
KarkkainenBlockwiseSA<TStr> bsa(s, bmax, nthreads, pool, dcv, seed, _sanity, _passMemExc, _verbose, outfile);
#endif
assert(bsa.suffixItrIsReset());
@@ -1161,6 +1169,10 @@ public:
}
break;
} catch(bad_alloc& e) {
+#ifdef USE_SAIS
+ cerr << "Out of memory while constructing suffix array. Please try using a smaller" << endl;
+ throw 1;
+#else
if(_passMemExc) {
VMSG_NL(" Ran out of memory; automatically trying more memory-economical parameters.");
} else {
@@ -1168,8 +1180,12 @@ public:
<< "number of blocks by specifying a smaller --bmax or a larger --bmaxdivn" << endl;
throw 1;
}
+#endif
}
+#ifndef USE_SAIS
first = false;
+#endif
+
}
assert(repOk());
// Now write reference sequence names on the end
=====================================
bt2_search.cpp
=====================================
@@ -765,7 +765,7 @@ static void printUsage(ostream& out) {
<< " -f query input files are (multi-)FASTA .fa/.mfa" << endl
<< " -r query input files are raw one-sequence-per-line" << endl
<< " -F k:<int>,i:<int> query input files are continuous FASTA where reads" << endl
- << " are substrings (k-mers) extracted from a FASTA file <s>" << endl
+ << " are substrings (k-mers) extracted from the FASTA file" << endl
<< " and aligned at offsets 1, 1+i, 1+2i ... end of reference" << endl
<< " -c <m1>, <m2>, <r> are sequences themselves, not files" << endl
<< " -s/--skip <int> skip the first <int> reads/pairs in the input (none)" << endl
@@ -1047,8 +1047,15 @@ static void parseOption(int next_option, const char *arg) {
saw_bam = true;
break;
}
- case 'f': set_format(format, FASTA); break;
+ case 'f': {
+ if (format != FASTA_CONT)
+ set_format(format, FASTA);
+ break;
+ }
case 'F': {
+ if (format == FASTA) {
+ format = UNKNOWN;
+ }
set_format(format, FASTA_CONT);
pair<uint32_t, uint32_t> p = parsePair<uint32_t>(arg, ',');
fastaContLen = p.first;
=====================================
doc/manual.html
=====================================
@@ -11,8 +11,11 @@
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
- ul.task-list{list-style: none;}
+ /* The extra [class] is a hack that increases specificity enough to
+ override a similar rule in reveal.js */
+ ul.task-list[class]{list-style: none;}
ul.task-list li input[type="checkbox"] {
+ font-size: inherit;
width: 0.8em;
margin: 0 0.8em 0.2em -1.6em;
vertical-align: middle;
@@ -43,7 +46,16 @@ Bowtie 2?</a></li>
Bowtie 2</a>
<ul>
<li><a href="#building-from-source"
-id="toc-building-from-source">Building from source</a></li>
+id="toc-building-from-source">Building from source</a>
+<ul>
+<li><a href="#building-with-sra-support"
+id="toc-building-with-sra-support">Building with SRA support</a></li>
+<li><a href="#building-with-libsais-support"
+id="toc-building-with-libsais-support">Building with libsais
+support</a></li>
+<li><a href="#building-with-cmake" id="toc-building-with-cmake">Building
+with CMake</a></li>
+</ul></li>
<li><a href="#adding-to-path" id="toc-adding-to-path">Adding to
PATH</a></li>
</ul></li>
@@ -338,11 +350,48 @@ dependency management. Running
a series of commands that will: 1. download zstd and zlib 2. compile
them as static libraries 3. link the resulting libraries to the compiled
Bowtie 2 binaries</p>
+<h3 id="building-with-sra-support">Building with SRA support</h3>
<p>As of version 2.3.5 bowtie2 now supports aligning SRA reads.
Prepackaged builds will include a package that supports SRA. If you're
building bowtie2 from source please make sure that the Java runtime is
available on your system. You can then proceed with the build by running
<code>make sra-deps && make USE_SRA=1</code>.</p>
+<h3 id="building-with-libsais-support">Building with libsais
+support</h3>
+<p>As of version 2.5.3 <code>bowtie2</code> supports building indexes
+using the SAIS algorithm provided by <a
+href="https://github.com/IlyaGrebnov/libsais">libsais</a>. SAIS is a
+state-of-the-art suffix array construction algorithm that will
+bring forth a significant speed-up to the overall index building
+process. There is, however, the downside of a significant increase in
+memory usage compared to the persistent blockwise algorithm that
+<code>bowtie2-build</code> uses by default. When using SAIS, small
+indexes can be built for inputs up to 2GB. The
+<code>bowtie2-build</code> wrapper will help determine the appropriate
+index type for uncompressed and gzipped inputs.</p>
+<p>To build <code>bowtie2-build</code> with <a
+href="https://github.com/IlyaGrebnov/libsais">libsais</a> first make
+sure that the libsais submodule is available. This can be done in one of
+the following ways: * first time cloning bowtie2 --
+<code>git clone --recursive https://github.com/BenLangmead/bowtie2.git</code>
+* existing checkout of bowtie2 --
+<code>git submodule init && git submodule update</code></p>
+<p>Issue the following command line to build libsais: * with OpenMP
+support -- <code>[g]make libsais USE_SAIS_OPENMP=1</code> * without
+OpenMP support -- <code>[g]make libsais USE_SAIS=1</code></p>
+<p>The choice of using OpenMP will determine whether or not the
+algorithm runs multithreaded. The [<code>-p/--threads</code>] argument
+to <code>bowtie2-build</code> will be ignored when libsais is compiled
+without OpenMP support.</p>
+<p>Finally, building the build executable: * with OpenMP support --
+<code>[g]make bowtie2-build-s USE_SAIS_OPENMP=1</code> * without OpenMP
+support -- <code>[g]make bowtie2-build-s USE_SAIS=1</code></p>
+<h3 id="building-with-cmake">Building with CMake</h3>
+<p>To build Bowtie2 with SRA and libsais support issue the following
+command: *
+<code>cmake . -D USE_SRA=1 -D USE_SAIS=1 && cmake --build .</code></p>
+<p>CMake will take care of building and linking against the specified
+dependencies.</p>
<h2 id="adding-to-path">Adding to PATH</h2>
<p>By adding your new Bowtie 2 directory to your <a
href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH environment
@@ -1167,13 +1216,12 @@ also set.</p>
<pre><code>-F k:<int>,i:<int></code></pre>
</td>
<td>
-Reads are substrings (k-mers) extracted from a FASTA file
-<code><s></code>. Specifically, for every reference sequence in
-FASTA file <code><s></code>, Bowtie 2 aligns the k-mers at offsets
-1, 1+i, 1+2i, ... until reaching the end of the reference. Each k-mer is
-aligned as a separate read. Quality values are set to all Is (40 on
-Phred scale). Each k-mer (read) is given a name like
-<code><sequence>_<offset></code>, where
+Reads are substrings (k-mers) extracted from a FASTA file. Specifically,
+for every reference sequence in the FASTA file, Bowtie 2 aligns the
+k-mers at offsets 1, 1+i, 1+2i, ... until reaching the end of the
+reference. Each k-mer is aligned as a separate read. Quality values are
+set to all Is (40 on Phred scale). Each k-mer (read) is given a name
+like <code><sequence>_<offset></code>, where
<code><sequence></code> is the name of the FASTA sequence it was
drawn from and <code><offset></code> is its 0-based offset of
origin with respect to the sequence. Only single k-mers, i.e. unpaired
=====================================
doc/website/manual.ssi
=====================================
@@ -12,7 +12,16 @@ Bowtie 2?</a></li>
Bowtie 2</a>
<ul>
<li><a href="#building-from-source"
-id="toc-building-from-source">Building from source</a></li>
+id="toc-building-from-source">Building from source</a>
+<ul>
+<li><a href="#building-with-sra-support"
+id="toc-building-with-sra-support">Building with SRA support</a></li>
+<li><a href="#building-with-libsais-support"
+id="toc-building-with-libsais-support">Building with libsais
+support</a></li>
+<li><a href="#building-with-cmake" id="toc-building-with-cmake">Building
+with CMake</a></li>
+</ul></li>
<li><a href="#adding-to-path" id="toc-adding-to-path">Adding to
PATH</a></li>
</ul></li>
@@ -307,11 +316,48 @@ dependency management. Running
a series of commands that will: 1. download zstd and zlib 2. compile
them as static libraries 3. link the resulting libraries to the compiled
Bowtie 2 binaries</p>
+<h3 id="building-with-sra-support">Building with SRA support</h3>
<p>As of version 2.3.5 bowtie2 now supports aligning SRA reads.
Prepackaged builds will include a package that supports SRA. If you're
building bowtie2 from source please make sure that the Java runtime is
available on your system. You can then proceed with the build by running
<code>make sra-deps && make USE_SRA=1</code>.</p>
+<h3 id="building-with-libsais-support">Building with libsais
+support</h3>
+<p>As of version 2.5.3 <code>bowtie2</code> supports building indexes
+using the SAIS algorithm provided by <a
+href="https://github.com/IlyaGrebnov/libsais">libsais</a>. SAIS is a
+state-of-the-art suffix array construction algorithm that will
+bring forth a significant speed-up to the overall index building
+process. There is, however, the downside of a significant increase in
+memory usage compared to the persistent blockwise algorithm that
+<code>bowtie2-build</code> uses by default. When using SAIS, small
+indexes can be built for inputs up to 2GB. The
+<code>bowtie2-build</code> wrapper will help determine the appropriate
+index type for uncompressed and gzipped inputs.</p>
+<p>To build <code>bowtie2-build</code> with <a
+href="https://github.com/IlyaGrebnov/libsais">libsais</a> first make
+sure that the libsais submodule is available. This can be done in one of
+the following ways: * first time cloning bowtie2 --
+<code>git clone --recursive https://github.com/BenLangmead/bowtie2.git</code>
+* existing checkout of bowtie2 --
+<code>git submodule init && git submodule update</code></p>
+<p>Issue the following command line to build libsais: * with OpenMP
+support -- <code>[g]make libsais USE_SAIS_OPENMP=1</code> * without
+OpenMP support -- <code>[g]make libsais USE_SAIS=1</code></p>
+<p>The choice of using OpenMP will determine whether or not the
+algorithm runs multithreaded. The [<code>-p/--threads</code>] argument
+to <code>bowtie2-build</code> will be ignored when libsais is compiled
+without OpenMP support.</p>
+<p>Finally, building the build executable: * with OpenMP support --
+<code>[g]make bowtie2-build-s USE_SAIS_OPENMP=1</code> * without OpenMP
+support -- <code>[g]make bowtie2-build-s USE_SAIS=1</code></p>
+<h3 id="building-with-cmake">Building with CMake</h3>
+<p>To build Bowtie2 with SRA and libsais support issue the following
+command: *
+<code>cmake . -D USE_SRA=1 -D USE_SAIS=1 && cmake --build .</code></p>
+<p>CMake will take care of building and linking against the specified
+dependencies.</p>
<h2 id="adding-to-path">Adding to PATH</h2>
<p>By adding your new Bowtie 2 directory to your <a
href="http://en.wikipedia.org/wiki/PATH_(variable)">PATH environment
@@ -1136,13 +1182,12 @@ also set.</p>
<pre><code>-F k:<int>,i:<int></code></pre>
</td>
<td>
-Reads are substrings (k-mers) extracted from a FASTA file
-<code><s></code>. Specifically, for every reference sequence in
-FASTA file <code><s></code>, Bowtie 2 aligns the k-mers at offsets
-1, 1+i, 1+2i, ... until reaching the end of the reference. Each k-mer is
-aligned as a separate read. Quality values are set to all Is (40 on
-Phred scale). Each k-mer (read) is given a name like
-<code><sequence>_<offset></code>, where
+Reads are substrings (k-mers) extracted from a FASTA file. Specifically,
+for every reference sequence in the FASTA file, Bowtie 2 aligns the
+k-mers at offsets 1, 1+i, 1+2i, ... until reaching the end of the
+reference. Each k-mer is aligned as a separate read. Quality values are
+set to all Is (40 on Phred scale). Each k-mer (read) is given a name
+like <code><sequence>_<offset></code>, where
<code><sequence></code> is the name of the FASTA sequence it was
drawn from and <code><offset></code> is its 0-based offset of
origin with respect to the sequence. Only single k-mers, i.e. unpaired
=====================================
doc/website/recent_news.ssi
=====================================
@@ -1,3 +1,19 @@
+<h2>Version 2.5.3 - Jan 16, 2024</h2>
+<h3 id="bowtie2">bowtie2</h3>
+<ul>
+ <li>Fixed an issue causing <code>bowtie2</code>'s memory usage to increase over time.</li>
+ <li>Fixed an issue causing <code>bowtie2</code> to crash when processing empty FASTQ records.</li>
+ <li>Fixed an issue causing <code>bowtie2</code> to erroneously treat <code><a href="manual.shtml#bowtie2-options-f">-f</a></code>
+ and <code><a href="manual.shtml#bowtie2-options-F">-F</a></code> as mutually exclusive.</li>
+</ul>
+<h3 id="bowtie2-build">bowtie2-build</h3>
+<ul>
+ <li>Add optional support for <code><a href="https://github.com/IlyaGrebnov/libsais">libsais</a></code>
+ to <code>bowtie2-build</code> for faster index building.
+ N.B. -- small indexes are capped at <em>2GB</em> when using <code>libsais</code> as opposed to <em>4GB</em> when using
+ the default blockwise algorithm. See <a href="manual.shtml#building-from-source">MANUAL</a> for details.</li>
+</ul>
+
<h2>Version 2.5.2 - Oct 13, 2023</h2>
<h3 id="bowtie2">bowtie2</h3>
<ul>
=====================================
doc/website/rhsidebar.ssi
=====================================
@@ -18,10 +18,10 @@
</tr>
<tr>
<td>
- <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.5.2">Bowtie2 v2.5.2</a>
+ <a href="https://sourceforge.net/projects/bowtie-bio/files/bowtie2/2.5.3">Bowtie2 v2.5.3</a>
</td>
<td align="right">
- 10/13/23
+ 01/16/24
</td>
</tr>
<tr>
=====================================
filebuf.h
=====================================
@@ -967,7 +967,7 @@ private:
}
static constexpr size_t BUF_SZ = 16ul * 1024ul;
- static constexpr size_t MAX_BUF_SZ = 16ul * 1024ul * 1024ul * 1024ul;
+ static constexpr size_t MAX_BUF_SZ = 16ul * 1024ul * 1024ul;
const char *name_;
FILE *out_;
=====================================
pat.cpp
=====================================
@@ -1051,13 +1051,21 @@ pair<bool, int> FastqPatternSource::nextBatchFromFile(
int newlines = 4;
while(newlines) {
c = getc_wrapper();
- if (previous_was_newline && (c == '\n' || c == '\r'))
+ // We check that buf.length == 0 so that empty lines
+ // at the beginning of a batch can get discarded.
+ if ((previous_was_newline || buf.length() == 0)
+ && (c == '\n' || c == '\r')) {
continue;
+ }
// We've encountered a new record implying that the
- // previous record was incomplete. Reset the line count
- // and let the parser take care of that.
- if (previous_was_newline && c == '@' && newlines > 1) {
- newlines = 4;
+ // previous record was incomplete. Move on to the next
+ // record, the parser will take care of the partial record.
+ // We cannot simply assume that if we see an '@' and the
+ // number of newlines != 4 then it is a partial record since
+ // the quality string can also contain the '@' character.
+ if (previous_was_newline && c == '@' && newlines > 1 && newlines < 4) {
+ ungetc_wrapper(c);
+ break;
}
previous_was_newline = false;
done = c < 0;
View it on GitLab: https://salsa.debian.org/med-team/bowtie2/-/commit/886d9d4f67c0e23b7789bef848c55a4f7b2b6ea4
--
View it on GitLab: https://salsa.debian.org/med-team/bowtie2/-/commit/886d9d4f67c0e23b7789bef848c55a4f7b2b6ea4
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20240417/c32f7d69/attachment-0001.htm>
More information about the debian-med-commit
mailing list