[med-svn] [salmon] 01/11: New upstream version 0.7.2+ds1
Sascha Steinbiss
satta at debian.org
Sat Sep 10 13:22:41 UTC 2016
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository salmon.
commit 668d9b7068d750b5ad90c479c3873451499d2380
Author: Sascha Steinbiss <satta at debian.org>
Date: Sat Sep 10 11:46:55 2016 +0000
New upstream version 0.7.2+ds1
---
CMakeLists.txt | 18 +-
doc/source/conf.py | 4 +-
doc/source/file_formats.rst | 18 ++
doc/source/salmon.rst | 43 +++++
include/AlignmentLibrary.hpp | 20 ++
include/LibraryFormat.hpp | 21 +-
include/LibraryTypeDetector.hpp | 2 +
include/ReadExperiment.hpp | 30 ---
include/SalmonConfig.hpp | 4 +-
include/SalmonOpts.hpp | 9 +
include/SalmonUtils.hpp | 3 +
include/atomicops.h | 402 ++++++++++++++++++++++++++++++++++++++-
include/concurrentqueue.h | 2 +-
include/readerwriterqueue.h | 207 ++++++++++++++++++--
scripts/build_docker_binary.sh | 5 +
scripts/fetchRapMap.sh | 7 +-
src/BuildSalmonIndex.cpp | 4 +-
src/CMakeLists.txt | 2 +-
src/GZipWriter.cpp | 16 +-
src/Salmon.cpp | 2 -
src/SalmonQuantify.cpp | 137 +++++++++++--
src/SalmonQuantifyAlignments.cpp | 75 ++++++--
src/SalmonUtils.cpp | 106 ++++++++++-
23 files changed, 1009 insertions(+), 128 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0a6ec55..0b84540 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,10 +4,10 @@ enable_testing()
project (Salmon)
-set(CPACK_PACKAGE_VERSION "0.7.1")
+set(CPACK_PACKAGE_VERSION "0.7.2")
set(CPACK_PACKAGE_VERSION_MAJOR "0")
set(CPACK_PACKAGE_VERSION_MINOR "7")
-set(CPACK_PACKAGE_VERSION_PATCH "1")
+set(CPACK_PACKAGE_VERSION_PATCH "2")
set(PROJECT_VERSION ${CPACK_PACKAGE_VERSION})
set(CPACK_GENERATOR "TGZ")
set(CPACK_SOURCE_GENERATOR "TGZ")
@@ -506,11 +506,11 @@ message("Build system will compile Staden IOLib")
message("==================================================================")
ExternalProject_Add(libstadenio
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
- DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/staden-io_lib/releases/download/v1.13.10/io_lib-1.13.10.tar.gz -o staden-io_lib-v1.13.10.tar.gz &&
- mkdir -p staden-io_lib-1.13.10 &&
- tar -xzf staden-io_lib-v1.13.10.tar.gz --strip-components=1 -C staden-io_lib-1.13.10 &&
+ DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/staden-io_lib/archive/v1.14.8.tar.gz -o staden-io_lib-v1.14.8.tar.gz &&
+ mkdir -p staden-io_lib-1.14.8 &&
+ tar -xzf staden-io_lib-v1.14.8.tar.gz --strip-components=1 -C staden-io_lib-1.14.8 &&
rm -fr staden-io_lib &&
- mv -f staden-io_lib-1.13.10 staden-io_lib
+ mv -f staden-io_lib-1.14.8 staden-io_lib
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/staden-io_lib
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
CONFIGURE_COMMAND ./configure --enable-shared=no --without-libcurl --prefix=<INSTALL_DIR> LDFLAGS=${LIBSTADEN_LDFLAGS} CFLAGS=${LIBSTADEN_CFLAGS} CC=${CMAKE_C_COMPILER} CXX=${CMAKE_CXX_COMPILER}
@@ -524,10 +524,10 @@ message("Build system will fetch SPDLOG")
message("==================================================================")
ExternalProject_Add(libspdlog
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
- DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/spdlog/archive/v1.0.1.tar.gz -o spdlog-v1.0.1.tar.gz &&
- tar -xzf spdlog-v1.0.1.tar.gz &&
+ DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/spdlog/archive/v1.12.tar.gz -o spdlog-v1.12.tar.gz &&
+ tar -xzf spdlog-v1.12.tar.gz &&
rm -fr spdlog &&
- mv -f spdlog-1.0.1 spdlog
+ mv -f spdlog-1.12 spdlog
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/spdlog
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
CONFIGURE_COMMAND ""
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 5bbc8c1..ef756f3 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -55,9 +55,9 @@ copyright = u'2016, Rob Patro, Geet Duggal, Mike Love, Rafael Irizarry and Carl
# built documents.
#
# The short X.Y version.
-version = '0.7.0'
+version = '0.7.2'
# The full version, including alpha/beta/rc tags.
-release = '0.7.0'
+release = '0.7.2'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/doc/source/file_formats.rst b/doc/source/file_formats.rst
index 831b107..f9ac0d0 100644
--- a/doc/source/file_formats.rst
+++ b/doc/source/file_formats.rst
@@ -67,6 +67,24 @@ particularly important piece of information contained in this file is
the inferred library type. Most of the information recorded in this
file should be self-descriptive.
+""""""""""""""""""""""""""""""
+Observed library format counts
+""""""""""""""""""""""""""""""
+
+When run in *mapping-based* mode, the quantification directory will
+contain a file called ``lib_format_counts.json``. This JSON file
+reports the number of fragments that had at least one mapping compatible
+with the designated library format, as well as the number that didn't.
+It also records the strand-bias that provides some information about
+how strand-specific the computed mappings were.
+
+Finally, this file contains a count of the number of *mappings* that
+were computed that matched each possible library type. These are
+counts of *mappings*, and so a single fragment that maps to the
+transcriptome in more than one way may contribute to multiple library
+type counts. **Note**: This file is currently not generated when Salmon
+is run in alignment-based mode.
+
""""""""""""""""""""""""""""
Fragment length distribution
diff --git a/doc/source/salmon.rst b/doc/source/salmon.rst
index 7138d34..6f6dc30 100644
--- a/doc/source/salmon.rst
+++ b/doc/source/salmon.rst
@@ -478,6 +478,29 @@ done independently, but future versions of Salmon may provide a script to
generate this unmapped FASTA/Q file from the unmapped file and the original
inputs.
+
+"""""""""""""""""""
+``--writeMappings``
+"""""""""""""""""""
+
+Passing the ``--writeMappings`` argument to Salmon will have an effect
+only in mapping-based mode and *only when using a quasi-index*. When
+executed with the ``--writeMappings`` argument, Salmon will write out
+the mapping information that it then processes to quantify transcript
+abundances. The mapping information will be written in a SAM
+compatible format. If no options are provided to this argument, then
+the output will be written to stdout (so that e.g. it can be piped to
+samtools and directly converted into BAM format). Otherwise, this
+argument can optionally be provided with a filename, and the mapping
+information will be written to that file.
+
+.. note:: Compatible mappings
+
+ The mapping information is computed and written *before* library
+ type compatibility checks take place, thus the mapping file will
+ contain information about all mappings of the reads considered by
+ Salmon, even those that may later be filtered out due to
+ incompatibility with the library type.
What's this ``LIBTYPE``?
------------------------
@@ -493,6 +516,26 @@ allow Salmon to infer the library type for you, you should still read
the section below, so that you can interpret how Salmon reports the
library type it discovers.
+.. note:: Automatic library type detection in alignment-based mode
+
+ The implementation of this feature involves opening the BAM
+ file, peaking at the first record, and then closing it to
+ determine if the library should be treated as single-end or
+ paired-end. Thus, *in alignment-based mode* automatic
+ library type detection will not work with an input
+ stream. If your input is a regular file, everything should
+ work as expected; otherwise, you should provide the library
+ type explicitly in alignment-based mode.
+
+ Also the automatic library type detection is performed *on the
+ basis of the alignments in the file*. Thus, for example, if the
+ upstream aligner has been told to perform strand-aware mapping
+ (i.e. to ignore potential alignments that don't map in the
+ expected manner), but the actual library is unstranded,
+ automatic library type detection cannot detect this. It will
+ attempt to detect the library type that is most consistent *with
+ the alignment that are provided*.
+
The library type string consists of three parts: the relative orientation of
the reads, the strandedness of the library, and the directionality of the
reads.
diff --git a/include/AlignmentLibrary.hpp b/include/AlignmentLibrary.hpp
index bd4beea..970dd60 100644
--- a/include/AlignmentLibrary.hpp
+++ b/include/AlignmentLibrary.hpp
@@ -17,6 +17,7 @@ extern "C" {
#include "BAMQueue.hpp"
#include "SalmonUtils.hpp"
#include "LibraryFormat.hpp"
+#include "LibraryTypeDetector.hpp"
#include "SalmonOpts.hpp"
#include "FragmentLengthDistribution.hpp"
#include "FragmentStartPositionDistribution.hpp"
@@ -292,6 +293,24 @@ class AlignmentLibrary {
inline LibraryFormat format() { return libFmt_; }
inline const LibraryFormat format() const { return libFmt_; }
+ /**
+ * If this is set, attempt to automatically detect this library's type
+ */
+ void enableAutodetect() {
+ // if auto detection is not already enabled, and we're enabling it
+ if (!detector_){
+ detector_.reset(new LibraryTypeDetector(libFmt_.type));
+ }
+ }
+
+ bool autoDetect() const { return (detector_.get() != nullptr);}
+
+ LibraryTypeDetector* getDetector() { return detector_.get(); }
+
+ LibraryFormat& getFormat() { return libFmt_; }
+ const LibraryFormat& getFormat() const { return libFmt_; }
+
+
void setGCFracForward(double fracForward) { gcFracFwd_ = fracForward; }
double gcFracFwd() const { return gcFracFwd_; }
@@ -436,6 +455,7 @@ class AlignmentLibrary {
//ReadKmerDist<6, std::atomic<uint32_t>> readBias_;
std::vector<double> expectedBias_;
+ std::unique_ptr<LibraryTypeDetector> detector_{nullptr};
};
#endif // ALIGNMENT_LIBRARY_HPP
diff --git a/include/LibraryFormat.hpp b/include/LibraryFormat.hpp
index 6f42f5c..b92c5a2 100644
--- a/include/LibraryFormat.hpp
+++ b/include/LibraryFormat.hpp
@@ -89,11 +89,17 @@ public:
if (type == ReadType::SINGLE_END) {
if (orientation != ReadOrientation::NONE) {
return desc;
+ } else if (strandedness == ReadStrandedness::SA or
+ strandedness == ReadStrandedness::AS ) {
+ return desc;
}
}
if (type == ReadType::PAIRED_END) {
if (orientation == ReadOrientation::NONE) {
return desc;
+ } else if (strandedness == ReadStrandedness::S or
+ strandedness == ReadStrandedness::A ) {
+ return desc;
}
}
@@ -131,26 +137,15 @@ public:
desc += "SR";
break;
case ReadStrandedness::S:
- desc += "F";
+ desc += "SF";
break;
case ReadStrandedness::A:
- desc += "R";
+ desc += "SR";
break;
case ReadStrandedness::U:
desc += "U";
break;
}
- /*
- if (type == ReadType::PAIRED_END) {
- if (desc == "SF" or
- desc == "SR" or
- desc == "F" or
- desc == "R" or
- desc == "U" ) {
- desc.clear();
- }
- }
- */
return desc;
}
diff --git a/include/LibraryTypeDetector.hpp b/include/LibraryTypeDetector.hpp
index 774d711..955316d 100644
--- a/include/LibraryTypeDetector.hpp
+++ b/include/LibraryTypeDetector.hpp
@@ -124,6 +124,8 @@ public:
active_ = false;
ret = true;
} // end if active_
+
+ mut_.unlock(); // release the lock
} // end try_lock()
return ret;
}
diff --git a/include/ReadExperiment.hpp b/include/ReadExperiment.hpp
index 3d333d0..68f709d 100644
--- a/include/ReadExperiment.hpp
+++ b/include/ReadExperiment.hpp
@@ -535,20 +535,6 @@ class ReadExperiment {
oa(cereal::make_nvp("num_consistent_mappings", numAgree));
oa(cereal::make_nvp("num_inconsistent_mappings", numDisagree));
oa(cereal::make_nvp("strand_mapping_bias", ratio));
- /*
- ofile << "========\n"
- << "Read library consisting of files: "
- << rl.readFilesAsString()
- << "\n\n"
- << "Expected format: " << rl.format()
- << "\n\n"
- << "# of consistent alignments: " << numAgree << "\n"
- << "# of inconsistent alignments: " << numDisagree << "\n"
- << "strand bias = " << ratio << " (0.5 is unbiased)\n"
- << "# alignments with format " << fmt1 << ": " << numFmt1 << "\n"
- << "# alignments with format " << fmt2 << ": " << numFmt2 << "\n"
- << "\n========\n";
- */
} else {
numAgree = 0;
numDisagree = 0;
@@ -572,18 +558,6 @@ class ReadExperiment {
oa(cereal::make_nvp("num_consistent_mappings", numAgree));
oa(cereal::make_nvp("num_inconsistent_mappings", numDisagree));
-
- /*
- ofile << "========\n"
- << "Read library consisting of files: "
- << rl.readFilesAsString()
- << "\n\n"
- << "Expected format: " << rl.format()
- << "\n\n"
- << "# of consistent alignments: " << numAgree << "\n"
- << "# of inconsistent alignments: " << numDisagree << "\n"
- << "\n========\n";
- */
} //end else
@@ -599,17 +573,13 @@ class ReadExperiment {
errstr.clear();
}
- //ofile << "---- counts for each format type ---\n";
for (size_t i = 0; i < counts.size(); ++i) {
- //ofile << LibraryFormat::formatFromID(i) << " : " << counts[i] << "\n";
std::string desc = LibraryFormat::formatFromID(i).toString();
if (!desc.empty()) {
oa(cereal::make_nvp(desc, counts[i].load()));
}
}
- //ofile << "------------------------------------\n\n";
}
- //ofile.close();
}
std::vector<ReadLibrary>& readLibraries() { return readLibraries_; }
diff --git a/include/SalmonConfig.hpp b/include/SalmonConfig.hpp
index 1e42435..59524d0 100644
--- a/include/SalmonConfig.hpp
+++ b/include/SalmonConfig.hpp
@@ -28,8 +28,8 @@
namespace salmon {
constexpr char majorVersion[] = "0";
constexpr char minorVersion[] = "7";
- constexpr char patchVersion[] = "1";
- constexpr char version[] = "0.7.1";
+ constexpr char patchVersion[] = "2";
+ constexpr char version[] = "0.7.2";
constexpr uint32_t indexVersion = 2;
}
diff --git a/include/SalmonOpts.hpp b/include/SalmonOpts.hpp
index 332c7d2..0b3a937 100644
--- a/include/SalmonOpts.hpp
+++ b/include/SalmonOpts.hpp
@@ -6,6 +6,8 @@
// Logger includes
#include "spdlog/spdlog.h"
+#include <fstream>
+#include <ostream>
#include <memory> // for shared_ptr
@@ -124,6 +126,13 @@ struct SalmonOpts {
bool useVBOpt; // Use Variational Bayesian EM instead of "regular" EM in the batch passes
bool useQuasi; // Are we using the quasi-mapping based index or not.
+
+ // For writing quasi-mappings
+ std::string qmFileName;
+ std::ofstream qmFile;
+ std::unique_ptr<std::ostream> qmStream{nullptr};
+ std::shared_ptr<spdlog::logger> qmLog{nullptr};
+
std::unique_ptr<std::ofstream> unmappedFile{nullptr};
bool writeUnmappedNames; // write the names of unmapped reads
diff --git a/include/SalmonUtils.hpp b/include/SalmonUtils.hpp
index 2fb7279..4dd6428 100644
--- a/include/SalmonUtils.hpp
+++ b/include/SalmonUtils.hpp
@@ -87,6 +87,9 @@ std::vector<ReadLibrary> extractReadLibraries(boost::program_options::parsed_opt
LibraryFormat parseLibraryFormatString(std::string& fmt);
+ bool peekBAMIsPaired(const boost::filesystem::path& fname);
+
+
size_t numberOfReadsInFastaFile(const std::string& fname);
bool readKmerOrder( const std::string& fname, std::vector<uint64_t>& kmers );
diff --git a/include/atomicops.h b/include/atomicops.h
index aadd472..c375710 100644
--- a/include/atomicops.h
+++ b/include/atomicops.h
@@ -1,6 +1,8 @@
-// ©2013 Cameron Desrochers.
+// ©2013-2016 Cameron Desrochers.
// Distributed under the simplified BSD license (see the license file that
// should have come with this header).
+// Uses Jeff Preshing's semaphore implementation (under the terms of its
+// separate zlib license, embedded below).
#pragma once
@@ -10,7 +12,10 @@
// Uses the AE_* prefix for macros (historical reasons), and the "moodycamel" namespace for symbols.
#include <cassert>
-
+#include <type_traits>
+#include <cerrno>
+#include <cstdint>
+#include <ctime>
// Platform detection
#if defined(__INTEL_COMPILER)
@@ -78,7 +83,7 @@ enum memory_order {
} // end namespace moodycamel
-#if defined(AE_VCPP) || defined(AE_ICC)
+#if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || defined(AE_ICC)
// VS2010 and ICC13 don't support std::atomic_*_fence, implement our own fences
#include <intrin.h>
@@ -99,6 +104,9 @@ enum memory_order {
#ifdef AE_VCPP
#pragma warning(push)
#pragma warning(disable: 4365) // Disable erroneous 'conversion from long to unsigned int, signed/unsigned mismatch' error when using `assert`
+#ifdef __cplusplus_cli
+#pragma managed(push, off)
+#endif
#endif
namespace moodycamel {
@@ -201,9 +209,7 @@ AE_FORCEINLINE void fence(memory_order order)
#endif
-
-
-#if !defined(AE_VCPP) || _MSC_VER >= 1700
+#if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
#define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
#endif
@@ -226,6 +232,10 @@ public:
#pragma warning(disable: 4100) // Get rid of (erroneous) 'unreferenced formal parameter' warning
#endif
template<typename U> weak_atomic(U&& x) : value(std::forward<U>(x)) { }
+#ifdef __cplusplus_cli
+ // Work around bug with universal reference/nullptr combination that only appears when /clr is on
+ weak_atomic(nullptr_t) : value(nullptr) { }
+#endif
weak_atomic(weak_atomic const& other) : value(other.value) { }
weak_atomic(weak_atomic&& other) : value(std::move(other.value)) { }
#ifdef AE_VCPP
@@ -240,6 +250,34 @@ public:
AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) { value = other.value; return *this; }
AE_FORCEINLINE T load() const { return value; }
+
+ AE_FORCEINLINE T fetch_add_acquire(T increment)
+ {
+#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
+ if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
+#if defined(_M_AMD64)
+ else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
+#endif
+#else
+#error Unsupported platform
+#endif
+ assert(false && "T must be either a 32 or 64 bit type");
+ return value;
+ }
+
+ AE_FORCEINLINE T fetch_add_release(T increment)
+ {
+#if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
+ if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
+#if defined(_M_AMD64)
+ else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
+#endif
+#else
+#error Unsupported platform
+#endif
+ assert(false && "T must be either a 32 or 64 bit type");
+ return value;
+ }
#else
template<typename U>
AE_FORCEINLINE weak_atomic const& operator=(U&& x)
@@ -255,6 +293,16 @@ public:
}
AE_FORCEINLINE T load() const { return value.load(std::memory_order_relaxed); }
+
+ AE_FORCEINLINE T fetch_add_acquire(T increment)
+ {
+ return value.fetch_add(increment, std::memory_order_acquire);
+ }
+
+ AE_FORCEINLINE T fetch_add_release(T increment)
+ {
+ return value.fetch_add(increment, std::memory_order_release);
+ }
#endif
@@ -271,6 +319,346 @@ private:
} // end namespace moodycamel
-#ifdef AE_VCPP
+
+// Portable single-producer, single-consumer semaphore below:
+
+#if defined(_WIN32)
+// Avoid including windows.h in a header; we only need a handful of
+// items, so we'll redeclare them here (this is relatively safe since
+// the API generally has to remain stable between Windows versions).
+// I know this is an ugly hack but it still beats polluting the global
+// namespace with thousands of generic names or adding a .cpp for nothing.
+extern "C" {
+ struct _SECURITY_ATTRIBUTES;
+ __declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
+ __declspec(dllimport) int __stdcall CloseHandle(void* hObject);
+ __declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
+ __declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
+}
+#elif defined(__MACH__)
+#include <mach/mach.h>
+#elif defined(__unix__)
+#include <semaphore.h>
+#endif
+
+namespace moodycamel
+{
+ // Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
+ // portable + lightweight semaphore implementations, originally from
+ // https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
+ // LICENSE:
+ // Copyright (c) 2015 Jeff Preshing
+ //
+ // This software is provided 'as-is', without any express or implied
+ // warranty. In no event will the authors be held liable for any damages
+ // arising from the use of this software.
+ //
+ // Permission is granted to anyone to use this software for any purpose,
+ // including commercial applications, and to alter it and redistribute it
+ // freely, subject to the following restrictions:
+ //
+ // 1. The origin of this software must not be misrepresented; you must not
+ // claim that you wrote the original software. If you use this software
+ // in a product, an acknowledgement in the product documentation would be
+ // appreciated but is not required.
+ // 2. Altered source versions must be plainly marked as such, and must not be
+ // misrepresented as being the original software.
+ // 3. This notice may not be removed or altered from any source distribution.
+ namespace spsc_sema
+ {
+#if defined(_WIN32)
+ class Semaphore
+ {
+ private:
+ void* m_hSema;
+
+ Semaphore(const Semaphore& other);
+ Semaphore& operator=(const Semaphore& other);
+
+ public:
+ Semaphore(int initialCount = 0)
+ {
+ assert(initialCount >= 0);
+ const long maxLong = 0x7fffffff;
+ m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
+ }
+
+ ~Semaphore()
+ {
+ CloseHandle(m_hSema);
+ }
+
+ void wait()
+ {
+ const unsigned long infinite = 0xffffffff;
+ WaitForSingleObject(m_hSema, infinite);
+ }
+
+ bool try_wait()
+ {
+ const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
+ return WaitForSingleObject(m_hSema, 0) != RC_WAIT_TIMEOUT;
+ }
+
+ bool timed_wait(std::uint64_t usecs)
+ {
+ const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
+ return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) != RC_WAIT_TIMEOUT;
+ }
+
+ void signal(int count = 1)
+ {
+ ReleaseSemaphore(m_hSema, count, nullptr);
+ }
+ };
+#elif defined(__MACH__)
+ //---------------------------------------------------------
+ // Semaphore (Apple iOS and OSX)
+ // Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
+ //---------------------------------------------------------
+ class Semaphore
+ {
+ private:
+ semaphore_t m_sema;
+
+ Semaphore(const Semaphore& other);
+ Semaphore& operator=(const Semaphore& other);
+
+ public:
+ Semaphore(int initialCount = 0)
+ {
+ assert(initialCount >= 0);
+ semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
+ }
+
+ ~Semaphore()
+ {
+ semaphore_destroy(mach_task_self(), m_sema);
+ }
+
+ void wait()
+ {
+ semaphore_wait(m_sema);
+ }
+
+ bool try_wait()
+ {
+ return timed_wait(0);
+ }
+
+ bool timed_wait(std::int64_t timeout_usecs)
+ {
+ mach_timespec_t ts;
+ ts.tv_sec = timeout_usecs / 1000000;
+ ts.tv_nsec = (timeout_usecs % 1000000) * 1000;
+
+ // added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
+ kern_return_t rc = semaphore_timedwait(m_sema, ts);
+
+ return rc != KERN_OPERATION_TIMED_OUT;
+ }
+
+ void signal()
+ {
+ semaphore_signal(m_sema);
+ }
+
+ void signal(int count)
+ {
+ while (count-- > 0)
+ {
+ semaphore_signal(m_sema);
+ }
+ }
+ };
+#elif defined(__unix__)
+ //---------------------------------------------------------
+ // Semaphore (POSIX, Linux)
+ //---------------------------------------------------------
+ class Semaphore
+ {
+ private:
+ sem_t m_sema;
+
+ Semaphore(const Semaphore& other);
+ Semaphore& operator=(const Semaphore& other);
+
+ public:
+ Semaphore(int initialCount = 0)
+ {
+ assert(initialCount >= 0);
+ sem_init(&m_sema, 0, initialCount);
+ }
+
+ ~Semaphore()
+ {
+ sem_destroy(&m_sema);
+ }
+
+ void wait()
+ {
+ // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
+ int rc;
+ do
+ {
+ rc = sem_wait(&m_sema);
+ }
+ while (rc == -1 && errno == EINTR);
+ }
+
+ bool try_wait()
+ {
+ int rc;
+ do {
+ rc = sem_trywait(&m_sema);
+ } while (rc == -1 && errno == EINTR);
+ return !(rc == -1 && errno == EAGAIN);
+ }
+
+ bool timed_wait(std::uint64_t usecs)
+ {
+ struct timespec ts;
+ const int usecs_in_1_sec = 1000000;
+ const int nsecs_in_1_sec = 1000000000;
+ clock_gettime(CLOCK_REALTIME, &ts);
+ ts.tv_sec += usecs / usecs_in_1_sec;
+ ts.tv_nsec += (usecs % usecs_in_1_sec) * 1000;
+ // sem_timedwait bombs if you have more than 1e9 in tv_nsec
+ // so we have to clean things up before passing it in
+ if (ts.tv_nsec > nsecs_in_1_sec) {
+ ts.tv_nsec -= nsecs_in_1_sec;
+ ++ts.tv_sec;
+ }
+
+ int rc;
+ do {
+ rc = sem_timedwait(&m_sema, &ts);
+ } while (rc == -1 && errno == EINTR);
+ return !(rc == -1 && errno == ETIMEDOUT);
+ }
+
+ void signal()
+ {
+ sem_post(&m_sema);
+ }
+
+ void signal(int count)
+ {
+ while (count-- > 0)
+ {
+ sem_post(&m_sema);
+ }
+ }
+ };
+#else
+#error Unsupported platform! (No semaphore wrapper available)
+#endif
+
+ //---------------------------------------------------------
+ // LightweightSemaphore
+ //---------------------------------------------------------
+ class LightweightSemaphore
+ {
+ public:
+ typedef std::make_signed<std::size_t>::type ssize_t;
+
+ private:
+ weak_atomic<ssize_t> m_count;
+ Semaphore m_sema;
+
+ bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
+ {
+ ssize_t oldCount;
+ // Is there a better way to set the initial spin count?
+ // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
+ // as threads start hitting the kernel semaphore.
+ int spin = 10000;
+ while (--spin >= 0)
+ {
+ if (m_count.load() > 0)
+ {
+ m_count.fetch_add_acquire(-1);
+ return true;
+ }
+ compiler_fence(memory_order_acquire); // Prevent the compiler from collapsing the loop.
+ }
+ oldCount = m_count.fetch_add_acquire(-1);
+ if (oldCount > 0)
+ return true;
+ if (timeout_usecs < 0)
+ {
+ m_sema.wait();
+ return true;
+ }
+ if (m_sema.timed_wait(timeout_usecs))
+ return true;
+ // At this point, we've timed out waiting for the semaphore, but the
+ // count is still decremented indicating we may still be waiting on
+ // it. So we have to re-adjust the count, but only if the semaphore
+ // wasn't signaled enough times for us too since then. If it was, we
+ // need to release the semaphore too.
+ while (true)
+ {
+ oldCount = m_count.fetch_add_release(1);
+ if (oldCount < 0)
+ return false; // successfully restored things to the way they were
+ // Oh, the producer thread just signaled the semaphore after all. Try again:
+ oldCount = m_count.fetch_add_acquire(-1);
+ if (oldCount > 0 && m_sema.try_wait())
+ return true;
+ }
+ }
+
+ public:
+ LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
+ {
+ assert(initialCount >= 0);
+ }
+
+ bool tryWait()
+ {
+ if (m_count.load() > 0)
+ {
+ m_count.fetch_add_acquire(-1);
+ return true;
+ }
+ return false;
+ }
+
+ void wait()
+ {
+ if (!tryWait())
+ waitWithPartialSpinning();
+ }
+
+ bool wait(std::int64_t timeout_usecs)
+ {
+ return tryWait() || waitWithPartialSpinning(timeout_usecs);
+ }
+
+ void signal(ssize_t count = 1)
+ {
+ assert(count >= 0);
+ ssize_t oldCount = m_count.fetch_add_release(count);
+ assert(oldCount >= -1);
+ if (oldCount < 0)
+ {
+ m_sema.signal(1);
+ }
+ }
+
+ ssize_t availableApprox() const
+ {
+ ssize_t count = m_count.load();
+ return count > 0 ? count : 0;
+ }
+ };
+ } // end namespace spsc_sema
+} // end namespace moodycamel
+
+#if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
#pragma warning(pop)
+#ifdef __cplusplus_cli
+#pragma managed(pop)
+#endif
#endif
diff --git a/include/concurrentqueue.h b/include/concurrentqueue.h
index 2eb54d6..d43514d 100644
--- a/include/concurrentqueue.h
+++ b/include/concurrentqueue.h
@@ -237,7 +237,7 @@ namespace details {
: static_cast<T>(-1);
};
-#if (defined(__GNUC__) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)))
+#if (!defined(__clang__) && defined(__GNUC__) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)))
typedef ::max_align_t max_align_t; // GCC forgot to add it to std:: for a while
#else
typedef std::max_align_t max_align_t; // Others (e.g. MSVC) insist it can *only* be accessed via std::
diff --git a/include/readerwriterqueue.h b/include/readerwriterqueue.h
index 1451e05..3fef647 100644
--- a/include/readerwriterqueue.h
+++ b/include/readerwriterqueue.h
@@ -1,4 +1,4 @@
-// ©2013-2015 Cameron Desrochers.
+// ©2013-2016 Cameron Desrochers.
// Distributed under the simplified BSD license (see the license file that
// should have come with this header).
@@ -9,8 +9,12 @@
#include <utility>
#include <cassert>
#include <stdexcept>
+#include <new>
#include <cstdint>
-#include <cstdlib> // For malloc/free & size_t
+#include <cstdlib> // For malloc/free/abort & size_t
+#if __cplusplus > 199711L
+#include <chrono>
+#endif
// A lock-free queue for a single-consumer, single-producer architecture.
@@ -26,7 +30,15 @@
// one role, is not safe unless properly synchronized.
// Using the queue exclusively from one thread is fine, though a bit silly.
-#define CACHE_LINE_SIZE 64
+#ifndef MOODYCAMEL_CACHE_LINE_SIZE
+#define MOODYCAMEL_CACHE_LINE_SIZE 64
+#endif
+
+#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
+#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
+#define MOODYCAMEL_EXCEPTIONS_ENABLED
+#endif
+#endif
#ifdef AE_VCPP
#pragma warning(push)
@@ -90,7 +102,11 @@ public:
for (size_t i = 0; i != initialBlockCount; ++i) {
auto block = make_block(largestBlockSize);
if (block == nullptr) {
+#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
throw std::bad_alloc();
+#else
+ abort();
+#endif
}
if (firstBlock == nullptr) {
firstBlock = block;
@@ -105,7 +121,11 @@ public:
else {
firstBlock = make_block(largestBlockSize);
if (firstBlock == nullptr) {
+#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
throw std::bad_alloc();
+#else
+ abort();
+#endif
}
firstBlock->next = firstBlock;
}
@@ -543,11 +563,7 @@ private:
ReentrantGuard(bool& _inSection)
: inSection(_inSection)
{
- assert(!inSection);
- if (inSection) {
- throw std::runtime_error("ReaderWriterQueue does not support enqueuing or dequeuing elements from other elements' ctors and dtors");
- }
-
+ assert(!inSection && "ReaderWriterQueue does not support enqueuing or dequeuing elements from other elements' ctors and dtors");
inSection = true;
}
@@ -567,11 +583,11 @@ private:
weak_atomic<size_t> front; // (Atomic) Elements are read from here
size_t localTail; // An uncontended shadow copy of tail, owned by the consumer
- char cachelineFiller0[CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];
+ char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];
weak_atomic<size_t> tail; // (Atomic) Elements are enqueued here
size_t localFront;
- char cachelineFiller1[CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)]; // next isn't very contended, but we don't want it on the same cache line as tail (which is)
+ char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)]; // next isn't very contended, but we don't want it on the same cache line as tail (which is)
weak_atomic<Block*> next; // (Atomic)
char* data; // Contents (on heap) are aligned to T's alignment
@@ -612,7 +628,7 @@ private:
private:
weak_atomic<Block*> frontBlock; // (Atomic) Elements are enqueued to this block
- char cachelineFiller[CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];
+ char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];
weak_atomic<Block*> tailBlock; // (Atomic) Elements are dequeued from this block
size_t largestBlockSize;
@@ -623,6 +639,175 @@ private:
#endif
};
+// Like ReaderWriterQueue, but also providees blocking operations
+template<typename T, size_t MAX_BLOCK_SIZE = 512>
+class BlockingReaderWriterQueue
+{
+private:
+ typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
+
+public:
+ explicit BlockingReaderWriterQueue(size_t maxSize = 15)
+ : inner(maxSize)
+ { }
+
+
+ // Enqueues a copy of element if there is room in the queue.
+ // Returns true if the element was enqueued, false otherwise.
+ // Does not allocate memory.
+ AE_FORCEINLINE bool try_enqueue(T const& element)
+ {
+ if (inner.try_enqueue(element)) {
+ sema.signal();
+ return true;
+ }
+ return false;
+ }
+
+ // Enqueues a moved copy of element if there is room in the queue.
+ // Returns true if the element was enqueued, false otherwise.
+ // Does not allocate memory.
+ AE_FORCEINLINE bool try_enqueue(T&& element)
+ {
+ if (inner.try_enqueue(std::forward<T>(element))) {
+ sema.signal();
+ return true;
+ }
+ return false;
+ }
+
+
+ // Enqueues a copy of element on the queue.
+ // Allocates an additional block of memory if needed.
+ // Only fails (returns false) if memory allocation fails.
+ AE_FORCEINLINE bool enqueue(T const& element)
+ {
+ if (inner.enqueue(element)) {
+ sema.signal();
+ return true;
+ }
+ return false;
+ }
+
+ // Enqueues a moved copy of element on the queue.
+ // Allocates an additional block of memory if needed.
+ // Only fails (returns false) if memory allocation fails.
+ AE_FORCEINLINE bool enqueue(T&& element)
+ {
+ if (inner.enqueue(std::forward<T>(element))) {
+ sema.signal();
+ return true;
+ }
+ return false;
+ }
+
+
+ // Attempts to dequeue an element; if the queue is empty,
+ // returns false instead. If the queue has at least one element,
+ // moves front to result using operator=, then returns true.
+ template<typename U>
+ bool try_dequeue(U& result)
+ {
+ if (sema.tryWait()) {
+ bool success = inner.try_dequeue(result);
+ assert(success);
+ AE_UNUSED(success);
+ return true;
+ }
+ return false;
+ }
+
+
+ // Attempts to dequeue an element; if the queue is empty,
+ // waits until an element is available, then dequeues it.
+ template<typename U>
+ void wait_dequeue(U& result)
+ {
+ sema.wait();
+ bool success = inner.try_dequeue(result);
+ AE_UNUSED(result);
+ assert(success);
+ AE_UNUSED(success);
+ }
+
+
+ // Attempts to dequeue an element; if the queue is empty,
+ // waits until an element is available up to the specified timeout,
+ // then dequeues it and returns true, or returns false if the timeout
+ // expires before an element can be dequeued.
+ // Using a negative timeout indicates an indefinite timeout,
+ // and is thus functionally equivalent to calling wait_dequeue.
+ template<typename U>
+ bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs)
+ {
+ if (!sema.wait(timeout_usecs)) {
+ return false;
+ }
+ bool success = inner.try_dequeue(result);
+ AE_UNUSED(result);
+ assert(success);
+ AE_UNUSED(success);
+ return true;
+ }
+
+
+#if __cplusplus > 199711L
+ // Attempts to dequeue an element; if the queue is empty,
+ // waits until an element is available up to the specified timeout,
+ // then dequeues it and returns true, or returns false if the timeout
+ // expires before an element can be dequeued.
+ // Using a negative timeout indicates an indefinite timeout,
+ // and is thus functionally equivalent to calling wait_dequeue.
+ template<typename U, typename Rep, typename Period>
+ inline bool wait_dequeue_timed(U& result, std::chrono::duration<Rep, Period> const& timeout)
+ {
+ return wait_dequeue_timed(result, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
+ }
+#endif
+
+
+ // Returns a pointer to the front element in the queue (the one that
+ // would be removed next by a call to `try_dequeue` or `pop`). If the
+ // queue appears empty at the time the method is called, nullptr is
+ // returned instead.
+ // Must be called only from the consumer thread.
+ AE_FORCEINLINE T* peek()
+ {
+ return inner.peek();
+ }
+
+ // Removes the front element from the queue, if any, without returning it.
+ // Returns true on success, or false if the queue appeared empty at the time
+ // `pop` was called.
+ AE_FORCEINLINE bool pop()
+ {
+ if (sema.tryWait()) {
+ bool result = inner.pop();
+ assert(result);
+ AE_UNUSED(result);
+ return true;
+ }
+ return false;
+ }
+
+ // Returns the approximate number of items currently in the queue.
+ // Safe to call from both the producer and consumer threads.
+ AE_FORCEINLINE size_t size_approx() const
+ {
+ return sema.availableApprox();
+ }
+
+
+private:
+ // Disable copying & assignment
+ BlockingReaderWriterQueue(ReaderWriterQueue const&) { }
+ BlockingReaderWriterQueue& operator=(ReaderWriterQueue const&) { }
+
+private:
+ ReaderWriterQueue inner;
+ spsc_sema::LightweightSemaphore sema;
+};
+
} // end namespace moodycamel
#ifdef AE_VCPP
diff --git a/scripts/build_docker_binary.sh b/scripts/build_docker_binary.sh
new file mode 100644
index 0000000..954313f
--- /dev/null
+++ b/scripts/build_docker_binary.sh
@@ -0,0 +1,5 @@
+#~/bin/bash
+
+echo "building pre-compiled linux release for Salmon $1"
+
+docker run -t -i --rm -v `pwd`:/io phusion/holy-build-box-64:latest bash /io/compile.sh develop $1
diff --git a/scripts/fetchRapMap.sh b/scripts/fetchRapMap.sh
index a148f05..febdbf1 100755
--- a/scripts/fetchRapMap.sh
+++ b/scripts/fetchRapMap.sh
@@ -17,10 +17,12 @@ if [ -d ${INSTALL_DIR}/src/rapmap ] ; then
fi
mkdir -p ${EXTERNAL_DIR}
-curl -k -L https://github.com/COMBINE-lab/RapMap/archive/develop-salmon.zip -o ${EXTERNAL_DIR}/rapmap.zip
+curl -k -L https://github.com/COMBINE-lab/RapMap/archive/salmon-v0.7.2.zip -o ${EXTERNAL_DIR}/rapmap.zip
+#curl -k -L https://github.com/COMBINE-lab/RapMap/archive/develop-salmon.zip -o ${EXTERNAL_DIR}/rapmap.zip
rm -fr ${EXTERNAL_DIR}/RapMap
unzip ${EXTERNAL_DIR}/rapmap.zip -d ${EXTERNAL_DIR}
-mv ${EXTERNAL_DIR}/RapMap-develop-salmon ${EXTERNAL_DIR}/RapMap
+#mv ${EXTERNAL_DIR}/RapMap-develop-salmon ${EXTERNAL_DIR}/RapMap
+mv ${EXTERNAL_DIR}/RapMap-salmon-v0.7.2 ${EXTERNAL_DIR}/RapMap
mkdir -p ${INSTALL_DIR}/include/rapmap
mkdir -p ${INSTALL_DIR}/src/rapmap
@@ -34,4 +36,3 @@ cp -r ${EXTERNAL_DIR}/RapMap/src/*.cpp ${INSTALL_DIR}/src/rapmap
cp -r ${EXTERNAL_DIR}/RapMap/include/tclap ${INSTALL_DIR}/include/rapmap
cp -r ${EXTERNAL_DIR}/RapMap/include/*.h ${INSTALL_DIR}/include/rapmap
cp -r ${EXTERNAL_DIR}/RapMap/include/*.hpp ${INSTALL_DIR}/include/rapmap
-cp -r ${EXTERNAL_DIR}/RapMap/include/emphf ${INSTALL_DIR}/include/rapmap
diff --git a/src/BuildSalmonIndex.cpp b/src/BuildSalmonIndex.cpp
index 6f6ef3d..098f246 100644
--- a/src/BuildSalmonIndex.cpp
+++ b/src/BuildSalmonIndex.cpp
@@ -117,8 +117,8 @@ Index
==========
Creates a salmon index.
)";
- std::cout << hstring << std::endl;
- std::cout << generic << std::endl;
+ std::cerr << hstring << std::endl;
+ std::cerr << generic << std::endl;
std::exit(0);
}
po::notify(vm);
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index f31a927..c851492 100755
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -40,7 +40,6 @@ SalmonQuantify.cpp
FragmentLengthDistribution.cpp
FragmentStartPositionDistribution.cpp
SequenceBiasModel.cpp
-StadenUtils.cpp
TranscriptGroup.cpp
GZipWriter.cpp
#${GAT_SOURCE_DIR}/external/install/src/rapmap/sais.c
@@ -61,6 +60,7 @@ GenomicFeature.cpp
VersionChecker.cpp
SBModel.cpp
FastxParser.cpp
+StadenUtils.cpp
SalmonUtils.cpp
DistributionUtils.cpp
SalmonStringUtils.cpp
diff --git a/src/GZipWriter.cpp b/src/GZipWriter.cpp
index b8b94d1..c165c52 100644
--- a/src/GZipWriter.cpp
+++ b/src/GZipWriter.cpp
@@ -175,7 +175,7 @@ bool GZipWriter::writeMeta(
std::vector<int32_t> observedBias(bcounts.size(), 0);
std::copy(bcounts.begin(), bcounts.end(), observedBias.begin());
writeVectorToFile(obsBiasPath, observedBias);
-
+
bfs::path obsBiasPath3p = auxDir / "observed_bias_3p.gz";
const auto& bcounts3p = experiment.readBias(salmon::utils::Direction::REVERSE_COMPLEMENT).counts;
std::vector<int32_t> observedBias3p(bcounts3p.size(), 0);
@@ -227,7 +227,7 @@ bool GZipWriter::writeMeta(
}
if (opts.gcBiasCorrect) {
- // GC observed
+ // GC observed
{
bfs::path obsGCPath = auxDir / "obs_gc.gz";
auto flags = std::ios_base::out | std::ios_base::binary;
@@ -237,7 +237,7 @@ bool GZipWriter::writeMeta(
auto& obsgc = experiment.observedGC();
obsgc.writeBinary(out);
}
- // GC expected
+ // GC expected
{
bfs::path expGCPath = auxDir / "exp_gc.gz";
auto flags = std::ios_base::out | std::ios_base::binary;
@@ -258,7 +258,7 @@ bool GZipWriter::writeMeta(
std::copy(gcCounts.begin(), gcCounts.end(), observedGC.begin());
writeVectorToFile(obsGCPath, observedGC);
*/
-
+
bfs::path info = auxDir / "meta_info.json";
{
@@ -278,7 +278,7 @@ bool GZipWriter::writeMeta(
oa(cereal::make_nvp("samp_type", sampType));
auto libStrings = getLibTypeStrings(experiment);
- oa(cereal::make_nvp("num_libraries", libStrings.size()));
+ oa(cereal::make_nvp("num_libraries", libStrings.size()));
oa(cereal::make_nvp("library_types", libStrings));
oa(cereal::make_nvp("frag_dist_length", fldSamples.size()));
@@ -290,7 +290,7 @@ bool GZipWriter::writeMeta(
oa(cereal::make_nvp("mapping_type", mapTypeStr));
oa(cereal::make_nvp("num_targets", transcripts.size()));
- oa(cereal::make_nvp("num_bootstraps", numBootstraps));
+ oa(cereal::make_nvp("num_bootstraps", numSamples));
oa(cereal::make_nvp("num_processed", experiment.numObservedFragments()));
oa(cereal::make_nvp("num_mapped", experiment.numMappedFragments()));
oa(cereal::make_nvp("percent_mapped", experiment.effectiveMappingRate() * 100.0));
@@ -417,8 +417,8 @@ bool GZipWriter::writeMeta<AlignmentLibrary<ReadPair>>(
const AlignmentLibrary<ReadPair>& experiment,
const std::string& tstring);
-template
+template
std::vector<std::string> getLibTypeStrings(const AlignmentLibrary<UnpairedRead>& experiment);
-template
+template
std::vector<std::string> getLibTypeStrings(const AlignmentLibrary<ReadPair>& experiment);
diff --git a/src/Salmon.cpp b/src/Salmon.cpp
index ffaf8c2..e4c7025 100644
--- a/src/Salmon.cpp
+++ b/src/Salmon.cpp
@@ -75,7 +75,6 @@ int help(int argc, char* argv[]) {
*/
std::cerr << helpMsg.str();
-//std::cerr << " Salmon v" << salmon::version << helpmsg << "\n";
return 1;
}
@@ -176,7 +175,6 @@ int main( int argc, char* argv[] ) {
}
if (vm.count("help") and !vm.count("command")) {
- //std::cout << sfopts << std::endl;
help(argc, argv);
std::exit(0);
}
diff --git a/src/SalmonQuantify.cpp b/src/SalmonQuantify.cpp
index c88940e..ab669bc 100644
--- a/src/SalmonQuantify.cpp
+++ b/src/SalmonQuantify.cpp
@@ -123,6 +123,9 @@ extern "C" {
#include "SACollector.hpp"
#include "SASearcher.hpp"
#include "SalmonOpts.hpp"
+#include "PairAlignmentFormatter.hpp"
+#include "SingleAlignmentFormatter.hpp"
+#include "RapMapUtils.hpp"
//#include "TextBootstrapWriter.hpp"
/****** QUASI MAPPING DECLARATIONS *********/
@@ -202,7 +205,8 @@ void processMiniBatch(ReadExperiment& readExp, ForgettingMassCalculator& fmCalc,
bool posBiasCorrect = salmonOpts.posBiasCorrect;
bool gcBiasCorrect = salmonOpts.gcBiasCorrect;
bool updateCounts = initialRound;
- bool useReadCompat = salmonOpts.incompatPrior != salmon::math::LOG_1;
+ double incompatPrior = salmonOpts.incompatPrior;
+ bool useReadCompat = incompatPrior != salmon::math::LOG_1;
bool useFSPD{salmonOpts.useFSPD};
bool useFragLengthDist{!salmonOpts.noFragLengthDist};
bool noFragLenFactor{salmonOpts.noFragLenFactor};
@@ -211,7 +215,10 @@ void processMiniBatch(ReadExperiment& readExp, ForgettingMassCalculator& fmCalc,
// If we're auto detecting the library type
auto* detector = readLib.getDetector();
bool autoDetect = (detector != nullptr) ? detector->isActive() : false;
- const auto expectedLibraryFormat = readLib.format();
+ // If we haven't detected yet, nothing is incompatible
+ if (autoDetect) { incompatPrior = salmon::math::LOG_1; }
+
+ auto expectedLibraryFormat = readLib.format();
uint64_t zeroProbFrags{0};
// EQClass
@@ -323,6 +330,12 @@ void processMiniBatch(ReadExperiment& readExp, ForgettingMassCalculator& fmCalc,
detector->addSample(aln.libFormat());
if (detector->canGuess()) {
detector->mostLikelyType(readLib.getFormat());
+ expectedLibraryFormat = readLib.getFormat();
+ incompatPrior = salmonOpts.incompatPrior;
+ autoDetect = false;
+ } else if (!detector->isActive()) {
+ expectedLibraryFormat = readLib.getFormat();
+ incompatPrior = salmonOpts.incompatPrior;
autoDetect = false;
}
}
@@ -333,13 +346,27 @@ void processMiniBatch(ReadExperiment& readExp, ForgettingMassCalculator& fmCalc,
// The probability that the fragments align to the given strands in
// the
// given orientations.
- double logAlignCompatProb =
- (useReadCompat) ? (salmon::utils::logAlignFormatProb(
- aln.libFormat(), expectedLibraryFormat,
- static_cast<int32_t>(aln.pos), aln.fwd,
+ bool isCompat =
+ salmon::utils::isCompatible(
+ aln.libFormat(),
+ expectedLibraryFormat,
+ static_cast<int32_t>(aln.pos),
+ aln.fwd,
+ aln.mateStatus);
+ double logAlignCompatProb = isCompat ? LOG_1 : incompatPrior;
+ if (!isCompat and salmonOpts.ignoreIncompat) {
+ aln.logProb = salmon::math::LOG_0;
+ continue;
+ }
+
+ /*
+ double logAlignCompatProb =
+ (useReadCompat) ? (salmon::utils::logAlignFormatProb(
+ aln.libFormat(), expectedLibraryFormat,
+ static_cast<int32_t>(aln.pos), aln.fwd,
aln.mateStatus, salmonOpts.incompatPrior))
: LOG_1;
-
+ */
/** New compat handling
// True if the read is compatible with the
// expected library type; false otherwise.
@@ -742,7 +769,13 @@ void processReadsQuasi(
std::vector<QuasiAlignment> rightHits;
rapmap::utils::HitCounters hctr;
salmon::utils::MappingType mapType{salmon::utils::MappingType::UNMAPPED};
+
+ PairAlignmentFormatter<RapMapIndexT*> formatter(qidx);
+ fmt::MemoryWriter sstream;
+ auto* qmLog = salmonOpts.qmLog.get();
+ bool writeQuasimappings = (qmLog != nullptr);
+
auto rg = parser->getReadGroup();
while (parser->refill(rg)) {
rangeSize = rg.size();
@@ -969,6 +1002,12 @@ void processReadsQuasi(
} break;
}
}
+
+ if (writeQuasimappings) {
+ rapmap::utils::writeAlignmentsToStream(rp, formatter,
+ hctr, jointHits, sstream);
+ }
+
} else {
// This read was completely unmapped.
mapType = salmon::utils::MappingType::UNMAPPED;
@@ -980,6 +1019,7 @@ void processReadsQuasi(
unmappedNames << rp.first.name << ' ' << salmon::utils::str(mapType) << '\n';
}
+
validHits += jointHits.size();
localNumAssignedFragments += (jointHits.size() > 0);
locRead++;
@@ -1015,6 +1055,15 @@ void processReadsQuasi(
unmappedNames.clear();
}
+ if (writeQuasimappings) {
+ std::string outStr(sstream.str());
+ // Get rid of last newline
+ if (!outStr.empty()) {
+ outStr.pop_back();
+ qmLog->info(std::move(outStr));
+ }
+ sstream.clear();
+ }
prevObservedFrags = numObservedFragments;
AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(
@@ -1094,6 +1143,12 @@ void processReadsQuasi(
SACollector<RapMapIndexT> hitCollector(qidx);
SASearcher<RapMapIndexT> saSearcher(qidx);
rapmap::utils::HitCounters hctr;
+
+ SingleAlignmentFormatter<RapMapIndexT*> formatter(qidx);
+ fmt::MemoryWriter sstream;
+ auto* qmLog = salmonOpts.qmLog.get();
+ bool writeQuasimappings = (qmLog != nullptr);
+
auto rg = parser->getReadGroup();
while (parser->refill(rg)) {
rangeSize = rg.size();
@@ -1187,6 +1242,11 @@ void processReadsQuasi(
} break;
}
}
+
+ if (writeQuasimappings) {
+ rapmap::utils::writeAlignmentsToStream(rp, formatter,
+ hctr, jointHits, sstream);
+ }
if (writeUnmapped and jointHits.empty()) {
// If we have no mappings --- then there's nothing to do
@@ -1228,6 +1288,16 @@ void processReadsQuasi(
unmappedNames.clear();
}
+ if (writeQuasimappings) {
+ std::string outStr(sstream.str());
+ // Get rid of last newline
+ if (!outStr.empty()) {
+ outStr.pop_back();
+ qmLog->info(std::move(outStr));
+ }
+ sstream.clear();
+ }
+
prevObservedFrags = numObservedFragments;
AlnGroupVecRange<QuasiAlignment> hitLists = boost::make_iterator_range(
structureVec.begin(), structureVec.begin() + rangeSize);
@@ -1319,6 +1389,9 @@ void processReadLibrary(
// change value before the lambda below is evaluated --- crazy!
if (largeIndex) {
if (perfectHashIndex) { // Perfect Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash64()), salmonOpts.qmLog);
+ }
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, PerfectHash<int64_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
@@ -1330,6 +1403,9 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndex64()), salmonOpts.qmLog);
+ }
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, DenseHash<int64_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
@@ -1343,6 +1419,9 @@ void processReadLibrary(
}
} else {
if (perfectHashIndex) { // Perfect Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash32()), salmonOpts.qmLog);
+ }
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, PerfectHash<int32_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
@@ -1354,6 +1433,9 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndex32()), salmonOpts.qmLog);
+ }
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, DenseHash<int32_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
@@ -1470,10 +1552,14 @@ void processReadLibrary(
bool largeIndex = sidx->is64BitQuasi();
bool perfectHashIndex = sidx->isPerfectHashQuasi();
for (int i = 0; i < numThreads; ++i) {
+
// NOTE: we *must* capture i by value here, b/c it can (sometimes, does)
// change value before the lambda below is evaluated --- crazy!
if (largeIndex) {
if (perfectHashIndex) { // Perfect Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash64()), salmonOpts.qmLog);
+ }
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, PerfectHash<int64_t>>>(
pairedParserPtr.get(), readExp, rl, structureVec[i],
@@ -1485,6 +1571,10 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndex64()), salmonOpts.qmLog);
+ }
+
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int64_t, DenseHash<int64_t>>>(
singleParserPtr.get(), readExp, rl, structureVec[i],
@@ -1498,6 +1588,10 @@ void processReadLibrary(
}
} else {
if (perfectHashIndex) { // Perfect Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndexPerfectHash32()), salmonOpts.qmLog);
+ }
+
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, PerfectHash<int32_t>>>(
singleParserPtr.get(), readExp, rl, structureVec[i],
@@ -1509,6 +1603,10 @@ void processReadLibrary(
};
threads.emplace_back(threadFun);
} else { // Dense Hash
+ if (salmonOpts.qmFileName != "" and i == 0) {
+ rapmap::utils::writeSAMHeader(*(sidx->quasiIndex32()), salmonOpts.qmLog);
+ }
+
auto threadFun = [&, i]() -> void {
processReadsQuasi<RapMapSAIndex<int32_t, DenseHash<int32_t>>>(
singleParserPtr.get(), readExp, rl, structureVec[i],
@@ -1899,7 +1997,12 @@ int salmonQuantify(int argc, char* argv[]) {
"should be parsed. Files ending in \'.gtf\' or \'.gff\' are assumed to "
"be in GTF "
"format; files with any other extension are assumed to be in the simple "
- "format.");
+ "format.")
+ (
+ "writeMappings", po::value<string>(&sopt.qmFileName)->default_value("")->implicit_value("-"),
+ "If this option is provided, then the quasi-mapping results will be written out in SAM-compatible "
+ "format. By default, output will be directed to stdout, but an alternative file name can be "
+ "provided instead.");
sopt.noRichEqClasses = false;
// mapping cache has been deprecated
@@ -2023,7 +2126,7 @@ int salmonQuantify(int argc, char* argv[]) {
"[experimental] : "
"If this option is enabled, then no (lower) threshold will be set on "
"how short bias correction can make effective lengths. This can increase the precision "
- "of bias correction, but harm robustness. The default correction applies a threshold")
+ "of bias correction, but harm robustness. The default correction applies a threshold.")
(
"numBiasSamples",
po::value<int32_t>(&numBiasSamples)->default_value(2000000),
@@ -2080,7 +2183,7 @@ int salmonQuantify(int argc, char* argv[]) {
(
"writeUnmappedNames",
po::bool_switch(&(sopt.writeUnmappedNames))->default_value(false),
- "Write the names of un-mapped reads to the file unmapped.txt in the auxiliary directory.");
+ "Write the names of un-mapped reads to the file unmapped_names.txt in the auxiliary directory.");
po::options_description fmd("\noptions that apply to the old FMD index");
@@ -2190,8 +2293,8 @@ Quant
Perform streaming mapping-based estimation of
transcript abundance from RNA-seq reads
)";
- std::cout << hstring << std::endl;
- std::cout << visible << std::endl;
+ std::cerr << hstring << std::endl;
+ std::cerr << visible << std::endl;
std::exit(0);
}
@@ -2384,7 +2487,7 @@ transcript abundance from RNA-seq reads
return gzw.writeBootstrap(alphas);
};
- jointLog->info("Staring Bootstrapping");
+ jointLog->info("Starting Bootstrapping");
bool bootstrapSuccess =
optimizer.gatherBootstraps(experiment, sopt, bsWriter, 0.01, 10000);
jointLog->info("Finished Bootstrapping");
@@ -2443,6 +2546,14 @@ transcript abundance from RNA-seq reads
if (sopt.unmappedFile) { sopt.unmappedFile->close(); }
}
}
+
+ // if we wrote quasimappings, flush that buffer
+ if (sopt.qmFileName != "" ){
+ sopt.qmLog->flush();
+ // if we wrote to a buffer other than stdout, close
+ // the file
+ if (sopt.qmFileName != "-") { sopt.qmFile.close(); }
+ }
} catch (po::error& e) {
std::cerr << "Exception : [" << e.what() << "]. Exiting.\n";
std::exit(1);
diff --git a/src/SalmonQuantifyAlignments.cpp b/src/SalmonQuantifyAlignments.cpp
index 6226974..d1de11d 100644
--- a/src/SalmonQuantifyAlignments.cpp
+++ b/src/SalmonQuantifyAlignments.cpp
@@ -138,12 +138,19 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
// Whether or not we are using "banking"
bool useMassBanking = (!initialRound and salmonOpts.useMassBanking);
- bool useReadCompat = salmonOpts.incompatPrior != salmon::math::LOG_1;
+ double incompatPrior = salmonOpts.incompatPrior;
+ bool useReadCompat = incompatPrior != salmon::math::LOG_1;
// Create a random uniform distribution
std::default_random_engine eng(rd());
std::uniform_real_distribution<> uni(0.0, 1.0 + std::numeric_limits<double>::min());
+ // If we're auto detecting the library type
+ auto* detector = alnLib.getDetector();
+ bool autoDetect = (detector != nullptr) ? detector->isActive() : false;
+ // If we haven't detected yet, nothing is incompatible
+ if (autoDetect) { incompatPrior = salmon::math::LOG_1; }
+
//EQClass
EquivalenceClassBuilder& eqBuilder = alnLib.equivalenceClassBuilder();
auto& readBiasFW = observedBiasParams.seqBiasModelFW;
@@ -179,7 +186,7 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
bool noFragLenFactor{salmonOpts.noFragLenFactor};
double startingCumulativeMass = fmCalc.cumulativeLogMassAt(firstTimestepOfRound);
- const auto expectedLibraryFormat = alnLib.format();
+ auto expectedLibraryFormat = alnLib.format();
uint32_t numBurninFrags{salmonOpts.numBurninFrags};
bool useAuxParams = (processedReads > salmonOpts.numPreBurninFrags);
@@ -281,7 +288,20 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
// TESTING
if (noFragLenFactor) { logFragProb = LOG_1; }
- // @TODO: handle this case better
+ if (autoDetect) {
+ detector->addSample(aln->libFormat());
+ if (detector->canGuess()) {
+ detector->mostLikelyType(alnLib.getFormat());
+ expectedLibraryFormat = alnLib.getFormat();
+ incompatPrior = salmonOpts.incompatPrior;
+ autoDetect = false;
+ } else if (!detector->isActive()) {
+ expectedLibraryFormat = alnLib.getFormat();
+ incompatPrior = salmonOpts.incompatPrior;
+ autoDetect = false;
+ }
+ }
+ // @TODO: handle this case better
//double fragProb = cdf(fragLengthDist, fragLength + 0.5) - cdf(fragLengthDist, fragLength - 0.5);
//fragProb = std::max(fragProb, 1e-3);
//fragProb /= cdf(fragLengthDist, refLength);
@@ -304,7 +324,7 @@ void processMiniBatch(AlignmentLibrary<FragT>& alnLib,
expectedLibraryFormat,
aln->pos(),
aln->fwd(), aln->mateStatus());
- double logAlignCompatProb = isCompat ? LOG_1 : salmonOpts.incompatPrior;
+ double logAlignCompatProb = isCompat ? LOG_1 : incompatPrior;
if (!isCompat and salmonOpts.ignoreIncompat) {
aln->logProb = salmon::math::LOG_0;
continue;
@@ -1120,7 +1140,8 @@ bool processSample(AlignmentLibrary<ReadT>& alnLib,
}
}
-
+ //bfs::path libCountFilePath = outputDirectory / "lib_format_counts.json";
+ //alnLib.summarizeLibraryTypeCounts(libCountFilePath);
if (sopt.sampleOutput) {
// In this case, we should "re-convert" transcript
@@ -1198,13 +1219,13 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
po::options_description advanced("\nadvanced options");
advanced.add_options()
- ("auxDir", po::value<std::string>(&(sopt.auxDir))->default_value("aux"), "The sub-directory of the quantification directory where auxiliary information "
+ ("auxDir", po::value<std::string>(&(sopt.auxDir))->default_value("aux_info"), "The sub-directory of the quantification directory where auxiliary information "
"e.g. bootstraps, bias parameters, etc. will be written.")
("noBiasLengthThreshold", po::bool_switch(&(sopt.noBiasLengthThreshold))->default_value(false), "[experimental] : "
"If this option is enabled, then no (lower) threshold will be set on "
"how short bias correction can make effective lengths. This can increase the precision "
- "of bias correction, but harm robustness. The default correction applies a threshold")
- ("fldMax" , po::value<size_t>(&(sopt.fragLenDistMax))->default_value(800), "The maximum fragment length to consider when building the empirical distribution")
+ "of bias correction, but harm robustness. The default correction applies a thresholdi.")
+ ("fldMax" , po::value<size_t>(&(sopt.fragLenDistMax))->default_value(1000), "The maximum fragment length to consider when building the empirical distribution")
("fldMean", po::value<size_t>(&(sopt.fragLenDistPriorMean))->default_value(200), "The mean used in the fragment length distribution prior")
("fldSD" , po::value<size_t>(&(sopt.fragLenDistPriorSD))->default_value(80), "The standard deviation used in the fragment length distribution prior")
("forgettingFactor,f", po::value<double>(&(sopt.forgettingFactor))->default_value(0.65), "The forgetting factor used "
@@ -1251,10 +1272,11 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
"all information about the relative weights for each transcript in the "
"label of an equivalence class will be ignored, and only the relative "
"abundance and effective length of each transcript will be considered.")
- */
("noBiasLengthThreshold", po::bool_switch(&(sopt.noBiasLengthThreshold))->default_value(false), "[experimental] : "
"If this option is enabled, then bias correction will be allowed to estimate effective lengths "
"shorter than the approximate mean fragment length")
+
+ */
("numErrorBins", po::value<uint32_t>(&(sopt.numErrorBins))->default_value(6), "The number of bins into which to divide "
"each read when learning and applying the error model. For example, a value of 10 would mean that "
"effectively, a separate error model is leared and applied to each 10th of the read, while a value of "
@@ -1274,7 +1296,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
"going to perform downstream analysis of the alignments with tools which don't, themselves, take "
"fragment assignment ambiguity into account, you should use this output.")
("sampleUnaligned,u", po::bool_switch(&(sopt.sampleUnaligned))->default_value(false), "In addition to sampling the aligned reads, also write "
- "the un-aligned reads to \"posSample.bam\".")
+ "the un-aligned reads to \"postSample.bam\".")
("numGibbsSamples", po::value<uint32_t>(&(sopt.numGibbsSamples))->default_value(0), "Number of Gibbs sampling rounds to "
"perform.")
("numBootstraps", po::value<uint32_t>(&(sopt.numBootstraps))->default_value(0), "Number of bootstrap samples to generate. Note: "
@@ -1327,8 +1349,8 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
po::store(orderedOptions, vm);
if (vm.count("help")) {
- std::cout << "Salmon quant (alignment-based)\n";
- std::cout << visible << std::endl;
+ std::cerr << "Salmon quant (alignment-based)\n";
+ std::cerr << visible << std::endl;
std::exit(0);
}
po::notify(vm);
@@ -1368,7 +1390,6 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
// Set the atomic variable numBiasSamples from the local version
sopt.numBiasSamples.store(numBiasSamples);
- sopt.noBiasLengthThreshold = !sopt.useBiasLengthThreshold;
// Get the time at the start of the run
std::time_t result = std::time(NULL);
@@ -1401,9 +1422,27 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
alignmentFiles.push_back(alignmentFile);
}
}
-
+
+ // Just so we have the variable around
+ LibraryFormat libFmt(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U);
+ // Get the library format string
std::string libFmtStr = vm["libType"].as<std::string>();
- LibraryFormat libFmt = salmon::utils::parseLibraryFormatStringNew(libFmtStr);
+
+ // If we're auto-detecting, set things up appropriately
+ bool autoDetectFmt = (libFmtStr == "a" or libFmtStr == "A");//(autoTypes.find(libFmtStr) != autoTypes.end());
+ if (autoDetectFmt) {
+
+ bool isPairedEnd = salmon::utils::peekBAMIsPaired(alignmentFiles.front());
+
+ if (isPairedEnd) {
+ libFmt = LibraryFormat(ReadType::PAIRED_END, ReadOrientation::TOWARD, ReadStrandedness::U);
+ } else {
+ libFmt = LibraryFormat(ReadType::SINGLE_END, ReadOrientation::NONE, ReadStrandedness::U);
+ }
+ } else { // Parse the provided type
+ libFmt = salmon::utils::parseLibraryFormatStringNew(libFmtStr);
+ }
+
if (libFmt.check()) {
std::cerr << libFmt << "\n";
} else {
@@ -1446,7 +1485,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
std::exit(1);
}
if (!sopt.quiet) {
- std::cout << "Logs will be written to " << logDirectory.string() << "\n";
+ std::cerr << "Logs will be written to " << logDirectory.string() << "\n";
}
bfs::path logPath = logDirectory / "salmon.log";
@@ -1508,7 +1547,6 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
}
}
-
// Write out information about the command / run
{
bfs::path cmdInfoPath = outputDirectory / "cmd_info.json";
@@ -1560,6 +1598,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
libFmt,
sopt);
+ if (autoDetectFmt) { alnLib.enableAutodetect(); }
success = processSample<UnpairedRead>(alnLib, runStartTime,
requiredObservations, sopt,
outputDirectory);
@@ -1571,7 +1610,7 @@ int salmonAlignmentQuantify(int argc, char* argv[]) {
transcriptFile,
libFmt,
sopt);
-
+ if (autoDetectFmt) { alnLib.enableAutodetect(); }
success = processSample<ReadPair>(alnLib, runStartTime,
requiredObservations, sopt,
outputDirectory);
diff --git a/src/SalmonUtils.cpp b/src/SalmonUtils.cpp
index d81cba2..cdb9a2d 100644
--- a/src/SalmonUtils.cpp
+++ b/src/SalmonUtils.cpp
@@ -42,6 +42,8 @@
#include "SGSmooth.hpp"
#include "TranscriptGeneMap.hpp"
+#include "StadenUtils.hpp"
+
namespace salmon {
namespace utils {
@@ -850,6 +852,55 @@ LibraryFormat parseLibraryFormatString(std::string& fmt) {
return lf;
}
+ bool peekBAMIsPaired(const boost::filesystem::path& file) {
+ namespace bfs = boost::filesystem;
+ std::string readMode = "r";
+
+ if (bfs::is_regular_file(file)) {
+ if (bfs::is_empty(file)) {
+ fmt::MemoryWriter errstr;
+ errstr << "file [" << file.string() << "] appears to be empty "
+ "(i.e. it has size 0). This is likely an error. "
+ "Please re-run salmon with a corrected input file.\n\n";
+ throw std::invalid_argument(errstr.str());
+ return false;
+ }
+ }
+ if (file.extension() == ".bam") {
+ readMode = "rb";
+ }
+
+ auto* fp = scram_open(file.c_str(), readMode.c_str());
+
+ // If we couldn't open the file, then report this and exit.
+ if (fp == NULL) {
+ fmt::MemoryWriter errstr;
+ errstr << "ERROR: Failed to open file " << file.string() << ", exiting!\n";
+ throw std::invalid_argument(errstr.str());
+ return false;
+ }
+
+ bam_seq_t* read = nullptr;
+ read = staden::utils::bam_init();
+
+ bool didRead = (scram_get_seq(fp, &read) >= 0);
+ bool isPaired{false};
+
+ if (didRead) {
+ isPaired = bam_flag(read) & BAM_FPAIRED;
+ } else {
+ fmt::MemoryWriter errstr;
+ errstr << "ERROR: Failed to read alignment from " << file.string() << ", exiting!\n";
+ staden::utils::bam_destroy(read);
+ throw std::invalid_argument(errstr.str());
+ return false;
+ }
+
+ scram_close(fp);
+ staden::utils::bam_destroy(read);
+ return isPaired;
+ }
+
uint64_t encode(uint64_t tid, uint64_t offset) {
uint64_t res = (((tid & 0xFFFFFFFF) << 32) | (offset & 0xFFFFFFFF));
return res;
@@ -1271,17 +1322,27 @@ bool processQuantOptions(SalmonOpts& sopt,
}
if (!sopt.quiet) {
- std::cout << "Logs will be written to " << logDirectory.string() << "\n";
+ std::cerr << "Logs will be written to " << logDirectory.string() << "\n";
}
+ // Determine what we'll do with quasi-mapping results
+ bool writeQuasimappings = (sopt.qmFileName != "");
+
bfs::path logPath = logDirectory / "salmon_quant.log";
// must be a power-of-two
-
size_t max_q_size = 2097152;
+
+ // make it larger if we're writing mappings or
+ // unmapped names.
+ std::streambuf* qmBuf;
+ if (writeQuasimappings or sopt.writeUnmappedNames) {
+ max_q_size = 16777216;
+ }
+
spdlog::set_async_mode(max_q_size);
auto fileSink = std::make_shared<spdlog::sinks::simple_file_sink_mt>(
- logPath.string(), true);
+ logPath.string());
auto rawConsoleSink = std::make_shared<spdlog::sinks::stdout_sink_mt>();
auto consoleSink =
std::make_shared<spdlog::sinks::ansicolor_sink>(rawConsoleSink);
@@ -1310,9 +1371,8 @@ bool processQuantOptions(SalmonOpts& sopt,
std::ofstream* outFile = new std::ofstream(unmappedNameFile.string());
// Must be a power of 2
- size_t queueSize{268435456};
-
- spdlog::set_async_mode(queueSize);
+ //size_t queueSize{268435456};
+ //spdlog::set_async_mode(queueSize);
auto outputSink =
std::make_shared<spdlog::sinks::ostream_sink_mt>(*outFile);
@@ -1324,9 +1384,43 @@ bool processQuantOptions(SalmonOpts& sopt,
} else {
jointLog->error("Couldn't create auxiliary directory in which to place "
"\"unmapped_names.txt\"");
+ return false;
}
}
+ if (writeQuasimappings) {
+ // output to stdout
+ if (sopt.qmFileName == "-") {
+ qmBuf = std::cout.rdbuf();
+ } else { // output to the requested path, making the directory if it doesn't exist
+ // get the parent directory
+ bfs::path qmDir = boost::filesystem::path(sopt.qmFileName).parent_path();
+ // if it's not already a directory that exists
+ bool qmDirSuccess = boost::filesystem::is_directory(qmDir);
+ // try to create it
+ if (!qmDirSuccess) {
+ qmDirSuccess = boost::filesystem::create_directories(qmDir);
+ }
+ // if the directory already existed, or we created it successfully, open the file
+ if (qmDirSuccess) {
+ sopt.qmFile.open(sopt.qmFileName);
+ qmBuf = sopt.qmFile.rdbuf();
+ } else {
+ bfs::path qmFileName = boost::filesystem::path(sopt.qmFileName).filename();
+ jointLog->error("Couldn't create requested directory {} in which "
+ "to place the mapping output {}", qmDir.string(), qmFileName.string());
+ return false;
+ }
+ }
+ // Now set the output stream to the buffer, which is
+ // either std::cout, or a file.
+ sopt.qmStream.reset(new std::ostream(qmBuf));
+
+ auto outputSink = std::make_shared<spdlog::sinks::ostream_sink_mt>(*(sopt.qmStream.get()));
+ sopt.qmLog = std::make_shared<spdlog::logger>("qmStream", outputSink);
+ sopt.qmLog->set_pattern("%v");
+ }
+
// Verify that no inconsistent options were provided
if (sopt.numGibbsSamples > 0 and sopt.numBootstraps > 0) {
jointLog->error("You cannot perform both Gibbs sampling and bootstrapping. "
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/salmon.git
More information about the debian-med-commit
mailing list