[med-svn] [Git][med-team/jellyfish][upstream] New upstream version 2.2.8
Michael R. Crusoe
gitlab at salsa.debian.org
Sun Feb 11 18:57:37 UTC 2018
Michael R. Crusoe pushed to branch upstream at Debian Med / jellyfish
Commits:
d2ced9a4 by Michael R. Crusoe at 2018-02-11T04:40:47-08:00
New upstream version 2.2.8
- - - - -
26 changed files:
- Makefile.am
- configure.ac
- include/jellyfish/file_header.hpp
- include/jellyfish/hash_counter.hpp
- include/jellyfish/large_hash_array.hpp
- include/jellyfish/mer_overlap_sequence_parser.hpp
- include/jellyfish/rectangular_binary_matrix.hpp
- include/jellyfish/whole_sequence_parser.hpp
- jellyfish/dbg.cc
- lib/rectangular_binary_matrix.cc
- swig/Makefile.am
- swig/jellyfish.i
- swig/python/setup.py
- swig/python/test_hash_counter.py
- swig/python/test_mer_file.py
- swig/python/test_string_mers.py
- swig/ruby/test_hash_counter.rb
- swig/ruby/test_mer_file.rb
- swig/ruby/test_string_mers.rb
- swig/string_mers.i
- tests/compat.sh.in
- unit_tests/test_file_header.cc
- unit_tests/test_generator_manager.cc
- unit_tests/test_hash_counter.cc
- unit_tests/test_large_hash_array.cc
- unit_tests/test_rectangular_binary_matrix.cc
Changes:
=====================================
Makefile.am
=====================================
--- a/Makefile.am
+++ b/Makefile.am
@@ -146,7 +146,8 @@ AM_SH_LOG_FLAGS =
TESTS = tests/generate_sequence.sh tests/parallel_hashing.sh \
tests/merge.sh tests/bloom_filter.sh tests/big.sh \
tests/subset_hashing.sh tests/multi_file.sh \
- tests/bloom_counter.sh tests/large_key.sh tests/sam.sh
+ tests/bloom_counter.sh tests/large_key.sh tests/sam.sh \
+ tests/small_mers.sh
EXTRA_DIST += $(TESTS)
clean-local: clean-local-check
@@ -164,6 +165,7 @@ tests/min_qual.log: tests/generate_fastq_sequence.log
tests/large_key.log: tests/generate_sequence.log
tests/quality_filter.log: tests/generate_sequence.log
tests/sam.log: tests/generate_sequence.log
+tests/small_mers.log: tests/generate_sequence.log
# SWIG tests
TESTS += tests/swig_python.sh tests/swig_ruby.sh tests/swig_perl.sh
=====================================
configure.ac
=====================================
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([jellyfish], [2.2.7], [gmarcais at umd.edu])
+AC_INIT([jellyfish], [2.2.8], [gmarcais at umd.edu])
AC_CANONICAL_HOST
AC_CONFIG_MACRO_DIR([m4])
AM_INIT_AUTOMAKE([subdir-objects foreign parallel-tests color-tests])
@@ -10,18 +10,23 @@ AC_LIB_RPATH
PKG_PROG_PKG_CONFIG
# Change default compilation flags
-AC_SUBST([ALL_CXXFLAGS], [-std=c++0x])
-CXXFLAGS="-std=c++0x $CXXFLAGS"
AC_LANG(C++)
AC_PROG_CXX
# Major version of the library
AC_SUBST([PACKAGE_LIB], [2.0])
+# Check if gnu++11 is necessary
+save_CXXFLAGS=$CXXFLAGS
+AC_CANONICAL_HOST
+case "${host_os}" in
+ cygwin*) CXXFLAGS="-std=gnu++11 $save_CXXFLAGS" ;;
+ *) CXXFLAGS="-std=c++11 $save_CXXFLAGS" ;;
+esac
+
# Try to find htslib to read SAM/BAM/CRAM files
AC_ARG_ENABLE([htslib],
[AS_HELP_STRING([--enable-htslib], [Look for the HTS library (default=yes)])])
-echo "enable_htslib $enable_htslib"
AS_IF([test "x$enable_htslib" = "xyes" -o "x$enable_htslib" = "x"],
[PKG_CHECK_MODULES([HTSLIB], [htslib], [AC_DEFINE([HAVE_HTSLIB], [1], [Defined if htslib is available])], [true])]
[AC_LIB_LINKFLAGS_FROM_LIBS([HTSLIB_RPATH], [$HTSLIB_LIBS], [LIBTOOL])])
@@ -88,8 +93,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <mach-o/dyld.h>]],
[AC_DEFINE([HAVE_NSGETEXECUTABLEPATH], [1], [Used to find executable path on MacOS X])],
[AC_MSG_RESULT([no])])
-# Check the version of strerror_r
-AC_CHECK_HEADERS_ONCE([execinfo.h ext/stdio_filebuf.h])
+AC_CHECK_HEADERS_ONCE([execinfo.h ext/stdio_filebuf.h sys/syscall.h])
AC_CHECK_MEMBER([siginfo_t.si_int],
[AC_DEFINE([HAVE_SI_INT], [1], [Define if siginfo_t.si_int exists])],
[], [[#include <signal.h>]])
@@ -134,6 +138,9 @@ AM_CONDITIONAL(PYTHON_BINDING, [test -n "$enable_python_binding" -a x$enable_pyt
AM_COND_IF([PYTHON_BINDING],
[AS_IF([test x$enable_python_binding != xyes], [PYTHON_SITE_PKG=$enable_python_binding])]
[AX_PYTHON_DEVEL([], [$prefix])])
+AC_ARG_ENABLE([python-deprecated],
+ [AC_HELP_STRING([--enable-python-deprecated], [enable the deprecated 'jellyfish' module (in addition to 'dna_jellyfish')])])
+AM_CONDITIONAL([PYTHON_DEPRECATED], [test -z "$enable_python_deprecated" -o x$enable_python_deprecated != xno])
# Ruby binding setup
AS_IF([test -z "$enable_ruby_binding"], [enable_ruby_binding="$enable_all_binding"])
=====================================
include/jellyfish/file_header.hpp
=====================================
--- a/include/jellyfish/file_header.hpp
+++ b/include/jellyfish/file_header.hpp
@@ -45,6 +45,9 @@ public:
name += std::to_string((long long int)i); // Cast to make gcc4.4 happy!
const unsigned int r = root_[name]["r"].asUInt();
const unsigned int c = root_[name]["c"].asUInt();
+ if(root_[name]["identity"].asBool())
+ return RectangularBinaryMatrix::identity(r, c);
+
std::vector<uint64_t> raw(c, (uint64_t)0);
for(unsigned int i = 0; i < c; ++i)
raw[i] = root_[name]["columns"][i].asUInt64();
@@ -57,9 +60,14 @@ public:
root_[name].clear();
root_[name]["r"] = m.r();
root_[name]["c"] = m.c();
- for(unsigned int i = 0; i < m.c(); ++i) {
- Json::UInt64 x = m[i];
- root_[name]["columns"].append(x);
+ if(m.is_low_identity()) {
+ root_[name]["identity"] = true;
+ } else {
+ root_[name]["identity"] = false;
+ for(unsigned int i = 0; i < m.c(); ++i) {
+ Json::UInt64 x = m[i];
+ root_[name]["columns"].append(x);
+ }
}
}
=====================================
include/jellyfish/hash_counter.hpp
=====================================
--- a/include/jellyfish/hash_counter.hpp
+++ b/include/jellyfish/hash_counter.hpp
@@ -104,7 +104,7 @@ public:
while(!ary_->add(k, v, &carry_shift, is_new_ptr, id_ptr)) {
handle_full_ary();
- v &= ~(uint64_t)0 << carry_shift;
+ v &= ~(uint64_t)0 << carry_shift;
// If carry_shift == 0, failed to allocate the first field for
// key, hence status of is_new and value for id are not
// determined yet. On the other hand, if carry_shift > 0, we
@@ -112,8 +112,8 @@ public:
// of is_new and value of id are known. We do not update them in future
// calls.
if(carry_shift) {
- is_new_ptr = &is_new_void;
- id_ptr = &id_void;
+ is_new_ptr = &is_new_void;
+ id_ptr = &id_void;
}
}
}
@@ -204,9 +204,16 @@ protected:
bool double_size(bool serial_thread) {
if(serial_thread) {// Allocate new array for size doubling
try {
- new_ary_ = new array(ary_->size() * 2, ary_->key_len(), ary_->val_len(),
- ary_->max_reprobe(), ary_->reprobes());
- } catch(typename array::ErrorAllocation e) {
+ if(ary_->key_len() >= sizeof(size_t) * 8 || ary_->size() < ((size_t)1 << ary_->key_len())) {
+ // Increase number of keys
+ new_ary_ = new array(ary_->size() * 2, ary_->key_len(), ary_->val_len(),
+ ary_->max_reprobe(), ary_->reprobes());
+ } else {
+ // Array is already maximum compared to key len, increase val_len
+ new_ary_ = new array(ary_->size(), ary_->key_len(), ary_->val_len() + 1,
+ ary_->max_reprobe(), ary_->reprobes());
+ }
+ } catch(typename array::ErrorAllocation e) {
new_ary_ = 0;
}
}
@@ -219,10 +226,6 @@ protected:
// Copy data from old to new
uint16_t id = atomic_t::fetch_add(&size_thid_, (uint16_t)1);
- // Why doesn't the following work? Seems like a bug to
- // me. Equivalent call works in test_large_hash_array. Or am I
- // missing something?
- // eager_iterator it = ary_->iterator_slice<eager_iterator>(id, nb_threads_);
eager_iterator it = ary_->eager_slice(id, nb_threads_);
while(it.next())
my_ary->add(it.key(), it.val());
=====================================
include/jellyfish/large_hash_array.hpp
=====================================
--- a/include/jellyfish/large_hash_array.hpp
+++ b/include/jellyfish/large_hash_array.hpp
@@ -930,23 +930,35 @@ public:
};
-template<typename Key, typename word = uint64_t, typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
-class array :
+// Large array. Memory managed by the mmap allocator. Do not check the
+// relation between the size of the array and key_len.
+template<typename Key, typename word = uint64_t,
+ typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
+class unbounded_array :
protected mem_block_t,
- public array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> >
+ public array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> >
{
- typedef array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> > super;
- friend class array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> >;
+ typedef array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> > super;
+ friend class array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> >;
public:
- array(size_t size, // Size of hash. To be rounded up to a power of 2
- uint16_t key_len, // Size of key in bits
- uint16_t val_len, // Size of val in bits
- uint16_t reprobe_limit, // Maximum reprobe
- const size_t* reprobes = quadratic_reprobes) : // Reprobing policy
- mem_block_t(),
- super(size, key_len, val_len, reprobe_limit, RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse(),
- reprobes)
+ unbounded_array(size_t size, // Size of hash. To be rounded up to a power of 2
+ uint16_t key_len, // Size of key in bits
+ uint16_t val_len, // Size of val in bits
+ uint16_t reprobe_limit, // Maximum reprobe
+ const size_t* reprobes = quadratic_reprobes) // Reprobing policy
+ : super(size, key_len, val_len, reprobe_limit,
+ RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse(),
+ reprobes)
+ { }
+
+ unbounded_array(size_t size, // Size of hash. To be rounded up to a power of 2
+ uint16_t key_len, // Size of key in bits
+ uint16_t val_len, // Size of val in bits
+ uint16_t reprobe_limit, // Maximum reprobe
+ RectangularBinaryMatrix&& m, // Hashing matrix
+ const size_t* reprobes = quadratic_reprobes) // Reprobing policy
+ : super(size, key_len, val_len, reprobe_limit, m, reprobes)
{ }
protected:
@@ -956,6 +968,35 @@ protected:
}
};
+// Large array. Memory managed by the mmap allocator, bound the size
+// of the array if the key_len is small.
+template<typename Key, typename word = uint64_t,
+ typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
+class array : public unbounded_array<Key, word, atomic_t, mem_block_t>
+{
+ typedef unbounded_array<Key, word, atomic_t, mem_block_t> super;
+
+ static size_t key_len_size(uint16_t key_len) {
+ return key_len >= std::numeric_limits<size_t>::digits ? std::numeric_limits<size_t>::max() / 2 : (size_t)1 << key_len;
+ }
+
+public:
+ array(size_t size, // Size of hash. To be rounded up to a power of 2
+ uint16_t key_len, // Size of key in bits
+ uint16_t val_len, // Size of val in bits
+ uint16_t reprobe_limit, // Maximum reprobe
+ const size_t* reprobes = quadratic_reprobes) : // Reprobing policy
+ super(std::min(size, key_len_size(key_len)), key_len, val_len, reprobe_limit,
+ (size < key_len_size(key_len))
+ ? RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse()
+ : RectangularBinaryMatrix::identity(key_len),
+ reprobes)
+ {
+ // std::cerr << this->size() << ' ' << this->val_len() << '\n';
+ }
+
+};
+
struct ptr_info {
void* ptr_;
size_t bytes_;
=====================================
include/jellyfish/mer_overlap_sequence_parser.hpp
=====================================
--- a/include/jellyfish/mer_overlap_sequence_parser.hpp
+++ b/include/jellyfish/mer_overlap_sequence_parser.hpp
@@ -131,7 +131,7 @@ protected:
// streams_iterator_ noticed that we closed that stream before
// requesting a new one.
st.stream.reset();
- st.stream = streams_iterator_.next();
+ st.stream = std::move(streams_iterator_.next());
if(!st.stream.good()) {
st.type = DONE_TYPE;
return false;
=====================================
include/jellyfish/rectangular_binary_matrix.hpp
=====================================
--- a/include/jellyfish/rectangular_binary_matrix.hpp
+++ b/include/jellyfish/rectangular_binary_matrix.hpp
@@ -41,19 +41,33 @@
// bits of each word are set to 0).
//
// Multiplication between a matrix and vector of size _c x 1 gives a
-// vector of size _r x 1 stored as one 64 bit word.
+// vector of size _r x 1 stored as one 64 bit word. A matrix with a
+// NULL _columns pointer behaves like the identity.
namespace jellyfish {
class RectangularBinaryMatrix {
+ explicit RectangularBinaryMatrix(unsigned int c)
+ : _columns(NULL)
+ , _r(c)
+ , _c(c)
+ { }
+
public:
RectangularBinaryMatrix(unsigned int r, unsigned c)
: _columns(alloc(r, c)), _r(r), _c(c) { }
RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs)
- : _columns(alloc(rhs._r, rhs._c)), _r(rhs._r), _c(rhs._c) {
- memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
+ : _columns(rhs._columns ? alloc(rhs._r, rhs._c) : NULL)
+ , _r(rhs._r)
+ , _c(rhs._c)
+ {
+ if(_columns)
+ memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
}
- RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs) :
- _columns(rhs._columns), _r(rhs._r), _c(rhs._c) {
+ RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs)
+ : _columns(rhs._columns)
+ , _r(rhs._r)
+ , _c(rhs._c)
+ {
rhs._columns = 0;
}
// Initialize from raw data. raw must contain at least c words.
@@ -67,6 +81,16 @@ namespace jellyfish {
free(_columns);
}
+ static RectangularBinaryMatrix identity(unsigned c) {
+ return RectangularBinaryMatrix(c);
+ }
+
+ static RectangularBinaryMatrix identity(unsigned r, unsigned c) {
+ RectangularBinaryMatrix res(r, c);
+ res.init_low_identity();
+ return res;
+ }
+
RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) {
if(_r != rhs._r || _c != rhs._c)
throw std::invalid_argument("RHS matrix dimensions do not match");
@@ -90,7 +114,7 @@ namespace jellyfish {
}
// Get i-th column. No check on range
- const uint64_t & operator[](unsigned int i) const { return _columns[i]; }
+ uint64_t operator[](unsigned int i) const { return _columns ? _columns[i] : ((uint64_t)1 << i); }
unsigned int r() const { return _r; }
unsigned int c() const { return _c; }
@@ -112,8 +136,8 @@ namespace jellyfish {
// Make and check that the matrix the lower right corner of the
// identity.
- void init_low_identity();
- bool is_low_identity();
+ void init_low_identity(bool simplify = true);
+ bool is_low_identity() const;
// Left matrix vector multiplication. Type T supports the operator
// v[i] to return the i-th 64 bit word of v.
@@ -204,6 +228,7 @@ namespace jellyfish {
template<typename T>
uint64_t RectangularBinaryMatrix::times_loop(const T &v) const {
+ if(!_columns) return v[0] & cmask();
uint64_t *p = _columns + _c - 1;
uint64_t res = 0, x = 0, j = 0;
const uint64_t one = (uint64_t)1;
@@ -244,6 +269,7 @@ namespace jellyfish {
#ifdef HAVE_SSE
template<typename T>
uint64_t RectangularBinaryMatrix::times_sse(const T &v) const {
+ if(!_columns) return v[0] & cmask();
#define FFs ((uint64_t)-1)
static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) =
{0, 0, 0, FFs, FFs, 0, FFs, FFs};
@@ -338,6 +364,7 @@ namespace jellyfish {
#ifdef HAVE_INT128
template<typename T>
uint64_t RectangularBinaryMatrix::times_128(const T &v) const {
+ if(!_columns) return v[0] & cmask();
typedef unsigned __int128 u128;
static const u128 smear[4] =
{ (u128)0,
=====================================
include/jellyfish/whole_sequence_parser.hpp
=====================================
--- a/include/jellyfish/whole_sequence_parser.hpp
+++ b/include/jellyfish/whole_sequence_parser.hpp
@@ -92,7 +92,7 @@ public:
protected:
void open_next_file(stream_status& st) {
st.stream.reset();
- st.stream = streams_iterator_.next();
+ st.stream = std::move(streams_iterator_.next());
if(!st.stream.good()) {
st.type = DONE_TYPE;
return;
=====================================
jellyfish/dbg.cc
=====================================
--- a/jellyfish/dbg.cc
+++ b/jellyfish/dbg.cc
@@ -16,7 +16,14 @@
#include <jellyfish/dbg.hpp>
#include <jellyfish/time.hpp>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
+#endif
namespace dbg {
pthread_mutex_t print_t::_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -33,7 +40,7 @@ namespace dbg {
}
Time toc() {
#ifdef DEBUG
- Time t;
+ Time t;
return t - _tic_time;
#else
return Time::zero;
=====================================
lib/rectangular_binary_matrix.cc
=====================================
--- a/lib/rectangular_binary_matrix.cc
+++ b/lib/rectangular_binary_matrix.cc
@@ -31,13 +31,20 @@ uint64_t *jellyfish::RectangularBinaryMatrix::alloc(unsigned int r, unsigned int
// Make sure the number of words allocated is a multiple of
// 8. Necessary for loop unrolling of vector multiplication
size_t alloc_columns = (c / 8 + (c % 8 != 0)) * 8;
- if(posix_memalign(&mem, sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t)))
+ // if(posix_memalign(&mem, sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t)))
+ if(!(mem = aligned_alloc(sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t))))
throw std::bad_alloc();
memset(mem, '\0', sizeof(uint64_t) * alloc_columns);
return (uint64_t *)mem;
}
-void jellyfish::RectangularBinaryMatrix::init_low_identity() {
+void jellyfish::RectangularBinaryMatrix::init_low_identity(bool simplify) {
+ if(!_columns) return;
+ if(_c == _r && simplify) {
+ free(_columns);
+ _columns = NULL;
+ return;
+ }
memset(_columns, '\0', sizeof(uint64_t) * _c);
unsigned int row = std::min(_c, _r);
unsigned int col = _c - row;
@@ -46,7 +53,8 @@ void jellyfish::RectangularBinaryMatrix::init_low_identity() {
_columns[i] = _columns[i - 1] >> 1;
}
-bool jellyfish::RectangularBinaryMatrix::is_low_identity() {
+bool jellyfish::RectangularBinaryMatrix::is_low_identity() const {
+ if(!_columns) return true;
unsigned int row = std::min(_c, _r);
unsigned int col = _c - row;
@@ -64,6 +72,9 @@ bool jellyfish::RectangularBinaryMatrix::is_low_identity() {
jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_multiplication(const jellyfish::RectangularBinaryMatrix &rhs) const {
if(_r != rhs._r || _c != rhs._c)
throw std::domain_error("Matrices of different size");
+ if(!_columns) return rhs;
+ if(!rhs._columns) return *this;
+
RectangularBinaryMatrix res(_r, _c);
// v is a vector. The lower part is equal to the given column of rhs
@@ -102,6 +113,8 @@ jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_mu
}
unsigned int jellyfish::RectangularBinaryMatrix::pseudo_rank() const {
+ if(!_columns) return _c;
+
unsigned int rank = _c;
RectangularBinaryMatrix pivot(*this);
@@ -136,8 +149,10 @@ unsigned int jellyfish::RectangularBinaryMatrix::pseudo_rank() const {
}
jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_inverse() const {
+ if(!_columns) return *this;
+
RectangularBinaryMatrix pivot(*this);
- RectangularBinaryMatrix res(_r, _c); res.init_low_identity();
+ RectangularBinaryMatrix res(_r, _c); res.init_low_identity(false);
unsigned int i, j;
uint64_t mask;
@@ -186,12 +201,19 @@ jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_in
}
void jellyfish::RectangularBinaryMatrix::print(std::ostream &os) const {
- uint64_t mask = (uint64_t)1 << (_r - 1);
- for( ; mask; mask >>= 1) {
- for(unsigned int j = 0; j < _c; ++j) {
- os << (mask & _columns[j] ? "1" : "0");
+ if(!_columns) {
+ for(unsigned int i = 0; i < _c; ++i) {
+ for(unsigned int j = 0; j < _c; ++j)
+ os << (i == j ? '1' : '0');
+ os << '\n';
}
- os << "\n";
+ } else {
+ uint64_t mask = (uint64_t)1 << (_r - 1);
+ for( ; mask; mask >>= 1) {
+ for(unsigned int j = 0; j < _c; ++j)
+ os << (mask & _columns[j] ? '1' : '0');
+ os << '\n';
+ }
}
}
=====================================
swig/Makefile.am
=====================================
--- a/swig/Makefile.am
+++ b/swig/Makefile.am
@@ -16,23 +16,27 @@ endif
# Python support
if PYTHON_BINDING
-PYTHON_BUILT = swig/python/swig_wrap.cpp swig/python/jellyfish.py
+PYTHON_BUILT = swig/python/swig_wrap.cpp swig/python/dna_jellyfish.py
BUILT_SOURCES += $(PYTHON_BUILT)
-pythonextdir = $(PYTHON_SITE_PKG)/jellyfish
+if PYTHON_DEPRECATED
+pythonglobaldir = $(PYTHON_SITE_PKG)
+pythonglobal_SCRIPTS = swig/python/jellyfish.py
+endif
+pythonextdir = $(PYTHON_SITE_PKG)/dna_jellyfish
pythonext_SCRIPTS = swig/python/__init__.pyc
-pythonext_LTLIBRARIES = swig/python/_jellyfish.la
-swig_python__jellyfish_la_SOURCES = swig/python/swig_wrap.cpp $(SWIG_SRC)
-swig_python__jellyfish_la_CPPFLAGS = $(PYTHON_CPPFLAGS) -I$(srcdir)/include
-swig_python__jellyfish_la_LDFLAGS = -module
-swig_python__jellyfish_la_LIBADD = libjellyfish-2.0.la
+pythonext_LTLIBRARIES = swig/python/_dna_jellyfish.la
+swig_python__dna_jellyfish_la_SOURCES = swig/python/swig_wrap.cpp $(SWIG_SRC)
+swig_python__dna_jellyfish_la_CPPFLAGS = $(PYTHON_CPPFLAGS) -I$(srcdir)/include
+swig_python__dna_jellyfish_la_LDFLAGS = -module
+swig_python__dna_jellyfish_la_LIBADD = libjellyfish-2.0.la
CLEANFILES += $(PYTHON_BUILT) $(pythonext_SCRIPTS)
PYTHONC_V_GEN = $(pythonc_v_GEN_$(V))
pythonc_v_GEN_ = $(pythonc_v_GEN_$(AM_DEFAULT_VERBOSITY))
pythonc_v_GEN_0 = @echo " PYTHONC " $@;
-%/__init__.pyc: %/jellyfish.py
+%/__init__.pyc: %/dna_jellyfish.py
$(PYTHONC_V_GEN)$(PYTHON) -c 'import py_compile, sys; py_compile.compile(sys.argv[1], sys.argv[2])' $< $@
-swig/python/jellyfish.py: swig/python/swig_wrap.cpp
+swig/python/dna_jellyfish.py: swig/python/swig_wrap.cpp
EXTRA_DIST += $(PYTHON_BUILT)
endif
=====================================
swig/jellyfish.i
=====================================
--- a/swig/jellyfish.i
+++ b/swig/jellyfish.i
@@ -1,6 +1,22 @@
+#ifdef SWIGPYTHON
+// Default Python loading code does not seem to work. Use our own.
+%define MODULEIMPORT
+"
+import os
+if os.path.basename(__file__) == \"__init__.pyc\" or os.path.basename(__file__) == \"__init__.py\":
+ import dna_jellyfish.$module
+else:
+ import $module
+"
+%enddef
+%module(docstring="Jellyfish binding", moduleimport=MODULEIMPORT) dna_jellyfish
+#else
%module(docstring="Jellyfish binding") jellyfish
+#endif
+
%naturalvar; // Use const reference instead of pointers
%include "std_string.i"
+
%include "exception.i"
%include "std_except.i"
%include "typemaps.i"
@@ -8,7 +24,6 @@
%{
#ifdef SWIGPYTHON
-#define SWIG_FILE_WITH_INIT
#endif
#ifdef SWIGPERL
=====================================
swig/python/setup.py
=====================================
--- a/swig/python/setup.py
+++ b/swig/python/setup.py
@@ -29,7 +29,7 @@ jf_rpath = [re.sub(r'^', '-Wl,-rpath,', x) for x in jf_libdir]
jf_ldflags = os.popen("pkg-config --libs-only-other jellyfish-2.0").read().rstrip().split()
-jellyfish_module = Extension('_jellyfish',
+jellyfish_module = Extension('_dna_jellyfish',
sources = ['jellyfish_wrap.cxx'],
include_dirs = jf_include,
libraries = jf_libs,
@@ -37,9 +37,9 @@ jellyfish_module = Extension('_jellyfish',
extra_compile_args = ["-std=c++0x"] + jf_cflags,
extra_link_args = jf_ldflags + jf_rpath,
language = "c++")
-setup(name = 'jellyfish',
+setup(name = 'dna_jellyfish',
version = '0.0.1',
author = 'Guillaume Marcais',
description = 'Access to jellyfish k-mer counting',
ext_modules = [jellyfish_module],
- py_modules = ["jellyfish"])
+ py_modules = ["dna_jellyfish"])
=====================================
swig/python/test_hash_counter.py
=====================================
--- a/swig/python/test_hash_counter.py
+++ b/swig/python/test_hash_counter.py
@@ -1,20 +1,22 @@
import unittest
import sys
import random
-import jellyfish
+
+
+import dna_jellyfish as jf
class TestHashCounter(unittest.TestCase):
def setUp(self):
- jellyfish.MerDNA.k(100)
- self.hash = jellyfish.HashCounter(1024, 5)
+ jf.MerDNA.k(100)
+ self.hash = jf.HashCounter(1024, 5)
def test_info(self):
- self.assertEqual(100, jellyfish.MerDNA.k())
+ self.assertEqual(100, jf.MerDNA.k())
self.assertEqual(1024, self.hash.size())
self.assertEqual(5, self.hash.val_len())
def test_add(self):
- mer = jellyfish.MerDNA()
+ mer = jf.MerDNA()
good = True
for i in range(1000):
mer.randomize()
=====================================
swig/python/test_mer_file.py
=====================================
--- a/swig/python/test_mer_file.py
+++ b/swig/python/test_mer_file.py
@@ -1,12 +1,14 @@
-import jellyfish
import unittest
import sys
import os
from collections import Counter
+import dna_jellyfish as jf
+
+
class TestMerFile(unittest.TestCase):
def setUp(self):
- self.mf = jellyfish.ReadMerFile(os.path.join(data, "swig_python.jf"))
+ self.mf = jf.ReadMerFile(os.path.join(data, "swig_python.jf"))
def test_histo(self):
histo = Counter()
@@ -46,7 +48,7 @@ class TestMerFile(unittest.TestCase):
def test_query(self):
good = True
- qf = jellyfish.QueryMerFile(os.path.join(data, "swig_python.jf"))
+ qf = jf.QueryMerFile(os.path.join(data, "swig_python.jf"))
for mer, count in self.mf:
good = good and count == qf[mer]
if not good: break
=====================================
swig/python/test_string_mers.py
=====================================
--- a/swig/python/test_string_mers.py
+++ b/swig/python/test_string_mers.py
@@ -1,31 +1,35 @@
import unittest
import sys
import random
-import jellyfish
+
+import dna_jellyfish as jf
class TestStringMers(unittest.TestCase):
def setUp(self):
bases = "ACGTacgt"
self.str = ''.join(random.choice(bases) for _ in range(1000))
self.k = random.randint(10, 110)
- jellyfish.MerDNA.k(self.k)
+ jf.MerDNA.k(self.k)
def test_all_mers(self):
count = 0
- good = True
- mers = jellyfish.string_mers(self.str)
+ good1 = True
+ good2 = True
+ mers = jf.string_mers(self.str)
for m in mers:
- m2 = jellyfish.MerDNA(self.str[count:count+self.k])
- good = good and m == m2
+ m2 = jf.MerDNA(self.str[count:count+self.k])
+ good1 = good1 and m == m2
+ good2 = good2 and self.str[count:count+self.k].upper() == str(m2)
count += 1
- self.assertTrue(good)
+ self.assertTrue(good1)
+ self.assertTrue(good2)
self.assertEqual(len(self.str) - self.k + 1, count)
def test_canonical_mers(self):
good = True
- mers = jellyfish.string_canonicals(self.str)
+ mers = jf.string_canonicals(self.str)
for count, m in enumerate(mers):
- m2 = jellyfish.MerDNA(self.str[count:count+self.k])
+ m2 = jf.MerDNA(self.str[count:count+self.k])
rm2 = m2.get_reverse_complement()
good = good and (m == m2 or m == rm2)
good = good and (not (m > m2)) and (not (m > rm2))
=====================================
swig/ruby/test_hash_counter.rb
=====================================
--- a/swig/ruby/test_hash_counter.rb
+++ b/swig/ruby/test_hash_counter.rb
@@ -1,7 +1,7 @@
-require 'minitest/autorun'
+require 'test/unit'
require 'jellyfish'
-class TestHashCounter < MiniTest::Unit::TestCase
+class TestHashCounter < Test::Unit::TestCase
def setup
Jellyfish::MerDNA::k(100)
@hash = Jellyfish::HashCounter.new(1024, 5)
=====================================
swig/ruby/test_mer_file.rb
=====================================
--- a/swig/ruby/test_mer_file.rb
+++ b/swig/ruby/test_mer_file.rb
@@ -1,9 +1,9 @@
-require 'minitest/autorun'
+require 'test/unit'
require 'jellyfish'
$data = ARGV.shift
-class TestMerFile < MiniTest::Unit::TestCase
+class TestMerFile < Test::Unit::TestCase
def setup
@mf = Jellyfish::ReadMerFile.new(File.join($data, "swig_ruby.jf"))
end
=====================================
swig/ruby/test_string_mers.rb
=====================================
--- a/swig/ruby/test_string_mers.rb
+++ b/swig/ruby/test_string_mers.rb
@@ -1,7 +1,7 @@
-require 'minitest/autorun'
+require 'test/unit'
require 'jellyfish'
-class TestStringMers < MiniTest::Unit::TestCase
+class TestStringMers < Test::Unit::TestCase
def setup
bases = "ACGTacgt"
@str = (0..1000).map { bases[rand(bases.size())] }.join("")
=====================================
swig/string_mers.i
=====================================
--- a/swig/string_mers.i
+++ b/swig/string_mers.i
@@ -20,6 +20,9 @@
%{
class StringMers {
+#ifdef SWIGPYTHON
+ const char* const m_str;
+#endif
const char* m_current;
const char* const m_last;
const bool m_canonical;
@@ -28,12 +31,23 @@
public:
StringMers(const char* str, int len, bool canonical)
+#ifdef SWIGPYTHON
+ : m_str(strndup(str, len)) // In Python, duplicate the string! Can this be improved?
+ , m_current(m_str)
+#else
: m_current(str)
- , m_last(str + len)
+#endif
+ , m_last(m_current + len)
, m_canonical(canonical)
, m_filled(0)
{ }
+#ifdef SWIGPYTHON
+ ~StringMers() {
+ free((void*)m_str);
+ }
+#endif
+
bool next_mer() {
if(m_current == m_last)
return false;
=====================================
tests/compat.sh.in
=====================================
--- a/tests/compat.sh.in
+++ b/tests/compat.sh.in
@@ -9,7 +9,7 @@ SRCDIR=@abs_top_srcdir@
BUILDDIR=@abs_top_builddir@
check () {
- cut -d\ -f 2 $1 | xargs @MD5@ | sort -k2,2 | diff -w $DIFFFLAGS $1 -
+ cut -d\ -f 2 $1 | xargs @MD5@ | sed 's/ \*/ /' | sort -k2,2 | diff -w $DIFFFLAGS $1 -
}
ENABLE_RUBY_BINDING="@RUBY_EXT_LIB@"
=====================================
unit_tests/test_file_header.cc
=====================================
--- a/unit_tests/test_file_header.cc
+++ b/unit_tests/test_file_header.cc
@@ -40,14 +40,16 @@ TEST(FileHeader, WriteRead) {
const unsigned int val_len = random_bits(4);
const unsigned int max_reprobe = random_bits(7);
const double fpr = (double)random_bits(10) / 1024.0;
- RectangularBinaryMatrix m(random_bits(6) + 1, random_bits(8) + 1);
- m.randomize(random_bits);
+ RectangularBinaryMatrix m1(random_bits(6) + 1, random_bits(8) + 1);
+ m1.randomize(random_bits);
+ RectangularBinaryMatrix m2 = RectangularBinaryMatrix::identity(random_bits(6) + 1);
EXPECT_EQ(8, hw.alignment());
hw.fill_standard();
hw.size(random_size);
- hw.matrix(m);
- hw.key_len(m.r());
+ hw.matrix(m1, 1);
+ hw.matrix(m2, 2);
+ hw.key_len(m1.r());
hw.val_len(val_len);
hw.max_reprobe(max_reprobe);
hw.set_reprobes(jellyfish::quadratic_reprobes);
@@ -70,8 +72,10 @@ TEST(FileHeader, WriteRead) {
EXPECT_EQ(0, is.tellg() % 8);
EXPECT_EQ(8, hr.alignment());
EXPECT_EQ(random_size, hr.size());
- EXPECT_EQ(m, hr.matrix());
- EXPECT_EQ(m.r(), hr.key_len());
+ EXPECT_EQ(m1, hr.matrix(1));
+ EXPECT_TRUE(hr.matrix(2).is_low_identity());
+ EXPECT_EQ(m2.r(), hr.matrix(2).r());
+ EXPECT_EQ(m1.r(), hr.key_len());
EXPECT_EQ(val_len, hr.val_len());
EXPECT_EQ(fpr, hr.fpr());
=====================================
unit_tests/test_generator_manager.cc
=====================================
--- a/unit_tests/test_generator_manager.cc
+++ b/unit_tests/test_generator_manager.cc
@@ -58,7 +58,7 @@ TEST(GeneratorManager, OneLiners) {
ASSERT_TRUE(cmds.good()) << "Failed to open cmd file '" << cmds_file << "'";
cmds << "echo hello\n"
<< "date\n"
- << "uptime\n"
+ << "whoami\n"
<< "uname\n";
ASSERT_TRUE(cmds.good()) << "Failed to write to cmd file";
}
=====================================
unit_tests/test_hash_counter.cc
=====================================
--- a/unit_tests/test_hash_counter.cc
+++ b/unit_tests/test_hash_counter.cc
@@ -64,7 +64,7 @@ public:
TEST(HashCounterCooperative, SizeDouble) {
static const int mer_len = 35;
- static const int nb_threads = 5;
+ static const int nb_threads = 1;
static const int nb = 200;
static const size_t init_size = 128;
mer_dna::k(mer_len);
=====================================
unit_tests/test_large_hash_array.cc
=====================================
--- a/unit_tests/test_large_hash_array.cc
+++ b/unit_tests/test_large_hash_array.cc
@@ -19,7 +19,7 @@ void PrintTo(jellyfish::mer_dna& m, ::std::ostream* os) {
}
namespace {
-typedef jellyfish::large_hash::array<jellyfish::mer_dna> large_array;
+typedef jellyfish::large_hash::unbounded_array<jellyfish::mer_dna> large_array;
typedef std::map<jellyfish::mer_dna, uint64_t> mer_map;
typedef std::set<jellyfish::mer_dna> mer_set;
=====================================
unit_tests/test_rectangular_binary_matrix.cc
=====================================
--- a/unit_tests/test_rectangular_binary_matrix.cc
+++ b/unit_tests/test_rectangular_binary_matrix.cc
@@ -78,6 +78,10 @@ TEST(RectangularBinaryMatrix, LowIdentity) {
uint64_t res = m.times(v);
EXPECT_EQ(v.get_bits(0, std::min(r, c)), res);
+
+ RectangularBinaryMatrix m2 = RectangularBinaryMatrix::identity(r);
+ uint64_t res2 = m2.times(v);
+ EXPECT_EQ(v.get_bits(0, r), res2);
}
}
}
View it on GitLab: https://salsa.debian.org/med-team/jellyfish/commit/d2ced9a4ebd39a9a21073b29a2642a6b3d1d1d9f
---
View it on GitLab: https://salsa.debian.org/med-team/jellyfish/commit/d2ced9a4ebd39a9a21073b29a2642a6b3d1d1d9f
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.alioth.debian.org/pipermail/debian-med-commit/attachments/20180211/b31310d3/attachment-0001.html>
More information about the debian-med-commit mailing list