[med-svn] [Git][med-team/jellyfish][upstream] New upstream version 2.2.8

Michael R. Crusoe gitlab at salsa.debian.org
Sun Feb 11 18:57:37 UTC 2018


Michael R. Crusoe pushed to branch upstream at Debian Med / jellyfish


Commits:
d2ced9a4 by Michael R. Crusoe at 2018-02-11T04:40:47-08:00
New upstream version 2.2.8
- - - - -


26 changed files:

- Makefile.am
- configure.ac
- include/jellyfish/file_header.hpp
- include/jellyfish/hash_counter.hpp
- include/jellyfish/large_hash_array.hpp
- include/jellyfish/mer_overlap_sequence_parser.hpp
- include/jellyfish/rectangular_binary_matrix.hpp
- include/jellyfish/whole_sequence_parser.hpp
- jellyfish/dbg.cc
- lib/rectangular_binary_matrix.cc
- swig/Makefile.am
- swig/jellyfish.i
- swig/python/setup.py
- swig/python/test_hash_counter.py
- swig/python/test_mer_file.py
- swig/python/test_string_mers.py
- swig/ruby/test_hash_counter.rb
- swig/ruby/test_mer_file.rb
- swig/ruby/test_string_mers.rb
- swig/string_mers.i
- tests/compat.sh.in
- unit_tests/test_file_header.cc
- unit_tests/test_generator_manager.cc
- unit_tests/test_hash_counter.cc
- unit_tests/test_large_hash_array.cc
- unit_tests/test_rectangular_binary_matrix.cc


Changes:

=====================================
Makefile.am
=====================================
--- a/Makefile.am
+++ b/Makefile.am
@@ -146,7 +146,8 @@ AM_SH_LOG_FLAGS =
 TESTS = tests/generate_sequence.sh tests/parallel_hashing.sh	\
         tests/merge.sh tests/bloom_filter.sh tests/big.sh	\
         tests/subset_hashing.sh tests/multi_file.sh		\
-        tests/bloom_counter.sh tests/large_key.sh tests/sam.sh
+        tests/bloom_counter.sh tests/large_key.sh tests/sam.sh	\
+        tests/small_mers.sh
 
 EXTRA_DIST += $(TESTS)
 clean-local: clean-local-check
@@ -164,6 +165,7 @@ tests/min_qual.log: tests/generate_fastq_sequence.log
 tests/large_key.log: tests/generate_sequence.log
 tests/quality_filter.log: tests/generate_sequence.log
 tests/sam.log: tests/generate_sequence.log
+tests/small_mers.log: tests/generate_sequence.log
 
 # SWIG tests
 TESTS += tests/swig_python.sh tests/swig_ruby.sh tests/swig_perl.sh


=====================================
configure.ac
=====================================
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([jellyfish], [2.2.7], [gmarcais at umd.edu])
+AC_INIT([jellyfish], [2.2.8], [gmarcais at umd.edu])
 AC_CANONICAL_HOST
 AC_CONFIG_MACRO_DIR([m4])
 AM_INIT_AUTOMAKE([subdir-objects foreign parallel-tests color-tests])
@@ -10,18 +10,23 @@ AC_LIB_RPATH
 PKG_PROG_PKG_CONFIG
 
 # Change default compilation flags
-AC_SUBST([ALL_CXXFLAGS], [-std=c++0x])
-CXXFLAGS="-std=c++0x $CXXFLAGS"
 AC_LANG(C++)
 AC_PROG_CXX
 
 # Major version of the library
 AC_SUBST([PACKAGE_LIB], [2.0])
 
+# Check if gnu++11 is necessary
+save_CXXFLAGS=$CXXFLAGS
+AC_CANONICAL_HOST
+case "${host_os}" in
+     cygwin*) CXXFLAGS="-std=gnu++11 $save_CXXFLAGS" ;;
+     *) CXXFLAGS="-std=c++11 $save_CXXFLAGS" ;;
+esac
+
 # Try to find htslib to read SAM/BAM/CRAM files
 AC_ARG_ENABLE([htslib],
               [AS_HELP_STRING([--enable-htslib], [Look for the HTS library (default=yes)])])
-echo "enable_htslib $enable_htslib"
 AS_IF([test "x$enable_htslib" = "xyes" -o "x$enable_htslib" = "x"],
       [PKG_CHECK_MODULES([HTSLIB], [htslib], [AC_DEFINE([HAVE_HTSLIB], [1], [Defined if htslib is available])], [true])]
       [AC_LIB_LINKFLAGS_FROM_LIBS([HTSLIB_RPATH], [$HTSLIB_LIBS], [LIBTOOL])])
@@ -88,8 +93,7 @@ AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <mach-o/dyld.h>]],
                   [AC_DEFINE([HAVE_NSGETEXECUTABLEPATH], [1], [Used to find executable path on MacOS X])],
                   [AC_MSG_RESULT([no])])
 
-# Check the version of strerror_r
-AC_CHECK_HEADERS_ONCE([execinfo.h ext/stdio_filebuf.h])
+AC_CHECK_HEADERS_ONCE([execinfo.h ext/stdio_filebuf.h sys/syscall.h])
 AC_CHECK_MEMBER([siginfo_t.si_int],
                 [AC_DEFINE([HAVE_SI_INT], [1], [Define if siginfo_t.si_int exists])],
                 [], [[#include <signal.h>]])
@@ -134,6 +138,9 @@ AM_CONDITIONAL(PYTHON_BINDING, [test -n "$enable_python_binding" -a x$enable_pyt
 AM_COND_IF([PYTHON_BINDING],
            [AS_IF([test x$enable_python_binding != xyes], [PYTHON_SITE_PKG=$enable_python_binding])]
            [AX_PYTHON_DEVEL([], [$prefix])])
+AC_ARG_ENABLE([python-deprecated],
+              [AC_HELP_STRING([--enable-python-deprecated], [enable the deprecated 'jellyfish' module (in addition to 'dna_jellyfish')])])
+AM_CONDITIONAL([PYTHON_DEPRECATED], [test -z "$enable_python_deprecated" -o x$enable_python_deprecated != xno])
 
 # Ruby binding setup
 AS_IF([test -z "$enable_ruby_binding"], [enable_ruby_binding="$enable_all_binding"])


=====================================
include/jellyfish/file_header.hpp
=====================================
--- a/include/jellyfish/file_header.hpp
+++ b/include/jellyfish/file_header.hpp
@@ -45,6 +45,9 @@ public:
     name += std::to_string((long long int)i); // Cast to make gcc4.4 happy!
     const unsigned int r = root_[name]["r"].asUInt();
     const unsigned int c = root_[name]["c"].asUInt();
+    if(root_[name]["identity"].asBool())
+      return RectangularBinaryMatrix::identity(r, c);
+
     std::vector<uint64_t> raw(c, (uint64_t)0);
     for(unsigned int i = 0; i < c; ++i)
       raw[i] = root_[name]["columns"][i].asUInt64();
@@ -57,9 +60,14 @@ public:
     root_[name].clear();
     root_[name]["r"] = m.r();
     root_[name]["c"] = m.c();
-    for(unsigned int i = 0; i < m.c(); ++i) {
-      Json::UInt64 x = m[i];
-      root_[name]["columns"].append(x);
+    if(m.is_low_identity()) {
+      root_[name]["identity"] = true;
+    } else {
+      root_[name]["identity"] = false;
+      for(unsigned int i = 0; i < m.c(); ++i) {
+        Json::UInt64 x = m[i];
+        root_[name]["columns"].append(x);
+      }
     }
   }
 


=====================================
include/jellyfish/hash_counter.hpp
=====================================
--- a/include/jellyfish/hash_counter.hpp
+++ b/include/jellyfish/hash_counter.hpp
@@ -104,7 +104,7 @@ public:
 
     while(!ary_->add(k, v, &carry_shift, is_new_ptr, id_ptr)) {
       handle_full_ary();
-      v &= ~(uint64_t)0 << carry_shift;
+      v          &= ~(uint64_t)0 << carry_shift;
       // If carry_shift == 0, failed to allocate the first field for
       // key, hence status of is_new and value for id are not
       // determined yet. On the other hand, if carry_shift > 0, we
@@ -112,8 +112,8 @@ public:
       // of is_new and value of id are known. We do not update them in future
       // calls.
       if(carry_shift) {
-        is_new_ptr = &is_new_void;
-        id_ptr     = &id_void;
+        is_new_ptr  = &is_new_void;
+        id_ptr      = &id_void;
       }
     }
   }
@@ -204,9 +204,16 @@ protected:
   bool double_size(bool serial_thread) {
     if(serial_thread) {// Allocate new array for size doubling
       try {
-        new_ary_   = new array(ary_->size() * 2, ary_->key_len(), ary_->val_len(),
-                               ary_->max_reprobe(), ary_->reprobes());
-       } catch(typename array::ErrorAllocation e) {
+        if(ary_->key_len() >= sizeof(size_t) * 8 || ary_->size() < ((size_t)1 << ary_->key_len())) {
+          // Increase number of keys
+          new_ary_   = new array(ary_->size() * 2, ary_->key_len(), ary_->val_len(),
+                                 ary_->max_reprobe(), ary_->reprobes());
+        } else {
+          // Array is already maximum compared to key len, increase val_len
+          new_ary_ = new  array(ary_->size(), ary_->key_len(), ary_->val_len() + 1,
+                                ary_->max_reprobe(), ary_->reprobes());
+        }
+      } catch(typename array::ErrorAllocation e) {
         new_ary_ = 0;
       }
     }
@@ -219,10 +226,6 @@ protected:
 
     // Copy data from old to new
     uint16_t       id = atomic_t::fetch_add(&size_thid_, (uint16_t)1);
-    // Why doesn't the following work? Seems like a bug to
-    // me. Equivalent call works in test_large_hash_array. Or am I
-    // missing something?
-    // eager_iterator it = ary_->iterator_slice<eager_iterator>(id, nb_threads_);
     eager_iterator it = ary_->eager_slice(id, nb_threads_);
     while(it.next())
       my_ary->add(it.key(), it.val());


=====================================
include/jellyfish/large_hash_array.hpp
=====================================
--- a/include/jellyfish/large_hash_array.hpp
+++ b/include/jellyfish/large_hash_array.hpp
@@ -930,23 +930,35 @@ public:
 
 };
 
-template<typename Key, typename word = uint64_t, typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
-class array :
+// Large array. Memory managed by the mmap allocator. Does not check the
+// relation between the size of the array and key_len.
+template<typename Key, typename word = uint64_t,
+         typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
+class unbounded_array  :
     protected mem_block_t,
-    public array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> >
+    public array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> >
 {
-  typedef array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> > super;
-  friend class array_base<Key, word, atomic_t, array<Key, word, atomic_t, mem_block_t> >;
+  typedef array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> > super;
+  friend class array_base<Key, word, atomic_t, unbounded_array<Key, word, atomic_t, mem_block_t> >;
 
 public:
-  array(size_t size, // Size of hash. To be rounded up to a power of 2
-        uint16_t key_len, // Size of key in bits
-        uint16_t val_len, // Size of val in bits
-        uint16_t reprobe_limit, // Maximum reprobe
-        const size_t* reprobes = quadratic_reprobes) : // Reprobing policy
-    mem_block_t(),
-    super(size, key_len, val_len, reprobe_limit, RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse(),
-          reprobes)
+  unbounded_array(size_t size, // Size of hash. To be rounded up to a power of 2
+                  uint16_t key_len, // Size of key in bits
+                  uint16_t val_len, // Size of val in bits
+                  uint16_t reprobe_limit, // Maximum reprobe
+                  const size_t* reprobes = quadratic_reprobes) // Reprobing policy
+    : super(size, key_len, val_len, reprobe_limit,
+            RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse(),
+            reprobes)
+  { }
+
+    unbounded_array(size_t size, // Size of hash. To be rounded up to a power of 2
+                    uint16_t key_len, // Size of key in bits
+                    uint16_t val_len, // Size of val in bits
+                    uint16_t reprobe_limit, // Maximum reprobe
+                    RectangularBinaryMatrix&& m, // Hashing matrix
+                    const size_t* reprobes = quadratic_reprobes) // Reprobing policy
+      : super(size, key_len, val_len, reprobe_limit, m, reprobes)
   { }
 
 protected:
@@ -956,6 +968,35 @@ protected:
   }
 };
 
+// Large array. Memory managed by the mmap allocator. Bounds the size
+// of the array if the key_len is small.
+template<typename Key, typename word = uint64_t,
+         typename atomic_t = ::atomic::gcc, typename mem_block_t = ::allocators::mmap>
+class array : public unbounded_array<Key, word, atomic_t, mem_block_t>
+{
+  typedef unbounded_array<Key, word, atomic_t, mem_block_t> super;
+
+  static size_t key_len_size(uint16_t key_len) {
+    return key_len >= std::numeric_limits<size_t>::digits ? std::numeric_limits<size_t>::max() / 2 : (size_t)1 << key_len;
+  }
+
+public:
+  array(size_t size, // Size of hash. To be rounded up to a power of 2
+        uint16_t key_len, // Size of key in bits
+        uint16_t val_len, // Size of val in bits
+        uint16_t reprobe_limit, // Maximum reprobe
+        const size_t* reprobes = quadratic_reprobes) : // Reprobing policy
+    super(std::min(size, key_len_size(key_len)), key_len, val_len, reprobe_limit,
+          (size < key_len_size(key_len))
+          ? RectangularBinaryMatrix(ceilLog2(size), key_len).randomize_pseudo_inverse()
+          : RectangularBinaryMatrix::identity(key_len),
+          reprobes)
+  {
+    //    std::cerr << this->size() << ' ' << this->val_len() << '\n';
+  }
+
+};
+
 struct ptr_info {
   void*  ptr_;
   size_t bytes_;


=====================================
include/jellyfish/mer_overlap_sequence_parser.hpp
=====================================
--- a/include/jellyfish/mer_overlap_sequence_parser.hpp
+++ b/include/jellyfish/mer_overlap_sequence_parser.hpp
@@ -131,7 +131,7 @@ protected:
     // streams_iterator_ noticed that we closed that stream before
     // requesting a new one.
     st.stream.reset();
-    st.stream = streams_iterator_.next();
+    st.stream = std::move(streams_iterator_.next());
     if(!st.stream.good()) {
       st.type = DONE_TYPE;
       return false;


=====================================
include/jellyfish/rectangular_binary_matrix.hpp
=====================================
--- a/include/jellyfish/rectangular_binary_matrix.hpp
+++ b/include/jellyfish/rectangular_binary_matrix.hpp
@@ -41,19 +41,33 @@
 // bits of each word are set to 0).
 //
 // Multiplication between a matrix and vector of size _c x 1 gives a
-// vector of size _r x 1 stored as one 64 bit word.
+// vector of size _r x 1 stored as one 64 bit word. A matrix with a
+// NULL _columns pointer behaves like the identity.
 
 namespace jellyfish {
   class RectangularBinaryMatrix {
+    explicit RectangularBinaryMatrix(unsigned int c)
+      : _columns(NULL)
+      , _r(c)
+      , _c(c)
+    { }
+
   public:
     RectangularBinaryMatrix(unsigned int r, unsigned c)
       : _columns(alloc(r, c)), _r(r), _c(c) { }
     RectangularBinaryMatrix(const RectangularBinaryMatrix &rhs)
-    : _columns(alloc(rhs._r, rhs._c)), _r(rhs._r), _c(rhs._c) {
-      memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
+      : _columns(rhs._columns ? alloc(rhs._r, rhs._c) : NULL)
+      , _r(rhs._r)
+      , _c(rhs._c)
+    {
+      if(_columns)
+        memcpy(_columns, rhs._columns, sizeof(uint64_t) * _c);
     }
-    RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs) :
-    _columns(rhs._columns), _r(rhs._r), _c(rhs._c) {
+    RectangularBinaryMatrix(RectangularBinaryMatrix&& rhs)
+      : _columns(rhs._columns)
+      , _r(rhs._r)
+      , _c(rhs._c)
+    {
       rhs._columns = 0;
     }
     // Initialize from raw data. raw must contain at least c words.
@@ -67,6 +81,16 @@ namespace jellyfish {
       free(_columns);
     }
 
+    static RectangularBinaryMatrix identity(unsigned c) {
+      return RectangularBinaryMatrix(c);
+    }
+
+    static RectangularBinaryMatrix identity(unsigned r, unsigned c) {
+      RectangularBinaryMatrix res(r, c);
+      res.init_low_identity();
+      return res;
+    }
+
     RectangularBinaryMatrix &operator=(const RectangularBinaryMatrix &rhs) {
       if(_r != rhs._r || _c != rhs._c)
         throw std::invalid_argument("RHS matrix dimensions do not match");
@@ -90,7 +114,7 @@ namespace jellyfish {
     }
 
     // Get i-th column. No check on range
-    const uint64_t & operator[](unsigned int i) const { return _columns[i]; }
+    uint64_t operator[](unsigned int i) const { return _columns ? _columns[i] : ((uint64_t)1 << i); }
 
     unsigned int r() const { return _r; }
     unsigned int c() const { return _c; }
@@ -112,8 +136,8 @@ namespace jellyfish {
 
     // Make and check that the matrix is the lower right corner of the
     // identity.
-    void init_low_identity();
-    bool is_low_identity();
+    void init_low_identity(bool simplify = true);
+    bool is_low_identity() const;
 
     // Left matrix vector multiplication. Type T supports the operator
     // v[i] to return the i-th 64 bit word of v.
@@ -204,6 +228,7 @@ namespace jellyfish {
 
   template<typename T>
   uint64_t RectangularBinaryMatrix::times_loop(const T &v) const {
+    if(!_columns) return v[0] & cmask();
     uint64_t       *p   = _columns + _c - 1;
     uint64_t        res = 0, x = 0, j = 0;
     const uint64_t  one = (uint64_t)1;
@@ -244,6 +269,7 @@ namespace jellyfish {
 #ifdef HAVE_SSE
   template<typename T>
   uint64_t RectangularBinaryMatrix::times_sse(const T &v) const {
+    if(!_columns) return v[0] & cmask();
 #define FFs ((uint64_t)-1)
     static const uint64_t smear[8] asm("smear") __attribute__ ((aligned(16),used)) =
       {0, 0, 0, FFs, FFs, 0, FFs, FFs};
@@ -338,6 +364,7 @@ namespace jellyfish {
 #ifdef HAVE_INT128
   template<typename T>
   uint64_t RectangularBinaryMatrix::times_128(const T &v) const {
+    if(!_columns) return v[0] & cmask();
     typedef unsigned __int128 u128;
     static const u128 smear[4] =
       { (u128)0,


=====================================
include/jellyfish/whole_sequence_parser.hpp
=====================================
--- a/include/jellyfish/whole_sequence_parser.hpp
+++ b/include/jellyfish/whole_sequence_parser.hpp
@@ -92,7 +92,7 @@ public:
 protected:
   void open_next_file(stream_status& st) {
     st.stream.reset();
-    st.stream = streams_iterator_.next();
+    st.stream = std::move(streams_iterator_.next());
     if(!st.stream.good()) {
       st.type = DONE_TYPE;
       return;


=====================================
jellyfish/dbg.cc
=====================================
--- a/jellyfish/dbg.cc
+++ b/jellyfish/dbg.cc
@@ -16,7 +16,14 @@
 
 #include <jellyfish/dbg.hpp>
 #include <jellyfish/time.hpp>
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef HAVE_SYS_SYSCALL_H
 #include <sys/syscall.h>
+#endif
 
 namespace dbg {
   pthread_mutex_t print_t::_lock      = PTHREAD_MUTEX_INITIALIZER;
@@ -33,7 +40,7 @@ namespace dbg {
  }
   Time toc() {
 #ifdef DEBUG
-    Time t; 
+    Time t;
     return t - _tic_time;
 #else
     return Time::zero;


=====================================
lib/rectangular_binary_matrix.cc
=====================================
--- a/lib/rectangular_binary_matrix.cc
+++ b/lib/rectangular_binary_matrix.cc
@@ -31,13 +31,20 @@ uint64_t *jellyfish::RectangularBinaryMatrix::alloc(unsigned int r, unsigned int
   // Make sure the number of words allocated is a multiple of
   // 8. Necessary for loop unrolling of vector multiplication
   size_t alloc_columns = (c / 8 + (c % 8 != 0)) * 8;
-  if(posix_memalign(&mem, sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t)))
+  //  if(posix_memalign(&mem, sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t)))
+  if(!(mem = aligned_alloc(sizeof(uint64_t) * 2, alloc_columns * sizeof(uint64_t))))
     throw std::bad_alloc();
   memset(mem, '\0', sizeof(uint64_t) * alloc_columns);
   return (uint64_t *)mem;
 }
 
-void jellyfish::RectangularBinaryMatrix::init_low_identity() {
+void jellyfish::RectangularBinaryMatrix::init_low_identity(bool simplify) {
+  if(!_columns) return;
+  if(_c == _r && simplify) {
+    free(_columns);
+    _columns = NULL;
+    return;
+  }
   memset(_columns, '\0', sizeof(uint64_t) * _c);
   unsigned int row = std::min(_c, _r);
   unsigned int col = _c - row;
@@ -46,7 +53,8 @@ void jellyfish::RectangularBinaryMatrix::init_low_identity() {
     _columns[i] = _columns[i - 1] >> 1;
 }
 
-bool jellyfish::RectangularBinaryMatrix::is_low_identity() {
+bool jellyfish::RectangularBinaryMatrix::is_low_identity() const {
+  if(!_columns) return true;
   unsigned int row = std::min(_c, _r);
   unsigned int col = _c - row;
 
@@ -64,6 +72,9 @@ bool jellyfish::RectangularBinaryMatrix::is_low_identity() {
 jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_multiplication(const jellyfish::RectangularBinaryMatrix &rhs) const {
   if(_r != rhs._r || _c != rhs._c)
     throw std::domain_error("Matrices of different size");
+  if(!_columns) return rhs;
+  if(!rhs._columns) return *this;
+
   RectangularBinaryMatrix res(_r, _c);
 
   // v is a vector. The lower part is equal to the given column of rhs
@@ -102,6 +113,8 @@ jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_mu
 }
 
 unsigned int jellyfish::RectangularBinaryMatrix::pseudo_rank() const {
+  if(!_columns) return _c;
+
   unsigned int            rank = _c;
   RectangularBinaryMatrix pivot(*this);
 
@@ -136,8 +149,10 @@ unsigned int jellyfish::RectangularBinaryMatrix::pseudo_rank() const {
 }
 
 jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_inverse() const {
+  if(!_columns) return *this;
+
   RectangularBinaryMatrix pivot(*this);
-  RectangularBinaryMatrix res(_r, _c); res.init_low_identity();
+  RectangularBinaryMatrix res(_r, _c); res.init_low_identity(false);
   unsigned int            i, j;
   uint64_t                mask;
 
@@ -186,12 +201,19 @@ jellyfish::RectangularBinaryMatrix jellyfish::RectangularBinaryMatrix::pseudo_in
 }
 
 void jellyfish::RectangularBinaryMatrix::print(std::ostream &os) const {
-  uint64_t mask = (uint64_t)1 << (_r - 1);
-  for( ; mask; mask >>= 1) {
-    for(unsigned int j = 0; j < _c; ++j) {
-      os << (mask & _columns[j] ? "1" : "0");
+  if(!_columns) {
+    for(unsigned int i = 0; i < _c; ++i) {
+      for(unsigned int j = 0; j < _c; ++j)
+        os << (i == j ? '1' : '0');
+      os << '\n';
     }
-    os << "\n";
+  } else {
+      uint64_t mask = (uint64_t)1 << (_r - 1);
+      for( ; mask; mask >>= 1) {
+        for(unsigned int j = 0; j < _c; ++j)
+          os << (mask & _columns[j] ? '1' : '0');
+        os << '\n';
+      }
   }
 }
 


=====================================
swig/Makefile.am
=====================================
--- a/swig/Makefile.am
+++ b/swig/Makefile.am
@@ -16,23 +16,27 @@ endif
 
 # Python support
 if PYTHON_BINDING
-PYTHON_BUILT = swig/python/swig_wrap.cpp swig/python/jellyfish.py
+PYTHON_BUILT = swig/python/swig_wrap.cpp swig/python/dna_jellyfish.py
 BUILT_SOURCES += $(PYTHON_BUILT)
 
-pythonextdir = $(PYTHON_SITE_PKG)/jellyfish
+if PYTHON_DEPRECATED
+pythonglobaldir = $(PYTHON_SITE_PKG)
+pythonglobal_SCRIPTS = swig/python/jellyfish.py
+endif
+pythonextdir = $(PYTHON_SITE_PKG)/dna_jellyfish
 pythonext_SCRIPTS = swig/python/__init__.pyc
-pythonext_LTLIBRARIES = swig/python/_jellyfish.la
-swig_python__jellyfish_la_SOURCES = swig/python/swig_wrap.cpp $(SWIG_SRC)
-swig_python__jellyfish_la_CPPFLAGS = $(PYTHON_CPPFLAGS) -I$(srcdir)/include
-swig_python__jellyfish_la_LDFLAGS = -module
-swig_python__jellyfish_la_LIBADD = libjellyfish-2.0.la
+pythonext_LTLIBRARIES = swig/python/_dna_jellyfish.la
+swig_python__dna_jellyfish_la_SOURCES = swig/python/swig_wrap.cpp $(SWIG_SRC)
+swig_python__dna_jellyfish_la_CPPFLAGS = $(PYTHON_CPPFLAGS) -I$(srcdir)/include
+swig_python__dna_jellyfish_la_LDFLAGS = -module
+swig_python__dna_jellyfish_la_LIBADD = libjellyfish-2.0.la
 CLEANFILES += $(PYTHON_BUILT) $(pythonext_SCRIPTS)
 PYTHONC_V_GEN = $(pythonc_v_GEN_$(V))
 pythonc_v_GEN_ = $(pythonc_v_GEN_$(AM_DEFAULT_VERBOSITY))
 pythonc_v_GEN_0 = @echo "  PYTHONC " $@;
-%/__init__.pyc: %/jellyfish.py
+%/__init__.pyc: %/dna_jellyfish.py
 	$(PYTHONC_V_GEN)$(PYTHON) -c 'import py_compile, sys; py_compile.compile(sys.argv[1], sys.argv[2])' $< $@
-swig/python/jellyfish.py: swig/python/swig_wrap.cpp
+swig/python/dna_jellyfish.py: swig/python/swig_wrap.cpp
 EXTRA_DIST += $(PYTHON_BUILT)
 endif
 


=====================================
swig/jellyfish.i
=====================================
--- a/swig/jellyfish.i
+++ b/swig/jellyfish.i
@@ -1,6 +1,22 @@
+#ifdef SWIGPYTHON
+// Default Python loading code does not seem to work. Use our own.
+%define MODULEIMPORT
+"
+import os
+if os.path.basename(__file__) == \"__init__.pyc\" or os.path.basename(__file__) == \"__init__.py\":
+  import dna_jellyfish.$module
+else:
+  import $module
+"
+%enddef
+%module(docstring="Jellyfish binding", moduleimport=MODULEIMPORT) dna_jellyfish
+#else
 %module(docstring="Jellyfish binding") jellyfish
+#endif
+
 %naturalvar; // Use const reference instead of pointers
 %include "std_string.i"
+
 %include "exception.i"
 %include "std_except.i"
 %include "typemaps.i"
@@ -8,7 +24,6 @@
 
 %{
 #ifdef SWIGPYTHON
-#define SWIG_FILE_WITH_INIT
 #endif
 
 #ifdef SWIGPERL


=====================================
swig/python/setup.py
=====================================
--- a/swig/python/setup.py
+++ b/swig/python/setup.py
@@ -29,7 +29,7 @@ jf_rpath   = [re.sub(r'^', '-Wl,-rpath,', x) for x in jf_libdir]
 jf_ldflags = os.popen("pkg-config --libs-only-other jellyfish-2.0").read().rstrip().split()
 
 
-jellyfish_module = Extension('_jellyfish',
+jellyfish_module = Extension('_dna_jellyfish',
                              sources = ['jellyfish_wrap.cxx'],
                              include_dirs = jf_include,
                              libraries = jf_libs,
@@ -37,9 +37,9 @@ jellyfish_module = Extension('_jellyfish',
                              extra_compile_args = ["-std=c++0x"] + jf_cflags,
                              extra_link_args = jf_ldflags + jf_rpath,
                              language = "c++")
-setup(name = 'jellyfish',
+setup(name = 'dna_jellyfish',
       version = '0.0.1',
       author = 'Guillaume Marcais',
       description = 'Access to jellyfish k-mer counting',
       ext_modules = [jellyfish_module],
-      py_modules = ["jellyfish"])
+      py_modules = ["dna_jellyfish"])


=====================================
swig/python/test_hash_counter.py
=====================================
--- a/swig/python/test_hash_counter.py
+++ b/swig/python/test_hash_counter.py
@@ -1,20 +1,22 @@
 import unittest
 import sys
 import random
-import jellyfish
+
+
+import dna_jellyfish as jf
 
 class TestHashCounter(unittest.TestCase):
     def setUp(self):
-        jellyfish.MerDNA.k(100)
-        self.hash = jellyfish.HashCounter(1024, 5)
+        jf.MerDNA.k(100)
+        self.hash = jf.HashCounter(1024, 5)
 
     def test_info(self):
-        self.assertEqual(100, jellyfish.MerDNA.k())
+        self.assertEqual(100, jf.MerDNA.k())
         self.assertEqual(1024, self.hash.size())
         self.assertEqual(5, self.hash.val_len())
 
     def test_add(self):
-        mer  = jellyfish.MerDNA()
+        mer  = jf.MerDNA()
         good = True
         for i in range(1000):
             mer.randomize()


=====================================
swig/python/test_mer_file.py
=====================================
--- a/swig/python/test_mer_file.py
+++ b/swig/python/test_mer_file.py
@@ -1,12 +1,14 @@
-import jellyfish
 import unittest
 import sys
 import os
 from collections import Counter
 
+import dna_jellyfish as jf
+
+
 class TestMerFile(unittest.TestCase):
     def setUp(self):
-        self.mf = jellyfish.ReadMerFile(os.path.join(data, "swig_python.jf"))
+        self.mf = jf.ReadMerFile(os.path.join(data, "swig_python.jf"))
 
     def test_histo(self):
         histo = Counter()
@@ -46,7 +48,7 @@ class TestMerFile(unittest.TestCase):
 
     def test_query(self):
         good = True
-        qf   = jellyfish.QueryMerFile(os.path.join(data, "swig_python.jf"))
+        qf   = jf.QueryMerFile(os.path.join(data, "swig_python.jf"))
         for mer, count in self.mf:
             good = good and count == qf[mer]
             if not good: break


=====================================
swig/python/test_string_mers.py
=====================================
--- a/swig/python/test_string_mers.py
+++ b/swig/python/test_string_mers.py
@@ -1,31 +1,35 @@
 import unittest
 import sys
 import random
-import jellyfish
+
+import dna_jellyfish as jf
 
 class TestStringMers(unittest.TestCase):
     def setUp(self):
         bases = "ACGTacgt"
         self.str = ''.join(random.choice(bases) for _ in range(1000))
         self.k = random.randint(10, 110)
-        jellyfish.MerDNA.k(self.k)
+        jf.MerDNA.k(self.k)
 
     def test_all_mers(self):
         count = 0
-        good = True
-        mers = jellyfish.string_mers(self.str)
+        good1 = True
+        good2 = True
+        mers = jf.string_mers(self.str)
         for m in mers:
-            m2 = jellyfish.MerDNA(self.str[count:count+self.k])
-            good = good and m == m2
+            m2 = jf.MerDNA(self.str[count:count+self.k])
+            good1 = good1 and m == m2
+            good2 = good2 and self.str[count:count+self.k].upper() == str(m2)
             count += 1
-        self.assertTrue(good)
+        self.assertTrue(good1)
+        self.assertTrue(good2)
         self.assertEqual(len(self.str) - self.k + 1, count)
 
     def test_canonical_mers(self):
         good = True
-        mers = jellyfish.string_canonicals(self.str)
+        mers = jf.string_canonicals(self.str)
         for count, m in enumerate(mers):
-            m2 = jellyfish.MerDNA(self.str[count:count+self.k])
+            m2 = jf.MerDNA(self.str[count:count+self.k])
             rm2 = m2.get_reverse_complement()
             good = good and (m == m2 or m == rm2)
             good = good and (not (m > m2)) and (not (m > rm2))


=====================================
swig/ruby/test_hash_counter.rb
=====================================
--- a/swig/ruby/test_hash_counter.rb
+++ b/swig/ruby/test_hash_counter.rb
@@ -1,7 +1,7 @@
-require 'minitest/autorun'
+require 'test/unit'
 require 'jellyfish'
 
-class TestHashCounter < MiniTest::Unit::TestCase
+class TestHashCounter < Test::Unit::TestCase
   def setup
     Jellyfish::MerDNA::k(100)
     @hash = Jellyfish::HashCounter.new(1024, 5)


=====================================
swig/ruby/test_mer_file.rb
=====================================
--- a/swig/ruby/test_mer_file.rb
+++ b/swig/ruby/test_mer_file.rb
@@ -1,9 +1,9 @@
-require 'minitest/autorun'
+require 'test/unit'
 require 'jellyfish'
 
 $data = ARGV.shift
 
-class TestMerFile < MiniTest::Unit::TestCase
+class TestMerFile < Test::Unit::TestCase
   def setup
     @mf = Jellyfish::ReadMerFile.new(File.join($data, "swig_ruby.jf"))
   end


=====================================
swig/ruby/test_string_mers.rb
=====================================
--- a/swig/ruby/test_string_mers.rb
+++ b/swig/ruby/test_string_mers.rb
@@ -1,7 +1,7 @@
-require 'minitest/autorun'
+require 'test/unit'
 require 'jellyfish'
 
-class TestStringMers < MiniTest::Unit::TestCase
+class TestStringMers < Test::Unit::TestCase
   def setup
     bases = "ACGTacgt"
     @str = (0..1000).map { bases[rand(bases.size())] }.join("")


=====================================
swig/string_mers.i
=====================================
--- a/swig/string_mers.i
+++ b/swig/string_mers.i
@@ -20,6 +20,9 @@
 
 %{
   class StringMers {
+#ifdef SWIGPYTHON
+    const char* const m_str;
+#endif
     const char*       m_current;
     const char* const m_last;
     const bool        m_canonical;
@@ -28,12 +31,23 @@
 
   public:
     StringMers(const char* str, int len, bool canonical)
+#ifdef SWIGPYTHON
+      : m_str(strndup(str, len)) // In Python, duplicate the string! Can this be improved?
+      , m_current(m_str)
+#else
       : m_current(str)
-      , m_last(str + len)
+#endif
+      , m_last(m_current + len)
       , m_canonical(canonical)
       , m_filled(0)
     { }
 
+#ifdef SWIGPYTHON
+    ~StringMers() {
+      free((void*)m_str);
+    }
+#endif
+
     bool next_mer() {
       if(m_current == m_last)
         return false;


=====================================
tests/compat.sh.in
=====================================
--- a/tests/compat.sh.in
+++ b/tests/compat.sh.in
@@ -9,7 +9,7 @@ SRCDIR=@abs_top_srcdir@
 BUILDDIR=@abs_top_builddir@
 
 check () {
-    cut -d\  -f 2 $1 | xargs @MD5@ | sort -k2,2 | diff -w $DIFFFLAGS $1 -
+    cut -d\  -f 2 $1 | xargs @MD5@ | sed 's/ \*/ /' | sort -k2,2 | diff -w $DIFFFLAGS $1 -
 }
 
 ENABLE_RUBY_BINDING="@RUBY_EXT_LIB@"


=====================================
unit_tests/test_file_header.cc
=====================================
--- a/unit_tests/test_file_header.cc
+++ b/unit_tests/test_file_header.cc
@@ -40,14 +40,16 @@ TEST(FileHeader, WriteRead) {
   const unsigned int val_len = random_bits(4);
   const unsigned int max_reprobe = random_bits(7);
   const double fpr = (double)random_bits(10) / 1024.0;
-  RectangularBinaryMatrix m(random_bits(6) + 1, random_bits(8) + 1);
-  m.randomize(random_bits);
+  RectangularBinaryMatrix m1(random_bits(6) + 1, random_bits(8) + 1);
+  m1.randomize(random_bits);
+  RectangularBinaryMatrix m2 = RectangularBinaryMatrix::identity(random_bits(6) + 1);
 
   EXPECT_EQ(8, hw.alignment());
   hw.fill_standard();
   hw.size(random_size);
-  hw.matrix(m);
-  hw.key_len(m.r());
+  hw.matrix(m1, 1);
+  hw.matrix(m2, 2);
+  hw.key_len(m1.r());
   hw.val_len(val_len);
   hw.max_reprobe(max_reprobe);
   hw.set_reprobes(jellyfish::quadratic_reprobes);
@@ -70,8 +72,10 @@ TEST(FileHeader, WriteRead) {
   EXPECT_EQ(0, is.tellg() % 8);
   EXPECT_EQ(8, hr.alignment());
   EXPECT_EQ(random_size, hr.size());
-  EXPECT_EQ(m, hr.matrix());
-  EXPECT_EQ(m.r(), hr.key_len());
+  EXPECT_EQ(m1, hr.matrix(1));
+  EXPECT_TRUE(hr.matrix(2).is_low_identity());
+  EXPECT_EQ(m2.r(), hr.matrix(2).r());
+  EXPECT_EQ(m1.r(), hr.key_len());
   EXPECT_EQ(val_len, hr.val_len());
   EXPECT_EQ(fpr, hr.fpr());
 


=====================================
unit_tests/test_generator_manager.cc
=====================================
--- a/unit_tests/test_generator_manager.cc
+++ b/unit_tests/test_generator_manager.cc
@@ -58,7 +58,7 @@ TEST(GeneratorManager, OneLiners) {
     ASSERT_TRUE(cmds.good()) << "Failed to open cmd file '" << cmds_file << "'";
     cmds << "echo hello\n"
          << "date\n"
-         << "uptime\n"
+         << "whoami\n"
          << "uname\n";
     ASSERT_TRUE(cmds.good()) << "Failed to write to cmd file";
   }


=====================================
unit_tests/test_hash_counter.cc
=====================================
--- a/unit_tests/test_hash_counter.cc
+++ b/unit_tests/test_hash_counter.cc
@@ -64,7 +64,7 @@ public:
 
 TEST(HashCounterCooperative, SizeDouble) {
   static const int    mer_len    = 35;
-  static const int    nb_threads = 5;
+  static const int    nb_threads = 1;
   static const int    nb         = 200;
   static const size_t init_size  = 128;
   mer_dna::k(mer_len);


=====================================
unit_tests/test_large_hash_array.cc
=====================================
--- a/unit_tests/test_large_hash_array.cc
+++ b/unit_tests/test_large_hash_array.cc
@@ -19,7 +19,7 @@ void PrintTo(jellyfish::mer_dna& m, ::std::ostream* os) {
 }
 
 namespace {
-typedef jellyfish::large_hash::array<jellyfish::mer_dna> large_array;
+typedef jellyfish::large_hash::unbounded_array<jellyfish::mer_dna> large_array;
 typedef std::map<jellyfish::mer_dna, uint64_t> mer_map;
 typedef std::set<jellyfish::mer_dna> mer_set;
 


=====================================
unit_tests/test_rectangular_binary_matrix.cc
=====================================
--- a/unit_tests/test_rectangular_binary_matrix.cc
+++ b/unit_tests/test_rectangular_binary_matrix.cc
@@ -78,6 +78,10 @@ TEST(RectangularBinaryMatrix, LowIdentity) {
 
       uint64_t res = m.times(v);
       EXPECT_EQ(v.get_bits(0, std::min(r, c)), res);
+
+      RectangularBinaryMatrix m2 = RectangularBinaryMatrix::identity(r);
+      uint64_t res2 = m2.times(v);
+      EXPECT_EQ(v.get_bits(0, r), res2);
     }
   }
 }



View it on GitLab: https://salsa.debian.org/med-team/jellyfish/commit/d2ced9a4ebd39a9a21073b29a2642a6b3d1d1d9f

---
View it on GitLab: https://salsa.debian.org/med-team/jellyfish/commit/d2ced9a4ebd39a9a21073b29a2642a6b3d1d1d9f
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.alioth.debian.org/pipermail/debian-med-commit/attachments/20180211/b31310d3/attachment-0001.html>


More information about the debian-med-commit mailing list