[DRE-commits] [ruby-nmatrix] 01/01: Imported Upstream version 0.1.0~rc1

Thu Feb 6 14:25:35 UTC 2014

This is an automated email from the git hooks/post-receive script.

boutil pushed a commit to annotated tag upstream/0.1.0_rc1
in repository ruby-nmatrix.

commit 78eb5813de428b3c8917dcdd7965bae612c598b9
Author: Cédric Boutillier <boutil at debian.org>
Date:   Sun Jan 12 11:02:38 2014 +0100

    Imported Upstream version 0.1.0~rc1
---
 Gemfile                                            |    1 +
 History.txt                                        |   96 +-
 LICENSE.txt                                        |    4 +-
 README.rdoc                                        |   50 +-
 Rakefile                                           |   48 +-
 checksums.yaml.gz                                  |  Bin 268 -> 270 bytes
 ext/nmatrix/data/complex.h                         |    4 +-
 ext/nmatrix/data/data.cpp                          |   78 +-
 ext/nmatrix/data/data.h                            |   96 +-
 ext/nmatrix/data/meta.h                            |    4 +-
 ext/nmatrix/data/rational.h                        |    4 +-
 ext/nmatrix/data/ruby_object.h                     |    4 +-
 ext/nmatrix/extconf.rb                             |  173 +--
 ext/nmatrix/math.cpp                               |   85 +-
 ext/nmatrix/math/asum.h                            |    6 +-
 ext/nmatrix/math/geev.h                            |    4 +-
 ext/nmatrix/math/gemm.h                            |    8 +-
 ext/nmatrix/math/gemv.h                            |    8 +-
 ext/nmatrix/math/ger.h                             |    4 +-
 ext/nmatrix/math/gesdd.h                           |    4 +-
 ext/nmatrix/math/gesvd.h                           |    4 +-
 ext/nmatrix/math/getf2.h                           |    4 +-
 ext/nmatrix/math/getrf.h                           |    4 +-
 ext/nmatrix/math/getri.h                           |    4 +-
 ext/nmatrix/math/getrs.h                           |   10 +-
 ext/nmatrix/math/idamax.h                          |    4 +-
 ext/nmatrix/math/inc.h                             |   18 +-
 ext/nmatrix/math/laswp.h                           |    4 +-
 ext/nmatrix/math/long_dtype.h                      |    4 +-
 ext/nmatrix/math/math.h                            |   26 +-
 ext/nmatrix/math/nrm2.h                            |    6 +-
 ext/nmatrix/math/potrs.h                           |   10 +-
 ext/nmatrix/math/rot.h                             |    4 +-
 ext/nmatrix/math/rotg.h                            |    4 +-
 ext/nmatrix/math/scal.h                            |    4 +-
 ext/nmatrix/math/swap.h                            |    4 +-
 ext/nmatrix/math/trsm.h                            |   10 +-
 ext/nmatrix/nm_memory.h                            |   60 ++
 ext/nmatrix/nmatrix.cpp                            |   60 +-
 ext/nmatrix/nmatrix.h                              |   49 +-
 ext/nmatrix/ruby_constants.cpp                     |    6 +-
 ext/nmatrix/ruby_constants.h                       |    6 +-
 ext/nmatrix/ruby_nmatrix.c                         | 1107 +++++++++++++++++---
 ext/nmatrix/storage/common.cpp                     |    4 +-
 ext/nmatrix/storage/common.h                       |    4 +-
 ext/nmatrix/storage/{ => dense}/dense.cpp          |  353 +++++--
 ext/nmatrix/storage/{ => dense}/dense.h            |   11 +-
 ext/nmatrix/storage/{ => list}/list.cpp            |  615 +++++++++--
 ext/nmatrix/storage/{ => list}/list.h              |   19 +-
 ext/nmatrix/storage/storage.cpp                    |   67 +-
 ext/nmatrix/storage/storage.h                      |    8 +-
 ext/nmatrix/storage/yale/class.h                   |  155 ++-
 ext/nmatrix/storage/yale/iterators/base.h          |    4 +-
 ext/nmatrix/storage/yale/iterators/iterator.h      |    4 +-
 ext/nmatrix/storage/yale/iterators/row.h           |    4 +-
 ext/nmatrix/storage/yale/iterators/row_stored.h    |    4 +-
 ext/nmatrix/storage/yale/iterators/row_stored_nd.h |    7 +-
 .../storage/yale/iterators/stored_diagonal.h       |    4 +-
 ext/nmatrix/storage/yale/math/transpose.h          |    4 +-
 ext/nmatrix/storage/yale/yale.cpp                  |  395 ++++++-
 ext/nmatrix/storage/yale/yale.h                    |   10 +-
 ext/nmatrix/types.h                                |    4 +-
 ext/nmatrix/util/io.cpp                            |   10 +-
 ext/nmatrix/util/io.h                              |    4 +-
 ext/nmatrix/util/sl_list.cpp                       |   67 +-
 ext/nmatrix/util/sl_list.h                         |    6 +-
 ext/nmatrix/util/util.h                            |    4 +-
 lib/nmatrix.rb                                     |    4 +-
 lib/nmatrix/blas.rb                                |    4 +-
 lib/nmatrix/enumerate.rb                           |   23 +-
 lib/nmatrix/io/market.rb                           |    5 +-
 lib/nmatrix/io/mat5_reader.rb                      |    4 +-
 lib/nmatrix/io/mat_reader.rb                       |    4 +-
 lib/nmatrix/lapack.rb                              |   92 +-
 lib/nmatrix/math.rb                                |  233 +++-
 lib/nmatrix/monkeys.rb                             |   26 +-
 lib/nmatrix/nmatrix.rb                             |  403 ++++++-
 lib/nmatrix/nvector.rb                             |   66 +-
 lib/nmatrix/rspec.rb                               |    4 +-
 lib/nmatrix/shortcuts.rb                           |   75 +-
 lib/nmatrix/version.rb                             |   14 +-
 lib/nmatrix/yale_functions.rb                      |    8 +-
 metadata.yml                                       |   30 +-
 nmatrix.gemspec                                    |    9 +-
 scripts/mac-brew-gcc.sh                            |   19 +-
 scripts/mac-mavericks-brew-gcc.sh                  |   22 +
 spec/00_nmatrix_spec.rb                            |  123 ++-
 spec/01_enum_spec.rb                               |   20 +-
 spec/02_slice_spec.rb                              |   14 +-
 spec/blas_spec.rb                                  |    7 +-
 spec/elementwise_spec.rb                           |    7 +-
 spec/io_spec.rb                                    |   44 +-
 spec/lapack_spec.rb                                |  166 ++-
 spec/math_spec.rb                                  |   99 +-
 spec/nmatrix_yale_spec.rb                          |   47 +-
 spec/rspec_monkeys.rb                              |   27 +
 spec/rspec_spec.rb                                 |    4 +-
 spec/shortcuts_spec.rb                             |   15 +-
 spec/slice_set_spec.rb                             |    8 +-
 spec/spec_helper.rb                                |    5 +-
 spec/stat_spec.rb                                  |  332 +++---
 101 files changed, 4398 insertions(+), 1425 deletions(-)

diff --git a/Gemfile b/Gemfile
index d9e84e7..7aac379 100644
--- a/Gemfile
+++ b/Gemfile
@@ -7,5 +7,6 @@ gem 'packable', ">= 1.3.5"  # for Matlab IO
 group :development do
   gem 'pry'
   gem 'rspec-longrun'
+  #gem 'narray', :path => "../narray"
   #gem 'pry-debugger'
 end
diff --git a/History.txt b/History.txt
index e36e8d3..5cba094 100644
--- a/History.txt
+++ b/History.txt
@@ -426,4 +426,98 @@
   * nil values in matrices are now pretty printed as "nil"
 
   * Casting from dense to Yale now properly accepts the default
-     value option
+    value option
+
+=== 0.1.0.rc1 / 2013-12-28
+
+* 4 major enhancements:
+
+  * Improved garbage collection strategy for partial object creation
+    (i.e., when VALUEs are allocated but not registered right away),
+    which in addition to fixing numerous bugs should prevent some new
+    bugs from arising in the future (by @cjfuller)
+
+  * Implemented list storage transpose
+
+  * Implemented generic n-dimensional transpose
+
+  * Implemented == comparison between differing matrix stypes
+
+* 10 minor enhancements:
+
+  * User-friendly #gesvd and #gesdd updates (by @ryanmt)
+
+  * Added experimental #yale_row_key_intersection function for expert
+    recommendation problems
+
+  * Added additional *indgen shortcuts and changed behavior for some;
+    now, #cindgen for :complex64, #zindgen for :complex128, #findgen
+    for :float32, #dindgen for :float64, #rindgen for :rational128,
+    and #rbindgen for Ruby objects (which contain integers); also,
+    removed code repetition
+
+  * Changed #stddev to use elementwise #sqrt instead of a manual map
+    block (by @cjfuller)
+
+  * Added alias from MATLAB `load_mat` method to `load` for
+    consistency with the MatrixMarket loader
+
+  * Improved organization by moving list and yale code into storage/
+    subdirectories
+
+  * Added NMatrix#potrf! and NMatrix#getrf, which are instance methods
+    for calling CLAPACK functions (NMatrix#getrf! already existed)
+
+  * Added GCC installation instructions for Mac OS X Mavericks, and
+    updated the old installation instructions for Mac OS X (both
+    found in scripts/)
+
+  * Switched NMatrix::VERSION to work more like Rails::VERSION, with
+    support for MAJOR, MINOR, TINY, and PRE
+
+  * Added #concat, #hconcat, #vconcat, and #dconcat for joining
+    matrices together
+
+* 16 bug fixes:
+
+  * Spec revisions for lapack_gesdd and lapack_gesvd (by @ryanmt)
+
+  * Fixed two double-free problems (by @cjfuller and @mohawkjohn)
+
+  * Fixed contiguous array marking fencepost error
+
+  * Fixed C/C++ API compatibility problem preventing rb/gsl linking
+
+  * Resolved a number of compiler warnings, including one return-type
+    problem that would likely have become a garbage collection error
+    (if it wasn't already)
+
+  * Fixed -O3 optimization problems
+
+  * Restored NMatrix#asum, #nrm2, #binned_sorted_indices,
+    #sorted_indices which were inadvertently removed by NVector
+    deprecation; have not tested
+
+  * Experimental #yale_nd_row and functions which call it now check the
+    range of the argument to prevent a segfault
+
+  * Fixed :* shortcut for a full list dimension (by @cjfuller)
+
+  * Fixed list construction problem which occurred when an initial
+    value array was provided (by @cjfuller)
+
+  * Fixed #inject issue with list and yale matrices of two dimensions
+    (by @cjfuller)
+
+  * Fixed several garbage collection problems (also listed under
+    enhancements) (by @cjfuller)
+
+  * Updated object cleaning target in extconf.rb
+
+  * Fixed possible compilation problem on Mavericks with Xcode 5.02
+
+  * Fixed errors involving undefined symbols, unresolved symbols, and
+    lazy symbol binding
+
+  * Improved LAPACK and BLAS header selection for Ubuntu/Debian
+    systems with ATLAS (by @mvz)
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
index b697436..95ff794 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -1,4 +1,4 @@
-This version of SciRuby is licensed under the BSD 2-clause license.
+This version of NMatrix is licensed under the BSD 2-clause license.
 
 * http://sciruby.com
 * http://github.com/sciruby/sciruby/wiki/License
@@ -9,7 +9,7 @@ You *must* read the Contributor Agreement before contributing code to the SciRub
 
 -----
 
-Copyright (c) 2010 - 2013, Ruby Science Foundation
+Copyright (c) 2010 - 2014, Ruby Science Foundation
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
diff --git a/README.rdoc b/README.rdoc
index e0b962c..03a527f 100644
--- a/README.rdoc
+++ b/README.rdoc
@@ -7,6 +7,8 @@ Fast Numerical Linear Algebra Library for Ruby
 * {NMatrix Installation wiki}[https://github.com/SciRuby/nmatrix/wiki/Installation]
 * {SciRuby Installation guide}[http://sciruby.com/docs#installation]
 
+{<img src=https://travis-ci.org/SciRuby/nmatrix.png>}[https://travis-ci.org/SciRuby/nmatrix]
+
 == Description
 
 NMatrix is a fast numerical linear algebra library for Ruby, with dense and sparse matrices, written mostly in C and
@@ -24,8 +26,7 @@ However, you will need to install {ATLAS}[http://math-atlas.sourceforge.net/] wi
 {BLAS}[http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms]) first. Detailed directions can be found
 {here}[https://github.com/SciRuby/nmatrix/wiki/Installation]. The requirements for NMatrix are:
 
-* ATLAS
-* LAPACK, probably ({see here for details}[https://github.com/SciRuby/nmatrix/wiki/Installation])
+* ATLAS, preferably with CLAPACK ({see here for details}[https://github.com/SciRuby/nmatrix/wiki/Installation])
 * a version of GCC or clang which supports C++0x or C++11
 * Ruby 1.9.3+
 * {packable}[http://github.com/marcandre/packable] 1.3.5 (used for I/O)
@@ -37,9 +38,11 @@ If you want to obtain the latest (development) code, you should generally do:
     bundle install
     bundle exec rake compile
     bundle exec rake repackage
-    gem install pkg/nmatrix-0.1.0.gem
+    gem install pkg/nmatrix-0.1.0-rc1.gem
 
 Detailed instructions are available for {Mac}[https://github.com/SciRuby/nmatrix/wiki/Installation#mac-os-x] and {Linux}[https://github.com/SciRuby/nmatrix/wiki/Installation#linux].
+We are currently working on Mavericks (Mac OS X) installation instructions, but in general, you'll need Homebrew and should
+probably use +brew install gcc48+ instead of using the install script.
 
 == Documentation
 
@@ -52,21 +55,27 @@ You can find the complete API documentation {on our website}[http://sciruby.com/
 
 == Examples
 
-Create a new NMatrix from a ruby array: 
+Create a new NMatrix from a ruby Array: 
 
     >> require 'nmatrix'
-    >> NMatrix.new([2, 3], [0, 1, 2, 3, 4, 5], dtype: :int64).pp
-      [0, 1, 2]
-      [3, 4, 5]
-    => nil
+    >> NMatrix.new([2, 3], [0, 1, 2, 3, 4, 5], dtype: :int64)
+    => [
+        [0, 1, 2],
+        [3, 4, 5]
+       ]
 
 Create a new NMatrix using the +N+ shortcut:
 
     >> m = N[ [2, 3, 4], [7, 8, 9] ]
+    => [
+        [2, 3, 4],
+        [7, 8, 9]
+       ]
+    >> m.inspect
     => #<NMatrix:0x007f8e121b6cf8shape:[2,3] dtype:int32 stype:dense>
-    >> m.pp
-       [2, 3, 4]
-       [7, 8, 9]
+
+The above output requires that you have a pretty-print-enabled console such as Pry; otherwise, you'll
+see the output given by +inspect+.
 
 If you want to learn more about how to create a
 matrix, {read the guide in our wiki}[https://github.com/SciRuby/nmatrix/wiki/How-to-create-a-NMatrix].
@@ -79,7 +88,7 @@ Read the instructions in +CONTRIBUTING.md+ if you want to help NMatrix.
 
 == Features
 
-The following features exist in the current version of NMatrix (0.0.8):
+The following features exist in the current version of NMatrix (0.1.0.rc1):
 
 * Matrix and vector storage containers: dense, yale, list (more to come)
 * Data types: byte (uint8), int8, int16, int32, int64, float32, float64, complex64, complex128, rational64, rational128,
@@ -114,6 +123,8 @@ The following features exist in the current version of NMatrix (0.0.8):
 * Determinant calculation for BLAS dtypes
 * Vector 2-norms
 * Ruby/GSL interoperability (requires {SciRuby's fork of rb-gsl}[http://github.com/SciRuby/rb-gsl])
+* slice assignments, e.g.,
+    x[1..3,0..4] = some_other_matrix
 
 === Planned Features (Short-to-Medium Term)
 
@@ -121,29 +132,16 @@ We are nearly the release of NMatrix 0.1.0, our first beta.
 
 These are features planned for NMatrix 0.2.0:
 
-* slice assignments, e.g.,
-    x[1..3,0..4] = some_other_matrix
 * LAPACK-free calculation of determinant, trace, and eigenvalues (characteristic polynomial)
 * LAPACK-free matrix inversions
 * tensor products
-* principal component analysis (PCA)
 * improved file I/O
   * compression of yale symmetries in I/O
 * optimization of non-BLAS data types on BLAS-like operations (e.g., matrix multiplication for rational numbers)
 
-=== Warning
-
-Please be aware that SciRuby and NMatrix are *alpha* status. If you're thinking of using SciRuby/NMatrix to write
-mission-critical code, such as for driving a car or flying a space shuttle, you may wish to choose other software for
-now.
-
-You should also be aware that NMatrix and NArray are incompatible with one another; you should not try to require both
-at the same time. Unfortunately, that causes problems with Ruby/GSL, which currently depends upon NArray. As such, we
-have a {fork of Ruby/GSL}[https://github.com/SciRuby/rb-gsl].
-
 == License
 
-Copyright (c) 2010--13, The Ruby Science Foundation.
+Copyright (c) 2012--14, John Woods and the Ruby Science Foundation.
 
 All rights reserved.
 
diff --git a/Rakefile b/Rakefile
index 90e40e9..6652048 100644
--- a/Rakefile
+++ b/Rakefile
@@ -41,18 +41,26 @@ BASEDIR = Pathname( __FILE__ ).dirname.relative_path_from( Pathname.pwd )
 SPECDIR = BASEDIR + 'spec'
 
 VALGRIND_OPTIONS = [
-        "--tool=memcheck",
-        #"--leak-check=yes",
-        "--num-callers=15",
-        #"--error-limit=no",
-        "--partial-loads-ok=yes",
-        "--undef-value-errors=no" #,
-        #"--dsymutil=yes"
+    "--tool=memcheck",
+    #"--leak-check=yes",
+    "--num-callers=15",
+    #"--error-limit=no",
+    "--partial-loads-ok=yes",
+    "--undef-value-errors=no" #,
+    #"--dsymutil=yes"
 ]
+
+CALLGRIND_OPTIONS = [
+    "--tool=callgrind",
+    "--dump-instr=yes",
+    "--simulate-cache=yes",
+    "--collect-jumps=yes"
+]
+
 VALGRIND_MEMORYFILL_OPTIONS = [
-        "--freelist-vol=100000000",
-        "--malloc-fill=6D",
-        "--free-fill=66 ",
+    "--freelist-vol=100000000",
+    "--malloc-fill=6D",
+    "--free-fill=66 ",
 ]
 
 GDB_OPTIONS = []
@@ -125,17 +133,25 @@ namespace :spec do
   desc "Run specs under cgdb."
   task :cgdb => [ :compile ] do |task|
     cmd = [ 'cgdb' ] + GDB_OPTIONS
-          cmd += [ '--args' ]
-          cmd += RSPEC_CMD
-          run( *cmd )
+    cmd += [ '--args' ]
+    cmd += RSPEC_CMD
+    run( *cmd )
   end
 
   desc "Run specs under Valgrind."
   task :valgrind => [ :compile ] do |task|
-          cmd = [ 'valgrind' ] + VALGRIND_OPTIONS
-          cmd += RSPEC_CMD
-          run( *cmd )
+    cmd = [ 'valgrind' ] + VALGRIND_OPTIONS
+    cmd += RSPEC_CMD
+    run( *cmd )
+  end
+
+  desc "Run specs under Callgrind."
+  task :callgrind => [ :compile ] do |task|
+    cmd = [ 'valgrind' ] + CALLGRIND_OPTIONS
+    cmd += RSPEC_CMD
+    run( *cmd )
   end
+
 end
 
 
diff --git a/checksums.yaml.gz b/checksums.yaml.gz
index d274cb6..9142bae 100644
Binary files a/checksums.yaml.gz and b/checksums.yaml.gz differ
diff --git a/ext/nmatrix/data/complex.h b/ext/nmatrix/data/complex.h
index 614d7bf..b463373 100644
--- a/ext/nmatrix/data/complex.h
+++ b/ext/nmatrix/data/complex.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/data/data.cpp b/ext/nmatrix/data/data.cpp
index 5497c16..22bfdd9 100644
--- a/ext/nmatrix/data/data.cpp
+++ b/ext/nmatrix/data/data.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -73,50 +73,24 @@ namespace nm {
     "geq"
   };
 
+  const std::string NONCOM_EWOP_NAMES[nm::NUM_NONCOM_EWOPS] = {
+    "atan2",
+    "ldexp",
+    "hypot"
+  };
 
-  template <typename Type>
-  Complex<Type>::Complex(const RubyObject& other) {
-    switch(TYPE(other.rval)) {
-    case T_COMPLEX:
-      r = NUM2DBL(rb_funcall(other.rval, rb_intern("real"), 0));
-      i = NUM2DBL(rb_funcall(other.rval, rb_intern("imag"), 0));
-      break;
-    case T_FLOAT:
-    case T_RATIONAL:
-    case T_FIXNUM:
-    case T_BIGNUM:
-      r = NUM2DBL(other.rval);
-      i = 0.0;
-      break;
-    default:
-      rb_raise(rb_eTypeError, "not sure how to convert this type of VALUE to a complex");
-    }
-  }
-
-
-  template <typename Type>
-  Rational<Type>::Rational(const RubyObject& other) {
-    switch (TYPE(other.rval)) {
-    case T_RATIONAL:
-      n = NUM2LONG(rb_funcall(other.rval, rb_intern("numerator"), 0));
-      d = NUM2LONG(rb_funcall(other.rval, rb_intern("denominator"), 0));
-      break;
-    case T_FIXNUM:
-    case T_BIGNUM:
-      n = NUM2LONG(other.rval);
-      d = 1;
-      break;
-    case T_COMPLEX:
-    case T_FLOAT:
-      rb_raise(rb_eTypeError, "cannot convert float to a rational");
-      break;
-    default:
-      rb_raise(rb_eTypeError, "not sure how to convert this type of VALUE to a rational");
-    }
-  }
-
+  const std::string UNARYOPS[nm::NUM_UNARYOPS] = {
+    "sin", "cos", "tan",
+    "asin", "acos", "atan",
+    "sinh", "cosh", "tanh",
+    "asinh", "acosh", "atanh",
+    "exp", "log2", 
+    "log10", "sqrt", "erf", 
+    "erfc", "cbrt", "gamma"
+  };
 
 } // end of namespace nm
+
 extern "C" {
 
 const char* const DTYPE_NAMES[nm::NUM_DTYPES] = {
@@ -301,21 +275,23 @@ nm::RubyObject rubyobj_from_cval(void* val, nm::dtype_t dtype) {
  */
 void* rubyobj_to_cval(VALUE val, nm::dtype_t dtype) {
   size_t size =  DTYPE_SIZES[dtype];
-  void* ret_val = ALLOC_N(char, size);
+  NM_CONSERVATIVE(nm_register_value(val));
+  void* ret_val = NM_ALLOC_N(char, size);
 
   rubyval_to_cval(val, dtype, ret_val);
-
+  NM_CONSERVATIVE(nm_unregister_value(val));
   return ret_val;
 }
 
 
 void nm_init_data() {
-    nm::RubyObject obj(INT2FIX(1));
-    nm::Rational32 x(obj);
-    nm::Rational64 y(obj);
-    nm::Rational128 z(obj);
-    nm::Complex64 a(obj);
-    nm::Complex128 b(obj);
+  volatile VALUE t = INT2FIX(1);
+  volatile nm::RubyObject obj(t);
+  volatile nm::Rational32 x(const_cast<nm::RubyObject&>(obj));
+  volatile nm::Rational64 y(const_cast<nm::RubyObject&>(obj));
+  volatile nm::Rational128 z(const_cast<nm::RubyObject&>(obj));
+  volatile nm::Complex64 a(const_cast<nm::RubyObject&>(obj));
+  volatile nm::Complex128 b(const_cast<nm::RubyObject&>(obj));
 }
 
 
diff --git a/ext/nmatrix/data/data.h b/ext/nmatrix/data/data.h
index ed13f36..9bc8ee1 100644
--- a/ext/nmatrix/data/data.h
+++ b/ext/nmatrix/data/data.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -55,7 +55,8 @@ namespace nm {
 	const int NUM_DTYPES = 13;
 	const int NUM_ITYPES = 4;
 	const int NUM_EWOPS = 12;
-	const int NUM_NONCOMP_EWOPS = 6;
+	const int NUM_UNARYOPS = 21;
+	const int NUM_NONCOM_EWOPS = 3;
 
   enum ewop_t {
     EW_ADD,
@@ -69,12 +70,85 @@ namespace nm {
     EW_LT,
     EW_GT,
     EW_LEQ,
-    EW_GEQ
+    EW_GEQ,
+  };
+
+  enum noncom_ewop_t {
+    NONCOM_EW_ATAN2,
+    NONCOM_EW_LDEXP,
+    NONCOM_EW_HYPOT
+  };
+
+  enum unaryop_t {
+    UNARY_SIN,
+    UNARY_COS,
+    UNARY_TAN,
+    UNARY_ASIN,
+    UNARY_ACOS,
+    UNARY_ATAN,
+    UNARY_SINH,
+    UNARY_COSH,
+    UNARY_TANH,
+    UNARY_ASINH,
+    UNARY_ACOSH,
+    UNARY_ATANH,
+    UNARY_EXP,
+    UNARY_LOG2,
+    UNARY_LOG10,
+    UNARY_SQRT,
+    UNARY_ERF,
+    UNARY_ERFC,
+    UNARY_CBRT,
+    UNARY_GAMMA
   };
 
   // element-wise and scalar operators
   extern const char* const  EWOP_OPS[nm::NUM_EWOPS];
   extern const std::string  EWOP_NAMES[nm::NUM_EWOPS];
+  extern const std::string  UNARYOPS[nm::NUM_UNARYOPS];
+  extern const std::string  NONCOM_EWOP_NAMES[nm::NUM_NONCOM_EWOPS];
+
+
+  template <typename Type>
+  Complex<Type>::Complex(const RubyObject& other) {
+    switch(TYPE(other.rval)) {
+    case T_COMPLEX:
+      r = NUM2DBL(rb_funcall(other.rval, rb_intern("real"), 0));
+      i = NUM2DBL(rb_funcall(other.rval, rb_intern("imag"), 0));
+      break;
+    case T_FLOAT:
+    case T_RATIONAL:
+    case T_FIXNUM:
+    case T_BIGNUM:
+      r = NUM2DBL(other.rval);
+      i = 0.0;
+      break;
+    default:
+      rb_raise(rb_eTypeError, "not sure how to convert this type of VALUE to a complex");
+    }
+  }
+
+
+  template <typename Type>
+  Rational<Type>::Rational(const RubyObject& other) {
+    switch (TYPE(other.rval)) {
+    case T_RATIONAL:
+      n = NUM2LONG(rb_funcall(other.rval, rb_intern("numerator"), 0));
+      d = NUM2LONG(rb_funcall(other.rval, rb_intern("denominator"), 0));
+      break;
+    case T_FIXNUM:
+    case T_BIGNUM:
+      n = NUM2LONG(other.rval);
+      d = 1;
+      break;
+    case T_COMPLEX:
+    case T_FLOAT:
+      rb_raise(rb_eTypeError, "cannot convert float to a rational");
+      break;
+    default:
+      rb_raise(rb_eTypeError, "not sure how to convert this type of VALUE to a rational");
+    }
+  }
 
 
 } // end of namespace nm
@@ -90,6 +164,20 @@ namespace nm {
     nm_yale_storage_mark												\
   };
 
+#define STYPE_REGISTER_TABLE(name)              \
+  static void (*(name)[nm::NUM_STYPES])(const STORAGE*) = { \
+    nm_dense_storage_register,                  \
+    nm_list_storage_register,                   \
+    nm_yale_storage_register                    \
+  };
+
+#define STYPE_UNREGISTER_TABLE(name)              \
+  static void (*(name)[nm::NUM_STYPES])(const STORAGE*) = { \
+    nm_dense_storage_unregister,                \
+    nm_list_storage_unregister,                 \
+    nm_yale_storage_unregister                  \
+  };
+
 #define CAST_TABLE(name)                                                   \
   static STORAGE* (*(name)[nm::NUM_STYPES][nm::NUM_STYPES])(const STORAGE*, nm::dtype_t, void*) = {      \
     { nm_dense_storage_cast_copy,  nm_dense_storage_from_list,  nm_dense_storage_from_yale },  \
diff --git a/ext/nmatrix/data/meta.h b/ext/nmatrix/data/meta.h
index b13082a..4260ae1 100644
--- a/ext/nmatrix/data/meta.h
+++ b/ext/nmatrix/data/meta.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/data/rational.h b/ext/nmatrix/data/rational.h
index 3615523..1a1e74a 100644
--- a/ext/nmatrix/data/rational.h
+++ b/ext/nmatrix/data/rational.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/data/ruby_object.h b/ext/nmatrix/data/ruby_object.h
index d26ba86..9632751 100644
--- a/ext/nmatrix/data/ruby_object.h
+++ b/ext/nmatrix/data/ruby_object.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/extconf.rb b/ext/nmatrix/extconf.rb
index 98c4f8c..0489fdf 100644
--- a/ext/nmatrix/extconf.rb
+++ b/ext/nmatrix/extconf.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -102,22 +102,65 @@ if /cygwin|mingw/ =~ RUBY_PLATFORM
 end
 
 $DEBUG = true
-$CFLAGS = ["-Wall ",$CFLAGS].join(" ")
-
-$srcs = [
-         'nmatrix.cpp',
-         'ruby_constants.cpp',
-
-         'data/data.cpp',
-         'math.cpp',
-         'util/sl_list.cpp',
-         'util/io.cpp',
-         'storage/common.cpp',
-         'storage/storage.cpp',
-         'storage/dense.cpp',
-         'storage/yale/yale.cpp',
-         'storage/list.cpp'
-        ]
+$CFLAGS = ["-Wall -Werror=return-type",$CFLAGS].join(" ")
+$CXXFLAGS = ["-Wall -Werror=return-type",$CXXFLAGS].join(" ")
+$CPPFLAGS = ["-Wall -Werror=return-type",$CPPFLAGS].join(" ")
+
+# When adding objects here, make sure their directories are included in CLEANOBJS down at the bottom of extconf.rb.
+basenames = %w{nmatrix ruby_constants data/data util/io math util/sl_list storage/common storage/storage storage/dense/dense storage/yale/yale storage/list/list}
+$objs = basenames.map { |b| "#{b}.o"   }
+$srcs = basenames.map { |b| "#{b}.cpp" }
+
+#CONFIG['CXX'] = 'clang++'
+CONFIG['CXX'] = 'g++'
+
+def find_newer_gplusplus #:nodoc:
+  print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
+  [9,8,7,6,5,4,3].each do |minor|
+    ver = "4.#{minor}"
+    gpp = "g++-#{ver}"
+    result = `which #{gpp}`
+    next if result.empty?
+    CONFIG['CXX'] = gpp
+    puts ver
+    return CONFIG['CXX']
+  end
+  false
+end
+
+def gplusplus_version #:nodoc:
+  cxxvar = proc { |n| `#{CONFIG['CXX']} -E -dM - </dev/null | grep #{n}`.chomp.split(' ')[2] }
+  major = cxxvar.call('__GNUC__')
+  minor = cxxvar.call('__GNUC_MINOR__')
+  patch = cxxvar.call('__GNUC_PATCHLEVEL__')
+
+  raise("unable to determine g++ version (match to get version was nil)") if major.nil? || minor.nil? || patch.nil?
+
+  "#{major}.#{minor}.#{patch}"
+end
+
+
+if CONFIG['CXX'] == 'clang++'
+  $CPP_STANDARD = 'c++11'
+
+else
+  version = gplusplus_version
+  if version < '4.3.0' && CONFIG['CXX'] == 'g++'  # see if we can find a newer G++, unless it's been overridden by user
+    if !find_newer_gplusplus
+      raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
+    end
+    version = gplusplus_version
+  end
+
+  if version < '4.7.0'
+    $CPP_STANDARD = 'c++0x'
+  else
+    $CPP_STANDARD = 'c++11'
+  end
+  puts "using C++ standard... #{$CPP_STANDARD}"
+  puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
+end
+
 # add smmp in to get generic transp; remove smmp2 to eliminate funcptr transp
 
 # The next line allows the user to supply --with-atlas-dir=/usr/local/atlas,
@@ -138,9 +181,16 @@ idefaults = {lapack: ["/usr/include/atlas"],
              cblas: ["/usr/local/atlas/include", "/usr/include/atlas"],
              atlas: ["/usr/local/atlas/include", "/usr/include/atlas"]}
 
-ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib"],
-             cblas: ["/usr/local/lib", "/usr/local/atlas/lib"],
-             atlas: ["/usr/local/atlas/lib", "/usr/local/lib", "/usr/lib"]}
+# For some reason, if we try to look for /usr/lib64/atlas on a Mac OS X Mavericks system, and the directory does not
+# exist, it will give a linker error -- even if the lib dir is already correctly included with -L. So we need to check
+# that Dir.exists?(d) for each.
+ldefaults = {lapack: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
+             cblas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) },
+             atlas: ["/usr/local/lib", "/usr/local/atlas/lib", "/usr/lib", "/usr/lib64/atlas"].delete_if { |d| !Dir.exists?(d) }}
+
+if have_library("clapack") # Usually only applies for Mac OS X
+  $libs += " -lclapack "
+end
 
 unless have_library("lapack")
   dir_config("lapack", idefaults[:lapack], ldefaults[:lapack])
@@ -154,22 +204,19 @@ unless have_library("atlas")
   dir_config("atlas", idefaults[:atlas], ldefaults[:atlas])
 end
 
-# this needs to go before cblas.h checks -- on Ubuntu, the clapack in the
-# include path found for cblas.h doesn't seem to contain all the necessary 
-# functions
-have_header("clapack.h")
-
-# this ensures that we find the header on Ubuntu, where by default the library 
-# can be found but not the header
-unless have_header("cblas.h")
-  find_header("cblas.h", *idefaults[:cblas])
+# If BLAS and LAPACK headers are in an atlas directory, prefer those. Otherwise,
+# we try our luck with the default location.
+if have_header("atlas/cblas.h")
+  have_header("atlas/clapack.h")
+else
+  have_header("cblas.h")
+  have_header("clapack.h")
 end
 
-have_header("cblas.h")
 
 have_func("clapack_dgetrf", ["cblas.h", "clapack.h"])
 have_func("clapack_dgetri", ["cblas.h", "clapack.h"])
-have_func("dgesvd_", "clapack.h")
+have_func("dgesvd_", "clapack.h") # This may not do anything. dgesvd_ seems to be in LAPACK, not CLAPACK.
 
 have_func("cblas_dgemm", "cblas.h")
 
@@ -178,59 +225,15 @@ have_func("cblas_dgemm", "cblas.h")
 #find_library("lapack", "clapack_dgetrf")
 #find_library("cblas", "cblas_dgemm")
 #find_library("atlas", "ATL_dgemmNN")
-
 # Order matters here: ATLAS has to go after LAPACK: http://mail.scipy.org/pipermail/scipy-user/2007-January/010717.html
 $libs += " -llapack -lcblas -latlas "
+#$libs += " -lprofiler "
 
-$objs = %w{nmatrix ruby_constants data/data util/io math util/sl_list storage/common storage/storage storage/dense storage/yale/yale storage/list}.map { |i| i + ".o" }
-
-#CONFIG['CXX'] = 'clang++'
-CONFIG['CXX'] = 'g++'
-
-def find_newer_gplusplus #:nodoc:
-  print "checking for apparent GNU g++ binary with C++0x/C++11 support... "
-  [9,8,7,6,5,4,3].each do |minor|
-    ver = "4.#{minor}"
-    gpp = "g++-#{ver}"
-    result = `which #{gpp}`
-    next if result.empty?
-    CONFIG['CXX'] = gpp
-    puts ver
-    return CONFIG['CXX']
-  end
-  false
-end
-
-def gplusplus_version #:nodoc:
-  `LANG="en_US" #{CONFIG['CXX']} -v 2>&1`.lines.to_a.last.match(/gcc\sversion\s(\d\.\d.\d)/).captures.first
-end
-
-
-if CONFIG['CXX'] == 'clang++'
-  $CPP_STANDARD = 'c++11'
-
-else
-  version = gplusplus_version
-  if version < '4.3.0' && CONFIG['CXX'] == 'g++'  # see if we can find a newer G++, unless it's been overridden by user
-    if !find_newer_gplusplus
-      raise("You need a version of g++ which supports -std=c++0x or -std=c++11. If you're on a Mac and using Homebrew, we recommend using mac-brew-gcc.sh to install a more recent g++.")
-    end
-    version = gplusplus_version
-  end
-
-  if version < '4.7.0'
-    $CPP_STANDARD = 'c++0x'
-  else
-    $CPP_STANDARD = 'c++11'
-  end
-  puts "using C++ standard... #{$CPP_STANDARD}"
-  puts "g++ reports version... " + `#{CONFIG['CXX']} --version|head -n 1|cut -f 3 -d " "`
-end
 
 # For release, these next two should both be changed to -O3.
-$CFLAGS += " -O3 " #" -O0 -g "
+$CFLAGS += " -O3 -g" #" -O0 -g "
 #$CFLAGS += " -static -O0 -g "
-$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} " #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
+$CPPFLAGS += " -O3 -std=#{$CPP_STANDARD} -g" #" -O0 -g -std=#{$CPP_STANDARD} " #-fmax-errors=10 -save-temps
 #$CPPFLAGS += " -static -O0 -g -std=#{$CPP_STANDARD} "
 
 CONFIG['warnflags'].gsub!('-Wshorten-64-to-32', '') # doesn't work except in Mac-patched gcc (4.2)
@@ -244,15 +247,13 @@ Dir.mkdir("data") unless Dir.exists?("data")
 Dir.mkdir("util") unless Dir.exists?("util")
 Dir.mkdir("storage") unless Dir.exists?("storage")
 Dir.chdir("storage") do
-  Dir.mkdir("yale") unless Dir.exists?("yale")
-  Dir.chdir("yale") do
-    Dir.mkdir("iterators") unless Dir.exists?("iterators")
-  end
+  Dir.mkdir("yale")  unless Dir.exist?("yale")   # Dir.exist? is the non-deprecated form of Dir.exists?
+  Dir.mkdir("list")  unless Dir.exist?("list")
+  Dir.mkdir("dense") unless Dir.exist?("dense")
 end
 
 # to clean up object files in subdirectories:
 open('Makefile', 'a') do |f|
-  f.write <<EOS
-CLEANOBJS := $(CLEANOBJS) data/*.#{CONFIG["OBJEXT"]} storage/*.#{CONFIG["OBJEXT"]} util/*.#{CONFIG["OBJEXT"]}
-EOS
+  clean_objs_paths = %w{data storage storage/dense storage/yale storage/list util}.map { |d| "#{d}/*.#{CONFIG["OBJEXT"]}" }
+  f.puts("CLEANOBJS := $(CLEANOBJS) #{clean_objs_paths.join(' ')}") # puts terminates the line, as the old heredoc did
 end
diff --git a/ext/nmatrix/math.cpp b/ext/nmatrix/math.cpp
index e838a7f..6c371e3 100644
--- a/ext/nmatrix/math.cpp
+++ b/ext/nmatrix/math.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -140,7 +140,7 @@
 #include "math/rot.h"
 #include "math/rotg.h"
 #include "math/math.h"
-#include "storage/dense.h"
+#include "storage/dense/dense.h"
 
 #include "nmatrix.h"
 #include "ruby_constants.h"
@@ -150,8 +150,10 @@
  */
 
 extern "C" {
-#ifdef HAVE_CLAPACK_H
+#if defined HAVE_CLAPACK_H
   #include <clapack.h>
+#elif defined HAVE_ATLAS_CLAPACK_H
+  #include <atlas/clapack.h>
 #endif
 
   static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx);
@@ -503,8 +505,10 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
     return Qnil;
 
   } else {
-    void *pC = ALLOCA_N(char, DTYPE_SIZES[dtype]),
-         *pS = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+    NM_CONSERVATIVE(nm_register_value(self));
+    NM_CONSERVATIVE(nm_register_value(ab));
+    void *pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
+         *pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
 
     // extract A and B from the NVector (first two elements)
     void* pA = NM_STORAGE_DENSE(ab)->elements;
@@ -522,7 +526,8 @@ static VALUE nm_cblas_rotg(VALUE self, VALUE ab) {
       rb_ary_store(result, 0, rubyobj_from_cval(pC, dtype).rval);
       rb_ary_store(result, 1, rubyobj_from_cval(pS, dtype).rval);
     }
-
+    NM_CONSERVATIVE(nm_unregister_value(ab));
+    NM_CONSERVATIVE(nm_unregister_value(self));
     return result;
   }
 }
@@ -575,18 +580,18 @@ static VALUE nm_cblas_rot(VALUE self, VALUE n, VALUE x, VALUE incx, VALUE y, VAL
 
     // We need to ensure the cosine and sine arguments are the correct dtype -- which may differ from the actual dtype.
     if (dtype == nm::COMPLEX64) {
-      pC = ALLOCA_N(float,1);
-      pS = ALLOCA_N(float,1);
+      pC = NM_ALLOCA_N(float,1);
+      pS = NM_ALLOCA_N(float,1);
       rubyval_to_cval(c, nm::FLOAT32, pC);
       rubyval_to_cval(s, nm::FLOAT32, pS);
     } else if (dtype == nm::COMPLEX128) {
-      pC = ALLOCA_N(double,1);
-      pS = ALLOCA_N(double,1);
+      pC = NM_ALLOCA_N(double,1);
+      pS = NM_ALLOCA_N(double,1);
       rubyval_to_cval(c, nm::FLOAT64, pC);
       rubyval_to_cval(s, nm::FLOAT64, pS);
     } else {
-      pC = ALLOCA_N(char, DTYPE_SIZES[dtype]);
-      pS = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+      pC = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
+      pS = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
       rubyval_to_cval(c, dtype, pC);
       rubyval_to_cval(s, dtype, pS);
     }
@@ -646,7 +651,7 @@ static VALUE nm_cblas_nrm2(VALUE self, VALUE n, VALUE x, VALUE incx) {
     if      (dtype == nm::COMPLEX64)  rdtype = nm::FLOAT32;
     else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
 
-    void *Result = ALLOCA_N(char, DTYPE_SIZES[rdtype]);
+    void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
 
     ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
 
@@ -698,7 +703,7 @@ static VALUE nm_cblas_asum(VALUE self, VALUE n, VALUE x, VALUE incx) {
   if      (dtype == nm::COMPLEX64)  rdtype = nm::FLOAT32;
   else if (dtype == nm::COMPLEX128) rdtype = nm::FLOAT64;
 
-  void *Result = ALLOCA_N(char, DTYPE_SIZES[rdtype]);
+  void *Result = NM_ALLOCA_N(char, DTYPE_SIZES[rdtype]);
 
   ttable[dtype](FIX2INT(n), NM_STORAGE_DENSE(x)->elements, FIX2INT(incx), Result);
 
@@ -743,8 +748,8 @@ static VALUE nm_cblas_gemm(VALUE self,
 
   nm::dtype_t dtype = NM_DTYPE(a);
 
-  void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
-       *pBeta  = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
+       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
   rubyval_to_cval(alpha, dtype, pAlpha);
   rubyval_to_cval(beta, dtype, pBeta);
 
@@ -788,8 +793,8 @@ static VALUE nm_cblas_gemv(VALUE self,
 
   nm::dtype_t dtype = NM_DTYPE(a);
 
-  void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
-       *pBeta  = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+  void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
+       *pBeta  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
   rubyval_to_cval(alpha, dtype, pAlpha);
   rubyval_to_cval(beta, dtype, pBeta);
 
@@ -825,7 +830,7 @@ static VALUE nm_cblas_trsm(VALUE self,
   if (!ttable[dtype]) {
     rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
   } else {
-    void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
     rubyval_to_cval(alpha, dtype, pAlpha);
 
     ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
@@ -865,7 +870,7 @@ static VALUE nm_cblas_trmm(VALUE self,
   if (!ttable[dtype]) {
     rb_raise(nm_eDataTypeError, "this matrix operation not yet defined for non-BLAS dtypes");
   } else {
-    void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
     rubyval_to_cval(alpha, dtype, pAlpha);
 
     ttable[dtype](blas_order_sym(order), blas_side_sym(side), blas_uplo_sym(uplo), blas_transpose_sym(trans_a), blas_diag_sym(diag), FIX2INT(m), FIX2INT(n), pAlpha, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda), NM_STORAGE_DENSE(b)->elements, FIX2INT(ldb));
@@ -903,8 +908,8 @@ static VALUE nm_cblas_syrk(VALUE self,
   if (!ttable[dtype]) {
     rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
   } else {
-    void *pAlpha = ALLOCA_N(char, DTYPE_SIZES[dtype]),
-         *pBeta = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+    void *pAlpha = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]),
+         *pBeta = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
     rubyval_to_cval(alpha, dtype, pAlpha);
     rubyval_to_cval(beta, dtype, pBeta);
 
@@ -984,12 +989,12 @@ static VALUE nm_lapack_gesvd(VALUE self, VALUE jobu, VALUE jobvt, VALUE m, VALUE
 
     // only need rwork for complex matrices
     int rwork_size  = (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) ? 5 * min_mn : 0;
-    void* rwork     = rwork_size > 0 ? ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
+    void* rwork     = rwork_size > 0 ? NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size) : NULL;
     int work_size   = FIX2INT(lwork);
 
     // ignore user argument for lwork if it's too small.
     work_size       = NM_MAX((dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? 2 * min_mn + max_mn : NM_MAX(3*min_mn + max_mn, 5*min_mn)), work_size);
-    void* work      = ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
+    void* work      = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
 
     int info = gesvd_table[dtype](JOBU, JOBVT, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
       NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
@@ -1046,7 +1051,7 @@ static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a,
     int work_size = FIX2INT(lwork); // Make sure we allocate enough work, regardless of the user request.
     if (dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128) {
       int rwork_size = min_mn * (JOBZ == 'N' ? 5 : NM_MAX(5*min_mn + 7, 2*max_mn + 2*min_mn + 1));
-      rwork = ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
+      rwork = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * rwork_size);
 
       if (JOBZ == 'N')      work_size = NM_MAX(work_size, 3*min_mn + NM_MAX(max_mn, 6*min_mn));
       else if (JOBZ == 'O') work_size = NM_MAX(work_size, 3*min_mn*min_mn + NM_MAX(max_mn, 5*min_mn*min_mn + 4*min_mn));
@@ -1056,8 +1061,8 @@ static VALUE nm_lapack_gesdd(VALUE self, VALUE jobz, VALUE m, VALUE n, VALUE a,
       else if (JOBZ == 'O') work_size = NM_MAX(work_size, 2*min_mn*min_mn + max_mn + 2*min_mn);
       else                  work_size = NM_MAX(work_size, min_mn*min_mn + max_mn + 2*min_mn);
     }
-    void* work  = ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
-    int* iwork  = ALLOCA_N(int, 8*min_mn);
+    void* work  = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * work_size);
+    int* iwork  = NM_ALLOCA_N(int, 8*min_mn);
 
     int info = gesdd_table[dtype](JOBZ, M, N, NM_STORAGE_DENSE(a)->elements, FIX2INT(lda),
       NM_STORAGE_DENSE(s)->elements, NM_STORAGE_DENSE(u)->elements, FIX2INT(ldu), NM_STORAGE_DENSE(vt)->elements, FIX2INT(ldvt),
@@ -1114,7 +1119,7 @@ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right,
 
     // only need rwork for complex matrices (wi == Qnil for complex)
     int rwork_size  = dtype == nm::COMPLEX64 || dtype == nm::COMPLEX128 ? N * DTYPE_SIZES[dtype] : 0; // 2*N*floattype for complex only, otherwise 0
-    void* rwork     = rwork_size > 0 ? ALLOCA_N(char, rwork_size) : NULL;
+    void* rwork     = rwork_size > 0 ? NM_ALLOCA_N(char, rwork_size) : NULL;
     int work_size   = FIX2INT(lwork);
     void* work;
 
@@ -1123,11 +1128,11 @@ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right,
     // if work size is 0 or -1, query.
     if (work_size <= 0) {
       work_size = -1;
-      work = ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
+      work = NM_ALLOC_N(char, DTYPE_SIZES[dtype]); //2*N * DTYPE_SIZES[dtype]);
       info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
       work_size = (int)(dtype == nm::COMPLEX64 || dtype == nm::FLOAT32 ? reinterpret_cast<float*>(work)[0] : reinterpret_cast<double*>(work)[0]);
       // line above is basically: work_size = (int)(work[0]); // now have new work_size
-      xfree(work);
+      NM_FREE(work);
       if (info == 0)
         rb_warn("geev: calculated optimal lwork of %d; to eliminate this message, use a positive value for lwork (at least 2*shape[i])", work_size);
       else return INT2FIX(info); // error of some kind on query!
@@ -1140,7 +1145,7 @@ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right,
     }
 
     // Allocate work array for actual run
-    work = ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
+    work = NM_ALLOCA_N(char, work_size * DTYPE_SIZES[dtype]);
 
     // Perform the actual calculation.
     info = geev_table[dtype](JOBVL, JOBVR, N, A, FIX2INT(lda), WR, WI, VL, FIX2INT(ldvl), VR, FIX2INT(ldvr), work, work_size, rwork);
@@ -1158,7 +1163,7 @@ static VALUE nm_lapack_geev(VALUE self, VALUE compute_left, VALUE compute_right,
 static VALUE nm_clapack_scal(VALUE self, VALUE n, VALUE scale, VALUE vector, VALUE incx) {
   nm::dtype_t dtype = NM_DTYPE(vector);
 
-  void* da      = ALLOCA_N(char, DTYPE_SIZES[dtype]);
+  void* da      = NM_ALLOCA_N(char, DTYPE_SIZES[dtype]);
   rubyval_to_cval(scale, dtype, da);
 
   NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::math::clapack_scal, void, const int n, const void* da, void* dx, const int incx);
@@ -1251,7 +1256,7 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
 
   // Allocate the pivot index array, which is of size MIN(M, N).
   size_t ipiv_size = std::min(M,N);
-  int* ipiv = ALLOCA_N(int, ipiv_size);
+  int* ipiv = NM_ALLOCA_N(int, ipiv_size);
 
   if (!ttable[NM_DTYPE(a)]) {
     rb_raise(nm_eDataTypeError, "this matrix operation undefined for integer matrices");
@@ -1282,7 +1287,7 @@ static VALUE nm_clapack_getrf(VALUE self, VALUE order, VALUE m, VALUE n, VALUE a
  */
 static VALUE nm_clapack_potrf(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
 #ifndef HAVE_CLAPACK_H
-  rb_raise(rb_eNotImpError, "potrf currently requires LAPACK");
+  rb_raise(rb_eNotImpError, "potrf currently requires CLAPACK");
 #endif
 
   static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
@@ -1343,7 +1348,7 @@ static VALUE nm_clapack_getrs(VALUE self, VALUE order, VALUE trans, VALUE n, VAL
   if (TYPE(ipiv) != T_ARRAY) {
     rb_raise(rb_eArgError, "ipiv must be of type Array");
   } else {
-    ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
+    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
     for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
       ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
     }
@@ -1411,7 +1416,7 @@ static VALUE nm_clapack_potrs(VALUE self, VALUE order, VALUE uplo, VALUE n, VALU
  */
 static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE lda, VALUE ipiv) {
 #ifndef HAVE_CLAPACK_H
-  rb_raise(rb_eNotImpError, "getri currently requires LAPACK");
+  rb_raise(rb_eNotImpError, "getri currently requires CLAPACK");
 #endif
 
   static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const int n, void* a, const int lda, const int* ipiv) = {
@@ -1437,7 +1442,7 @@ static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE l
   if (TYPE(ipiv) != T_ARRAY) {
     rb_raise(rb_eArgError, "ipiv must be of type Array");
   } else {
-    ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
+    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
     for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
       ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
     }
@@ -1468,7 +1473,7 @@ static VALUE nm_clapack_getri(VALUE self, VALUE order, VALUE n, VALUE a, VALUE l
  */
 static VALUE nm_clapack_potri(VALUE self, VALUE order, VALUE uplo, VALUE n, VALUE a, VALUE lda) {
 #ifndef HAVE_CLAPACK_H
-  rb_raise(rb_eNotImpError, "getri currently requires LAPACK");
+  rb_raise(rb_eNotImpError, "potri currently requires CLAPACK");
 #endif
 
   static int (*ttable[nm::NUM_DTYPES])(const enum CBLAS_ORDER, const enum CBLAS_UPLO, const int n, void* a, const int lda) = {
@@ -1534,7 +1539,7 @@ static VALUE nm_clapack_laswp(VALUE self, VALUE n, VALUE a, VALUE lda, VALUE k1,
   if (TYPE(ipiv) != T_ARRAY) {
     rb_raise(rb_eArgError, "ipiv must be of type Array");
   } else {
-    ipiv_ = ALLOCA_N(int, RARRAY_LEN(ipiv));
+    ipiv_ = NM_ALLOCA_N(int, RARRAY_LEN(ipiv));
     for (int index = 0; index < RARRAY_LEN(ipiv); ++index) {
       ipiv_[index] = FIX2INT( RARRAY_PTR(ipiv)[index] );
     }
diff --git a/ext/nmatrix/math/asum.h b/ext/nmatrix/math/asum.h
index 3b4e95e..3a77a8b 100644
--- a/ext/nmatrix/math/asum.h
+++ b/ext/nmatrix/math/asum.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -86,7 +86,7 @@ inline ReturnDType asum(const int N, const DType* X, const int incX) {
 }
 
 
-#ifdef HAVE_CBLAS_H
+#if defined HAVE_CBLAS_H || defined HAVE_ATLAS_CBLAS_H
 template <>
 inline float asum(const int N, const float* X, const int incX) {
   return cblas_sasum(N, X, incX);
diff --git a/ext/nmatrix/math/geev.h b/ext/nmatrix/math/geev.h
index 130a24a..3c89e10 100644
--- a/ext/nmatrix/math/geev.h
+++ b/ext/nmatrix/math/geev.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/gemm.h b/ext/nmatrix/math/gemm.h
index 0520b90..b26961e 100644
--- a/ext/nmatrix/math/gemm.h
+++ b/ext/nmatrix/math/gemm.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -31,7 +31,11 @@
 # define GEMM_H
 
 extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
+#if defined HAVE_CBLAS_H
   #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
 }
 
 
diff --git a/ext/nmatrix/math/gemv.h b/ext/nmatrix/math/gemv.h
index 158952c..e24a45c 100644
--- a/ext/nmatrix/math/gemv.h
+++ b/ext/nmatrix/math/gemv.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -31,7 +31,11 @@
 # define GEMV_H
 
 extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
+#if defined HAVE_CBLAS_H
   #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
 }
 
 
diff --git a/ext/nmatrix/math/ger.h b/ext/nmatrix/math/ger.h
index a769e91..9e6a18f 100644
--- a/ext/nmatrix/math/ger.h
+++ b/ext/nmatrix/math/ger.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/gesdd.h b/ext/nmatrix/math/gesdd.h
index 25fafe2..046701d 100644
--- a/ext/nmatrix/math/gesdd.h
+++ b/ext/nmatrix/math/gesdd.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/gesvd.h b/ext/nmatrix/math/gesvd.h
index 65af306..181df08 100644
--- a/ext/nmatrix/math/gesvd.h
+++ b/ext/nmatrix/math/gesvd.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/getf2.h b/ext/nmatrix/math/getf2.h
index f8a3728..98240fe 100644
--- a/ext/nmatrix/math/getf2.h
+++ b/ext/nmatrix/math/getf2.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/getrf.h b/ext/nmatrix/math/getrf.h
index 644f9a0..68b7455 100644
--- a/ext/nmatrix/math/getrf.h
+++ b/ext/nmatrix/math/getrf.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/getri.h b/ext/nmatrix/math/getri.h
index 125f94e..f0c1614 100644
--- a/ext/nmatrix/math/getri.h
+++ b/ext/nmatrix/math/getri.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/getrs.h b/ext/nmatrix/math/getrs.h
index 22f1af4..8a6ddb2 100644
--- a/ext/nmatrix/math/getrs.h
+++ b/ext/nmatrix/math/getrs.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -60,7 +60,11 @@
 #define GETRS_H
 
 extern "C" {
+#if defined HAVE_CBLAS_H
   #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
 }
 
 namespace nm { namespace math {
@@ -122,4 +126,4 @@ inline int clapack_getrs(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOS
 
 } } // end nm::math
 
-#endif // GETRS_H
\ No newline at end of file
+#endif // GETRS_H
diff --git a/ext/nmatrix/math/idamax.h b/ext/nmatrix/math/idamax.h
index d8b48d0..679b3a0 100644
--- a/ext/nmatrix/math/idamax.h
+++ b/ext/nmatrix/math/idamax.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/inc.h b/ext/nmatrix/math/inc.h
index 3f778e9..11a9594 100644
--- a/ext/nmatrix/math/inc.h
+++ b/ext/nmatrix/math/inc.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -31,11 +31,17 @@
 
 
 extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
+#if defined HAVE_CBLAS_H
   #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
 
-  #ifdef HAVE_CLAPACK_H
-    #include <clapack.h>
-  #endif
+#if defined HAVE_CLAPACK_H
+  #include <clapack.h>
+#elif defined HAVE_ATLAS_CLAPACK_H
+  #include <atlas/clapack.h>
+#endif
 }
 
-#endif // INC_H
\ No newline at end of file
+#endif // INC_H
diff --git a/ext/nmatrix/math/laswp.h b/ext/nmatrix/math/laswp.h
index 3887db3..235f76a 100644
--- a/ext/nmatrix/math/laswp.h
+++ b/ext/nmatrix/math/laswp.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/long_dtype.h b/ext/nmatrix/math/long_dtype.h
index 3993786..18341ec 100644
--- a/ext/nmatrix/math/long_dtype.h
+++ b/ext/nmatrix/math/long_dtype.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/math.h b/ext/nmatrix/math/math.h
index 742b38c..fc329cd 100644
--- a/ext/nmatrix/math/math.h
+++ b/ext/nmatrix/math/math.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -69,11 +69,17 @@
  */
 
 extern "C" { // These need to be in an extern "C" block or you'll get all kinds of undefined symbol errors.
+#if defined HAVE_CBLAS_H
   #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
 
-  #ifdef HAVE_CLAPACK_H
-    #include <clapack.h>
-  #endif
+#if defined HAVE_CLAPACK_H
+  #include <clapack.h>
+#elif defined HAVE_ATLAS_CLAPACK_H
+  #include <atlas/clapack.h>
+#endif
 }
 
 #include <algorithm> // std::min, std::max
@@ -531,15 +537,15 @@ inline void smmp_sort_columns(const size_t n, const IType* ia, IType* ja, DType*
  */
 template <typename DType>
 inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, DType* A, const int lda) {
-#ifdef HAVE_CLAPACK_H
+#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
   rb_raise(rb_eNotImpError, "not yet implemented for non-BLAS dtypes");
 #else
-  rb_raise(rb_eNotImpError, "only LAPACK version implemented thus far");
+  rb_raise(rb_eNotImpError, "only CLAPACK version implemented thus far");
 #endif
   return 0;
 }
 
-#ifdef HAVE_CLAPACK_H
+#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
 template <>
 inline int potrf(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
   return clapack_spotrf(order, uplo, N, A, lda);
@@ -928,7 +934,7 @@ inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, cons
 }
 
 
-#ifdef HAVE_CLAPACK_H
+#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
 template <bool is_complex>
 inline void lauum(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int N, float* A, const int lda) {
   clapack_slauum(order, uplo, N, A, lda);
@@ -1019,7 +1025,7 @@ inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const
 }
 
 
-#ifdef HAVE_CLAPACK_H
+#if defined HAVE_CLAPACK_H || defined HAVE_ATLAS_CLAPACK_H
 template <>
 inline int potri(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const int n, float* a, const int lda) {
   return clapack_spotri(order, uplo, n, a, lda);
diff --git a/ext/nmatrix/math/nrm2.h b/ext/nmatrix/math/nrm2.h
index 7463677..68e563b 100644
--- a/ext/nmatrix/math/nrm2.h
+++ b/ext/nmatrix/math/nrm2.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -98,7 +98,7 @@ ReturnDType nrm2(const int N, const DType* X, const int incX) {
 }
 
 
-#ifdef HAVE_CBLAS_H
+#if defined HAVE_CBLAS_H || defined HAVE_ATLAS_CBLAS_H
 template <>
 inline float nrm2(const int N, const float* X, const int incX) {
   return cblas_snrm2(N, X, incX);
diff --git a/ext/nmatrix/math/potrs.h b/ext/nmatrix/math/potrs.h
index 9bae514..6bc62ce 100644
--- a/ext/nmatrix/math/potrs.h
+++ b/ext/nmatrix/math/potrs.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -60,7 +60,11 @@
 #define POTRS_H
 
 extern "C" {
+#if defined HAVE_CBLAS_H
   #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
 }
 
 namespace nm { namespace math {
@@ -122,4 +126,4 @@ inline int clapack_potrs(const enum CBLAS_ORDER order, const enum CBLAS_UPLO upl
 
 } } // end nm::math
 
-#endif // POTRS_H
\ No newline at end of file
+#endif // POTRS_H
diff --git a/ext/nmatrix/math/rot.h b/ext/nmatrix/math/rot.h
index ae839b8..c9906da 100644
--- a/ext/nmatrix/math/rot.h
+++ b/ext/nmatrix/math/rot.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/rotg.h b/ext/nmatrix/math/rotg.h
index 9f2fbf4..98cc158 100644
--- a/ext/nmatrix/math/rotg.h
+++ b/ext/nmatrix/math/rotg.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/scal.h b/ext/nmatrix/math/scal.h
index 3a4a527..1189091 100644
--- a/ext/nmatrix/math/scal.h
+++ b/ext/nmatrix/math/scal.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/swap.h b/ext/nmatrix/math/swap.h
index bd37801..2a5d266 100644
--- a/ext/nmatrix/math/swap.h
+++ b/ext/nmatrix/math/swap.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/math/trsm.h b/ext/nmatrix/math/trsm.h
index bccda3d..1f880b8 100644
--- a/ext/nmatrix/math/trsm.h
+++ b/ext/nmatrix/math/trsm.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -60,7 +60,11 @@
 
 
 extern "C" {
+#if defined HAVE_CBLAS_H
   #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
 }
 
 namespace nm { namespace math {
@@ -380,4 +384,4 @@ inline void trsm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const
 
 
 } }  // namespace nm::math
-#endif // TRSM_H
\ No newline at end of file
+#endif // TRSM_H
diff --git a/ext/nmatrix/nm_memory.h b/ext/nmatrix/nm_memory.h
new file mode 100644
index 0000000..8ef608f
--- /dev/null
+++ b/ext/nmatrix/nm_memory.h
@@ -0,0 +1,60 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
+//
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+//
+// Please see LICENSE.txt for additional copyright notices.
+//
+// == Contributing
+//
+// By contributing source code to SciRuby, you agree to be bound by
+// our Contributor Agreement:
+//
+// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+//
+// == nm_memory.h
+//
+// Macros for memory allocation and freeing
+
+/**
+ * We define these macros, which just call the ruby ones, as this makes
+ * debugging memory issues (particularly those involving interaction with
+ * the ruby GC) easier, as it's possible to add debugging code temporarily.
+ */
+#ifndef __NM_MEMORY_H__
+#define __NM_MEMORY_H__
+
+#include <ruby.h>
+// The NM_* macros below forward unchanged to Ruby's ALLOC, ALLOC_N, REALLOC_N, ALLOCA_N and xfree.
+#define NM_ALLOC(type) (ALLOC(type))
+
+#define NM_ALLOC_N(type, n) (ALLOC_N(type, n))
+
+#define NM_REALLOC_N(var, type, n) (REALLOC_N(var, type, n))
+
+#define NM_ALLOCA_N(type, n) (ALLOCA_N(type, n))
+
+#define NM_FREE(var) (xfree(var))
+// Allocates one `type` with plain malloc instead of a Ruby macro; the result is not checked for NULL here.
+#define NM_ALLOC_NONRUBY(type) ((type*) malloc(sizeof(type)))
+
+//Defines whether to do conservative gc registrations, i.e. those
+//registrations that we're not that sure are necessary.
+//#define NM_GC_CONSERVATIVE
+// NM_CONSERVATIVE(statement) expands to (statement) when NM_GC_CONSERVATIVE is defined, and to nothing otherwise.
+#ifdef NM_GC_CONSERVATIVE
+#define NM_CONSERVATIVE(statement) (statement)
+#else
+#define NM_CONSERVATIVE(statement)
+#endif //NM_GC_CONSERVATIVE
+
+#endif
diff --git a/ext/nmatrix/nmatrix.cpp b/ext/nmatrix/nmatrix.cpp
index 992a8eb..ca57a2b 100644
--- a/ext/nmatrix/nmatrix.cpp
+++ b/ext/nmatrix/nmatrix.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -31,12 +31,19 @@
  * Standard Includes
  */
 
-#include <cblas.h>
-#ifdef HAVE_CLAPACK_H
 extern "C" {
+#if defined HAVE_CBLAS_H
+  #include <cblas.h>
+#elif defined HAVE_ATLAS_CBLAS_H
+  #include <atlas/cblas.h>
+#endif
+
+#if defined HAVE_CLAPACK_H
   #include <clapack.h>
-}
+#elif defined HAVE_ATLAS_CLAPACK_H
+  #include <atlas/clapack.h>
 #endif
+}
 
 #include <ruby.h>
 #include <algorithm> // std::min
@@ -52,7 +59,7 @@ extern "C" {
 #include "math/math.h"
 #include "util/io.h"
 #include "storage/storage.h"
-#include "storage/list.h"
+#include "storage/list/list.h"
 #include "storage/yale/yale.h"
 
 #include "nmatrix.h"
@@ -76,47 +83,6 @@ extern "C" {
 namespace nm {
 
   /*
-   * Read the shape from a matrix storage file, and ignore any padding.
-   *
-   * shape should already be allocated before calling this.
-   */
-  void read_padded_shape(std::ifstream& f, size_t dim, size_t* shape) {
-    size_t bytes_read = 0;
-
-    // Read shape
-    for (size_t i = 0; i < dim; ++i) {
-      IType s;
-      f.read(reinterpret_cast<char*>(&s), sizeof(IType));
-      shape[i] = s;
-
-      bytes_read += sizeof(IType);
-    }
-
-    // Ignore padding
-    f.ignore(bytes_read % 8);
-  }
-
-  void write_padded_shape(std::ofstream& f, size_t dim, size_t* shape) {
-    size_t bytes_written = 0;
-
-    // Write shape
-    for (size_t i = 0; i < dim; ++i) {
-      IType s = shape[i];
-      f.write(reinterpret_cast<const char*>(&s), sizeof(IType));
-
-      bytes_written += sizeof(IType);
-    }
-
-    // Pad with zeros
-    while (bytes_written % 8) {
-      IType zero = 0;
-      f.write(reinterpret_cast<const char*>(&zero), sizeof(IType));
-
-      bytes_written += sizeof(IType);
-    }
-  }
-
-  /*
    * This function is pulled out separately so it can be called for hermitian matrix writing, which also uses it.
    */
   template <typename DType>
diff --git a/ext/nmatrix/nmatrix.h b/ext/nmatrix/nmatrix.h
index eb423d1..3ab92d3 100644
--- a/ext/nmatrix/nmatrix.h
+++ b/ext/nmatrix/nmatrix.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -53,6 +53,8 @@
 	#endif
 #endif
 
+#include "nm_memory.h"
+
 /*
  * Macros
  */
@@ -113,21 +115,27 @@
    *      return enumerator_init(enumerator_allocate(rb_cEnumerator), obj, meth, argc, argv);
    *    }
    */
+
+//opening portion -- this allows unregistering any objects in use before returning
+ #define RETURN_SIZED_ENUMERATOR_PRE do { \
+   if (!rb_block_given_p()) {
+
+//remaining portion
  #ifdef RUBY_2
   #ifndef RETURN_SIZED_ENUMERATOR
    #undef RETURN_SIZED_ENUMERATOR
    // Ruby 2.0 and higher has rb_enumeratorize_with_size instead of rb_enumeratorize.
    // We want to support both in the simplest way possible.
-   #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do {   \
-    if (!rb_block_given_p())                                        \
-      return rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), (argc), (argv), (size_fn));  \
+   #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) \
+        return rb_enumeratorize_with_size((obj), ID2SYM(rb_frame_this_func()), (argc), (argv), (size_fn));  \
+      } \
     } while (0)
   #endif
  #else
    #undef RETURN_SIZED_ENUMERATOR
-   #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) do {				            \
-    if (!rb_block_given_p())					                                              \
-      return rb_enumeratorize((obj), ID2SYM(rb_frame_this_func()), (argc), (argv));	\
+   #define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn) \
+        return rb_enumeratorize((obj), ID2SYM(rb_frame_this_func()), (argc), (argv));	\
+      } \
     } while (0)
  #endif
 
@@ -278,6 +286,18 @@ NM_DEF_STRUCT_PRE(NMATRIX);   // struct NMATRIX {
   NM_DECL_STRUCT(STORAGE*, storage);  // STORAGE* storage;  // Pointer to storage struct.
 NM_DEF_STRUCT_POST(NMATRIX);  // };
 
+/* Structs for dealing with VALUEs in use so that they don't get GC'd */
+// nm_gc_ll_node: one entry of a singly linked list describing a registered array of VALUEs.
+typedef struct __NM_GC_LL_NODE {
+  VALUE* val;              // pointer to the first VALUE of the registered array
+  size_t n;                // number of VALUEs starting at val
+  __NM_GC_LL_NODE* next;   // next node in the list, or NULL at the end
+} nm_gc_ll_node;
+// nm_gc_holder: holds the head pointer of a list of nm_gc_ll_node entries.
+typedef struct __NM_GC_HOLDER {
+  __NM_GC_LL_NODE* start;  // first node of the list, or NULL when the list is empty
+} nm_gc_holder;
+
 #define NM_MAX_RANK 15
 
 #define UnwrapNMatrix(obj,var)  Data_Get_Struct(obj, NMATRIX, var)
@@ -355,16 +375,21 @@ extern "C" {
 	NM_DECL_ENUM(dtype_t, nm_dtype_min(VALUE));
 
   // Non-API functions needed by other cpp files.
-	NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage);
-  NMATRIX* nm_cast_with_ctype_args(NMATRIX* self, nm::stype_t new_stype, nm::dtype_t new_dtype, void* init_ptr);
+	NMATRIX* nm_create(NM_DECL_ENUM(stype_t, stype), STORAGE* storage);
+  NMATRIX* nm_cast_with_ctype_args(NMATRIX* self, NM_DECL_ENUM(stype_t, new_stype), NM_DECL_ENUM(dtype_t, new_dtype), void* init_ptr);
 	VALUE    nm_cast(VALUE self, VALUE new_stype_symbol, VALUE new_dtype_symbol, VALUE init);
 	void     nm_mark(NMATRIX* mat);
 	void     nm_delete(NMATRIX* mat);
 	void     nm_delete_ref(NMATRIX* mat);
-  void     nm_mark(NMATRIX* mat);
   void     nm_register_values(VALUE* vals, size_t n);
   void     nm_unregister_values(VALUE* vals, size_t n);
-
+  void     nm_register_value(VALUE& val);
+  void     nm_unregister_value(VALUE& val);
+  void     nm_register_storage(nm::stype_t stype, const STORAGE* storage);
+  void     nm_unregister_storage(nm::stype_t stype, const STORAGE* storage);
+  void     nm_register_nmatrix(NMATRIX* nmatrix);
+  void     nm_unregister_nmatrix(NMATRIX* nmatrix);
+  void	   nm_completely_unregister_value(VALUE& val);
 #ifdef __cplusplus
 }
 #endif
diff --git a/ext/nmatrix/ruby_constants.cpp b/ext/nmatrix/ruby_constants.cpp
index 04c442c..20691e7 100644
--- a/ext/nmatrix/ruby_constants.cpp
+++ b/ext/nmatrix/ruby_constants.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-// NMatrix is Copyright (c) 2012, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -89,6 +89,8 @@ VALUE cNMatrix,
 			cNMatrix_YaleFunctions,
 			cNMatrix_BLAS,
 			cNMatrix_LAPACK,
+
+      cNMatrix_GC_holder,
 			
 			nm_eDataTypeError,
       nm_eConvergenceError,
diff --git a/ext/nmatrix/ruby_constants.h b/ext/nmatrix/ruby_constants.h
index 7cd8a01..5f2eecb 100644
--- a/ext/nmatrix/ruby_constants.h
+++ b/ext/nmatrix/ruby_constants.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -91,6 +91,8 @@ extern VALUE	cNMatrix,
 							cNMatrix_YaleFunctions,
 							cNMatrix_BLAS,
 							cNMatrix_LAPACK,
+
+							cNMatrix_GC_holder,
 			
 							nm_eDataTypeError,
               nm_eConvergenceError,
diff --git a/ext/nmatrix/ruby_nmatrix.c b/ext/nmatrix/ruby_nmatrix.c
index 18b84f4..13d2dc3 100644
--- a/ext/nmatrix/ruby_nmatrix.c
+++ b/ext/nmatrix/ruby_nmatrix.c
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -51,6 +51,7 @@ static VALUE nm_capacity(VALUE self);
 static VALUE nm_each_with_indices(VALUE nmatrix);
 static VALUE nm_each_stored_with_indices(VALUE nmatrix);
 static VALUE nm_each_ordered_stored_with_indices(VALUE nmatrix);
+static VALUE nm_map_stored(VALUE nmatrix);
 
 static SLICE* get_slice(size_t dim, int argc, VALUE* arg, size_t* shape);
 static VALUE nm_xslice(int argc, VALUE* argv, void* (*slice_func)(const STORAGE*, SLICE*), void (*delete_func)(NMATRIX*), VALUE self);
@@ -74,10 +75,27 @@ static VALUE nm_ew_##name(VALUE left_val, VALUE right_val) {  \
   return elementwise_op(nm::EW_##oper, left_val, right_val);  \
 }
 
+#define DEF_UNARY_RUBY_ACCESSOR(oper, name)                 \
+static VALUE nm_unary_##name(VALUE self) {  /* defines nm_unary_<name>, forwarding self to unary_op with nm::UNARY_<oper> */ \
+  return unary_op(nm::UNARY_##oper, self);  \
+}
+
+// Defines nm_noncom_ew_<name>: forwards self, argv[0] and the optional argv[1] (Qfalse if omitted) to noncom_elementwise_op.
+#define DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(oper, name) \
+static VALUE nm_noncom_ew_##name(int argc, VALUE* argv, VALUE self) { \
+  /* These accessors are registered with arity -1, so a zero-argument call must be rejected before argv[0] is read. */ \
+  if (argc < 1) \
+    rb_raise(rb_eArgError, "wrong number of arguments (0 for 1+)"); \
+  return noncom_elementwise_op(nm::NONCOM_EW_##oper, self, argv[0], (argc > 1) ? argv[1] : Qfalse); \
+}
+
+
 /*
  * Macro declares a corresponding accessor function prototype for some element-wise operation.
  */
 #define DECL_ELEMENTWISE_RUBY_ACCESSOR(name)    static VALUE nm_ew_##name(VALUE left_val, VALUE right_val);
+#define DECL_UNARY_RUBY_ACCESSOR(name)          static VALUE nm_unary_##name(VALUE self);
+#define DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(name)    static VALUE nm_noncom_ew_##name(int argc, VALUE* argv, VALUE self);
 
 DECL_ELEMENTWISE_RUBY_ACCESSOR(add)
 DECL_ELEMENTWISE_RUBY_ACCESSOR(subtract)
@@ -91,8 +109,36 @@ DECL_ELEMENTWISE_RUBY_ACCESSOR(lt)
 DECL_ELEMENTWISE_RUBY_ACCESSOR(gt)
 DECL_ELEMENTWISE_RUBY_ACCESSOR(leq)
 DECL_ELEMENTWISE_RUBY_ACCESSOR(geq)
+DECL_UNARY_RUBY_ACCESSOR(sin)
+DECL_UNARY_RUBY_ACCESSOR(cos)
+DECL_UNARY_RUBY_ACCESSOR(tan)
+DECL_UNARY_RUBY_ACCESSOR(asin)
+DECL_UNARY_RUBY_ACCESSOR(acos)
+DECL_UNARY_RUBY_ACCESSOR(atan)
+DECL_UNARY_RUBY_ACCESSOR(sinh)
+DECL_UNARY_RUBY_ACCESSOR(cosh)
+DECL_UNARY_RUBY_ACCESSOR(tanh)
+DECL_UNARY_RUBY_ACCESSOR(asinh)
+DECL_UNARY_RUBY_ACCESSOR(acosh)
+DECL_UNARY_RUBY_ACCESSOR(atanh)
+DECL_UNARY_RUBY_ACCESSOR(exp)
+DECL_UNARY_RUBY_ACCESSOR(log2)
+DECL_UNARY_RUBY_ACCESSOR(log10)
+DECL_UNARY_RUBY_ACCESSOR(sqrt)
+DECL_UNARY_RUBY_ACCESSOR(erf)
+DECL_UNARY_RUBY_ACCESSOR(erfc)
+DECL_UNARY_RUBY_ACCESSOR(cbrt)
+DECL_UNARY_RUBY_ACCESSOR(gamma)
+DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(atan2)
+DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(ldexp)
+DECL_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(hypot)
+
+//log can be unary, but also take a base argument, as with Math.log
+static VALUE nm_unary_log(int argc, VALUE* argv, VALUE self);
 
 static VALUE elementwise_op(nm::ewop_t op, VALUE left_val, VALUE right_val);
+static VALUE unary_op(nm::unaryop_t op, VALUE self);
+static VALUE noncom_elementwise_op(nm::noncom_ewop_t op, VALUE self, VALUE other, VALUE orderflip);
 
 static VALUE nm_symmetric(VALUE self);
 static VALUE nm_hermitian(VALUE self);
@@ -144,6 +190,12 @@ void Init_nmatrix() {
 	 */
 	nm_eStorageTypeError = rb_define_class("StorageTypeError", rb_eStandardError);
 
+  /*
+   * Class that holds values in use by the C code.
+   */
+  cNMatrix_GC_holder = rb_define_class("NMGCHolder", rb_cObject);
+
+
 	///////////////////
 	// Class Methods //
 	///////////////////
@@ -169,7 +221,7 @@ void Init_nmatrix() {
 	rb_define_method(cNMatrix, "write", (METHOD)nm_write, -1);
 
 	// Technically, the following function is a copy constructor.
-	rb_define_method(cNMatrix, "transpose", (METHOD)nm_init_transposed, 0);
+	rb_define_protected_method(cNMatrix, "clone_transpose", (METHOD)nm_init_transposed, 0);
 
 	rb_define_method(cNMatrix, "dtype", (METHOD)nm_dtype, 0);
 	rb_define_method(cNMatrix, "stype", (METHOD)nm_stype, 0);
@@ -198,8 +250,10 @@ void Init_nmatrix() {
 	rb_define_protected_method(cNMatrix, "__dense_map_pair__", (METHOD)nm_dense_map_pair, 1);
 	rb_define_method(cNMatrix, "each_with_indices", (METHOD)nm_each_with_indices, 0);
 	rb_define_method(cNMatrix, "each_stored_with_indices", (METHOD)nm_each_stored_with_indices, 0);
+	rb_define_method(cNMatrix, "map_stored", (METHOD)nm_map_stored, 0);
 	rb_define_method(cNMatrix, "each_ordered_stored_with_indices", (METHOD)nm_each_ordered_stored_with_indices, 0);
 	rb_define_protected_method(cNMatrix, "__list_map_merged_stored__", (METHOD)nm_list_map_merged_stored, 2);
+	rb_define_protected_method(cNMatrix, "__list_map_stored__", (METHOD)nm_list_map_stored, 1);
 	rb_define_protected_method(cNMatrix, "__yale_map_merged_stored__", (METHOD)nm_yale_map_merged_stored, 2);
 	rb_define_protected_method(cNMatrix, "__yale_map_stored__", (METHOD)nm_yale_map_stored, 0);
 	rb_define_protected_method(cNMatrix, "__yale_stored_diagonal_each_with_indices__", (METHOD)nm_yale_stored_diagonal_each_with_indices, 0);
@@ -214,6 +268,32 @@ void Init_nmatrix() {
   rb_define_method(cNMatrix, "**",    (METHOD)nm_ew_power,    1);
   rb_define_method(cNMatrix, "%",     (METHOD)nm_ew_mod,      1);
 
+  rb_define_method(cNMatrix, "atan2", (METHOD)nm_noncom_ew_atan2, -1);
+  rb_define_method(cNMatrix, "ldexp", (METHOD)nm_noncom_ew_ldexp, -1);
+  rb_define_method(cNMatrix, "hypot", (METHOD)nm_noncom_ew_hypot, -1);
+
+  rb_define_method(cNMatrix, "sin",   (METHOD)nm_unary_sin,   0);
+  rb_define_method(cNMatrix, "cos",   (METHOD)nm_unary_cos,   0);
+  rb_define_method(cNMatrix, "tan",   (METHOD)nm_unary_tan,   0);
+  rb_define_method(cNMatrix, "asin",  (METHOD)nm_unary_asin,  0);
+  rb_define_method(cNMatrix, "acos",  (METHOD)nm_unary_acos,  0);
+  rb_define_method(cNMatrix, "atan",  (METHOD)nm_unary_atan,  0);
+  rb_define_method(cNMatrix, "sinh",  (METHOD)nm_unary_sinh,  0);
+  rb_define_method(cNMatrix, "cosh",  (METHOD)nm_unary_cosh,  0);
+  rb_define_method(cNMatrix, "tanh",  (METHOD)nm_unary_tanh,  0);
+  rb_define_method(cNMatrix, "asinh", (METHOD)nm_unary_asinh, 0);
+  rb_define_method(cNMatrix, "acosh", (METHOD)nm_unary_acosh, 0);
+  rb_define_method(cNMatrix, "atanh", (METHOD)nm_unary_atanh, 0);
+  rb_define_method(cNMatrix, "exp",   (METHOD)nm_unary_exp,   0);
+  rb_define_method(cNMatrix, "log2",  (METHOD)nm_unary_log2,  0);
+  rb_define_method(cNMatrix, "log10", (METHOD)nm_unary_log10, 0);
+  rb_define_method(cNMatrix, "sqrt",  (METHOD)nm_unary_sqrt,  0);
+  rb_define_method(cNMatrix, "erf",   (METHOD)nm_unary_erf,   0);
+  rb_define_method(cNMatrix, "erfc",  (METHOD)nm_unary_erfc,  0);
+  rb_define_method(cNMatrix, "cbrt",  (METHOD)nm_unary_cbrt,  0);
+  rb_define_method(cNMatrix, "gamma", (METHOD)nm_unary_gamma, 0);
+  rb_define_method(cNMatrix, "log",   (METHOD)nm_unary_log,  -1);
+
 	rb_define_method(cNMatrix, "=~", (METHOD)nm_ew_eqeq, 1);
 	rb_define_method(cNMatrix, "!~", (METHOD)nm_ew_neq, 1);
 	rb_define_method(cNMatrix, "<=", (METHOD)nm_ew_leq, 1);
@@ -283,9 +363,9 @@ void Init_nmatrix() {
  * Slice constructor.
  */
 static SLICE* alloc_slice(size_t dim) {
-  SLICE* slice = ALLOC(SLICE);
-  slice->coords = ALLOC_N(size_t, dim);
-  slice->lengths = ALLOC_N(size_t, dim);
+  SLICE* slice = NM_ALLOC(SLICE);
+  slice->coords = NM_ALLOC_N(size_t, dim);
+  slice->lengths = NM_ALLOC_N(size_t, dim);
   return slice;
 }
 
@@ -294,9 +374,9 @@ static SLICE* alloc_slice(size_t dim) {
  * Slice destructor.
  */
 static void free_slice(SLICE* slice) {
-  xfree(slice->coords);
-  xfree(slice->lengths);
-  xfree(slice);
+  NM_FREE(slice->coords);
+  NM_FREE(slice->lengths);
+  NM_FREE(slice);
 }
 
 
@@ -304,7 +384,7 @@ static void free_slice(SLICE* slice) {
  * Allocator.
  */
 static VALUE nm_alloc(VALUE klass) {
-  NMATRIX* mat = ALLOC(NMATRIX);
+  NMATRIX* mat = NM_ALLOC(NMATRIX);
   mat->storage = NULL;
 
   // DO NOT MARK This STRUCT. It has no storage allocated, and no stype, so mark will do an invalid something.
@@ -320,6 +400,7 @@ static VALUE nm_alloc(VALUE klass) {
  * just return the original matrix's capacity.
  */
 static VALUE nm_capacity(VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
   VALUE cap;
 
   switch(NM_STYPE(self)) {
@@ -336,9 +417,11 @@ static VALUE nm_capacity(VALUE self) {
     break;
 
   default:
+    NM_CONSERVATIVE(nm_unregister_value(self));
     rb_raise(nm_eStorageTypeError, "unrecognized stype in nm_capacity()");
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(self));
   return cap;
 }
 
@@ -363,7 +446,7 @@ void nm_delete(NMATRIX* mat) {
   };
   ttable[mat->stype](mat->storage);
 
-  xfree(mat);
+  NM_FREE(mat);
 }
 
 /*
@@ -377,33 +460,186 @@ void nm_delete_ref(NMATRIX* mat) {
   };
   ttable[mat->stype](mat->storage);
 
-  xfree(mat);
+  NM_FREE(mat);
+}
+
+
+/**
+ * These variables hold a linked list of VALUEs that are registered to be in
+ * use by nmatrix so that they can be marked when GC runs.
+ */
+static VALUE* gc_value_holder = NULL; // malloc'd slot holding the Ruby object that wraps gc_value_holder_struct
+static nm_gc_holder* gc_value_holder_struct = NULL; // list of currently registered VALUE arrays
+static nm_gc_holder* allocated_pool = NULL; // an object pool for linked list nodes; using pooling is in some cases a substantial performance improvement
+
+/**
+ * GC marking function for the values that have been registered: walks the given list and marks every registered VALUE array.
+ */
+static void __nm_mark_value_container(nm_gc_holder* gc_value_holder_struct) { // parameter shadows the file-level static of the same name
+  if (gc_value_holder_struct && gc_value_holder_struct->start) {
+    nm_gc_ll_node* curr = gc_value_holder_struct->start;
+    while (curr) {
+      rb_gc_mark_locations(curr->val, curr->val + curr->n); // mark the curr->n VALUEs starting at curr->val
+      curr = curr->next;
+    }
+  }
+}
+
+/**
+ * Initializes the linked list of in-use VALUEs (and the node pool) if it
+ * hasn't been done already.
+ */
+static void __nm_initialize_value_container() {
+  if (gc_value_holder == NULL) {
+    gc_value_holder_struct = NM_ALLOC_NONRUBY(nm_gc_holder); // NOTE(review): the three malloc results here are not checked for NULL
+    allocated_pool = NM_ALLOC_NONRUBY(nm_gc_holder);
+    gc_value_holder = NM_ALLOC_NONRUBY(VALUE);
+    gc_value_holder_struct->start = NULL;
+    allocated_pool->start = NULL;
+    *gc_value_holder = Data_Wrap_Struct(cNMatrix_GC_holder, __nm_mark_value_container, NULL, gc_value_holder_struct); // mark function walks the list; no free function is supplied
+    rb_global_variable(gc_value_holder); // register the wrapper object's slot with the GC as a global
+  }
 }
 
 /*
- * Register the addresses of an array of VALUEs with the gc to avoid collection
+ * Register an array of VALUEs to avoid their collection
  * while using them internally.
  */
 void nm_register_values(VALUE* values, size_t n) {
+  if (!gc_value_holder_struct)
+    __nm_initialize_value_container();
   if (values) {
-    for (size_t i = n; i-- > 0;) {
-      rb_gc_register_address(values + i);
+    nm_gc_ll_node* to_insert = NULL;
+    if (allocated_pool->start) {
+      to_insert = allocated_pool->start;
+      allocated_pool->start = to_insert->next;
+    } else {
+      to_insert = NM_ALLOC_NONRUBY(nm_gc_ll_node);
     }
+    to_insert->val = values;
+    to_insert->n = n;
+    to_insert->next = gc_value_holder_struct->start;
+    gc_value_holder_struct->start = to_insert;
   }
 }
 
 /*
- * Unregister the addresses of an array of VALUEs with the gc to allow normal
+ * Unregister an array of VALUEs with the gc to allow normal
  * garbage collection to occur again.
  */
 void nm_unregister_values(VALUE* values, size_t n) {
   if (values) {
-    for (size_t i = n; i-- > 0;) {
-      rb_gc_unregister_address(values + i);
+    if (gc_value_holder_struct) {
+      nm_gc_ll_node* curr = gc_value_holder_struct->start;
+      nm_gc_ll_node* last = NULL;
+      while (curr) {
+        if (curr->val == values) {
+          if (last) {
+            last->next = curr->next;
+          } else {
+            gc_value_holder_struct->start = curr->next;
+          }
+          curr->next = allocated_pool->start;
+          curr->val = NULL;
+          curr->n = 0;
+          allocated_pool->start = curr;
+          break;
+        }
+        last = curr;
+        curr = curr->next;
+      }
+    }
+  }
+}
+
+/**
+ * Register a single VALUE as in use to avoid garbage collection.
+ */
+void nm_register_value(VALUE& val) {
+  nm_register_values(&val, 1); // the address of val is what gets stored, so val must stay alive until it is unregistered
+}
+
+/**
+ * Unregister a single VALUE to allow normal garbage collection.
+ */
+void nm_unregister_value(VALUE& val) {
+  nm_unregister_values(&val, 1); // matched by address, so this must be the same variable that was registered
+}
+
+/**
+ * Removes every registration of a single VALUE (matched by its address) from
+ * the gc list.  This can be dangerous.  Primarily used when something is about
+ * to be freed and replaced, so that any residual registrations won't be
+ * accessed after the free.
+ **/
+void nm_completely_unregister_value(VALUE& val) {
+  if (gc_value_holder_struct) {
+    nm_gc_ll_node* curr = gc_value_holder_struct->start;
+    nm_gc_ll_node* last = NULL;
+    while (curr) {
+      if (curr->val == &val) {
+	if (last) { // unlink curr from the registered list
+	  last->next = curr->next;
+	} else {
+	  gc_value_holder_struct->start = curr->next;
+	}
+	nm_gc_ll_node* temp_next = curr->next; // remember the successor before curr->next is overwritten
+	curr->next = allocated_pool->start; // push the cleared node onto the pool for reuse
+	curr->val = NULL;
+	curr->n = 0;
+	allocated_pool->start = curr;
+	curr = temp_next; // keep scanning: last is unchanged because curr was removed
+      } else {
+	last = curr;
+	curr = curr->next;
+      }
     }
   }
 }
 
+
+
+/**
+ * Register a STORAGE struct of the supplied stype to avoid garbage collection
+ * of its internals.
+ *
+ * Delegates to the storage-specific methods.  They will check dtype and ignore
+ * non-rubyobject dtypes, so it's safe to pass any storage in.
+ */
+void nm_register_storage(nm::stype_t stype, const STORAGE* storage) {
+  STYPE_REGISTER_TABLE(ttable); // macro defined elsewhere; presumably declares ttable as a per-stype function table
+  ttable[stype](storage); // call the entry selected by stype
+}
+
+/**
+ * Unregister a STORAGE struct of the supplied stype to allow normal garbage collection
+ * of its internals.
+ *
+ * Delegates to the storage-specific methods.  They will check dtype and ignore
+ * non-rubyobject dtypes, so it's safe to pass any storage in.
+ * Counterpart of nm_register_storage.
+ */
+void nm_unregister_storage(nm::stype_t stype, const STORAGE* storage) {
+  STYPE_UNREGISTER_TABLE(ttable); // macro defined elsewhere; presumably declares ttable as a per-stype function table
+  ttable[stype](storage); // call the entry selected by stype
+}
+
+/**
+ * Registers an NMATRIX struct's storage to avoid garbage collection of its internals.
+ */
+void nm_register_nmatrix(NMATRIX* nmatrix) {
+  if (nmatrix) // a NULL matrix is silently ignored
+    nm_register_storage(nmatrix->stype, nmatrix->storage);
+}
+
+/**
+ * Unregisters an NMATRIX struct's storage to allow normal garbage collection of its internals.
+ */
+void nm_unregister_nmatrix(NMATRIX* nmatrix) {
+  if (nmatrix) // a NULL matrix is silently ignored
+    nm_unregister_storage(nmatrix->stype, nmatrix->storage);
+}
+
 /*
  * call-seq:
  *     dtype -> Symbol
@@ -427,7 +663,6 @@ static VALUE nm_dtype(VALUE self) {
  * This is a singleton method on NMatrix, e.g., NMatrix.upcast(:int32, :int64)
  */
 static VALUE nm_upcast(VALUE self, VALUE t1, VALUE t2) {
-
   nm::dtype_t d1    = nm_dtype_from_rbsymbol(t1),
               d2    = nm_dtype_from_rbsymbol(t2);
 
@@ -462,18 +697,26 @@ static VALUE nm_default_value(VALUE self) {
  * Iterate over all entries of any matrix in standard storage order (as with #each), and include the indices.
  */
 static VALUE nm_each_with_indices(VALUE nmatrix) {
-  volatile VALUE nm = nmatrix;
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
+  VALUE to_return = Qnil;
 
-  switch(NM_STYPE(nm)) {
+  switch(NM_STYPE(nmatrix)) {
   case nm::YALE_STORE:
-    return nm_yale_each_with_indices(nm);
+    to_return = nm_yale_each_with_indices(nmatrix);
+    break;
   case nm::DENSE_STORE:
-    return nm_dense_each_with_indices(nm);
+    to_return = nm_dense_each_with_indices(nmatrix);
+    break;
   case nm::LIST_STORE:
-    return nm_list_each_with_indices(nm, false);
+    to_return = nm_list_each_with_indices(nmatrix, false);
+    break;
   default:
+    NM_CONSERVATIVE(nm_unregister_value(nmatrix));
     rb_raise(nm_eDataTypeError, "Not a proper storage type");
   }
+
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
+  return to_return;
 }
 
 /*
@@ -485,41 +728,88 @@ static VALUE nm_each_with_indices(VALUE nmatrix) {
  * i, j, ..., and the entry itself.
  */
 static VALUE nm_each_stored_with_indices(VALUE nmatrix) {
-  volatile VALUE nm = nmatrix;
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
+  VALUE to_return = Qnil;
 
-  switch(NM_STYPE(nm)) {
+  switch(NM_STYPE(nmatrix)) {
   case nm::YALE_STORE:
-    return nm_yale_each_stored_with_indices(nm);
+    to_return = nm_yale_each_stored_with_indices(nmatrix);
+    break;
   case nm::DENSE_STORE:
-    return nm_dense_each_with_indices(nm);
+    to_return = nm_dense_each_with_indices(nmatrix);
+    break;
   case nm::LIST_STORE:
-    return nm_list_each_with_indices(nm, true);
+    to_return = nm_list_each_with_indices(nmatrix, true);
+    break;
   default:
+    NM_CONSERVATIVE(nm_unregister_value(nmatrix));
     rb_raise(nm_eDataTypeError, "Not a proper storage type");
   }
+
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
+  return to_return;
 }
 
 
 /*
  * call-seq:
+ *     map_stored -> Enumerator
+ *
+ * Map over the stored entries of any matrix, dispatching on stype to nm_yale_map_stored,
+ * nm_dense_map, or nm_list_map_stored (called with Qnil) and returning that function's result.
+ * NOTE(review): the earlier text was copied from #each_stored_with_indices; confirm what the block is yielded.
+ */
+static VALUE nm_map_stored(VALUE nmatrix) {
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
+  VALUE to_return = Qnil;
+
+  switch(NM_STYPE(nmatrix)) {
+  case nm::YALE_STORE:
+    to_return = nm_yale_map_stored(nmatrix);
+    break;
+  case nm::DENSE_STORE:
+    to_return = nm_dense_map(nmatrix);
+    break;
+  case nm::LIST_STORE:
+    to_return = nm_list_map_stored(nmatrix, Qnil);
+    break;
+  default:
+    NM_CONSERVATIVE(nm_unregister_value(nmatrix)); // unregister before raising, since rb_raise does not return here
+    rb_raise(nm_eDataTypeError, "Not a proper storage type");
+  }
+
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
+  return to_return;
+}
+
+/*
+ * call-seq:
  *     each_ordered_stored_with_indices -> Enumerator
  *
  * Very similar to #each_stored_with_indices. The key difference is that it enforces matrix ordering rather
  * than storage ordering, which only matters if your matrix is Yale.
  */
 static VALUE nm_each_ordered_stored_with_indices(VALUE nmatrix) {
-  volatile VALUE nm = nmatrix;
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
+  VALUE to_return = Qnil;
 
-  switch(NM_STYPE(nm)) {
+  switch(NM_STYPE(nmatrix)) {
   case nm::YALE_STORE:
-    return nm_yale_each_ordered_stored_with_indices(nm);
+    to_return = nm_yale_each_ordered_stored_with_indices(nmatrix);
+    break;
   case nm::DENSE_STORE:
-    return nm_dense_each_with_indices(nm);
+    to_return = nm_dense_each_with_indices(nmatrix);
+    break;
   case nm::LIST_STORE:
-    return nm_list_each_with_indices(nm, true);
+    to_return = nm_list_each_with_indices(nmatrix, true);
+    break;
   default:
+    NM_CONSERVATIVE(nm_unregister_value(nmatrix));
     rb_raise(nm_eDataTypeError, "Not a proper storage type");
   }
+
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
+  return to_return;
 }
 
 
@@ -530,8 +820,13 @@ static VALUE nm_each_ordered_stored_with_indices(VALUE nmatrix) {
  * For elementwise, use =~ instead.
  *
  * This method will raise an exception if dimensions do not match.
+ *
+ * When stypes differ, this function calls a protected Ruby method.
  */
 static VALUE nm_eqeq(VALUE left, VALUE right) {
+  NM_CONSERVATIVE(nm_register_value(left));
+  NM_CONSERVATIVE(nm_register_value(right));
+
   NMATRIX *l, *r;
 
   CheckNMatrixType(left);
@@ -540,23 +835,35 @@ static VALUE nm_eqeq(VALUE left, VALUE right) {
   UnwrapNMatrix(left, l);
   UnwrapNMatrix(right, r);
 
-  if (l->stype != r->stype)
-    rb_raise(rb_eNotImpError, "comparison between different matrix stypes not yet implemented");
-
   bool result = false;
 
-  switch(l->stype) {
-  case nm::DENSE_STORE:
-    result = nm_dense_storage_eqeq(l->storage, r->storage);
-    break;
-  case nm::LIST_STORE:
-    result = nm_list_storage_eqeq(l->storage, r->storage);
-    break;
-  case nm::YALE_STORE:
-    result = nm_yale_storage_eqeq(l->storage, r->storage);
-    break;
+  if (l->stype != r->stype) { // DIFFERENT STYPES
+
+    if (l->stype == nm::DENSE_STORE)
+      result = rb_funcall(left, rb_intern("dense_eql_sparse?"), 1, right);
+    else if (r->stype == nm::DENSE_STORE)
+      result = rb_funcall(right, rb_intern("dense_eql_sparse?"), 1, left);
+    else
+      result = rb_funcall(left, rb_intern("sparse_eql_sparse?"), 1, right);
+
+  } else {
+
+    switch(l->stype) {       // SAME STYPES
+    case nm::DENSE_STORE:
+      result = nm_dense_storage_eqeq(l->storage, r->storage);
+      break;
+    case nm::LIST_STORE:
+      result = nm_list_storage_eqeq(l->storage, r->storage);
+      break;
+    case nm::YALE_STORE:
+      result = nm_yale_storage_eqeq(l->storage, r->storage);
+      break;
+    }
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(left));
+  NM_CONSERVATIVE(nm_unregister_value(right));
+
   return result ? Qtrue : Qfalse;
 }
 
@@ -573,6 +880,60 @@ DEF_ELEMENTWISE_RUBY_ACCESSOR(GEQ, geq)
 DEF_ELEMENTWISE_RUBY_ACCESSOR(LT, lt)
 DEF_ELEMENTWISE_RUBY_ACCESSOR(GT, gt)
 
+DEF_UNARY_RUBY_ACCESSOR(SIN, sin)
+DEF_UNARY_RUBY_ACCESSOR(COS, cos)
+DEF_UNARY_RUBY_ACCESSOR(TAN, tan)
+DEF_UNARY_RUBY_ACCESSOR(ASIN, asin)
+DEF_UNARY_RUBY_ACCESSOR(ACOS, acos)
+DEF_UNARY_RUBY_ACCESSOR(ATAN, atan)
+DEF_UNARY_RUBY_ACCESSOR(SINH, sinh)
+DEF_UNARY_RUBY_ACCESSOR(COSH, cosh)
+DEF_UNARY_RUBY_ACCESSOR(TANH, tanh)
+DEF_UNARY_RUBY_ACCESSOR(ASINH, asinh)
+DEF_UNARY_RUBY_ACCESSOR(ACOSH, acosh)
+DEF_UNARY_RUBY_ACCESSOR(ATANH, atanh)
+DEF_UNARY_RUBY_ACCESSOR(EXP, exp)
+DEF_UNARY_RUBY_ACCESSOR(LOG2, log2)
+DEF_UNARY_RUBY_ACCESSOR(LOG10, log10)
+DEF_UNARY_RUBY_ACCESSOR(SQRT, sqrt)
+DEF_UNARY_RUBY_ACCESSOR(ERF, erf)
+DEF_UNARY_RUBY_ACCESSOR(ERFC, erfc)
+DEF_UNARY_RUBY_ACCESSOR(CBRT, cbrt)
+DEF_UNARY_RUBY_ACCESSOR(GAMMA, gamma)
+
+DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(ATAN2, atan2)
+DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(LDEXP, ldexp)
+DEF_NONCOM_ELEMENTWISE_RUBY_ACCESSOR(HYPOT, hypot)
+
+static VALUE nm_unary_log(int argc, VALUE* argv, VALUE self) {
+  // Forwards to self's stype-specific __*_unary_log__ Ruby method; the base is argv[0] when given, else exp(1.0).
+  NM_CONSERVATIVE(nm_register_values(argv, argc));
+  const double default_log_base = exp(1.0);
+  NMATRIX* left;
+  UnwrapNMatrix(self, left);
+  const char* sym = NULL; // plain C string: nothing with a destructor is live across the rb_raise below
+  switch(left->stype) {
+  case nm::DENSE_STORE:
+    sym = "__dense_unary_log__";
+    break;
+  case nm::YALE_STORE:
+    sym = "__yale_unary_log__";
+    break;
+  case nm::LIST_STORE:
+    sym = "__list_unary_log__";
+    break;
+  default: // unexpected stype: raise like the other stype dispatchers instead of calling an empty method name
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    rb_raise(nm_eDataTypeError, "Not a proper storage type");
+  }
+  NM_CONSERVATIVE(nm_unregister_values(argv, argc)); // still released before rb_funcall, exactly as before
+  return rb_funcall(self, rb_intern(sym), 1, argc > 0 ? argv[0] : nm::RubyObject(default_log_base).rval);
+}
+
+//DEF_ELEMENTWISE_RUBY_ACCESSOR(ATAN2, atan2)
+//DEF_ELEMENTWISE_RUBY_ACCESSOR(LDEXP, ldexp)
+//DEF_ELEMENTWISE_RUBY_ACCESSOR(HYPOT, hypot)
+
 /*
  * call-seq:
  *     hermitian? -> Boolean
@@ -598,6 +959,7 @@ static VALUE nm_hermitian(VALUE self) {
  * Bang should imply that no copy is being made, even temporarily.
  */
 static VALUE nm_complex_conjugate_bang(VALUE self) {
+
   NMATRIX* m;
   void* elem;
   size_t size, p;
@@ -643,11 +1005,13 @@ static VALUE nm_complex_conjugate_bang(VALUE self) {
  * need to worry about deleting it.
  */
 NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage) {
-  NMATRIX* mat = ALLOC(NMATRIX);
+  nm_register_storage(stype, storage); // incoming storage stays registered while the NMATRIX wrapper is allocated
+  NMATRIX* mat = NM_ALLOC(NMATRIX);
 
   mat->stype   = stype;
   mat->storage = storage;
 
+  nm_unregister_storage(stype, storage); // balances the registration above before mat is handed back
   return mat;
 }
 
@@ -655,6 +1019,8 @@ NMATRIX* nm_create(nm::stype_t stype, STORAGE* storage) {
  * @see nm_init
  */
 static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) {
+  NM_CONSERVATIVE(nm_register_values(argv, argc));
+  NM_CONSERVATIVE(nm_register_value(self));
   VALUE shape_ary, initial_ary, hash;
   //VALUE shape_ary, default_val, capacity, initial_ary, dtype_sym, stype_sym;
   // Mandatory args: shape, dtype, stype
@@ -676,7 +1042,9 @@ static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) {
     }
   }
 #endif
-
+  NM_CONSERVATIVE(nm_register_value(shape_ary));
+  NM_CONSERVATIVE(nm_register_value(initial_ary));
+  NM_CONSERVATIVE(nm_register_value(hash));
   // Get the shape.
   size_t  dim;
   size_t* shape = interpret_shape(shape_ary, &dim);
@@ -692,7 +1060,9 @@ static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) {
     dtype_sym       = rb_hash_aref(hash, ID2SYM(nm_rb_dtype));
     stype_sym       = rb_hash_aref(hash, ID2SYM(nm_rb_stype));
     capacity_num    = rb_hash_aref(hash, ID2SYM(nm_rb_capacity));
+    NM_CONSERVATIVE(nm_register_value(capacity_num));
     default_val_num = rb_hash_aref(hash, ID2SYM(nm_rb_default));
+    NM_CONSERVATIVE(nm_register_value(default_val_num));
   }
 
   //     stype ||= :dense
@@ -724,6 +1094,10 @@ static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) {
       init = RARRAY_LEN(initial_ary) == 1 ? rubyobj_to_cval(rb_ary_entry(initial_ary, 0), dtype) : NULL;
     else
       init = rubyobj_to_cval(initial_ary, dtype);
+    
+    if (dtype == nm::RUBYOBJ) {
+      nm_register_values(reinterpret_cast<VALUE*>(init), 1);
+    }
   }
 
   // capacity = h[:capacity] || 0
@@ -732,47 +1106,55 @@ static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) {
   }
 
   if (!NIL_P(initial_ary)) {
-    v = interpret_initial_value(initial_ary, dtype);
-
+    
     if (TYPE(initial_ary) == T_ARRAY) 	v_size = RARRAY_LEN(initial_ary);
     else                                v_size = 1;
+
+    v = interpret_initial_value(initial_ary, dtype);
+
+    if (dtype == nm::RUBYOBJ) {
+      nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
+    }
   }
 
   // :object matrices MUST be initialized.
   else if (stype == nm::DENSE_STORE && dtype == nm::RUBYOBJ) {
     // Pretend [nil] was passed for RUBYOBJ.
-    v          = ALLOC(VALUE);
+    v          = NM_ALLOC(VALUE);
     *(VALUE*)v = Qnil;
 
     v_size = 1;
 
   }
 
-	NMATRIX* nmatrix;
+  NMATRIX* nmatrix;
   UnwrapNMatrix(self, nmatrix);
 
   nmatrix->stype = stype;
 
   switch (stype) {
-  	case nm::DENSE_STORE:
-  		nmatrix->storage = (STORAGE*)nm_dense_storage_create(dtype, shape, dim, v, v_size);
-  		break;
+    case nm::DENSE_STORE:
+      nmatrix->storage = (STORAGE*)nm_dense_storage_create(dtype, shape, dim, v, v_size);
+      break;
 
-  	case nm::LIST_STORE:
-  		nmatrix->storage = (STORAGE*)nm_list_storage_create(dtype, shape, dim, init);
-  		break;
+    case nm::LIST_STORE:
+      nmatrix->storage = (STORAGE*)nm_list_storage_create(dtype, shape, dim, init);
+      break;
 
-  	case nm::YALE_STORE:
-  		nmatrix->storage = (STORAGE*)nm_yale_storage_create(dtype, shape, dim, capacity);
-  		nm_yale_storage_init((YALE_STORAGE*)(nmatrix->storage), init);
-  		break;
+    case nm::YALE_STORE:
+      nmatrix->storage = (STORAGE*)nm_yale_storage_create(dtype, shape, dim, capacity);
+      nm_yale_storage_init((YALE_STORAGE*)(nmatrix->storage), init);
+      break;
   }
 
+  nm_register_storage(stype, nmatrix->storage);
+
   // If we're not creating a dense, and an initial array was provided, use that and multi-slice-set
   // to set the contents of the matrix right now.
   if (stype != nm::DENSE_STORE && v_size > 1) {
-    VALUE* slice_argv = ALLOCA_N(VALUE, dim);
-    size_t* tmp_shape = ALLOC_N(size_t, dim);
+    VALUE* slice_argv = NM_ALLOCA_N(VALUE, dim);
+    nm_register_values(slice_argv, dim);
+    size_t* tmp_shape = NM_ALLOC_N(size_t, dim);
     for (size_t m = 0; m < dim; ++m) {
       slice_argv[m] = ID2SYM(nm_rb_mul); // :* -- full range
       tmp_shape[m]  = shape[m];
@@ -780,20 +1162,47 @@ static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) {
 
     SLICE* slice = get_slice(dim, dim, slice_argv, shape);
     // Create a temporary dense matrix and use it to do a slice assignment on self.
-    NMATRIX* tmp          = nm_create(nm::DENSE_STORE, (STORAGE*)nm_dense_storage_create(dtype, tmp_shape, dim, v, v_size));
-    volatile VALUE rb_tmp = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, tmp);
+    NMATRIX* tmp = nm_create(nm::DENSE_STORE, (STORAGE*)nm_dense_storage_create(dtype, tmp_shape, dim, v, v_size));
+    nm_register_nmatrix(tmp);
+    VALUE rb_tmp = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, tmp);
+    nm_unregister_nmatrix(tmp);
+    nm_register_value(rb_tmp);
     if (stype == nm::YALE_STORE)  nm_yale_storage_set(self, slice, rb_tmp);
     else                          nm_list_storage_set(self, slice, rb_tmp);
 
     free_slice(slice);
 
     // We need to free v if it's not the same size as tmp -- because tmp will have made a copy instead.
-    if (nm_storage_count_max_elements(tmp->storage) != v_size)
-      xfree(v);
+    //if (nm_storage_count_max_elements(tmp->storage) != v_size)
+    //  NM_FREE(v);
 
     // nm_delete(tmp); // This seems to enrage the garbage collector (because rb_tmp is still available). It'd be better if we could force it to free immediately, but no sweat.
+
+    nm_unregister_value(rb_tmp);
+    nm_unregister_values(slice_argv, dim);
+  }
+
+  if (!NIL_P(initial_ary) && dtype == nm::RUBYOBJ) {
+    nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
   }
 
+  if (stype != nm::DENSE_STORE && dtype == nm::RUBYOBJ) {
+    nm_unregister_values(reinterpret_cast<VALUE*>(init), 1);
+  }
+
+  if (!NIL_P(hash)) {
+    NM_CONSERVATIVE(nm_unregister_value(capacity_num));
+    NM_CONSERVATIVE(nm_unregister_value(default_val_num));
+  }
+
+  NM_CONSERVATIVE(nm_unregister_value(shape_ary));
+  NM_CONSERVATIVE(nm_unregister_value(initial_ary));
+  NM_CONSERVATIVE(nm_unregister_value(hash));
+
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+  nm_unregister_storage(stype, nmatrix->storage);
+
   return self;
 }
 
@@ -834,8 +1243,12 @@ static VALUE nm_init_new_version(int argc, VALUE* argv, VALUE self) {
  * shortcuts.rb.
  */
 static VALUE nm_init(int argc, VALUE* argv, VALUE nm) {
-
+  NM_CONSERVATIVE(nm_register_value(nm));
+  NM_CONSERVATIVE(nm_register_values(argv, argc));
+  
   if (argc <= 3) { // Call the new constructor unless all four arguments are given (or the 7-arg version is given)
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    NM_CONSERVATIVE(nm_unregister_value(nm));
   	return nm_init_new_version(argc, argv, nm);
   }
 
@@ -854,16 +1267,20 @@ static VALUE nm_init(int argc, VALUE* argv, VALUE nm) {
 
   // If there are 7 arguments and Yale, refer to a different init function with fewer sanity checks.
   if (argc == 7) {
-  	if (stype == nm::YALE_STORE) {
-			return nm_init_yale_from_old_yale(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], nm);
+    if (stype == nm::YALE_STORE) {
+      NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+      NM_CONSERVATIVE(nm_unregister_value(nm));
+      return nm_init_yale_from_old_yale(argv[1], argv[2], argv[3], argv[4], argv[5], argv[6], nm);
 
-		} else {
-			rb_raise(rb_eArgError, "Expected 2-4 arguments (or 7 for internal Yale creation)");
-		}
+    } else {
+      NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+      NM_CONSERVATIVE(nm_unregister_value(nm));
+      rb_raise(rb_eArgError, "Expected 2-4 arguments (or 7 for internal Yale creation)");
+    }
   }
 
-	// 1: Array or Fixnum
-	size_t dim;
+  // 1: Array or Fixnum
+  size_t dim;
   size_t* shape = interpret_shape(argv[offset], &dim);
 
   // 2-3: dtype
@@ -895,7 +1312,7 @@ static VALUE nm_init(int argc, VALUE* argv, VALUE nm) {
     	 */
       if (dtype == nm::RUBYOBJ) {
       	// Pretend [nil] was passed for RUBYOBJ.
-      	init_val = ALLOC(VALUE);
+      	init_val = NM_ALLOC(VALUE);
         *(VALUE*)init_val = Qnil;
 
         init_val_len = 1;
@@ -904,32 +1321,43 @@ static VALUE nm_init(int argc, VALUE* argv, VALUE nm) {
       	init_val = NULL;
       }
     } else if (stype == nm::LIST_STORE) {
-    	init_val = ALLOC_N(char, DTYPE_SIZES[dtype]);
+      init_val = NM_ALLOC_N(char, DTYPE_SIZES[dtype]);
       std::memset(init_val, 0, DTYPE_SIZES[dtype]);
     }
   }
 
+  if (dtype == nm::RUBYOBJ) {
+    nm_register_values(reinterpret_cast<VALUE*>(init_val), init_val_len);
+  }
+
   // TODO: Update to allow an array as the initial value.
-	NMATRIX* nmatrix;
+  NMATRIX* nmatrix;
   UnwrapNMatrix(nm, nmatrix);
 
   nmatrix->stype = stype;
 
   switch (stype) {
-  	case nm::DENSE_STORE:
-  		nmatrix->storage = (STORAGE*)nm_dense_storage_create(dtype, shape, dim, init_val, init_val_len);
-  		break;
+    case nm::DENSE_STORE:
+      nmatrix->storage = (STORAGE*)nm_dense_storage_create(dtype, shape, dim, init_val, init_val_len);
+      break;
 
-  	case nm::LIST_STORE:
-  		nmatrix->storage = (STORAGE*)nm_list_storage_create(dtype, shape, dim, init_val);
-  		break;
+    case nm::LIST_STORE:
+      nmatrix->storage = (STORAGE*)nm_list_storage_create(dtype, shape, dim, init_val);
+      break;
 
-  	case nm::YALE_STORE:
-  		nmatrix->storage = (STORAGE*)nm_yale_storage_create(dtype, shape, dim, init_cap);
-  		nm_yale_storage_init((YALE_STORAGE*)(nmatrix->storage), NULL);
-  		break;
+    case nm::YALE_STORE:
+      nmatrix->storage = (STORAGE*)nm_yale_storage_create(dtype, shape, dim, init_cap);
+      nm_yale_storage_init((YALE_STORAGE*)(nmatrix->storage), NULL);
+      break;
+  }
+
+  if (dtype == nm::RUBYOBJ) {
+    nm_unregister_values(reinterpret_cast<VALUE*>(init_val), init_val_len);
   }
 
+  NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+  NM_CONSERVATIVE(nm_unregister_value(nm));
+
   return nm;
 }
 
@@ -938,13 +1366,18 @@ static VALUE nm_init(int argc, VALUE* argv, VALUE nm) {
  * Helper for nm_cast which uses the C types instead of the Ruby objects. Called by nm_cast.
  */
 NMATRIX* nm_cast_with_ctype_args(NMATRIX* self, nm::stype_t new_stype, nm::dtype_t new_dtype, void* init_ptr) {
+
+  nm_register_nmatrix(self); // source matrix stays registered while its storage is cast-copied below
+
+  NMATRIX* lhs = NM_ALLOC(NMATRIX);
   lhs->stype   = new_stype;
 
   // Copy the storage
   CAST_TABLE(cast_copy);
   lhs->storage = cast_copy[lhs->stype][self->stype](self->storage, new_dtype, init_ptr);
 
+  nm_unregister_nmatrix(self); // paired with nm_register_nmatrix(self) at the top
+
   return lhs;
 }
 
@@ -957,6 +1390,9 @@ NMATRIX* nm_cast_with_ctype_args(NMATRIX* self, nm::stype_t new_stype, nm::dtype
  * Copy constructor for changing dtypes and stypes.
  */
 VALUE nm_cast(VALUE self, VALUE new_stype_symbol, VALUE new_dtype_symbol, VALUE init) {
+  NM_CONSERVATIVE(nm_register_value(self));
+  NM_CONSERVATIVE(nm_register_value(init));
+
   nm::dtype_t new_dtype = nm_dtype_from_rbsymbol(new_dtype_symbol);
   nm::stype_t new_stype = nm_stype_from_rbsymbol(new_stype_symbol);
 
@@ -965,16 +1401,27 @@ VALUE nm_cast(VALUE self, VALUE new_stype_symbol, VALUE new_dtype_symbol, VALUE
 
   UnwrapNMatrix( self, rhs );
 
-  void* init_ptr = ALLOCA_N(char, DTYPE_SIZES[new_dtype]);
+  void* init_ptr = NM_ALLOCA_N(char, DTYPE_SIZES[new_dtype]);
   rubyval_to_cval(init, new_dtype, init_ptr);
 
-  return Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, nm_cast_with_ctype_args(rhs, new_stype, new_dtype, init_ptr));
+  NMATRIX* m = nm_cast_with_ctype_args(rhs, new_stype, new_dtype, init_ptr);
+  nm_register_nmatrix(m);
+
+  VALUE to_return = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, m);
+  
+  nm_unregister_nmatrix(m);
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  NM_CONSERVATIVE(nm_unregister_value(init));
+  return to_return;
+
 }
 
 /*
  * Copy constructor for transposing.
  */
 static VALUE nm_init_transposed(VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
+
   static STORAGE* (*storage_copy_transposed[nm::NUM_STYPES])(const STORAGE* rhs_base) = {
     nm_dense_storage_copy_transposed,
     nm_list_storage_copy_transposed,
@@ -984,19 +1431,30 @@ static VALUE nm_init_transposed(VALUE self) {
   NMATRIX* lhs = nm_create( NM_STYPE(self),
                             storage_copy_transposed[NM_STYPE(self)]( NM_STORAGE(self) )
                           );
+  nm_register_nmatrix(lhs);
+  VALUE to_return = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, lhs);
 
-  return Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, lhs);
+  nm_unregister_nmatrix(lhs);
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  return to_return;
 }
 
 /*
  * Copy constructor for no change of dtype or stype (used for #initialize_copy hook).
  */
 static VALUE nm_init_copy(VALUE copy, VALUE original) {
+  NM_CONSERVATIVE(nm_register_value(copy));
+  NM_CONSERVATIVE(nm_register_value(original));
+
   NMATRIX *lhs, *rhs;
 
   CheckNMatrixType(original);
 
-  if (copy == original) return copy;
+  if (copy == original) {
+    NM_CONSERVATIVE(nm_unregister_value(copy));
+    NM_CONSERVATIVE(nm_unregister_value(original));
+    return copy;
+  }
 
   UnwrapNMatrix( original, rhs );
   UnwrapNMatrix( copy,     lhs );
@@ -1007,25 +1465,24 @@ static VALUE nm_init_copy(VALUE copy, VALUE original) {
   CAST_TABLE(ttable);
   lhs->storage = ttable[lhs->stype][rhs->stype](rhs->storage, rhs->storage->dtype, NULL);
 
+  NM_CONSERVATIVE(nm_unregister_value(copy));
+  NM_CONSERVATIVE(nm_unregister_value(original));
+
   return copy;
 }
 
 /*
- * Get major, minor, and release components of NMatrix::VERSION. Store in function parameters.
+ * Get major, minor, and release components of NMatrix::VERSION. Store in function parameters. Doesn't get
+ * the "pre" field currently (beta1/rc1/etc).
  */
 static void get_version_info(uint16_t& major, uint16_t& minor, uint16_t& release) {
   // Get VERSION and split it on periods. Result is an Array.
-  VALUE version = rb_funcall(rb_const_get(cNMatrix, rb_intern("VERSION")), rb_intern("split"), 1, rb_str_new_cstr("."));
-  VALUE* ary    = RARRAY_PTR(version); // major, minor, and release
+  VALUE cVersion = rb_const_get(cNMatrix, rb_intern("VERSION")); // no string splitting any more: VERSION's MAJOR/MINOR/TINY constants are read directly below
 
   // Convert each to an integer
-  VALUE  maj    = rb_funcall(ary[0], rb_intern("to_i"), 0);
-  VALUE  min    = rb_funcall(ary[1], rb_intern("to_i"), 0);
-  VALUE  rel    = rb_funcall(ary[2], rb_intern("to_i"), 0);
-
-  major   = static_cast<uint16_t>(nm::RubyObject(maj));
-  minor   = static_cast<uint16_t>(nm::RubyObject(min));
-  release = static_cast<uint16_t>(nm::RubyObject(rel));
+  major   = FIX2INT(rb_const_get(cVersion, rb_intern("MAJOR")));
+  minor   = FIX2INT(rb_const_get(cVersion, rb_intern("MINOR")));
+  release = FIX2INT(rb_const_get(cVersion, rb_intern("TINY"))); // the "release" output comes from the TINY constant
 }
 
 
@@ -1055,12 +1512,40 @@ static nm::symm_t interpret_symm(VALUE symm) {
 
 
 void read_padded_shape(std::ifstream& f, size_t dim, size_t* shape) {
-  nm::read_padded_shape(f, dim, shape);
+  size_t bytes_read = 0; // running count of shape bytes consumed; sizes the padding skip at the end
+
+  // Read shape
+  for (size_t i = 0; i < dim; ++i) {
+    size_t s;
+    f.read(reinterpret_cast<char*>(&s), sizeof(size_t)); // raw read of one size_t straight into s
+    shape[i] = s;
+
+    bytes_read += sizeof(size_t);
+  }
+
+  // Ignore padding
+  f.ignore(bytes_read % 8); // NOTE(review): skips (bytes_read % 8) bytes, which reaches the next 8-byte boundary only for remainders 0 and 4 -- confirm no other remainder can occur
 }
 
 
 void write_padded_shape(std::ofstream& f, size_t dim, size_t* shape) {
-  nm::write_padded_shape(f, dim, shape);
+  size_t bytes_written = 0; // running count of bytes emitted; drives the zero padding below
+
+  // Write shape
+  for (size_t i = 0; i < dim; ++i) {
+    size_t s = shape[i];
+    f.write(reinterpret_cast<const char*>(&s), sizeof(size_t));
+
+    bytes_written += sizeof(size_t);
+  }
+
+  // Pad with zeros
+  size_t zero = 0;
+  while (bytes_written % 8) {
+    f.write(reinterpret_cast<const char*>(&zero), sizeof(size_t));
+
+    bytes_written += sizeof(size_t); // count exactly what was just written (was sizeof(IType))
+  }
 }
 
 
@@ -1116,6 +1601,10 @@ static VALUE nm_write(int argc, VALUE* argv, VALUE self) {
   if (argc < 1 || argc > 2) {
     rb_raise(rb_eArgError, "Expected one or two arguments");
   }
+
+  NM_CONSERVATIVE(nm_register_values(argv, argc));
+  NM_CONSERVATIVE(nm_register_value(self));
+
   VALUE file = argv[0],
         symm = argc == 1 ? Qnil : argv[1];
 
@@ -1125,6 +1614,8 @@ static VALUE nm_write(int argc, VALUE* argv, VALUE self) {
   nm::symm_t symm_ = interpret_symm(symm);
 
   if (nmatrix->storage->dtype == nm::RUBYOBJ) {
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    NM_CONSERVATIVE(nm_unregister_value(self));
     rb_raise(rb_eNotImpError, "Ruby Object writing is not implemented yet");
   }
 
@@ -1137,8 +1628,15 @@ static VALUE nm_write(int argc, VALUE* argv, VALUE self) {
   //FIXME: Cast the matrix to the smallest possible index type. Write that in the place of IType.
 
   // Check arguments before starting to write.
-  if (nmatrix->stype == nm::LIST_STORE) rb_raise(nm_eStorageTypeError, "cannot save list matrix; cast to yale or dense first");
+  if (nmatrix->stype == nm::LIST_STORE) {
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    rb_raise(nm_eStorageTypeError, "cannot save list matrix; cast to yale or dense first");
+  }
   if (symm_ != nm::NONSYMM) {
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    NM_CONSERVATIVE(nm_unregister_value(self));
+
     if (dim != 2) rb_raise(rb_eArgError, "symmetry/triangularity not defined for a non-2D matrix");
     if (nmatrix->storage->shape[0] != nmatrix->storage->shape[1])
       rb_raise(rb_eArgError, "symmetry/triangularity not defined for a non-square matrix");
@@ -1185,6 +1683,9 @@ static VALUE nm_write(int argc, VALUE* argv, VALUE self) {
 
   f.close();
 
+  NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+  NM_CONSERVATIVE(nm_unregister_value(self));
+
   return Qtrue;
 }
 
@@ -1202,6 +1703,9 @@ static VALUE nm_write(int argc, VALUE* argv, VALUE self) {
 static VALUE nm_read(int argc, VALUE* argv, VALUE self) {
   using std::ifstream;
 
+  NM_CONSERVATIVE(nm_register_values(argv, argc));
+  NM_CONSERVATIVE(nm_register_value(self));
+
   VALUE file, force_;
 
   // Read the arguments
@@ -1210,6 +1714,8 @@ static VALUE nm_read(int argc, VALUE* argv, VALUE self) {
 
 
   if (!RB_FILE_EXISTS(file)) { // FIXME: Errno::ENOENT
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    NM_CONSERVATIVE(nm_unregister_value(self));
     rb_raise(rb_get_errno_exc("ENOENT"), "%s", RSTRING_PTR(file));
   }
 
@@ -1230,9 +1736,11 @@ static VALUE nm_read(int argc, VALUE* argv, VALUE self) {
   int ver  = major * 10000 + minor * 100 + release,
       fver = fmajor * 10000 + fminor * 100 + release;
   if (fver > ver && force == false) {
-    rb_raise(rb_eIOError, "File was created in newer version of NMatrix than current");
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    rb_raise(rb_eIOError, "File was created in newer version of NMatrix than current (%u.%u.%u)", fmajor, fminor, frelease);
   }
-  if (null16 != 0) fprintf(stderr, "Warning: Expected zero padding was not zero\n");
+  if (null16 != 0) rb_warn("nm_read: Expected zero padding was not zero (0)\n");
 
   uint8_t dt, st, it, sm;
   uint16_t dim;
@@ -1245,19 +1753,20 @@ static VALUE nm_read(int argc, VALUE* argv, VALUE self) {
   f.read(reinterpret_cast<char*>(&null16), sizeof(uint16_t));
   f.read(reinterpret_cast<char*>(&dim), sizeof(uint16_t));
 
-  if (null16 != 0) fprintf(stderr, "Warning: Expected zero padding was not zero\n");
+  if (null16 != 0) rb_warn("nm_read: Expected zero padding was not zero (1)");
   nm::stype_t stype = static_cast<nm::stype_t>(st);
   nm::dtype_t dtype = static_cast<nm::dtype_t>(dt);
   nm::symm_t  symm  = static_cast<nm::symm_t>(sm);
   //nm::itype_t itype = static_cast<nm::itype_t>(it);
 
   // READ NEXT FEW 64-BIT BLOCKS
-  size_t* shape = ALLOC_N(size_t, dim);
+  size_t* shape = NM_ALLOC_N(size_t, dim);
   read_padded_shape(f, dim, shape);
 
   STORAGE* s;
   if (stype == nm::DENSE_STORE) {
     s = nm_dense_storage_create(dtype, shape, dim, NULL, 0);
+    nm_register_storage(stype, s);
 
     read_padded_dense_elements(f, reinterpret_cast<DENSE_STORAGE*>(s), symm, dtype);
 
@@ -1270,8 +1779,12 @@ static VALUE nm_read(int argc, VALUE* argv, VALUE self) {
 
     s = nm_yale_storage_create(dtype, shape, dim, length); // set length as init capacity
 
+    nm_register_storage(stype, s);
+
     read_padded_yale_elements(f, reinterpret_cast<YALE_STORAGE*>(s), length, symm, dtype);
   } else {
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+    NM_CONSERVATIVE(nm_unregister_value(self));
     rb_raise(nm_eStorageTypeError, "please convert to yale or dense before saving");
   }
 
@@ -1280,10 +1793,18 @@ static VALUE nm_read(int argc, VALUE* argv, VALUE self) {
   // Return the appropriate matrix object (Ruby VALUE)
   // FIXME: This should probably return CLASS_OF(self) instead of cNMatrix, but I don't know how that works for
   // FIXME: class methods.
+  nm_register_nmatrix(nm);
+  VALUE to_return = Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, nm);
+
+  nm_unregister_nmatrix(nm);
+  NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  nm_unregister_storage(stype, s);
+
   switch(stype) {
   case nm::DENSE_STORE:
   case nm::YALE_STORE:
-    return Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, nm);
+    return to_return;
   default: // this case never occurs (due to earlier rb_raise)
     return Qnil;
   }
@@ -1321,7 +1842,7 @@ static VALUE nm_init_yale_from_old_yale(VALUE shape, VALUE dtype, VALUE ia, VALU
  */
 static VALUE nm_is_ref(VALUE self) {
   if (NM_SRC(self) == NM_STORAGE(self)) return Qfalse;
-  else                                  return Qtrue;
+  return Qtrue; // reached only when NM_SRC(self) differs from NM_STORAGE(self)
 }
 
 /*
@@ -1340,7 +1861,8 @@ static VALUE nm_mget(int argc, VALUE* argv, VALUE self) {
     nm_list_storage_get,
     nm_yale_storage_get
   };
-  return nm_xslice(argc, argv, ttable[NM_STYPE(self)], nm_delete, self);
+  nm::stype_t stype = NM_STYPE(self);
+  return nm_xslice(argc, argv, ttable[stype], nm_delete, self);
 }
 
 /*
@@ -1359,7 +1881,8 @@ static VALUE nm_mref(int argc, VALUE* argv, VALUE self) {
     nm_list_storage_ref,
     nm_yale_storage_ref
   };
-  return nm_xslice(argc, argv, ttable[NM_STYPE(self)], nm_delete_ref, self);
+  nm::stype_t stype = NM_STYPE(self);
+  return nm_xslice(argc, argv, ttable[stype], nm_delete_ref, self);
 }
 
 /*
@@ -1372,11 +1895,17 @@ static VALUE nm_mref(int argc, VALUE* argv, VALUE self) {
  *     n[3,3] = n[2,3] = 5.0
  */
 static VALUE nm_mset(int argc, VALUE* argv, VALUE self) {
+  
   size_t dim = NM_DIM(self); // last arg is the value
 
+  VALUE to_return = Qnil;
+
   if ((size_t)(argc) > NM_DIM(self)+1) {
-    rb_raise(rb_eArgError, "wrong number of arguments (%d for %u)", argc, effective_dim(NM_STORAGE(self))+1);
+    rb_raise(rb_eArgError, "wrong number of arguments (%d for %lu)", argc, effective_dim(NM_STORAGE(self))+1);
   } else {
+    NM_CONSERVATIVE(nm_register_value(self));
+    NM_CONSERVATIVE(nm_register_values(argv, argc));
+
     SLICE* slice = get_slice(dim, argc-1, argv, NM_STORAGE(self)->shape);
 
     static void (*ttable[nm::NUM_STYPES])(VALUE, SLICE*, VALUE) = {
@@ -1389,9 +1918,13 @@ static VALUE nm_mset(int argc, VALUE* argv, VALUE self) {
 
     free_slice(slice);
 
-    return argv[argc-1];
+    to_return = argv[argc-1];
+
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    NM_CONSERVATIVE(nm_unregister_values(argv, argc));
   }
-  return Qnil;
+
+  return to_return;
 }
 
 /*
@@ -1402,30 +1935,50 @@ static VALUE nm_mset(int argc, VALUE* argv, VALUE self) {
  * The two matrices must be of the same stype (for now). If dtype differs, an upcast will occur.
  */
 static VALUE nm_multiply(VALUE left_v, VALUE right_v) {
+  NM_CONSERVATIVE(nm_register_value(left_v)); // both operands are registered here and unregistered just before each return/raise below
+  NM_CONSERVATIVE(nm_register_value(right_v));
+
   NMATRIX *left, *right;
 
   UnwrapNMatrix( left_v, left );
 
-  if (NM_RUBYVAL_IS_NUMERIC(right_v))
+  if (NM_RUBYVAL_IS_NUMERIC(right_v)) {
+    NM_CONSERVATIVE(nm_unregister_value(left_v));
+    NM_CONSERVATIVE(nm_unregister_value(right_v));
     return matrix_multiply_scalar(left, right_v);
+  }
 
-  else if (TYPE(right_v) == T_ARRAY)
+  else if (TYPE(right_v) == T_ARRAY) {
+    NM_CONSERVATIVE(nm_unregister_value(left_v)); // unregister first: rb_raise does not return
+    NM_CONSERVATIVE(nm_unregister_value(right_v));
     rb_raise(rb_eNotImpError, "please convert array to nx1 or 1xn NMatrix first");
+  }
 
   else { // both are matrices (probably)
     CheckNMatrixType(right_v);
     UnwrapNMatrix( right_v, right );
 
-    if (left->storage->shape[1] != right->storage->shape[0])
+    if (left->storage->shape[1] != right->storage->shape[0]) {
+      NM_CONSERVATIVE(nm_unregister_value(left_v));
+      NM_CONSERVATIVE(nm_unregister_value(right_v));
       rb_raise(rb_eArgError, "incompatible dimensions");
+    }
 
-    if (left->stype != right->stype)
+    if (left->stype != right->stype) {
+      NM_CONSERVATIVE(nm_unregister_value(left_v));
+      NM_CONSERVATIVE(nm_unregister_value(right_v));
       rb_raise(rb_eNotImpError, "matrices must have same stype");
+    }
 
+    NM_CONSERVATIVE(nm_unregister_value(left_v)); // unregistered before matrix_multiply is entered, not after it
+    NM_CONSERVATIVE(nm_unregister_value(right_v));
     return matrix_multiply(left, right);
 
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(left_v)); // NOTE(review): every branch above returns or raises, so this tail looks unreachable -- confirm
+  NM_CONSERVATIVE(nm_unregister_value(right_v));
+
   return Qnil;
 }
 
@@ -1452,13 +2005,17 @@ static VALUE nm_dim(VALUE self) {
  * Get the shape (dimensions) of a matrix.
  */
 static VALUE nm_shape(VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
   STORAGE* s   = NM_STORAGE(self);
 
   // Copy elements into a VALUE array and then use those to create a Ruby array with rb_ary_new4.
-  VALUE* shape = ALLOCA_N(VALUE, s->dim);
+  VALUE* shape = NM_ALLOCA_N(VALUE, s->dim);
+  nm_register_values(shape, s->dim); // registered before the loop below fills in the s->dim slots
   for (size_t index = 0; index < s->dim; ++index)
     shape[index] = INT2FIX(s->shape[index]);
-
+  
+  nm_unregister_values(shape, s->dim); // released before rb_ary_new4 builds the result from this buffer
+  NM_CONSERVATIVE(nm_unregister_value(self));
   return rb_ary_new4(s->dim, shape);
 }
 
@@ -1470,13 +2027,17 @@ static VALUE nm_shape(VALUE self) {
  * Get the offset (slice position) of a matrix. Typically all zeros, unless you have a reference slice.
  */
 static VALUE nm_offset(VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
   STORAGE* s   = NM_STORAGE(self);
 
   // Copy elements into a VALUE array and then use those to create a Ruby array with rb_ary_new4.
-  VALUE* offset = ALLOCA_N(VALUE, s->dim);
+  VALUE* offset = NM_ALLOCA_N(VALUE, s->dim);
+  nm_register_values(offset, s->dim); // registered before the loop below fills in the s->dim slots
   for (size_t index = 0; index < s->dim; ++index)
     offset[index] = INT2FIX(s->offset[index]);
 
+  nm_unregister_values(offset, s->dim); // released before rb_ary_new4 builds the result from this buffer
+  NM_CONSERVATIVE(nm_unregister_value(self));
   return rb_ary_new4(s->dim, offset);
 }
 
@@ -1490,13 +2051,20 @@ static VALUE nm_offset(VALUE self) {
 static VALUE nm_supershape(VALUE self) {
 
   STORAGE* s   = NM_STORAGE(self);
-  if (s->src == s) return nm_shape(self); // easy case (not a slice)
+  if (s->src == s) {
+    return nm_shape(self); // easy case (not a slice)
+  } 
   else s = s->src;
 
-  VALUE* shape = ALLOCA_N(VALUE, s->dim);
+  NM_CONSERVATIVE(nm_register_value(self)); // from here on s is the src storage, so the dim/shape read below are the source's
+  
+  VALUE* shape = NM_ALLOCA_N(VALUE, s->dim);
+  nm_register_values(shape, s->dim); // registered before the loop below fills in the s->dim slots
   for (size_t index = 0; index < s->dim; ++index)
     shape[index] = INT2FIX(s->shape[index]);
 
+  nm_unregister_values(shape, s->dim); // released before rb_ary_new4 builds the result from this buffer
+  NM_CONSERVATIVE(nm_unregister_value(self));
   return rb_ary_new4(s->dim, shape);
 }
 
@@ -1507,8 +2075,10 @@ static VALUE nm_supershape(VALUE self) {
  * Get the storage type (stype) of a matrix, e.g., :yale, :dense, or :list.
  */
 static VALUE nm_stype(VALUE self) {
-  ID stype = rb_intern(STYPE_NAMES[NM_STYPE(self)]);
-  return ID2SYM(stype);
+  NM_CONSERVATIVE(nm_register_value(self)); // paired with the NM_CONSERVATIVE unregister two lines below
+  VALUE stype = ID2SYM(rb_intern(STYPE_NAMES[NM_STYPE(self)])); // intern the stype's name and wrap the ID as a Symbol
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  return stype;
 }
 
 /*
@@ -1550,11 +2120,18 @@ static VALUE nm_effective_dim(VALUE self) {
  */
 static VALUE nm_xslice(int argc, VALUE* argv, void* (*slice_func)(const STORAGE*, SLICE*), void (*delete_func)(NMATRIX*), VALUE self) {
   VALUE result = Qnil;
+
   STORAGE* s = NM_STORAGE(self);
 
   if (NM_DIM(self) < (size_t)(argc)) {
-    rb_raise(rb_eArgError, "wrong number of arguments (%d for %u)", argc, effective_dim(s));
+    rb_raise(rb_eArgError, "wrong number of arguments (%d for %lu)", argc, effective_dim(s));
   } else {
+
+    NM_CONSERVATIVE(nm_register_values(argv, argc));
+    NM_CONSERVATIVE(nm_register_value(self));
+
+    nm_register_value(result);
+
     SLICE* slice = get_slice(NM_DIM(self), argc, argv, s->shape);
 
     if (slice->single) {
@@ -1569,16 +2146,21 @@ static VALUE nm_xslice(int argc, VALUE* argv, void* (*slice_func)(const STORAGE*
 
     } else {
 
-      NMATRIX* mat  = ALLOC(NMATRIX);
+      NMATRIX* mat  = NM_ALLOC(NMATRIX);
       mat->stype    = NM_STYPE(self);
       mat->storage  = (STORAGE*)((*slice_func)( s, slice ));
-
+      nm_register_nmatrix(mat);
       result        = Data_Wrap_Struct(CLASS_OF(self), nm_mark, delete_func, mat);
+      nm_unregister_nmatrix(mat);
     }
 
     free_slice(slice);
   }
 
+  nm_unregister_value(result);
+  NM_CONSERVATIVE(nm_unregister_values(argv, argc));
+  NM_CONSERVATIVE(nm_unregister_value(self));
+
   return result;
 }
 
@@ -1586,13 +2168,49 @@ static VALUE nm_xslice(int argc, VALUE* argv, void* (*slice_func)(const STORAGE*
 // Helper Functions //
 //////////////////////
 
+// Call the stype-specific Ruby method "__<stype>_unary_<op>__" on self and return its result.
+static VALUE unary_op(nm::unaryop_t op, VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
+  NMATRIX* left;
+  UnwrapNMatrix(self, left);
+  std::string sym;
+
+  switch(left->stype) {
+  case nm::DENSE_STORE: sym = "__dense_unary_" + nm::UNARYOPS[op] + "__"; break;
+  case nm::YALE_STORE:  sym = "__yale_unary_"  + nm::UNARYOPS[op] + "__"; break;
+  case nm::LIST_STORE:  sym = "__list_unary_"  + nm::UNARYOPS[op] + "__"; break;
+  default:
+    // As in elementwise_op: reject an unknown stype instead of sending an empty method name.
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    rb_raise(rb_eNotImpError, "unknown storage type requested unary operation");
+  }
+
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  // The selected method is called with no arguments.
+  return rb_funcall(self, rb_intern(sym.c_str()), 0);
+}
+
+// Raise ArgumentError unless both operands agree in dimensionality and in shape.
+static void check_dims_and_shape(VALUE left_val, VALUE right_val) {
+  if (NM_DIM(left_val) != NM_DIM(right_val)) {
+    rb_raise(rb_eArgError, "The left- and right-hand sides of the operation must have the same dimensionality.");
+  }
+  const size_t shape_bytes = sizeof(size_t) * NM_DIM(left_val); // dims match here, so one length serves both sides
+  if (memcmp(&NM_SHAPE(left_val, 0), &NM_SHAPE(right_val, 0), shape_bytes) != 0) {
+    rb_raise(rb_eArgError, "The left- and right-hand sides of the operation must have the same shape.");
+  }
+}
+
 static VALUE elementwise_op(nm::ewop_t op, VALUE left_val, VALUE right_val) {
 
-	NMATRIX* left;
-	NMATRIX* result;
+  NM_CONSERVATIVE(nm_register_value(left_val));
+  NM_CONSERVATIVE(nm_register_value(right_val));
 
-	CheckNMatrixType(left_val);
-	UnwrapNMatrix(left_val, left);
+  NMATRIX* left;
+  NMATRIX* result;
+
+  CheckNMatrixType(left_val);
+  UnwrapNMatrix(left_val, left);
 
   if (TYPE(right_val) != T_DATA || (RDATA(right_val)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(right_val)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) {
     // This is a matrix-scalar element-wise operation.
@@ -1608,21 +2226,18 @@ static VALUE elementwise_op(nm::ewop_t op, VALUE left_val, VALUE right_val) {
       sym = "__list_scalar_" + nm::EWOP_NAMES[op] + "__";
       break;
     default:
+      NM_CONSERVATIVE(nm_unregister_value(left_val));
+      NM_CONSERVATIVE(nm_unregister_value(right_val));
       rb_raise(rb_eNotImpError, "unknown storage type requested scalar element-wise operation");
     }
-    return rb_funcall(left_val, rb_intern(sym.c_str()), 1, right_val);
+    VALUE symv = rb_intern(sym.c_str());
+    NM_CONSERVATIVE(nm_unregister_value(left_val));
+    NM_CONSERVATIVE(nm_unregister_value(right_val));
+    return rb_funcall(left_val, symv, 1, right_val);
 
   } else {
 
-    // Check that the left- and right-hand sides have the same dimensionality.
-    if (NM_DIM(left_val) != NM_DIM(right_val)) {
-      rb_raise(rb_eArgError, "The left- and right-hand sides of the operation must have the same dimensionality.");
-    }
-
-    // Check that the left- and right-hand sides have the same shape.
-    if (memcmp(&NM_SHAPE(left_val, 0), &NM_SHAPE(right_val, 0), sizeof(size_t) * NM_DIM(left_val)) != 0) {
-      rb_raise(rb_eArgError, "The left- and right-hand sides of the operation must have the same shape.");
-    }
+    check_dims_and_shape(left_val, right_val);
 
     NMATRIX* right;
     UnwrapNMatrix(right_val, right);
@@ -1641,16 +2256,99 @@ static VALUE elementwise_op(nm::ewop_t op, VALUE left_val, VALUE right_val) {
         sym = "__list_elementwise_" + nm::EWOP_NAMES[op] + "__";
         break;
       default:
+        NM_CONSERVATIVE(nm_unregister_value(left_val));
+        NM_CONSERVATIVE(nm_unregister_value(right_val));
         rb_raise(rb_eNotImpError, "unknown storage type requested element-wise operation");
       }
-      return rb_funcall(left_val, rb_intern(sym.c_str()), 1, right_val);
+
+      VALUE symv = rb_intern(sym.c_str());
+      NM_CONSERVATIVE(nm_unregister_value(left_val));
+      NM_CONSERVATIVE(nm_unregister_value(right_val));
+      return rb_funcall(left_val, symv, 1, right_val);
 
     } else {
+      NM_CONSERVATIVE(nm_unregister_value(left_val));
+      NM_CONSERVATIVE(nm_unregister_value(right_val));
       rb_raise(rb_eArgError, "Element-wise operations are not currently supported between matrices with differing stypes.");
     }
   }
 
-	return Data_Wrap_Struct(CLASS_OF(left_val), nm_mark, nm_delete, result);
+  NM_CONSERVATIVE(nm_unregister_value(left_val));
+  NM_CONSERVATIVE(nm_unregister_value(right_val));
+  return Data_Wrap_Struct(CLASS_OF(left_val), nm_mark, nm_delete, result);
+}
+
+// Send self the stype-specific "__<stype>_{scalar,elementwise}_<op>__" Ruby method, passing other and flip through.
+static VALUE noncom_elementwise_op(nm::noncom_ewop_t op, VALUE self, VALUE other, VALUE flip) {
+  NM_CONSERVATIVE(nm_register_value(self));
+  NM_CONSERVATIVE(nm_register_value(other));
+
+  NMATRIX* self_nm;
+  NMATRIX* result;
+
+  CheckNMatrixType(self);
+  UnwrapNMatrix(self, self_nm);
+
+  if (TYPE(other) != T_DATA || (RDATA(other)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(other)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) {
+    // This is a matrix-scalar element-wise operation.
+    std::string sym;
+    switch(self_nm->stype) {
+    case nm::DENSE_STORE:
+      sym = "__dense_scalar_" + nm::NONCOM_EWOP_NAMES[op] + "__";
+      break;
+    case nm::YALE_STORE:
+      sym = "__yale_scalar_" + nm::NONCOM_EWOP_NAMES[op] + "__";
+      break;
+    case nm::LIST_STORE:
+      sym = "__list_scalar_" + nm::NONCOM_EWOP_NAMES[op] + "__";
+      break;
+    default:
+      NM_CONSERVATIVE(nm_unregister_value(self));
+      NM_CONSERVATIVE(nm_unregister_value(other));
+      rb_raise(rb_eNotImpError, "unknown storage type requested scalar element-wise operation");
+    }
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    NM_CONSERVATIVE(nm_unregister_value(other));
+    return rb_funcall(self, rb_intern(sym.c_str()), 2, other, flip);
+
+  } else {
+
+    check_dims_and_shape(self, other);
+
+    NMATRIX* other_nm;
+    UnwrapNMatrix(other, other_nm);
+
+    if (self_nm->stype == other_nm->stype) {
+      std::string sym;
+
+      switch(self_nm->stype) {
+      case nm::DENSE_STORE:
+        sym = "__dense_elementwise_" + nm::NONCOM_EWOP_NAMES[op] + "__";
+        break;
+      case nm::YALE_STORE:
+        sym = "__yale_elementwise_" + nm::NONCOM_EWOP_NAMES[op] + "__";
+        break;
+      case nm::LIST_STORE:
+        sym = "__list_elementwise_" + nm::NONCOM_EWOP_NAMES[op] + "__";
+        break;
+      default:
+        NM_CONSERVATIVE(nm_unregister_value(self));
+        NM_CONSERVATIVE(nm_unregister_value(other));
+        rb_raise(rb_eNotImpError, "unknown storage type requested element-wise operation");
+      }
+      NM_CONSERVATIVE(nm_unregister_value(self));
+      NM_CONSERVATIVE(nm_unregister_value(other));
+      return rb_funcall(self, rb_intern(sym.c_str()), 2, other, flip);
+
+    } else {
+      NM_CONSERVATIVE(nm_unregister_value(self));  // guarded like the matching register calls at the top
+      NM_CONSERVATIVE(nm_unregister_value(other));
+      rb_raise(rb_eArgError, "Element-wise operations are not currently supported between matrices with differing stypes.");
+    }
+  }
+  NM_CONSERVATIVE(nm_unregister_value(self));  // every branch above returns or calls rb_raise
+  NM_CONSERVATIVE(nm_unregister_value(other));
+  return Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, result);
 }
 
 /*
@@ -1664,11 +2362,13 @@ bool is_ref(const NMATRIX* matrix) {
  * Helper function for nm_symmetric and nm_hermitian.
  */
 static VALUE is_symmetric(VALUE self, bool hermitian) {
+  NM_CONSERVATIVE(nm_register_value(self));
+
   NMATRIX* m;
   UnwrapNMatrix(self, m);
 
   if (m->storage->shape[0] == m->storage->shape[1] and m->storage->dim == 2) {
-		if (NM_STYPE(self) == nm::DENSE_STORE) {
+    if (NM_STYPE(self) == nm::DENSE_STORE) {
       if (hermitian) {
         nm_dense_storage_is_hermitian((DENSE_STORAGE*)(m->storage), m->storage->shape[0]);
 
@@ -1678,11 +2378,12 @@ static VALUE is_symmetric(VALUE self, bool hermitian) {
 
     } else {
       // TODO: Implement, at the very least, yale_is_symmetric. Model it after yale/transp.template.c.
+      NM_CONSERVATIVE(nm_unregister_value(self));
       rb_raise(rb_eNotImpError, "symmetric? and hermitian? only implemented for dense currently");
     }
 
   }
-
+  NM_CONSERVATIVE(nm_unregister_value(self));
   return Qfalse;
 }
 
@@ -1724,9 +2425,10 @@ nm::dtype_t nm_dtype_min_fixnum(int64_t v) {
  * Helper for nm_dtype_min(), handling rationals.
  */
 nm::dtype_t nm_dtype_min_rational(VALUE vv) {
-  nm::Rational128* v = ALLOCA_N(nm::Rational128, 1);
+  NM_CONSERVATIVE(nm_register_value(vv));
+  nm::Rational128* v = NM_ALLOCA_N(nm::Rational128, 1);
   rubyval_to_cval(vv, nm::RATIONAL128, v);
-
+  NM_CONSERVATIVE(nm_unregister_value(vv));
   int64_t i = std::max(std::abs(v->n), v->d);
   if (i <= SHRT_MAX) return nm::INT16;
   else if (i <= INT_MAX) return nm::INT32;
@@ -1848,6 +2550,8 @@ nm::dtype_t nm_dtype_guess(VALUE v) {
  * accessing some part of a matrix.
  */
 static SLICE* get_slice(size_t dim, int argc, VALUE* arg, size_t* shape) {
+  NM_CONSERVATIVE(nm_register_values(arg, argc));
+
   VALUE beg, end;
   int excl;
 
@@ -1875,15 +2579,17 @@ static SLICE* get_slice(size_t dim, int argc, VALUE* arg, size_t* shape) {
       slice->coords[r]  = 0;
       slice->lengths[r] = shape[r];
       slice->single     = false;
+      t++;
 
     } else if (TYPE(arg[t]) == T_HASH) { // 3:5 notation (inclusive)
       VALUE begin_end   = rb_funcall(v, rb_intern("shift"), 0); // rb_hash_shift
+      nm_register_value(begin_end);
       slice->coords[r]  = FIX2UINT(rb_ary_entry(begin_end, 0));
       slice->lengths[r] = FIX2UINT(rb_ary_entry(begin_end, 1)) - slice->coords[r];
 
       if (RHASH_EMPTY_P(v)) t++; // go on to the next
-
       slice->single = false;
+      nm_unregister_value(begin_end);
 
     } else if (CLASS_OF(v) == rb_cRange) {
       rb_range_values(arg[t], &beg, &end, &excl);
@@ -1896,13 +2602,17 @@ static SLICE* get_slice(size_t dim, int argc, VALUE* arg, size_t* shape) {
       t++;
 
     } else {
+      NM_CONSERVATIVE(nm_unregister_values(arg, argc));
       rb_raise(rb_eArgError, "expected Fixnum, Range, or Hash for slice component instead of %s", rb_obj_classname(v));
     }
 
-    if (slice->coords[r] > shape[r] || slice->coords[r] + slice->lengths[r] > shape[r])
-      rb_raise(rb_eRangeError, "slice is larger than matrix in dimension %u (slice component %u)", r, t);
+    if (slice->coords[r] > shape[r] || slice->coords[r] + slice->lengths[r] > shape[r]) {
+      NM_CONSERVATIVE(nm_unregister_values(arg, argc));
+      rb_raise(rb_eRangeError, "slice is larger than matrix in dimension %lu (slice component %lu)", r, t);
+    }
   }
 
+  NM_CONSERVATIVE(nm_unregister_values(arg, argc));
   return slice;
 }
 
@@ -1960,12 +2670,14 @@ static nm::dtype_t interpret_dtype(int argc, VALUE* argv, nm::stype_t stype) {
  * Convert an Ruby value or an array of Ruby values into initial C values.
  */
 static void* interpret_initial_value(VALUE arg, nm::dtype_t dtype) {
+  NM_CONSERVATIVE(nm_register_value(arg));
+
   unsigned int index;
   void* init_val;
 
   if (TYPE(arg) == T_ARRAY) {
   	// Array
-    init_val = ALLOC_N(char, DTYPE_SIZES[dtype] * RARRAY_LEN(arg));
+    init_val = NM_ALLOC_N(char, DTYPE_SIZES[dtype] * RARRAY_LEN(arg));
     NM_CHECK_ALLOC(init_val);
     for (index = 0; index < RARRAY_LEN(arg); ++index) {
     	rubyval_to_cval(RARRAY_PTR(arg)[index], dtype, (char*)init_val + (index * DTYPE_SIZES[dtype]));
@@ -1976,6 +2688,7 @@ static void* interpret_initial_value(VALUE arg, nm::dtype_t dtype) {
     init_val = rubyobj_to_cval(arg, dtype);
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(arg));
   return init_val;
 }
 
@@ -1986,11 +2699,12 @@ static void* interpret_initial_value(VALUE arg, nm::dtype_t dtype) {
  * array describing the shape, which must be freed manually.
  */
 static size_t* interpret_shape(VALUE arg, size_t* dim) {
+  NM_CONSERVATIVE(nm_register_value(arg));
   size_t* shape;
 
   if (TYPE(arg) == T_ARRAY) {
     *dim = RARRAY_LEN(arg);
-    shape = ALLOC_N(size_t, *dim);
+    shape = NM_ALLOC_N(size_t, *dim);
 
     for (size_t index = 0; index < *dim; ++index) {
       shape[index] = FIX2UINT( RARRAY_PTR(arg)[index] );
@@ -1998,15 +2712,17 @@ static size_t* interpret_shape(VALUE arg, size_t* dim) {
 
   } else if (FIXNUM_P(arg)) {
     *dim = 2;
-    shape = ALLOC_N(size_t, *dim);
+    shape = NM_ALLOC_N(size_t, *dim);
 
     shape[0] = FIX2UINT(arg);
     shape[1] = FIX2UINT(arg);
 
   } else {
+    NM_CONSERVATIVE(nm_unregister_value(arg)); // guarded like the NM_CONSERVATIVE registration at function entry
     rb_raise(rb_eArgError, "Expected an array of numbers or a single Fixnum for matrix shape");
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(arg));
   return shape;
 }
 
@@ -2038,12 +2754,20 @@ STORAGE* matrix_storage_cast_alloc(NMATRIX* matrix, nm::dtype_t new_dtype) {
 }
 
 STORAGE_PAIR binary_storage_cast_alloc(NMATRIX* left_matrix, NMATRIX* right_matrix) {
+  nm_register_nmatrix(left_matrix); // both inputs stay registered while the two casts below run
+  nm_register_nmatrix(right_matrix);
+
   STORAGE_PAIR casted;
   nm::dtype_t new_dtype = Upcast[left_matrix->storage->dtype][right_matrix->storage->dtype];
 
   casted.left  = matrix_storage_cast_alloc(left_matrix, new_dtype);
+  nm_register_storage(left_matrix->stype, casted.left); // keep the left result registered while the right side is cast
   casted.right = matrix_storage_cast_alloc(right_matrix, new_dtype);
 
+  nm_unregister_nmatrix(left_matrix); // release everything registered above before returning the pair
+  nm_unregister_nmatrix(right_matrix);
+  nm_unregister_storage(left_matrix->stype, casted.left);
+
   return casted;
 }
 
@@ -2053,12 +2777,16 @@ static VALUE matrix_multiply_scalar(NMATRIX* left, VALUE scalar) {
 }
 
 static VALUE matrix_multiply(NMATRIX* left, NMATRIX* right) {
+  nm_register_nmatrix(left);
+  nm_register_nmatrix(right);
   ///TODO: multiplication for non-dense and/or non-decimal matrices
 
   // Make sure both of our matrices are of the correct type.
   STORAGE_PAIR casted = binary_storage_cast_alloc(left, right);
+  nm_register_storage(left->stype, casted.left);
+  nm_register_storage(right->stype, casted.right);
 
-  size_t*  resulting_shape   = ALLOC_N(size_t, 2);
+  size_t*  resulting_shape   = NM_ALLOC_N(size_t, 2);
   resulting_shape[0] = left->storage->shape[0];
   resulting_shape[1] = right->storage->shape[1];
 
@@ -2074,6 +2802,7 @@ static VALUE matrix_multiply(NMATRIX* left, NMATRIX* right) {
 
   STORAGE* resulting_storage = storage_matrix_multiply[left->stype](casted, resulting_shape, vector);
   NMATRIX* result = nm_create(left->stype, resulting_storage);
+  nm_register_nmatrix(result);
 
   // Free any casted-storage we created for the multiplication.
   // TODO: Can we make the Ruby GC take care of this stuff now that we're using it?
@@ -2085,11 +2814,19 @@ static VALUE matrix_multiply(NMATRIX* left, NMATRIX* right) {
     nm_yale_storage_delete
   };
 
+  nm_unregister_storage(left->stype, casted.left);
   if (left->storage != casted.left)   free_storage[result->stype](casted.left);
+
+  nm_unregister_storage(right->stype, casted.right);
   if (right->storage != casted.right) free_storage[result->stype](casted.right);
 
-  if (result) return Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, result);
-  return Qnil; // Only if we try to multiply list matrices should we return Qnil.
+  VALUE to_return = result ? Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, result) : Qnil; // Only if we try to multiply list matrices should we return Qnil.
+
+  nm_unregister_nmatrix(left);
+  nm_unregister_nmatrix(right);
+  nm_unregister_nmatrix(result);
+
+  return to_return;
 }
 
 /*
@@ -2100,15 +2837,31 @@ static VALUE matrix_multiply(NMATRIX* left, NMATRIX* right) {
  * Note: Currently only implemented for 2x2 and 3x3 matrices.
  */
 static VALUE nm_det_exact(VALUE self) {
-  if (NM_STYPE(self) != nm::DENSE_STORE) rb_raise(nm_eStorageTypeError, "can only calculate exact determinant for dense matrices");
 
-  if (NM_DIM(self) != 2 || NM_SHAPE0(self) != NM_SHAPE1(self)) return Qnil;
+  if (NM_STYPE(self) != nm::DENSE_STORE) {
+    rb_raise(nm_eStorageTypeError, "can only calculate exact determinant for dense matrices");
+  }
+  if (NM_DIM(self) != 2 || NM_SHAPE0(self) != NM_SHAPE1(self)) { // not a square two-dimensional matrix
+    return Qnil;
+  }
+
+  NM_CONSERVATIVE(nm_register_value(self)); // done after the early exits above, so those paths need no unregister
 
   // Calculate the determinant and then assign it to the return value
-  void* result = ALLOCA_N(char, DTYPE_SIZES[NM_DTYPE(self)]);
+  void* result = NM_ALLOCA_N(char, DTYPE_SIZES[NM_DTYPE(self)]); // room for one element of the matrix's dtype
+  nm::dtype_t dtype = NM_DTYPE(self);
   nm_math_det_exact(NM_SHAPE0(self), NM_STORAGE_DENSE(self)->elements, NM_SHAPE0(self), NM_DTYPE(self), result);
 
-  return rubyobj_from_cval(result, NM_DTYPE(self)).rval;
+  if (dtype == nm::RUBYOBJ) { // for object matrices the result buffer is treated as one VALUE and registered
+    nm_register_values(reinterpret_cast<VALUE*>(result), 1);
+  }
+  VALUE to_return = rubyobj_from_cval(result, NM_DTYPE(self)).rval;
+  if (dtype == nm::RUBYOBJ) { // balances the registration made before the conversion
+    nm_unregister_values(reinterpret_cast<VALUE*>(result), 1);
+  }
+  NM_CONSERVATIVE(nm_unregister_value(self));
+
+  return to_return;
 }
 
 /////////////////
@@ -2127,6 +2880,11 @@ static VALUE nm_det_exact(VALUE self) {
  * TODO: Add a column-major option for libraries that use column-major matrices.
  */
 VALUE rb_nmatrix_dense_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t length) {
+
+  if (dtype == nm::RUBYOBJ) {
+    nm_register_values(reinterpret_cast<VALUE*>(elements), length);
+  }
+
   NMATRIX* nm;
   size_t nm_dim;
   size_t* shape_copy;
@@ -2134,25 +2892,34 @@ VALUE rb_nmatrix_dense_create(nm::dtype_t dtype, size_t* shape, size_t dim, void
   // Do not allow a dim of 1. Treat it as a column or row matrix.
   if (dim == 1) {
     nm_dim				= 2;
-    shape_copy		= ALLOC_N(size_t, nm_dim);
+    shape_copy		= NM_ALLOC_N(size_t, nm_dim);
     shape_copy[0]	= shape[0];
     shape_copy[1]	= 1;
 
   } else {
     nm_dim			= dim;
-    shape_copy	= ALLOC_N(size_t, nm_dim);
+    shape_copy	= NM_ALLOC_N(size_t, nm_dim);
     memcpy(shape_copy, shape, sizeof(size_t)*nm_dim);
   }
 
   // Copy elements
-  void* elements_copy = ALLOC_N(char, DTYPE_SIZES[dtype]*length);
+  void* elements_copy = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*length);
   memcpy(elements_copy, elements, DTYPE_SIZES[dtype]*length);
 
   // allocate and create the matrix and its storage
   nm = nm_create(nm::DENSE_STORE, nm_dense_storage_create(dtype, shape_copy, dim, elements_copy, length));
 
+  nm_register_nmatrix(nm);
+
+  VALUE to_return = Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, nm);
+
+  nm_unregister_nmatrix(nm);
+  if (dtype == nm::RUBYOBJ) {
+    nm_unregister_values(reinterpret_cast<VALUE*>(elements), length);
+  }
+
   // tell Ruby about the matrix and its storage, particularly how to garbage collect it.
-  return Data_Wrap_Struct(cNMatrix, nm_mark, nm_delete, nm);
+  return to_return;
 }
 
 /*
diff --git a/ext/nmatrix/storage/common.cpp b/ext/nmatrix/storage/common.cpp
index 482a48b..e6a1325 100644
--- a/ext/nmatrix/storage/common.cpp
+++ b/ext/nmatrix/storage/common.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/common.h b/ext/nmatrix/storage/common.h
index 17be0e2..b9b9ac1 100644
--- a/ext/nmatrix/storage/common.h
+++ b/ext/nmatrix/storage/common.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/dense.cpp b/ext/nmatrix/storage/dense/dense.cpp
similarity index 73%
rename from ext/nmatrix/storage/dense.cpp
rename to ext/nmatrix/storage/dense/dense.cpp
index f391582..1253e1a 100644
--- a/ext/nmatrix/storage/dense.cpp
+++ b/ext/nmatrix/storage/dense/dense.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -34,13 +34,12 @@
 /*
  * Project Includes
  */
-// #include "types.h"
-#include "data/data.h"
-#include "math/long_dtype.h"
-#include "math/gemm.h"
-#include "math/gemv.h"
-#include "math/math.h"
-#include "common.h"
+#include "../../data/data.h"
+#include "../../math/long_dtype.h"
+#include "../../math/gemm.h"
+#include "../../math/gemv.h"
+#include "../../math/math.h"
+#include "../common.h"
 #include "dense.h"
 
 /*
@@ -124,10 +123,13 @@ namespace nm { namespace dense_storage {
    */
   template <typename D>
   void set(VALUE left, SLICE* slice, VALUE right) {
+    NM_CONSERVATIVE(nm_register_value(left));
+    NM_CONSERVATIVE(nm_register_value(right));
+
     DENSE_STORAGE* s = NM_STORAGE_DENSE(left);
 
     std::pair<NMATRIX*,bool> nm_and_free =
-      interpret_arg_as_dense_nmatrix(right, NM_DTYPE(left));
+      interpret_arg_as_dense_nmatrix(right, s->dtype);
 
     // Map the data onto D* v.
     D*     v;
@@ -139,13 +141,20 @@ namespace nm { namespace dense_storage {
       v_size           = nm_storage_count_max_elements(t);
 
     } else if (TYPE(right) == T_ARRAY) {
+      
       v_size = RARRAY_LEN(right);
-      v      = ALLOC_N(D, v_size);
+      v      = NM_ALLOC_N(D, v_size);
+      if (s->dtype == nm::RUBYOBJ)
+        nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
+
       for (size_t m = 0; m < v_size; ++m) {
         rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
       }
+
     } else {
       v = reinterpret_cast<D*>(rubyobj_to_cval(right, NM_DTYPE(left)));
+      if (s->dtype == nm::RUBYOBJ)
+        nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
     }
 
     if (slice->single) {
@@ -156,10 +165,18 @@ namespace nm { namespace dense_storage {
     }
 
     // Only free v if it was allocated in this function.
-    if (nm_and_free.first && nm_and_free.second)
-      nm_delete(nm_and_free.first);
-    else
-      xfree(v);
+    if (nm_and_free.first) {
+      if (nm_and_free.second) {
+        nm_delete(nm_and_free.first);
+      }
+    } else {
+      if (s->dtype == nm::RUBYOBJ)
+        nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
+      NM_FREE(v);
+    }
+    NM_CONSERVATIVE(nm_unregister_value(left));
+    NM_CONSERVATIVE(nm_unregister_value(right));
+
   }
 
 }} // end of namespace nm::dense_storage
@@ -186,13 +203,13 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
  * check for that NULL pointer before freeing elements.
  */
 static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
-  DENSE_STORAGE* s = ALLOC( DENSE_STORAGE );
+  DENSE_STORAGE* s = NM_ALLOC( DENSE_STORAGE );
 
   s->dim        = dim;
   s->shape      = shape;
   s->dtype      = dtype;
 
-  s->offset     = ALLOC_N(size_t, dim);
+  s->offset     = NM_ALLOC_N(size_t, dim);
   memset(s->offset, 0, sizeof(size_t)*dim);
 
   s->stride     = stride(shape, dim);
@@ -212,15 +229,24 @@ static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* s
  * elements is NULL, the new elements array will not be initialized.
  */
 DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
+  if (dtype == nm::RUBYOBJ)
+    nm_register_values(reinterpret_cast<VALUE*>(elements), elements_length);
 
   DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
   size_t count  = nm_storage_count_max_elements(s);
 
   if (elements_length == count) {
-  	s->elements = elements;
+    s->elements = elements;
+    
+    if (dtype == nm::RUBYOBJ)
+      nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
 
   } else {
-    s->elements = ALLOC_N(char, DTYPE_SIZES[dtype]*count);
+
+    s->elements = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*count);
+
+    if (dtype == nm::RUBYOBJ)
+      nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);
 
     size_t copy_length = elements_length;
 
@@ -236,7 +262,7 @@ DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
       }
 
       // Get rid of the init_val.
-      xfree(elements);
+      NM_FREE(elements);
     }
   }
 
@@ -252,12 +278,13 @@ void nm_dense_storage_delete(STORAGE* s) {
   if (s) {
     DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
     if(storage->count-- == 1) {
-      xfree(storage->shape);
-      xfree(storage->offset);
-      xfree(storage->stride);
-      if (storage->elements != NULL) // happens with dummy objects
-        xfree(storage->elements);
-      xfree(storage);
+      NM_FREE(storage->shape);
+      NM_FREE(storage->offset);
+      NM_FREE(storage->stride);
+      if (storage->elements != NULL) {// happens with dummy objects
+        NM_FREE(storage->elements);
+      }
+      NM_FREE(storage);
     }
   }
 }
@@ -270,9 +297,9 @@ void nm_dense_storage_delete_ref(STORAGE* s) {
   if (s) {
     DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
     nm_dense_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
-    xfree(storage->shape);
-    xfree(storage->offset);
-    xfree(storage);
+    NM_FREE(storage->shape);
+    NM_FREE(storage->offset);
+    NM_FREE(storage);
   }
 }
 
@@ -286,14 +313,43 @@ void nm_dense_storage_mark(STORAGE* storage_base) {
   if (storage && storage->dtype == nm::RUBYOBJ) {
     VALUE* els = reinterpret_cast<VALUE*>(storage->elements);
 
-    rb_gc_mark_locations(els, els + nm_storage_count_max_elements(storage) * sizeof(VALUE));
-
+    if (els) { // rb_gc_mark_locations treats its end pointer as exclusive, so pass one past the last element
+      rb_gc_mark_locations(els, els + nm_storage_count_max_elements(storage));
+    }
   	//for (size_t index = nm_storage_count_max_elements(storage); index-- > 0;) {
     //  rb_gc_mark(els[index]);
     //}
   }
 }
 
+/**
+ * Register the elements of a dense storage struct as in-use so that they are
+ * not garbage collected.
+ *
+ * Storage with a non-object dtype, or with no elements array, is ignored, so
+ * it's safe to pass any dense storage in.
+ */
+void nm_dense_storage_register(const STORAGE* s) {
+  const DENSE_STORAGE* dense = reinterpret_cast<const DENSE_STORAGE*>(s);
+  if (dense->dtype != nm::RUBYOBJ || !dense->elements) return;
+
+  nm_register_values(reinterpret_cast<VALUE*>(dense->elements), nm_storage_count_max_elements(dense));
+}
+
+/**
+ * Unregister the elements of a dense storage struct so that they are subject
+ * to normal garbage collection again.
+ *
+ * Storage with a non-object dtype, or with no elements array, is ignored, so
+ * it's safe to pass any dense storage in.
+ */
+void nm_dense_storage_unregister(const STORAGE* s) {
+  const DENSE_STORAGE* dense = reinterpret_cast<const DENSE_STORAGE*>(s);
+  if (dense->dtype != nm::RUBYOBJ || !dense->elements) return;
+
+  nm_unregister_values(reinterpret_cast<VALUE*>(dense->elements), nm_storage_count_max_elements(dense));
+}
+
 ///////////////
 // Accessors //
 ///////////////
@@ -304,23 +360,30 @@ void nm_dense_storage_mark(STORAGE* storage_base) {
  * map_pair iterator for dense matrices (for element-wise operations)
  */
 VALUE nm_dense_map_pair(VALUE self, VALUE right) {
-  DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
-                *t = NM_STORAGE_DENSE(right);
 
+  NM_CONSERVATIVE(nm_register_value(self));
+  NM_CONSERVATIVE(nm_register_value(right));
+
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(right));
+  NM_CONSERVATIVE(nm_unregister_value(self));
   RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
 
-  size_t* coords = ALLOCA_N(size_t, s->dim);
+  DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
+                *t = NM_STORAGE_DENSE(right);
+
+  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
   memset(coords, 0, sizeof(size_t) * s->dim);
 
-  size_t *shape_copy = ALLOC_N(size_t, s->dim);
+  size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
   memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
 
   size_t count = nm_storage_count_max_elements(s);
 
   DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
-  VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
 
-  nm_register_values(result_elem, count);
+  VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
+  nm_dense_storage_register(result);
 
   for (size_t k = 0; k < count; ++k) {
     nm_dense_storage_coords(result, k, coords);
@@ -328,17 +391,23 @@ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
            t_index = nm_dense_storage_pos(t, coords);
 
     VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
+    nm_register_value(sval);
     VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
-
     result_elem[k] = rb_yield_values(2, sval, tval);
+    nm_unregister_value(sval);
   }
 
+  VALUE klass = CLASS_OF(self);
   NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
-  VALUE rb_nm = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, m);
+  nm_register_nmatrix(m);
+  VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
 
-  nm_unregister_values(result_elem, count);
+  nm_unregister_nmatrix(m);
+  nm_dense_storage_unregister(result);
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  NM_CONSERVATIVE(nm_unregister_value(right));
 
-  return rb_nm;
+  return to_return;
 
 }
 
@@ -346,22 +415,28 @@ VALUE nm_dense_map_pair(VALUE self, VALUE right) {
  * map enumerator for dense matrices.
  */
 VALUE nm_dense_map(VALUE self) {
-  DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
 
+  NM_CONSERVATIVE(nm_register_value(self));
+
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(self));
   RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);
 
-  size_t* coords = ALLOCA_N(size_t, s->dim);
+  DENSE_STORAGE *s = NM_STORAGE_DENSE(self);
+
+  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
   memset(coords, 0, sizeof(size_t) * s->dim);
 
-  size_t *shape_copy = ALLOC_N(size_t, s->dim);
+  size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
   memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
 
   size_t count = nm_storage_count_max_elements(s);
 
   DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);
+
   VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
 
-  nm_register_values(result_elem, count);
+  nm_dense_storage_register(result);
 
   for (size_t k = 0; k < count; ++k) {
     nm_dense_storage_coords(result, k, coords);
@@ -370,13 +445,18 @@ VALUE nm_dense_map(VALUE self) {
     result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
   }
 
+  VALUE klass = CLASS_OF(self);
+
   NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
-  VALUE rb_nm = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, m);
+  nm_register_nmatrix(m);
 
-  nm_unregister_values(result_elem, count);
+  VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
 
-  return rb_nm;
+  nm_unregister_nmatrix(m);
+  nm_dense_storage_unregister(result);
+  NM_CONSERVATIVE(nm_unregister_value(self));
 
+  return to_return;
 }
 
 
@@ -384,18 +464,20 @@ VALUE nm_dense_map(VALUE self) {
  * each_with_indices iterator for dense matrices.
  */
 VALUE nm_dense_each_with_indices(VALUE nmatrix) {
-  volatile VALUE nm = nmatrix;
-
-  DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
 
-  RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
+  
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
+  RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
+  DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
 
   // Create indices and initialize them to zero
-  size_t* coords = ALLOCA_N(size_t, s->dim);
+  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
   memset(coords, 0, sizeof(size_t) * s->dim);
 
   size_t slice_index;
-  size_t* shape_copy = ALLOC_N(size_t, s->dim);
+  size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
   memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
 
   DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
@@ -404,8 +486,9 @@ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
     nm_dense_storage_coords(sliced_dummy, k, coords);
     slice_index = nm_dense_storage_pos(s, coords);
     VALUE ary = rb_ary_new();
-    if (NM_DTYPE(nm) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
-    else rb_ary_push(ary, rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval);
+    nm_register_value(ary);
+    if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
+    else rb_ary_push(ary, rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval);
 
     for (size_t p = 0; p < s->dim; ++p) {
       rb_ary_push(ary, INT2FIX(coords[p]));
@@ -413,11 +496,13 @@ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
 
     // yield the array which now consists of the value and the indices
     rb_yield(ary);
-
+    nm_unregister_value(ary);
   }
 
   nm_dense_storage_delete(sliced_dummy);
 
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
+
   return nmatrix;
 
 }
@@ -431,18 +516,22 @@ VALUE nm_dense_each_with_indices(VALUE nmatrix) {
  * containing other types of data.
  */
 VALUE nm_dense_each(VALUE nmatrix) {
-  volatile VALUE nm = nmatrix; // Not sure this actually does anything.
-  DENSE_STORAGE* s = NM_STORAGE_DENSE(nm);
 
-  RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_enumerator_length);
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
+
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
+  RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length);
+
+  DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);
 
-  size_t* temp_coords = ALLOCA_N(size_t, s->dim);
+  size_t* temp_coords = NM_ALLOCA_N(size_t, s->dim);
   size_t sliced_index;
-  size_t* shape_copy = ALLOC_N(size_t, s->dim);
+  size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
   memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
   DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);
 
-  if (NM_DTYPE(nm) == nm::RUBYOBJ) {
+  if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) {
 
     // matrix of Ruby objects -- yield those objects directly
     for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
@@ -458,12 +547,13 @@ VALUE nm_dense_each(VALUE nmatrix) {
     for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
       nm_dense_storage_coords(sliced_dummy, i, temp_coords);
       sliced_index = nm_dense_storage_pos(s, temp_coords);
-      VALUE v = rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nm)], NM_DTYPE(nm)).rval;
+      VALUE v = rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval;
       rb_yield( v ); // yield to the copy we made
     }
   }
 
   nm_dense_storage_delete(sliced_dummy);
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
 
   return nmatrix;
 
@@ -487,11 +577,11 @@ static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* le
  */
 void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
   DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
-
   if (slice->single)
     return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
   else {
-    size_t *shape      = ALLOC_N(size_t, s->dim);
+    nm_dense_storage_register(s);
+    size_t *shape      = NM_ALLOC_N(size_t, s->dim);
     for (size_t i = 0; i < s->dim; ++i) {
       shape[i]  = slice->lengths[i];
     }
@@ -505,6 +595,7 @@ void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
         nm_dense_storage_pos(s, slice->coords),
         0);
 
+    nm_dense_storage_unregister(s);
     return ns;
   }
 }
@@ -521,11 +612,12 @@ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
     return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
 
   else {
-    DENSE_STORAGE* ns = ALLOC( DENSE_STORAGE );
+    nm_dense_storage_register(s);
+    DENSE_STORAGE* ns = NM_ALLOC( DENSE_STORAGE );
     ns->dim        = s->dim;
     ns->dtype      = s->dtype;
-    ns->offset     = ALLOC_N(size_t, ns->dim);
-    ns->shape      = ALLOC_N(size_t, ns->dim);
+    ns->offset     = NM_ALLOC_N(size_t, ns->dim);
+    ns->shape      = NM_ALLOC_N(size_t, ns->dim);
 
     for (size_t i = 0; i < ns->dim; ++i) {
       ns->offset[i] = slice->coords[i] + s->offset[i];
@@ -538,6 +630,7 @@ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
     s->src->count++;
     ns->src = s->src;
 
+    nm_dense_storage_unregister(s);
     return ns;
   }
 }
@@ -550,8 +643,8 @@ void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
  */
 void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
   NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::set, void, VALUE, SLICE*, VALUE)
-
-  ttable[NM_DTYPE(left)](left, slice, right);
+  nm::dtype_t dtype = NM_DTYPE(left);
+  ttable[dtype](left, slice, right);
 }
 
 
@@ -567,7 +660,7 @@ void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
  *				have the same dtype.
  */
 bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
-	LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
+  LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)
 
   if (!ttable[left->dtype][right->dtype]) {
     rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
@@ -657,7 +750,7 @@ void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, siz
  */
 static size_t* stride(size_t* shape, size_t dim) {
   size_t i, j;
-  size_t* stride = ALLOC_N(size_t, dim);
+  size_t* stride = NM_ALLOC_N(size_t, dim);
 
   for (i = 0; i < dim; ++i) {
     stride[i] = 1;
@@ -678,22 +771,24 @@ static size_t* stride(size_t* shape, size_t dim) {
  * Copy dense storage, changing dtype if necessary.
  */
 STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
-	NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
+  NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);
 
   if (!ttable[new_dtype][rhs->dtype]) {
     rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
     return NULL;
   }
 
-	return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
+  return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
 }
 
 /*
  * Copy dense storage without a change in dtype.
  */
 DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
+  nm_dense_storage_register(rhs);
+
   size_t  count = 0;
-  size_t *shape  = ALLOC_N(size_t, rhs->dim);
+  size_t *shape  = NM_ALLOC_N(size_t, rhs->dim);
 
   // copy shape and offset
   for (size_t i = 0; i < rhs->dim; ++i) {
@@ -709,7 +804,8 @@ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
     if (rhs == rhs->src) // not a reference
       memcpy(lhs->elements, rhs->elements, DTYPE_SIZES[rhs->dtype] * count);
     else { // slice whole matrix
-      size_t *offset = ALLOC_N(size_t, rhs->dim);
+      nm_dense_storage_register(lhs);
+      size_t *offset = NM_ALLOC_N(size_t, rhs->dim);
       memset(offset, 0, sizeof(size_t) * rhs->dim);
 
       slice_copy(lhs,
@@ -718,9 +814,13 @@ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
            0,
            nm_dense_storage_pos(rhs, offset),
            0);
+
+      nm_dense_storage_unregister(lhs);
     }
   }
 
+  nm_dense_storage_unregister(rhs);
+
   return lhs;
 }
 
@@ -733,7 +833,9 @@ DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
 STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
   DENSE_STORAGE* rhs = (DENSE_STORAGE*)rhs_base;
 
-  size_t *shape = ALLOC_N(size_t, rhs->dim);
+  nm_dense_storage_register(rhs);
+
+  size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
 
   // swap shape and offset
   shape[0] = rhs->shape[1];
@@ -743,17 +845,25 @@ STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
   lhs->offset[0] = rhs->offset[1];
   lhs->offset[1] = rhs->offset[0];
 
+  nm_dense_storage_register(lhs);
+
   if (rhs_base->src == rhs_base) {
     nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
   } else {
     NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);
 
-    if (!ttable[lhs->dtype][rhs->dtype])
+    if (!ttable[lhs->dtype][rhs->dtype]) {
+      nm_dense_storage_unregister(rhs);
+      nm_dense_storage_unregister(lhs);      
       rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
+    }
 
     ttable[lhs->dtype][rhs->dtype](rhs, lhs);
   }
 
+  nm_dense_storage_unregister(rhs);
+  nm_dense_storage_unregister(lhs);
+
   return (STORAGE*)lhs;
 }
 
@@ -768,21 +878,26 @@ namespace nm {
  * Otherwise, the NMATRIX* still belongs to Ruby and Ruby will free it.
  */
 std::pair<NMATRIX*,bool> interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype) {
+  NM_CONSERVATIVE(nm_register_value(right));
   if (TYPE(right) == T_DATA && (RDATA(right)->dfree == (RUBY_DATA_FUNC)nm_delete || RDATA(right)->dfree == (RUBY_DATA_FUNC)nm_delete_ref)) {
     NMATRIX *r;
     if (NM_STYPE(right) != DENSE_STORE || NM_DTYPE(right) != dtype || NM_SRC(right) != NM_STORAGE(right)) {
       UnwrapNMatrix( right, r );
       NMATRIX* ldtype_r = nm_cast_with_ctype_args(r, nm::DENSE_STORE, dtype, NULL);
+      NM_CONSERVATIVE(nm_unregister_value(right));
       return std::make_pair(ldtype_r,true);
     } else {  // simple case -- right-hand matrix is dense and is not a reference and has same dtype
       UnwrapNMatrix( right, r );
+      NM_CONSERVATIVE(nm_unregister_value(right));
       return std::make_pair(r, false);
     }
     // Do not set v_alloc = true for either of these. It is the responsibility of r/ldtype_r
   } else if (TYPE(right) == T_DATA) {
+    NM_CONSERVATIVE(nm_unregister_value(right));
     rb_raise(rb_eTypeError, "unrecognized type for slice assignment");
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(right));
   return std::make_pair<NMATRIX*,bool>(NULL, false);
 }
 
@@ -796,11 +911,14 @@ namespace dense_storage {
 template<typename LDType, typename RDType>
 void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
 
+  nm_dense_storage_register(rhs);
+  nm_dense_storage_register(lhs);
+
   LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
   RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
 
   size_t count = nm_storage_count_max_elements(lhs);
-  size_t* temp_coords = ALLOCA_N(size_t, lhs->dim);
+  size_t* temp_coords = NM_ALLOCA_N(size_t, lhs->dim);
   size_t coord_swap_temp;
 
   while (count-- > 0) {
@@ -810,21 +928,28 @@ void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {
     lhs_els[count] = rhs_els[r_coord];
   }
 
+  nm_dense_storage_unregister(rhs);
+  nm_dense_storage_unregister(lhs);
+
 }
 
 template <typename LDType, typename RDType>
 DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
+  nm_dense_storage_register(rhs);
+
   size_t  count = nm_storage_count_max_elements(rhs);
 
-  size_t *shape = ALLOC_N(size_t, rhs->dim);
+  size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
   memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);
 
-  DENSE_STORAGE* lhs			= nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);
+  DENSE_STORAGE* lhs = nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);
+
+  nm_dense_storage_register(lhs);
 
 	// Ensure that allocation worked before copying.
   if (lhs && count) {
     if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
-      size_t* offset      = ALLOCA_N(size_t, rhs->dim);
+      size_t* offset      = NM_ALLOCA_N(size_t, rhs->dim);
       memset(offset, 0, sizeof(size_t) * rhs->dim);
 
       slice_copy(lhs, reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
@@ -832,54 +957,72 @@ DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
                  nm_dense_storage_pos(rhs, offset), 0);
 
     } else {              // Make a regular copy.
-      RDType*	rhs_els         = reinterpret_cast<RDType*>(rhs->elements);
-      LDType* lhs_els	        = reinterpret_cast<LDType*>(lhs->elements);
+      RDType* rhs_els          = reinterpret_cast<RDType*>(rhs->elements);
+      LDType* lhs_els          = reinterpret_cast<LDType*>(lhs->elements);
 
-    	while (count-- > 0)     		lhs_els[count] = rhs_els[count];
+      for (size_t i = 0; i < count; ++i)
+    	  lhs_els[i] = rhs_els[i];
     }
   }
 
+  nm_dense_storage_unregister(rhs);
+  nm_dense_storage_unregister(lhs);
+
   return lhs;
 }
 
 template <typename LDType, typename RDType>
 bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
+  nm_dense_storage_register(left);
+  nm_dense_storage_register(right);
+
   size_t index;
   DENSE_STORAGE *tmp1, *tmp2;
   tmp1 = NULL; tmp2 = NULL;
   bool result = true;
   /* FIXME: Very strange behavior! The GC calls the method directly with non-initialized data. */
-  if (left->dim != right->dim) return false;
-
+  if (left->dim != right->dim) {
+    nm_dense_storage_unregister(right);
+    nm_dense_storage_unregister(left);
+    return false;
+  }
 
-	LDType* left_elements	  = (LDType*)left->elements;
-  RDType* right_elements	= (RDType*)right->elements;
+  LDType* left_elements	  = (LDType*)left->elements;
+  RDType* right_elements  = (RDType*)right->elements;
 
   // Copy elements in temp matrix if you have reference to the right.
   if (left->src != left) {
     tmp1 = nm_dense_storage_copy(left);
+    nm_dense_storage_register(tmp1);
     left_elements = (LDType*)tmp1->elements;
   }
   if (right->src != right) {
     tmp2 = nm_dense_storage_copy(right);
+    nm_dense_storage_register(tmp2);
     right_elements = (RDType*)tmp2->elements;
   }
 
 
 
-	for (index = nm_storage_count_max_elements(left); index-- > 0;) {
-		if (left_elements[index] != right_elements[index]) {
+  for (index = nm_storage_count_max_elements(left); index-- > 0;) {
+    if (left_elements[index] != right_elements[index]) {
       result = false;
       break;
     }
-	}
+  }
 
-  if (tmp1)
-    free(tmp1);
-  if (tmp2)
-    free(tmp2);
+  if (tmp1) {
+    nm_dense_storage_unregister(tmp1);
+    NM_FREE(tmp1);
+  }
+  if (tmp2) {
+    nm_dense_storage_unregister(tmp2);
+    NM_FREE(tmp2);
+  }
 
-	return result;
+  nm_dense_storage_unregister(left);
+  nm_dense_storage_unregister(right);
+  return result;
 }
 
 template <typename DType>
@@ -929,11 +1072,16 @@ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t
   DENSE_STORAGE *left  = (DENSE_STORAGE*)(casted_storage.left),
                 *right = (DENSE_STORAGE*)(casted_storage.right);
 
+  nm_dense_storage_register(left);
+  nm_dense_storage_register(right);
+
   // Create result storage.
   DENSE_STORAGE* result = nm_dense_storage_create(left->dtype, resulting_shape, 2, NULL, 0);
 
-  DType *pAlpha = ALLOCA_N(DType, 1),
-        *pBeta  = ALLOCA_N(DType, 1);
+  nm_dense_storage_register(result);
+
+  DType *pAlpha = NM_ALLOCA_N(DType, 1),
+        *pBeta  = NM_ALLOCA_N(DType, 1);
 
   *pAlpha = 1;
   *pBeta = 0;
@@ -947,6 +1095,11 @@ static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t
                                     reinterpret_cast<DType*>(right->elements), right->shape[1], pBeta,
                                     reinterpret_cast<DType*>(result->elements), result->shape[1]);
 
+
+  nm_dense_storage_unregister(left);
+  nm_dense_storage_unregister(right);
+  nm_dense_storage_unregister(result);
+
   return result;
 }
 
diff --git a/ext/nmatrix/storage/dense.h b/ext/nmatrix/storage/dense/dense.h
similarity index 90%
rename from ext/nmatrix/storage/dense.h
rename to ext/nmatrix/storage/dense/dense.h
index 9f262c7..abe7d2d 100644
--- a/ext/nmatrix/storage/dense.h
+++ b/ext/nmatrix/storage/dense/dense.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -43,7 +43,7 @@
 
 #include "data/data.h"
 
-#include "common.h"
+#include "../common.h"
 
 #include "nmatrix.h"
 
@@ -73,8 +73,9 @@ DENSE_STORAGE*	nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t
 void						nm_dense_storage_delete(STORAGE* s);
 void						nm_dense_storage_delete_ref(STORAGE* s);
 void						nm_dense_storage_mark(STORAGE*);
-void            nm_dense_storage_register_values(VALUE* values, size_t n);
-void            nm_dense_storage_unregister_values(VALUE* values, size_t n);
+void            nm_dense_storage_register(const STORAGE* s);
+void            nm_dense_storage_unregister(const STORAGE* s);
+
 
 ///////////////
 // Accessors //
diff --git a/ext/nmatrix/storage/list.cpp b/ext/nmatrix/storage/list/list.cpp
similarity index 65%
rename from ext/nmatrix/storage/list.cpp
rename to ext/nmatrix/storage/list/list.cpp
index 9ffed21..d1fa8dc 100644
--- a/ext/nmatrix/storage/list.cpp
+++ b/ext/nmatrix/storage/list/list.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -25,7 +25,6 @@
 //
 // List-of-lists n-dimensional matrix storage. Uses singly-linked
 // lists.
-
 /*
  * Standard Includes
  */
@@ -34,21 +33,22 @@
 #include <algorithm> // std::min
 #include <iostream>
 #include <vector>
+#include <list>
 
 /*
  * Project Includes
  */
 
-#include "types.h"
+#include "../../types.h"
 
-#include "data/data.h"
+#include "../../data/data.h"
 
-#include "dense.h"
-#include "common.h"
+#include "../dense/dense.h"
+#include "../common.h"
 #include "list.h"
 
-#include "math/math.h"
-#include "util/sl_list.h"
+#include "../../math/math.h"
+#include "../../util/sl_list.h"
 
 /*
  * Macros
@@ -61,6 +61,8 @@
 
 extern "C" {
 static void slice_set_single(LIST_STORAGE* dest, LIST* l, void* val, size_t* coords, size_t* lengths, size_t n);
+static void __nm_list_storage_unregister_temp_value_list(std::list<VALUE*>& temp_vals);
+static void __nm_list_storage_unregister_temp_list_list(std::list<LIST*>& temp_vals, size_t recursions);
 }
 
 namespace nm { namespace list_storage {
@@ -78,11 +80,20 @@ public:
         offsets[i] += actual->offset[i];
       actual = reinterpret_cast<LIST_STORAGE*>(actual->src);
     }
+    nm_list_storage_register(actual);
+    nm_list_storage_register(ref);
     actual_shape_ = actual->shape;
 
     if (init_obj_ == Qnil) {
       init_obj_ = s->dtype == nm::RUBYOBJ ? *reinterpret_cast<VALUE*>(s->default_val) : rubyobj_from_cval(s->default_val, s->dtype).rval;
     }
+    nm_register_value(init_obj_);
+  }
+
+  ~RecurseData() {
+    nm_unregister_value(init_obj_);
+    nm_list_storage_unregister(ref);
+    nm_list_storage_unregister(actual);
   }
 
   dtype_t dtype() const { return ref->dtype; }
@@ -95,13 +106,13 @@ public:
   }
 
   size_t* copy_alloc_shape() const {
-    size_t* new_shape = ALLOC_N(size_t, ref->dim);
+    size_t* new_shape = NM_ALLOC_N(size_t, ref->dim);
     memcpy(new_shape, shape_, sizeof(size_t)*ref->dim);
     return new_shape;
   }
 
   size_t actual_shape(size_t rec) const {
-    return actual_shape_[ref->dim - rec - 1];
+    return actual_shape_[actual->dim - rec - 1];
   }
 
   size_t offset(size_t rec) const {
@@ -140,53 +151,157 @@ static bool eqeq_r(RecurseData& left, RecurseData& right, const LIST* l, const L
 template <typename SDType, typename TDType>
 static bool eqeq_empty_r(RecurseData& s, const LIST* l, size_t rec, const TDType* t_init);
 
-
 /*
  * Recursive helper for map_merged_stored_r which handles the case where one list is empty and the other is not.
  */
 static void map_empty_stored_r(RecurseData& result, RecurseData& s, LIST* x, const LIST* l, size_t rec, bool rev, const VALUE& t_init) {
+  if (s.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_register_list(l, rec);
+  }
+  if (result.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_register_list(x, rec);
+  }
+
   NODE *curr  = l->first,
        *xcurr = NULL;
 
   // For reference matrices, make sure we start in the correct place.
-  size_t offset   = result.offset(rec);
-  size_t x_shape  = result.ref_shape(rec);
+  size_t offset   = s.offset(rec);
+  size_t x_shape  = s.ref_shape(rec);
 
   while (curr && curr->key < offset) {  curr = curr->next;  }
   if (curr && curr->key - offset >= x_shape) curr = NULL;
 
   if (rec) {
+    std::list<LIST*> temp_vals;
     while (curr) {
       LIST* val = nm::list::create();
       map_empty_stored_r(result, s, val, reinterpret_cast<const LIST*>(curr->val), rec-1, rev, t_init);
 
       if (!val->first) nm::list::del(val, 0);
-      else nm::list::insert_helper(x, xcurr, curr->key - offset, val);
-
+      else {
+        nm_list_storage_register_list(val, rec-1);
+	temp_vals.push_front(val);
+        nm::list::insert_helper(x, xcurr, curr->key - offset, val);
+      } 
       curr = curr->next;
       if (curr && curr->key - offset >= x_shape) curr = NULL;
     }
+    __nm_list_storage_unregister_temp_list_list(temp_vals, rec-1);
   } else {
+    std::list<VALUE*> temp_vals;
     while (curr) {
       VALUE val, s_val = rubyobj_from_cval(curr->val, s.dtype()).rval;
       if (rev) val = rb_yield_values(2, t_init, s_val);
       else     val = rb_yield_values(2, s_val, t_init);
 
-      if (rb_funcall(val, rb_intern("!="), 1, result.init_obj()) == Qtrue)
+      nm_register_value(val);
+
+      if (rb_funcall(val, rb_intern("!="), 1, result.init_obj()) == Qtrue) {
         xcurr = nm::list::insert_helper(x, xcurr, curr->key - offset, val);
+        temp_vals.push_front(reinterpret_cast<VALUE*>(xcurr->val));
+        nm_register_value(*reinterpret_cast<VALUE*>(xcurr->val));
+      }
+      nm_unregister_value(val);
 
       curr = curr->next;
       if (curr && curr->key - offset >= x_shape) curr = NULL;
     }
+    __nm_list_storage_unregister_temp_value_list(temp_vals);
+  }
+
+  if (s.dtype() == nm::RUBYOBJ){
+    nm_list_storage_unregister_list(l, rec);
+  }
+  if (result.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_unregister_list(x, rec);
   }
 
 }
 
 
 /*
+ * Recursive helper function for nm_list_map_stored
+ */
+static void map_stored_r(RecurseData& result, RecurseData& left, LIST* x, const LIST* l, size_t rec) {
+  if (left.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_register_list(l, rec);
+  }
+  if (result.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_register_list(x, rec);
+  }
+  NODE *lcurr = l->first,
+       *xcurr = x->first;
+
+  // For reference matrices, make sure we start in the correct place.
+  while (lcurr && lcurr->key < left.offset(rec))  {  lcurr = lcurr->next;  }
+
+  if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec))  lcurr = NULL;
+
+  if (rec) {
+    std::list<LIST*> temp_vals;
+    while (lcurr) {
+      size_t key;
+      LIST*  val = nm::list::create();
+      map_stored_r(result, left, val, reinterpret_cast<const LIST*>(lcurr->val), rec-1);
+      key        = lcurr->key - left.offset(rec);
+      lcurr      = lcurr->next;
+
+      if (!val->first) nm::list::del(val, 0); // empty list -- don't insert
+      else {
+        nm_list_storage_register_list(val, rec-1);
+        temp_vals.push_front(val);
+        xcurr = nm::list::insert_helper(x, xcurr, key, val);
+      }
+      if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
+    }
+    __nm_list_storage_unregister_temp_list_list(temp_vals, rec-1);
+  } else {
+    std::list<VALUE*> temp_vals;
+    while (lcurr) {
+      size_t key;
+      VALUE  val;
+
+      val   = rb_yield_values(1, rubyobj_from_cval(lcurr->val, left.dtype()).rval);
+      key   = lcurr->key - left.offset(rec);
+      lcurr = lcurr->next;
+
+      if (!rb_equal(val, result.init_obj())) {
+        xcurr = nm::list::insert_helper(x, xcurr, key, val);
+        temp_vals.push_front(reinterpret_cast<VALUE*>(xcurr->val));
+        nm_register_value(*reinterpret_cast<VALUE*>(xcurr->val));
+      }
+
+      if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
+    }
+    __nm_list_storage_unregister_temp_value_list(temp_vals);
+  }
+
+  if (left.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_unregister_list(l, rec);
+  }
+  if (result.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_unregister_list(x, rec);
+  }
+}
+
+
+
+/*
  * Recursive helper function for nm_list_map_merged_stored
  */
 static void map_merged_stored_r(RecurseData& result, RecurseData& left, RecurseData& right, LIST* x, const LIST* l, const LIST* r, size_t rec) {
+  if (left.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_register_list(l, rec);
+  }
+  if (right.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_register_list(r, rec);
+  }
+  if (result.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_register_list(x, rec);
+  }
+
+
   NODE *lcurr = l->first,
        *rcurr = r->first,
        *xcurr = x->first;
@@ -199,6 +314,7 @@ static void map_merged_stored_r(RecurseData& result, RecurseData& left, RecurseD
   if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec))  lcurr = NULL;
 
   if (rec) {
+    std::list<LIST*> temp_vals;
     while (lcurr || rcurr) {
       size_t key;
       LIST*  val = nm::list::create();
@@ -218,13 +334,19 @@ static void map_merged_stored_r(RecurseData& result, RecurseData& left, RecurseD
         rcurr = rcurr->next;
       }
 
-      if (!val->first) nm::list::del(val, 0); // empty list -- don't insert
-      else xcurr = nm::list::insert_helper(x, xcurr, key, val);
 
+      if (!val->first) nm::list::del(val, 0); // empty list -- don't insert
+      else {
+        nm_list_storage_register_list(val, rec-1);
+        temp_vals.push_front(val);
+        xcurr = nm::list::insert_helper(x, xcurr, key, val);
+      }
       if (rcurr && rcurr->key - right.offset(rec) >= result.ref_shape(rec)) rcurr = NULL;
       if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
     }
+    __nm_list_storage_unregister_temp_list_list(temp_vals, rec-1);
   } else {
+    std::list<VALUE*> temp_vals;
     while (lcurr || rcurr) {
       size_t key;
       VALUE  val;
@@ -234,7 +356,7 @@ static void map_merged_stored_r(RecurseData& result, RecurseData& left, RecurseD
         key   = lcurr->key - left.offset(rec);
         lcurr = lcurr->next;
       } else if (!lcurr || (rcurr && (rcurr->key - right.offset(rec) < lcurr->key - left.offset(rec)))) {
-        val   = rb_yield_values(2, left.init_obj(), rubyobj_from_cval(rcurr->val, right.dtype()).rval);
+	      val   = rb_yield_values(2, left.init_obj(), rubyobj_from_cval(rcurr->val, right.dtype()).rval);
         key   = rcurr->key - right.offset(rec);
         rcurr = rcurr->next;
       } else { // == and both present
@@ -243,15 +365,35 @@ static void map_merged_stored_r(RecurseData& result, RecurseData& left, RecurseD
         lcurr = lcurr->next;
         rcurr = rcurr->next;
       }
-      if (rb_funcall(val, rb_intern("!="), 1, result.init_obj()) == Qtrue)
+
+      nm_register_value(val);
+
+      if (rb_funcall(val, rb_intern("!="), 1, result.init_obj()) == Qtrue) {
         xcurr = nm::list::insert_helper(x, xcurr, key, val);
+        temp_vals.push_front(reinterpret_cast<VALUE*>(xcurr->val));
+        nm_register_value(*reinterpret_cast<VALUE*>(xcurr->val));
+      }
+
+      nm_unregister_value(val);
 
       if (rcurr && rcurr->key - right.offset(rec) >= result.ref_shape(rec)) rcurr = NULL;
       if (lcurr && lcurr->key - left.offset(rec) >= result.ref_shape(rec)) lcurr = NULL;
     }
+    __nm_list_storage_unregister_temp_value_list(temp_vals);
+  }
+
+  if (left.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_unregister_list(l, rec);
+  }
+  if (right.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_unregister_list(r, rec);
+  }
+  if (result.dtype() == nm::RUBYOBJ) {
+    nm_list_storage_unregister_list(x, rec);
   }
 }
 
+
 /*
  * Recursive function, sets multiple values in a matrix from multiple source values. Also handles removal; returns true
  * if the recursion results in an empty list at that level (which signals that the current parent should be removed).
@@ -266,6 +408,12 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
   using nm::list::insert_after;
   size_t* offsets = dest->offset;
 
+  nm_list_storage_register(dest);
+  if (dest->dtype == nm::RUBYOBJ) {
+    nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
+    nm_list_storage_register_list(l, dest->dim - n - 1);
+  }
+
   // drill down into the structure
   NODE* prev = find_preceding_from_list(l, coords[n] + offsets[n]);
   NODE* node = NULL;
@@ -286,13 +434,16 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
     }
 
     // At this point, it's guaranteed that there is a list here matching key.
-
+    std::list<LIST*> temp_lists;
     while (node) {
       // Recurse down into the list. If it returns true, it's empty, so we need to delete it.
       bool remove_parent = slice_set(dest, reinterpret_cast<LIST*>(node->val), coords, lengths, n+1, v, v_size, v_offset);
-
+      if (dest->dtype == nm::RUBYOBJ) {
+        temp_lists.push_front(reinterpret_cast<LIST*>(node->val));
+        nm_list_storage_register_list(reinterpret_cast<LIST*>(node->val), dest->dim - n - 2);
+      }
       if (remove_parent) {
-        xfree(remove_by_node(l, prev, node));
+        NM_FREE(remove_by_node(l, prev, node));
         if (prev) node = prev->next ? prev->next : NULL;
         else      node = l->first   ? l->first   : NULL;
       } else {  // move forward
@@ -313,12 +464,13 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
         }
       }
     }
+    __nm_list_storage_unregister_temp_list_list(temp_lists, dest->dim - n - 2);
 
   } else {
 
     size_t i    = 0;
     size_t key  = i + offsets[n] + coords[n];
-
+    std::list<VALUE*> temp_vals;
     while (i < lengths[n]) {
       // Make sure we have an element to work with
       if (v_offset >= v_size) v_offset %= v_size;
@@ -327,7 +479,7 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
         if (node->key == key) {
           if (v[v_offset] == *reinterpret_cast<D*>(dest->default_val)) { // remove zero value
 
-            xfree(remove_by_node(l, (prev ? prev : l->first), node));
+            NM_FREE(remove_by_node(l, (prev ? prev : l->first), node));
 
             if (prev) node = prev->next ? prev->next : NULL;
             else      node = l->first   ? l->first   : NULL;
@@ -338,7 +490,12 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
             node = node->next ? node->next : NULL;
           }
         } else if (node->key > key) {
-          D* nv = ALLOC(D); *nv = v[v_offset];
+          D* nv = NM_ALLOC(D); *nv = v[v_offset++];
+          if (dest->dtype == nm::RUBYOBJ) {
+            nm_register_value(*reinterpret_cast<VALUE*>(nv));
+            temp_vals.push_front(reinterpret_cast<VALUE*>(nv));
+          }
+
           if (prev) node = insert_after(prev, key, nv);
           else      node = insert_first_node(l, key, nv, sizeof(D));
 
@@ -346,7 +503,11 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
           node = prev->next ? prev->next : NULL;
         }
       } else { // no node -- insert a new one
-        D* nv = ALLOC(D); *nv = v[v_offset];
+        D* nv = NM_ALLOC(D); *nv = v[v_offset++];
+        if (dest->dtype == nm::RUBYOBJ) {
+          nm_register_value(*reinterpret_cast<VALUE*>(nv));
+          temp_vals.push_front(reinterpret_cast<VALUE*>(nv));
+        }
         if (prev) node = insert_after(prev, key, nv);
         else      node = insert_first_node(l, key, nv, sizeof(D));
 
@@ -356,7 +517,14 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
 
       ++i; ++key;
     }
+    __nm_list_storage_unregister_temp_value_list(temp_vals);
+  }
+
+  if (dest->dtype == nm::RUBYOBJ) {
+    nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
+    nm_list_storage_unregister_list(l, dest->dim - n - 1);
   }
+  nm_list_storage_unregister(dest);
 
   return (l->first) ? false : true;
 }
@@ -364,8 +532,10 @@ static bool slice_set(LIST_STORAGE* dest, LIST* l, size_t* coords, size_t* lengt
 
 template <typename D>
 void set(VALUE left, SLICE* slice, VALUE right) {
+  NM_CONSERVATIVE(nm_register_value(left));
+  NM_CONSERVATIVE(nm_register_value(right));
   LIST_STORAGE* s = NM_STORAGE_LIST(left);
-
+  
   std::pair<NMATRIX*,bool> nm_and_free =
     interpret_arg_as_dense_nmatrix(right, NM_DTYPE(left));
 
@@ -379,17 +549,27 @@ void set(VALUE left, SLICE* slice, VALUE right) {
     v_size           = nm_storage_count_max_elements(t);
 
   } else if (TYPE(right) == T_ARRAY) {
+    nm_register_nmatrix(nm_and_free.first);
     v_size = RARRAY_LEN(right);
-    v      = ALLOC_N(D, v_size);
+    v      = NM_ALLOC_N(D, v_size);
+    if (NM_DTYPE(left) == nm::RUBYOBJ)
+        nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
+
     for (size_t m = 0; m < v_size; ++m) {
       rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
     }
+    if (NM_DTYPE(left) == nm::RUBYOBJ)
+        nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
+
   } else {
+    nm_register_nmatrix(nm_and_free.first);
     v = reinterpret_cast<D*>(rubyobj_to_cval(right, NM_DTYPE(left)));
   }
 
   if (v_size == 1 && *v == *reinterpret_cast<D*>(s->default_val)) {
-    nm::list::remove_recursive(s->rows, slice->coords, s->offset, slice->lengths, 0, s->dim);
+    if (*reinterpret_cast<D*>(nm_list_storage_get(s, slice)) != *reinterpret_cast<D*>(s->default_val)) {
+      nm::list::remove_recursive(s->rows, slice->coords, s->offset, slice->lengths, 0, s->dim);
+    }
   } else if (slice->single) {
     slice_set_single(s, s->rows, reinterpret_cast<void*>(v), slice->coords, slice->lengths, 0);
   } else {
@@ -403,7 +583,12 @@ void set(VALUE left, SLICE* slice, VALUE right) {
     if (nm_and_free.second) {
       nm_delete(nm_and_free.first);
     }
-  } else xfree(v);
+  } else {
+    NM_FREE(v);
+    nm_unregister_nmatrix(nm_and_free.first);
+  }
+  NM_CONSERVATIVE(nm_unregister_value(left));
+  NM_CONSERVATIVE(nm_unregister_value(right));
 }
 
 /*
@@ -411,7 +596,7 @@ void set(VALUE left, SLICE* slice, VALUE right) {
  */
 template <typename D>
 void init_default(LIST_STORAGE* s) {
-  s->default_val = ALLOC(D);
+  s->default_val = NM_ALLOC(D);
   *reinterpret_cast<D*>(s->default_val) = 0;
 }
 
@@ -438,13 +623,13 @@ extern "C" {
  * new storage. You don't need to free them, and you shouldn't re-use them.
  */
 LIST_STORAGE* nm_list_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* init_val) {
-  LIST_STORAGE* s = ALLOC( LIST_STORAGE );
+  LIST_STORAGE* s = NM_ALLOC( LIST_STORAGE );
 
   s->dim   = dim;
   s->shape = shape;
   s->dtype = dtype;
 
-  s->offset = ALLOC_N(size_t, s->dim);
+  s->offset = NM_ALLOC_N(size_t, s->dim);
   memset(s->offset, 0, s->dim * sizeof(size_t));
 
   s->rows  = nm::list::create();
@@ -461,7 +646,7 @@ LIST_STORAGE* nm_list_storage_create(nm::dtype_t dtype, size_t* shape, size_t di
 }
 
 /*
- * Documentation goes here.
+ * Destructor for list storage.
  */
 void nm_list_storage_delete(STORAGE* s) {
   if (s) {
@@ -469,30 +654,30 @@ void nm_list_storage_delete(STORAGE* s) {
     if (storage->count-- == 1) {
       nm::list::del( storage->rows, storage->dim - 1 );
 
-      xfree(storage->shape);
-      xfree(storage->offset);
-      xfree(storage->default_val);
-      xfree(s);
+      NM_FREE(storage->shape);
+      NM_FREE(storage->offset);
+      NM_FREE(storage->default_val);
+      NM_FREE(s);
     }
   }
 }
 
 /*
- * Documentation goes here.
+ * Destructor for a list storage reference slice.
  */
 void nm_list_storage_delete_ref(STORAGE* s) {
   if (s) {
     LIST_STORAGE* storage = (LIST_STORAGE*)s;
 
     nm_list_storage_delete( reinterpret_cast<STORAGE*>(storage->src ) );
-    xfree(storage->shape);
-    xfree(storage->offset);
-    xfree(s);
+    NM_FREE(storage->shape);
+    NM_FREE(storage->offset);
+    NM_FREE(s);
   }
 }
 
 /*
- * Documentation goes here.
+ * GC mark function for list storage.
  */
 void nm_list_storage_mark(STORAGE* storage_base) {
   LIST_STORAGE* storage = (LIST_STORAGE*)storage_base;
@@ -503,6 +688,85 @@ void nm_list_storage_mark(STORAGE* storage_base) {
   }
 }
 
+static void __nm_list_storage_unregister_temp_value_list(std::list<VALUE*>& temp_vals) {
+  for (std::list<VALUE*>::iterator it = temp_vals.begin(); it != temp_vals.end(); ++it) {
+    nm_unregister_value(**it);
+  }
+}
+
+static void __nm_list_storage_unregister_temp_list_list(std::list<LIST*>& temp_vals, size_t recursions) {
+  for (std::list<LIST*>::iterator it = temp_vals.begin(); it != temp_vals.end(); ++it) {
+    nm_list_storage_unregister_list(*it, recursions);
+  }
+}
+
+void nm_list_storage_register_node(const NODE* curr) {
+  nm_register_value(*reinterpret_cast<VALUE*>(curr->val));      
+}
+
+void nm_list_storage_unregister_node(const NODE* curr) {
+  nm_unregister_value(*reinterpret_cast<VALUE*>(curr->val));      
+}
+
+/**
+ * Gets rid of all instances of a given node in the registration list.
+ * Sometimes a node will get deleted and replaced deep in a recursion, but
+ * further up it will still get registered.  This leads to a potential read
+ * after free during the GC marking.  This function completely clears out a
+ * node so that this won't happen.
+ */
+void nm_list_storage_completely_unregister_node(const NODE* curr) {
+  nm_completely_unregister_value(*reinterpret_cast<VALUE*>(curr->val));
+}
+
+void nm_list_storage_register_list(const LIST* list, size_t recursions) {
+  NODE* next;
+  if (!list) return;
+  NODE* curr = list->first;
+
+  while (curr != NULL) {
+    next = curr->next;
+    if (recursions == 0) {
+      nm_list_storage_register_node(curr);
+    } else {
+      nm_list_storage_register_list(reinterpret_cast<LIST*>(curr->val), recursions - 1);
+    }
+    curr = next;
+  }
+}
+
+void nm_list_storage_unregister_list(const LIST* list, size_t recursions) {
+  NODE* next;
+  if (!list) return;
+  NODE* curr = list->first;
+
+  while (curr != NULL) {
+    next = curr->next;
+    if (recursions == 0) {
+      nm_list_storage_unregister_node(curr);
+    } else {
+      nm_list_storage_unregister_list(reinterpret_cast<LIST*>(curr->val), recursions - 1);
+    }
+    curr = next;
+  }
+}
+
+void nm_list_storage_register(const STORAGE* s) {
+  const LIST_STORAGE* storage = reinterpret_cast<const LIST_STORAGE*>(s);
+  if (storage && storage->dtype == nm::RUBYOBJ) {
+    nm_register_value(*reinterpret_cast<VALUE*>(storage->default_val));
+    nm_list_storage_register_list(storage->rows, storage->dim - 1);
+  }
+}
+
+void nm_list_storage_unregister(const STORAGE* s) {
+  const LIST_STORAGE* storage = reinterpret_cast<const LIST_STORAGE*>(s);
+  if (storage && storage->dtype == nm::RUBYOBJ) {
+    nm_unregister_value(*reinterpret_cast<VALUE*>(storage->default_val));
+    nm_list_storage_unregister_list(storage->rows, storage->dim - 1);
+  }
+}
+
 ///////////////
 // Accessors //
 ///////////////
@@ -510,8 +774,7 @@ void nm_list_storage_mark(STORAGE* storage_base) {
 /*
  * Documentation goes here.
  */
-static NODE* list_storage_get_single_node(LIST_STORAGE* s, SLICE* slice)
-{
+static NODE* list_storage_get_single_node(LIST_STORAGE* s, SLICE* slice) {
   size_t r;
   LIST*  l = s->rows;
   NODE*  n;
@@ -532,6 +795,7 @@ static NODE* list_storage_get_single_node(LIST_STORAGE* s, SLICE* slice)
  */
 static void each_empty_with_indices_r(nm::list_storage::RecurseData& s, size_t rec, VALUE& stack) {
   VALUE empty  = s.dtype() == nm::RUBYOBJ ? *reinterpret_cast<VALUE*>(s.init()) : s.init_obj();
+  NM_CONSERVATIVE(nm_register_value(stack));
 
   if (rec) {
     for (long index = 0; index < s.ref_shape(rec); ++index) {
@@ -549,12 +813,16 @@ static void each_empty_with_indices_r(nm::list_storage::RecurseData& s, size_t r
     }
     rb_ary_shift(stack);
   }
+  NM_CONSERVATIVE(nm_unregister_value(stack));
 }
 
 /*
  * Recursive helper function for each_with_indices, based on nm_list_storage_count_elements_r.
  */
 static void each_with_indices_r(nm::list_storage::RecurseData& s, const LIST* l, size_t rec, VALUE& stack) {
+  if (s.dtype() == nm::RUBYOBJ)
+    nm_list_storage_register_list(l, rec);
+  NM_CONSERVATIVE(nm_register_value(stack));
   NODE*  curr  = l->first;
 
   size_t offset = s.offset(rec);
@@ -594,7 +862,9 @@ static void each_with_indices_r(nm::list_storage::RecurseData& s, const LIST* l,
       rb_ary_pop(stack);
     }
   }
-
+  NM_CONSERVATIVE(nm_unregister_value(stack));
+  if (s.dtype() == nm::RUBYOBJ)
+    nm_list_storage_unregister_list(l, rec);
 }
 
 
@@ -602,6 +872,10 @@ static void each_with_indices_r(nm::list_storage::RecurseData& s, const LIST* l,
  * Recursive helper function for each_stored_with_indices, based on nm_list_storage_count_elements_r.
  */
 static void each_stored_with_indices_r(nm::list_storage::RecurseData& s, const LIST* l, size_t rec, VALUE& stack) {
+  if (s.dtype() == nm::RUBYOBJ)
+    nm_list_storage_register_list(l, rec);
+  NM_CONSERVATIVE(nm_register_value(stack));
+  
   NODE* curr = l->first;
 
   size_t offset = s.offset(rec);
@@ -639,6 +913,9 @@ static void each_stored_with_indices_r(nm::list_storage::RecurseData& s, const L
       if (curr && curr->key - offset >= shape) curr = NULL;
     }
   }
+  NM_CONSERVATIVE(nm_unregister_value(stack));
+  if (s.dtype() == nm::RUBYOBJ)
+    nm_list_storage_unregister_list(l, rec);
 }
 
 
@@ -648,7 +925,11 @@ static void each_stored_with_indices_r(nm::list_storage::RecurseData& s, const L
  */
 VALUE nm_list_each_with_indices(VALUE nmatrix, bool stored) {
 
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
+
   // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
   RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, 0);
 
   nm::list_storage::RecurseData sdata(NM_STORAGE_LIST(nmatrix));
@@ -658,6 +939,7 @@ VALUE nm_list_each_with_indices(VALUE nmatrix, bool stored) {
   if (stored) each_stored_with_indices_r(sdata, sdata.top_level_list(), sdata.dim() - 1, stack);
   else        each_with_indices_r(sdata, sdata.top_level_list(), sdata.dim() - 1, stack);
 
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
   return nmatrix;
 }
 
@@ -665,7 +947,63 @@ VALUE nm_list_each_with_indices(VALUE nmatrix, bool stored) {
 /*
  * map merged stored iterator. Always returns a matrix containing RubyObjects which probably needs to be casted.
  */
+VALUE nm_list_map_stored(VALUE left, VALUE init) {
+  NM_CONSERVATIVE(nm_register_value(left));
+  NM_CONSERVATIVE(nm_register_value(init));
+
+  bool scalar = false;
+
+  LIST_STORAGE *s   = NM_STORAGE_LIST(left);
+
+  // For each matrix, if it's a reference, we want to deal directly with the original (with appropriate offsetting)
+  nm::list_storage::RecurseData sdata(s);
+
+  void* scalar_init = NULL;
+
+  //if (!rb_block_given_p()) {
+  //  rb_raise(rb_eNotImpError, "RETURN_SIZED_ENUMERATOR probably won't work for a map_merged since no merged object is created");
+  //}
+  // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(left));
+  NM_CONSERVATIVE(nm_unregister_value(init));
+  RETURN_SIZED_ENUMERATOR(left, 0, 0, 0); // FIXME: Test this. Probably won't work. Enable above code instead.
+
+  // Figure out default value if none provided by the user
+  if (init == Qnil) {
+    nm_unregister_value(init);
+    init = rb_yield_values(1, sdata.init_obj());
+    nm_register_value(init);
+  }
+  // Allocate storage for the result matrix's default value and copy init into it.
+  void* init_val = NM_ALLOC(VALUE);
+  memcpy(init_val, &init, sizeof(VALUE));
+  nm_register_value(*reinterpret_cast<VALUE*>(init_val));
+
+  NMATRIX* result = nm_create(nm::LIST_STORE, nm_list_storage_create(nm::RUBYOBJ, sdata.copy_alloc_shape(), s->dim, init_val));
+  LIST_STORAGE* r = reinterpret_cast<LIST_STORAGE*>(result->storage);
+  nm::list_storage::RecurseData rdata(r, init);
+  nm_register_nmatrix(result);
+  map_stored_r(rdata, sdata, rdata.top_level_list(), sdata.top_level_list(), sdata.dim() - 1);
+
+  VALUE to_return = Data_Wrap_Struct(CLASS_OF(left), nm_mark, nm_delete, result);
+
+  nm_unregister_nmatrix(result);
+  nm_unregister_value(*reinterpret_cast<VALUE*>(init_val));
+  NM_CONSERVATIVE(nm_unregister_value(init));
+  NM_CONSERVATIVE(nm_unregister_value(left));
+
+  return to_return;
+}
+
+
+/*
+ * map merged stored iterator. Always returns a matrix containing RubyObjects which probably needs to be casted.
+ */
 VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init) {
+  NM_CONSERVATIVE(nm_register_value(left));
+  NM_CONSERVATIVE(nm_register_value(right));
+  NM_CONSERVATIVE(nm_register_value(init));
 
   bool scalar = false;
 
@@ -679,8 +1017,7 @@ VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init) {
 
   // right might be a scalar, in which case this is a scalar operation.
   if (TYPE(right) != T_DATA || (RDATA(right)->dfree != (RUBY_DATA_FUNC)nm_delete && RDATA(right)->dfree != (RUBY_DATA_FUNC)nm_delete_ref)) {
-    nm::dtype_t r_dtype = nm_dtype_min(right);
-
+    nm::dtype_t r_dtype = Upcast[NM_DTYPE(left)][nm_dtype_min(right)];
     scalar_init         = rubyobj_to_cval(right, r_dtype); // make a copy of right
 
     t                   = reinterpret_cast<LIST_STORAGE*>(nm_list_storage_create(r_dtype, sdata.copy_alloc_shape(), s->dim, scalar_init));
@@ -693,26 +1030,43 @@ VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init) {
   //  rb_raise(rb_eNotImpError, "RETURN_SIZED_ENUMERATOR probably won't work for a map_merged since no merged object is created");
   //}
   // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(left));
+  NM_CONSERVATIVE(nm_unregister_value(right));
+  NM_CONSERVATIVE(nm_unregister_value(init));
   RETURN_SIZED_ENUMERATOR(left, 0, 0, 0); // FIXME: Test this. Probably won't work. Enable above code instead.
 
   // Figure out default value if none provided by the user
-  nm::list_storage::RecurseData tdata(t);
-  if (init == Qnil) init = rb_yield_values(2, sdata.init_obj(), tdata.init_obj());
+  nm::list_storage::RecurseData& tdata = *(new nm::list_storage::RecurseData(t)); //FIXME: this is a hack to make sure that we can run the destructor before nm_list_storage_delete(t) below.
+  if (init == Qnil) {
+    nm_unregister_value(init);
+    init = rb_yield_values(2, sdata.init_obj(), tdata.init_obj());
+    nm_register_value(init);
+  }
 
-	// Allocate a new shape array for the resulting matrix.
-  void* init_val = ALLOC(VALUE);
+  // Allocate storage for the result matrix's default value and copy init into it.
+  void* init_val = NM_ALLOC(VALUE);
   memcpy(init_val, &init, sizeof(VALUE));
+  nm_register_value(*reinterpret_cast<VALUE*>(init_val));
 
   NMATRIX* result = nm_create(nm::LIST_STORE, nm_list_storage_create(nm::RUBYOBJ, sdata.copy_alloc_shape(), s->dim, init_val));
   LIST_STORAGE* r = reinterpret_cast<LIST_STORAGE*>(result->storage);
   nm::list_storage::RecurseData rdata(r, init);
-
   map_merged_stored_r(rdata, sdata, tdata, rdata.top_level_list(), sdata.top_level_list(), tdata.top_level_list(), sdata.dim() - 1);
 
+  delete &tdata;
   // If we are working with a scalar operation
   if (scalar) nm_list_storage_delete(t);
 
-  return Data_Wrap_Struct(CLASS_OF(left), nm_mark, nm_delete, result);
+  VALUE to_return = Data_Wrap_Struct(CLASS_OF(left), nm_mark, nm_delete, result);
+
+  nm_unregister_value(*reinterpret_cast<VALUE*>(init_val));
+
+  NM_CONSERVATIVE(nm_unregister_value(init));
+  NM_CONSERVATIVE(nm_unregister_value(right));
+  NM_CONSERVATIVE(nm_unregister_value(left));
+
+  return to_return;
 }
 
 
@@ -720,13 +1074,14 @@ VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init) {
  * Copy a slice of a list matrix into a regular list matrix.
  */
 static LIST* slice_copy(const LIST_STORAGE* src, LIST* src_rows, size_t* coords, size_t* lengths, size_t n) {
-
+  nm_list_storage_register(src);
   void *val = NULL;
   int key;
   
   LIST* dst_rows = nm::list::create();
   NODE* src_node = src_rows->first;
-
+  std::list<VALUE*> temp_vals;
+  std::list<LIST*> temp_lists;
   while (src_node) {
     key = src_node->key - (src->offset[n] + coords[n]);
     
@@ -737,16 +1092,28 @@ static LIST* slice_copy(const LIST_STORAGE* src, LIST* src_rows, size_t* coords,
                           coords,
                           lengths,
                           n + 1    );
-
-        if (val) {  nm::list::insert_copy(dst_rows, false, key, val, sizeof(LIST)); }
+        if (val) {
+          if (src->dtype == nm::RUBYOBJ) {
+            nm_list_storage_register_list(reinterpret_cast<LIST*>(val), src->dim - n - 2);
+            temp_lists.push_front(reinterpret_cast<LIST*>(val));
+          }
+          nm::list::insert_copy(dst_rows, false, key, val, sizeof(LIST));
+        }
+      } else { // matches src->dim - n > 1
+        if (src->dtype == nm::RUBYOBJ) {
+          nm_register_value(*reinterpret_cast<VALUE*>(src_node->val));
+          temp_vals.push_front(reinterpret_cast<VALUE*>(src_node->val));
+        }
+        nm::list::insert_copy(dst_rows, false, key, src_node->val, DTYPE_SIZES[src->dtype]);
       }
-
-      else nm::list::insert_copy(dst_rows, false, key, src_node->val, DTYPE_SIZES[src->dtype]);
     }
-
     src_node = src_node->next;
+ }
+  if (src->dtype == nm::RUBYOBJ) {
+    __nm_list_storage_unregister_temp_list_list(temp_lists, src->dim - n - 2);
+    __nm_list_storage_unregister_temp_value_list(temp_vals);
   }
-
+  nm_list_storage_unregister(src);
   return dst_rows;
 }
 
@@ -756,21 +1123,31 @@ static LIST* slice_copy(const LIST_STORAGE* src, LIST* src_rows, size_t* coords,
 void* nm_list_storage_get(const STORAGE* storage, SLICE* slice) {
   LIST_STORAGE* s = (LIST_STORAGE*)storage;
   LIST_STORAGE* ns = NULL;
-  NODE* n;
+
+  nm_list_storage_register(s);
 
   if (slice->single) {
-    n = list_storage_get_single_node(s, slice);
+    NODE* n = list_storage_get_single_node(s, slice);
+    nm_list_storage_unregister(s);
     return (n ? n->val : s->default_val);
+
   } else {
-    void *init_val = ALLOC_N(char, DTYPE_SIZES[s->dtype]);
+    void *init_val = NM_ALLOC_N(char, DTYPE_SIZES[s->dtype]);
     memcpy(init_val, s->default_val, DTYPE_SIZES[s->dtype]);
+    if (s->dtype == nm::RUBYOBJ)
+      nm_register_value(*reinterpret_cast<VALUE*>(init_val));
 
-    size_t *shape = ALLOC_N(size_t, s->dim);
+    size_t *shape = NM_ALLOC_N(size_t, s->dim);
     memcpy(shape, slice->lengths, sizeof(size_t) * s->dim);
 
     ns = nm_list_storage_create(s->dtype, shape, s->dim, init_val);
-
+  
     ns->rows = slice_copy(s, s->rows, slice->coords, slice->lengths, 0);
+
+    if (s->dtype == nm::RUBYOBJ)
+      nm_unregister_value(*reinterpret_cast<VALUE*>(init_val));
+    nm_list_storage_unregister(s);
+
     return ns;
   }
 }
@@ -782,20 +1159,21 @@ void* nm_list_storage_get(const STORAGE* storage, SLICE* slice) {
 void* nm_list_storage_ref(const STORAGE* storage, SLICE* slice) {
   LIST_STORAGE* s = (LIST_STORAGE*)storage;
   LIST_STORAGE* ns = NULL;
-  NODE* n;
+  nm_list_storage_register(s);
 
   //TODO: It needs a refactoring.
   if (slice->single) {
-    n = list_storage_get_single_node(s, slice); 
+    NODE* n = list_storage_get_single_node(s, slice);
+    nm_list_storage_unregister(s);
     return (n ? n->val : s->default_val);
   } 
   else {
-    ns              = ALLOC( LIST_STORAGE );
+    ns              = NM_ALLOC( LIST_STORAGE );
     
     ns->dim         = s->dim;
     ns->dtype       = s->dtype;
-    ns->offset      = ALLOC_N(size_t, ns->dim);
-    ns->shape       = ALLOC_N(size_t, ns->dim);
+    ns->offset      = NM_ALLOC_N(size_t, ns->dim);
+    ns->shape       = NM_ALLOC_N(size_t, ns->dim);
 
     for (size_t i = 0; i < ns->dim; ++i) {
       ns->offset[i] = slice->coords[i] + s->offset[i];
@@ -807,7 +1185,7 @@ void* nm_list_storage_ref(const STORAGE* storage, SLICE* slice) {
     
     s->src->count++;
     ns->src         = s->src;
-    
+    nm_list_storage_unregister(s);
     return ns;
   }
 }
@@ -817,10 +1195,16 @@ void* nm_list_storage_ref(const STORAGE* storage, SLICE* slice) {
  * Recursive function, sets multiple values in a matrix from a single source value.
  */
 static void slice_set_single(LIST_STORAGE* dest, LIST* l, void* val, size_t* coords, size_t* lengths, size_t n) {
+  nm_list_storage_register(dest);
+  if (dest->dtype == nm::RUBYOBJ) {
+    nm_register_value(*reinterpret_cast<VALUE*>(val));
+    nm_list_storage_register_list(l, dest->dim - n - 1);
+  }
 
   // drill down into the structure
   NODE* node = NULL;
   if (dest->dim - n > 1) {
+    std::list<LIST*> temp_nodes; 
     for (size_t i = 0; i < lengths[n]; ++i) {
 
       size_t key = i + dest->offset[n] + coords[n];
@@ -833,10 +1217,17 @@ static void slice_set_single(LIST_STORAGE* dest, LIST* l, void* val, size_t* coo
         node = node->next; // correct rank already exists.
       }
 
+      if (dest->dtype == nm::RUBYOBJ) {
+        temp_nodes.push_front(reinterpret_cast<LIST*>(node->val));
+        nm_list_storage_register_list(reinterpret_cast<LIST*>(node->val), dest->dim - n - 2);
+      }
+
       // cast it to a list and recurse
       slice_set_single(dest, reinterpret_cast<LIST*>(node->val), val, coords, lengths, n + 1);
     }
+    __nm_list_storage_unregister_temp_list_list(temp_nodes, dest->dim - n - 2);
   } else {
+    std::list<VALUE*> temp_vals;
     for (size_t i = 0; i < lengths[n]; ++i) {
 
       size_t key = i + dest->offset[n] + coords[n];
@@ -846,7 +1237,18 @@ static void slice_set_single(LIST_STORAGE* dest, LIST* l, void* val, size_t* coo
       } else {
         node = nm::list::replace_insert_after(node, key, val, true, DTYPE_SIZES[dest->dtype]);
       }
+      if (dest->dtype == nm::RUBYOBJ) {
+        temp_vals.push_front(reinterpret_cast<VALUE*>(node->val));
+        nm_register_value(*reinterpret_cast<VALUE*>(node->val));
+      }
     }
+    __nm_list_storage_unregister_temp_value_list(temp_vals);
+  }
+
+  nm_list_storage_unregister(dest);
+  if (dest->dtype == nm::RUBYOBJ) {
+    nm_unregister_value(*reinterpret_cast<VALUE*>(val));
+    nm_list_storage_unregister_list(l, dest->dim - n - 1);
   }
 }
 
@@ -870,6 +1272,9 @@ void nm_list_storage_set(VALUE left, SLICE* slice, VALUE right) {
  */
 NODE* nm_list_storage_insert(STORAGE* storage, SLICE* slice, void* val) {
   LIST_STORAGE* s = (LIST_STORAGE*)storage;
+  nm_list_storage_register(s);
+  if (s->dtype == nm::RUBYOBJ)
+    nm_register_value(*reinterpret_cast<VALUE*>(val));
   // Pretend dims = 2
   // Then coords is going to be size 2
   // So we need to find out if some key already exists
@@ -878,12 +1283,16 @@ NODE* nm_list_storage_insert(STORAGE* storage, SLICE* slice, void* val) {
   LIST*  l = s->rows;
 
   // drill down into the structure
-  for (r = s->dim; r > 1; --r) {
-    n = nm::list::insert(l, false, s->offset[s->dim - r] + slice->coords[s->dim - r], nm::list::create());
+  for (r = 0; r < s->dim -1; ++r) {
+    n = nm::list::insert(l, false, s->offset[r] + slice->coords[s->dim - r], nm::list::create());
     l = reinterpret_cast<LIST*>(n->val);
   }
 
-  return nm::list::insert(l, true, s->offset[s->dim - r] + slice->coords[s->dim - r], val);
+  nm_list_storage_unregister(s);
+  if (s->dtype == nm::RUBYOBJ)
+    nm_unregister_value(*reinterpret_cast<VALUE*>(val));
+
+  return nm::list::insert(l, true, s->offset[r] + slice->coords[r], val);
 }
 
 /*
@@ -937,10 +1346,10 @@ STORAGE* nm_list_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, siz
  * it's a sparse matrix.
  */
 VALUE nm_list_storage_to_hash(const LIST_STORAGE* s, const nm::dtype_t dtype) {
-
+  nm_list_storage_register(s);
   // Get the default value for the list storage.
   VALUE default_value = rubyobj_from_cval(s->default_val, dtype).rval;
-
+  nm_list_storage_unregister(s);
   // Recursively copy each dimension of the matrix into a nested hash.
   return nm_list_copy_to_hash(s->rows, dtype, s->dim - 1, default_value);
 }
@@ -1006,18 +1415,21 @@ size_t nm_list_storage_count_nd_elements(const LIST_STORAGE* s) {
  * List storage copy constructor C access.
  */
 
-LIST_STORAGE* nm_list_storage_copy(const LIST_STORAGE* rhs)
-{
-  size_t *shape = ALLOC_N(size_t, rhs->dim);
+LIST_STORAGE* nm_list_storage_copy(const LIST_STORAGE* rhs) {
+  nm_list_storage_register(rhs);
+  size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
   memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);
   
-  void *init_val = ALLOC_N(char, DTYPE_SIZES[rhs->dtype]);
+  void *init_val = NM_ALLOC_N(char, DTYPE_SIZES[rhs->dtype]);
   memcpy(init_val, rhs->default_val, DTYPE_SIZES[rhs->dtype]);
 
   LIST_STORAGE* lhs = nm_list_storage_create(rhs->dtype, shape, rhs->dim, init_val);
-  
+  nm_list_storage_register(lhs);
+
   lhs->rows = slice_copy(rhs, rhs->rows, lhs->offset, lhs->shape, 0);
 
+  nm_list_storage_unregister(rhs);
+  nm_list_storage_unregister(lhs);
   return lhs;
 }
 
@@ -1057,27 +1469,31 @@ namespace list_storage {
  */
 template <typename LDType, typename RDType>
 static LIST_STORAGE* cast_copy(const LIST_STORAGE* rhs, dtype_t new_dtype) {
-
+  nm_list_storage_register(rhs);
   // allocate and copy shape
-  size_t* shape = ALLOC_N(size_t, rhs->dim);
+  size_t* shape = NM_ALLOC_N(size_t, rhs->dim);
   memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t));
 
   // copy default value
-  LDType* default_val = ALLOC_N(LDType, 1);
+  LDType* default_val = NM_ALLOC_N(LDType, 1);
   *default_val = *reinterpret_cast<RDType*>(rhs->default_val);
 
   LIST_STORAGE* lhs = nm_list_storage_create(new_dtype, shape, rhs->dim, default_val);
   //lhs->rows         = nm::list::create();
 
+  nm_list_storage_register(lhs);
   // TODO: Needs optimization. When matrix is reference it is copped twice.
   if (rhs->src == rhs) 
     nm::list::cast_copy_contents<LDType, RDType>(lhs->rows, rhs->rows, rhs->dim - 1);
   else {
     LIST_STORAGE *tmp = nm_list_storage_copy(rhs);
+    nm_list_storage_register(tmp);
     nm::list::cast_copy_contents<LDType, RDType>(lhs->rows, tmp->rows, rhs->dim - 1);
+    nm_list_storage_unregister(tmp);
     nm_list_storage_delete(tmp);
   }
-
+  nm_list_storage_unregister(lhs);
+  nm_list_storage_unregister(rhs);
   return lhs;
 }
 
@@ -1196,13 +1612,16 @@ extern "C" {
     return nm_list_storage_to_hash(NM_STORAGE_LIST(self), NM_DTYPE(self));
   }
 
-    /*
-     * call-seq:
-     *     __list_default_value__ -> ...
-     *
-     * Get the default_value property from a list matrix.
-     */
-    VALUE nm_list_default_value(VALUE self) {
-      return (NM_DTYPE(self) == nm::RUBYOBJ) ? *reinterpret_cast<VALUE*>(NM_DEFAULT_VAL(self)) : rubyobj_from_cval(NM_DEFAULT_VAL(self), NM_DTYPE(self)).rval;
-    }
+  /*
+   * call-seq:
+   *     __list_default_value__ -> ...
+   *
+   * Get the default_value property from a list matrix.
+   */
+  VALUE nm_list_default_value(VALUE self) {
+    NM_CONSERVATIVE(nm_register_value(self));
+    VALUE to_return = (NM_DTYPE(self) == nm::RUBYOBJ) ? *reinterpret_cast<VALUE*>(NM_DEFAULT_VAL(self)) : rubyobj_from_cval(NM_DEFAULT_VAL(self), NM_DTYPE(self)).rval;
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    return to_return;
+  }
 } // end of extern "C" block
diff --git a/ext/nmatrix/storage/list.h b/ext/nmatrix/storage/list/list.h
similarity index 78%
rename from ext/nmatrix/storage/list.h
rename to ext/nmatrix/storage/list/list.h
index f10824b..9e15a5f 100644
--- a/ext/nmatrix/storage/list.h
+++ b/ext/nmatrix/storage/list/list.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -34,14 +34,14 @@
  */
 
 #include <stdlib.h>
-
+#include <list>
 /*
  * Project Includes
  */
 
 #include "types.h"
 #include "data/data.h"
-#include "common.h"
+#include "../common.h"
 #include "util/sl_list.h"
 #include "nmatrix.h"
 
@@ -73,7 +73,13 @@ extern "C" {
   void					nm_list_storage_delete(STORAGE* s);
   void					nm_list_storage_delete_ref(STORAGE* s);
   void					nm_list_storage_mark(STORAGE*);
-
+  void          nm_list_storage_register(const STORAGE* s);
+  void          nm_list_storage_unregister(const STORAGE* s);
+  void          nm_list_storage_register_list(const LIST* l, size_t recursions);
+  void          nm_list_storage_unregister_list(const LIST* l, size_t recursions);
+  void          nm_list_storage_register_node(const NODE* n);
+  void          nm_list_storage_unregister_node(const NODE* n);
+  void		      nm_list_storage_completely_unregister_node(const NODE* curr);
   ///////////////
   // Accessors //
   ///////////////
@@ -82,7 +88,7 @@ extern "C" {
   void* nm_list_storage_ref(const STORAGE* s, SLICE* slice);
   void* nm_list_storage_get(const STORAGE* s, SLICE* slice);
   NODE* nm_list_storage_insert(STORAGE* s, SLICE* slice, void* val);
-  void nm_list_storage_set(VALUE left, SLICE* slice, VALUE right);
+  void  nm_list_storage_set(VALUE left, SLICE* slice, VALUE right);
   void  nm_list_storage_remove(STORAGE* s, SLICE* slice);
 
   ///////////
@@ -124,6 +130,7 @@ extern "C" {
   // Exposed functions
   VALUE nm_to_hash(VALUE self);
   VALUE nm_list_map_merged_stored(VALUE left, VALUE right, VALUE init);
+  VALUE nm_list_map_stored(VALUE left, VALUE init);
   VALUE nm_list_default_value(VALUE self);
 } // end of extern "C" block
 
diff --git a/ext/nmatrix/storage/storage.cpp b/ext/nmatrix/storage/storage.cpp
index 97af774..5ceed36 100644
--- a/ext/nmatrix/storage/storage.cpp
+++ b/ext/nmatrix/storage/storage.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -87,9 +87,9 @@ static void cast_copy_list_default(LDType* lhs, RDType* default_val, size_t& pos
  */
 template <typename LDType, typename RDType>
 DENSE_STORAGE* create_from_list_storage(const LIST_STORAGE* rhs, dtype_t l_dtype) {
-
+  nm_list_storage_register(rhs);
   // allocate and copy shape
-  size_t* shape = ALLOC_N(size_t, rhs->dim);
+  size_t* shape = NM_ALLOC_N(size_t, rhs->dim);
   memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t));
 
   DENSE_STORAGE* lhs = nm_dense_storage_create(l_dtype, shape, rhs->dim, NULL, 0);
@@ -114,6 +114,7 @@ DENSE_STORAGE* create_from_list_storage(const LIST_STORAGE* rhs, dtype_t l_dtype
     nm_list_storage_delete(tmp);
 
   }
+  nm_list_storage_unregister(rhs);
 
   return lhs;
 }
@@ -127,12 +128,13 @@ DENSE_STORAGE* create_from_list_storage(const LIST_STORAGE* rhs, dtype_t l_dtype
 template <typename LDType, typename RDType>
 DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype) {
 
+  nm_yale_storage_register(rhs);
   // Position in rhs->elements.
   IType*  rhs_ija = reinterpret_cast<YALE_STORAGE*>(rhs->src)->ija;
   RDType* rhs_a   = reinterpret_cast<RDType*>(reinterpret_cast<YALE_STORAGE*>(rhs->src)->a);
 
   // Allocate and set shape.
-  size_t* shape = ALLOC_N(size_t, rhs->dim);
+  size_t* shape = NM_ALLOC_N(size_t, rhs->dim);
   shape[0] = rhs->shape[0];
   shape[1] = rhs->shape[1];
 
@@ -195,6 +197,7 @@ DENSE_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype
       }
     }
   }
+  nm_yale_storage_unregister(rhs);
 
   return lhs;
 }
@@ -209,7 +212,9 @@ static void cast_copy_list_contents(LDType* lhs, const LIST* rhs, RDType* defaul
   NODE *curr = rhs->first;
   int last_key = -1;
 
-	for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) {
+  nm_list_storage_register_list(rhs, recursions);
+
+  for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) {
 
     if (!curr || (curr->key > (size_t)(last_key+1))) {
 
@@ -229,6 +234,8 @@ static void cast_copy_list_contents(LDType* lhs, const LIST* rhs, RDType* defaul
     }
   }
 
+  nm_list_storage_unregister_list(rhs, recursions);
+
   --pos;
 }
 
@@ -237,7 +244,7 @@ static void cast_copy_list_contents(LDType* lhs, const LIST* rhs, RDType* defaul
  */
 template <typename LDType,typename RDType>
 static void cast_copy_list_default(LDType* lhs, RDType* default_val, size_t& pos, const size_t* shape, size_t dim, size_t max_elements, size_t recursions) {
-	for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) {
+  for (size_t i = 0; i < shape[dim - 1 - recursions]; ++i, ++pos) {
 
     if (recursions == 0)    lhs[pos] = static_cast<LDType>(*default_val);
     else                  	cast_copy_list_default<LDType,RDType>(lhs, default_val, pos, shape, dim, max_elements, recursions-1);
@@ -261,13 +268,14 @@ static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero,
  */
 template <typename LDType, typename RDType>
 LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtype, void* init) {
+  nm_dense_storage_register(rhs);
 
-  LDType* l_default_val = ALLOC_N(LDType, 1);
-  RDType* r_default_val = ALLOCA_N(RDType, 1); // clean up when finished with this function
+  LDType* l_default_val = NM_ALLOC_N(LDType, 1);
+  RDType* r_default_val = NM_ALLOCA_N(RDType, 1); // clean up when finished with this function
 
   // allocate and copy shape and coords
-  size_t *shape  = ALLOC_N(size_t, rhs->dim),
-         *coords = ALLOC_N(size_t, rhs->dim);
+  size_t *shape  = NM_ALLOC_N(size_t, rhs->dim),
+         *coords = NM_ALLOC_N(size_t, rhs->dim);
 
   memcpy(shape, rhs->shape, rhs->dim * sizeof(size_t));
   memset(coords, 0, rhs->dim * sizeof(size_t));
@@ -286,6 +294,8 @@ LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtyp
 
   LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, l_default_val);
 
+  nm_list_storage_register(lhs);
+
   size_t pos = 0;
 
   if (rhs->src == rhs)
@@ -303,6 +313,9 @@ LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtyp
     nm_dense_storage_delete(tmp);
   }
 
+  nm_list_storage_unregister(lhs);
+  nm_dense_storage_unregister(rhs);
+
   return lhs;
 }
 
@@ -314,14 +327,16 @@ LIST_STORAGE* create_from_dense_storage(const DENSE_STORAGE* rhs, dtype_t l_dtyp
 template <typename LDType, typename RDType>
 LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype) {
   // allocate and copy shape
-  size_t *shape = ALLOC_N(size_t, rhs->dim);
+  nm_yale_storage_register(rhs);
+
+  size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
   shape[0] = rhs->shape[0]; shape[1] = rhs->shape[1];
 
   RDType* rhs_a    = reinterpret_cast<RDType*>(reinterpret_cast<YALE_STORAGE*>(rhs->src)->a);
   RDType R_ZERO    = rhs_a[ rhs->src->shape[0] ];
 
   // copy default value from the zero location in the Yale matrix
-  LDType* default_val = ALLOC_N(LDType, 1);
+  LDType* default_val = NM_ALLOC_N(LDType, 1);
   *default_val        = static_cast<LDType>(R_ZERO);
 
   LIST_STORAGE* lhs = nm_list_storage_create(l_dtype, shape, rhs->dim, default_val);
@@ -360,7 +375,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
         // Is there a nonzero diagonal item between the previously added item and the current one?
         if (rj > ri && add_diag) {
           // Allocate and copy insertion value
-          insert_val  = ALLOC_N(LDType, 1);
+          insert_val  = NM_ALLOC_N(LDType, 1);
           *insert_val = static_cast<LDType>(rhs_a[ri]);
 
           // Insert the item in the list at the appropriate location.
@@ -375,7 +390,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
         }
 
         // now allocate and add the current item
-        insert_val  = ALLOC_N(LDType, 1);
+        insert_val  = NM_ALLOC_N(LDType, 1);
         *insert_val = static_cast<LDType>(rhs_a[ija]);
 
         if (last_added)    	last_added = list::insert_after(last_added, j, insert_val);
@@ -387,7 +402,7 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
       if (add_diag) {
 
       	// still haven't added the diagonal.
-        insert_val         = ALLOC_N(LDType, 1);
+        insert_val         = NM_ALLOC_N(LDType, 1);
         *insert_val        = static_cast<LDType>(rhs_a[ri]);
 
         // insert the item in the list at the appropriate location
@@ -405,6 +420,8 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
 		// end of walk through rows
   }
 
+  nm_yale_storage_unregister(rhs);
+
   return lhs;
 }
 
@@ -415,6 +432,9 @@ LIST_STORAGE* create_from_yale_storage(const YALE_STORAGE* rhs, dtype_t l_dtype)
  */
 template <typename LDType, typename RDType>
 static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero, size_t& pos, size_t* coords, const size_t* shape, size_t dim, size_t recursions) {
+
+  nm_list_storage_register_list(lhs, recursions);
+
   NODE *prev = NULL;
   LIST *sub_list;
   bool added = false, added_list = false;
@@ -429,7 +449,7 @@ static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero,
       	// is not zero
 
         // Create a copy of our value that we will insert in the list
-        LDType* insert_value = ALLOC_N(LDType, 1);
+        LDType* insert_value = NM_ALLOC_N(LDType, 1);
         *insert_value        = static_cast<LDType>(rhs[pos]);
 
         if (!lhs->first)    prev = list::insert(lhs, false, coords[dim-1-recursions], insert_value);
@@ -453,6 +473,8 @@ static bool cast_copy_contents_dense(LIST* lhs, const RDType* rhs, RDType* zero,
     }
   }
 
+  nm_list_storage_unregister_list(lhs, recursions);
+
   coords[dim-1-recursions] = 0;
   --pos;
 
@@ -471,6 +493,8 @@ namespace yale_storage { // FIXME: Move to yale.cpp
 
     if (rhs->dim != 2) rb_raise(nm_eStorageTypeError, "can only convert matrices of dim 2 to yale");
 
+    nm_dense_storage_register(rhs);
+
     IType pos = 0;
     IType ndnz = 0;
 
@@ -495,7 +519,7 @@ namespace yale_storage { // FIXME: Move to yale.cpp
     }
 
     // Copy shape for yale construction
-    size_t* shape = ALLOC_N(size_t, 2);
+    size_t* shape = NM_ALLOC_N(size_t, 2);
     shape[0] = rhs->shape[0];
     shape[1] = rhs->shape[1];
 
@@ -539,6 +563,8 @@ namespace yale_storage { // FIXME: Move to yale.cpp
     lhs_ija[shape[0]] = ija; // indicate the end of the last row
     lhs->ndnz = ndnz;
 
+    nm_dense_storage_unregister(rhs);
+
     return lhs;
   }
 
@@ -556,10 +582,11 @@ namespace yale_storage { // FIXME: Move to yale.cpp
     } else if (strncmp(reinterpret_cast<const char*>(rhs->default_val), "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", DTYPE_SIZES[rhs->dtype]))
       rb_raise(nm_eStorageTypeError, "list matrix of non-Ruby objects must have default value of 0 to convert to yale");
 
+    nm_list_storage_register(rhs);
 
     size_t ndnz = nm_list_storage_count_nd_elements(rhs);
     // Copy shape for yale construction
-    size_t* shape = ALLOC_N(size_t, 2);
+    size_t* shape = NM_ALLOC_N(size_t, 2);
     shape[0] = rhs->shape[0];
     shape[1] = rhs->shape[1];
 
@@ -612,6 +639,8 @@ namespace yale_storage { // FIXME: Move to yale.cpp
     lhs_ija[rhs->shape[0]] = ija; // indicate the end of the last row
     lhs->ndnz = ndnz;
 
+    nm_list_storage_unregister(rhs);
+
     return lhs;
   }
 
diff --git a/ext/nmatrix/storage/storage.h b/ext/nmatrix/storage/storage.h
index 0a42d39..2b8fd30 100644
--- a/ext/nmatrix/storage/storage.h
+++ b/ext/nmatrix/storage/storage.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -45,8 +45,8 @@
 #include "data/data.h"
 
 #include "common.h"
-#include "dense.h"
-#include "list.h"
+#include "dense/dense.h"
+#include "list/list.h"
 #include "yale/yale.h"
 
 /*
diff --git a/ext/nmatrix/storage/yale/class.h b/ext/nmatrix/storage/yale/class.h
index c517796..c83822a 100644
--- a/ext/nmatrix/storage/yale/class.h
+++ b/ext/nmatrix/storage/yale/class.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -29,8 +29,9 @@
 #ifndef YALE_CLASS_H
 # define YALE_CLASS_H
 
-#include "../dense.h"
+#include "../dense/dense.h"
 #include "math/transpose.h"
+#include "yale.h"
 
 namespace nm {
 
@@ -50,14 +51,22 @@ public:
      slice(storage != storage->src),
      slice_shape(storage->shape),
      slice_offset(storage->offset)
-  { }
+  {
+    nm_yale_storage_register(storage->src);
+  }
 
   YaleStorage(const STORAGE* storage)
    : s(reinterpret_cast<YALE_STORAGE*>(storage->src)),
      slice(storage != storage->src),
      slice_shape(storage->shape),
      slice_offset(storage->offset)
-  { }
+  {
+    nm_yale_storage_register(reinterpret_cast<STORAGE*>(storage->src)); // same object as s; balanced by nm_yale_storage_unregister(s) in ~YaleStorage()
+  }
+
+  ~YaleStorage() {
+    nm_yale_storage_unregister(s); // undoes the nm_yale_storage_register() call made on construction
+  }
 
   /* Allows us to do YaleStorage<uint8>::dtype() to get an nm::dtype_t */
   static nm::dtype_t dtype() {
@@ -72,6 +81,7 @@ public:
   inline const D& default_obj() const { return a(s->shape[0]); }
   inline const D& const_default_obj() const { return a(s->shape[0]); }
 
+
   /*
    * Return a Ruby VALUE representation of default_obj()
    */
@@ -99,6 +109,14 @@ public:
 
 
   /*
+   * Returns true if a(apos) compares equal to const_default_obj(), i.e. the value at apos is the default value.
+   * Mainly used for determining if the diagonal contains zeros.
+   */
+  bool is_pos_default_value(size_t apos) const {
+    return (a(apos) == const_default_obj());
+  }
+
+  /*
    * Given a size-2 array of size_t, representing the shape, determine
    * the maximum size of YaleStorage arrays.
    */
@@ -328,7 +346,7 @@ public:
 
       // Make the necessary modifications, which hopefully can be done in-place.
       size_t v_offset = 0;
-      int accum       = 0;
+      //int accum       = 0;
       for (size_t ii = 0; ii < lengths[0]; ++ii, ++i) {
         i.insert(row_stored_nd_iterator(i, p.pos[ii]), j, lengths[1], v, v_size, v_offset);
       }
@@ -344,6 +362,8 @@ public:
    */
   void insert(SLICE* slice, VALUE right) {
 
+    NM_CONSERVATIVE(nm_register_value(right));
+
     std::pair<NMATRIX*,bool> nm_and_free =
       interpret_arg_as_dense_nmatrix(right, dtype());
     // Map the data onto D* v
@@ -358,10 +378,17 @@ public:
 
     } else if (TYPE(right) == T_ARRAY) {
       v_size = RARRAY_LEN(right);
-      v      = ALLOC_N(D, v_size);
+      v      = NM_ALLOC_N(D, v_size);
+      if (dtype() == nm::RUBYOBJ) {
+       nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
+      }
       for (size_t m = 0; m < v_size; ++m) {
         rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
       }
+      if (dtype() == nm::RUBYOBJ) {
+       nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
+      }
+
     } else {
       v = reinterpret_cast<D*>(rubyobj_to_cval(right, dtype()));
     }
@@ -381,7 +408,9 @@ public:
       if (nm_and_free.second) {
         nm_delete(nm_and_free.first);
       }
-    } else xfree(v);
+    } else NM_FREE(v);
+
+    NM_CONSERVATIVE(nm_unregister_value(right));
   }
 
 
@@ -489,15 +518,15 @@ public:
    * Allocate a reference pointing to s. Note that even if +this+ is a reference,
    * we can create a reference within it.
    *
-   * Note: Make sure you xfree() the result of this call. You can't just cast it
+   * Note: Make sure you NM_FREE() the result of this call. You can't just cast it
    * directly into a YaleStorage<D> class.
    */
   YALE_STORAGE* alloc_ref(SLICE* slice) {
-    YALE_STORAGE* ns  = ALLOC( YALE_STORAGE );
+    YALE_STORAGE* ns  = NM_ALLOC( YALE_STORAGE );
 
     ns->dim           = s->dim;
-    ns->offset        = ALLOC_N(size_t, ns->dim);
-    ns->shape         = ALLOC_N(size_t, ns->dim);
+    ns->offset        = NM_ALLOC_N(size_t, ns->dim);
+    ns->shape         = NM_ALLOC_N(size_t, ns->dim);
 
     for (size_t d = 0; d < ns->dim; ++d) {
       ns->offset[d]   = slice->coords[d]  + offset(d);
@@ -522,12 +551,12 @@ public:
    * Allocates and initializes the basic struct (but not IJA or A vectors).
    */
   static YALE_STORAGE* alloc(size_t* shape, size_t dim = 2) {
-    YALE_STORAGE* s = ALLOC( YALE_STORAGE );
+    YALE_STORAGE* s = NM_ALLOC( YALE_STORAGE );
 
     s->ndnz         = 0;
     s->dtype        = dtype();
     s->shape        = shape;
-    s->offset       = ALLOC_N(size_t, dim);
+    s->offset       = NM_ALLOC_N(size_t, dim);
     for (size_t d = 0; d < dim; ++d)
       s->offset[d]  = 0;
     s->dim          = dim;
@@ -556,8 +585,8 @@ public:
       s->capacity = reserve;
     }
 
-    s->ija = ALLOC_N( size_t, s->capacity );
-    s->a   = ALLOC_N( D,      s->capacity );
+    s->ija = NM_ALLOC_N( size_t, s->capacity );
+    s->a   = NM_ALLOC_N( D,      s->capacity );
 
     return s;
   }
@@ -608,14 +637,14 @@ public:
    template <typename E>
    YALE_STORAGE* alloc_basic_copy(size_t new_capacity, size_t new_ndnz) const {
      nm::dtype_t new_dtype = nm::ctype_to_dtype_enum<E>::value_type;
-     YALE_STORAGE* lhs     = ALLOC( YALE_STORAGE );
+     YALE_STORAGE* lhs     = NM_ALLOC( YALE_STORAGE );
      lhs->dim              = s->dim;
-     lhs->shape            = ALLOC_N( size_t, lhs->dim );
+     lhs->shape            = NM_ALLOC_N( size_t, lhs->dim );
 
      lhs->shape[0]         = shape(0);
      lhs->shape[1]         = shape(1);
 
-     lhs->offset           = ALLOC_N( size_t, lhs->dim );
+     lhs->offset           = NM_ALLOC_N( size_t, lhs->dim );
 
      lhs->offset[0]        = 0;
      lhs->offset[1]        = 0;
@@ -623,8 +652,8 @@ public:
      lhs->capacity         = new_capacity;
      lhs->dtype            = new_dtype;
      lhs->ndnz             = new_ndnz;
-     lhs->ija              = ALLOC_N( size_t, new_capacity );
-     lhs->a                = ALLOC_N( E,      new_capacity );
+     lhs->ija              = NM_ALLOC_N( size_t, new_capacity );
+     lhs->a                = NM_ALLOC_N( E,      new_capacity );
      lhs->src              = lhs;
      lhs->count            = 1;
 
@@ -633,7 +662,7 @@ public:
 
 
   /*
-   * Make a full matrix structure copy (entries remain uninitialized). Remember to xfree()!
+   * Make a full matrix structure copy (entries remain uninitialized). Remember to NM_FREE()!
    */
   template <typename E>
   YALE_STORAGE* alloc_struct_copy(size_t new_capacity) const {
@@ -655,7 +684,7 @@ public:
    */
   template <typename E, bool Yield=false>
   void copy(YALE_STORAGE& ns) const {
-    nm::dtype_t new_dtype = nm::ctype_to_dtype_enum<E>::value_type;
+    //nm::dtype_t new_dtype = nm::ctype_to_dtype_enum<E>::value_type;
     // get the default value for initialization (we'll re-use val for other copies after this)
     E val = static_cast<E>(const_default_obj());
 
@@ -665,6 +694,7 @@ public:
 
     E* ns_a    = reinterpret_cast<E*>(ns.a);
     size_t sz  = shape(0) + 1; // current used size of ns
+    nm_yale_storage_register(&ns);
 
     // FIXME: If diagonals line up, it's probably faster to do this with stored diagonal and stored non-diagonal iterators
     for (const_row_iterator it = cribegin(); it != criend(); ++it) {
@@ -681,6 +711,7 @@ public:
       }
       ns.ija[it.i()+1]  = sz;
     }
+    nm_yale_storage_unregister(&ns);
 
     //ns.ija[shape(0)] = sz;                // indicate end of last row
     ns.ndnz          = sz - shape(0) - 1; // update ndnz count
@@ -688,17 +719,17 @@ public:
 
 
   /*
-   * Allocate a casted copy of this matrix/reference. Remember to xfree() the result!
+   * Allocate a casted copy of this matrix/reference. Remember to NM_FREE() the result!
    *
    * If Yield is true, E must be nm::RubyObject, and it will call an rb_yield upon the stored value.
    */
   template <typename E, bool Yield = false>
   YALE_STORAGE* alloc_copy() const {
-    nm::dtype_t new_dtype = nm::ctype_to_dtype_enum<E>::value_type;
+    //nm::dtype_t new_dtype = nm::ctype_to_dtype_enum<E>::value_type;
 
     YALE_STORAGE* lhs;
     if (slice) {
-      size_t* xshape    = ALLOC_N(size_t, 2);
+      size_t* xshape    = NM_ALLOC_N(size_t, 2);
       xshape[0]         = shape(0);
       xshape[1]         = shape(1);
       size_t ndnz       = count_copy_ndnz();
@@ -708,6 +739,7 @@ public:
 
       lhs               = YaleStorage<E>::create(xshape, reserve);
 
+      // FIXME: This should probably be a throw which gets caught outside of the object.
       if (lhs->capacity < reserve)
         rb_raise(nm_eStorageTypeError, "conversion failed; capacity of %lu requested, max allowable is %lu", reserve, lhs->capacity);
 
@@ -718,10 +750,15 @@ public:
       lhs               = alloc_struct_copy<E>(s->capacity);
 
       E* la = reinterpret_cast<E*>(lhs->a);
+
+      nm_yale_storage_register(lhs);
       for (size_t m = 0; m < size(); ++m) {
-        if (Yield) la[m] = rb_yield(nm::yale_storage::nm_rb_dereference(a(m)));
+        if (Yield) {
+	  la[m] = rb_yield(nm::yale_storage::nm_rb_dereference(a(m)));
+	}
         else       la[m] = static_cast<E>(a(m));
       }
+      nm_yale_storage_unregister(lhs);
 
     }
 
@@ -732,7 +769,7 @@ public:
    * Allocate a transposed copy of the matrix
    */
   /*
-   * Allocate a casted copy of this matrix/reference. Remember to xfree() the result!
+   * Allocate a casted copy of this matrix/reference. Remember to NM_FREE() the result!
    *
    * If Yield is true, E must be nm::RubyObject, and it will call an rb_yield upon the stored value.
    */
@@ -743,7 +780,7 @@ public:
       rb_raise(rb_eNotImpError, "please make a copy before transposing");
     } else {
       // Copy the structure and setup the IJA structure.
-      size_t* xshape    = ALLOC_N(size_t, 2);
+      size_t* xshape    = NM_ALLOC_N(size_t, 2);
       xshape[0]         = shape(1);
       xshape[1]         = shape(0);
 
@@ -806,30 +843,44 @@ public:
    */
   template <typename E>
   VALUE map_merged_stored(VALUE klass, nm::YaleStorage<E>& t, VALUE r_init) const {
+    nm_register_value(r_init);
     VALUE s_init    = const_default_value(),
           t_init    = t.const_default_value();
-
+    nm_register_value(s_init);
+    nm_register_value(t_init);
+    
     // Make a reasonable approximation of the resulting capacity
     size_t s_ndnz   = count_copy_ndnz(),
            t_ndnz   = t.count_copy_ndnz();
     size_t reserve  = shape(0) + std::max(s_ndnz, t_ndnz) + 1;
 
-    size_t* xshape  = ALLOC_N(size_t, 2);
+    size_t* xshape  = NM_ALLOC_N(size_t, 2);
     xshape[0]       = shape(0);
     xshape[1]       = shape(1);
 
     YALE_STORAGE* rs= YaleStorage<nm::RubyObject>::create(xshape, reserve);
 
-    if (r_init == Qnil)
+    if (r_init == Qnil) {
+      nm_unregister_value(r_init);
       r_init       = rb_yield_values(2, s_init, t_init);
+      nm_register_value(r_init);
+    }
 
     nm::RubyObject r_init_obj(r_init);
 
     // Prepare the matrix structure
     YaleStorage<nm::RubyObject>::init(*rs, &r_init_obj);
     NMATRIX* m     = nm_create(nm::YALE_STORE, reinterpret_cast<STORAGE*>(rs));
+    nm_register_nmatrix(m);
     VALUE result   = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);
-
+    nm_unregister_nmatrix(m);
+    nm_register_value(result);
+    nm_unregister_value(r_init);
+
+    RETURN_SIZED_ENUMERATOR_PRE
+    nm_unregister_value(result);
+    nm_unregister_value(t_init);
+    nm_unregister_value(s_init);
     // No obvious, efficient way to pass a length function as the fourth argument here:
     RETURN_SIZED_ENUMERATOR(result, 0, 0, 0);
 
@@ -873,6 +924,9 @@ public:
         //RB_P(rb_funcall(result, rb_intern("yale_ija"), 0));
       }
     }
+    nm_unregister_value(result);
+    nm_unregister_value(t_init);
+    nm_unregister_value(s_init);
 
     return result;
   }
@@ -900,12 +954,16 @@ protected:
     size_t new_cap = sz + p.total_change;
 
     if (new_cap > real_max_size()) {
-      xfree(v);
+      NM_FREE(v);
       rb_raise(rb_eStandardError, "resize caused by insertion of size %d (on top of current size %lu) would have caused yale matrix size to exceed its maximum (%lu)", p.total_change, sz, real_max_size());
     }
 
-    size_t* new_ija     = ALLOC_N( size_t,new_cap );
-    D* new_a            = ALLOC_N( D,     new_cap );
+    if (s->dtype == nm::RUBYOBJ) {
+      nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
+    }
+
+    size_t* new_ija     = NM_ALLOC_N( size_t,new_cap );
+    D* new_a            = NM_ALLOC_N( D,     new_cap );
 
     // Copy unchanged row pointers first.
     size_t m = 0;
@@ -967,8 +1025,12 @@ protected:
 
     s->capacity = new_cap;
 
-    xfree(s->ija);
-    xfree(s->a);
+    NM_FREE(s->ija);
+    NM_FREE(s->a);
+
+    if (s->dtype == nm::RUBYOBJ) {
+      nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
+    }   
 
     s->ija      = new_ija;
     s->a        = reinterpret_cast<void*>(new_a);
@@ -994,8 +1056,8 @@ protected:
 
     if (new_cap < sz + n) new_cap = sz + n;
 
-    size_t* new_ija     = ALLOC_N( size_t,new_cap );
-    D* new_a            = ALLOC_N( D,     new_cap );
+    size_t* new_ija     = NM_ALLOC_N( size_t,new_cap );
+    D* new_a            = NM_ALLOC_N( D,     new_cap );
 
     // Copy unchanged row pointers first.
     for (size_t m = 0; m <= real_i; ++m) {
@@ -1024,11 +1086,18 @@ protected:
       new_a[m+n]        = a(m);
     }
 
+    if (s->dtype == nm::RUBYOBJ) {
+      nm_yale_storage_register_a(new_a, new_cap);
+    }
 
     s->capacity = new_cap;
 
-    xfree(s->ija);
-    xfree(s->a);
+    NM_FREE(s->ija);
+    NM_FREE(s->a);
+
+    if (s->dtype == nm::RUBYOBJ) {
+      nm_yale_storage_unregister_a(new_a, new_cap);
+    }
 
     s->ija      = new_ija;
     s->a        = reinterpret_cast<void*>(new_a);
@@ -1067,4 +1136,4 @@ protected:
 
 } // end of nm namespace
 
-#endif // YALE_CLASS_H
\ No newline at end of file
+#endif // YALE_CLASS_H
diff --git a/ext/nmatrix/storage/yale/iterators/base.h b/ext/nmatrix/storage/yale/iterators/base.h
index 5c96f6e..04e6065 100644
--- a/ext/nmatrix/storage/yale/iterators/base.h
+++ b/ext/nmatrix/storage/yale/iterators/base.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/yale/iterators/iterator.h b/ext/nmatrix/storage/yale/iterators/iterator.h
index b92875a..83bd869 100644
--- a/ext/nmatrix/storage/yale/iterators/iterator.h
+++ b/ext/nmatrix/storage/yale/iterators/iterator.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/yale/iterators/row.h b/ext/nmatrix/storage/yale/iterators/row.h
index dd096ea..00c036d 100644
--- a/ext/nmatrix/storage/yale/iterators/row.h
+++ b/ext/nmatrix/storage/yale/iterators/row.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/yale/iterators/row_stored.h b/ext/nmatrix/storage/yale/iterators/row_stored.h
index 5d725c6..58b206a 100644
--- a/ext/nmatrix/storage/yale/iterators/row_stored.h
+++ b/ext/nmatrix/storage/yale/iterators/row_stored.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/yale/iterators/row_stored_nd.h b/ext/nmatrix/storage/yale/iterators/row_stored_nd.h
index 965f225..8956877 100644
--- a/ext/nmatrix/storage/yale/iterators/row_stored_nd.h
+++ b/ext/nmatrix/storage/yale/iterators/row_stored_nd.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -87,6 +87,7 @@ public:
     if (&r != &(rhs.r))
       throw std::logic_error("can't assign iterator from another row iterator");
     p_ = rhs.p_;
+    return *this;
   }
 
   virtual size_t p() const { return p_; }
@@ -164,4 +165,4 @@ public:
 
 } } // end of namespace nm::yale_storage
 
-#endif // YALE_ITERATORS_ROW_STORED_ND_H
\ No newline at end of file
+#endif // YALE_ITERATORS_ROW_STORED_ND_H
diff --git a/ext/nmatrix/storage/yale/iterators/stored_diagonal.h b/ext/nmatrix/storage/yale/iterators/stored_diagonal.h
index fd8eb61..7ab1e19 100644
--- a/ext/nmatrix/storage/yale/iterators/stored_diagonal.h
+++ b/ext/nmatrix/storage/yale/iterators/stored_diagonal.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/yale/math/transpose.h b/ext/nmatrix/storage/yale/math/transpose.h
index d1af377..56918f3 100644
--- a/ext/nmatrix/storage/yale/math/transpose.h
+++ b/ext/nmatrix/storage/yale/math/transpose.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/storage/yale/yale.cpp b/ext/nmatrix/storage/yale/yale.cpp
index 0bbf821..6652cfa 100644
--- a/ext/nmatrix/storage/yale/yale.cpp
+++ b/ext/nmatrix/storage/yale/yale.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -101,6 +101,7 @@ extern "C" {
   static VALUE nm_ia(VALUE self);
   static VALUE nm_ja(VALUE self);
   static VALUE nm_ija(int argc, VALUE* argv, VALUE self);
+  static VALUE nm_row_keys_intersection(VALUE m1, VALUE ii1, VALUE m2, VALUE ii2);
 
   static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self);
 
@@ -208,8 +209,8 @@ YALE_STORAGE* create_from_old_yale(dtype_t dtype, size_t* shape, char* r_ia, cha
   s->ndnz     = ndnz;
 
   // Setup IJA and A arrays
-  s->ija = ALLOC_N( IType, s->capacity );
-  s->a   = ALLOC_N( LDType, s->capacity );
+  s->ija = NM_ALLOC_N( IType, s->capacity );
+  s->a   = NM_ALLOC_N( LDType, s->capacity );
   IType* ijl    = reinterpret_cast<IType*>(s->ija);
   LDType* al    = reinterpret_cast<LDType*>(s->a);
 
@@ -452,14 +453,14 @@ static void vector_grow(YALE_STORAGE* s) {
   if (s != s->src) {
     throw; // need to correct this quickly.
   }
-
+  nm_yale_storage_register(s);
   size_t new_capacity = s->capacity * GROWTH_CONSTANT;
   size_t max_capacity = YaleStorage<uint8_t>::max_size(s->shape);
 
   if (new_capacity > max_capacity) new_capacity = max_capacity;
 
-  IType* new_ija      = ALLOC_N(IType, new_capacity);
-  void* new_a         = ALLOC_N(char, DTYPE_SIZES[s->dtype] * new_capacity);
+  IType* new_ija      = NM_ALLOC_N(IType, new_capacity);
+  void* new_a         = NM_ALLOC_N(char, DTYPE_SIZES[s->dtype] * new_capacity);
 
   IType* old_ija      = s->ija;
   void* old_a         = s->a;
@@ -469,11 +470,18 @@ static void vector_grow(YALE_STORAGE* s) {
 
   s->capacity         = new_capacity;
 
-  xfree(old_ija);
-  xfree(old_a);
+  if (s->dtype == nm::RUBYOBJ) // register the new buffer by element count, like the other nm_register_values call sites
+    nm_yale_storage_register_a(new_a, s->capacity);
+
+  nm_yale_storage_unregister(s); // before the free: s->ija still points at old_ija and get_size(s) presumably reads it
+  NM_FREE(old_ija);
+  NM_FREE(old_a);
+  if (s->dtype == nm::RUBYOBJ)
+    nm_yale_storage_unregister_a(new_a, s->capacity);
 
   s->ija         = new_ija;
   s->a           = new_a;
+
 }
 
 
@@ -497,11 +505,13 @@ static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t po
   if (new_capacity < current_size + n)
   	new_capacity = current_size + n;
 
+  nm_yale_storage_register(s);
+
   // Allocate the new vectors.
-  IType* new_ija     = ALLOC_N( IType, new_capacity );
+  IType* new_ija     = NM_ALLOC_N( IType, new_capacity );
   NM_CHECK_ALLOC(new_ija);
 
-  DType* new_a       = ALLOC_N( DType, new_capacity );
+  DType* new_a       = NM_ALLOC_N( DType, new_capacity );
   NM_CHECK_ALLOC(new_a);
 
   IType* old_ija     = reinterpret_cast<IType*>(s->ija);
@@ -533,9 +543,15 @@ static char vector_insert_resize(YALE_STORAGE* s, size_t current_size, size_t po
   }
 
   s->capacity = new_capacity;
+  if (s->dtype == nm::RUBYOBJ)
+    nm_yale_storage_register_a(new_a, new_capacity);
 
-  xfree(s->ija);
-  xfree(s->a);
+  nm_yale_storage_unregister(s); // before the free: unregister evaluates get_size(s), which presumably reads s->ija
+  NM_FREE(s->ija);
+  NM_FREE(s->a);
+  
+  if (s->dtype == nm::RUBYOBJ)
+    nm_yale_storage_unregister_a(new_a, new_capacity);
 
   s->ija = new_ija;
   s->a   = reinterpret_cast<void*>(new_a);
@@ -566,12 +582,11 @@ static char vector_insert(YALE_STORAGE* s, size_t pos, size_t* j, void* val_, si
   DType* a   = reinterpret_cast<DType*>(s->a);
 
   if (size + n > s->capacity) {
-  	vector_insert_resize<DType>(s, size, pos, j, n, struct_only);
+    vector_insert_resize<DType>(s, size, pos, j, n, struct_only);
 
     // Need to get the new locations for ija and a.
   	ija = s->ija;
     a   = reinterpret_cast<DType*>(s->a);
-
   } else {
     /*
      * No resize required:
@@ -673,6 +688,8 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
   YALE_STORAGE *left  = (YALE_STORAGE*)(casted_storage.left),
                *right = (YALE_STORAGE*)(casted_storage.right);
 
+  nm_yale_storage_register(left);
+  nm_yale_storage_register(right);
   // We can safely get dtype from the casted matrices; post-condition of binary_storage_cast_alloc is that dtype is the
   // same for left and right.
   // int8_t dtype = left->dtype;
@@ -704,6 +721,8 @@ static STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resu
   // Sort the columns
   nm::math::smmp_sort_columns<DType>(result->shape[0], ija, ija, reinterpret_cast<DType*>(result->a));
 
+  nm_yale_storage_unregister(right);
+  nm_yale_storage_unregister(left);
   return reinterpret_cast<STORAGE*>(result);
 }
 
@@ -872,11 +891,13 @@ public:
 // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
 // the matrix's storage.
 static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
   YALE_STORAGE* s   = NM_STORAGE_YALE(nmatrix);
   YALE_STORAGE* src = s->src == s ? s : reinterpret_cast<YALE_STORAGE*>(s->src);
   size_t ia_size    = src->shape[0];
   // FIXME: This needs to be corrected for slicing.
   size_t len = std::min( s->shape[0] + s->offset[0], s->shape[1] + s->offset[1] ) + nm_yale_storage_get_size(src) -  ia_size;
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
   return INT2FIX(len);
 }
 
@@ -884,27 +905,32 @@ static VALUE nm_yale_stored_enumerator_length(VALUE nmatrix) {
 // Helper function used only for the RETURN_SIZED_ENUMERATOR macro. Returns the length of
 // the matrix's storage.
 static VALUE nm_yale_stored_nondiagonal_enumerator_length(VALUE nmatrix) {
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
   YALE_STORAGE* s = NM_STORAGE_YALE(nmatrix);
   if (s->src != s) s = reinterpret_cast<YALE_STORAGE*>(s->src);  // need to get the original storage shape
 
   size_t ia_size = s->shape[0];
   size_t len     = nm_yale_storage_get_size(NM_STORAGE_YALE(nmatrix)) - ia_size;
-
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
   return INT2FIX(len);
 }
 
 // Helper function for diagonal length.
 static VALUE nm_yale_stored_diagonal_enumerator_length(VALUE nmatrix) {
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
   YALE_STORAGE* s = NM_STORAGE_YALE(nmatrix);
   size_t len = std::min( s->shape[0] + s->offset[0], s->shape[1] + s->offset[1] );
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
   return INT2FIX(len);
 }
 
 
 // Helper function for full enumerator length.
 static VALUE nm_yale_enumerator_length(VALUE nmatrix) {
+  NM_CONSERVATIVE(nm_register_value(nmatrix));
   YALE_STORAGE* s = NM_STORAGE_YALE(nmatrix);
   size_t len = s->shape[0] * s->shape[1];
+  NM_CONSERVATIVE(nm_unregister_value(nmatrix));
   return INT2FIX(len);
 }
 
@@ -914,12 +940,21 @@ static VALUE nm_yale_enumerator_length(VALUE nmatrix) {
  */
 template <typename D>
 static VALUE map_stored(VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
   YALE_STORAGE* s = NM_STORAGE_YALE(self);
   YaleStorage<D> y(s);
+  
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(self));
   RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_yale_stored_enumerator_length);
+
   YALE_STORAGE* r = y.template alloc_copy<nm::RubyObject, true>();
+  nm_yale_storage_register(r);
   NMATRIX* m      = nm_create(nm::YALE_STORE, reinterpret_cast<STORAGE*>(r));
-  return Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, m);
+  VALUE to_return = Data_Wrap_Struct(CLASS_OF(self), nm_mark, nm_delete, m);
+  nm_yale_storage_unregister(r);
+  NM_CONSERVATIVE(nm_unregister_value(self));
+  return to_return;
 }
 
 
@@ -930,7 +965,8 @@ template <typename LD, typename RD>
 static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init) {
   nm::YaleStorage<LD> l(NM_STORAGE_YALE(left));
   nm::YaleStorage<RD> r(NM_STORAGE_YALE(right));
-  return l.map_merged_stored(CLASS_OF(left), r, init);
+  VALUE to_return = l.map_merged_stored(CLASS_OF(left), r, init);
+  return to_return;
 }
 
 
@@ -939,10 +975,13 @@ static VALUE map_merged_stored(VALUE left, VALUE right, VALUE init) {
  */
 template <typename DType>
 static VALUE each_stored_with_indices(VALUE nm) {
+  NM_CONSERVATIVE(nm_register_value(nm));
   YALE_STORAGE* s = NM_STORAGE_YALE(nm);
   YaleStorage<DType> y(s);
 
   // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nm));
   RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
 
   for (typename YaleStorage<DType>::const_stored_diagonal_iterator d = y.csdbegin(); d != y.csdend(); ++d) {
@@ -955,6 +994,8 @@ static VALUE each_stored_with_indices(VALUE nm) {
     }
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(nm));
+
   return nm;
 }
 
@@ -964,16 +1005,22 @@ static VALUE each_stored_with_indices(VALUE nm) {
  */
 template <typename DType>
 static VALUE stored_diagonal_each_with_indices(VALUE nm) {
+  NM_CONSERVATIVE(nm_register_value(nm));
+
   YALE_STORAGE* s = NM_STORAGE_YALE(nm);
   YaleStorage<DType> y(s);
 
   // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nm));
   RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_diagonal_length); // FIXME: need diagonal length
-
+  
   for (typename YaleStorage<DType>::const_stored_diagonal_iterator d = y.csdbegin(); d != y.csdend(); ++d) {
     rb_yield_values(3, ~d, d.rb_i(), d.rb_j());
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(nm));
+
   return nm;
 }
 
@@ -983,10 +1030,14 @@ static VALUE stored_diagonal_each_with_indices(VALUE nm) {
  */
 template <typename DType>
 static VALUE stored_nondiagonal_each_with_indices(VALUE nm) {
+  NM_CONSERVATIVE(nm_register_value(nm));
+
   YALE_STORAGE* s = NM_STORAGE_YALE(nm);
   YaleStorage<DType> y(s);
 
   // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nm));
   RETURN_SIZED_ENUMERATOR(nm, 0, 0, 0); // FIXME: need diagonal length
 
   for (typename YaleStorage<DType>::const_row_iterator it = y.cribegin(); it != y.criend(); ++it) {
@@ -995,6 +1046,8 @@ static VALUE stored_nondiagonal_each_with_indices(VALUE nm) {
     }
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(nm));
+
   return nm;
 }
 
@@ -1004,10 +1057,14 @@ static VALUE stored_nondiagonal_each_with_indices(VALUE nm) {
  */
 template <typename DType>
 static VALUE each_ordered_stored_with_indices(VALUE nm) {
+  NM_CONSERVATIVE(nm_register_value(nm));
+
   YALE_STORAGE* s = NM_STORAGE_YALE(nm);
   YaleStorage<DType> y(s);
 
   // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nm));
   RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_stored_enumerator_length);
 
   for (typename YaleStorage<DType>::const_row_iterator it = y.cribegin(); it != y.criend(); ++it) {
@@ -1016,25 +1073,39 @@ static VALUE each_ordered_stored_with_indices(VALUE nm) {
     }
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(nm));
+
   return nm;
 }
 
 
 template <typename DType>
 static VALUE each_with_indices(VALUE nm) {
+  NM_CONSERVATIVE(nm_register_value(nm));
+
   YALE_STORAGE* s = NM_STORAGE_YALE(nm);
   YaleStorage<DType> y(s);
 
   // If we don't have a block, return an enumerator.
+  RETURN_SIZED_ENUMERATOR_PRE
+  NM_CONSERVATIVE(nm_unregister_value(nm));
   RETURN_SIZED_ENUMERATOR(nm, 0, 0, nm_yale_enumerator_length);
 
   for (typename YaleStorage<DType>::const_iterator iter = y.cbegin(); iter != y.cend(); ++iter) {
     rb_yield_values(3, ~iter, iter.rb_i(), iter.rb_j());
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(nm));
+
   return nm;
 }
 
+template <typename D>
+static bool is_pos_default_value(YALE_STORAGE* s, size_t apos) {
+  YaleStorage<D> y(s);
+  return y.is_pos_default_value(apos);
+}
+
 
 } // end of namespace nm::yale_storage
 
@@ -1056,6 +1127,10 @@ void nm_init_yale_functions() {
 	 */
   cNMatrix_YaleFunctions = rb_define_module_under(cNMatrix, "YaleFunctions");
 
+  // Expert recommendation. Eventually this should go in a separate gem, or at least a separate module.
+  rb_define_method(cNMatrix_YaleFunctions, "yale_row_keys_intersection", (METHOD)nm_row_keys_intersection, 3);
+
+  // Debugging functions.
   rb_define_method(cNMatrix_YaleFunctions, "yale_ija", (METHOD)nm_ija, -1);
   rb_define_method(cNMatrix_YaleFunctions, "yale_a", (METHOD)nm_a, -1);
   rb_define_method(cNMatrix_YaleFunctions, "yale_size", (METHOD)nm_size, 0);
@@ -1162,7 +1237,7 @@ void* nm_yale_storage_get(const STORAGE* storage, SLICE* slice) {
 
     return elem_copy_table[casted_storage->dtype](casted_storage, slice);
   } else {
-
+    nm_yale_storage_register(casted_storage);
     //return reinterpret_cast<void*>(nm::YaleStorage<nm::dtype_enum_T<storage->dtype>::type>(casted_storage).alloc_ref(slice));
     NAMED_DTYPE_TEMPLATE_TABLE(ref_table, nm::yale_storage::ref, YALE_STORAGE*, YALE_STORAGE* storage, SLICE* slice)
 
@@ -1172,7 +1247,9 @@ void* nm_yale_storage_get(const STORAGE* storage, SLICE* slice) {
 
     YALE_STORAGE* ns = slice_copy_table[casted_storage->dtype][casted_storage->dtype](ref);
 
-    xfree(ref);
+    NM_FREE(ref);
+
+    nm_yale_storage_unregister(casted_storage);
 
     return ns;
   }
@@ -1339,11 +1416,11 @@ void nm_yale_storage_delete(STORAGE* s) {
   if (s) {
     YALE_STORAGE* storage = (YALE_STORAGE*)s;
     if (storage->count-- == 1) {
-      xfree(storage->shape);
-      xfree(storage->offset);
-      xfree(storage->ija);
-      xfree(storage->a);
-      xfree(storage);
+      NM_FREE(storage->shape);
+      NM_FREE(storage->offset);
+      NM_FREE(storage->ija);
+      NM_FREE(storage->a);
+      NM_FREE(storage);
     }
   }
 }
@@ -1355,9 +1432,9 @@ void nm_yale_storage_delete_ref(STORAGE* s) {
   if (s) {
     YALE_STORAGE* storage = (YALE_STORAGE*)s;
     nm_yale_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
-    xfree(storage->shape);
-    xfree(storage->offset);
-    xfree(s);
+    NM_FREE(storage->shape);
+    NM_FREE(storage->offset);
+    NM_FREE(s);
   }
 }
 
@@ -1378,15 +1455,35 @@ void nm_yale_storage_init(YALE_STORAGE* s, void* init_val) {
  */
 void nm_yale_storage_mark(STORAGE* storage_base) {
   YALE_STORAGE* storage = (YALE_STORAGE*)storage_base;
-  size_t i;
 
   if (storage && storage->dtype == nm::RUBYOBJ) {
 
     VALUE* a = (VALUE*)(storage->a);
-    rb_gc_mark_locations(a, a + storage->capacity * sizeof(VALUE));
+    rb_gc_mark_locations(a, a + storage->capacity); // end pointer is exclusive: marks a[0] .. a[capacity-1]
   }
 }
 
+void nm_yale_storage_register_a(void* a, size_t size) {
+  nm_register_values(reinterpret_cast<VALUE*>(a), size);
+}
+
+void nm_yale_storage_unregister_a(void* a, size_t size) {
+  nm_unregister_values(reinterpret_cast<VALUE*>(a), size);
+}
+
+void nm_yale_storage_register(const STORAGE* s) {
+  const YALE_STORAGE* y = reinterpret_cast<const YALE_STORAGE*>(s);
+  if (y->dtype == nm::RUBYOBJ) {
+    nm_register_values(reinterpret_cast<VALUE*>(y->a), nm::yale_storage::get_size(y));
+  }
+}
+
+void nm_yale_storage_unregister(const STORAGE* s) {
+  const YALE_STORAGE* y = reinterpret_cast<const YALE_STORAGE*>(s);
+  if (y->dtype == nm::RUBYOBJ) {
+    nm_unregister_values(reinterpret_cast<VALUE*>(y->a), nm::yale_storage::get_size(y));
+  }
+}
 
 /*
  * Allocates and initializes the basic struct (but not the IJA or A vectors).
@@ -1396,12 +1493,12 @@ void nm_yale_storage_mark(STORAGE* storage_base) {
 static YALE_STORAGE* alloc(nm::dtype_t dtype, size_t* shape, size_t dim) {
   YALE_STORAGE* s;
 
-  s = ALLOC( YALE_STORAGE );
+  s = NM_ALLOC( YALE_STORAGE );
 
   s->ndnz        = 0;
   s->dtype       = dtype;
   s->shape       = shape;
-  s->offset      = ALLOC_N(size_t, dim);
+  s->offset      = NM_ALLOC_N(size_t, dim);
   for (size_t i = 0; i < dim; ++i)
     s->offset[i] = 0;
   s->dim         = dim;
@@ -1432,7 +1529,110 @@ YALE_STORAGE* nm_yale_storage_create_from_old_yale(nm::dtype_t dtype, size_t* sh
  */
 static VALUE nm_size(VALUE self) {
   YALE_STORAGE* s = (YALE_STORAGE*)(NM_SRC(self));
-  return INT2FIX(nm::yale_storage::IJA(s)[s->shape[0]]);
+  VALUE to_return = INT2FIX(nm::yale_storage::IJA(s)[s->shape[0]]);
+  return to_return;
+}
+
+
+/*
+ * Determine if some pos in the diagonal is the default. No bounds checking!
+ */
+static bool is_pos_default_value(YALE_STORAGE* s, size_t apos) {
+  DTYPE_TEMPLATE_TABLE(nm::yale_storage::is_pos_default_value, bool, YALE_STORAGE*, size_t)
+  return ttable[s->dtype](s, apos);
+}
+
+
+/*
+ * call-seq:
+ *     yale_row_keys_intersection(i, m2, i2) -> Array
+ *
+ * This function is experimental.
+ *
+ * It finds the intersection of row i of the current matrix with row i2 of matrix m2.
+ * Both matrices must be Yale. They may not be slices.
+ *
+ * Only checks the stored indices; does not care about matrix default value.
+ */
+static VALUE nm_row_keys_intersection(VALUE m1, VALUE ii1, VALUE m2, VALUE ii2) {
+  
+  NM_CONSERVATIVE(nm_register_value(m1));
+  NM_CONSERVATIVE(nm_register_value(m2));
+
+  if (NM_SRC(m1) != NM_STORAGE(m1) || NM_SRC(m2) != NM_STORAGE(m2)) {
+    NM_CONSERVATIVE(nm_unregister_value(m2));
+    NM_CONSERVATIVE(nm_unregister_value(m1));
+    rb_raise(rb_eNotImpError, "must be called on a real matrix and not a slice");
+  }
+
+  size_t i1 = FIX2INT(ii1),
+         i2 = FIX2INT(ii2);
+
+  YALE_STORAGE *s   = NM_STORAGE_YALE(m1),
+               *t   = NM_STORAGE_YALE(m2);
+
+  size_t pos1 = s->ija[i1],
+         pos2 = t->ija[i2];
+
+  size_t nextpos1 = s->ija[i1+1],
+         nextpos2 = t->ija[i2+1];
+
+  size_t diff1 = nextpos1 - pos1,
+         diff2 = nextpos2 - pos2;
+
+  // Does the diagonal have a nonzero in it?
+  bool diag1 = i1 < s->shape[0] && !is_pos_default_value(s, i1),
+       diag2 = i2 < t->shape[0] && !is_pos_default_value(t, i2);
+
+  // Reserve max(diff1,diff2) space -- that's the max intersection possible.
+  VALUE ret = rb_ary_new2(std::max(diff1,diff2)+1);
+  nm_register_value(ret);
+
+  // Handle once the special case where both have the diagonal in exactly
+  // the same place.
+  if (diag1 && diag2 && i1 == i2) {
+    rb_ary_push(ret, INT2FIX(i1));
+    diag1 = false; diag2 = false; // no need to deal with diagonals anymore.
+  }
+
+  // Now find the intersection.
+  size_t idx1 = pos1, idx2 = pos2;
+  while (idx1 < nextpos1 && idx2 < nextpos2) {
+    if (s->ija[idx1] == t->ija[idx2]) {
+      rb_ary_push(ret, INT2FIX(s->ija[idx1]));
+      ++idx1; ++idx2;
+    } else if (diag1 && i1 == t->ija[idx2]) {
+      rb_ary_push(ret, INT2FIX(i1));
+      diag1 = false;
+      ++idx2;
+    } else if (diag2 && i2 == s->ija[idx1]) {
+      rb_ary_push(ret, INT2FIX(i2));
+      diag2 = false;
+      ++idx1;
+    } else if (s->ija[idx1] < t->ija[idx2]) {
+      ++idx1;
+    } else { // s->ija[idx1] > t->ija[idx2]
+      ++idx2;
+    }
+  }
+
+  // Past the end of row i2's stored entries; need to try to find diagonal
+  if (diag2 && idx1 < nextpos1) {
+    idx1 = nm::yale_storage::binary_search_left_boundary(s, idx1, nextpos1, i2);
+    if (s->ija[idx1] == i2) rb_ary_push(ret, INT2FIX(i2));
+  }
+
+  // Find the diagonal, if possible, in the other one.
+  if (diag1 && idx2 < nextpos2) {
+    idx2 = nm::yale_storage::binary_search_left_boundary(t, idx2, nextpos2, i1);
+    if (t->ija[idx2] == i1) rb_ary_push(ret, INT2FIX(i1));
+  }
+
+  nm_unregister_value(ret);
+  NM_CONSERVATIVE(nm_unregister_value(m1));
+  NM_CONSERVATIVE(nm_unregister_value(m2));
+
+  return ret;
 }
 
 
@@ -1444,15 +1644,21 @@ static VALUE nm_size(VALUE self) {
  * Get the A array of a Yale matrix (which stores the diagonal and the LU portions of the matrix).
  */
 static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
+
   VALUE idx;
   rb_scan_args(argc, argv, "01", &idx);
+  NM_CONSERVATIVE(nm_register_value(idx));
 
   YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
   size_t size = nm_yale_storage_get_size(s);
 
   if (idx == Qnil) {
-    VALUE* vals = ALLOCA_N(VALUE, size);
 
+    VALUE* vals = NM_ALLOCA_N(VALUE, size);
+
+    nm_register_values(vals, size);
+    
     if (NM_DTYPE(self) == nm::RUBYOBJ) {
       for (size_t i = 0; i < size; ++i) {
         vals[i] = reinterpret_cast<VALUE*>(s->a)[i];
@@ -1467,11 +1673,15 @@ static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
     for (size_t i = size; i < s->capacity; ++i)
       rb_ary_push(ary, Qnil);
 
+    nm_unregister_values(vals, size);
+    NM_CONSERVATIVE(nm_unregister_value(idx));
+    NM_CONSERVATIVE(nm_unregister_value(self));
     return ary;
   } else {
     size_t index = FIX2INT(idx);
+    NM_CONSERVATIVE(nm_unregister_value(idx));
+    NM_CONSERVATIVE(nm_unregister_value(self));
     if (index >= size) rb_raise(rb_eRangeError, "out of range");
-
     return rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype] * index, s->dtype).rval;
   }
 }
@@ -1485,13 +1695,17 @@ static VALUE nm_a(int argc, VALUE* argv, VALUE self) {
  * Get the diagonal ("D") portion of the A array of a Yale matrix.
  */
 static VALUE nm_d(int argc, VALUE* argv, VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
   VALUE idx;
   rb_scan_args(argc, argv, "01", &idx);
+  NM_CONSERVATIVE(nm_register_value(idx));
 
   YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
 
   if (idx == Qnil) {
-    VALUE* vals = ALLOCA_N(VALUE, s->shape[0]);
+    VALUE* vals = NM_ALLOCA_N(VALUE, s->shape[0]);
+
+    nm_register_values(vals, s->shape[0]);
 
     if (NM_DTYPE(self) == nm::RUBYOBJ) {
       for (size_t i = 0; i < s->shape[0]; ++i) {
@@ -1502,12 +1716,16 @@ static VALUE nm_d(int argc, VALUE* argv, VALUE self) {
         vals[i] = rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*i, s->dtype).rval;
       }
     }
+    nm_unregister_values(vals, s->shape[0]);
+    NM_CONSERVATIVE(nm_unregister_value(idx));
+    NM_CONSERVATIVE(nm_unregister_value(self));
 
     return rb_ary_new4(s->shape[0], vals);
   } else {
     size_t index = FIX2INT(idx);
+    NM_CONSERVATIVE(nm_unregister_value(idx));
+    NM_CONSERVATIVE(nm_unregister_value(self));
     if (index >= s->shape[0]) rb_raise(rb_eRangeError, "out of range");
-
     return rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype] * index, s->dtype).rval;
   }
 }
@@ -1519,11 +1737,15 @@ static VALUE nm_d(int argc, VALUE* argv, VALUE self) {
  * Get the non-diagonal ("LU") portion of the A array of a Yale matrix.
  */
 static VALUE nm_lu(VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
+
   YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
 
   size_t size = nm_yale_storage_get_size(s);
 
-  VALUE* vals = ALLOCA_N(VALUE, size - s->shape[0] - 1);
+  VALUE* vals = NM_ALLOCA_N(VALUE, size - s->shape[0] - 1);
+
+  nm_register_values(vals, size - s->shape[0] - 1);
 
   if (NM_DTYPE(self) == nm::RUBYOBJ) {
     for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
@@ -1540,6 +1762,9 @@ static VALUE nm_lu(VALUE self) {
   for (size_t i = size; i < s->capacity; ++i)
     rb_ary_push(ary, Qnil);
 
+  nm_unregister_values(vals, size - s->shape[0] - 1);
+  NM_CONSERVATIVE(nm_unregister_value(self));
+
   return ary;
 }
 
@@ -1551,14 +1776,18 @@ static VALUE nm_lu(VALUE self) {
  * JA and LU portions of the IJA and A arrays, respectively.
  */
 static VALUE nm_ia(VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
+
   YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
 
-  VALUE* vals = ALLOCA_N(VALUE, s->shape[0] + 1);
+  VALUE* vals = NM_ALLOCA_N(VALUE, s->shape[0] + 1);
 
   for (size_t i = 0; i < s->shape[0] + 1; ++i) {
     vals[i] = INT2FIX(s->ija[i]);
   }
 
+  NM_CONSERVATIVE(nm_unregister_value(self)); 
+
   return rb_ary_new4(s->shape[0]+1, vals);
 }
 
@@ -1570,11 +1799,16 @@ static VALUE nm_ia(VALUE self) {
  * positions in the LU portion of the A array.
  */
 static VALUE nm_ja(VALUE self) {
+
+  NM_CONSERVATIVE(nm_register_value(self));
+
   YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
 
   size_t size = nm_yale_storage_get_size(s);
 
-  VALUE* vals = ALLOCA_N(VALUE, size - s->shape[0] - 1);
+  VALUE* vals = NM_ALLOCA_N(VALUE, size - s->shape[0] - 1);
+
+  nm_register_values(vals, size - s->shape[0] - 1);
 
   for (size_t i = 0; i < size - s->shape[0] - 1; ++i) {
     vals[i] = INT2FIX(s->ija[s->shape[0] + 1 + i]);
@@ -1585,6 +1819,9 @@ static VALUE nm_ja(VALUE self) {
   for (size_t i = size; i < s->capacity; ++i)
     rb_ary_push(ary, Qnil);
 
+  nm_unregister_values(vals, size - s->shape[0] - 1);
+  NM_CONSERVATIVE(nm_unregister_value(self));
+
   return ary;
 }
 
@@ -1596,15 +1833,20 @@ static VALUE nm_ja(VALUE self) {
  * Get the IJA array of a Yale matrix (or a component of the IJA array).
  */
 static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
+  NM_CONSERVATIVE(nm_register_value(self));
+
   VALUE idx;
   rb_scan_args(argc, argv, "01", &idx);
+  NM_CONSERVATIVE(nm_register_value(idx));
 
   YALE_STORAGE* s = reinterpret_cast<YALE_STORAGE*>(NM_SRC(self));
   size_t size = nm_yale_storage_get_size(s);
 
   if (idx == Qnil) {
 
-    VALUE* vals = ALLOCA_N(VALUE, size);
+    VALUE* vals = NM_ALLOCA_N(VALUE, size);
+
+    nm_register_values(vals, size);
 
     for (size_t i = 0; i < size; ++i) {
       vals[i] = INT2FIX(s->ija[i]);
@@ -1615,12 +1857,17 @@ static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
     for (size_t i = size; i < s->capacity; ++i)
       rb_ary_push(ary, Qnil);
 
+    nm_unregister_values(vals, size);
+    NM_CONSERVATIVE(nm_unregister_value(idx));
+    NM_CONSERVATIVE(nm_unregister_value(self));
+
     return ary;
 
   } else {
     size_t index = FIX2INT(idx);
     if (index >= size) rb_raise(rb_eRangeError, "out of range");
-
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    NM_CONSERVATIVE(nm_unregister_value(idx));
     return INT2FIX(s->ija[index]);
   }
 }
@@ -1638,11 +1885,18 @@ static VALUE nm_ija(int argc, VALUE* argv, VALUE self) {
  * range.
  */
 static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
-  if (NM_SRC(self) != NM_STORAGE(self))
+
+  NM_CONSERVATIVE(nm_register_value(self));
+  
+  if (NM_SRC(self) != NM_STORAGE(self)) {
+    NM_CONSERVATIVE(nm_unregister_value(self));
     rb_raise(rb_eNotImpError, "must be called on a real matrix and not a slice");
+  }  
 
   VALUE i_, as;
   rb_scan_args(argc, argv, "11", &i_, &as);
+  NM_CONSERVATIVE(nm_register_value(as));
+  NM_CONSERVATIVE(nm_register_value(i_));
 
   bool keys = false;
   if (as != Qnil && rb_to_id(as) != nm_rb_hash) keys = true;
@@ -1650,7 +1904,14 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
   size_t i = FIX2INT(i_);
 
   YALE_STORAGE* s   = NM_STORAGE_YALE(self);
-  nm::dtype_t dtype = NM_DTYPE(self);
+  //nm::dtype_t dtype = NM_DTYPE(self);
+
+  if (i >= s->shape[0]) {
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    NM_CONSERVATIVE(nm_unregister_value(as));
+    NM_CONSERVATIVE(nm_unregister_value(i_));
+    rb_raise(rb_eRangeError, "out of range (%lu >= %lu)", i, s->shape[0]);
+  }
 
   size_t pos = s->ija[i];
   size_t nextpos = s->ija[i+1];
@@ -1671,7 +1932,9 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
       rb_hash_aset(ret, INT2FIX(s->ija[idx]), rubyobj_from_cval((char*)(s->a) + DTYPE_SIZES[s->dtype]*idx, s->dtype).rval);
     }
   }
-
+  NM_CONSERVATIVE(nm_unregister_value(as));
+  NM_CONSERVATIVE(nm_unregister_value(i_));
+  NM_CONSERVATIVE(nm_unregister_value(self));
   return ret;
 }
 
@@ -1706,18 +1969,32 @@ static VALUE nm_nd_row(int argc, VALUE* argv, VALUE self) {
  */
 VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv, VALUE vv, VALUE pos_) {
 
-  if (NM_SRC(self) != NM_STORAGE(self))
+  NM_CONSERVATIVE(nm_register_value(self));
+
+  if (NM_SRC(self) != NM_STORAGE(self)) {
+    NM_CONSERVATIVE(nm_unregister_value(self));
     rb_raise(rb_eNotImpError, "must be called on a real matrix and not a slice");
+  }
 
   // i, jv, vv are mandatory; pos is optional; thus "31"
   VALUE i_, jv, vv, pos_;
   rb_scan_args(argc, argv, "31", &i_, &jv, &vv, &pos_);
+  NM_CONSERVATIVE(nm_register_value(i_));
+  NM_CONSERVATIVE(nm_register_value(jv));
+  NM_CONSERVATIVE(nm_register_value(vv));
+  NM_CONSERVATIVE(nm_register_value(pos_));
 
   size_t len   = RARRAY_LEN(jv); // need length in order to read the arrays in
   size_t vvlen = RARRAY_LEN(vv);
 
-  if (len != vvlen)
-    rb_raise(rb_eArgError, "lengths must match between j array (%d) and value array (%d)", len, vvlen);
+  if (len != vvlen) {
+    NM_CONSERVATIVE(nm_unregister_value(pos_));
+    NM_CONSERVATIVE(nm_unregister_value(vv));
+    NM_CONSERVATIVE(nm_unregister_value(jv));
+    NM_CONSERVATIVE(nm_unregister_value(i_));
+    NM_CONSERVATIVE(nm_unregister_value(self));
+    rb_raise(rb_eArgError, "lengths must match between j array (%lu) and value array (%lu)", len, vvlen);
+  }
 
   YALE_STORAGE* s   = NM_STORAGE_YALE(self);
   nm::dtype_t dtype = NM_DTYPE(self);
@@ -1726,8 +2003,11 @@ VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv,
   size_t pos = s->ija[i];
 
   // Allocate the j array and the values array
-  size_t* j  = ALLOCA_N(size_t, len);
-  void* vals = ALLOCA_N(char, DTYPE_SIZES[dtype] * len);
+  size_t* j  = NM_ALLOCA_N(size_t, len);
+  void* vals = NM_ALLOCA_N(char, DTYPE_SIZES[dtype] * len);
+  if (dtype == nm::RUBYOBJ){
+    nm_register_values(reinterpret_cast<VALUE*>(vals), len);
+  }
 
   // Copy array contents
   for (size_t idx = 0; idx < len; ++idx) {
@@ -1739,6 +2019,16 @@ VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv,
   nm_yale_storage_increment_ia_after(s, s->shape[0], i, len);
   s->ndnz += len;
 
+  if (dtype == nm::RUBYOBJ){
+    nm_unregister_values(reinterpret_cast<VALUE*>(vals), len);
+  }
+
+  NM_CONSERVATIVE(nm_unregister_value(pos_));
+  NM_CONSERVATIVE(nm_unregister_value(vv));
+  NM_CONSERVATIVE(nm_unregister_value(jv));
+  NM_CONSERVATIVE(nm_unregister_value(i_));
+  NM_CONSERVATIVE(nm_unregister_value(self));
+
   // Return the updated position
   pos += len;
   return INT2FIX(pos);
@@ -1754,7 +2044,8 @@ VALUE nm_vector_set(int argc, VALUE* argv, VALUE self) { //, VALUE i_, VALUE jv,
  * Get the default_value property from a yale matrix.
  */
 VALUE nm_yale_default_value(VALUE self) {
-  return default_value(NM_STORAGE_YALE(self));
+  VALUE to_return = default_value(NM_STORAGE_YALE(self));
+  return to_return;
 }
 
 
diff --git a/ext/nmatrix/storage/yale/yale.h b/ext/nmatrix/storage/yale/yale.h
index a9a7dad..9782964 100644
--- a/ext/nmatrix/storage/yale/yale.h
+++ b/ext/nmatrix/storage/yale/yale.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -89,7 +89,11 @@ extern "C" {
   void          nm_yale_storage_delete_ref(STORAGE* s);
   void					nm_yale_storage_init(YALE_STORAGE* s, void* default_val);
   void					nm_yale_storage_mark(STORAGE*);
-
+  void          nm_yale_storage_register(const STORAGE* s);
+  void          nm_yale_storage_unregister(const STORAGE* s);
+  void		nm_yale_storage_register_a(void* a, size_t size);
+  void		nm_yale_storage_unregister_a(void* a, size_t size); 
+    
   ///////////////
   // Accessors //
   ///////////////
diff --git a/ext/nmatrix/types.h b/ext/nmatrix/types.h
index cf77ceb..1a972f2 100644
--- a/ext/nmatrix/types.h
+++ b/ext/nmatrix/types.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/util/io.cpp b/ext/nmatrix/util/io.cpp
index ada64f2..0a1d05e 100644
--- a/ext/nmatrix/util/io.cpp
+++ b/ext/nmatrix/util/io.cpp
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -75,7 +75,7 @@ template <typename DType, typename MDType>
 char* matlab_cstring_to_dtype_string(size_t& result_len, const char* str, size_t bytes) {
 
   result_len   = sizeof(DType) * bytes / sizeof(MDType);
-  char* result = ALLOC_N(char, result_len);
+  char* result = NM_ALLOC_N(char, result_len);
 
   if (bytes % sizeof(MDType) != 0) {
     rb_raise(rb_eArgError, "the given string does not divide evenly for the given MATLAB dtype");
@@ -223,7 +223,7 @@ static VALUE nm_rbstring_matlab_repack(VALUE self, VALUE str, VALUE from, VALUE
 
   // Encode as 8-bit ASCII with a length -- don't want to hiccup on \0
   VALUE result = rb_str_new(repacked_data, repacked_data_length);
-  xfree(repacked_data); // Don't forget to free what we allocated!
+  NM_FREE(repacked_data); // Don't forget to free what we allocated!
 
   return result;
 }
@@ -246,7 +246,7 @@ static VALUE nm_rbstring_merge(VALUE self, VALUE rb_real, VALUE rb_imaginary, VA
   char *real        = RSTRING_PTR(rb_real),
        *imag        = RSTRING_PTR(rb_imaginary);
 
-  char* merge       = ALLOCA_N(char, RSTRING_LEN(rb_real)*2);
+  char* merge       = NM_ALLOCA_N(char, RSTRING_LEN(rb_real)*2);
 
   size_t merge_pos  = 0;
 
diff --git a/ext/nmatrix/util/io.h b/ext/nmatrix/util/io.h
index d038a5c..a2b9759 100644
--- a/ext/nmatrix/util/io.h
+++ b/ext/nmatrix/util/io.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/ext/nmatrix/util/sl_list.cpp b/ext/nmatrix/util/sl_list.cpp
index fe35e27..a9c6e5f 100644
--- a/ext/nmatrix/util/sl_list.cpp
+++ b/ext/nmatrix/util/sl_list.cpp
@@ -1,6 +1,16 @@
+/////////////////////////////////////////////////////////////////////
+// = NMatrix
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// A linear algebra library for scientific computation in Ruby.
+// NMatrix is part of SciRuby.
+//
+// NMatrix was originally inspired by and derived from NArray, by
+// Masahiro Tanaka: http://narray.rubyforge.org
+//
+// == Copyright Information
+//
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -31,6 +41,8 @@
 
 #include "sl_list.h"
 
+#include "storage/list/list.h"
+
 namespace nm { namespace list {
 
 /*
@@ -58,7 +70,7 @@ namespace nm { namespace list {
  * Creates an empty linked list.
  */
 LIST* create(void) {
-  LIST* list = ALLOC( LIST );
+  LIST* list = NM_ALLOC( LIST );
   list->first = NULL;
   return list;
 }
@@ -77,18 +89,19 @@ void del(LIST* list, size_t recursions) {
 
     if (recursions == 0) {
       //fprintf(stderr, "    free_val: %p\n", curr->val);
-      xfree(curr->val);
+      nm_list_storage_completely_unregister_node(curr);
+      NM_FREE(curr->val);
       
     } else {
       //fprintf(stderr, "    free_list: %p\n", list);
       del((LIST*)curr->val, recursions - 1);
     }
 
-    xfree(curr);
+    NM_FREE(curr);
     curr = next;
   }
   //fprintf(stderr, "    free_list: %p\n", list);
-  xfree(list);
+  NM_FREE(list);
 }
 
 /*
@@ -122,10 +135,10 @@ void mark(LIST* list, size_t recursions) {
  * checks, just inserts.
  */
 NODE* insert_first_node(LIST* list, size_t key, void* val, size_t val_size) {
-  NODE* ins   = ALLOC(NODE);
+  NODE* ins   = NM_ALLOC(NODE);
   ins->next   = list->first;
 
-  void* val_copy = ALLOC_N(char, val_size);
+  void* val_copy = NM_ALLOC_N(char, val_size);
   memcpy(val_copy, val, val_size);
 
   ins->val    = reinterpret_cast<void*>(val_copy);
@@ -136,7 +149,7 @@ NODE* insert_first_node(LIST* list, size_t key, void* val, size_t val_size) {
 }
 
 NODE* insert_first_list(LIST* list, size_t key, LIST* l) {
-  NODE* ins   = ALLOC(NODE);
+  NODE* ins   = NM_ALLOC(NODE);
   ins->next   = list->first;
 
   ins->val    = reinterpret_cast<void*>(l);
@@ -160,7 +173,7 @@ NODE* insert(LIST* list, bool replace, size_t key, void* val) {
   	// List is empty
   	
     //if (!(ins = malloc(sizeof(NODE)))) return NULL;
-    ins = ALLOC(NODE);
+    ins = NM_ALLOC(NODE);
     ins->next             = NULL;
     ins->val              = val;
     ins->key              = key;
@@ -172,7 +185,7 @@ NODE* insert(LIST* list, bool replace, size_t key, void* val) {
   	// Goes at the beginning of the list
   	
     //if (!(ins = malloc(sizeof(NODE)))) return NULL;
-    ins = ALLOC(NODE);
+    ins = NM_ALLOC(NODE);
     ins->next             = list->first;
     ins->val              = val;
     ins->key              = key;
@@ -187,11 +200,11 @@ NODE* insert(LIST* list, bool replace, size_t key, void* val) {
   if (ins->key == key) {
     // key already exists
     if (replace) {
-      xfree(ins->val);
+      nm_list_storage_completely_unregister_node(ins);
+      NM_FREE(ins->val);
       ins->val = val;
-      
     } else {
-    	xfree(val);
+      NM_FREE(val);
     }
     
     return ins;
@@ -208,7 +221,7 @@ NODE* insert(LIST* list, bool replace, size_t key, void* val) {
  */
 NODE* insert_after(NODE* node, size_t key, void* val) {
   //if (!(ins = malloc(sizeof(NODE)))) return NULL;
-  NODE* ins = ALLOC(NODE);
+  NODE* ins = NM_ALLOC(NODE);
 
   // insert 'ins' between 'node' and 'node->next'
   ins->next  = node->next;
@@ -231,7 +244,7 @@ NODE* replace_insert_after(NODE* node, size_t key, void* val, bool copy, size_t
     // Should we copy into the current one or free and insert?
     if (copy) memcpy(node->next->val, val, copy_size);
     else {
-      xfree(node->next->val);
+      NM_FREE(node->next->val);
       node->next->val = val;
     }
 
@@ -240,7 +253,7 @@ NODE* replace_insert_after(NODE* node, size_t key, void* val, bool copy, size_t
   } else { // no next node, or if there is one, it's greater than the current key
 
     if (copy) {
-      void* val_copy = ALLOC_N(char, copy_size);
+      void* val_copy = NM_ALLOC_N(char, copy_size);
       memcpy(val_copy, val, copy_size);
       return insert_after(node, key, val_copy);
     } else {
@@ -256,7 +269,7 @@ NODE* replace_insert_after(NODE* node, size_t key, void* val, bool copy, size_t
  * Functions analogously to list::insert but this inserts a copy of the value instead of the original.
  */
 NODE* insert_copy(LIST *list, bool replace, size_t key, void *val, size_t size) {
-  void *copy_val = ALLOC_N(char, size);
+  void *copy_val = NM_ALLOC_N(char, size);
   memcpy(copy_val, val, size);
 
   return insert(list, replace, key, copy_val);
@@ -272,7 +285,7 @@ void* remove_by_node(LIST* list, NODE* prev, NODE* rm) {
   else        prev->next  = rm->next;
 
   void* val   = rm->val;
-  xfree(rm);
+  NM_FREE(rm);
 
   return val;
 }
@@ -296,7 +309,7 @@ void* remove_by_key(LIST* list, size_t key) {
     rm  = list->first;
     
     list->first = rm->next;
-    xfree(rm);
+    NM_FREE(rm);
     
     return val;
   }
@@ -313,7 +326,7 @@ void* remove_by_key(LIST* list, size_t key) {
 
     // get the value and free the memory for the node
     val = rm->val;
-    xfree(rm);
+    NM_FREE(rm);
 
     return val;
   }
@@ -348,7 +361,7 @@ bool remove_recursive(LIST* list, const size_t* coords, const size_t* offsets, c
 
       if (remove_parent) { // now empty -- so remove the sub-list
 //        std::cerr << r << ": removing parent list at " << n->key << std::endl;
-        xfree(remove_by_node(list, prev, n));
+        NM_FREE(remove_by_node(list, prev, n));
 
         if (prev) n  = prev->next && node_is_within_slice(prev->next, coords[r] + offsets[r], lengths[r]) ? prev->next : NULL;
         else      n  = node_is_within_slice(list->first, coords[r] + offsets[r], lengths[r]) ? list->first : NULL;
@@ -367,7 +380,7 @@ bool remove_recursive(LIST* list, const size_t* coords, const size_t* offsets, c
 
     while (n) {
 //      std::cerr << r << ": removing node at " << n->key << std::endl;
-      xfree(remove_by_node(list, prev, n));
+      NM_FREE(remove_by_node(list, prev, n));
 
       if (prev) n  = prev->next && node_is_within_slice(prev->next, coords[r] + offsets[r], lengths[r]) ? prev->next : NULL;
       else      n  = node_is_within_slice(list->first, coords[r] + offsets[r], lengths[r]) ? list->first : NULL;
@@ -505,7 +518,7 @@ void cast_copy_contents(LIST* lhs, const LIST* rhs, size_t recursions) {
   if (rhs->first) {
     // copy head node
     rcurr = rhs->first;
-    lcurr = lhs->first = ALLOC( NODE );
+    lcurr = lhs->first = NM_ALLOC( NODE );
 
     while (rcurr) {
       lcurr->key = rcurr->key;
@@ -513,14 +526,14 @@ void cast_copy_contents(LIST* lhs, const LIST* rhs, size_t recursions) {
       if (recursions == 0) {
       	// contents is some kind of value
 
-        lcurr->val = ALLOC( LDType );
+        lcurr->val = NM_ALLOC( LDType );
 
         *reinterpret_cast<LDType*>(lcurr->val) = *reinterpret_cast<RDType*>( rcurr->val );
 
       } else {
       	// contents is a list
 
-        lcurr->val = ALLOC( LIST );
+        lcurr->val = NM_ALLOC( LIST );
 
         cast_copy_contents<LDType, RDType>(
           reinterpret_cast<LIST*>(lcurr->val),
@@ -530,7 +543,7 @@ void cast_copy_contents(LIST* lhs, const LIST* rhs, size_t recursions) {
       }
 
       if (rcurr->next) {
-      	lcurr->next = ALLOC( NODE );
+      	lcurr->next = NM_ALLOC( NODE );
 
       } else {
       	lcurr->next = NULL;
diff --git a/ext/nmatrix/util/sl_list.h b/ext/nmatrix/util/sl_list.h
index 803356c..d44070f 100644
--- a/ext/nmatrix/util/sl_list.h
+++ b/ext/nmatrix/util/sl_list.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
@@ -90,7 +90,7 @@ bool node_is_within_slice(NODE* n, size_t coord, size_t len);
 
 template <typename Type>
 inline NODE* insert_helper(LIST* list, NODE* node, size_t key, Type val) {
-	Type* val_mem = ALLOC(Type);
+	Type* val_mem = NM_ALLOC(Type);
 	*val_mem = val;
 	
 	if (node == NULL) {
diff --git a/ext/nmatrix/util/util.h b/ext/nmatrix/util/util.h
index 6ad4205..d4e6e5d 100644
--- a/ext/nmatrix/util/util.h
+++ b/ext/nmatrix/util/util.h
@@ -9,8 +9,8 @@
 //
 // == Copyright Information
 //
-// SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-// NMatrix is Copyright (c) 2013, Ruby Science Foundation
+// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 //
 // Please see LICENSE.txt for additional copyright notices.
 //
diff --git a/lib/nmatrix.rb b/lib/nmatrix.rb
index d53dcf5..6a1aec6 100644
--- a/lib/nmatrix.rb
+++ b/lib/nmatrix.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
diff --git a/lib/nmatrix/blas.rb b/lib/nmatrix/blas.rb
index 79172c8..e70135a 100644
--- a/lib/nmatrix/blas.rb
+++ b/lib/nmatrix/blas.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
diff --git a/lib/nmatrix/enumerate.rb b/lib/nmatrix/enumerate.rb
index d41a991..26e5884 100644
--- a/lib/nmatrix/enumerate.rb
+++ b/lib/nmatrix/enumerate.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -66,9 +66,12 @@ class NMatrix
   #
   # Returns an NMatrix if a block is given. For an Array, use #flat_map
   #
+  # Note that #map will always return an :object matrix, because it has no way of knowing
+  # how to handle operations on the different dtypes.
+  #
   def map(&bl)
     return enum_for(:map) unless block_given?
-    cp = self.dup
+    cp = self.cast(dtype: :object)
     cp.map! &bl
     cp
   end
@@ -83,10 +86,18 @@ class NMatrix
   #
   def map!
     return enum_for(:map!) unless block_given?
+    iterated = false
     self.each_stored_with_indices do |e, *i|
+      iterated = true
       self[*i] = (yield e)
     end
-    self
+    #HACK: if there's a single element in a non-dense matrix, it won't iterate and
+    #won't change the default value; this ensures that it does get changed.
+    unless iterated then
+      self.each_with_indices do |e, *i|
+        self[*i] = (yield e)
+      end
+    end
   end
 
 
@@ -215,7 +226,7 @@ class NMatrix
     first_as_acc = false
 
     if initial then
-      acc = NMatrix.new(new_shape, initial, :dtype => dtype || self.dtype)
+      acc = NMatrix.new(new_shape, initial, :dtype => dtype || self.dtype, stype: self.stype)
     else
       each_rank(dimen) do |sub_mat|
         acc = (sub_mat.is_a?(NMatrix) and !dtype.nil? and dtype != self.dtype) ? sub_mat.cast(self.stype, dtype) : sub_mat
@@ -238,4 +249,4 @@ class NMatrix
   alias :reduce_along_dim :inject_rank
   alias :inject_along_dim :inject_rank
 
-end
\ No newline at end of file
+end
diff --git a/lib/nmatrix/io/market.rb b/lib/nmatrix/io/market.rb
index 974d0f0..fb09d61 100644
--- a/lib/nmatrix/io/market.rb
+++ b/lib/nmatrix/io/market.rb
@@ -1,4 +1,3 @@
-#--
 # = NMatrix
 #
 # A linear algebra library for scientific computation in Ruby.
@@ -9,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
diff --git a/lib/nmatrix/io/mat5_reader.rb b/lib/nmatrix/io/mat5_reader.rb
index c190775..e4dc44f 100644
--- a/lib/nmatrix/io/mat5_reader.rb
+++ b/lib/nmatrix/io/mat5_reader.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
diff --git a/lib/nmatrix/io/mat_reader.rb b/lib/nmatrix/io/mat_reader.rb
index 9d01edc..39d1fc9 100644
--- a/lib/nmatrix/io/mat_reader.rb
+++ b/lib/nmatrix/io/mat_reader.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
diff --git a/lib/nmatrix/lapack.rb b/lib/nmatrix/lapack.rb
index 160a59d..829da4d 100644
--- a/lib/nmatrix/lapack.rb
+++ b/lib/nmatrix/lapack.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -64,16 +64,16 @@ class NMatrix
       #   - +nrhs+ ->
       #   - +a+ ->
       #   - +lda+ ->
-      #   - +ipiv+ ->
       #   - +b+ ->
       #   - +ldb+ ->
+      #   - +ipiv+ -> A pivot array (if nil, one will be generated with +clapack_getrf+)
       # * *Returns* :
       #   -
       # * *Raises* :
       #   - ++ ->
       #
-      def clapack_gesv(order, n, nrhs, a, lda, ipiv, b, ldb)
-        clapack_getrf(order, n, n, a, lda, ipiv)
+      def clapack_gesv(order, n, nrhs, a, lda, b, ldb, ipiv=nil)
+        ipiv ||= clapack_getrf(order, n, n, a, lda)
         clapack_getrs(order, :no_transpose, n, nrhs, a, lda, ipiv, b, ldb)
       end
 
@@ -120,47 +120,6 @@ class NMatrix
         clapack_potrs(order, uplo, n, nrhs, a, lda, b, ldb)
       end
 
-      #
-      # call-seq:
-      #     gesvd(matrix, type)
-      # 
-      #
-      # * *Arguments* :
-      #   - +matrix+ -> matrix for which to compute the singular values ##TODO make this a self
-      #   - +type+ -> :all_values, :both, :left, :right, :left_matrix, :right_matrix, :overwrite_right, :overwrite_left, :none , or signifying what combination of singular values and matrices are desired in your output.
-      # * *Returns* :
-      #   - Array with the result values in an array
-      # * *Raises* :
-      #   - +ArgumentError+ -> Expected dense NMatrix as first argument.
-      #
-      def gesvd(matrix, type = :both)
-        raise ArgumentError, 'Expected dense NMatrix as first argument.' unless matrix.is_a?(NMatrix) and matrix.stype == :dense
-        #define jobu, jobvt
-        jobu, jobvt = :none, :none
-        case type
-        when :both
-         jobu, jobvt = :all, :all
-        when :arrays
-          jobu, jobvt = :return, :return
-        when :left
-          jobu = :return
-        when :right
-          jobvt = :return
-        end
-        
-        # Build up the u and vt matrices
-        m, n = matrix.shape
-        dtype = matrix.dtype
-        s_matrix = NMatrix.new([1,matrix.shape.min], dtype: dtype)
-        u_matrix = NMatrix.new([m,m], dtype: dtype)
-        v_matrix = NMatrix.new([n,n], dtype: dtype)
-        # test this
-        s = gesvd(type, matrix, s_matrix, u_matrix, v_matrix)
-
-        # what should this return?
-        [s_matrix, u_matrix, v_matrix]
-      end # #svd
-
       #     laswp(matrix, ipiv) -> NMatrix
       #
       # Permute the columns of a matrix (in-place) according to the Array +ipiv+.
@@ -173,6 +132,47 @@ class NMatrix
         clapack_laswp(matrix.shape[0], matrix, matrix.shape[1], 0, ipiv.size-1, ipiv, 1)
       end
 
+      def alloc_svd_result(matrix)
+        [
+          NMatrix.new(matrix.shape[0], dtype: matrix.dtype),
+          NMatrix.new([matrix.shape[0],1], dtype: matrix.dtype),
+          NMatrix.new(matrix.shape[1], dtype: matrix.dtype)
+        ]
+      end
+
+      #
+      # call-seq:
+      #     gesvd -> [u, sigma, v_transpose]
+      #     gesvd -> [u, sigma, v_conjugate_transpose] # complex
+      #
+      # Compute the singular value decomposition of a matrix using LAPACK's GESVD function.
+      #
+      # Optionally accepts a +workspace_size+ parameter, which will be honored only if it is larger than what LAPACK
+      # requires.
+      #
+      def gesvd(matrix, workspace_size=1)
+        result = alloc_svd_result(matrix)
+        NMatrix::LAPACK::lapack_gesvd(:a, :a, matrix.shape[0], matrix.shape[1], matrix, matrix.shape[0], result[1], result[0], matrix.shape[0], result[2], matrix.shape[1], workspace_size)
+        result
+      end
+
+      #
+      # call-seq:
+      #     gesdd -> [u, sigma, v_transpose]
+      #     gesdd -> [u, sigma, v_conjugate_transpose] # complex
+      #
+      # Compute the singular value decomposition of a matrix using LAPACK's GESDD function. This uses a divide-and-conquer
+      # strategy. See also #gesvd.
+      #
+      # Optionally accepts a +workspace_size+ parameter, which will be honored only if it is larger than what LAPACK
+      # requires.
+      #
+      def gesdd(matrix, workspace_size=100000)
+        result = alloc_svd_result(matrix)
+        NMatrix::LAPACK::lapack_gesdd(:a, matrix.shape[0], matrix.shape[1], matrix, matrix.shape[0], result[1], result[0], matrix.shape[0], result[2], matrix.shape[1], workspace_size)
+        result
+      end
+
     end
   end
 end
diff --git a/lib/nmatrix/math.rb b/lib/nmatrix/math.rb
index 551a45f..833d44c 100644
--- a/lib/nmatrix/math.rb
+++ b/lib/nmatrix/math.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -29,6 +29,13 @@
 #++
 
 class NMatrix
+
+  module NMMath
+    METHODS_ARITY_2 = [:atan2, :ldexp, :hypot]
+    METHODS_ARITY_1 = [:cos, :sin, :tan, :acos, :asin, :atan, :cosh, :sinh, :tanh, :acosh,
+      :asinh, :atanh, :exp, :log2, :log10, :sqrt, :cbrt, :erf, :erfc, :gamma]
+  end
+
   #
   # call-seq:
   #     invert! -> NMatrix
@@ -75,10 +82,66 @@ class NMatrix
   #   - +StorageTypeError+ -> ATLAS functions only work on dense matrices.
   #
   def getrf!
-    raise(StorageTypeError, "ATLAS functions only work on dense matrices") unless self.stype == :dense
+    raise(StorageTypeError, "ATLAS functions only work on dense matrices") unless self.dense?
     NMatrix::LAPACK::clapack_getrf(:row, self.shape[0], self.shape[1], self, self.shape[1])
   end
 
+
+  #
+  # call-seq:
+  #     getrf -> NMatrix
+  #
+  # Non-destructive counterpart of #getrf!: factorizes a clone of this matrix. Returns the new matrix, which contains L and U matrices.
+  #
+  # * *Raises* :
+  #   - +StorageTypeError+ -> ATLAS functions only work on dense matrices.
+  #
+  def getrf
+    a = self.clone
+    a.getrf!
+    return a
+  end
+
+
+  #
+  # call-seq:
+  #     potrf!(upper_or_lower) -> NMatrix
+  #
+  # Cholesky factorization of a symmetric positive-definite matrix -- or, if complex,
+  # a Hermitian positive-definite matrix +A+. This uses the ATLAS function clapack_potrf,
+  # so the result will be written in either the upper or lower triangular portion of the
+  # matrix upon which it is called.
+  #
+  # * *Returns* :
+  #   the triangular portion specified by the parameter
+  # * *Raises* :
+  #   - +StorageTypeError+ -> ATLAS functions only work on dense matrices.
+  #
+  def potrf!(which)
+    raise(StorageTypeError, "ATLAS functions only work on dense matrices") unless self.dense?
+    # FIXME: Surely there's an easy way to calculate one of these from the other. Do we really need to run twice?
+    NMatrix::LAPACK::clapack_potrf(:row, which, self.shape[0], self, self.shape[1])
+  end
+
+  def potrf_upper!
+    potrf! :upper
+  end
+
+  def potrf_lower!
+    potrf! :lower
+  end
+
+
+  #
+  # call-seq:
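+  #     factorize_cholesky -> [upper, lower]
+  #
+  # Cholesky factorization of a matrix. Returns a two-element Array: the upper-triangular result, then the lower-triangular result.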
+  def factorize_cholesky
+    [self.clone.potrf_upper!.triu!,
+    self.clone.potrf_lower!.tril!]
+  end
+
   #
   # call-seq:
   #     factorize_lu -> ...
@@ -97,12 +160,19 @@ class NMatrix
     t.transpose
   end
 
-  def alloc_svd_result
-    [
-      NMatrix.new(self.shape[0], dtype: self.dtype),
-      NMatrix.new([self.shape[0],1], dtype: self.dtype),
-      NMatrix.new(self.shape[1], dtype: self.dtype)
-    ]
+  #
+  # call-seq:
+  #     gesvd! -> [u, sigma, v_transpose]
+  #     gesvd! -> [u, sigma, v_conjugate_transpose] # complex
+  #
+  # Compute the singular value decomposition of a matrix using LAPACK's GESVD function. 
+  # This is destructive, modifying the source NMatrix.  See also #gesdd.
+  #
+  # Optionally accepts a +workspace_size+ parameter, which will be honored only if it is larger than what LAPACK
+  # requires.
+  #
+  def gesvd!(workspace_size=1)
+    NMatrix::LAPACK::gesvd(self, workspace_size)
   end
 
   #
@@ -116,12 +186,26 @@ class NMatrix
   # requires.
   #
   def gesvd(workspace_size=1)
-    result = alloc_svd_result
-    NMatrix::LAPACK::lapack_gesvd(:a, :a, self.shape[0], self.shape[1], self, self.shape[0], result[1], result[0], self.shape[0], result[2], self.shape[1], workspace_size)
-    result
+    self.clone.gesvd!(workspace_size)
   end
 
 
+
+  #
+  # call-seq:
+  #     gesdd! -> [u, sigma, v_transpose]
+  #     gesdd! -> [u, sigma, v_conjugate_transpose] # complex
+  #
+  # Compute the singular value decomposition of a matrix using LAPACK's GESDD function. This uses a divide-and-conquer
+  # strategy. This is destructive, modifying the source NMatrix.  See also #gesvd.
+  #
+  # Optionally accepts a +workspace_size+ parameter, which will be honored only if it is larger than what LAPACK
+  # requires.
+  #
+  def gesdd!(workspace_size=1)
+    NMatrix::LAPACK::gesdd(self, workspace_size)
+  end
+
   #
   # call-seq:
   #     gesdd -> [u, sigma, v_transpose]
@@ -134,11 +218,8 @@ class NMatrix
   # requires.
   #
   def gesdd(workspace_size=1)
-    result = alloc_svd_result
-    NMatrix::LAPACK::lapack_gesvd(:a, :a, self.shape[0], self.shape[1], self, self.shape[0], result[1], result[0], self.shape[0], result[2], self.shape[1], workspace_size)
-    result
+    self.clone.gesdd!(workspace_size)
   end
-
   #
   # call-seq:
   #     laswp!(ary) -> NMatrix
@@ -279,8 +360,8 @@ class NMatrix
       reduce_dtype = :float64
     end
     inject_rank(dimen, 0.0, reduce_dtype) do |mean, sub_mat|
-      mean + sub_mat/shape[dimen]
-    end
+      mean + sub_mat
+    end / shape[dimen]
   end
 
   ##
@@ -372,7 +453,7 @@ class NMatrix
   # @see #inject_rank
   #
   def std(dimen=0)
-    variance(dimen).map! { |e| Math.sqrt(e) }
+    variance(dimen).sqrt
   end
 
 
@@ -409,6 +490,38 @@ class NMatrix
     end.cast(self.stype, abs_dtype)
   end
 
+
+  #
+  # call-seq:
+  #     absolute_sum -> Numeric
+  #
+  # == Arguments
+  #   - +incx+ -> the skip size (defaults to 1, no skip)
+  #   - +n+ -> the number of elements to include (currently unused for vectors, which always pass self.size / incx)
+  #
+  # Return the sum of the absolute values of the contents of the vector. This is the BLAS asum routine.
+  def asum incx=1, n=nil
+    return method_missing(:asum, incx, n) unless vector?
+    NMatrix::BLAS::asum(self, incx, self.size / incx)
+  end
+  alias :absolute_sum :asum
+
+  #
+  # call-seq:
+  #     norm2 -> Numeric
+  #
+  # == Arguments
+  #   - +incx+ -> the skip size (defaults to 1, no skip)
+  #   - +n+ -> the number of elements to include (currently unused for vectors, which always pass self.size / incx)
+  #
+  # Return the 2-norm of the vector. This is the BLAS nrm2 routine.
+  def nrm2 incx=1, n=nil
+    return method_missing(:nrm2, incx, n) unless vector?
+    NMatrix::BLAS::nrm2(self, incx, self.size / incx)
+  end
+  alias :norm2 :nrm2
+
+
   alias :permute_columns  :laswp
   alias :permute_columns! :laswp!
 
@@ -437,6 +550,86 @@ protected
     end
   end
 
+  # These don't actually take an argument -- they're called reverse-polish style on the matrix.
+  # The results of this group are always cast to the upcast of the matrix dtype and :float64.
+  [:log2, :log10, :sqrt, :sin, :cos, :tan, :acos, :asin, :atan, :cosh, :sinh, :tanh, :acosh, :asinh, :atanh, :exp, :erf, :erfc, :gamma, :cbrt].each do |ewop|
+    define_method("__list_unary_#{ewop}__") do
+      self.__list_map_stored__(nil) { |l| Math.send(ewop, l) }.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+    define_method("__yale_unary_#{ewop}__") do
+      self.__yale_map_stored__ { |l| Math.send(ewop, l) }.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+    define_method("__dense_unary_#{ewop}__") do
+      self.__dense_map__ { |l| Math.send(ewop, l) }.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+  end
+
+  # log takes an optional single argument, the base.  Default to natural log.
+  def __list_unary_log__(base)
+    self.__list_map_stored__(nil) { |l| Math.log(l, base) }.cast(stype, NMatrix.upcast(dtype, :float64))
+  end
+
+  def __yale_unary_log__(base)
+    self.__yale_map_stored__ { |l| Math.log(l, base) }.cast(stype, NMatrix.upcast(dtype, :float64))
+  end
+
+  def __dense_unary_log__(base)
+    self.__dense_map__ { |l| Math.log(l, base) }.cast(stype, NMatrix.upcast(dtype, :float64))
+  end
+
+  # These take two arguments. One might be a matrix, and one might be a scalar.
+  # See also monkeys.rb, which contains Math module patches to let the first
+  # arg be a scalar
+  [:atan2, :ldexp, :hypot].each do |ewop|
+    define_method("__list_elementwise_#{ewop}__") do |rhs,order|
+      if order then
+        self.__list_map_merged_stored__(rhs, nil) { |r,l| Math.send(ewop,l,r) }
+      else
+        self.__list_map_merged_stored__(rhs, nil) { |l,r| Math.send(ewop,l,r) }
+      end.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+
+    define_method("__dense_elementwise_#{ewop}__") do |rhs, order|
+      if order then
+        self.__dense_map_pair__(rhs) { |r,l| Math.send(ewop,l,r) }
+      else
+        self.__dense_map_pair__(rhs) { |l,r| Math.send(ewop,l,r) }
+      end.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+
+    define_method("__yale_elementwise_#{ewop}__") do |rhs, order|
+      if order then
+        self.__yale_map_merged_stored__(rhs, nil) { |r,l| Math.send(ewop,l,r) }
+      else
+        self.__yale_map_merged_stored__(rhs, nil) { |l,r| Math.send(ewop,l,r) }
+      end.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+
+    define_method("__list_scalar_#{ewop}__") do |rhs,order|
+      if order then
+        self.__list_map_stored__(nil) { |l| Math.send(ewop, rhs, l) }
+      else
+        self.__list_map_stored__(nil) { |l| Math.send(ewop, l, rhs) }
+      end.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+
+    define_method("__yale_scalar_#{ewop}__") do |rhs,order|
+      if order then
+        self.__yale_map_stored__ { |l| Math.send(ewop, rhs, l) }
+      else
+        self.__yale_map_stored__ { |l| Math.send(ewop, l, rhs) }
+      end.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+
+    define_method("__dense_scalar_#{ewop}__") do |rhs,order|
+      if order
+        self.__dense_map__ { |l| Math.send(ewop, rhs, l) }
+      else
+        self.__dense_map__ { |l| Math.send(ewop, l, rhs) }
+      end.cast(stype, NMatrix.upcast(dtype, :float64))
+    end
+  end
+
   # Equality operators do not involve a cast. We want to get back matrices of TrueClass and FalseClass.
   {eqeq: :==, neq: :!=, lt: :<, gt: :>, leq: :<=, geq: :>=}.each_pair do |ewop, op|
     define_method("__list_elementwise_#{ewop}__") do |rhs|
@@ -459,4 +652,4 @@ protected
       self.__dense_map__ { |l| l.send(op,rhs) }
     end
   end
-end
\ No newline at end of file
+end
diff --git a/lib/nmatrix/monkeys.rb b/lib/nmatrix/monkeys.rb
index 2a12fc9..508efac 100644
--- a/lib/nmatrix/monkeys.rb
+++ b/lib/nmatrix/monkeys.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -26,6 +26,8 @@
 # Ruby core extensions for NMatrix.
 #++
 
+require 'nmatrix/math'
+
 #######################
 # Classes and Modules #
 #######################
@@ -60,3 +62,23 @@ class Object #:nodoc:
     value
   end
 end
+
+
+module Math
+  class << self # work on Math's singleton class so that Math.<meth> itself gets wrapped
+    NMatrix::NMMath::METHODS_ARITY_2.each do |meth|
+      define_method "nm_#{meth}" do |arg0, arg1| # NMatrix-aware replacement for the two-argument Math.<meth>
+        if arg0.is_a? NMatrix then
+          arg0.send(meth, arg1) # matrix on the left: delegate to the matrix's own method
+        elsif arg1.is_a? NMatrix then
+          arg1.send(meth, arg0, true) # matrix on the right; `true` presumably flags swapped operand order -- TODO confirm
+        else
+          self.send("old_#{meth}".to_sym, arg0, arg1) # no matrix involved: fall back to the original implementation
+        end
+      end
+      alias_method "old_#{meth}".to_sym, meth # keep the original reachable as old_<meth> (must happen before the next line)
+      alias_method meth, "nm_#{meth}".to_sym # Math.<meth> now goes through the wrapper
+    end
+  end
+end
+
diff --git a/lib/nmatrix/nmatrix.rb b/lib/nmatrix/nmatrix.rb
index 08a3dd7..5e1032f 100644
--- a/lib/nmatrix/nmatrix.rb
+++ b/lib/nmatrix/nmatrix.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -41,6 +41,7 @@ class NMatrix
         def load_mat file_path
           NMatrix::IO::Matlab::Mat5Reader.new(File.open(file_path, "rb+")).to_ruby
         end
+        alias :load :load_mat
       end
 
       # FIXME: Remove autoloads
@@ -76,7 +77,9 @@ class NMatrix
 
   # TODO: Make this actually pretty.
   def pretty_print(q) #:nodoc:
-    if self.dim > 3 || self.dim == 1
+    if self.shape.size > 1 and self.shape[1] > 100
+      self.inspect.pretty_print(q)
+    elsif self.dim > 3 || self.dim == 1
       self.to_a.pretty_print(q)
     else
       # iterate through the whole matrix and find the longest number
@@ -115,7 +118,6 @@ class NMatrix
   #alias :pp :pretty_print
 
 
-
   #
   # call-seq:
   #     cast(stype, dtype, default) -> NMatrix
@@ -150,13 +152,17 @@ class NMatrix
     else
       params << self.stype if params.size == 0
       params << self.dtype if params.size == 1
-      params << (self.stype == :dense ? 0 : self.default_value) if params.size == 2
-
+      #HACK: the default value can cause an exception if dtype is not complex
+      #and default_value is. (The ruby C code apparently won't convert these.)
+      #Perhaps this should be fixed in the C code (in rubyval_to_cval).
+      default_value = maybe_get_noncomplex_default_value(params[1])
+      params << (self.stype == :dense ? 0 : default_value) if params.size == 2
       self.cast_full(*params)
     end
 
   end
 
+
   #
   # call-seq:
   #     rows -> Integer
@@ -378,12 +384,283 @@ class NMatrix
   #   - +row_number+ -> Integer.
   #   - +get_by+ -> Type of slicing to use, +:copy+ or +:reference+.
   # * *Returns* :
-  #   - A NMatrix representing the requested row as a row vector.
+  #   - An NMatrix representing the requested row as a row vector.
   #
   def row(row_number, get_by = :copy)
     rank(0, row_number, get_by)
   end
 
+
+  #
+  # call-seq:
+  #     reshape(new_shape) -> NMatrix
+  #
+  # Clone a matrix, changing the shape in the process. Note that this function does not do a resize; the product of
+  # the new and old shapes' components must be equal.
+  #
+  # * *Arguments* :
+  #   - +new_shape+ -> Array of positive Fixnums.
+  # * *Returns* :
+  #   - A copy with a different shape.
+  #
+  def reshape new_shape
+    t = reshape_clone_structure(new_shape)
+    left_params  = [:*]*new_shape.size
+    right_params = [:*]*self.shape.size
+    t[*left_params] = self[*right_params]
+    t
+  end
+
+
+  #
+  # call-seq:
+  #     transpose -> NMatrix
+  #     transpose(permutation) -> NMatrix
+  #
+  # Clone a matrix, transposing it in the process. If the matrix is two-dimensional, the permutation is taken to be [1,0]
+  # automatically (switch dimension 0 with dimension 1). If the matrix is n-dimensional, you must provide a permutation
+  # of +0...n+.
+  #
+  # * *Arguments* :
+  #   - +permutation+ -> Optional Array giving a permutation.
+  # * *Returns* :
+  #   - A copy of the matrix, but transposed.
+  #
+  def transpose(permute = nil)
+    if self.dim <= 2 # Swap the two extents; +permute+ is not consulted here. This will give an error if dim is 1.
+      new_shape = [self.shape[1], self.shape[0]]
+    elsif permute.nil?
+      raise(ArgumentError, "need permutation array of size #{self.dim}")
+    elsif permute.sort.uniq != (0...self.dim).to_a
+      raise(ArgumentError, "invalid permutation array")
+    else
+      # Figure out the new shape based on the permutation given as an argument.
+      new_shape = permute.map { |p| self.shape[p] }
+    end
+
+    if self.dim > 2 # FIXME: For dense, several of these are basically equivalent to reshape.
+
+      # Make the new data structure.
+      t = self.reshape_clone_structure(new_shape)
+
+      self.each_stored_with_indices do |v,*indices|
+        p_indices = permute.map { |p| indices[p] }
+        t[*p_indices] = v
+      end
+      t
+    elsif self.list? # TODO: Need a C list transposition algorithm.
+      # Make the new data structure.
+      t = self.reshape_clone_structure(new_shape)
+
+      self.each_column.with_index do |col,j|
+        t[j,:*] = col.to_flat_array
+      end
+      t
+    else
+      # Every non-list stype (list is handled above) goes through clone_transpose, which does its own copy.
+      self.clone_transpose
+    end
+  end
+
+
+  #
+  # call-seq:
+  #     matrix1.concat(*m2) -> NMatrix
+  #     matrix1.concat(*m2, rank) -> NMatrix
+  #     matrix1.hconcat(*m2) -> NMatrix
+  #     matrix1.vconcat(*m2) -> NMatrix
+  #     matrix1.dconcat(*m3) -> NMatrix
+  #
+  # Joins two matrices together into a new larger matrix. Attempts to determine which direction to concatenate
+  # on by looking for the first common element of the matrix +shape+ in reverse. In other words, concatenating two
+  # columns together without supplying +rank+ will glue them into an n x 2 matrix.
+  #
+  # You can also use hconcat, vconcat, and dconcat for the first three ranks. concat performs an hconcat when no
+  # rank argument is provided.
+  #
+  # The two matrices must have the same +dim+.
+  #
+  # * *Arguments* :
+  #   - +matrices+ -> one or more matrices
+  #   - +rank+ -> Fixnum (for rank); alternatively, may use :row, :column, or :layer for 0, 1, 2, respectively
+  #
+  def concat *matrices
+    rank = nil
+    rank = matrices.pop unless matrices.last.is_a?(NMatrix)
+
+    # Find the first matching dimension and concatenate along that (unless rank is specified)
+    if rank.nil?
+      rank = self.dim-1
+      self.shape.reverse_each.with_index do |s,i|
+        matrices.each do |m|
+          if m.shape[i] != s
+            rank -= 1
+            break
+          end
+        end
+      end
+    elsif rank.is_a?(Symbol) # Convert to numeric
+      rank = {:row => 0, :column => 1, :col => 1, :lay => 2, :layer => 2}[rank]
+    end
+
+    # Need to figure out the new shape.
+    new_shape = self.shape.dup
+    new_shape[rank] = matrices.inject(self.shape[rank]) { |total,m| total + m.shape[rank] }
+
+    # Now figure out the options for constructing the concatenated matrix.
+    opts = {stype: self.stype, default: self.default_value, dtype: self.dtype}
+    if self.yale?
+      # We can generally predict the new capacity for Yale. Subtract out the number of rows
+      # for each matrix being concatenated, and then add in the number of rows for the new
+      # shape. That takes care of the diagonal. The rest of the capacity is represented by
+      # the non-diagonal non-default values.
+      new_cap = matrices.inject(self.capacity - self.shape[0]) do |total,m|
+        total + m.capacity - m.shape[0]
+      end - self.shape[0] + new_shape[0]
+      opts = {capacity: self.new_cap}.merge(opts)
+    end
+
+    # Do the actual construction.
+    n = NMatrix.new(new_shape, opts)
+
+    # Figure out where to start and stop the concatenation. We'll use NMatrices instead of
+    # Arrays because then we can do elementwise addition.
+    ranges = self.shape.map.with_index { |s,i| 0...self.shape[i] }
+
+    matrices.unshift(self)
+    matrices.each do |m|
+      n[*ranges] = m
+
+      # move over by the requisite amount
+      ranges[rank]  = (ranges[rank].first + m.shape[rank])...(ranges[rank].last + m.shape[rank])
+    end
+
+    n
+  end
+
+  def hconcat *matrices
+    concat(*matrices, :column) # concat translates :column to rank 1
+  end
+
+  def vconcat *matrices
+    concat(*matrices, :row) # concat translates :row to rank 0
+  end
+
+  def dconcat *matrices
+    concat(*matrices, :layer) # concat translates :layer to rank 2
+  end
+
+
+  #
+  # call-seq:
+  #     upper_triangle -> NMatrix
+  #     upper_triangle(k) -> NMatrix
+  #     triu -> NMatrix
+  #     triu(k) -> NMatrix
+  #
+  # Returns the upper triangular portion of a matrix. This is analogous to the +triu+ method
+  # in MATLAB.
+  #
+  # * *Arguments* :
+  #   - +k+ -> Positive integer. How many extra diagonals to include in the upper triangular portion.
+  #
+  def upper_triangle(k = 0)
+    raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2
+
+    t = self.clone_structure # result matrix; rows are filled in one by one below
+    (0...self.shape[0]).each do |i|
+      if i - k < 0 # the first kept column (i - k) lies left of the matrix: keep the whole row
+        t[i, :*] = self[i, :*]
+      else # NOTE(review): i - k is not clamped to shape[1]; behavior for tall matrices should be confirmed
+        t[i, 0...(i-k)]             = 0
+        t[i, (i-k)...self.shape[1]] = self[i, (i-k)...self.shape[1]]
+      end
+    end
+    t
+  end
+  alias :triu :upper_triangle
+
+
+  #
+  # call-seq:
+  #     upper_triangle! -> NMatrix
+  #     upper_triangle!(k) -> NMatrix
+  #     triu! -> NMatrix
+  #     triu!(k) -> NMatrix
+  #
+  # Deletes the lower triangular portion of the matrix (in-place) so only the upper portion remains.
+  #
+  # * *Arguments* :
+  #   - +k+ -> Integer. How many extra diagonals to include in the deletion.
+  #
+  def upper_triangle!(k = 0)
+    raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2
+
+    (0...self.shape[0]).each do |i|
+      if i - k >= 0 # rows with i - k < 0 are left untouched
+        self[i, 0...(i-k)] = 0 # zero the columns before i - k, directly in the receiver
+      end
+    end
+    self # the receiver itself is returned, not a copy
+  end
+  alias :triu! :upper_triangle!
+
+
+  #
+  # call-seq:
+  #     lower_triangle -> NMatrix
+  #     lower_triangle(k) -> NMatrix
+  #     tril -> NMatrix
+  #     tril(k) -> NMatrix
+  #
+  # Returns the lower triangular portion of a matrix. This is analogous to the +tril+ method
+  # in MATLAB.
+  #
+  # * *Arguments* :
+  #   - +k+ -> Integer. How many extra diagonals to include in the lower triangular portion.
+  #
+  def lower_triangle(k = 0)
+    raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2
+
+    t = self.clone_structure
+    (0...self.shape[0]).each do |i|
+      if i + k >= shape[0]
+        t[i, :*] = self[i, :*]
+      else
+        t[i, (i+k+1)...self.shape[1]] = 0
+        t[i, 0..(i+k)] = self[i, 0..(i+k)]
+      end
+    end
+    t
+  end
+  alias :tril :lower_triangle
+
+
+  #
+  # call-seq:
+  #     lower_triangle! -> NMatrix
+  #     lower_triangle!(k) -> NMatrix
+  #     tril! -> NMatrix
+  #     tril!(k) -> NMatrix
+  #
+  # Deletes the upper triangular portion of the matrix (in-place) so only the lower portion remains.
+  #
+  # * *Arguments* :
+  #   - +k+ -> Integer. How many extra diagonals to include in the deletion.
+  #
+  def lower_triangle!(k = 0)
+    raise(NotImplementedError, "only implemented for 2D matrices") if self.shape.size > 2
+
+    (0...self.shape[0]).each do |i|
+      if i + k < shape[0]
+        self[i, (i+k+1)...self.shape[1]] = 0
+      end
+    end
+    self
+  end
+  alias :tril! :lower_triangle!
+
+
   #
   # call-seq:
   #     layer(layer_number) -> NMatrix
@@ -435,6 +712,43 @@ class NMatrix
   end
 
 
+  #
+  # call-seq:
+  #     sorted_indices -> Array
+  #
+  # Returns an array of indices, ordered so that the corresponding values are sorted.
+  #
+  def sorted_indices
+    return method_missing(:sorted_indices) unless vector?
+    ary = self.to_flat_array
+    ary.each_index.sort_by { |i| ary[i] }  # from: http://stackoverflow.com/a/17841159/170300
+  end
+
+
+  #
+  # call-seq:
+  #     binned_sorted_indices -> Array
+  #
+  # Returns an array of arrays of indices, ordered so that the corresponding values are sorted. Works like
+  # +sorted_indices+, but groups together the indices of equal values.
+  #
+  def binned_sorted_indices
+    return method_missing(:sorted_indices) unless vector?
+    ary = self.to_flat_array
+    ary2 = []
+    last_bin = ary.each_index.sort_by { |i| [ary[i]] }.inject([]) do |result, element|
+      if result.empty? || ary[result[-1]] == ary[element]
+        result << element
+      else
+        ary2 << result
+        [element]
+      end
+    end
+    ary2 << last_bin unless last_bin.empty?
+    ary2
+  end
+
+
   def method_missing name, *args, &block #:nodoc:
     if name.to_s =~ /^__list_elementwise_.*__$/
       raise NotImplementedError, "requested undefined list matrix element-wise operation"
@@ -447,7 +761,7 @@ class NMatrix
 
 
   def respond_to?(method) #:nodoc:
-    if [:shuffle, :shuffle!, :each_with_index].include?(method.intern) # vector-only methods
+    if [:shuffle, :shuffle!, :each_with_index, :sorted_indices, :binned_sorted_indices, :nrm2, :asum].include?(method.intern) # vector-only methods
       return vector?
     elsif [:each_layer, :layer].include?(method.intern) # 3-or-more dimensions only
       return dim > 2
@@ -483,6 +797,35 @@ protected
   end
 
 
+  #
+  # call-seq:
+  #     clone_structure -> NMatrix
+  #
+  # This function is like clone, but it only copies the structure and the default value.
+  # None of the other values are copied. It takes an optional capacity argument. This is
+  # mostly only useful for dense, where you may not want to initialize; for other types,
+  # you should probably use +zeros_like+.
+  #
+  def clone_structure(capacity = nil)
+    opts = {stype: self.stype, default: self.default_value, dtype: self.dtype}
+    opts = {capacity: capacity}.merge(opts) if self.yale? # capacity is only passed along for Yale matrices
+    NMatrix.new(self.shape, opts) # same shape and options as self; no element values are copied here
+  end
+
+
+  # Clone the structure as needed for a reshape
+  def reshape_clone_structure(new_shape) #:nodoc:
+    raise(ArgumentError, "reshape cannot resize; size of new and old matrices must match") unless self.size == new_shape.inject(1) { |p,i| p *= i }
+
+    opts = {stype: self.stype, default: self.default_value, dtype: self.dtype}
+    if self.yale?
+      # We can generally predict the change in capacity for Yale.
+      opts = {capacity: self.capacity - self.shape[0] + new_shape[0]}.merge(opts)
+    end
+    NMatrix.new(new_shape, opts)
+  end
+
+
   # Helper for converting a matrix into an array of arrays recursively
   def to_a_rec(dimen = 0) #:nodoc:
     return self.flat_map { |v| v } if dimen == self.dim-1
@@ -500,8 +843,50 @@ protected
   def __sparse_initial_set__(ary) #:nodoc:
     self[0...self.shape[0],0...self.shape[1]] = ary
   end
+
+
+  # Function assumes the dimensions and such have already been tested.
+  #
+  # Called from inside NMatrix: nm_eqeq
+  #
+  # There are probably more efficient ways to do this, but currently it's unclear how.
+  # We could use +each_row+, but for list matrices, it's still going to need to make a
+  # reference to each of those rows, and that is going to require a seek.
+  #
+  # It might be more efficient to convert one sparse matrix type to the other with a
+  # cast and then run the comparison. For now, let's assume that people aren't going
+  # to be doing this very often, and we can optimize as needed.
+  def dense_eql_sparse? m #:nodoc:
+    m.each_with_indices do |v,*indices| # yields each value of m followed by its coordinates
+      return false if self[*indices] != v # the first mismatch settles the comparison
+    end
+
+    return true
+  end
+  alias :sparse_eql_sparse? :dense_eql_sparse?
+
+
+  #
+  # See the note in #cast about why this is necessary.
+  # If this is a non-dense matrix with a complex dtype and to_dtype is
+  # non-complex, then this will convert the default value to noncomplex.
+  # Returns 0 if dense.  Returns existing default_value if there isn't a
+  # mismatch.
+  #
+  def maybe_get_noncomplex_default_value(to_dtype) #:nodoc:
+    default_value = 0
+    unless self.stype == :dense then
+      if self.dtype.to_s.start_with?('complex') and not to_dtype.to_s.start_with?('complex') then
+        default_value = self.default_value.real
+      else
+        default_value = self.default_value
+      end
+    end
+    default_value
+  end
+
 end
 
 require_relative './shortcuts.rb'
 require_relative './math.rb'
-require_relative './enumerate.rb'
\ No newline at end of file
+require_relative './enumerate.rb'
diff --git a/lib/nmatrix/nvector.rb b/lib/nmatrix/nvector.rb
index af77839..8b62382 100644
--- a/lib/nmatrix/nvector.rb
+++ b/lib/nmatrix/nvector.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -159,68 +159,6 @@ class NVector < NMatrix
     min_so_far
   end
 
-  #
-  # call-seq:
-  #     absolute_sum -> Numeric
-  #
-  # == Arguments
-  #   - +incx+ -> the skip size (defaults to 1, no skip)
-  #   - +n+ -> the number of elements to include
-  #
-  # Return the sum of the contents of the vector. This is the BLAS asum routine.
-  def asum incx=1, n=nil
-    NMatrix::BLAS::asum(self, incx, self.size / incx)
-  end
-  alias :absolute_sum :asum
-
-  #
-  # call-seq:
-  #     norm2 -> Numeric
-  #
-  # == Arguments
-  #   - +incx+ -> the skip size (defaults to 1, no skip)
-  #   - +n+ -> the number of elements to include
-  #
-  # Return the 2-norm of the vector. This is the BLAS nrm2 routine.
-  def nrm2 incx=1, n=nil
-    NMatrix::BLAS::nrm2(self, incx, self.size / incx)
-  end
-  alias :norm2 :nrm2
-
-
-  #
-  # call-seq:
-  #     sorted_indices -> Array
-  #
-  # Returns an array of the indices ordered by value sorted.
-  #
-  def sorted_indices
-    ary = self.to_a
-    ary.each_index.sort_by { |i| ary[i] }  # from: http://stackoverflow.com/a/17841159/170300
-  end
-
-  #
-  # call-seq:
-  #     binned_sorted_indices -> Array
-  #
-  # Returns an array of arrays of indices ordered by value sorted. Functions basically like +sorted_indices+, but
-  # groups indices together for those values that are the same.
-  #
-  def binned_sorted_indices
-    ary = self.to_a
-    ary2 = []
-    last_bin = ary.each_index.sort_by { |i| [ary[i]] }.inject([]) do |result, element|
-      if result.empty? || ary[result[-1]] == ary[element]
-        result << element
-      else
-        ary2 << result
-        [element]
-      end
-    end
-    ary2 << last_bin unless last_bin.empty?
-    ary2
-  end
-
 
   # TODO: Make this actually pretty.
   def pretty_print(q = nil) #:nodoc:
diff --git a/lib/nmatrix/rspec.rb b/lib/nmatrix/rspec.rb
index 4acedac..60dac34 100644
--- a/lib/nmatrix/rspec.rb
+++ b/lib/nmatrix/rspec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
diff --git a/lib/nmatrix/shortcuts.rb b/lib/nmatrix/shortcuts.rb
index 306e5c4..40acc13 100644
--- a/lib/nmatrix/shortcuts.rb
+++ b/lib/nmatrix/shortcuts.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -295,6 +295,14 @@ class NMatrix
     # call-seq:
     #     seq(shape) -> NMatrix
     #     seq(shape, options) -> NMatrix
+    #     bindgen(shape) -> NMatrix of :byte
+    #     indgen(shape) -> NMatrix of :int64
+    #     findgen(shape) -> NMatrix of :float32
+    #     dindgen(shape) -> NMatrix of :float64
+    #     cindgen(shape) -> NMatrix of :complex64
+    #     zindgen(shape) -> NMatrix of :complex128
+    #     rindgen(shape) -> NMatrix of :rational128
+    #     rbindgen(shape) -> NMatrix of :object
     #
     # Creates a matrix filled with a sequence of integers starting at zero.
     #
@@ -322,66 +330,11 @@ class NMatrix
       NMatrix.new(shape, values, {:stype => :dense}.merge(options))
     end
 
-    #
-    # call-seq:
-    #     indgen(size) -> NMatrix
-    #
-    # Returns an integer NMatrix. Equivalent to <tt>seq(n, dtype: :int32)</tt>.
-    #
-    # * *Arguments* :
-    #   - +shape+ -> Shape of the sequence.
-    # * *Returns* :
-    #   - NMatrix with dtype +:int32+.
-    #
-    def indgen(shape)
-      NMatrix.seq(shape, dtype: :int32)
-    end
-
-    #
-    # call-seq:
-    #     findgen(shape) -> NMatrix
-    #
-    # Returns a float NMatrix. Equivalent to <tt>seq(n, dtype: :float32)</tt>.
-    #
-    # * *Arguments* :
-    #   - +shape+ -> Shape of the sequence.
-    # * *Returns* :
-    #   - NMatrix with dtype +:float32+.
-    #
-    def findgen(shape)
-      NMatrix.seq(shape, dtype: :float32)
+    {:bindgen => :byte, :indgen => :int64, :findgen => :float32, :dindgen => :float64,
+     :cindgen => :complex64, :zindgen => :complex128,
+     :rindgen => :rational128, :rbindgen => :object}.each_pair do |meth, dtype|
+      define_method(meth) { |shape| NMatrix.seq(shape, :dtype => dtype) }
     end
-
-    #
-    # call-seq:
-    #     bindgen(size) -> NMatrix
-    #
-    # Returns a byte NMatrix. Equivalent to <tt>seq(n, dtype: :byte)</tt>.
-    #
-    # * *Arguments* :
-    #   - +size+ -> Shape of the sequence.
-    # * *Returns* :
-    #   - NMatrix with dtype +:byte+.
-    #
-    def bindgen(shape)
-      NMatrix.seq(shape, dtype: :byte)
-    end
-
-    #
-    # call-seq:
-    #     cindgen(shape) -> NMatrix
-    #
-    # Returns a complex NMatrix. Equivalent to <tt>seq(n, dtype: :complex64)</tt>.
-    #
-    # * *Arguments* :
-    #   - +shape+ -> Shape of the sequence.
-    # * *Returns* :
-    #   - NMatrix with dtype +:complex64+.
-    #
-    def cindgen(shape)
-      NMatrix.seq(shape, dtype: :complex64)
-    end
-
   end
 end
 
diff --git a/lib/nmatrix/version.rb b/lib/nmatrix/version.rb
index d3b8054..3b6fd01 100644
--- a/lib/nmatrix/version.rb
+++ b/lib/nmatrix/version.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -26,6 +26,14 @@ class NMatrix
   # Note that the format of the VERSION string is needed for NMatrix
   # native IO. If you change the format, please make sure that native
   # IO can still understand NMatrix::VERSION.
-  VERSION = "0.0.9"
+  #VERSION = "0.1.0"
+  module VERSION
+    MAJOR = 0
+    MINOR = 1
+    TINY = 0
+    PRE = "rc1" # pre-release tag, appended as the fourth component
+
+    STRING = [MAJOR, MINOR, TINY, PRE].compact.join(".") # "0.1.0.rc1"; compact would drop PRE if it were nil
+  end
 end
 
diff --git a/lib/nmatrix/yale_functions.rb b/lib/nmatrix/yale_functions.rb
index a47fadf..07f20c2 100644
--- a/lib/nmatrix/yale_functions.rb
+++ b/lib/nmatrix/yale_functions.rb
@@ -9,8 +9,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2013, Ruby Science Foundation
-# NMatrix is Copyright (c) 2013, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -81,7 +81,7 @@ module NMatrix::YaleFunctions
   # Returns the diagonal and non-digonal column indices stored in a given row.
   def yale_ja_d_keys_at i
     ary = yale_nd_row(i, :keys)
-    return ary if i >= self.shape[1] || self[i,i].nil? || self[i,i] == 0
+    return ary if i >= self.shape[1] || self[i,i] == self.default_value
     ary << i
   end
   alias :yale_row_as_array :yale_ja_d_keys_at
@@ -112,7 +112,7 @@ module NMatrix::YaleFunctions
   # Returns the diagonal and non-diagonal column indices and entries stored in a given row.
   def yale_row_as_hash i
     h = yale_nd_row(i, :hash)
-    return h if i >= self.shape[1] || self[i,i].nil? || self[i,i] == 0
+    return h if i >= self.shape[1] || self[i,i] == self.default_value
     h[i] = self[i,i]
   end
 end
\ No newline at end of file
diff --git a/metadata.yml b/metadata.yml
index 42165d9..9a15974 100644
--- a/metadata.yml
+++ b/metadata.yml
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: nmatrix
 version: !ruby/object:Gem::Version
-  version: 0.0.9
+  version: 0.1.0.rc1
 platform: ruby
 authors:
 - John Woods
@@ -10,7 +10,7 @@ authors:
 autorequire: 
 bindir: bin
 cert_chain: []
-date: 2013-09-19 00:00:00.000000000 Z
+date: 2013-12-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rdoc
@@ -162,6 +162,7 @@ files:
 - ext/nmatrix/math/scal.h
 - ext/nmatrix/math/swap.h
 - ext/nmatrix/math/trsm.h
+- ext/nmatrix/nm_memory.h
 - ext/nmatrix/nmatrix.cpp
 - ext/nmatrix/nmatrix.h
 - ext/nmatrix/ruby_constants.cpp
@@ -169,10 +170,10 @@ files:
 - ext/nmatrix/ruby_nmatrix.c
 - ext/nmatrix/storage/common.cpp
 - ext/nmatrix/storage/common.h
-- ext/nmatrix/storage/dense.cpp
-- ext/nmatrix/storage/dense.h
-- ext/nmatrix/storage/list.cpp
-- ext/nmatrix/storage/list.h
+- ext/nmatrix/storage/dense/dense.cpp
+- ext/nmatrix/storage/dense/dense.h
+- ext/nmatrix/storage/list/list.cpp
+- ext/nmatrix/storage/list/list.h
 - ext/nmatrix/storage/storage.cpp
 - ext/nmatrix/storage/storage.h
 - ext/nmatrix/storage/yale/class.h
@@ -209,6 +210,7 @@ files:
 - lib/nmatrix/yale_functions.rb
 - nmatrix.gemspec
 - scripts/mac-brew-gcc.sh
+- scripts/mac-mavericks-brew-gcc.sh
 - spec/00_nmatrix_spec.rb
 - spec/01_enum_spec.rb
 - spec/02_slice_spec.rb
@@ -231,14 +233,12 @@ files:
 - spec/stat_spec.rb
 - spec/utm5940.mtx
 homepage: http://sciruby.com
-licenses: []
+licenses:
+- BSD 2-clause
 metadata: {}
 post_install_message: "***********************************************************\nWelcome
-  to SciRuby: Tools for Scientific Computing in Ruby!\n\n                     ***
-  WARNING ***\nPlease be aware that NMatrix is in ALPHA status. If you're\nthinking
-  of using NMatrix to write mission critical code,\nsuch as for driving a car or flying
-  a space shuttle, you\nmay wish to choose other software (for now).\n\nNMatrix requires
-  a C compiler, and has been tested only\nwith GCC 4.6+. We are happy to accept contributions\nwhich
+  to SciRuby: Tools for Scientific Computing in Ruby!\n\nNMatrix requires a C compiler,
+  and has been tested only\nwith GCC 4.6+. We are happy to accept contributions\nwhich
   improve the portability of this project.\n\nAlso required is ATLAS. Most Linux distributions
   and Mac\nversions include ATLAS, but you may wish to compile it\nyourself. The Ubuntu/Debian
   apt package for ATLAS WILL \nNOT WORK with NMatrix if LAPACK is also installed.\n\nMore
@@ -255,12 +255,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '1.9'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - '>'
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.3.1
 requirements: []
 rubyforge_project: 
-rubygems_version: 2.0.2
+rubygems_version: 2.0.3
 signing_key: 
 specification_version: 4
 summary: NMatrix is an experimental linear algebra library for Ruby, written mostly
diff --git a/nmatrix.gemspec b/nmatrix.gemspec
index 7622e1d..23e155a 100644
--- a/nmatrix.gemspec
+++ b/nmatrix.gemspec
@@ -5,22 +5,17 @@ require 'nmatrix/version'
 
 Gem::Specification.new do |gem|
   gem.name = "nmatrix"
-  gem.version = NMatrix::VERSION
+  gem.version = NMatrix::VERSION::STRING
   gem.summary = "NMatrix is an experimental linear algebra library for Ruby, written mostly in C." 
   gem.description = "NMatrix is an experimental linear algebra library for Ruby, written mostly in C." 
   gem.homepage = 'http://sciruby.com'
   gem.authors = ['John Woods', 'Chris Wailes', 'Aleksey Timin']
   gem.email =  ['john.o.woods at gmail.com']
+  gem.license = 'BSD 2-clause'
   gem.post_install_message = <<-EOF
 ***********************************************************
 Welcome to SciRuby: Tools for Scientific Computing in Ruby!
 
-                     *** WARNING ***
-Please be aware that NMatrix is in ALPHA status. If you're
-thinking of using NMatrix to write mission critical code,
-such as for driving a car or flying a space shuttle, you
-may wish to choose other software (for now).
-
 NMatrix requires a C compiler, and has been tested only
 with GCC 4.6+. We are happy to accept contributions
 which improve the portability of this project.
diff --git a/scripts/mac-brew-gcc.sh b/scripts/mac-brew-gcc.sh
index 15096a3..68888c9 100755
--- a/scripts/mac-brew-gcc.sh
+++ b/scripts/mac-brew-gcc.sh
@@ -1,5 +1,9 @@
 #!/bin/bash
-VERSION="4.7.2"
+
+# Script will not work for GCC 4.8 or 4.9. For those, please see
+# mac-mavericks-brew-gcc.sh
+
+VERSION="4.7.2" # Script should also work with GCC 4.7.1.
 PREFIX="/usr/gcc-${VERSION}"
 LANGUAGES="c,c++,fortran"
 MAKE="make -j 4"
@@ -8,11 +12,9 @@ brew-path() { brew info $1 | head -n3 | tail -n1 | cut -d' ' -f1; }
 
 # Prerequisites
 
-brew install gmp
-brew install mpfr
-brew install libmpc
+brew install gmp mpfr libmpc
 
-# Download & install the latest GCC
+# Next, download & install the latest GCC:
 
 mkdir -p $PREFIX
 mkdir temp-gcc
@@ -25,11 +27,12 @@ cd gcc-$VERSION
 mkdir build
 cd build
 
+# Older versions of brew need brew-path instead of brew --prefix.
 ../configure \
      --prefix=$PREFIX \
-     --with-gmp=$(brew-path gmp) \
-     --with-mpfr=$(brew-path mpfr) \
-     --with-mpc=$(brew-path libmpc) \
+     --with-gmp=$(brew --prefix gmp) \
+     --with-mpfr=$(brew --prefix mpfr) \
+     --with-mpc=$(brew --prefix libmpc) \
      --program-suffix=-$VERSION \
      --enable-languages=$LANGUAGES \
      --with-system-zlib \
diff --git a/scripts/mac-mavericks-brew-gcc.sh b/scripts/mac-mavericks-brew-gcc.sh
new file mode 100644
index 0000000..867d050
--- /dev/null
+++ b/scripts/mac-mavericks-brew-gcc.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+brew-path() { brew info $1 | head -n3 | tail -n1 | cut -d' ' -f1; }
+
+# Try using the following for GCC 4.9:
+#
+#   brew install gmp4 mpfr2 libmpc08 isl011 cloog018
+#
+#
+
+brew install gcc49 --enable-fortran
+# Source for this is: http://stackoverflow.com/questions/19535422/os-x-10-9-gcc-links-to-clang
+
+
+# You may wish to re-install your Ruby if you're using rbenv. To do
+# so, make sure you've installed openssl, readline, and libyaml.
+#
+# The commands for this are:
+#
+#    CC=gcc-4.9 RUBY_CONFIGURE_OPTS="--with-openssl-dir=`brew --prefix openssl` --with-readline-dir=`brew --prefix readline` --with-gcc=gcc-4.9 --enable-shared" rbenv install --keep 2.0.0-p247
+#
+#
\ No newline at end of file
diff --git a/spec/00_nmatrix_spec.rb b/spec/00_nmatrix_spec.rb
index a9f4dd5..283762a 100644
--- a/spec/00_nmatrix_spec.rb
+++ b/spec/00_nmatrix_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -29,6 +29,10 @@
 require File.dirname(__FILE__) + "/spec_helper.rb"
 
 describe NMatrix do
+  #after :each do
+  #  GC.start
+  #end
+
   it "creates a matrix with the new constructor" do
     n = NMatrix.new([2,2], [0,1,2,3], dtype: :int64)
   end
@@ -40,17 +44,15 @@ describe NMatrix do
   end
 
   it "calculates exact determinants on small square matrices" do
-    a = NMatrix.new(:dense, 2, [1,2,3,4], :int64)
-    a.det_exact.should == -2
+    NMatrix.new(2, [1,2,3,4], stype: :dense, dtype: :int64).det_exact.should == -2
   end
 
   it "calculates determinants" do
-    m = NMatrix.new(3, [-2,2,3,-1,1,3,2,0,-1])
-    m.det.should == 6
+    NMatrix.new(3, [-2,2,3,-1,1,3,2,0,-1], stype: :dense, dtype: :int64).det.should == 6
   end
 
   it "allows casting to Ruby objects" do
-    m = NMatrix.new(:dense, [3,3], [0,0,1,0,2,0,3,4,5], :int64)
+    m = NMatrix.new([3,3], [0,0,1,0,2,0,3,4,5], dtype: :int64, stype: :dense)
     n = m.cast(:dense, :object)
     n.should == m
   end
@@ -367,3 +369,110 @@ describe NMatrix do
   end
 
 end
+
+
+describe "NMatrix#upper_triangle" do
+  it "should create a copy with the lower corner set to zero" do
+    n = NMatrix.seq(4)+1
+    n.upper_triangle.should == NMatrix.new(4, [1,2,3,4,0,6,7,8,0,0,11,12,0,0,0,16])
+    n.upper_triangle(2).should == NMatrix.new(4, [1,2,3,4,5,6,7,8,9,10,11,12,0,14,15,16])
+  end
+end
+
+describe "NMatrix#lower_triangle" do
+  it "should create a copy with the lower corner set to zero" do
+    n = NMatrix.seq(4)+1
+    n.lower_triangle.should == NMatrix.new(4, [1,0,0,0,5,6,0,0,9,10,11,0,13,14,15,16])
+    n.lower_triangle(2).should == NMatrix.new(4, [1,2,3,0,5,6,7,8,9,10,11,12,13,14,15,16])
+  end
+end
+
+describe "NMatrix#upper_triangle!" do
+  it "should create a copy with the lower corner set to zero" do
+    n = NMatrix.seq(4)+1
+    n.upper_triangle!.should == NMatrix.new(4, [1,2,3,4,0,6,7,8,0,0,11,12,0,0,0,16])
+    n = NMatrix.seq(4)+1
+    n.upper_triangle!(2).should == NMatrix.new(4, [1,2,3,4,5,6,7,8,9,10,11,12,0,14,15,16])
+  end
+end
+
+describe "NMatrix#lower_triangle!" do
+  it "should create a copy with the lower corner set to zero" do
+    n = NMatrix.seq(4)+1
+    n.lower_triangle!.should == NMatrix.new(4, [1,0,0,0,5,6,0,0,9,10,11,0,13,14,15,16])
+    n = NMatrix.seq(4)+1
+    n.lower_triangle!(2).should == NMatrix.new(4, [1,2,3,0,5,6,7,8,9,10,11,12,13,14,15,16])
+  end
+end
+
+describe "NMatrix#reshape" do
+  it "should change the shape of a matrix without the contents changing" do
+    n = NMatrix.seq(4)+1
+    n.reshape([8,2]).to_flat_array.should == n.to_flat_array
+  end
+
+  it "should permit a change of dimensionality" do
+    n = NMatrix.seq(4)+1
+    n.reshape([8,1,2]).to_flat_array.should == n.to_flat_array
+  end
+
+  it "should prevent a resize" do
+    n = NMatrix.seq(4)+1
+    expect { n.reshape([5,2]) }.to raise_error(ArgumentError)
+  end
+end
+
+describe "NMatrix#transpose" do
+  [:dense, :list, :yale].each do |stype|
+    context(stype) do
+      it "should transpose a #{stype} matrix (2-dimensional)" do
+        n = NMatrix.seq(4, stype: stype)
+        n.transpose.to_a.flatten.should == [0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15]
+      end
+    end
+  end
+
+  [:dense, :list].each do |stype|
+    context(stype) do
+      it "should transpose a #{stype} matrix (3-dimensional)" do
+        n = NMatrix.new([4,4,1], [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15], stype: stype)
+        n.transpose([2,1,0]).to_flat_array.should == [0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15]
+        n.transpose([1,0,2]).to_flat_array.should == [0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15]
+        n.transpose([0,2,1]).to_flat_array.should == n.to_flat_array # for dense, make this reshape!
+      end
+    end
+  end
+
+end
+
+describe "NMatrix#==" do
+  [:dense, :list, :yale].each do |left|
+    [:dense, :list, :yale].each do |right|
+      next if left == right
+      context ("#{left}?#{right}") do
+        it "should compare two matrices of differing stypes" do
+          n = NMatrix.new([3,4], [0,0,1,2,0,0,3,4,0,0,0,0,5,6,7,0], stype: left)
+          m = NMatrix.new([3,4], [0,0,1,2,0,0,3,4,0,0,0,0,5,6,7,0], stype: right)
+          n.should == m
+        end
+      end
+    end
+  end
+end
+
+describe "NMatrix#concat" do
+  it "should default to horizontal concatenation" do
+    n = NMatrix.new([1,3], [1,2,3])
+    n.concat(n).should == NMatrix.new([1,6], [1,2,3,1,2,3])
+  end
+
+  it "should permit vertical concatenation" do
+    n = NMatrix.new([1,3], [1,2,3])
+    n.vconcat(n).should == NMatrix.new([2,3], [1,2,3])
+  end
+
+  it "should permit depth concatenation on tensors" do
+    n = NMatrix.new([1,3,1], [1,2,3])
+    n.dconcat(n).should == NMatrix.new([1,3,2], [1,1,2,2,3,3])
+  end
+end
diff --git a/spec/01_enum_spec.rb b/spec/01_enum_spec.rb
index 653cf77..e7ce161 100644
--- a/spec/01_enum_spec.rb
+++ b/spec/01_enum_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -37,6 +37,10 @@ describe "NMatrix enumeration for" do
         @m = @n[1..4,1..3]
       end
 
+      #after :each do
+      #  GC.start
+      #end
+
       if stype == :yale
         it "should iterate properly along each row of a slice" do
           vv = []
@@ -178,7 +182,17 @@ describe "NMatrix enumeration for" do
 
       end
 
+      if stype == :list or stype == :dense then
+        it "should correctly map to a matrix with a single element" do 
+          nm = N.new([1], [2.0], stype: stype)
+          nm.map { |e| e**2 }.should eq N.new([1], [4.0], stype: stype)
+        end
 
+        it "should correctly map to a matrix with multiple elements" do
+          nm = N.new([2], [2.0, 2.0], stype: stype)
+          nm.map { |e| e**2 }.should eq N.new([2], [4.0, 4.0], stype: stype)
+        end
+      end
     end
   end
-end
\ No newline at end of file
+end
diff --git a/spec/02_slice_spec.rb b/spec/02_slice_spec.rb
index 53ebcd5..0ed7d66 100644
--- a/spec/02_slice_spec.rb
+++ b/spec/02_slice_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -34,10 +34,14 @@ describe "Slice operation" do
   [:dense, :list, :yale].each do |stype|
     context "for #{stype}" do
       before :each do
-        GC.start # don't have to do this, but it helps to make sure we've cleaned up our pointers properly.
+        #GC.start # don't have to do this, but it helps to make sure we've cleaned up our pointers properly.
         @m = create_matrix(stype)
       end
 
+      #after :each do
+      #  GC.start
+      #end
+
       it "should correctly return a row of a reference-slice" do
         @n = create_rectangular_matrix(stype)
         @m = @n[1..4,1..3]
@@ -256,6 +260,10 @@ describe "Slice operation" do
           @m[0..2,0..2].should == @m[0...3,0...3]
         end
 
+        it 'should correctly handle :* slice notation' do
+          @m[:*,0].should eq @m[0...@m.shape[0], 0]
+        end
+
         if stype == :dense
           [:byte,:int8,:int16,:int32,:int64,:float32,:float64,:rational64,:rational128].each do |left_dtype|
             [:byte,:int8,:int16,:int32,:int64,:float32,:float64,:rational64,:rational128].each do |right_dtype|
diff --git a/spec/blas_spec.rb b/spec/blas_spec.rb
index caae6fa..5366c5b 100644
--- a/spec/blas_spec.rb
+++ b/spec/blas_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -29,6 +29,9 @@
 require File.join(File.dirname(__FILE__), "spec_helper.rb")
 
 describe NMatrix::BLAS do
+  #after :each do
+  #  GC.start
+  #end
 
   [:rational32, :rational64, :rational128, :float32, :float64, :complex64, :complex128].each do |dtype|
     context dtype do
diff --git a/spec/elementwise_spec.rb b/spec/elementwise_spec.rb
index dcf7b28..5352539 100644
--- a/spec/elementwise_spec.rb
+++ b/spec/elementwise_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -29,6 +29,9 @@
 require File.join(File.dirname(__FILE__), "spec_helper.rb")
 
 describe NMatrix do
+  #after :each do
+  #  GC.start
+  #end
 
   context "yale" do
     before :each do
diff --git a/spec/io_spec.rb b/spec/io_spec.rb
index 75b6df5..ff3ac77 100644
--- a/spec/io_spec.rb
+++ b/spec/io_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -24,9 +24,21 @@
 #
 # Basic tests for NMatrix::IO.
 #
+require "tmpdir" # Used to avoid cluttering the repository.
+
 require "./lib/nmatrix"
 
 describe NMatrix::IO do
+  before :each do
+    @tmp_dir = Dir.mktmpdir
+    @test_out = File.join(@tmp_dir, "test-out")
+  end
+
+  after :each do
+    File.delete(@test_out) if File.file?(@test_out)
+    Dir.rmdir(@tmp_dir)
+  end
+
   it "repacks a string" do
     NMatrix::IO::Matlab.repack("hello", :miUINT8, :byte).should == "hello"
   end
@@ -83,57 +95,55 @@ describe NMatrix::IO do
 
   it "reads and writes NMatrix dense" do
     n = NMatrix.new(:dense, [4,3], [0,1,2,3,4,5,6,7,8,9,10,11], :int32)
-    n.write("test-out")
+    n.write(@test_out)
 
-    m = NMatrix.read("test-out")
+    m = NMatrix.read(@test_out)
     n.should == m
   end
 
   it "reads and writes NMatrix dense as symmetric" do
     n = NMatrix.new(:dense, 3, [0,1,2,1,3,4,2,4,5], :int16)
-    n.write("test-out", :symmetric)
+    n.write(@test_out, :symmetric)
 
-    m = NMatrix.read("test-out")
+    m = NMatrix.read(@test_out)
     n.should == m
   end
 
   it "reads and writes NMatrix dense as skew" do
     n = NMatrix.new(:dense, 3, [0,1,2,-1,3,4,-2,-4,5], :float64)
-    n.write("test-out", :skew)
+    n.write(@test_out, :skew)
 
-    m = NMatrix.read("test-out")
+    m = NMatrix.read(@test_out)
     n.should == m
   end
 
   it "reads and writes NMatrix dense as hermitian" do
     n = NMatrix.new(:dense, 3, [0,1,2,1,3,4,2,4,5], :complex64)
-    n.write("test-out", :hermitian)
+    n.write(@test_out, :hermitian)
 
-    m = NMatrix.read("test-out")
+    m = NMatrix.read(@test_out)
     n.should == m
   end
 
   it "reads and writes NMatrix dense as upper" do
     n = NMatrix.new(:dense, 3, [-1,1,2,3,4,5,6,7,8], :int32)
-    n.write("test-out", :upper)
+    n.write(@test_out, :upper)
 
     m = NMatrix.new(:dense, 3, [-1,1,2,0,4,5,0,0,8], :int32) # lower version of the same
 
-    o = NMatrix.read("test-out")
+    o = NMatrix.read(@test_out)
     o.should == m
     o.should_not == n
   end
 
   it "reads and writes NMatrix dense as lower" do
     n = NMatrix.new(:dense, 3, [-1,1,2,3,4,5,6,7,8], :int32)
-    n.write("test-out", :lower)
+    n.write(@test_out, :lower)
 
     m = NMatrix.new(:dense, 3, [-1,0,0,3,4,0,6,7,8], :int32) # lower version of the same
 
-    o = NMatrix.read("test-out")
+    o = NMatrix.read(@test_out)
     o.should == m
     o.should_not == n
   end
-
-
-end
\ No newline at end of file
+end
diff --git a/spec/lapack_spec.rb b/spec/lapack_spec.rb
index f48e6df..27269cc 100644
--- a/spec/lapack_spec.rb
+++ b/spec/lapack_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -29,6 +29,10 @@
 require File.join(File.dirname(__FILE__), "spec_helper.rb")
 
 describe NMatrix::LAPACK do
+  #after :each do
+  #  GC.start
+  #end
+
   # where integer math is allowed
   [:byte, :int8, :int16, :int32, :int64, :rational32, :rational64, :rational128, :float32, :float64, :complex64, :complex128].each do |dtype|
     context dtype do
@@ -55,7 +59,23 @@ describe NMatrix::LAPACK do
   # where integer math is not allowed
   [:rational32, :rational64, :rational128, :float32, :float64, :complex64, :complex128].each do |dtype|
     context dtype do
-      it "exposes clapack getrf" do
+
+      it "exposes clapack_gesv" do
+        a = NMatrix[[1.quo(1), 2, 3], [0,1.quo(2),4],[3,3,9]].cast(dtype: dtype)
+        b = NMatrix[[1.quo(1)],[2],[3]].cast(dtype: dtype)
+        err = case dtype
+                when :float32, :complex64
+                  1e-6
+                when :float64, :complex128
+                  1e-8
+                else
+                  1e-64
+              end
+        NMatrix::LAPACK::clapack_gesv(:row,a.shape[0],b.shape[1],a,a.shape[0],b,b.shape[0]).should be_within(err).of(NMatrix[[-1.quo(2)], [0], [1.quo(2)]].cast(dtype: dtype))
+      end
+
+
+      it "exposes clapack_getrf" do
         a = NMatrix.new(3, [4,9,2,3,5,7,8,1,6], dtype: dtype)
         NMatrix::LAPACK::clapack_getrf(:row, 3, 3, a, 3)
 
@@ -80,7 +100,7 @@ describe NMatrix::LAPACK do
         a[2,2].should be_within(err).of(360.quo(53))
       end
 
-      it "exposes clapack potrf" do
+      it "exposes clapack_potrf" do
         # first do upper
         begin
           a = NMatrix.new(:dense, 3, [25,15,-5, 0,18,0, 0,0,11], dtype)
@@ -99,7 +119,7 @@ describe NMatrix::LAPACK do
       end
 
       # Together, these calls are basically xGESV from LAPACK: http://www.netlib.org/lapack/double/dgesv.f
-      it "exposes clapack getrs" do
+      it "exposes clapack_getrs" do
         a     = NMatrix.new(3, [-2,4,-3,3,-2,1,0,-4,3], dtype: dtype)
         ipiv  = NMatrix::LAPACK::clapack_getrf(:row, 3, 3, a, 3)
         b     = NMatrix.new([3,1], [-1, 17, -9], dtype: dtype)
@@ -111,7 +131,7 @@ describe NMatrix::LAPACK do
         b[2].should == -13
       end
 
-      it "exposes clapack getri" do
+      it "exposes clapack_getri" do
         a = NMatrix.new(:dense, 3, [1,0,4,1,1,6,-3,0,-10], dtype)
         ipiv = NMatrix::LAPACK::clapack_getrf(:row, 3, 3, a, 3) # get pivot from getrf, use for getri
 
@@ -125,7 +145,7 @@ describe NMatrix::LAPACK do
         end
       end
 
-      it "exposes lapack gesdd" do
+      it "exposes lapack_gesdd" do
         if [:float32, :float64].include? dtype
           a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
                                     6.11 6.91 5.04 -0.27 7.98
@@ -160,6 +180,7 @@ describe NMatrix::LAPACK do
           #http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/cgesvd_ex.c.htm
           pending "Example may be wrong"
         else
+          pending "Not implemented for non-LAPACK dtypes"
           a = NMatrix.new([4,3], dtype: dtype)
         end
         err = case dtype
@@ -173,7 +194,7 @@ describe NMatrix::LAPACK do
         err = err *5e1
         begin
 
-          info = NMatrix::LAPACK::lapack_gesvd(:a, :a, a.shape[0], a.shape[1], a, a.shape[0], s, u, ldu, vt, ldvt, 500)
+          info = NMatrix::LAPACK::lapack_gesdd(:a, a.shape[0], a.shape[1], a, a.shape[0], s, u, ldu, vt, ldvt, 500)
 
         rescue NotImplementedError => e
           pending e.to_s
@@ -186,7 +207,7 @@ describe NMatrix::LAPACK do
       end
 
 
-      it "exposes lapack gesvd" do
+     it "exposes lapack_gesvd" do
         # http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/dgesvd_ex.c.htm
         if [:float32, :float64].include? dtype
           a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
@@ -257,6 +278,133 @@ describe NMatrix::LAPACK do
         s.transpose.should be_within(err).of(s_true.row(0))
 
       end
+ 
+      it "exposes the convenience gesvd method" do
+        # http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/dgesvd_ex.c.htm
+        if [:float32, :float64].include? dtype
+          a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
+                                    6.11 6.91 5.04 -0.27 7.98
+                                    -9.15 -7.93 4.86 4.85 3.01
+                                    9.57 1.64 8.83 0.74 5.80
+                                    -3.49 4.02 9.80 10.00 4.27
+                                    9.84 0.15 -8.99 -6.02 -5.31|.map(&:to_f), dtype: dtype)
+          s_true = NMatrix.new([1,5], [27.468732418221848, 22.643185009774697, 8.558388228482576, 5.985723201512133, 2.014899658715756], dtype: dtype)
+          right_true = NMatrix.new([5,6], [0.5911423764124365, 0.2631678147140568, 0.35543017386282716, 0.3142643627269275, 0.2299383153647484, 0.0, 0.39756679420242547, 0.24379902792633046, -0.22239000068544604, -0.7534661509534584, -0.36358968669749664, 0.0, 0.03347896906244727, -0.6002725806935828, -0.45083926892230763, 0.23344965724471425, -0.3054757327479317, 0.0, 0.4297069031370182, 0.23616680628112555, -0.6858628638738117, 0.3318600182003095, 0.1649276348845103, 0.0, 0.46974792156 [...]
+          #right_true = NMatrix.new([5,6],
+          # %w|-0.59 0.26   0.36   0.31   0.23
+          #   -0.40   0.24  -0.22  -0.75  -0.36
+          #   -0.03  -0.60  -0.45   0.23  -0.31
+          #   -0.43   0.24  -0.69   0.33   0.16
+          #   -0.47  -0.35   0.39   0.16  -0.52
+          #    0.29   0.58  -0.02   0.38  -0.65|.map(&:to_f),
+          #  dtype)
+          left_true = NMatrix.new([5,5], [0.25138279272049635, 0.3968455517769292, 0.6921510074703637, 0.3661704447722309, 0.4076352386533525, 0.814836686086339, 0.3586615001880027, -0.24888801115928438, -0.3685935379446176, -0.09796256926688672, -0.2606185055842211, 0.7007682094072526, -0.22081144672043734, 0.38593848318854174, -0.49325014285102375, 0.3967237771305971, -0.4507112412166429, 0.2513211496937535, 0.4342486014366711, -0.6226840720358049, -0.21802776368654594, 0.1402099498711 [...]
+          #left_true = NMatrix.new([5,5],
+          #  %w|-0.25  -0.40  -0.69  -0.37  -0.41
+          #    0.81   0.36  -0.25  -0.37  -0.10
+          #   -0.26   0.70  -0.22   0.39  -0.49
+          #    0.40  -0.45   0.25   0.43  -0.62
+          #   -0.22   0.14   0.59  -0.63  -0.44|.map(&:to_f),
+          # dtype)
+          s   = NMatrix.new([5,1], 0, dtype: dtype)
+          u   = NMatrix.new([5,5], 0, dtype: dtype)
+          ldu = 5
+          vt  = NMatrix.new([6,6], 0, dtype: dtype)
+          ldvt= 6
+        elsif [:complex64, :complex128].include? dtype
+          #http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/cgesvd_ex.c.htm
+          pending "Example may be wrong"
+          a = NMatrix.new([4,3], [[  5.91, -5.69], [  7.09,  2.72], [  7.78, -4.06], [ -0.79, -7.21], [ -3.15, -4.08], [ -1.89,  3.27], [  4.57, -2.07], [ -3.88, -3.30], [ -4.89,  4.20], [  4.10, -6.70], [  3.28, -3.84], [  3.84,  1.19]].map {|e| Complex(*e) } , dtype: dtype)
+          s_true = NMatrix.new([3,1], [17.63, 11.61, 6.78], dtype: dtype)
+          left_true = NMatrix.new([4,4], [[-0.86, 0.0], [0.4, 0.0], [0.32, 0.0], [-0.35, 0.13], [-0.24, -0.21], [-0.63, 0.6], [0.15, 0.32], [0.61, 0.61], [-0.36, 0.1]].map {|e| Complex(*e)}, dtype: dtype)
+          right_true = NMatrix.new([4,3], [[ -0.22, 0.51], [ -0.37, -0.32], [ -0.53, 0.11], [ 0.15, 0.38], [ 0.31, 0.31], [ 0.09, -0.57], [ 0.18, -0.39], [ 0.38, -0.39], [ 0.53, 0.24], [ 0.49, 0.28], [ -0.47, -0.25], [ -0.15, 0.19]].map {|e| Complex *e} , dtype: dtype)
+
+          s   = NMatrix.new([3,1], 0, dtype: dtype)
+          u   = NMatrix.new([4,4], 0, dtype: dtype)
+          ldu = 4
+          vt  = NMatrix.new([3,3], 0, dtype: dtype)
+          ldvt= 3
+        else 
+          a = NMatrix.new([4,3], dtype: dtype)
+        end
+        err = case dtype
+              when :float32, :complex64
+                1e-6
+              when :float64, :complex128
+                1e-15
+              else
+                1e-64 # FIXME: should be 0, but be_within(0) does not work.
+              end
+        err = err *5e1
+        begin
+          u, s, vt = a.gesvd
+        rescue NotImplementedError => e
+          pending e.to_s
+        end
+        u.should be_within(err).of(left_true)
+        #FIXME: Is the next line correct?
+        vt[0...right_true.shape[0], 0...right_true.shape[1]-1].should be_within(err).of(right_true[0...right_true.shape[0],0...right_true.shape[1]-1])
+        s.transpose.should be_within(err).of(s_true.row(0))
+
+      end
+      it "exposes the convenience gesdd method" do
+        # http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/dgesvd_ex.c.htm
+        if [:float32, :float64].include? dtype
+          a = NMatrix.new([5,6], %w|8.79 9.93 9.83 5.45 3.16
+                                    6.11 6.91 5.04 -0.27 7.98
+                                    -9.15 -7.93 4.86 4.85 3.01
+                                    9.57 1.64 8.83 0.74 5.80
+                                    -3.49 4.02 9.80 10.00 4.27
+                                    9.84 0.15 -8.99 -6.02 -5.31|.map(&:to_f), dtype: dtype)
+          s_true = NMatrix.new([1,5], [27.468732418221848, 22.643185009774697, 8.558388228482576, 5.985723201512133, 2.014899658715756], dtype: dtype)
+          right_true = NMatrix.new([5,6], [0.5911423764124365, 0.2631678147140568, 0.35543017386282716, 0.3142643627269275, 0.2299383153647484, 0.0, 0.39756679420242547, 0.24379902792633046, -0.22239000068544604, -0.7534661509534584, -0.36358968669749664, 0.0, 0.03347896906244727, -0.6002725806935828, -0.45083926892230763, 0.23344965724471425, -0.3054757327479317, 0.0, 0.4297069031370182, 0.23616680628112555, -0.6858628638738117, 0.3318600182003095, 0.1649276348845103, 0.0, 0.46974792156 [...]
+          left_true = NMatrix.new([5,5], [0.25138279272049635, 0.3968455517769292, 0.6921510074703637, 0.3661704447722309, 0.4076352386533525, 0.814836686086339, 0.3586615001880027, -0.24888801115928438, -0.3685935379446176, -0.09796256926688672, -0.2606185055842211, 0.7007682094072526, -0.22081144672043734, 0.38593848318854174, -0.49325014285102375, 0.3967237771305971, -0.4507112412166429, 0.2513211496937535, 0.4342486014366711, -0.6226840720358049, -0.21802776368654594, 0.1402099498711 [...]
+          u   = NMatrix.new([5,5], 0, dtype: dtype)
+          ldu = 5
+          vt  = NMatrix.new([6,6], 0, dtype: dtype)
+          ldvt= 6
+        elsif [:complex64, :complex128].include? dtype
+          #http://software.intel.com/sites/products/documentation/doclib/mkl_sa/11/mkl_lapack_examples/cgesvd_ex.c.htm
+          pending "Example may be wrong"
+          a = NMatrix.new([4,3], [[  5.91, -5.69], [  7.09,  2.72], [  7.78, -4.06], [ -0.79, -7.21], [ -3.15, -4.08], [ -1.89,  3.27], [  4.57, -2.07], [ -3.88, -3.30], [ -4.89,  4.20], [  4.10, -6.70], [  3.28, -3.84], [  3.84,  1.19]].map {|e| Complex(*e) } , dtype: dtype)
+          s_true = NMatrix.new([3,1], [17.63, 11.61, 6.78], dtype: dtype)
+          left_true = NMatrix.new([4,4], [[-0.86, 0.0], [0.4, 0.0], [0.32, 0.0], [-0.35, 0.13], [-0.24, -0.21], [-0.63, 0.6], [0.15, 0.32], [0.61, 0.61], [-0.36, 0.1]].map {|e| Complex(*e)}, dtype: dtype)
+          right_true = NMatrix.new([4,3], [[ -0.22, 0.51], [ -0.37, -0.32], [ -0.53, 0.11], [ 0.15, 0.38], [ 0.31, 0.31], [ 0.09, -0.57], [ 0.18, -0.39], [ 0.38, -0.39], [ 0.53, 0.24], [ 0.49, 0.28], [ -0.47, -0.25], [ -0.15, 0.19]].map {|e| Complex *e} , dtype: dtype)
+
+          s   = NMatrix.new([3,1], 0, dtype: dtype)
+          u   = NMatrix.new([4,4], 0, dtype: dtype)
+          ldu = 4
+          vt  = NMatrix.new([3,3], 0, dtype: dtype)
+          ldvt= 3
+        else 
+          a = NMatrix.new([4,3], dtype: dtype)
+        end
+        s   = NMatrix.new([5,1], 0, dtype: dtype)
+        u   = NMatrix.new([5,5], 0, dtype: dtype)
+        ldu = 5
+        vt  = NMatrix.new([6,6], 0, dtype: dtype)
+        ldvt= 6
+        err = case dtype
+              when :float32, :complex64
+                1e-6
+              when :float64, :complex128
+                1e-15
+              else
+                1e-64 # FIXME: should be 0, but be_within(0) does not work.
+              end
+        err = err *5e1
+        begin
+
+          u, s, vt = a.gesdd(500)
+
+        rescue NotImplementedError => e
+          pending e.to_s
+        end
+        u.should be_within(err).of(left_true)
+        #FIXME: Is the next line correct?
+        vt[0...right_true.shape[0], 0...right_true.shape[1]-1].should be_within(err).of(right_true[0...right_true.shape[0],0...right_true.shape[1]-1])
+        s.transpose.should be_within(err).of(s_true.row(0))
+      end
 
 
       it "exposes geev" do
diff --git a/spec/math_spec.rb b/spec/math_spec.rb
index 3d4b029..ea21f4f 100644
--- a/spec/math_spec.rb
+++ b/spec/math_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -26,10 +26,101 @@
 # versions of unfriendly BLAS and LAPACK functions.
 #
 
-# Can we use require_relative here instead?
-require File.join(File.dirname(__FILE__), "spec_helper.rb")
+require 'spec_helper'
 
 describe "math" do
+  #after :each do
+  #  GC.start
+  #end
+
+  context "elementwise math functions" do
+
+    [:dense,:list,:yale].each do |stype|
+      context stype do
+
+        [:int64,:float64,:rational128].each do |dtype|
+          context dtype do
+            before :each do
+              @size = [2,2]
+              @m = NMatrix.seq(@size, dtype: dtype, stype: stype)+1
+              @a = @m.to_a.flatten
+            end
+
+            NMatrix::NMMath::METHODS_ARITY_1.each do |meth|
+              #skip inverse regular trig functions
+              next if meth.to_s.start_with?('a') and (not meth.to_s.end_with?('h')) \
+                and NMatrix::NMMath::METHODS_ARITY_1.include?(
+                  meth.to_s[1...meth.to_s.length].to_sym)
+              next if meth == :atanh
+
+              it "should correctly apply elementwise #{meth}" do
+
+                @m.send(meth).should eq N.new(@size, @a.map{ |e| Math.send(meth, e) },
+                                                 dtype: :float64, stype: stype)
+              end
+            end
+
+            NMatrix::NMMath::METHODS_ARITY_2.each do |meth|
+              next if meth == :atan2
+              it "should correctly apply elementwise #{meth}" do
+                @m.send(meth, @m).should eq N.new(@size, @a.map{ |e|
+                                                     Math.send(meth, e, e) },
+                                                     dtype: :float64, 
+                                                     stype: stype)
+              end
+
+              it "should correctly apply elementwise #{meth} with a scalar first arg" do
+                Math.send(meth, 1, @m).should eq N.new(@size, @a.map { |e| Math.send(meth, 1, e) }, dtype: :float64, stype: stype)
+              end
+
+              it "should correctly apply elementwise #{meth} with a scalar second arg" do
+                @m.send(meth, 1).should eq N.new(@size, @a.map { |e| Math.send(meth, e, 1) }, dtype: :float64, stype: stype)
+              end
+            end
+
+            it "should correctly apply elementwise natural log" do
+              @m.log.should eq N.new(@size, [0, Math.log(2), Math.log(3), Math.log(4)],
+                                        dtype: :float64, stype: stype)
+            end
+
+            it "should correctly apply elementwise log with arbitrary base" do
+              @m.log(3).should eq N.new(@size, [0, Math.log(2,3), 1, Math.log(4,3)],
+                                           dtype: :float64, stype: stype)
+            end
+
+            context "inverse trig functions" do
+              before :each do
+                @m = NMatrix.seq(@size, dtype: dtype, stype: stype)/4
+                @a = @m.to_a.flatten
+              end
+              [:asin, :acos, :atan, :atanh].each do |atf|
+
+                it "should correctly apply elementwise #{atf}" do
+                  @m.send(atf).should eq N.new(@size, 
+                                               @a.map{ |e| Math.send(atf, e) },
+                                               dtype: :float64, stype: stype)
+                end
+              end
+
+              it "should correctly apply elementtwise atan2" do
+                @m.atan2(@m*0+1).should eq N.new(@size, 
+                  @a.map { |e| Math.send(:atan2, e, 1) }, dtype: :float64, stype: stype)
+              end
+
+              it "should correctly apply elementwise atan2 with a scalar first arg" do
+                Math.atan2(1, @m).should eq N.new(@size, @a.map { |e| Math.send(:atan2, 1, e) }, dtype: :float64, stype: stype)
+              end
+
+              it "should correctly apply elementwise atan2 with a scalar second arg" do
+                  @m.atan2(1).should eq N.new(@size, @a.map { |e| Math.send(:atan2, e, 1) }, dtype: :float64, stype: stype)
+              end
+            end
+          end
+        end
+
+      end
+    end
+  end
 
   [:float32, :float64, :complex64, :complex128, :rational32, :rational64, :rational128].each do |dtype|
     context dtype do
diff --git a/spec/nmatrix_yale_spec.rb b/spec/nmatrix_yale_spec.rb
index 74040f7..5ed1c1a 100644
--- a/spec/nmatrix_yale_spec.rb
+++ b/spec/nmatrix_yale_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -27,6 +27,10 @@
 require "./lib/nmatrix"
 
 describe NMatrix do
+  #after :each do
+  #  GC.start
+  #end
+
   context :yale do
 
     it "compares two empty matrices" do
@@ -106,8 +110,8 @@ describe NMatrix do
       n[0,0] = 0.1
       n[0,1] = 0.2
       n[1,0] = 0.3
-      n.yale_a == [0.1, 0.0, 0.0, 0.2, 0.3]
-      n.yale_ija == [3,4,5,1,0]
+      n.yale_a.should == [0.1, 0.0, 0.0, 0.2, 0.3]
+      n.yale_ija.should == [3,4,5,1,0]
     end
 
     it "sets when resizing" do
@@ -267,28 +271,19 @@ describe NMatrix do
       mn[0,0].should == 541
     end
 
-    it "transposes" do
-      a = NMatrix.new(4, 0.0, stype: :yale)
-      a[0,0] = 1.0
-      a[0,1] = 4.0
-      a[1,2] = 2.0
-      a[1,3] = -4.0
-      a[3,1] = 5.0
-      a[3,3] = 6.0
-      b = a.transpose
-
-      b[0,0].should == 1.0
-      b[1,0].should == 4.0
-      b[2,0].should == 0.0
-      b[3,0].should == 0.0
-      b[0,1].should == 0.0
-      b[1,1].should == 0.0
-      b[2,1].should == 2.0
-      b[3,1].should == -4.0
-      b[0,3].should == 0.0
-      b[1,3].should == 5.0
-      b[2,3].should == 0.0
-      b[3,3].should == 6.0
+    it "calculates the row key intersections of two matrices" do
+      a = NMatrix.new([3,9], [0,1], stype: :yale, dtype: :byte, default: 0)
+      b = NMatrix.new([3,9], [0,0,1,0,1], stype: :yale, dtype: :byte, default: 0)
+      a.extend NMatrix::YaleFunctions
+      b.extend NMatrix::YaleFunctions
+
+      (0...3).each do |ai|
+        (0...3).each do |bi|
+          STDERR.puts (a.yale_ja_d_keys_at(ai) & b.yale_ja_d_keys_at(bi)).inspect
+          (a.yale_ja_d_keys_at(ai) & b.yale_ja_d_keys_at(bi)).should == a.yale_row_keys_intersection(ai, b, bi)
+        end
+      end
+
     end
   end
 end
diff --git a/spec/rspec_monkeys.rb b/spec/rspec_monkeys.rb
index 83909d2..e2f7a1f 100644
--- a/spec/rspec_monkeys.rb
+++ b/spec/rspec_monkeys.rb
@@ -1,3 +1,30 @@
+# = NMatrix
+#
+# A linear algebra library for scientific computation in Ruby.
+# NMatrix is part of SciRuby.
+#
+# NMatrix was originally inspired by and derived from NArray, by
+# Masahiro Tanaka: http://narray.rubyforge.org
+#
+# == Copyright Information
+#
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
+#
+# Please see LICENSE.txt for additional copyright notices.
+#
+# == Contributing
+#
+# By contributing source code to SciRuby, you agree to be bound by
+# our Contributor Agreement:
+#
+# * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
+#
+# == rspec_monkeys.rb
+#
+# A set of monkey patches for RSpec allowing checks of NMatrix types
+#
+
 module RSpec::Matchers::BuiltIn
   class BeWithin
 
diff --git a/spec/rspec_spec.rb b/spec/rspec_spec.rb
index 2fb4ecf..7da1a3f 100644
--- a/spec/rspec_spec.rb
+++ b/spec/rspec_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
diff --git a/spec/shortcuts_spec.rb b/spec/shortcuts_spec.rb
index e74f83e..be411f0 100644
--- a/spec/shortcuts_spec.rb
+++ b/spec/shortcuts_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -30,6 +30,9 @@ require File.join(File.dirname(__FILE__), "spec_helper.rb")
 require 'pry'
 
 describe NMatrix do
+  #after :each do
+  #  GC.start
+  #end
 
   it "zeros() creates a matrix of zeros" do
     m = NMatrix.zeros(3)
@@ -175,13 +178,8 @@ describe NMatrix do
 
   context "_like constructors" do
     before :each do
-      STDERR.puts "starting GC"
-      GC.start
-      STDERR.puts "GC finished"
       @nm_1d = NMatrix[5.0,0.0,1.0,2.0,3.0]
-      STDERR.puts "@nm_1d"
       @nm_2d = NMatrix[[0.0,1.0],[2.0,3.0]]
-      STDERR.puts "@nm_2d"
     end
 
     it "should create an nmatrix of ones with dimensions and type the same as its argument" do
@@ -190,11 +188,8 @@ describe NMatrix do
     end
 
     it "should create an nmatrix of zeros with dimensions and type the same as its argument" do
-      STDERR.puts "A"
       NMatrix.zeros_like(@nm_1d).should eq NMatrix[0.0, 0.0, 0.0, 0.0, 0.0]
-      STDERR.puts "B"
       NMatrix.zeros_like(@nm_2d).should eq NMatrix[[0.0, 0.0], [0.0, 0.0]]
-      STDERR.puts "C"
     end
   end
 
diff --git a/spec/slice_set_spec.rb b/spec/slice_set_spec.rb
index 0919530..acbffc6 100644
--- a/spec/slice_set_spec.rb
+++ b/spec/slice_set_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -30,6 +30,10 @@ require File.dirname(__FILE__) + "/spec_helper.rb"
 describe "Set slice operation" do
   include RSpec::Longrun::DSL
 
+  #after :each do
+  #  GC.start
+  #end
+
   [:dense, :yale, :list].each do |stype|
     context "for #{stype}" do
       before :each do
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
index 39b0ff5..59045a5 100644
--- a/spec/spec_helper.rb
+++ b/spec/spec_helper.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -25,6 +25,7 @@
 # Common data and helper functions for testing.
 
 require "rspec/longrun"
+#require "narray/narray"
 
 require "./lib/nmatrix"
 require "./lib/nmatrix/rspec"
diff --git a/spec/stat_spec.rb b/spec/stat_spec.rb
index 0d60444..ffad66e 100644
--- a/spec/stat_spec.rb
+++ b/spec/stat_spec.rb
@@ -8,8 +8,8 @@
 #
 # == Copyright Information
 #
-# SciRuby is Copyright (c) 2010 - 2012, Ruby Science Foundation
-# NMatrix is Copyright (c) 2012, Ruby Science Foundation
+# SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
+# NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
 #
 # Please see LICENSE.txt for additional copyright notices.
 #
@@ -31,162 +31,178 @@ require 'pry'
 
 describe "Statistical functions" do
   context "mapping and reduction related functions" do
-
-    before :each do
-      @nm_1d = NMatrix[5.0,0.0,1.0,2.0,3.0]
-      @nm_2d = NMatrix[[0.0,1.0],[2.0,3.0]]
-    end
-
-    it "behaves like Enumerable#reduce with no argument to reduce" do
-      @nm_1d.reduce_along_dim(0) { |acc, el| acc + el }.to_f.should eq 11
-      @nm_2d.reduce_along_dim(1) { |acc, el| acc + el }.should eq NMatrix[[1, 5]]
-    end
-
-    it "should calculate the mean along the specified dimension" do
-      @nm_1d.mean.should eq NMatrix[2.2]
-      @nm_2d.mean.should eq NMatrix[[1.0,2.0]]
-    end
-
-    it "should calculate the minimum along the specified dimension" do
-      @nm_1d.min.should eq 0.0
-      @nm_2d.min.should eq NMatrix[[0.0, 1.0]]
-      @nm_2d.min(1).should eq NMatrix[[0.0], [2.0]]
-    end
-
-    it "should calculate the maximum along the specified dimension" do
-      @nm_1d.max.should eq 5.0
-      @nm_2d.max.should eq NMatrix[[2.0, 3.0]]
-    end
-
-    it "should calculate the variance along the specified dimension" do
-      @nm_1d.variance.should eq NMatrix[3.7]
-      @nm_2d.variance(1).should eq NMatrix[[0.5], [0.5]]
-    end
-
-    it "should calculate the sum along the specified dimension" do
-      @nm_1d.sum.should eq NMatrix[11]
-      @nm_2d.sum.should eq NMatrix[[2], [4]]
-    end
-
-    it "should calculate the standard deviation along the specified dimension" do
-      @nm_1d.std.should eq NMatrix[Math.sqrt(3.7)]
-      @nm_2d.std(1).should eq NMatrix[[Math.sqrt(0.5)], [Math.sqrt(0.5)]]
-    end
-
-    it "should raise an ArgumentError when any invalid dimension is provided" do
-      expect { @nm_1d.mean(3) }.to raise_exception(RangeError)
-    end
-
-    it "should convert to float if it contains only a single element" do
-      NMatrix[4.0].to_f.should eq 4.0
-      NMatrix[[[[4.0]]]].to_f.should eq 4.0
-    end
-
-    it "should raise an index error if it contains more than a single element" do
-      expect { @nm_1d.to_f }.to raise_error(IndexError)
-    end
-
-    it "should map a block to all elements" do
-      @nm_1d.map { |e| e ** 2 }.should eq NMatrix[25.0,0.0,1.0,4.0,9.0]
-      @nm_2d.map { |e| e ** 2 }.should eq NMatrix[[0.0,1.0],[4.0,9.0]]
-    end
-
-    it "should map! a block to all elements in place" do
-      fct = Proc.new { |e| e ** 2 }
-      expected1 = @nm_1d.map &fct
-      expected2 = @nm_2d.map &fct
-      @nm_1d.map! &fct
-      @nm_1d.should eq expected1
-      @nm_2d.map! &fct
-      @nm_2d.should eq expected2
-    end
-
-    it "should return an enumerator for map without a block" do
-      @nm_1d.map.should be_a Enumerator
-    end
-
-    it "should return an enumerator for reduce without a block" do
-      @nm_1d.reduce_along_dim(0).should be_a Enumerator
-    end
-
-    it "should return an enumerator for each_along_dim without a block" do
-      @nm_1d.each_along_dim(0).should be_a Enumerator
-    end
-
-    it "should iterate correctly for map without a block" do
-      en = @nm_1d.map
-      en.each { |e| e**2 }.should eq @nm_1d.map { |e| e**2 }
-      en = @nm_2d.map
-      en.each { |e| e**2 }.should eq @nm_2d.map { |e| e**2 }
-    end
-
-    it "should iterate correctly for reduce without a block" do
-      en = @nm_1d.reduce_along_dim(0, 1.0)
-      en.each { |a, e| a+e }.to_f.should eq 12
-      en = @nm_2d.reduce_along_dim(1, 1.0)
-      en.each { |a, e| a+e }.should eq NMatrix[[2.0],[6.0]]
-    end
-
-    it "should iterate correctly for each_along_dim without a block" do
-      res = NMatrix.zeros_like(@nm_1d[0...1])
-      en = @nm_1d.each_along_dim(0)
-      en.each { |e| res += e }
-      res.to_f.should eq 11
-
-      res = NMatrix.zeros_like (@nm_2d[0...2, 0])
-      en = @nm_2d.each_along_dim(1)
-      en.each { |e| res += e }
-      res.should eq NMatrix[[1.0], [5.0]]
-    end
-
-    it "should yield matrices of matching dtype for each_along_dim" do
-      m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128)
-      m.each_along_dim(1) do |sub_m|
-        sub_m.dtype.should eq :complex128
-      end
-    end
-
-    it "should reduce to a matrix of matching dtype for reduce_along_dim" do
-      m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128)
-      m.reduce_along_dim(1) do |acc, sub_m|
-        sub_m.dtype.should eq :complex128
-        acc
-      end
-
-      m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128)
-      m.reduce_along_dim(1, 0.0) do |acc, sub_m|
-        sub_m.dtype.should eq :complex128
-        acc
-      end
-    end
-
-    it "should allow overriding the dtype for reduce_along_dim" do
-      m = NMatrix[[1,2,3], [3,4,5], dtype: :complex128]
-      m.reduce_along_dim(1, 0.0, :float64) do |acc, sub_m|
-        acc.dtype.should eq :float64
-        acc
-      end
-
-      m = NMatrix[[1,2,3], [3,4,5], dtype: :complex128]
-      m.reduce_along_dim(1, nil, :float64) do |acc, sub_m|
-        acc.dtype.should eq :float64
-        acc
+    [:dense, :yale, :list].each do |stype|
+      context "on #{stype} matrices" do 
+        before :each do
+          @nm_1d = NMatrix.new([5], [5.0,0.0,1.0,2.0,3.0], stype: stype) unless stype == :yale
+          @nm_2d = NMatrix.new([2,2], [0.0, 1.0, 2.0, 3.0], stype: stype)
+        end
+
+        it "behaves like Enumerable#reduce with no argument to reduce" do
+          @nm_1d.reduce_along_dim(0) { |acc, el| acc + el }.to_f.should eq 11 unless stype == :yale
+          @nm_2d.reduce_along_dim(1) { |acc, el| acc + el }.should eq NMatrix.new([2,1], [1.0, 5.0], stype: stype)
+        end
+
+        it "should calculate the mean along the specified dimension" do
+          unless stype == :yale then
+            puts @nm_1d.mean
+            @nm_1d.mean.should eq NMatrix.new([1], [2.2], stype: stype, dtype: :float64)
+          end
+          @nm_2d.mean.should eq NMatrix[[1.0,2.0], stype: stype]
+          @nm_2d.mean(1).should eq NMatrix[[0.5], [2.5], stype: stype]
+        end
+
+        it "should calculate the minimum along the specified dimension" do
+          @nm_1d.min.should eq 0.0 unless stype == :yale
+          @nm_2d.min.should eq NMatrix[[0.0, 1.0], stype: stype]
+          @nm_2d.min(1).should eq NMatrix[[0.0], [2.0], stype: stype]
+        end
+
+        it "should calculate the maximum along the specified dimension" do
+          @nm_1d.max.should eq 5.0  unless stype == :yale
+          @nm_2d.max.should eq NMatrix[[2.0, 3.0], stype: stype]
+        end
+
+        it "should calculate the variance along the specified dimension" do
+          @nm_1d.variance.should eq NMatrix[3.7, stype: stype] unless stype == :yale
+          @nm_2d.variance(1).should eq NMatrix[[0.5], [0.5], stype: stype]
+        end
+
+        it "should calculate the sum along the specified dimension" do
+          @nm_1d.sum.should eq NMatrix[11.0, stype: stype] unless stype == :yale
+          @nm_2d.sum.should eq NMatrix[[2.0, 4.0], stype: stype]
+        end
+
+        it "should calculate the standard deviation along the specified dimension" do
+          @nm_1d.std.should eq NMatrix[Math.sqrt(3.7), stype: stype] unless stype == :yale
+          @nm_2d.std(1).should eq NMatrix[[Math.sqrt(0.5)], [Math.sqrt(0.5)], stype: stype]
+        end
+
+        it "should raise an ArgumentError when any invalid dimension is provided" do
+          expect { @nm_1d.mean(3) }.to raise_exception(RangeError) unless stype == :yale
+          expect { @nm_2d.mean(3) }.to raise_exception(RangeError)
+        end
+
+        it "should convert to float if it contains only a single element" do
+          NMatrix[4.0, stype: stype].to_f.should eq 4.0  unless stype == :yale
+          NMatrix[[[[4.0]]], stype: stype].to_f.should eq 4.0  unless stype == :yale
+          NMatrix[[4.0], stype: stype].to_f.should eq 4.0
+        end
+
+        it "should raise an index error if it contains more than a single element" do
+          expect { @nm_1d.to_f }.to raise_error(IndexError)  unless stype == :yale
+          expect { @nm_2d.to_f }.to raise_error(IndexError)
+        end
+
+        it "should map a block to all elements" do
+          #binding.pry if stype == :list
+          @nm_1d.map { |e| e ** 2 }.should eq NMatrix[25.0,0.0,1.0,4.0,9.0, stype: stype] unless stype == :yale
+          @nm_2d.map { |e| e ** 2 }.should eq NMatrix[[0.0,1.0],[4.0,9.0], stype: stype]
+        end
+
+        it "should map! a block to all elements in place" do
+          fct = Proc.new { |e| e ** 2 }
+          unless stype == :yale then
+            expected1 = @nm_1d.map &fct
+            @nm_1d.map! &fct
+            @nm_1d.should eq expected1
+          end
+          expected2 = @nm_2d.map &fct
+          @nm_2d.map! &fct
+          @nm_2d.should eq expected2
+        end
+
+        it "should return an enumerator for map without a block" do
+          @nm_2d.map.should be_a Enumerator
+        end
+
+        it "should return an enumerator for reduce without a block" do
+          @nm_2d.reduce_along_dim(0).should be_a Enumerator
+        end
+
+        it "should return an enumerator for each_along_dim without a block" do
+          @nm_2d.each_along_dim(0).should be_a Enumerator
+        end
+
+        it "should iterate correctly for map without a block" do
+          en = @nm_1d.map unless stype == :yale
+          en.each { |e| e**2 }.should eq @nm_1d.map { |e| e**2 } unless stype == :yale
+          en = @nm_2d.map
+          en.each { |e| e**2 }.should eq @nm_2d.map { |e| e**2 }
+        end
+
+        it "should iterate correctly for reduce without a block" do
+          unless stype == :yale then
+            en = @nm_1d.reduce_along_dim(0, 1.0)
+            en.each { |a, e| a+e }.to_f.should eq 12
+          end
+          en = @nm_2d.reduce_along_dim(1, 1.0)
+          en.each { |a, e| a+e }.should eq NMatrix[[2.0],[6.0], stype: stype]
+        end
+
+        it "should iterate correctly for each_along_dim without a block" do
+          unless stype == :yale then
+            res = NMatrix.zeros_like(@nm_1d[0...1])
+            en = @nm_1d.each_along_dim(0)
+            en.each { |e| res += e }
+            res.to_f.should eq 11
+          end
+          res = NMatrix.zeros_like (@nm_2d[0...2, 0])
+          en = @nm_2d.each_along_dim(1)
+          en.each { |e| res += e }
+          res.should eq NMatrix[[1.0], [5.0], stype: stype]
+        end
+
+        it "should yield matrices of matching dtype for each_along_dim" do
+          m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128, stype: stype)
+          m.each_along_dim(1) do |sub_m|
+            sub_m.dtype.should eq :complex128
+          end
+        end
+
+        it "should reduce to a matrix of matching dtype for reduce_along_dim" do
+          m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128, stype: stype)
+          m.reduce_along_dim(1) do |acc, sub_m|
+            sub_m.dtype.should eq :complex128
+            acc
+          end
+
+          m = NMatrix.new([2,3], [1,2,3,3,4,5], dtype: :complex128, stype: stype)
+          m.reduce_along_dim(1, 0.0) do |acc, sub_m|
+            sub_m.dtype.should eq :complex128
+            acc
+          end
+        end
+
+        it "should allow overriding the dtype for reduce_along_dim" do
+          m = NMatrix[[1,2,3], [3,4,5], dtype: :complex128]
+          m.reduce_along_dim(1, 0.0, :float64) do |acc, sub_m|
+            acc.dtype.should eq :float64
+            acc
+          end
+
+          m = NMatrix[[1,2,3], [3,4,5], dtype: :complex128, stype: stype]
+          m.reduce_along_dim(1, nil, :float64) do |acc, sub_m|
+            acc.dtype.should eq :float64
+            acc
+          end
+        end
+
+        it "should convert integer dtypes to float when calculating mean" do
+          m = NMatrix[[1,2,3], [3,4,5], dtype: :int32, stype: stype]
+          m.mean(0).dtype.should eq :float64
+        end
+
+        it "should convert integer dtypes to float when calculating variance" do
+          m = NMatrix[[1,2,3], [3,4,5], dtype: :int32, stype: stype]
+          m.variance(0).dtype.should eq :float64
+        end
+
+        it "should convert integer dtypes to float when calculating standard deviation" do
+          m = NMatrix[[1,2,3], [3,4,5], dtype: :int32, stype: stype]
+          m.std(0).dtype.should eq :float64
+        end
       end
     end
-
-    it "should convert integer dtypes to float when calculating mean" do
-      m = NMatrix[[1,2,3], [3,4,5], dtype: :int32]
-      m.mean(0).dtype.should eq :float64
-    end
-
-    it "should convert integer dtypes to float when calculating variance" do
-      m = NMatrix[[1,2,3], [3,4,5], dtype: :int32]
-      m.variance(0).dtype.should eq :float64
-    end
-
-    it "should convert integer dtypes to float when calculating standard deviation" do
-      m = NMatrix[[1,2,3], [3,4,5], dtype: :int32]
-      m.std(0).dtype.should eq :float64
-    end
   end
-end
\ No newline at end of file
+end

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-ruby-extras/ruby-nmatrix.git