[med-svn] [r-cran-irlba] 02/05: New upstream version 2.3.1

Andreas Tille tille@debian.org
Mon Oct 23 18:24:40 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository r-cran-irlba.

commit b3329e5e8623cc1d30c44d3657132f3984899599
Author: Andreas Tille <tille@debian.org>
Date:   Mon Oct 23 20:12:02 2017 +0200

    New upstream version 2.3.1
---
 DESCRIPTION                 |  11 ++-
 MD5                         |  43 +++++----
 NAMESPACE                   |   3 +
 R/irlba.R                   |  59 ++++++------
 R/prcomp.R                  |  45 ++++++++-
 R/ssvd.R                    | 222 ++++++++++++++++++++++++++++++++++++++++++++
 R/svdr.R                    |  31 +++++--
 README.md                   |  42 ++++-----
 build/vignette.rds          | Bin 197 -> 197 bytes
 inst/doc/irlba.Rnw          | 129 +++++++++++++------------
 inst/doc/irlba.pdf          | Bin 181657 -> 181586 bytes
 man/irlba.Rd                |  29 ++----
 man/partial_eigen.Rd        |   1 -
 man/prcomp_irlba.Rd         |  17 +++-
 man/ssvd.Rd                 | 185 ++++++++++++++++++++++++++++++++++++
 man/summary.irlba_prcomp.Rd |  16 ++++
 man/svdr.Rd                 |  20 +++-
 src/irlb.c                  |  18 +++-
 src/utility.c               |   2 +-
 tests/edge.R                |   2 +-
 tests/prcomp.r              |  91 ++++++++++++++++++
 tests/ssvd.R                |  34 +++++++
 tests/svdr.R                |   4 +-
 tests/test.R                |  34 +++++--
 vignettes/irlba.Rnw         | 129 +++++++++++++------------
 25 files changed, 897 insertions(+), 270 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 35f8396..49a2ac7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,8 +2,8 @@ Package: irlba
 Type: Package
 Title: Fast Truncated Singular Value Decomposition and Principal
         Components Analysis for Large Dense and Sparse Matrices
-Version: 2.2.1
-Date: 2017-05-16
+Version: 2.3.1
+Date: 2017-10-18
 Authors@R: c(
     person("Jim", "Baglama", role=c("aut", "cph"), email="jbaglama@uri.edu"),
     person("Lothar", "Reichel", role=c("aut", "cph"), email="reichel@math.kent.edu"),
@@ -13,14 +13,15 @@ Description: Fast and memory efficient methods for truncated singular value
 Depends: Matrix
 LinkingTo: Matrix
 Imports: stats, methods
+Suggests: PMA
 License: GPL-3
 BugReports: https://github.com/bwlewis/irlba/issues
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
 NeedsCompilation: yes
-Packaged: 2017-05-16 20:24:46 UTC; blewis
+Packaged: 2017-10-18 19:45:21 UTC; blewis
 Author: Jim Baglama [aut, cph],
   Lothar Reichel [aut, cph],
   B. W. Lewis [aut, cre, cph]
 Maintainer: B. W. Lewis <blewis@illposed.net>
 Repository: CRAN
-Date/Publication: 2017-05-17 07:28:01 UTC
+Date/Publication: 2017-10-18 20:15:08 UTC
diff --git a/MD5 b/MD5
index 9100742..23e1e8a 100644
--- a/MD5
+++ b/MD5
@@ -1,23 +1,28 @@
-0bc045318da221e318dc9eca2f2c614a *DESCRIPTION
-2df3ba50b634767e4f69a4a9d9520166 *NAMESPACE
+8790f38b856ae1df2e03299b8ac0df46 *DESCRIPTION
+1f29b808f13eaa1ff708a79059d02dbf *NAMESPACE
 46c031e01c7cb76685cb4ef0837be3b4 *R/eigen.R
-0b00fde03142679e5ff0b9d74d8c648f *R/irlba.R
-da63d5e8855861a4d27d79800bd4c8b0 *R/prcomp.R
-6a74c49b8e03319481133c72d3f4de08 *R/svdr.R
+af985385066dd6568eb994b033fa4b9e *R/irlba.R
+463003ce65f3879e9bcc06a76e1d4581 *R/prcomp.R
+7df1aca6e73bcf10ca58de9e167074fe *R/ssvd.R
+b1edc9c12f09e61f3d1a34bafabf5890 *R/svdr.R
 20d6b2a573231dad638a6ed961cbcd40 *R/utility.R
-063e4582d2787b591a11d396b19189f7 *README.md
-adf849b0f1d127b206a67d3a451b64cf *build/vignette.rds
-751d1231241f7229377a7f3e066d1e60 *inst/doc/irlba.Rnw
-c9b5ff4a2272f0174c384d1f8c10045f *inst/doc/irlba.pdf
-3001af394110e4a3afb468f87dfa17b9 *man/irlba.Rd
-4c2aaeb543e9cd98eb6203fada578ce3 *man/partial_eigen.Rd
-3e425d237604f5286d6db3c9d32435ba *man/prcomp_irlba.Rd
-8f38539535f6102529a42e0a40ab423f *man/svdr.Rd
+a0019d133d41fce6a5bc63c79c623684 *README.md
+ccd53b685d0d4a927a7eba155ad0c758 *build/vignette.rds
+ffcb4c3e143d951b9c1a8dffe6267a06 *inst/doc/irlba.Rnw
+261814aecee8ff851cf8e83f0a004797 *inst/doc/irlba.pdf
+a6b1bfb7a98d9bb24de9ee2fe5a34ed4 *man/irlba.Rd
+062e536762dac8ada4b33fc7874dc36a *man/partial_eigen.Rd
+babf215accfc0ce1f434326d32d7d009 *man/prcomp_irlba.Rd
+e328f412fc755f80d64696cd5e8203fb *man/ssvd.Rd
+582c099850634f51829aa36637b605c8 *man/summary.irlba_prcomp.Rd
+3b98bc835e9f9086c095ce3630d031d7 *man/svdr.Rd
 619d13ce6900fca2139e2139408d39ad *src/Makevars
-68674c958bdb72d325877006f974ce72 *src/irlb.c
+a19c7820c052795dfa3af06da39b6f45 *src/irlb.c
 c3060f4abbe417e55fbed6f15af4ca62 *src/irlb.h
-07f335c271e72b7f0083588c9e214686 *src/utility.c
-7a09897e0456484e488df2f46591b797 *tests/edge.R
-9db2b0b6fd5ec7d6776e6b4bc30fbfe1 *tests/svdr.R
-e106ea2fb269fe4f63ed116b64f3c340 *tests/test.R
-751d1231241f7229377a7f3e066d1e60 *vignettes/irlba.Rnw
+32362225422fd8eb2eb8fe9d63d71062 *src/utility.c
+b0ca96e1ca2a891745a344014f776866 *tests/edge.R
+343ace18c9946b3dda3d41c4b1e90a13 *tests/prcomp.r
+3ad627da0fccf99f31b840fec794b404 *tests/ssvd.R
+c514d2213577042647987a7bcb19da78 *tests/svdr.R
+ed9586c691f0f3bbaedde07949818d76 *tests/test.R
+ffcb4c3e143d951b9c1a8dffe6267a06 *vignettes/irlba.Rnw
diff --git a/NAMESPACE b/NAMESPACE
index 42d4fe6..5749bd7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,8 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(summary,irlba_prcomp)
 export(irlba)
 export(partial_eigen)
 export(prcomp_irlba)
+export(ssvd)
 export(svdr)
 import(Matrix)
 importFrom(methods,slot)
@@ -10,4 +12,5 @@ importFrom(methods,slotNames)
 importFrom(stats,prcomp)
 importFrom(stats,rnorm)
 importFrom(stats,sd)
+importFrom(stats,var)
 useDynLib(irlba, .registration=TRUE, .fixes="C_")
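
The NAMESPACE changes above register an S3 summary method for the new
irlba_prcomp class and export ssvd(). A quick post-install sanity check,
a sketch rather than part of the patch:

    library(irlba)
    exists("ssvd")                    # TRUE with this version
    methods(class = "irlba_prcomp")   # lists the new summary method
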
diff --git a/R/irlba.R b/R/irlba.R
index 22fd0d8..01e72fb 100644
--- a/R/irlba.R
+++ b/R/irlba.R
@@ -106,10 +106,11 @@
 #'
 #' The function may generate the following warnings:
 #' \itemize{
-#'   \item{"did not converge--results might be invalid!; try increasing maxit or fastpath=FALSE" means that the algorithm didn't
+#'   \item{"did not converge--results might be invalid!; try increasing work or maxit"
+#'   means that the algorithm didn't
 #'   converge -- this is potentially a serious problem and the returned results may not be valid. \code{irlba}
 #'   reports a warning here instead of an error so that you can inspect whatever is returned. If this
-#'   happens, carefully heed the warning and inspect the result.}
+#'   happens, carefully heed the warning and inspect the result. You may also try setting \code{fastpath=FALSE}.}
 #'   \item{"You're computing a large percentage of total singular values, standard svd might work better!"
 #'     \code{irlba} is designed to efficiently compute a few of the largest singular values and associated
 #'      singular vectors of a matrix. The standard \code{svd} function will be more efficient for computing
@@ -126,7 +127,7 @@
 #' R implementation.
 #'
 #' @references
-#' Augmented Implicitly Restarted Lanczos Bidiagonalization Methods, J. Baglama and L. Reichel, SIAM J. Sci. Comput. 2005.
+#' Baglama, James, and Lothar Reichel. "Augmented implicitly restarted Lanczos bidiagonalization methods." SIAM Journal on Scientific Computing 27.1 (2005): 19-42.
 #'
 #' @examples
 #' set.seed(1)
@@ -158,24 +159,7 @@
 #'
 #' # A custom matrix multiplication function that scales the columns of A
 #' # (cf the scale option). This function scales the columns of A to unit norm.
-#' # This approach is deprecated (see below for a bettwe way to do this).
 #' col_scale <- sqrt(apply(A, 2, crossprod))
-#' mult <- function(x, y)
-#'         {
-#'           # check if x is a  vector
-#'           if (is.vector(x))
-#'           {
-#'             return((x %*% y) / col_scale)
-#'           }
-#'           # else x is the matrix
-#'           x %*% (y / col_scale)
-#'         }
-#' irlba(A, 3, mult=mult)$d
-#'
-#' # Compare with:
-#' svd(sweep(A, 2, col_scale, FUN=`/`))$d[1:3]
-#'
-#' # Compare with the new recommended approach:
 #' setClass("scaled_matrix", contains="matrix", slots=c(scale="numeric"))
 #' setMethod("%*%", signature(x="scaled_matrix", y="numeric"),
 #'    function(x, y) x@.Data %*% (y / x@scale))
@@ -184,6 +168,10 @@
 #' a <- new("scaled_matrix", A, scale=col_scale)
 #' irlba(a, 3)$d
 #'
+#' # Compare with:
+#' svd(sweep(A, 2, col_scale, FUN=`/`))$d[1:3]
+#'
+#'
 #' @seealso \code{\link{svd}}, \code{\link{prcomp}}, \code{\link{partial_eigen}}, \code{\link{svdr}}
 #' @import Matrix
 #' @importFrom stats rnorm
@@ -221,26 +209,31 @@ function(A,                     # data matrix
   mcall <- as.list(match.call())
   # Maximum number of Ritz vectors to use in augmentation, may be less
   # depending on workspace size.
-  maxritz <- mcall[["maxritz"]]
+  maxritz <- mcall[["maxritz"]] # experimental
   if (is.null(maxritz)) maxritz <- 3
-  du <- mcall[["du"]]
-  dv <- mcall[["dv"]]
-  ds <- mcall[["ds"]]
+  du <- mcall[["du"]] # deprecated
+  dv <- mcall[["dv"]] # deprecated
+  ds <- mcall[["ds"]] # deprecated
   deflate <- is.null(du) + is.null(ds) + is.null(dv)
-  if (smallest) fastpath <- FALSE
+  if (is.logical(scale) && ! scale) scale <- NULL
+  if (is.logical(shift) && ! shift) shift <- NULL
+  if (is.logical(center) && ! center) center <- NULL
+  if (smallest) fastpath <- FALSE  # for now anyway
+  if (any(dim(A) > 2 ^ 32 - 1)) fastpath <- FALSE # for now
   if (deflate == 3)
   {
     deflate <- FALSE
   } else if (deflate == 0)
   {
     deflate <- TRUE
-    warning("The deflation options are deprecated and have been removed as formal arugments. Please modify your code to not use them.")
+    warning("The deflation options have been deprecated. Please modify your code to not use them.")
     if (length(ds) > 1) stop("deflation limited to one dimension")
     if (!is.null(dim(du))) du <- du[, 1]
     if (!is.null(dim(dv))) dv <- dv[, 1]
   } else stop("all three du ds dv parameters must be specified for deflation")
   if (!is.null(center))
   {
+    if (is.logical(center) && center) center <- colMeans(A)
     if (deflate) stop("the center parameter can't be specified together with deflation parameters")
     if (length(center) != ncol(A)) stop("center must be a vector of length ncol(A)")
     if (fastpath && ! right_only) du <- NULL
@@ -249,6 +242,7 @@ function(A,                     # data matrix
     dv <- center
     deflate <- TRUE
   }
+  if ("integer" == typeof(A)) A <- A + 0.0
   iscomplex <- is.complex(A)
   m <- nrow(A)
   n <- ncol(A)
@@ -285,7 +279,8 @@ function(A,                     # data matrix
   if (right_only)
   {
     w_dim <- 1
-    work <- min(min(m, n), work + 20) # typically need this to help convergence
+    # typically need to increase working dimensions to help convergence
+    if (! ("work" %in% names(as.list(match.call())))) work <- min(min(m, n), work + 20)
     fastpath <- FALSE
   }
   if (n > m && smallest)
@@ -334,7 +329,8 @@ function(A,                     # data matrix
     if (is.null(v))
     {
       v <- rnorm(n)
-      if (verbose) message("Initializing starting vector v with samples from standard normal distribution.\nUse `set.seed` first for reproducibility.")
+      if (verbose) message("Initializing starting vector v with samples from standard normal distribution.
+Use `set.seed` first for reproducibility.")
     } else if (is.list(v))  # restarted case
     {
       if (is.null(v$v) || is.null(v$d) || is.null(v$u)) stop("restart requires left and right singular vectors")
@@ -355,7 +351,7 @@ function(A,                     # data matrix
     CENTER <- NULL
     if (!is.null(scale))
     {
-      if (length(scale) != ncol(A)) stop("scale length must mactch number of matrix columns")
+      if (length(scale) != ncol(A)) stop("scale length must match number of matrix columns")
       SCALE <- as.double(scale)
     }
     if (!is.null(shift))
@@ -377,7 +373,7 @@ function(A,                     # data matrix
       ans$u <- matrix(head(ans$u, m * nu), nrow=m, ncol=nu)
       ans$v <- matrix(head(ans$v, n * nv), nrow=n, ncol=nv)
       if (tol * ans$d[1] < eps) warning("convergence criterion below machine epsilon")
-      if (ans[[6]] == -2) warning("did not converge--results might be invlaid!; try increasing maxit or fastpath=FALSE")
+      if (ans[[6]] == -2) warning("did not converge--results might be invalid!; try increasing work or maxit")
       return(ans[-6])
     }
     errors <- c("invalid dimensions",
@@ -534,9 +530,10 @@ function(A,                     # data matrix
         if (interchange) F <- t(drop(mult(A, drop(W[, j_w]))))
         else F <- t(drop(mult(drop(W[, j_w]), A)))
       }
-#     Optionally apply shift and scale
+#     Optionally apply shift, scale, deflate
       if (!is.null(shift)) F <- F + shift * W[, j_w]
       if (!is.null(scale)) F <- F / scale
+      if (deflate) F <- F - sum(W[, j_w]) * dv
       mprod <- mprod + 1
       F <- drop(F - S * V[, j])
 #     Orthogonalize
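
The hunks above also make center=TRUE shorthand for centering by column
means (the logical value is coerced via colMeans(A)). A minimal sketch of
the resulting PCA-style equivalence, assuming the patched package:

    library(irlba)
    set.seed(1)
    A  <- matrix(rnorm(100 * 20), 100)
    s1 <- irlba(A, 3, center = TRUE)      # center coerced to colMeans(A)
    s2 <- svd(sweep(A, 2, colMeans(A)))   # explicit centering
    max(abs(s1$d - s2$d[1:3]))            # expect a value near zero
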
diff --git a/R/prcomp.R b/R/prcomp.R
index 5a11207..63f1a68 100644
--- a/R/prcomp.R
+++ b/R/prcomp.R
@@ -13,6 +13,15 @@
 #'          scaled to have unit variance before the analysis takes place.
 #'          The default is \code{FALSE} for consistency with S, but scaling is often advisable.
 #'          Alternatively, a vector of length equal the number of columns of \code{x} can be supplied.
+#'
+#'          The value of \code{scale} determines how column scaling is performed
+#'          (after centering).  If \code{scale} is a numeric vector with length
+#'          equal to the number of columns of \code{x}, then each column of \code{x} is
+#'          divided by the corresponding value from \code{scale}.  If \code{scale} is
+#'          \code{TRUE} then scaling is done by dividing the (centered) columns of
+#'          \code{x} by their standard deviations if \code{center=TRUE}, and the
+#'          root mean square otherwise.  If \code{scale} is \code{FALSE}, no scaling is done.
+#'          See \code{\link{scale}} for more details.
 #' @param n integer number of principal component vectors to return, must be less than
 #' \code{min(dim(x))}.
 #' @param ... additional arguments passed to \code{\link{irlba}}.
@@ -56,9 +65,10 @@
 #' p2 <- prcomp(x, tol=0.7)
 #' summary(p2)
 #'
+#'
 #' @seealso \code{\link{prcomp}}
 #' @import Matrix
-#' @importFrom stats rnorm prcomp sd
+#' @importFrom stats rnorm prcomp sd var
 #' @importFrom methods slotNames slot
 #' @export
 prcomp_irlba <- function(x, n = 3, retx = TRUE, center = TRUE, scale. = FALSE, ...)
@@ -77,7 +87,15 @@ control that algorithm's convergence tolerance. See `?prcomp_irlba` for help.")
   } else args$center <- center
   if (is.logical(scale.))
   {
-    if (scale.) args$scale <- apply(x, 2, sd)
+    if (scale.)
+    {
+      if (is.numeric(args$center))
+      {
+        f <- function(i) sqrt(sum((x[, i] - center[i]) ^ 2) / (nrow(x) - 1L))
+        scale. <- vapply(seq(ncol(x)), f, pi, USE.NAMES=FALSE)
+      } else scale. <- apply(x, 2L, function(v) sqrt(sum(v ^ 2) / max(1, length(v) - 1L)))
+      args$scale <- scale.
+    }
   } else args$scale <- scale.
   if (!missing(...)) args <- c(args, list(...))
 
@@ -91,6 +109,27 @@ control that algorithm's convergence tolerance. See `?prcomp_irlba` for help.")
     ans <- c(ans, list(x = sweep(s$u, 2, s$d, FUN=`*`)))
     colnames(ans$x) <- paste("PC", seq(1, ncol(ans$rotation)), sep="")
   }
-  class(ans) <- "prcomp"
+  ans$totalvar <- sum(apply(x, 2, var))
+  class(ans) <- c("irlba_prcomp", "prcomp")
   ans
 }
+
+#' Summary method for truncated pca objects computed by \code{prcomp_irlba}.
+#' @param object An object returned by \code{prcomp_irlba}.
+#' @param ... Optional arguments passed to \code{summary}.
+#' @method summary irlba_prcomp
+#' @export
+summary.irlba_prcomp <- function(object, ...)
+{
+  chkDots(...)
+  vars <- object$sdev ^ 2
+  vars <- vars / object$totalvar
+  importance <- rbind("Standard deviation" = object$sdev,
+                      "Proportion of Variance" = round(vars, 5),
+                      "Cumulative Proportion" = round(cumsum(vars), 5))
+  k <- ncol(object$rotation)
+  colnames(importance) <- c(colnames(object$rotation), rep("", length(vars) - k))
+  object$importance <- importance
+  class(object) <- "summary.prcomp"
+  object
+}
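
A short usage sketch of the new class and summary method defined above
(assumes the patched package; output is illustrative):

    library(irlba)
    set.seed(1)
    x <- matrix(rnorm(200), nrow = 20)
    p <- prcomp_irlba(x, n = 3)
    class(p)      # c("irlba_prcomp", "prcomp")
    summary(p)    # proportions computed against the total variance, p$totalvar
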
diff --git a/R/ssvd.R b/R/ssvd.R
new file mode 100644
index 0000000..36cadd7
--- /dev/null
+++ b/R/ssvd.R
@@ -0,0 +1,222 @@
+#' Sparse regularized low-rank matrix approximation.
+#'
+#' Estimate an \eqn{{\ell}1}{l1}-penalized
+#' singular value or principal components decomposition (SVD or PCA) that introduces sparsity in the
+#' right singular vectors based on the fast and memory-efficient
+#' sPCA-rSVD algorithm of Haipeng Shen and Jianhua Huang.
+#' @param x A numeric real- or complex-valued matrix or real-valued sparse matrix.
+#' @param k Matrix rank of the computed decomposition (see the Details section below).
+#' @param n Number of nonzero components in the right singular vectors. If \code{k > 1},
+#'        then a single value of \code{n} specifies the number of nonzero components
+#'        in each regularized right singular vector. Or, specify a vector of length
+#'        \code{k} indicating the number of desired nonzero components in each
+#'        returned vector. See the examples.
+#' @param maxit Maximum number of soft-thresholding iterations.
+#' @param tol Convergence is determined when \eqn{\|U_j - U_{j-1}\|_F < tol}{||U_j - U_{j-1}||_F < tol}, where \eqn{U_j} is the matrix of estimated left regularized singular vectors at iteration \eqn{j}.
+#' @param center a logical value indicating whether the variables should be
+#'        shifted to be zero centered. Alternately, a centering vector of length
+#'        equal the number of columns of \code{x} can be supplied. Use \code{center=TRUE}
+#'        to perform a regularized sparse PCA.
+#' @param scale. a logical value indicating whether the variables should be
+#'        scaled to have unit variance before the analysis takes place.
+#'        Alternatively, a vector of length equal the number of columns of \code{x} can be supplied.
+#'
+#'        The value of \code{scale} determines how column scaling is performed
+#'        (after centering).  If \code{scale} is a numeric vector with length
+#'        equal to the number of columns of \code{x}, then each column of \code{x} is
+#'        divided by the corresponding value from \code{scale}.  If \code{scale} is
+#'        \code{TRUE} then scaling is done by dividing the (centered) columns of
+#'        \code{x} by their standard deviations if \code{center=TRUE}, and the
+#'        root mean square otherwise.  If \code{scale} is \code{FALSE}, no scaling is done.
+#'        See \code{\link{scale}} for more details.
+#' @param alpha Optional  scalar regularization parameter between zero and one (see Details below).
+#' @param  tsvd Optional initial rank-k truncated SVD or PCA (skips computation if supplied).
+#' @param ... Additional arguments passed to \code{\link{irlba}}.
+#' @details
+#' The \code{ssvd} function implements a version of an algorithm by
+#' Shen and Huang that computes a penalized SVD or PCA that introduces
+#' sparsity in the right singular vectors by solving a penalized least squares problem.
+#' The algorithm in the rank 1 case finds vectors \eqn{u, w}{u, w} that minimize
+#' \deqn{\|x - u w^T\|_F^2 + \lambda \|w\|_1}{||x - u w^T||_F^2 + lambda||w||_1}
+#' such that \eqn{\|u\| = 1}{||u|| = 1},
+#' and then sets \eqn{v = w / \|w\|}{v = w / ||w||} and
+#' \eqn{d = u^T x v}{d = u^T x v};
+#' see the referenced paper for details. The penalty \eqn{\lambda}{lambda} is
+#' implicitly determined from the specified desired number of nonzero values \code{n}.
+#' Higher rank output is determined similarly
+#' but using a sequence of \eqn{\lambda}{lambda} values determined to maintain the desired number
+#' of nonzero elements in each column of \code{v} specified by \code{n}.
+#' Unlike standard SVD or PCA, the columns of the returned \code{v} when \code{k > 1} may not be orthogonal.
+#'
+#' @return
+#' A list containing the following components:
+#' \itemize{
+#'    \item{u} {regularized left singular vectors with orthonormal columns}
+#'    \item{d} {regularized upper-triangular projection matrix so that \code{x \%*\% v == u \%*\% d}}
+#'    \item{v} {regularized, sparse right singular vectors with columns of unit norm}
+#'    \item{center, scale} {the centering and scaling used, if any}
+#'    \item{lambda} {the per-column regularization parameter found to obtain the desired sparsity}
+#'    \item{iter} {number of soft thresholding iterations}
+#'    \item{n} {value of input parameter \code{n}}
+#'    \item{alpha} {value of input parameter \code{alpha}}
+#' }
+#' @note
+#' Our \code{ssvd} implementation of the Shen-Huang method makes the following choices:
+#' \enumerate{
+#' \item{The l1 penalty is the only available penalty function. Other penalties may appear in the future.}
+#' \item{Given a desired number of nonzero elements in \code{v}, value(s) for the \eqn{\lambda}{lambda}
+#'       penalty are determined to achieve the sparsity goal subject to the parameter \code{alpha}.}
+#' \item{An experimental block implementation is used for results with rank greater than 1 (when \code{k > 1})
+#'       instead of the deflation method described in the reference.}
+#' \item{The choice of a penalty lambda associated with a given number of desired nonzero
+#'       components is not unique. The \code{alpha} parameter, a scalar between zero and one,
+#'       selects any possible value of lambda that produces the desired number of
+#'       nonzero entries. The default \code{alpha = 0} selects a penalized solution with
+#'       largest corresponding value of \code{d} in the 1-d case. Think of \code{alpha} as
+#'       fine-tuning of the penalty.}
+#' \item{Our method returns an upper-triangular matrix \code{d} when \code{k > 1} so
+#'       that \code{x \%*\% v == u \%*\% d}. Non-zero
+#'       elements above the diagonal result from non-orthogonality of the \code{v} matrix,
+#'       providing a simple interpretation of cumulative information, or explained variance
+#'       in the PCA case, via the singular value decomposition of \code{d \%*\% t(v)}.}
+#' }
+#'
+#' What if you have no idea for values of the argument \code{n} (the desired sparsity)?
+#' The reference describes a cross-validation and an ad-hoc approach, neither of which is
+#' in the package yet. Both are prohibitively computationally expensive for matrices with a huge
+#' number of columns. A future version of this package will include a revised approach to
+#' automatically selecting a reasonable sparsity constraint.
+#'
+#' Compare with the similar but more general functions \code{SPC} and \code{PMD} in the \code{PMA} package
+#' by Daniela M. Witten, Robert Tibshirani, Sam Gross, and Balasubramanian Narasimhan.
+#' The \code{PMD} function can compute low-rank regularized matrix decompositions with sparsity penalties
+#' on both the \code{u} and \code{v} vectors. The \code{ssvd} function is
+#' similar to the PMD(*, L1) method invocation of \code{PMD} or alternatively the \code{SPC} function.
+#' Although less general than \code{PMD}(*),
+#' the \code{ssvd} function can be faster and more memory efficient for the
+#' basic sparse PCA problem.
+#' See the examples below for more information.
+#'
+#' (* Note that the s4vd package by Martin Sill and Sebastian Kaiser, \url{https://cran.r-project.org/package=s4vd},
+#' includes a fast optimized version of a closely related algorithm by Shen, Huang, and Marron that penalizes
+#' both \code{u} and \code{v}.)
+#'
+#' @references
+#' \itemize{
+#'   \item{Shen, Haipeng, and Jianhua Z. Huang. "Sparse principal component analysis via regularized low rank matrix approximation." Journal of multivariate analysis 99.6 (2008): 1015-1034.}
+#'   \item{Witten, Tibshirani and Hastie (2009) A penalized matrix decomposition, with applications to sparse principal components and canonical correlation analysis. _Biostatistics_ 10(3): 515-534.}
+#' }
+#' @examples
+#'
+#' set.seed(1)
+#' u <- matrix(rnorm(200), ncol=1)
+#' v <- matrix(c(runif(50, min=0.1), rep(0,250)), ncol=1)
+#' u <- u / drop(sqrt(crossprod(u)))
+#' v <- v / drop(sqrt(crossprod(v)))
+#' x <- u %*% t(v) + 0.001 * matrix(rnorm(200*300), ncol=300)
+#' s <- ssvd(x, n=50)
+#' table(actual=v[, 1] != 0, estimated=s$v[, 1] != 0)
+#' oldpar <- par(mfrow=c(2, 1))
+#' plot(u, cex=2, main="u (black circles), Estimated u (blue discs)")
+#' points(s$u, pch=19, col=4)
+#' plot(v, cex=2, main="v (black circles), Estimated v (blue discs)")
+#' points(s$v, pch=19, col=4)
+#'
+#' # Compare with SPC from the PMA package, regularizing only the v vector and choosing
+#' # the regularization constraint `sum(abs(s$v))` computed above by ssvd
+#' # (they find about the same solution in this "sparse SVD" case):
+#' if (requireNamespace("PMA", quietly = TRUE)) {
+#'   p <- PMA::SPC(x, sumabsv=sum(abs(s$v)), center=FALSE)
+#'   table(actual=v[, 1] != 0, estimated=p$v[, 1] != 0)
+#'   # compare optimized values
+#'   print(c(ssvd=s$d, SPC=p$d))
+#'
+#'   # Same example, but computing a "sparse PCA", again about the same results:
+#'   sp <- ssvd(x, n=50, center=TRUE)
+#'   pp <- PMA::SPC(x, sumabsv=sum(abs(sp$v)), center=TRUE)
+#'   print(c(ssvd=sp$d, SPC=pp$d))
+#' }
+#'
+#'
+#' # Let's consider a trivial rank-2 example (k=2) with noise. Like the
+#' # last example, we know the exact number of nonzero elements in each
+#' # solution vector of the noise-free matrix. Note the application of
+#' # different sparsity constraints on each column of the estimated v.
+#' set.seed(1)
+#' u <- qr.Q(qr(matrix(rnorm(400), ncol=2)))
+#' v <- matrix(0, ncol=2, nrow=300)
+#' v[sample(300, 15), 1] <- runif(15, min=0.1)
+#' v[sample(300, 50), 2] <- runif(50, min=0.1)
+#' v <- qr.Q(qr(v))
+#' x <- u %*% (c(2, 1) * t(v)) + .001 * matrix(rnorm(200 * 300), 200)
+#' s <- ssvd(x, k=2, n=colSums(v != 0))
+#'
+#' # Compare actual and estimated vectors:
+#' table(actual=v[, 1] != 0, estimated=s$v[, 1] != 0)
+#' table(actual=v[, 2] != 0, estimated=s$v[, 2] != 0)
+#' plot(v[, 1], cex=2, main="True v1 (black circles), Estimated v1 (blue discs)")
+#' points(s$v[, 1], pch=19, col=4)
+#' plot(v[, 2], cex=2, main="True v2 (black circles), Estimated v2 (blue discs)")
+#' points(s$v[, 2], pch=19, col=4)
+#' par(oldpar)
+#'
+#' @export
+ssvd <- function(x, k=1, n=2, maxit=500, tol=1e-3, center=FALSE, scale.=FALSE, alpha=0, tsvd=NULL, ...)
+{
+  if (alpha < 0  || alpha >= 1) stop("0 <= alpha < 1")
+  if (is.logical(center) && center) center <- colMeans(x)
+  if (is.logical(scale.))
+  {
+    if (scale.)
+    {
+      if (is.numeric(center))
+      {
+        f <- function(i) sqrt(sum( (x[, i] - center[i]) ^ 2) / (nrow(x) - 1L))
+        scale. <- vapply(seq(ncol(x)), f, pi, USE.NAMES=FALSE)
+      } else scale. <- apply(x, 2L, function(v) sqrt(sum(v ^ 2) / max(1, length(v) - 1L)))
+    }
+  }
+  if (all(n > ncol(x) - 1))
+  {
+    warning("no sparsity constraints specified")
+    return(irlba(x, k, ...))
+  }
+  n <- ncol(x) - n
+  if (length(n) != k) n <- rep(n, length.out=k) # warn?
+  s <- tsvd
+  if (is.null(tsvd)) s <- irlba(x, k, scale=scale., center=center, ...)
+  lambda <- c()
+  soft <- function(x, u, p)
+  {
+    y <- crossprod(x, u)
+    if (is.numeric(center)) y <- y - sum(u) * center
+    if (is.numeric(scale.)) y <- y / scale.
+    # apply a column-wise penalty
+    a <- abs(y)
+    z <- apply(a, 2, sort)
+    lambda <<- vapply(seq(length(p)), function(j) (1 - alpha) * z[p[j], j] + alpha * z[p[j] + 1, j], pi, USE.NAMES=FALSE)
+    sign(y) * pmax(sweep(a, 2, lambda, `-`), 0)
+  }
+  s$v <- s$d * s$v
+  iter <- 1
+  delta_u <- Inf
+  while (delta_u > tol && iter < maxit)
+  {
+    u <- s$u
+    s$v <- soft(x, s$u, n)
+    if (is.numeric(scale.)) s$v <- s$v / scale.
+    if (is.numeric(center)) s$u <- qr.Q(qr(x %*% s$v - drop(crossprod(center, s$v))))
+    else s$u <- qr.Q(qr(x %*% s$v))
+    delta_u <- sqrt(sum(apply(u - s$u, 2, crossprod)))
+    iter <- iter + 1
+  }
+  if (iter >= maxit)
+    warning("Maximum number of iterations reached before convergence: solution may not be optimal. Consider increasing 'maxit'.")
+  s$v <- s$v %*% diag(1 / sqrt(apply(s$v, 2, crossprod)), ncol(s$v), ncol(s$v))
+  d <- s$v
+  if (is.numeric(scale.)) d <- d / scale.
+  d1 <- x %*% d
+  if (is.numeric(center)) d1 <- d1 - drop(crossprod(center, d))
+  d <- crossprod(s$u, d1)
+  list(u = s$u, v = s$v, d = d, iter = iter, lambda = lambda, center=center, scale=scale., n=n, alpha=alpha)
+}
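
To make the lambda selection in soft() above concrete: with alpha = 0 the
threshold is the p-th smallest absolute entry in each column, so exactly p
entries are zeroed and n survive. A toy single-column sketch with invented
numbers:

    y     <- c(0.9, -0.2, 0.05, 0.6, -0.4, 0.1)
    keep  <- 3
    p     <- length(y) - keep              # entries to zero out, as in ssvd()
    z     <- sort(abs(y))
    alpha <- 0
    lambda <- (1 - alpha) * z[p] + alpha * z[p + 1]
    sign(y) * pmax(abs(y) - lambda, 0)     # exactly 3 nonzero entries remain
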
diff --git a/R/svdr.R b/R/svdr.R
index 0be2d24..fe3d85c 100644
--- a/R/svdr.R
+++ b/R/svdr.R
@@ -11,9 +11,14 @@
 #' the examples). In other problems, \code{irlba} may exhibit faster
 #' convergence.
 #'
+#' Also see an alternate implementation (\code{rsvd}) of this method by N. Benjamin Erichson
+#' in the rsvd package, \url{https://cran.r-project.org/package=rsvd}.
+#'
 #' @param x numeric real- or complex-valued matrix or real-valued sparse matrix.
 #' @param k dimension of subspace to estimate (number of approximate singular values to compute).
-#' @param it fixed number of algorithm iterations, larger values improve accuracy.
+#' @param tol stop iteration when the largest absolute relative change in estimated singular
+#'   values from one iteration to the next falls below this value.
+#' @param it maximum number of algorithm iterations.
 #' @param extra number of extra vectors of dimension \code{ncol(x)}, larger values generally improve accuracy (with increased
 #' computational cost).
 #' @param center optional column centering vector whose values are implicitly subtracted from each
@@ -22,15 +27,17 @@
 #'   Use for efficient principal components computation.
 #' @param Q optional initial random matrix, defaults to a matrix of size \code{ncol(x)} by \code{k + extra} with
 #' entries sampled from a normal random distribution.
+#' @param return.Q if \code{TRUE} return the \code{Q} matrix for restarting (see examples).
 #' @return
 #' Returns a list with entries:
 #' \describe{
 #'   \item{d:}{ k approximate singular values}
 #'   \item{u:}{ k approximate left singular vectors}
 #'   \item{v:}{ k approximate right singular vectors}
-#'   \item{mprod:}{ The total number of matrix vector products carried out}
+#'   \item{mprod:}{ total number of matrix products carried out}
+#'   \item{Q:}{ optional subspace matrix (when \code{return.Q=TRUE})}
 #' }
-#' @seealso \code{\link{irlba}}, \code{\link{svd}}
+#' @seealso \code{\link{irlba}}, \code{\link{svd}}, \code{rsvd} in the rsvd package
 #' @references
 #' Finding structure with randomness: Stochastic algorithms for constructing
 #' approximate matrix decompositions N. Halko, P. G. Martinsson, J. Tropp. Sep. 2009.
@@ -82,27 +89,34 @@
 #'            row.names=c("IRLBA", "Randomized SVD"))
 #' }
 #' @export
-svdr <- function(x, k, it=3, extra=10, center=NULL, Q=NULL)
+svdr <- function(x, k, tol=1e-5, it=100L, extra=min(10L, dim(x) - k), center=NULL, Q=NULL, return.Q=FALSE)
 {
   n <- min(ncol(x), k + extra)
   if (isTRUE(center)) center <- colMeans(x)
   if (is.null(Q)) Q <-  matrix(rnorm(ncol(x) * n), ncol(x))
+  d <- rep(0, k)
   for (j in 1:it)
   {
     if (is.null(center))
     {
       Q <- qr.Q(qr(x %*% Q))
-      Q <- qr.Q(qr(t(crossprod(Q, x))))   # a.k.a. Q <- qr.Q(qr(t(x) %*% Q)), but avoiding t(x)
+      B <- crossprod(x, Q)
+      Q <- qr.Q(qr(B))
     } else
     {
       Q <- qr.Q(qr(x %*% Q - rep(1, nrow(x)) %*% crossprod(center, Q)))
-      Q <- qr.Q(qr(t(crossprod(Q, x) - tcrossprod(crossprod(Q, rep(1, nrow(x))), center))))
+      B <- crossprod(Q, x) - tcrossprod(crossprod(Q, rep(1, nrow(x))), center)
+      Q <- qr.Q(qr(t(B)))
     }
+    d1 <- svd(B, nu=0, nv=0)$d[1:k]
+    if (max(abs( (d1 - d) / d)) < tol) break
+    d <- d1
   }
+  if (return.Q) Q1 <- Q
   if (is.null(center))
   {
     Q <- qr.Q(qr(x %*% Q))
-    B <- t(Q) %*% x
+    B <- crossprod(Q, x)
   } else
   {
     Q <- qr.Q(qr(x %*% Q - rep(1, nrow(x)) %*% crossprod(center, Q)))
@@ -113,6 +127,7 @@ svdr <- function(x, k, it=3, extra=10, center=NULL, Q=NULL)
   s$u <- s$u[, 1:k]
   s$d <- s$d[1:k]
   s$v <- s$v[, 1:k]
-  s$mprod <- 2 * it + 1
+  s$mprod <- 2 * j + 1
+  if (return.Q) s$Q <- Q1
   s
 }
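
A quick usage sketch of the revised svdr() interface, exercising the new
tol-based stopping rule and the return.Q restart option (assumes the
patched package):

    library(irlba)
    set.seed(1)
    A  <- matrix(rnorm(200 * 50), 200)
    s  <- svdr(A, k = 3, tol = 1e-6, return.Q = TRUE)
    s2 <- svdr(A, k = 3, Q = s$Q)    # warm start from the returned subspace
    s$d - svd(A)$d[1:3]              # estimation error of the singular values
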
diff --git a/README.md b/README.md
index 836ec4d..866f099 100644
--- a/README.md
+++ b/README.md
@@ -8,35 +8,22 @@ functions (see help on each for details and examples).
 
 * `irlba()` partial SVD function
 * `svdr()` alternate partial SVD function based on randomized SVD
+* `ssvd()` l1-penalized matrix decomposition for sparse PCA (based on Shen and Huang's algorithm)
 * `prcomp_irlba()`  principal components function similar to the `prcomp` function in stats package for computing the first few principal components of large matrices
 * `partial_eigen()` a very limited partial eigenvalue decomposition for symmetric matrices (see the [RSpectra](https://cran.r-project.org/package=RSpectra) package for more comprehensive truncated eigenvalue decomposition)
 
 Help documentation for each function includes extensive documentation and
 examples. Also see the package vignette, `vignette("irlba", package="irlba")`.
 
-## What's new in Version 2.2.1?
+An overview web page is here: https://bwlewis.github.io/irlba/.
 
-We include stronger convergence criteria and a new argument `svtol`
-for that. The new approach helps guarantee more accurate solutions for some
-difficult problems. The tradeoff is that the default behavior is a little
-slower than before because at least two Lanczos iterations are always run. The
-new convergence behavior can be disabled with `svtol=Inf`.
+## What's new in Version 2.3.0?
 
-The new package version includes a new function, svdr()--another state of the
-art truncated SVD method based on the randomized SVD algorithm of Gunnar
-Martinsson and others. Both irlba() and svdr() work well. Svdr uses a block
-method and may exhibit better convergence in problems where the largest
-singular values are clustered. See the documentation and examples in the
-package. (Block versions of irlba exists, but are not yet implemented by this R
-package--something coming in the future.)
-
-We re-introduced a solver for estimating the smallest singular values of a
-matrix and associated singular vector spaces. The solver is based on the
-oringial Harmonic Ritz vector augmentation method of Baglama and Reichel.
-Beware that this method is somewhat experimental and may fail to converge, or
-may converge poorly, to estimated singular values for very ill-conditioned
-matrices. Along with block methods for irlba, this is an active area of
-work--feel free to contribute!
+- Fixed an `irlba()` bug associated with centering (PCA), see https://github.com/bwlewis/irlba/issues/21.
+- Fixed `irlba()` scaling to conform to `scale`, see https://github.com/bwlewis/irlba/issues/22.
+- Improved `prcomp_irlba()` from a suggestion by N. Benjamin Erichson, see https://github.com/bwlewis/irlba/issues/23.
+- Significantly changed/improved the `svdr()` convergence criterion.
+- Added a version of Shen and Huang's Sparse PCA/SVD L1-penalized matrix decomposition (`ssvd()`).
 
 
 ## Deprecated features
@@ -90,7 +77,7 @@ flexible than using custom arguments with idiosyncratic syntax and behavior.
 We've even used the new approach to implement distributed parallel matrix
 products for very large problems with amazingly little code.
 
-## Wishlist
+## Wishlist / help wanted...
 
 - Optional block implementation for some use cases
 - More Matrix classes supported in the fast code path
@@ -98,10 +85,10 @@ products for very large problems with amazingly little code.
 
 ## References
 
-* Augmented Implicitly Restarted Lanczos Bidiagonalization Methods, J. Baglama and L. Reichel, SIAM J. Sci. Comput. 2005. (http://www.math.uri.edu/~jbaglama/papers/paper14.pdf)
-* Finding structure with randomness: Stochastic algorithms for constructing approximate matrix decompositions N. Halko, P. G. Martinsson, J. Tropp. Sep. 2009.
-
-
+* Baglama, James, and Lothar Reichel. "Augmented implicitly restarted Lanczos bidiagonalization methods." SIAM Journal on Scientific Computing 27.1 (2005): 19-42.
+* Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp. "Finding structure with randomness: Stochastic algorithms for constructing approximate matrix decompositions." (2009).
+* Shen, Haipeng, and Jianhua Z. Huang. "Sparse principal component analysis via regularized low rank matrix approximation." Journal of multivariate analysis 99.6 (2008): 1015-1034.
+* Witten, Daniela M., Robert Tibshirani, and Trevor Hastie. "A penalized matrix decomposition, with applications to sparse principal components and canonical correlation analysis." Biostatistics 10.3 (2009): 515-534.
 
 ## Status
 <a href="https://travis-ci.org/bwlewis/irlba">
@@ -113,3 +100,6 @@ products for very large problems with amazingly little code.
 <a href="https://www.r-pkg.org/pkg/irlba">
   <img src="http://cranlogs.r-pkg.org/badges/irlba" />
 </a>
+<a href="https://cran.r-project.org/package=irlba">
+  <img src="https://www.r-pkg.org/badges/version/irlba" />
+</a>
diff --git a/build/vignette.rds b/build/vignette.rds
index e919b8c..0441a8d 100644
Binary files a/build/vignette.rds and b/build/vignette.rds differ
diff --git a/inst/doc/irlba.Rnw b/inst/doc/irlba.Rnw
index 5f11eb8..8fbe2a8 100644
--- a/inst/doc/irlba.Rnw
+++ b/inst/doc/irlba.Rnw
@@ -189,10 +189,15 @@ in the estimated singular values:
 
 \subsection{Convergence tolerance}
 
-IRLBA is an iterative method that estimates a few largest singular values
+IRLBA is an iterative method that estimates a few singular values
 and associated singular vectors. A sketch of the algorithm is outlined
-in Section \ref{sketch} below. The R {\tt tol} argument controls when the algorithm converges.
-Convergence occurs when
+in Section \ref{sketch} below. The R {\tt tol} and {\tt svtol} arguments control
+when the algorithm converges, with {\tt tol} specifying
+subspace convergence and {\tt svtol} specifying convergence of estimated
+singular values.
+
+Subspace convergence occurs when the algorithm iterations find
+estimated singular vectors that satisfy
 \[
 \|AV_k - US_k\| < \mbox{tol} \cdot \|A\|,
 \]
@@ -205,10 +210,23 @@ the algorithm stops when
 L <- irlba(A, k, tol)
 svd(A %*% L$v - L$u %*% diag(L$d))$d[1] < tol * L$d[1]
 \end{lstlisting}
-It's possible, but unlikely, to encounter problems that fail to converge before
+It's possible to encounter problems that fail to converge before
 the maximum number of algorithm iterations specified by the {\tt maxit}
 argument.
 
+When the largest singular values are clustered together it can be hard to
+detect subspace convergence. More recent versions of the IRLBA implementation
+include the {\tt svtol} argument that specifies a maximum for the relative
+change in each estimated singular value from one iteration to the next.
+
+Together, these tolerance values help improve correct subspace
+detection in difficult settings where the singular values are clustered.
+But in the worst cases, block methods can perform better as shown in
+the documentation for the {\tt svdr} method.
+
+Also see the related {\tt rsvd} function by N. Benjamin Erichson,
+\href{https://cran.r-project.org/package=rsvd}{https://cran.r-project.org/package=rsvd}.
+
 
 \subsection{Differences with {\tt svd}}
 The {\tt irlba} function is designed to compute a {\it partial} singular
@@ -260,20 +278,20 @@ are only unique up to sign!
 >      x  <- matrix(rnorm(200), nrow=20)
 >      p1 <- prcomp_irlba(x, n=3)
 >      summary(p1)
-Importance of components:
-                         PC1    PC2    PC3
-Standard deviation     1.541 1.2513 1.1916
-Proportion of Variance 0.443 0.2921 0.2649
-Cumulative Proportion  0.443 0.7351 1.0000
+Importance of components%s:
+                          PC1    PC2    PC3
+Standard deviation     1.5411 1.2513 1.1916
+Proportion of Variance 0.2806 0.1850 0.1678
+Cumulative Proportion  0.2806 0.4656 0.6334
 
 >      # Compare with
 >      p2 <- prcomp(x, tol=0.7)
 >      summary(p2)
 Importance of components:
-                         PC1    PC2    PC3
-Standard deviation     1.541 1.2513 1.1916
-Proportion of Variance 0.443 0.2921 0.2649
-Cumulative Proportion  0.443 0.7351 1.0000
+                          PC1    PC2    PC3
+Standard deviation     1.5411 1.2513 1.1916
+Proportion of Variance 0.2806 0.1850 0.1678
+Cumulative Proportion  0.2806 0.4656 0.6334
 \end{lstlisting}
 Alternatively, you can compute principal components directly using the
 singular value decomposition and the {\tt center} option:
@@ -289,10 +307,7 @@ singular value decomposition and the {\tt center} option:
              [,1]
 [1,] 1.652423e-12
 \end{lstlisting}
-The implementation of the {\tt center} function argument takes advantage of
-computational efficiencies in the IRLB algorithm that result in a modest
-savings of a few vector inner products per iteration compared to a naive
-implementation using a custom matrix vector product to center the matrix.
+
 
 \subsection{Truncated symmetric eigenvalue decomposition}
 
@@ -302,59 +317,43 @@ sparse real-valued matrix. The function is particularly well-suited to
 estimating the largest eigenvalues and corresponding eigenvectors of symmetric
 positive semi-definite matrices of the form $A^T A$.
 
+
+
 \subsection{User-Defined Matrix Multiplication}
 
-The {\tt irlba} function includes options for specifying a custom matrix
-multiplication function. Custom matrix multiplication functions can be used,
-for example, with the {\tt big.matrix} class from the {\tt bigmemory}/{\tt
-bigalgebra} packages, or to compute the partial SVD of matrix-free linear
-operators.
-
-User-defined matrix operations may specified using the optional {\tt mult}
-parameter. If defined, it must be a function of two arguments that computes
-matrix vector products. Either argument can be a vector, and the {\tt mult}
-function must deal with that. The following example illustrates a simple
-custom matrix function that scales the columns of the matrix, and then
-compares it with other ways of doing the same thing.
+The {\tt irlba} function only uses matrix vector products with the input data
+matrix to compute its solution. It's easy to use R's native object model to
+define custom matrix classes with user-defined matrix multiplication functions.
+Such functions can be used to support special matrix objects, out of core
+computation of large problems, or matrix-free operators.
+
+Here is a simple example that defines a matrix product that scales the
+columns of the matrix to have unit norm (cf the {\tt scale} option).
+
 \begin{lstlisting}
-> set.seed(1)
-> A <- matrix(runif(200), nrow=20)
-> col_scale <- 1:10
-> mult <- function(x,y)
-+         {
-+           # check if x is a plain vector
-+           if(is.vector(x))
-+           {
-+             return((x %*% y)/col_scale)
-+           }
-+           # else x is the matrix
-+           x %*% (y/col_scale)
-+         }
-
-> irlba(A, 3, mult=mult)$d
-[1] 3.1384503 0.9477628 0.4313855
-
-> # Compare with:
-> irlba(A, 3, scale=col_scale)$d
-[1] 3.1384503 0.9477628 0.4313855
-
-> # Compare with:
-> svd(scale(A, scale=col_scale, center=FALSE))$d[1:3]
-[1] 3.1384503 0.9477628 0.4313855
+> A <- matrix(runif(400), nrow=20)
+> col_scale <- sqrt(apply(A, 2, crossprod))
+> setClass("scaled_matrix", contains="matrix", slots=c(scale="numeric"))
+> setMethod("%*%", signature(x="scaled_matrix", y="numeric"),
++       function(x, y) x@.Data %*% (y / x@scale))
+> setMethod("%*%", signature(x="numeric", y="scaled_matrix"),
++       function(x, y) (x %*% y@.Data) / y@scale)
+> a <- new("scaled_matrix", A, scale=col_scale)
+> irlba(a, 3)$d
+
+[1] 3.9298391 0.9565016 0.8266859
+
+# Compare with
+> svd(sweep(A, 2, col_scale, FUN=`/`))$d[1:3]
+
+[1] 3.9298391 0.9565016 0.8266859
 \end{lstlisting}
-Alternatively, simply use R's operator overloading methods to define
-customized matrix vector products. This is the approach taken in
-the
-\\
+
+See the following link for an example that uses large-scale out of core computation:
 \href{https://bwlewis.github.io/1000_genomes_examples/PCA_whole_genome.html}{http://bwlewis.github.io/1000\_genomes\_examples/PCA\_whole\_genome.html}
-vignette to work with large out of core genomics data.
-I prefer the simple operator overloading approach and may consider
-retiring the {\tt mult} function argument in some future package version.
-
-NOTE! When a user-defined matrix multiplication function is used, either using
-the {\tt mult} argument or through operator overloading, the R reference IRLBA
-implementation is used instead of the faster C code path in newer package
-versions.
+
+NOTE! The reference R algorithm implementation is used whenever user-defined
+matrix multiplication is specified (instead of the faster C code path).
 
 
 \section{A Quick Summary of the IRLBA Method}\label{sketch}
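
The svtol behavior described in the vignette changes above can be
exercised directly; per the documentation, svtol=Inf disables the
per-singular-value convergence check. A hedged sketch:

    library(irlba)
    set.seed(1)
    A <- matrix(rnorm(500 * 50), 500)
    d_loose <- irlba(A, 5, svtol = Inf)$d   # subspace criterion only
    d_tight <- irlba(A, 5)$d                # default also checks singular values
    max(abs(d_loose - d_tight) / d_tight)   # typically small
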
diff --git a/inst/doc/irlba.pdf b/inst/doc/irlba.pdf
index 432a249..73962b7 100644
Binary files a/inst/doc/irlba.pdf and b/inst/doc/irlba.pdf differ
diff --git a/man/irlba.Rd b/man/irlba.Rd
index d62dff6..b0fd54b 100644
--- a/man/irlba.Rd
+++ b/man/irlba.Rd
@@ -134,10 +134,11 @@ subspace. See the examples.
 
 The function may generate the following warnings:
 \itemize{
-  \item{"did not converge--results might be invalid!; try increasing maxit or fastpath=FALSE" means that the algorithm didn't
+  \item{"did not converge--results might be invalid!; try increasing work or maxit"
+  means that the algorithm didn't
   converge -- this is potentially a serious problem and the returned results may not be valid. \code{irlba}
   reports a warning here instead of an error so that you can inspect whatever is returned. If this
-  happens, carefully heed the warning and inspect the result.}
+  happens, carefully heed the warning and inspect the result. You may also try setting \code{fastpath=FALSE}.}
   \item{"You're computing a large percentage of total singular values, standard svd might work better!"
     \code{irlba} is designed to efficiently compute a few of the largest singular values and associated
      singular vectors of a matrix. The standard \code{svd} function will be more efficient for computing
@@ -183,24 +184,7 @@ cbind(P$v,
 
 # A custom matrix multiplication function that scales the columns of A
 # (cf the scale option). This function scales the columns of A to unit norm.
-# This approach is deprecated (see below for a bettwe way to do this).
 col_scale <- sqrt(apply(A, 2, crossprod))
-mult <- function(x, y)
-        {
-          # check if x is a  vector
-          if (is.vector(x))
-          {
-            return((x \%*\% y) / col_scale)
-          }
-          # else x is the matrix
-          x \%*\% (y / col_scale)
-        }
-irlba(A, 3, mult=mult)$d
-
-# Compare with:
-svd(sweep(A, 2, col_scale, FUN=`/`))$d[1:3]
-
-# Compare with the new recommended approach:
 setClass("scaled_matrix", contains="matrix", slots=c(scale="numeric"))
 setMethod("\%*\%", signature(x="scaled_matrix", y="numeric"),
    function(x, y) x@.Data \%*\% (y / x@scale))
@@ -209,11 +193,14 @@ setMethod("\%*\%", signature(x="numeric", y="scaled_matrix"),
 a <- new("scaled_matrix", A, scale=col_scale)
 irlba(a, 3)$d
 
+# Compare with:
+svd(sweep(A, 2, col_scale, FUN=`/`))$d[1:3]
+
+
 }
 \references{
-Augmented Implicitly Restarted Lanczos Bidiagonalization Methods, J. Baglama and L. Reichel, SIAM J. Sci. Comput. 2005.
+Baglama, James, and Lothar Reichel. "Augmented implicitly restarted Lanczos bidiagonalization methods." SIAM Journal on Scientific Computing 27.1 (2005): 19-42.
 }
 \seealso{
 \code{\link{svd}}, \code{\link{prcomp}}, \code{\link{partial_eigen}}, \code{\link{svdr}}
 }
-
diff --git a/man/partial_eigen.Rd b/man/partial_eigen.Rd
index efc1685..e26ccbb 100644
--- a/man/partial_eigen.Rd
+++ b/man/partial_eigen.Rd
@@ -63,4 +63,3 @@ Augmented Implicitly Restarted Lanczos Bidiagonalization Methods, J. Baglama and
 \seealso{
 \code{\link{eigen}}, \code{\link{irlba}}
 }
-
diff --git a/man/prcomp_irlba.Rd b/man/prcomp_irlba.Rd
index e7f5592..da4f864 100644
--- a/man/prcomp_irlba.Rd
+++ b/man/prcomp_irlba.Rd
@@ -20,9 +20,18 @@ shifted to be zero centered. Alternately, a centering vector of length
 equal the number of columns of \code{x} can be supplied.}
 
 \item{scale.}{a logical value indicating whether the variables should be
-scaled to have unit variance before the analysis takes place.
-The default is \code{FALSE} for consistency with S, but scaling is often advisable.
-Alternatively, a vector of length equal the number of columns of \code{x} can be supplied.}
+         scaled to have unit variance before the analysis takes place.
+         The default is \code{FALSE} for consistency with S, but scaling is often advisable.
+         Alternatively, a vector of length equal the number of columns of \code{x} can be supplied.
+
+         The value of \code{scale} determines how column scaling is performed
+         (after centering).  If \code{scale} is a numeric vector with length
+         equal to the number of columns of \code{x}, then each column of \code{x} is
+         divided by the corresponding value from \code{scale}.  If \code{scale} is
+         \code{TRUE} then scaling is done by dividing the (centered) columns of
+         \code{x} by their standard deviations if \code{center=TRUE}, and the
+         root mean square otherwise.  If \code{scale} is \code{FALSE}, no scaling is done.
+         See \code{\link{scale}} for more details.}
 
 \item{...}{additional arguments passed to \code{\link{irlba}}.}
 }
@@ -69,8 +78,8 @@ summary(p1)
 p2 <- prcomp(x, tol=0.7)
 summary(p2)
 
+
 }
 \seealso{
 \code{\link{prcomp}}
 }
-
diff --git a/man/ssvd.Rd b/man/ssvd.Rd
new file mode 100644
index 0000000..3b3c2bb
--- /dev/null
+++ b/man/ssvd.Rd
@@ -0,0 +1,185 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ssvd.R
+\name{ssvd}
+\alias{ssvd}
+\title{Sparse regularized low-rank matrix approximation.}
+\usage{
+ssvd(x, k = 1, n = 2, maxit = 500, tol = 0.001, center = FALSE,
+  scale. = FALSE, alpha = 0, tsvd = NULL, ...)
+}
+\arguments{
+\item{x}{A numeric real- or complex-valued matrix or real-valued sparse matrix.}
+
+\item{k}{Matrix rank of the computed decomposition (see the Details section below).}
+
+\item{n}{Number of nonzero components in the right singular vectors. If \code{k > 1},
+then a single value of \code{n} specifies the number of nonzero components
+in each regularized right singular vector. Or, specify a vector of length
+\code{k} indicating the number of desired nonzero components in each
+returned vector. See the examples.}
+
+\item{maxit}{Maximum number of soft-thresholding iterations.}
+
+\item{tol}{Convergence is determined when \eqn{\|U_j - U_{j-1}\|_F < tol}{||U_j - U_{j-1}||_F < tol}, where \eqn{U_j} is the matrix of estimated left regularized singular vectors at iteration \eqn{j}.}
+
+\item{center}{a logical value indicating whether the variables should be
+shifted to be zero centered. Alternately, a centering vector of length
+equal the number of columns of \code{x} can be supplied. Use \code{center=TRUE}
+to perform a regularized sparse PCA.}
+
+\item{scale.}{a logical value indicating whether the variables should be
+       scaled to have unit variance before the analysis takes place.
+       Alternatively, a vector of length equal the number of columns of \code{x} can be supplied.
+
+       The value of \code{scale} determines how column scaling is performed
+       (after centering).  If \code{scale} is a numeric vector with length
+       equal to the number of columns of \code{x}, then each column of \code{x} is
+       divided by the corresponding value from \code{scale}.  If \code{scale} is
+       \code{TRUE} then scaling is done by dividing the (centered) columns of
+       \code{x} by their standard deviations if \code{center=TRUE}, and the
+       root mean square otherwise.  If \code{scale} is \code{FALSE}, no scaling is done.
+       See \code{\link{scale}} for more details.}
+
+\item{alpha}{Optional  scalar regularization parameter between zero and one (see Details below).}
+
+\item{tsvd}{Optional initial rank-k truncated SVD or PCA (skips computation if supplied).}
+
+\item{...}{Additional arguments passed to \code{\link{irlba}}.}
+}
+\value{
+A list containing the following components:
+\itemize{
+   \item{u} {regularized left singular vectors with orthonormal columns}
+   \item{d} {regularized upper-triangular projection matrix so that \code{x \%*\% v == u \%*\% d}}
+   \item{v} {regularized, sparse right singular vectors with columns of unit norm}
+   \item{center, scale} {the centering and scaling used, if any}
+   \item{lambda} {the per-column regularization parameter found to obtain the desired sparsity}
+   \item{iter} {number of soft thresholding iterations}
+   \item{n} {value of input parameter \code{n}}
+   \item{alpha} {value of input parameter \code{alpha}}
+}
+}
+\description{
+Estimate an \eqn{{\ell}1}{l1}-penalized
+singular value or principal components decomposition (SVD or PCA) that introduces sparsity in the
+right singular vectors based on the fast and memory-efficient
+sPCA-rSVD algorithm of Haipeng Shen and Jianhua Huang.
+}
+\details{
+The \code{ssvd} function implements a version of an algorithm by
+Shen and Huang that computes a penalized SVD or PCA that introduces
+sparsity in the right singular vectors by solving a penalized least squares problem.
+The algorithm in the rank 1 case finds vectors \eqn{u, w}{u, w} that minimize
+\deqn{\|x - u w^T\|_F^2 + \lambda \|w\|_1}{||x - u w^T||_F^2 + lambda||w||_1}
+such that \eqn{\|u\| = 1}{||u|| = 1},
+and then sets \eqn{v = w / \|w\|}{v = w / ||w||} and
+\eqn{d = u^T x v}{d = u^T x v};
+see the referenced paper for details. The penalty \eqn{\lambda}{lambda} is
+implicitly determined from the specified desired number of nonzero values \code{n}.
+Higher rank output is determined similarly
+but using a sequence of \eqn{\lambda}{lambda} values determined to maintain the desired number
+of nonzero elements in each column of \code{v} specified by \code{n}.
+Unlike standard SVD or PCA, the columns of the returned \code{v} when \code{k > 1} may not be orthogonal.
+}
+\note{
+Our \code{ssvd} implementation of the Shen-Huang method makes the following choices:
+\enumerate{
+\item{The l1 penalty is the only available penalty function. Other penalties may appear in the future.}
+\item{Given a desired number of nonzero elements in \code{v}, value(s) for the \eqn{\lambda}{lambda}
+      penalty are determined to achieve the sparsity goal subject to the parameter \code{alpha}.}
+\item{An experimental block implementation is used for results with rank greater than 1 (when \code{k > 1})
+      instead of the deflation method described in the reference.}
+\item{The choice of a penalty lambda associated with a given number of desired nonzero
+      components is not unique. The \code{alpha} parameter, a scalar between zero and one,
+      selects any possible value of lambda that produces the desired number of
+      nonzero entries. The default \code{alpha = 0} selects a penalized solution with
+      largest corresponding value of \code{d} in the 1-d case. Think of \code{alpha} as
+      fine-tuning of the penalty.}
+\item{Our method returns an upper-triangular matrix \code{d} when \code{k > 1} so
+      that \code{x \%*\% v == u \%*\% d}. Non-zero
+      elements above the diagonal result from non-orthogonality of the \code{v} matrix,
+      providing a simple interpretation of cumulative information, or explained variance
+      in the PCA case, via the singular value decomposition of \code{d \%*\% t(v)}.}
+}
+
+What if you have no idea for values of the argument \code{n} (the desired sparsity)?
+The reference describes a cross-validation and an ad-hoc approach, neither of which is
+in the package yet. Both are prohibitively computationally expensive for matrices with a huge
+number of columns. A future version of this package will include a revised approach to
+automatically selecting a reasonable sparsity constraint.
+
+Compare with the similar but more general functions \code{SPC} and \code{PMD} in the \code{PMA} package
+by Daniela M. Witten, Robert Tibshirani, Sam Gross, and Balasubramanian Narasimhan.
+The \code{PMD} function can compute low-rank regularized matrix decompositions with sparsity penalties
+on both the \code{u} and \code{v} vectors. The \code{ssvd} function is
+similar to the PMD(*, L1) method invocation of \code{PMD} or alternatively the \code{SPC} function.
+Although less general than \code{PMD}(*),
+the \code{ssvd} function can be faster and more memory efficient for the
+basic sparse PCA problem.
+See the examples below for more information.
+
+(* Note that the s4vd package by Martin Sill and Sebastian Kaiser, \url{https://cran.r-project.org/package=s4vd},
+includes a fast optimized version of a closely related algorithm by Shen, Huang, and Marron, that penalizes
+both \code{u} and \code{v}.)
+}
+\examples{
+
+set.seed(1)
+u <- matrix(rnorm(200), ncol=1)
+v <- matrix(c(runif(50, min=0.1), rep(0,250)), ncol=1)
+u <- u / drop(sqrt(crossprod(u)))
+v <- v / drop(sqrt(crossprod(v)))
+x <- u \%*\% t(v) + 0.001 * matrix(rnorm(200*300), ncol=300)
+s <- ssvd(x, n=50)
+table(actual=v[, 1] != 0, estimated=s$v[, 1] != 0)
+oldpar <- par(mfrow=c(2, 1))
+plot(u, cex=2, main="u (black circles), Estimated u (blue discs)")
+points(s$u, pch=19, col=4)
+plot(v, cex=2, main="v (black circles), Estimated v (blue discs)")
+points(s$v, pch=19, col=4)
+
+# Compare with SPC from the PMA package, regularizing only the v vector and choosing
+# the regularization constraint `sum(abs(s$v))` computed above by ssvd
+# (they find about the same solution in this "sparse SVD" case):
+if (requireNamespace("PMA", quietly = TRUE)) {
+  p <- PMA::SPC(x, sumabsv=sum(abs(s$v)), center=FALSE)
+  table(actual=v[, 1] != 0, estimated=p$v[, 1] != 0)
+  # compare optimized values
+  print(c(ssvd=s$d, SPC=p$d))
+
+  # Same example, but computing a "sparse PCA", again about the same results:
+  sp <- ssvd(x, n=50, center=TRUE)
+  pp <- PMA::SPC(x, sumabsv=sum(abs(sp$v)), center=TRUE)
+  print(c(ssvd=sp$d, SPC=pp$d))
+}
+
+
+# Let's consider a trivial rank-2 example (k=2) with noise. Like the
+# last example, we know the exact number of nonzero elements in each
+# solution vector of the noise-free matrix. Note the application of
+# different sparsity constraints on each column of the estimated v.
+set.seed(1)
+u <- qr.Q(qr(matrix(rnorm(400), ncol=2)))
+v <- matrix(0, ncol=2, nrow=300)
+v[sample(300, 15), 1] <- runif(15, min=0.1)
+v[sample(300, 50), 2] <- runif(50, min=0.1)
+v <- qr.Q(qr(v))
+x <- u \%*\% (c(2, 1) * t(v)) + 0.001 * matrix(rnorm(200 * 300), 200)
+s <- ssvd(x, k=2, n=colSums(v != 0))
+
+# Compare actual and estimated vectors:
+table(actual=v[, 1] != 0, estimated=s$v[, 1] != 0)
+table(actual=v[, 2] != 0, estimated=s$v[, 2] != 0)
+plot(v[, 1], cex=2, main="True v1 (black circles), Estimated v1 (blue discs)")
+points(s$v[, 1], pch=19, col=4)
+plot(v[, 2], cex=2, main="True v2 (black circles), Estimated v2 (blue discs)")
+points(s$v[, 2], pch=19, col=4)
+par(oldpar)
+
+}
+\references{
+\itemize{
+  \item{Shen, Haipeng, and Jianhua Z. Huang. "Sparse principal component analysis via regularized low rank matrix approximation." Journal of Multivariate Analysis 99.6 (2008): 1015-1034.}
+  \item{Witten, Tibshirani and Hastie (2009) A penalized matrix decomposition, with applications to sparse principal components and canonical correlation analysis. _Biostatistics_ 10(3): 515-534.}
+}
+}
diff --git a/man/summary.irlba_prcomp.Rd b/man/summary.irlba_prcomp.Rd
new file mode 100644
index 0000000..50f0dc0
--- /dev/null
+++ b/man/summary.irlba_prcomp.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/prcomp.R
+\name{summary.irlba_prcomp}
+\alias{summary.irlba_prcomp}
+\title{Summary method for truncated PCA objects computed by \code{prcomp_irlba}.}
+\usage{
+\method{summary}{irlba_prcomp}(object, ...)
+}
+\arguments{
+\item{object}{An object returned by \code{prcomp_irlba}.}
+
+\item{...}{Optional arguments passed to \code{summary}.}
+}
+\description{
+Summary method for truncated PCA objects computed by \code{prcomp_irlba}.
+}
diff --git a/man/svdr.Rd b/man/svdr.Rd
index 608a45b..7fb8b87 100644
--- a/man/svdr.Rd
+++ b/man/svdr.Rd
@@ -5,14 +5,18 @@
 \title{Find a few approximate largest singular values and corresponding
 singular vectors of a matrix.}
 \usage{
-svdr(x, k, it = 3, extra = 10, center = NULL, Q = NULL)
+svdr(x, k, tol = 1e-05, it = 100L, extra = min(10L, dim(x) - k),
+  center = NULL, Q = NULL, return.Q = FALSE)
 }
 \arguments{
 \item{x}{numeric real- or complex-valued matrix or real-valued sparse matrix.}
 
 \item{k}{dimension of subspace to estimate (number of approximate singular values to compute).}
 
-\item{it}{fixed number of algorithm iterations, larger values improve accuracy.}
+\item{tol}{stop iteration when the largest absolute relative change in estimated singular
+values from one iteration to the next falls below this value.}
+
+\item{it}{maximum number of algorithm iterations.}
 
 \item{extra}{number of extra vectors of dimension \code{ncol(x)}, larger values generally improve accuracy (with increased
 computational cost).}
@@ -24,6 +28,8 @@ Use for efficient principal components computation.}
 
 \item{Q}{optional initial random matrix, defaults to a matrix of size \code{ncol(x)} by \code{k + extra} with
 entries sampled from a normal random distribution.}
+
+\item{return.Q}{if \code{TRUE} return the \code{Q} matrix for restarting (see examples).}
 }
 \value{
 Returns a list with entries:
@@ -31,7 +37,8 @@ Returns a list with entries:
   \item{d:}{ k approximate singular values}
   \item{u:}{ k approximate left singular vectors}
   \item{v:}{ k approximate right singular vectors}
-  \item{mprod:}{ The total number of matrix vector products carried out}
+  \item{mprod:}{ total number of matrix products carried out}
+  \item{Q:}{ optional subspace matrix (when \code{return.Q=TRUE})}
 }
 }
 \description{
@@ -45,6 +52,10 @@ less work for problems with clustered large singular values (see
 the examples). In other problems, \code{irlba} may exhibit faster
 convergence.
 }
+\details{
+Also see an alternative implementation (\code{rsvd}) of this method by N. Benjamin Erichson
+in the \code{rsvd} package, \url{https://cran.r-project.org/package=rsvd}.
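+
+For intuition only, here is a minimal sketch of the randomized block idea
+for a hypothetical matrix \code{A} (omitting the convergence test, restart
+capability, and centering that \code{svdr} provides):
+\preformatted{
+set.seed(1)
+A <- matrix(rnorm(100 * 20), 100)
+k <- 3; extra <- 5
+Q <- matrix(rnorm(ncol(A) * (k + extra)), ncol(A))  # random start
+for (i in 1:5) {                    # a few power iterations
+  Q <- qr.Q(qr(A \%*\% Q))
+  Q <- qr.Q(qr(crossprod(A, Q)))
+}
+B <- A \%*\% Q                        # project A onto the subspace
+svd(B)$d[1:k]                       # approximate top singular values
+}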
+}
 \examples{
 set.seed(1)
 
@@ -98,6 +109,5 @@ Finding structure with randomness: Stochastic algorithms for constructing
 approximate matrix decompositions. N. Halko, P. G. Martinsson, J. Tropp. Sep. 2009.
 }
 \seealso{
-\code{\link{irlba}}, \code{\link{svd}}
+\code{\link{irlba}}, \code{\link{svd}}, \code{rsvd} in the rsvd package
 }
-
diff --git a/src/irlb.c b/src/irlb.c
index e269341..6ebd989 100644
--- a/src/irlb.c
+++ b/src/irlb.c
@@ -96,7 +96,7 @@ IRLB (SEXP X, SEXP NU, SEXP INIT, SEXP WORK, SEXP MAXIT, SEXP TOL, SEXP EPS,
   double *V1, *U1, *W, *F, *B, *BU, *BV, *BS, *BW, *res, *T, *scale, *shift,
     *center, *SVRATIO;
   int i, iter, mprod, ret;
-  R_xlen_t m, n;
+  int m, n;
 
   int mult = INTEGER (MULT)[0];
   void *AS = NULL;
@@ -115,7 +115,7 @@ IRLB (SEXP X, SEXP NU, SEXP INIT, SEXP WORK, SEXP MAXIT, SEXP TOL, SEXP EPS,
       n = ncols (X);
     }
   int nu = INTEGER (NU)[0];
-  R_xlen_t work = INTEGER (WORK)[0];
+  int work = INTEGER (WORK)[0];
   int maxit = INTEGER (MAXIT)[0];
   double tol = REAL (TOL)[0];
   double svtol = REAL (SVTOL)[0];
@@ -194,7 +194,7 @@ IRLB (SEXP X, SEXP NU, SEXP INIT, SEXP WORK, SEXP MAXIT, SEXP TOL, SEXP EPS,
  */
 int
 irlb (double *A,                // Input data matrix (double case)
-      void * AS,                // input data matrix (sparse case)
+      void *AS,                 // input data matrix (sparse case)
       int mult,                 // 0 -> use double *A, 1 -> use AS
       int m,                    // data matrix number of rows, must be > 3.
       int n,                    // data matrix number of columns, must be > 3.
@@ -245,6 +245,8 @@ irlb (double *A,                // Input data matrix (double case)
 
   if (restart == 0)
     memset (B, 0, work * work * sizeof (double));
+  memset(svratio, 0, work * sizeof(double));
+
 /* Main iteration */
   while (iter < maxit)
     {
@@ -325,9 +327,10 @@ irlb (double *A,                // Input data matrix (double case)
             }
           mprod++;
           R_CheckUserInterrupt ();
-/* optionally apply shift and scale */
+/* optionally apply shift, scale, center */
           if (shift)
             {
+              // Looping over m here is safe: shift applies only to square matrices (m == n)
               for (kk = 0; kk < m; ++kk)
                 F[kk] = F[kk] + shift[0] * W[j * m + kk];
             }
@@ -336,6 +339,13 @@ irlb (double *A,                // Input data matrix (double case)
               for (kk = 0; kk < n; ++kk)
                 F[kk] = F[kk] / scale[kk];
             }
+          if (center)
+            {
+              /* Implicit centering: t(A - 1*c^T) w = t(A) w - (1^T w) c,
+                 where c is the centering vector, so subtract
+                 (sum of w) * center from F without forming the
+                 centered matrix explicitly. */
+              beta = 0;
+              for (kk = 0; kk < m; ++kk) beta += W[j * m + kk];
+              for (kk = 0; kk < n; ++kk)
+                F[kk] = F[kk] - beta * center[kk];
+            }
           SS = -S;
           F77_NAME (daxpy) (&n, &SS, V + j * n, &inc, F, &inc);
           orthog (V, F, T, n, j + 1, 1);
diff --git a/src/utility.c b/src/utility.c
index a2d6385..e3ac3cc 100644
--- a/src/utility.c
+++ b/src/utility.c
@@ -75,7 +75,7 @@ convtests (int Bsz, int n, double tol, double svtol, double Smax,
            double *svratio, double *residuals, int *k, int *converged, double S)
 {
   int j, Len_res = 0;
-  for (j = 0; j < Bsz; ++j)
+  for (j = 0; j < Bsz; j++)
     {
       if ((fabs (residuals[j]) < tol * Smax) && (svratio[j] < svtol))
         Len_res++;
diff --git a/tests/edge.R b/tests/edge.R
index 19da719..02b9ce1 100644
--- a/tests/edge.R
+++ b/tests/edge.R
@@ -34,7 +34,7 @@ test <- function()
   # Convergence
   loc <<- "convergence"
   A <- S$u %*% (c(1e-5, rep(1e-9, 9)) * t(S$v))
-  for (tol in 10 ^ - (7:12))
+  for (tol in 10 ^ - (7:11))
   {
     L <- irlba(A, 3, tol=tol, svtol=Inf)
     converged <- svd(A %*% L$v - L$u  %*% diag(L$d))$d[1] < tol * L$d[1]
diff --git a/tests/prcomp.r b/tests/prcomp.r
new file mode 100644
index 0000000..4b2d260
--- /dev/null
+++ b/tests/prcomp.r
@@ -0,0 +1,91 @@
+require("irlba")
+
+# prcomp convenience function
+x  <- matrix(rnorm(200), nrow=20)
+p1 <- prcomp_irlba(x, n=3)
+p2 <- prcomp(x, tol=0.7)
+if (!isTRUE(all.equal(p1$sdev[1:2], p2$sdev[1:2])))
+{
+  stop("Failed basic prcomp test")
+}
+
+s <- summary(p1)
+
+# scaling bug identified in issue #21
+normalize_signs <- function(X, Y) {
+  for (i in 1:ncol(X)) {
+    if (sign(X[1, i]) != sign(Y[1, i])) {
+      Y[, i] <- -Y[, i]
+    }
+  }
+  return(Y)
+}
+
+all.equal_pca <- function(X, Y) {
+  Y <- normalize_signs(X, Y)
+  return(all.equal(X, Y, check.attributes=FALSE, tolerance=1e-4))
+}
+
+set.seed(1)
+X <- matrix(rnorm(2000), ncol=40)
+M <- 5 # number of PCA components
+centers <- colMeans(X)
+sds <- apply(X, 2, sd)
+rms <- apply(X, 2, function(x) sqrt(sum(x^2) / (length(x) - 1)))
+Xc <- sweep(X, 2, centers, `-`)
+Xs <- sweep(X, 2, sds, `/`)
+Xcs <- sweep(Xc, 2, sds, `/`)
+Xrms <- sweep(X, 2, rms, `/`)
+
+# unscaled
+scaled <- FALSE
+centered <- FALSE
+pca <- prcomp(X, center=centered, scale.=scaled)
+sv <- svd(X)
+svir <- irlba(X, nv=M, nu=M)
+pcair <- prcomp_irlba(X, n=M, center=centered, scale.=scaled)
+Xpca <- predict(pca)[, 1:M]
+Xsvl <- sv$u[, 1:M] %*% diag(sv$d[1:M])
+Xsvr <- X %*% sv$v[, 1:M]
+Xsvirl <- svir$u %*% diag(svir$d)
+Xsvirr <- X %*% svir$v
+Xpcair <- predict(pcair)
+Xpcair2 <- X %*% pcair$rotation
+
+# stop if any of these equivalent formulations disagree
+if (!(isTRUE(all.equal_pca(Xsvl, Xsvr)) &&
+      isTRUE(all.equal_pca(Xpca, Xsvl)) &&
+      isTRUE(all.equal_pca(Xsvirl, Xsvirr)) &&
+      isTRUE(all.equal_pca(Xpca, Xsvirl)) &&
+      isTRUE(all.equal_pca(Xpcair, Xpcair2)) &&
+      isTRUE(all.equal_pca(Xpca, Xpcair)) &&
+      isTRUE(all.equal_pca(Xpcair, Xsvirl))))
+{
+  stop("failed unscaled, uncentered prcomp")
+}
+
+# scaled, uncentered
+scaled <- TRUE
+centered <- FALSE
+pca <- prcomp(X, center=centered, scale.=scaled)
+sv <- svd(Xrms)
+svir <- irlba(X, nv=M, nu=M, scale=rms)
+pcair <- prcomp_irlba(X, n=M, center=centered, scale.=scaled)
+
+Xpca <- predict(pca)[, 1:M]
+Xsvl <- sv$u[, 1:M] %*% diag(sv$d[1:M])
+Xsvr <- Xrms %*% sv$v[, 1:M]
+Xsvirl <- svir$u %*% diag(svir$d)
+Xsvirr <- Xrms %*% svir$v
+Xpcair <- predict(pcair)
+Xpcair2 <- Xrms %*% pcair$rotation
+
+# stop if any of these equivalent formulations disagree
+if (!(isTRUE(all.equal_pca(Xsvl, Xsvr)) &&
+      isTRUE(all.equal_pca(Xpca, Xsvl)) &&
+      isTRUE(all.equal_pca(Xsvirl, Xsvirr)) &&
+      isTRUE(all.equal_pca(Xpca, Xsvirl)) &&
+      isTRUE(all.equal_pca(Xpcair, Xpcair2)) &&
+      isTRUE(all.equal_pca(Xpca, Xpcair)) &&
+      isTRUE(all.equal_pca(Xpcair, Xsvirl))))
+{
+  stop("failed scaled, uncentered prcomp")
+}
diff --git a/tests/ssvd.R b/tests/ssvd.R
new file mode 100644
index 0000000..50fd999
--- /dev/null
+++ b/tests/ssvd.R
@@ -0,0 +1,34 @@
+# Tests for sparse SVD/PCA
+require("irlba")
+loc <- ""
+
+test <- function()
+{
+  on.exit(message("Error occured in: ", loc))
+  loc <<- "sparse SVD"
+  set.seed(1)
+  x <- matrix(rnorm(100), 10)
+  s <- ssvd(x, 1, n=5)
+  stopifnot(isTRUE(all.equal(sqrt(drop(crossprod(x %*% s$v - s$u %*% s$d))), 0)))
+
+  loc <<- "sparse PCA"
+  set.seed(1)
+  x <- matrix(rnorm(100), 10)
+  s <- ssvd(x, 1, n=5, center=TRUE)
+  stopifnot(isTRUE(all.equal(sqrt(drop(crossprod(scale(x, center=TRUE, scale=FALSE) %*% s$v - s$u %*% s$d))), 0)))
+
+  loc <<- "sparse PCA + scale"
+  set.seed(1)
+  x <- matrix(rnorm(100), 10)
+  s <- ssvd(x, 1, n=5, center=TRUE, scale.=TRUE)
+  stopifnot(isTRUE(all.equal(sqrt(drop(crossprod(scale(x, center=TRUE, scale=TRUE) %*% s$v - s$u %*% s$d))), 0)))
+
+  loc <<- "sparse scaled"
+  set.seed(1)
+  x <- matrix(rnorm(100), 10)
+  s <- ssvd(x, 1, n=5, center=FALSE, scale.=TRUE)
+  stopifnot(isTRUE(all.equal(sqrt(drop(crossprod(scale(x, center=FALSE, scale=TRUE) %*% s$v - s$u %*% s$d))), 0)))
+
+  on.exit()
+}
+test()
diff --git a/tests/svdr.R b/tests/svdr.R
index 7feecdb..f1acdd2 100644
--- a/tests/svdr.R
+++ b/tests/svdr.R
@@ -15,13 +15,13 @@ test <- function()
 
   loc <<- "svdr dense m > n"
   A <- matrix(rnorm(50 * 40), 50)
-  L <- svdr(A, 5, 10, extra=15)
+  L <- svdr(A, 5, extra=15)
   S <- svd(A, nu=5, nv=5)
   stopifnot(isTRUE(all.equal(L$d, S$d[1:5])))
 
   loc <<- "svdr dense m < n"
   A <- matrix(rnorm(50 * 40), 40)
-  L <- svdr(A, 5, 10, extra=15)
+  L <- svdr(A, 5, extra=15)
   S <- svd(A, nu=5, nv=5)
   stopifnot(isTRUE(all.equal(L$d, S$d[1:5])))
 
diff --git a/tests/test.R b/tests/test.R
index ecaabdf..b8c85b8 100644
--- a/tests/test.R
+++ b/tests/test.R
@@ -115,15 +115,6 @@ for (FAST in c(FALSE, TRUE))
     stop("Failed reorthogonalization test", " fastpath=", FAST)
   }
 
-  # prcomp convenience function
-  x  <- matrix(rnorm(200), nrow=20)
-  p1 <- prcomp_irlba(x, n=3, fastpath=FAST)
-  p2 <- prcomp(x, tol=0.7)
-  if (!isTRUE(all.equal(p1$sdev[1:2], p2$sdev[1:2])))
-  {
-    stop("Failed prcomp test", " fastpath=", FAST)
-  }
-
   # very non-square dense matrices
   set.seed(1)
   A <- matrix(rnorm(2000), 20)
@@ -161,6 +152,31 @@ for (FAST in c(FALSE, TRUE))
   {
     stop("Failed tprolate test fastpath=", FAST)
   }
+
+  # test for issue #7 and issue #14
+  mx <- matrix(sample(1:100, 100 * 100, replace=TRUE), nrow=100)
+  set.seed(1)
+  l <- irlba(mx, nv=30, center=colMeans(mx), fastpath=FAST)
+  s <- svd(scale(mx, center=TRUE, scale=FALSE))
+  if (isTRUE(max(abs(l$d - s$d[1:30])) > 1e-3))
+  {
+    stop("Failed integer matrix test fastpath=", FAST)
+  }
+
+  # test for https://github.com/bwlewis/irlba/issues/22
+  set.seed(1000)
+  ncells <- 50
+  ngenes <- 1000
+  counts <- matrix(as.double(rpois(ncells*ngenes, lambda=100)), nrow=ncells)
+  centers <- colMeans(counts)
+  set.seed(1)
+  out <- irlba(scale(counts, scale=FALSE, center=centers), nu=10, nv=10)
+  set.seed(1)
+  l <- irlba(counts, center=centers, nu=10, nv=10, fastpath=FAST)
+  if (isTRUE(max(abs(out$d - l$d)) > 1e-3))
+  {
+    stop("Failed centering test (n > m) fastpath=", FAST)
+  }
 }
 
 # smallest=TRUE, m > n  (fastpath always FALSE in this case)
diff --git a/vignettes/irlba.Rnw b/vignettes/irlba.Rnw
index 5f11eb8..8fbe2a8 100644
--- a/vignettes/irlba.Rnw
+++ b/vignettes/irlba.Rnw
@@ -189,10 +189,15 @@ in the estimated singular values:
 
 \subsection{Convergence tolerance}
 
-IRLBA is an iterative method that estimates a few largest singular values
+IRLBA is an iterative method that estimates a few singular values
 and associated singular vectors. A sketch of the algorithm is outlined
-in Section \ref{sketch} below. The R {\tt tol} argument controls when the algorithm converges.
-Convergence occurs when
+in Section \ref{sketch} below. The R {\tt tol} and {\tt svtol} arguments control
+when the algorithm converges, with {\tt tol} specifying
+subspace convergence and {\tt svtol} specifying convergence of the estimated
+singular values.
+
+Subspace convergence occurs when the algorithm iterations find
+estimated singular vectors that satisfy
 \[
 \|AV_k - US_k\| < \mbox{tol} \cdot \|A\|,
 \]
@@ -205,10 +210,23 @@ the algorithm stops when
 L <- irlba(A, k, tol)
 svd(A %*% L$v - L$u %*% diag(L$d))$d[1] < tol * L$d[1]
 \end{lstlisting}
-It's possible, but unlikely, to encounter problems that fail to converge before
+It's possible to encounter problems that fail to converge before
 the maximum number of algorithm iterations specified by the {\tt maxit}
 argument.
 
+When the largest singular values are clustered together, it can be hard to
+detect subspace convergence. More recent versions of the IRLBA implementation
+include the {\tt svtol} argument, which specifies the maximum allowed relative
+change in each estimated singular value from one iteration to the next.
+
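+For example (an illustrative invocation, not taken verbatim from the package
+documentation), setting {\tt svtol=Inf} effectively disables the singular
+value test, leaving only the subspace convergence test above:
+\begin{lstlisting}
+L <- irlba(A, k, tol=tol, svtol=Inf)    # subspace test only
+L <- irlba(A, k, tol=tol, svtol=1e-5)   # also require stable singular values
+\end{lstlisting}
+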
+Together, the two convergence tolerances help detect the correct subspace
+in difficult settings where the singular values are clustered.
+In the worst cases, block methods can perform better, as shown in
+the documentation for the {\tt svdr} method.
+
+Also see the related {\tt rsvd} function by N. Benjamin Erichson,
+\href{https://cran.r-project.org/package=rsvd}{https://cran.r-project.org/package=rsvd}.
+
 
 \subsection{Differences with {\tt svd}}
 The {\tt irlba} function is designed to compute a {\it partial} singular
@@ -260,20 +278,20 @@ are only unique up to sign!
 >      x  <- matrix(rnorm(200), nrow=20)
 >      p1 <- prcomp_irlba(x, n=3)
 >      summary(p1)
-Importance of components:
-                         PC1    PC2    PC3
-Standard deviation     1.541 1.2513 1.1916
-Proportion of Variance 0.443 0.2921 0.2649
-Cumulative Proportion  0.443 0.7351 1.0000
+Importance of components:
+                          PC1    PC2    PC3
+Standard deviation     1.5411 1.2513 1.1916
+Proportion of Variance 0.2806 0.1850 0.1678
+Cumulative Proportion  0.2806 0.4656 0.6334
 
 >      # Compare with
 >      p2 <- prcomp(x, tol=0.7)
 >      summary(p2)
 Importance of components:
-                         PC1    PC2    PC3
-Standard deviation     1.541 1.2513 1.1916
-Proportion of Variance 0.443 0.2921 0.2649
-Cumulative Proportion  0.443 0.7351 1.0000
+                          PC1    PC2    PC3
+Standard deviation     1.5411 1.2513 1.1916
+Proportion of Variance 0.2806 0.1850 0.1678
+Cumulative Proportion  0.2806 0.4656 0.6334
 \end{lstlisting}
 Alternatively, you can compute principal components directly using the
 singular value decomposition and the {\tt center} option:
@@ -289,10 +307,7 @@ singular value decomposition and the {\tt center} option:
              [,1]
 [1,] 1.652423e-12
 \end{lstlisting}
-The implementation of the {\tt center} function argument takes advantage of
-computational efficiencies in the IRLB algorithm that result in a modest
-savings of a few vector inner products per iteration compared to a naive
-implementation using a custom matrix vector product to center the matrix.
+
 
 \subsection{Truncated symmetric eigenvalue decomposition}
 
@@ -302,59 +317,43 @@ sparse real-valued matrix. The function is particularly well-suited to
 estimating the largest eigenvalues and corresponding eigenvectors of symmetric
 positive semi-definite matrices of the form $A^T A$.
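+
+For example, here is a small illustration (not taken from the package
+documentation) comparing {\tt partial\_eigen} with {\tt eigen}:
+\begin{lstlisting}
+set.seed(1)
+x <- crossprod(matrix(rnorm(2000), 40))   # symmetric positive semi-definite
+p <- partial_eigen(x, 3)
+max(abs(p$values - eigen(x)$values[1:3])) # near zero
+\end{lstlisting}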
 
+
+
 \subsection{User-Defined Matrix Multiplication}
 
-The {\tt irlba} function includes options for specifying a custom matrix
-multiplication function. Custom matrix multiplication functions can be used,
-for example, with the {\tt big.matrix} class from the {\tt bigmemory}/{\tt
-bigalgebra} packages, or to compute the partial SVD of matrix-free linear
-operators.
-
-User-defined matrix operations may specified using the optional {\tt mult}
-parameter. If defined, it must be a function of two arguments that computes
-matrix vector products. Either argument can be a vector, and the {\tt mult}
-function must deal with that. The following example illustrates a simple
-custom matrix function that scales the columns of the matrix, and then
-compares it with other ways of doing the same thing.
+The {\tt irlba} function only uses matrix-vector products with the input data
+matrix to compute its solution. It's easy to use R's native object model to
+define custom matrix classes with user-defined matrix multiplication functions.
+Such functions can be used to support special matrix objects, out-of-core
+computation of large problems, or matrix-free operators.
+
+Here is a simple example that defines a matrix product that scales the
+columns of the matrix to have unit norm (compare the {\tt scale} option):
+
 \begin{lstlisting}
-> set.seed(1)
-> A <- matrix(runif(200), nrow=20)
-> col_scale <- 1:10
-> mult <- function(x,y)
-+         {
-+           # check if x is a plain vector
-+           if(is.vector(x))
-+           {
-+             return((x %*% y)/col_scale)
-+           }
-+           # else x is the matrix
-+           x %*% (y/col_scale)
-+         }
-
-> irlba(A, 3, mult=mult)$d
-[1] 3.1384503 0.9477628 0.4313855
-
-> # Compare with:
-> irlba(A, 3, scale=col_scale)$d
-[1] 3.1384503 0.9477628 0.4313855
-
-> # Compare with:
-> svd(scale(A, scale=col_scale, center=FALSE))$d[1:3]
-[1] 3.1384503 0.9477628 0.4313855
+> A <- matrix(runif(400), nrow=20)
+> col_scale <- sqrt(apply(A, 2, crossprod))
+> setClass("scaled_matrix", contains="matrix", slots=c(scale="numeric"))
+> setMethod("%*%", signature(x="scaled_matrix", y="numeric"),
++       function(x, y) x@.Data %*% (y / x@scale))
+> setMethod("%*%", signature(x="numeric", y="scaled_matrix"),
++       function(x, y) (x %*% y@.Data) / y@scale)
+> a <- new("scaled_matrix", A, scale=col_scale)
+> irlba(a, 3)$d
+
+[1] 3.9298391 0.9565016 0.8266859
+
+> # Compare with
+> svd(sweep(A, 2, col_scale, FUN=`/`))$d[1:3]
+
+[1] 3.9298391 0.9565016 0.8266859
 \end{lstlisting}
-Alternatively, simply use R's operator overloading methods to define
-customized matrix vector products. This is the approach taken in
-the
-\\
+
+See the following link for an example that uses large-scale out-of-core computation:
 \href{https://bwlewis.github.io/1000_genomes_examples/PCA_whole_genome.html}{http://bwlewis.github.io/1000\_genomes\_examples/PCA\_whole\_genome.html}
-vignette to work with large out of core genomics data.
-I prefer the simple operator overloading approach and may consider
-retiring the {\tt mult} function argument in some future package version.
-
-NOTE! When a user-defined matrix multiplication function is used, either using
-the {\tt mult} argument or through operator overloading, the R reference IRLBA
-implementation is used instead of the faster C code path in newer package
-versions.
+
+NOTE! The reference R algorithm implementation is used whenever user-defined
+matrix multiplication is specified (instead of the faster C code path).
 
 
 \section{A Quick Summary of the IRLBA Method}\label{sketch}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-irlba.git


