[caffe-contrib] 01/02: New upstream version 1.0.0~rc3+20160930-ga7f950b

Zhou Mo cdluminate-guest at moszumanska.debian.org
Sun Oct 2 14:09:50 UTC 2016


This is an automated email from the git hooks/post-receive script.

cdluminate-guest pushed a commit to branch master
in repository caffe-contrib.

commit 0a7e928f797ca82ca4e037c23ef38558e0a34e74
Author: Zhou Mo <cdluminate at gmail.com>
Date:   Sun Oct 2 14:09:23 2016 +0000

    New upstream version 1.0.0~rc3+20160930-ga7f950b
---
 .gitignore                                         |  3 ++
 Makefile                                           |  5 +++-
 cmake/Cuda.cmake                                   | 15 +++++++---
 cmake/Dependencies.cmake                           |  8 ++++-
 cmake/Modules/FindAtlas.cmake                      |  6 ++--
 cmake/Modules/FindvecLib.cmake                     |  7 +++--
 docs/install_apt.md                                |  9 ++++--
 docs/tutorial/solver.md                            | 13 ++-------
 .../cifar10/cifar10_full_sigmoid_solver.prototxt   |  2 +-
 .../cifar10_full_sigmoid_solver_bn.prototxt        |  2 +-
 examples/cpp_classification/readme.md              |  4 +--
 examples/net_surgery.ipynb                         |  2 +-
 include/caffe/layer_factory.hpp                    |  2 +-
 include/caffe/layers/batch_norm_layer.hpp          | 29 +++++++++---------
 include/caffe/layers/bias_layer.hpp                | 10 +++----
 include/caffe/layers/scale_layer.hpp               | 12 ++++----
 include/caffe/solver.hpp                           |  2 +-
 include/caffe/solver_factory.hpp                   |  2 +-
 include/caffe/util/mkl_alternate.hpp               |  5 ++++
 include/caffe/util/upgrade_proto.hpp               |  6 ++++
 scripts/travis/install-deps.sh                     |  2 +-
 src/caffe/layers/batch_norm_layer.cpp              | 12 ++++++++
 src/caffe/layers/loss_layer.cpp                    |  4 +--
 src/caffe/layers/scale_layer.cpp                   | 14 +++++++--
 src/caffe/proto/caffe.proto                        |  2 +-
 src/caffe/util/benchmark.cpp                       |  3 +-
 src/caffe/util/upgrade_proto.cpp                   | 34 +++++++++++++++++++++-
 27 files changed, 148 insertions(+), 67 deletions(-)

diff --git a/.gitignore b/.gitignore
index 53c1fb0..281ef32 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,6 +47,9 @@
 # PyCharm files
 .idea
 
+# Visual Studio Code files
+.vscode
+
 # OSX dir files
 .DS_Store
 
diff --git a/Makefile b/Makefile
index 403e00a..2489406 100644
--- a/Makefile
+++ b/Makefile
@@ -382,8 +382,11 @@ else
 		LIBRARIES += cblas
 		# 10.10 has accelerate while 10.9 has veclib
 		XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep 'version' | sed 's/[^0-9]*\([0-9]\).*/\1/')
+		XCODE_CLT_GEQ_7 := $(shell [ $(XCODE_CLT_VER) -gt 6 ] && echo 1)
 		XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1)
-		ifeq ($(XCODE_CLT_GEQ_6), 1)
+		ifeq ($(XCODE_CLT_GEQ_7), 1)
+			BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers
+		else ifeq ($(XCODE_CLT_GEQ_6), 1)
 			BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
 			LDFLAGS += -framework Accelerate
 		else
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
index 286a428..eeeb732 100644
--- a/cmake/Cuda.cmake
+++ b/cmake/Cuda.cmake
@@ -174,11 +174,18 @@ function(detect_cuDNN)
             PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_INCLUDE}
             DOC "Path to cuDNN include directory." )
 
-  get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
-  find_library(CUDNN_LIBRARY NAMES libcudnn.so # libcudnn_static.a
-                             PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist}
-                             DOC "Path to cuDNN library.")
+  # dynamic libs have different suffixes on Mac and Linux
+  if(APPLE)
+    set(CUDNN_LIB_NAME "libcudnn.dylib")
+  else()
+    set(CUDNN_LIB_NAME "libcudnn.so")
+  endif()
 
+  get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH)
+  find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME}
+   PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} ${__libpath_hist}/../lib
+   DOC "Path to cuDNN library.")
+  
   if(CUDNN_INCLUDE AND CUDNN_LIBRARY)
     set(HAVE_CUDNN  TRUE PARENT_SCOPE)
     set(CUDNN_FOUND TRUE PARENT_SCOPE)
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index c7b6a17..ae9ce8e 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -26,7 +26,7 @@ include(cmake/ProtoBuf.cmake)
 # ---[ HDF5
 find_package(HDF5 COMPONENTS HL REQUIRED)
 include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR})
-list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES})
+list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES})
 
 # ---[ LMDB
 if(USE_LMDB)
@@ -102,6 +102,12 @@ elseif(APPLE)
   find_package(vecLib REQUIRED)
   include_directories(SYSTEM ${vecLib_INCLUDE_DIR})
   list(APPEND Caffe_LINKER_LIBS ${vecLib_LINKER_LIBS})
+
+  if(VECLIB_FOUND)
+    if(NOT vecLib_INCLUDE_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*")
+      add_definitions(-DUSE_ACCELERATE)
+    endif()
+  endif()
 endif()
 
 # ---[ Python
diff --git a/cmake/Modules/FindAtlas.cmake b/cmake/Modules/FindAtlas.cmake
index 6e15643..9c665a4 100644
--- a/cmake/Modules/FindAtlas.cmake
+++ b/cmake/Modules/FindAtlas.cmake
@@ -26,9 +26,9 @@ set(Atlas_LIB_SEARCH_PATHS
 find_path(Atlas_CBLAS_INCLUDE_DIR   NAMES cblas.h   PATHS ${Atlas_INCLUDE_SEARCH_PATHS})
 find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS})
 
-find_library(Atlas_CBLAS_LIBRARY NAMES  ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS})
-find_library(Atlas_BLAS_LIBRARY NAMES   atlas_r   atlas                 PATHS ${Atlas_LIB_SEARCH_PATHS})
-find_library(Atlas_LAPACK_LIBRARY NAMES alapack_r alapack lapack_atlas  PATHS ${Atlas_LIB_SEARCH_PATHS})
+find_library(Atlas_CBLAS_LIBRARY NAMES  ptcblas_r ptcblas cblas_r cblas       PATHS ${Atlas_LIB_SEARCH_PATHS})
+find_library(Atlas_BLAS_LIBRARY NAMES   atlas_r   atlas                       PATHS ${Atlas_LIB_SEARCH_PATHS})
+find_library(Atlas_LAPACK_LIBRARY NAMES lapack alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS})
 
 set(LOOKED_FOR
   Atlas_CBLAS_INCLUDE_DIR
diff --git a/cmake/Modules/FindvecLib.cmake b/cmake/Modules/FindvecLib.cmake
index 9600da4..4604336 100644
--- a/cmake/Modules/FindvecLib.cmake
+++ b/cmake/Modules/FindvecLib.cmake
@@ -14,9 +14,10 @@ set(__veclib_include_suffix "Frameworks/vecLib.framework/Versions/Current/Header
 
 find_path(vecLib_INCLUDE_DIR vecLib.h
           DOC "vecLib include directory"
-          PATHS /System/Library/${__veclib_include_suffix}
-                /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix}
-                /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/)
+          PATHS /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix}
+                /System/Library/${__veclib_include_suffix}
+                /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
+          NO_DEFAULT_PATH)
 
 include(FindPackageHandleStandardArgs)
 find_package_handle_standard_args(vecLib DEFAULT_MSG vecLib_INCLUDE_DIR)
diff --git a/docs/install_apt.md b/docs/install_apt.md
index 2976e3c..3de5a49 100644
--- a/docs/install_apt.md
+++ b/docs/install_apt.md
@@ -9,14 +9,19 @@ title: Installation: Ubuntu
     sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
     sudo apt-get install --no-install-recommends libboost-all-dev
 
-**CUDA**: Install via the NVIDIA package instead of `apt-get` to be certain of the library and driver versions.
-Install the library and latest driver separately; the driver bundled with the library is usually out-of-date.
+**CUDA**: Install via `apt-get` or the NVIDIA `.run` package.
+The NVIDIA package tends to follow more recent library and driver versions, but the installation is more manual.
+If installing from packages, install the library and latest driver separately; the driver bundled with the library is usually out-of-date.
 This can be skipped for CPU-only installation.
 
 **BLAS**: install ATLAS by `sudo apt-get install libatlas-base-dev` or install OpenBLAS or MKL for better CPU performance.
 
 **Python** (optional): if you use the default Python you will need to `sudo apt-get install` the `python-dev` package to have the Python headers for building the pycaffe interface.
 
+**Compatibility notes, 16.04**
+
+CUDA 8 is required on Ubuntu 16.04.
+
 **Remaining dependencies, 14.04**
 
 Everything is packaged in 14.04.
diff --git a/docs/tutorial/solver.md b/docs/tutorial/solver.md
index b719f71..81c6263 100644
--- a/docs/tutorial/solver.md
+++ b/docs/tutorial/solver.md
@@ -209,18 +209,11 @@ What distinguishes the method from SGD is the weight setting $$ W $$ on which we
 The **RMSprop** (`type: "RMSProp"`), suggested by Tieleman in a Coursera course lecture, is a gradient-based optimization method (like SGD). The update formulas are
 
 $$
-(v_t)_i =
-\begin{cases}
-(v_{t-1})_i + \delta, &(\nabla L(W_t))_i(\nabla L(W_{t-1}))_i > 0\\
-(v_{t-1})_i \cdot (1-\delta), & \text{else}
-\end{cases}
+\operatorname{MS}((W_t)_i)= \delta\operatorname{MS}((W_{t-1})_i)+ (1-\delta)(\nabla L(W_t))_i^2 \\
+(W_{t+1})_i= (W_{t})_i -\alpha\frac{(\nabla L(W_t))_i}{\sqrt{\operatorname{MS}((W_t)_i)}}
 $$
 
-$$
-(W_{t+1})_i =(W_t)_i - \alpha (v_t)_i,
-$$
-
-If the gradient updates results in oscillations the gradient is reduced by times $$1-\delta$$. Otherwise it will be increased by $$\delta$$. The default value of $$\delta$$ (`rms_decay`) is set to $$\delta = 0.02$$.
+The default value of $$\delta$$ (`rms_decay`) is set to $$\delta=0.99$$.
 
 [1] T. Tieleman, and G. Hinton.
     [RMSProp: Divide the gradient by a running average of its recent magnitude](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf).
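
A minimal C++ sketch of this per-element update, illustrative rather than Caffe's actual solver code, assuming a small epsilon is added in the denominator for numerical safety (the formula above omits it):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // delta corresponds to rms_decay (default 0.99); MS holds the running
    // mean of squared gradients.
    void rmsprop_step(std::vector<float>& W, const std::vector<float>& grad,
                      std::vector<float>& MS, float alpha, float delta,
                      float eps = 1e-8f) {
      for (std::size_t i = 0; i < W.size(); ++i) {
        // MS(t) = delta * MS(t-1) + (1 - delta) * grad(t)^2
        MS[i] = delta * MS[i] + (1.0f - delta) * grad[i] * grad[i];
        // W(t+1) = W(t) - alpha * grad(t) / sqrt(MS(t))
        W[i] -= alpha * grad[i] / (std::sqrt(MS[i]) + eps);
      }
    }
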
diff --git a/examples/cifar10/cifar10_full_sigmoid_solver.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt
index 7dd3ecb..a8e5539 100644
--- a/examples/cifar10/cifar10_full_sigmoid_solver.prototxt
+++ b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt
@@ -17,7 +17,7 @@ momentum: 0.9
 lr_policy: "step"
 gamma: 1
 stepsize: 5000
-# Display every 200 iterations
+# Display every 100 iterations
 display: 100
 # The maximum number of iterations
 max_iter: 60000
diff --git a/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt
index a57b280..a4dabd6 100644
--- a/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt
+++ b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt
@@ -17,7 +17,7 @@ momentum: 0.9
 lr_policy: "step"
 gamma: 1
 stepsize: 5000
-# Display every 200 iterations
+# Display every 100 iterations
 display: 100
 # The maximum number of iterations
 max_iter: 60000
diff --git a/examples/cpp_classification/readme.md b/examples/cpp_classification/readme.md
index 0de2885..4f683aa 100644
--- a/examples/cpp_classification/readme.md
+++ b/examples/cpp_classification/readme.md
@@ -10,7 +10,7 @@ priority: 10
 
 Caffe, at its core, is written in C++. It is possible to use the C++
 API of Caffe to implement an image classification application similar
-to the Python code presented in one of the Notebook example. To look
+to the Python code presented in one of the Notebook examples. To look
 at a more general-purpose example of the Caffe C++ API, you should
 study the source code of the command line tool `caffe` in `tools/caffe.cpp`.
 
@@ -19,7 +19,7 @@ study the source code of the command line tool `caffe` in `tools/caffe.cpp`.
 A simple C++ code is proposed in
 `examples/cpp_classification/classification.cpp`. For the sake of
 simplicity, this example does not support oversampling of a single
-sample nor batching of multiple independant samples. This example is
+sample nor batching of multiple independent samples. This example is
 not trying to reach the maximum possible classification throughput on
 a system, but special care was given to avoid unnecessary
 pessimization while keeping the code readable.
diff --git a/examples/net_surgery.ipynb b/examples/net_surgery.ipynb
index d50d503..217c2d1 100644
--- a/examples/net_surgery.ipynb
+++ b/examples/net_surgery.ipynb
@@ -5479,7 +5479,7 @@
     "\n",
     "Let's take the standard Caffe Reference ImageNet model \"CaffeNet\" and transform it into a fully convolutional net for efficient, dense inference on large inputs. This model generates a classification map that covers a given input size instead of a single classification. In particular a 8 $\\times$ 8 classification map on a 451 $\\times$ 451 input gives 64x the output in only 3x the time. The computation exploits a natural efficiency of convolutional network (convnet) structure by  [...]
     "\n",
-    "To do so we translate the `InnerProduct` matrix multiplication layers of CaffeNet into `Convolutional` layers. This is the only change: the other layer types are agnostic to spatial size. Convolution is translation-invariant, activations are elementwise operations, and so on. The `fc6` inner product when carried out as convolution by `fc6-conv` turns into a 6 \\times 6 filter with stride 1 on `pool5`. Back in image space this gives a classification for each 227 $\\times$ 227 box wit [...]
+    "To do so we translate the `InnerProduct` matrix multiplication layers of CaffeNet into `Convolutional` layers. This is the only change: the other layer types are agnostic to spatial size. Convolution is translation-invariant, activations are elementwise operations, and so on. The `fc6` inner product when carried out as convolution by `fc6-conv` turns into a 6 $\\times$ 6 filter with stride 1 on `pool5`. Back in image space this gives a classification for each 227 $\\times$ 227 box w [...]
    ]
   },
   {
diff --git a/include/caffe/layer_factory.hpp b/include/caffe/layer_factory.hpp
index f385afc..2369c13 100644
--- a/include/caffe/layer_factory.hpp
+++ b/include/caffe/layer_factory.hpp
@@ -1,6 +1,6 @@
 /**
  * @brief A layer factory that allows one to register layers.
- * During runtime, registered layers could be called by passing a LayerParameter
+ * During runtime, registered layers can be called by passing a LayerParameter
  * protobuffer to the CreateLayer function:
  *
  *     LayerRegistry<Dtype>::CreateLayer(param);
diff --git a/include/caffe/layers/batch_norm_layer.hpp b/include/caffe/layers/batch_norm_layer.hpp
index 9b2d512..43f7b28 100644
--- a/include/caffe/layers/batch_norm_layer.hpp
+++ b/include/caffe/layers/batch_norm_layer.hpp
@@ -13,25 +13,22 @@ namespace caffe {
  * @brief Normalizes the input to have 0-mean and/or unit (1) variance across
  *        the batch.
  *
- * This layer computes Batch Normalization described in [1].  For
- * each channel in the data (i.e. axis 1), it subtracts the mean and divides
- * by the variance, where both statistics are computed across both spatial
- * dimensions and across the different examples in the batch.
+ * This layer computes Batch Normalization as described in [1]. For each channel
+ * in the data (i.e. axis 1), it subtracts the mean and divides by the variance,
+ * where both statistics are computed across both spatial dimensions and across
+ * the different examples in the batch.
  *
- * By default, during training time, the network is computing global mean/
- * variance statistics via a running average, which is then used at test
- * time to allow deterministic outputs for each input.  You can manually
- * toggle whether the network is accumulating or using the statistics via the
- * use_global_stats option.  IMPORTANT: for this feature to work, you MUST
- * set the learning rate to zero for all three parameter blobs, i.e.,
- * param {lr_mult: 0} three times in the layer definition.
+ * By default, during training time, the network is computing global
+ * mean/variance statistics via a running average, which is then used at test
+ * time to allow deterministic outputs for each input. You can manually toggle
+ * whether the network is accumulating or using the statistics via the
+ * use_global_stats option. For reference, these statistics are kept in the
+ * layer's three blobs: (0) mean, (1) variance, and (2) moving average factor.
  *
  * Note that the original paper also included a per-channel learned bias and
- * scaling factor.  It is possible (though a bit cumbersome) to implement
- * this in caffe using a single-channel DummyDataLayer filled with zeros,
- * followed by a Convolution layer with output the same size as the current.
- * This produces a channel-specific value that can be added or multiplied by
- * the BatchNorm layer's output.
+ * scaling factor. To implement this in Caffe, define a `ScaleLayer` configured
+ * with `bias_term: true` after each `BatchNormLayer` to handle both the bias
+ * and scaling factor.
  *
  * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network
  *     Training by Reducing Internal Covariate Shift." arXiv preprint
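
A rough sketch of the inference-time arithmetic implied by the three blobs described above, assuming (as in the implementation) that blobs (0) and (1) store running sums normalized by the moving average factor in blob (2) before use; eps is the layer's stabilizing term:

    #include <cmath>

    float batch_norm_infer(float x, float mean_blob, float var_blob,
                           float moving_avg_factor, float eps = 1e-5f) {
      // Normalize the accumulated statistics by the moving average factor.
      const float scale =
          moving_avg_factor == 0.0f ? 0.0f : 1.0f / moving_avg_factor;
      const float mean = mean_blob * scale;
      const float var = var_blob * scale;
      return (x - mean) / std::sqrt(var + eps);
    }
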
diff --git a/include/caffe/layers/bias_layer.hpp b/include/caffe/layers/bias_layer.hpp
index eedc3aa..9639c9c 100644
--- a/include/caffe/layers/bias_layer.hpp
+++ b/include/caffe/layers/bias_layer.hpp
@@ -10,13 +10,13 @@
 namespace caffe {
 
 /**
- * @brief Computes a sum of two input Blobs, with the shape of the
- *        latter Blob "broadcast" to match the shape of the former.
- *        Equivalent to tiling the latter Blob, then computing the elementwise
- *        sum.
+ * @brief Computes a sum of two input Blobs, with the shape of the latter Blob
+ *        "broadcast" to match the shape of the former. Equivalent to tiling
+ *        the latter Blob, then computing the elementwise sum.
  *
  * The second input may be omitted, in which case it's learned as a parameter
- * of the layer.
+ * of the layer. Note: if both bias and scaling are desired, both operations
+ * can be handled by `ScaleLayer` configured with `bias_term: true`.
  */
 template <typename Dtype>
 class BiasLayer : public Layer<Dtype> {
diff --git a/include/caffe/layers/scale_layer.hpp b/include/caffe/layers/scale_layer.hpp
index 924df2e..45b714d 100644
--- a/include/caffe/layers/scale_layer.hpp
+++ b/include/caffe/layers/scale_layer.hpp
@@ -12,13 +12,15 @@
 namespace caffe {
 
 /**
- * @brief Computes a product of two input Blobs, with the shape of the
- *        latter Blob "broadcast" to match the shape of the former.
+ * @brief Computes the elementwise product of two input Blobs, with the shape of
+ *        the latter Blob "broadcast" to match the shape of the former.
  *        Equivalent to tiling the latter Blob, then computing the elementwise
- *        product.
+ *        product. Note: for efficiency and convenience, this layer can
+ *        additionally perform a "broadcast" sum when `bias_term: true`
+ *        is set.
  *
- * The second input may be omitted, in which case it's learned as a parameter
- * of the layer.
+ * The latter (scale) input may be omitted, in which case it's learned as a
+ * parameter of the layer (as is the bias, if it is included).
  */
 template <typename Dtype>
 class ScaleLayer: public Layer<Dtype> {
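
A minimal sketch of the broadcast product (with the optional bias) for the common case of a per-channel scale over an N x C x H x W blob; the real layer generalizes this to arbitrary axes:

    #include <vector>

    // Multiply each channel by its scale and, if a bias is supplied, add
    // the matching per-channel bias; bias may be NULL.
    void scale_forward(std::vector<float>& data, int N, int C, int H, int W,
                       const std::vector<float>& scale,
                       const std::vector<float>* bias) {
      const int spatial = H * W;
      for (int n = 0; n < N; ++n) {
        for (int c = 0; c < C; ++c) {
          const float s = scale[c];
          const float b = bias ? (*bias)[c] : 0.0f;
          float* x = &data[(n * C + c) * spatial];
          for (int i = 0; i < spatial; ++i) {
            x[i] = x[i] * s + b;
          }
        }
      }
    }
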
diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp
index 38259ed..eafcee3 100644
--- a/include/caffe/solver.hpp
+++ b/include/caffe/solver.hpp
@@ -12,7 +12,7 @@ namespace caffe {
 /**
   * @brief Enumeration of actions that a client of the Solver may request by
   * implementing the Solver's action request function, which a
-  * a client may optionally provide in order to request early termination
+  * client may optionally provide in order to request early termination
   * or saving a snapshot without exiting. In the executable caffe, this
   * mechanism is used to allow the snapshot to be saved when stopping
   * execution with a SIGINT (Ctrl-C).
diff --git a/include/caffe/solver_factory.hpp b/include/caffe/solver_factory.hpp
index cfff721..a5b1607 100644
--- a/include/caffe/solver_factory.hpp
+++ b/include/caffe/solver_factory.hpp
@@ -15,7 +15,7 @@
  * and its type is its C++ class name, but without the "Solver" at the end
  * ("MyAwesomeSolver" -> "MyAwesome").
  *
- * If the solver is going to be created simply by its constructor, in your c++
+ * If the solver is going to be created simply by its constructor, in your C++
  * file, add the following line:
  *
  *    REGISTER_SOLVER_CLASS(MyAwesome);
diff --git a/include/caffe/util/mkl_alternate.hpp b/include/caffe/util/mkl_alternate.hpp
index 3355b66..95df0f9 100644
--- a/include/caffe/util/mkl_alternate.hpp
+++ b/include/caffe/util/mkl_alternate.hpp
@@ -7,9 +7,14 @@
 
 #else  // If use MKL, simply include the MKL header
 
+#ifdef USE_ACCELERATE
+#include <Accelerate/Accelerate.h>
+#else
 extern "C" {
 #include <cblas.h>
 }
+#endif  // USE_ACCELERATE
+
 #include <math.h>
 
 // Functions that caffe uses but are not present if MKL is not linked.
diff --git a/include/caffe/util/upgrade_proto.hpp b/include/caffe/util/upgrade_proto.hpp
index 14e1936..b145822 100644
--- a/include/caffe/util/upgrade_proto.hpp
+++ b/include/caffe/util/upgrade_proto.hpp
@@ -65,6 +65,12 @@ bool NetNeedsInputUpgrade(const NetParameter& net_param);
 // Perform all necessary transformations to upgrade input fields into layers.
 void UpgradeNetInput(NetParameter* net_param);
 
+// Return true iff the Net contains batch norm layers with manual local LRs.
+bool NetNeedsBatchNormUpgrade(const NetParameter& net_param);
+
+// Perform all necessary transformations to upgrade batch norm layers.
+void UpgradeNetBatchNorm(NetParameter* net_param);
+
 // Return true iff the solver contains any old solver_type specified as enums
 bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param);
 
diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh
index ee16d36..daef5c4 100755
--- a/scripts/travis/install-deps.sh
+++ b/scripts/travis/install-deps.sh
@@ -56,7 +56,7 @@ else
       dh-autoreconf \
       unzip
 
-    wget https://github.com/google/protobuf/archive/v3.0.0-beta-3.tar.gz -O protobuf3.tar.gz
+    wget https://github.com/google/protobuf/archive/3.0.x.tar.gz -O protobuf3.tar.gz
     tar -xzf protobuf3.tar.gz -C $PROTOBUF3_DIR --strip 1
     rm protobuf3.tar.gz
     cd $PROTOBUF3_DIR
diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp
index a69d8f9..e661abb 100644
--- a/src/caffe/layers/batch_norm_layer.cpp
+++ b/src/caffe/layers/batch_norm_layer.cpp
@@ -34,6 +34,18 @@ void BatchNormLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
                 this->blobs_[i]->mutable_cpu_data());
     }
   }
+  // Mask statistics from optimization by setting local learning rates
+  // for mean, variance, and the bias correction to zero.
+  for (int i = 0; i < this->blobs_.size(); ++i) {
+    if (this->layer_param_.param_size() == i) {
+      ParamSpec* fixed_param_spec = this->layer_param_.add_param();
+      fixed_param_spec->set_lr_mult(0.f);
+    } else {
+      CHECK_EQ(this->layer_param_.param(i).lr_mult(), 0.f)
+          << "Cannot configure batch normalization statistics as layer "
+          << "parameters.";
+    }
+  }
 }
 
 template <typename Dtype>
diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp
index c0b7a86..afb1ce9 100644
--- a/src/caffe/layers/loss_layer.cpp
+++ b/src/caffe/layers/loss_layer.cpp
@@ -16,8 +16,8 @@ void LossLayer<Dtype>::LayerSetUp(
 template <typename Dtype>
 void LossLayer<Dtype>::Reshape(
     const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
-  CHECK_EQ(bottom[0]->num(), bottom[1]->num())
-      << "The data and label should have the same number.";
+  CHECK_EQ(bottom[0]->shape(0), bottom[1]->shape(0))
+      << "The data and label should have the same first dimension.";
   vector<int> loss_shape(0);  // Loss layers output a scalar; 0 axes.
   top[0]->Reshape(loss_shape);
 }
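
The switch from num() to shape(0) frees this check from Blob's legacy 4-axis accessors: any blob's first axis serves as the batch dimension. A stand-in for the check, assuming only that shapes are exposed as vectors:

    #include <cassert>
    #include <vector>

    // Compare the first axis (batch size) of two blobs of any
    // dimensionality, e.g. N x C x H x W data against N x 1 labels.
    void check_same_batch(const std::vector<int>& data_shape,
                          const std::vector<int>& label_shape) {
      assert(!data_shape.empty() && !label_shape.empty());
      assert(data_shape[0] == label_shape[0] &&
             "The data and label should have the same first dimension.");
    }
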
diff --git a/src/caffe/layers/scale_layer.cpp b/src/caffe/layers/scale_layer.cpp
index ecdbb12..e652dad 100644
--- a/src/caffe/layers/scale_layer.cpp
+++ b/src/caffe/layers/scale_layer.cpp
@@ -56,9 +56,17 @@ void ScaleLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
     bias_bottom_vec_.resize(1);
     bias_bottom_vec_[0] = bottom[0];
     bias_layer_->SetUp(bias_bottom_vec_, top);
-    bias_param_id_ = this->blobs_.size();
-    this->blobs_.resize(bias_param_id_ + 1);
-    this->blobs_[bias_param_id_] = bias_layer_->blobs()[0];
+    if (this->blobs_.size() + bottom.size() < 3) {
+      // case: blobs.size == 1 && bottom.size == 1
+      // or blobs.size == 0 && bottom.size == 2
+      bias_param_id_ = this->blobs_.size();
+      this->blobs_.resize(bias_param_id_ + 1);
+      this->blobs_[bias_param_id_] = bias_layer_->blobs()[0];
+    } else {
+      // bias param already initialized
+      bias_param_id_ = this->blobs_.size() - 1;
+      bias_layer_->blobs()[0] = this->blobs_[bias_param_id_];
+    }
     bias_propagate_down_.resize(1, false);
   }
   this->param_propagate_down_.resize(this->blobs_.size(), true);
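
The new branch avoids re-appending the bias blob when this->blobs_ was already populated (for instance, when a layer's parameters were initialized before SetUp runs again). A hypothetical helper spelling out the arithmetic of the condition:

    #include <cstddef>

    // With a learned scale, blobs.size() == 1 and bottom.size() == 1; with
    // the scale supplied as a second bottom, blobs.size() == 0 and
    // bottom.size() == 2. Either sum is 2 < 3, so the bias blob is appended.
    // Once scale and bias blobs both exist, the sum reaches 3 and the
    // existing bias blob is reused instead.
    bool needs_new_bias_blob(std::size_t num_blobs, std::size_t num_bottom) {
      return num_blobs + num_bottom < 3;
    }
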
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 1556781..6940a70 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -219,7 +219,7 @@ message SolverParameter {
 
   // RMSProp decay value
   // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
-  optional float rms_decay = 38;
+  optional float rms_decay = 38 [default = 0.99];
 
   // If true, print information about the state of the net that may help with
   // debugging learning problems.
diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp
index 1d269c3..d994225 100644
--- a/src/caffe/util/benchmark.cpp
+++ b/src/caffe/util/benchmark.cpp
@@ -44,7 +44,6 @@ void Timer::Stop() {
     if (Caffe::mode() == Caffe::GPU) {
 #ifndef CPU_ONLY
       CUDA_CHECK(cudaEventRecord(stop_gpu_, 0));
-      CUDA_CHECK(cudaEventSynchronize(stop_gpu_));
 #else
       NO_GPU;
 #endif
@@ -66,6 +65,7 @@ float Timer::MicroSeconds() {
   }
   if (Caffe::mode() == Caffe::GPU) {
 #ifndef CPU_ONLY
+    CUDA_CHECK(cudaEventSynchronize(stop_gpu_));
     CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_,
                                     stop_gpu_));
     // Cuda only measure milliseconds
@@ -89,6 +89,7 @@ float Timer::MilliSeconds() {
   }
   if (Caffe::mode() == Caffe::GPU) {
 #ifndef CPU_ONLY
+    CUDA_CHECK(cudaEventSynchronize(stop_gpu_));
     CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_,
                                     stop_gpu_));
 #else
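
Taken together, these three hunks move the host-side wait out of Stop() and into the accessors, so stopping a timer no longer blocks until the GPU catches up. The pattern, sketched against the CUDA runtime API with error checking elided:

    #include <cuda_runtime.h>

    // Record asynchronously at stop time ...
    void timer_stop(cudaEvent_t stop) {
      cudaEventRecord(stop, 0);  // no cudaEventSynchronize here
    }

    // ... and synchronize only when the elapsed time is actually read.
    float timer_milliseconds(cudaEvent_t start, cudaEvent_t stop) {
      cudaEventSynchronize(stop);
      float ms = 0.0f;
      cudaEventElapsedTime(&ms, start, stop);
      return ms;
    }
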
diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp
index 9e18691..a0aacbe 100644
--- a/src/caffe/util/upgrade_proto.cpp
+++ b/src/caffe/util/upgrade_proto.cpp
@@ -14,7 +14,8 @@ namespace caffe {
 
 bool NetNeedsUpgrade(const NetParameter& net_param) {
   return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param)
-      || NetNeedsDataUpgrade(net_param) || NetNeedsInputUpgrade(net_param);
+      || NetNeedsDataUpgrade(net_param) || NetNeedsInputUpgrade(net_param)
+      || NetNeedsBatchNormUpgrade(net_param);
 }
 
 bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) {
@@ -71,6 +72,14 @@ bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) {
     LOG(WARNING) << "Note that future Caffe releases will only support "
                  << "input layers and not input fields.";
   }
+  // NetParameter uses old style batch norm layers; try to upgrade it.
+  if (NetNeedsBatchNormUpgrade(*param)) {
+    LOG(INFO) << "Attempting to upgrade batch norm layers using deprecated "
+              << "params: " << param_file;
+    UpgradeNetBatchNorm(param);
+    LOG(INFO) << "Successfully upgraded batch norm layers using deprecated "
+              << "params.";
+  }
   return success;
 }
 
@@ -991,6 +1000,29 @@ void UpgradeNetInput(NetParameter* net_param) {
   net_param->clear_input_dim();
 }
 
+bool NetNeedsBatchNormUpgrade(const NetParameter& net_param) {
+  for (int i = 0; i < net_param.layer_size(); ++i) {
+    // Check if BatchNorm layers declare three parameters, as required by
+    // the previous BatchNorm layer definition.
+    if (net_param.layer(i).type() == "BatchNorm"
+        && net_param.layer(i).param_size() == 3) {
+      return true;
+    }
+  }
+  return false;
+}
+
+void UpgradeNetBatchNorm(NetParameter* net_param) {
+  for (int i = 0; i < net_param->layer_size(); ++i) {
+    // Check if BatchNorm layers declare three parameters, as required by
+    // the previous BatchNorm layer definition.
+    if (net_param->layer(i).type() == "BatchNorm"
+        && net_param->layer(i).param_size() == 3) {
+      net_param->mutable_layer(i)->clear_param();
+    }
+  }
+}
+
 // Return true iff the solver contains any old solver_type specified as enums
 bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param) {
   if (solver_param.has_solver_type()) {
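
A hedged usage sketch of the new upgrade path, building an old-style net in memory through the generated protobuf API (the calls match those used in the hunks above):

    #include "caffe/proto/caffe.pb.h"
    #include "caffe/util/upgrade_proto.hpp"

    // Declare a BatchNorm layer with three explicit param specs (the
    // deprecated style), then run the upgrade, which clears them so
    // LayerSetUp can install lr_mult: 0 specs itself.
    void upgrade_batch_norm_example() {
      caffe::NetParameter net;
      caffe::LayerParameter* bn = net.add_layer();
      bn->set_type("BatchNorm");
      for (int i = 0; i < 3; ++i) {
        bn->add_param()->set_lr_mult(0.f);
      }
      if (caffe::NetNeedsBatchNormUpgrade(net)) {
        caffe::UpgradeNetBatchNorm(&net);  // drops the deprecated specs
      }
    }
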

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/caffe-contrib.git


